├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── data
│   ├── bechmarks.md
│   ├── bimanual
│   │   ├── benchmarks.md
│   │   ├── datasets.md
│   │   └── simulators.md
│   ├── bipedal
│   │   ├── humanoid-control
│   │   │   ├── benchmarks.md
│   │   │   └── datatsets.md
│   │   └── simulated-humanoid-control
│   │       └── datatsets.md
│   ├── dexterous
│   │   ├── benchmarks.md
│   │   └── datasets.md
│   ├── manipulation
│   │   ├── benchmarks.md
│   │   ├── datasets.md
│   │   └── deformable-object-manipulation
│   │       └── benchmarks.md
│   ├── quadruped
│   │   └── benchmarks.md
│   ├── reinforcement-learning
│   │   └── benchmarks.md
│   └── simulators.md
├── fm
│   ├── README.md
│   ├── data
│   │   ├── benchmarks.md
│   │   ├── datasets.md
│   │   ├── eai
│   │   │   ├── benchmarks.md
│   │   │   ├── datasets.md
│   │   │   └── simulators.md
│   │   ├── multimodal-understanding-and-reasoning
│   │   │   └── benchmarks.md
│   │   ├── simulators.md
│   │   └── vla
│   │       ├── benchmarks.md
│   │       └── datasets.md
│   └── papers
│       ├── 00-Latest
│       │   ├── CVPR2025.md
│       │   ├── CoRL2025.md
│       │   ├── ICML2025.md
│       │   ├── spatial-ai.md
│       │   ├── vla.md
│       │   └── world-model.md
│       ├── 00-Surveys
│       │   ├── foundation-model.md
│       │   └── navigation.md
│       ├── 01-Vision-Language-Action
│       │   ├── 3d-vla.md
│       │   ├── general-robotic-manipulation.md
│       │   ├── vla-for-bimanual.md
│       │   ├── vla-for-mobile-manipulation.md
│       │   └── vla.md
│       ├── 02-Robotic-Transformer
│       │   ├── robotic-graph-transformer.md
│       │   └── robotic-transformer.md
│       ├── 03-LLM-Application
│       │   ├── high-level-task-planning.md
│       │   └── llm-based-code-generation.md
│       ├── 04-Multimodal-Large-Model-Application
│       │   ├── finetuning-mmlm-to-vla.md
│       │   ├── generating-representation.md
│       │   ├── multimal-reasoning-task-planning.md
│       │   └── vision-audio-tactile.md
│       ├── 06-Embodiment-AI
│       │   ├── embodied-ai.md
│       │   ├── multimodal-understanding-and-reasoning
│       │   │   ├── spatial-understanding-and-reasoning.md
│       │   │   ├── video-understanding.md
│       │   │   └── vision-understanding.md
│       │   ├── platform.md
│       │   ├── replica-creation.md
│       │   └── visual-representation-learning.md
│       ├── 07-World-Models-and-Application
│       │   ├── application
│       │   │   ├── application.md
│       │   │   ├── video-wm-for-evaluation.md
│       │   │   ├── wm-for-grasping.md
│       │   │   ├── wm-for-huamnoid.md
│       │   │   ├── wm-for-manipulation.md
│       │   │   └── wm-for-navigation.md
│       │   └── world-model.md
│       ├── 08-Generative-AI-Application
│       │   ├── finetuning-image-editing-for-action.md
│       │   ├── finetuning-text-to-image-for-action.md
│       │   ├── generative-planning.md
│       │   └── text-to-video-for-action
│       │       ├── combine-video-and-action-generation.md
│       │       ├── finetuning-video-generation-for-action.md
│       │       ├── planning-by-video-generation-model.md
│       │       └── vision-representation-from-vdm.md
│       ├── 09-Vision-Foundation-Model-Application
│       │   └── vfm-for-manipulation.md
│       ├── 10-Navigation
│       │   └── navigation.md
│       ├── README.md
│       ├── instructions.md
│       └── learning-from-video.md
├── papers
│   ├── 00-Latest
│   │   ├── CVPR2025.md
│   │   ├── CoRL2025.md
│   │   ├── ICLR2025.md
│   │   ├── ICML2025.md
│   │   ├── ICRA2025.md
│   │   ├── IJRR2025.md
│   │   ├── Nvidia.md
│   │   ├── RSS2025.md
│   │   ├── SIGGRAPH2025.md
│   │   ├── bipedal.md
│   │   ├── human-robot-interaction.md
│   │   ├── imtation-learning.md
│   │   ├── manipulation.md
│   │   ├── physical-based-character-control.md
│   │   └── quadruped.md
│   ├── 00-Surveys
│   │   └── survey.md
│   ├── 01-Robot-Learning-Theory
│   │   ├── latent-action.md
│   │   ├── lifelong-learning-for-robot-learning.md
│   │   ├── lifelong-rl-for-robot-learning.md
│   │   ├── meta-learning-for-robot-learning.md
│   │   ├── reinforcement-learning-for-continuous-control.md
│   │   ├── reinforcement-learning-for-robot-learning.md
│   │   ├── singal-temporal-logical.md
│   │   └── theory.md
│   ├── 02-Data-Acquisition
│   │   ├── bimanual.md
│   │   ├── manipulation.md
│   │   ├── sim.md
│   │   ├── synthetic-data.md
│   │   └── theory.md
│   ├── 03-Legged-Robot
│   │   ├── bipedal
│   │   │   ├── hardware-design.md
│   │   │   ├── humanoid-creator.md
│   │   │   ├── lower-body-control
│   │   │   │   ├── gaits-control.md
│   │   │   │   ├── humanoid-control.md
│   │   │   │   └── locomotion-over-challenging-terrain.md
│   │   │   ├── motion-generation.md
│   │   │   ├── resources.md
│   │   │   ├── survey.md
│   │   │   ├── teleoperation
│   │   │   │   ├── dexterous-teleoperation.md
│   │   │   │   ├── human2humanoid-controller.md
│   │   │   │   ├── pose-estimation.md
│   │   │   │   └── whole-body-teleoperation.md
│   │   │   ├── upper-body-control
│   │   │   │   └── dexterous-manipulation.md
│   │   │   └── whole-body-control
│   │   │       ├── advanced-controller
│   │   │       │   ├── humanoid-control-with-llm.md
│   │   │       │   └── motion-prior-controller.md
│   │   │       ├── human2humanoid-controller.md
│   │   │       ├── humanoid-loco-manipulation.md
│   │   │       ├── multi-task.md
│   │   │       ├── posture-control.md
│   │   │       ├── standing-up.md
│   │   │       └── whole-body-control.md
│   │   ├── mpc-locomotion-control.md
│   │   ├── quadruped
│   │   │   ├── advanced-controller
│   │   │   │   └── motion-prior-controller.md
│   │   │   ├── differentiable-simulator-for-quadruped.md
│   │   │   ├── gaits-control.md
│   │   │   ├── hardware-design.md
│   │   │   ├── locomotion-and-manipulation.md
│   │   │   ├── locomotion-over-challenging-terrain.md
│   │   │   ├── locomotion.md
│   │   │   ├── multi-task-controller.md
│   │   │   ├── navigation-and-locomotion.md
│   │   │   ├── physical-based-control.md
│   │   │   └── recovery-controller.md
│   │   └── wheeled-legged-robot.md
│   ├── 04-Manipulation
│   │   ├── 00-manipulation.md
│   │   ├── 01-manipulation-with-3d.md
│   │   ├── 02-manipulation-with-multimodal-sensing.md
│   │   ├── 03-mobile-manipulation.md
│   │   ├── 04-visual-rl-for-manipulation.md
│   │   ├── 05-interactive-learning-for-manipulation.md
│   │   ├── 06-lifelong-manipulation.md
│   │   ├── 07-force-control-for-manipulation.md
│   │   ├── 08-data-augmentation.md
│   │   ├── 09-hardware-design.md
│   │   ├── base-model-for-manipulation
│   │   │   ├── diffusion-model-for-manipulation.md
│   │   │   ├── dit-for-manipulation.md
│   │   │   ├── flow-for-manipulation.md
│   │   │   ├── generative-models-for-manipulation.md
│   │   │   └── mamba-for-manipulation.md
│   │   ├── bimanual
│   │   │   ├── bimanual-manipulation.md
│   │   │   ├── data-collecting.md
│   │   │   ├── data-generation.md
│   │   │   ├── dexterous-manipulation.md
│   │   │   └── mobile-bimanual.md
│   │   ├── dexterous
│   │   │   ├── dexterous-hand-grasping.md
│   │   │   ├── dexterous-manipulation.md
│   │   │   ├── dexterous-teleoperation.md
│   │   │   ├── force-control.md
│   │   │   └── in-hand-manipulation.md
│   │   ├── perception
│   │   │   ├── 3d-visual-representation.md
│   │   │   ├── pose-estimation.md
│   │   │   ├── representation-learning.md
│   │   │   └── tactile-representation.md
│   │   └── tasks
│   │       ├── deformable-object-manipulation.md
│   │       ├── grasping.md
│   │       ├── long-horizon-manipulation.md
│   │       ├── packing.md
│   │       ├── planning.md
│   │       ├── task-motion-planning.md
│   │       └── tool-usage.md
│   ├── 05-Multi-Embodiment-Learning
│   │   ├── locomotion.md
│   │   ├── manipulation.md
│   │   └── representation-learning.md
│   ├── 06-Sim-to-Real
│   │   ├── imitation-gap.md
│   │   └── transfer-method
│   │       ├── adaption-method.md
│   │       ├── advanced-method.md
│   │       ├── randomization-menthod.md
│   │       ├── survey.md
│   │       └── system-identification.md
│   ├── 07-Navigation
│   │   ├── legged-robot.md
│   │   └── navigation.md
│   ├── 08-Physical-based-Character-Control
│   │   ├── behavior-foundation-model.md
│   │   ├── challenge-terrain-traversal.md
│   │   ├── human-object-interaction.md
│   │   ├── humanoid-scene-interaction.md
│   │   ├── motion-generation.md
│   │   ├── motion-imitation
│   │   │   ├── motion-prior.md
│   │   │   └── motion-tracking.md
│   │   ├── multi-objective-rl-method.md
│   │   ├── multi-task-controller.md
│   │   ├── realtime-controller.md
│   │   └── survey.md
│   ├── 09-Industrial-Application
│   │   ├── assembly.md
│   │   └── cooking.md
│   ├── 10-AI-System-on-Robotics
│   │   ├── deepmind.md
│   │   ├── google.md
│   │   ├── intel.md
│   │   ├── meta.md
│   │   ├── nvidia.md
│   │   ├── tencent.md
│   │   └── unitree.md
│   ├── 11-Human-Robot-Interaction
│   │   ├── handover.md
│   │   ├── human-robot-interaction.md
│   │   └── humanoid-human-interaction.md
│   └── README.md
└── workshop
    ├── CoRL
    │   └── 2024CoRL.md
    └── RSS
        └── 2025RSS.md
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | Anyone interested in robot learning is welcome to contribute to this repo (via pull request):
2 |
3 | - You can add the latest publications / code / datasets / benchmarks directly to the corresponding folder or file.
4 | - If you would like to modify the structure of files/tables, please give a reason!
5 | - You are welcome to add anything helpful!
6 |
7 | If you are interested in this project, you are very welcome to join!
8 |
9 | - You can recommend the latest related papers / code / datasets / benchmarks and update the corresponding file or folder via pull request.
10 | - If you want to change the structure of a document or table, please give a reason!
11 | - Any helpful suggestion is a welcome contribution!
12 |
13 | Welcome!
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 动千山
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Robot Learning
2 | [Awesome](https://awesome.re) [MIT License](https://opensource.org/licenses/MIT) [Anti-996 License](https://github.com/996icu/996.ICU/blob/master/LICENSE)
3 |
4 | Papers, datasets, applications, tutorials.
5 |
6 | **Covering top conferences, journals, and workshops:**
7 |
8 | - Conferences: [[CoRL](https://www.corl.org/)] [[RSS](https://roboticsconference.org/)] [[CoLLAs](https://lifelong-ml.cc/)] [[ICML](https://icml.cc/)] [[ICLR](https://iclr.cc/)] [[NeurIPS](https://nips.cc/)] [[PMLR](https://proceedings.mlr.press/)] [[CVPR](https://cvpr.thecvf.com/)] [[IROS](https://ieee-iros.org/)] [[ICRA](https://www.ieee-ras.org/conferences-workshops/fully-sponsored/icra)] [[RAL](https://www.ieee-ras.org/publications/ra-l)] [[Humanoids](https://www.ieee-ras.org/conferences-workshops/fully-sponsored/humanoids)] [[AAMAS](https://aamas2025.org/)] [[SIGGRAPH](https://www.siggraph.org/)] [[RLC](https://rl-conference.cc/)]
9 | - Journals: [[IJRR](https://journals.sagepub.com/home/ijr)] [[Science Robotics](https://www.science.org/journal/scirobotics)] [[T-RO](https://www.ieee-ras.org/publications/t-ro)] [[IJCV](https://link.springer.com/journal/11263)] [[Nature Machine Intelligence](https://www.nature.com/natmachintell/)]
10 | - Workshops: [[Embodied-AI](https://embodied-ai.org/)]
11 |
12 | ---
13 |
14 | - **[New!]** Added a new section for [CVPR2025](https://github.com/Evan-wyl/robotlearning/blob/master/papers/00-Latest/CVPR2025.md)
15 | - **[New!]** Added a new section for [RSS2025](https://github.com/Evan-wyl/robotlearning/blob/master/papers/00-Latest/RSS2025.md)
16 | - **[New!]** Added a new section for [CoRL2025](https://github.com/Evan-wyl/robotlearning/blob/master/papers/00-Latest/CoRL2025.md)
17 | - **[New!]** Added a new section for the [2024 CoRL Workshop](https://github.com/Evan-wyl/robotlearning/blob/master/workshop/CoRL/2024CoRL.md)
18 |
19 | ---
20 |
21 | *Research results on **Embodied AI** and **Foundation Models** can be found in [fm](https://github.com/Evan-wyl/Robot-Learning/tree/master/fm).*
22 |
23 |
24 |
25 | ## 0. Research Areas and Papers
26 |
27 | :speaker: ***Must-read [papers](https://github.com/Evan-wyl/robotlearning/tree/master/papers) on Robot Learning***
28 |
29 | - [Robot Learning Theory](https://github.com/Evan-wyl/robotlearning/tree/master/papers/01-Robot-Learning-Theory)
30 | - [Legged Robot](https://github.com/Evan-wyl/robotlearning/tree/master/papers/03-Legged-Robot)
31 | - [Manipulation](https://github.com/Evan-wyl/robotlearning/tree/master/papers/04-Manipulation)
32 | - [Multi-Embodiment Learning](https://github.com/Evan-wyl/robotlearning/tree/master/papers/05-Multi-Embodiment-Learning)
33 | - [Sim-to-Real](https://github.com/Evan-wyl/robotlearning/tree/master/papers/06-Sim-to-Real)
34 | - [Physical-based Character Control](https://github.com/Evan-wyl/robotlearning/tree/master/papers/08-Physical-based-Character-Control)
35 | - [Data Acquisition](https://github.com/Evan-wyl/robotlearning/tree/master/papers/02-Data-Acquisition)
36 | - [Industrial Application](https://github.com/Evan-wyl/robotlearning/tree/master/papers/09-Industrial-Application)
37 |
38 |
39 |
40 | ## 1. Datasets and Benchmarks
41 |
42 | Please see [HERE](https://github.com/Evan-wyl/Robot-Learning/tree/master/data) for popular robot learning **datasets and benchmarks**.
43 |
44 |
45 |
46 | ## 2. Contributing
47 |
48 | If you are interested in contributing, please refer to [HERE](https://github.com/Evan-wyl/Robot-Learning/blob/master/CONTRIBUTING.md) for contribution instructions.
49 |
50 | ------
51 |
52 | ***Copyright notice***
53 |
54 | > ***[Note] This GitHub repo can be used under the corresponding licenses. I want to emphasize that it may contain some PDFs or theses that were downloaded by me and can only be used for academic purposes. The copyrights of these materials are owned by the corresponding publishers or organizations. All of this is for better academic research. If any of the authors or publishers have concerns, please contact me to delete or replace the material.***
55 |
56 | ## Star History
57 |
58 |
59 | [Star History Chart](https://star-history.com/#Evan-wyl/Robot-Learning&Date)
60 |
61 |
--------------------------------------------------------------------------------
/data/bechmarks.md:
--------------------------------------------------------------------------------
1 | ## Benchmarks
2 |
3 | - [LIBERO](https://github.com/Lifelong-Robot-Learning/LIBERO): Benchmarking Knowledge Transfer in Lifelong Robot Learning.
4 |
--------------------------------------------------------------------------------
/data/bimanual/benchmarks.md:
--------------------------------------------------------------------------------
1 | ## Benchmarks
2 |
3 | - [Bi-DexHands](https://arxiv.org/abs/2206.08686): A simulator that involves two dexterous hands with tens of bimanual manipulation tasks and thousands of target objects
4 | - [BiGym](https://chernyadev.github.io/bigym/): A Demo-Driven Mobile Bi-Manual Manipulation Benchmark.
5 | - [RoboTwin](https://robotwin-benchmark.github.io/early-version/) ([paper](https://arxiv.org/abs/2504.13059)): Dual-Arm Robot Benchmark with Generative Digital Twins.
6 | - [TACO](https://taco2024.github.io/): Benchmarking Generalizable Bimanual Tool-ACtion-Object Understanding.
7 |
8 |
9 |
--------------------------------------------------------------------------------
/data/bimanual/datasets.md:
--------------------------------------------------------------------------------
1 | ## Datasets
2 |
3 | - [ALOHA](https://drive.google.com/drive/folders/1FP5eakcxQrsHyiWBRDsMRvUfSxeykiDc): Static and Dynamic manipulation datasets of [low-cost bimanual hardware](https://tonyzhaozh.github.io/aloha/).
4 | - [Google Scanned Objects](https://research.google/blog/scanned-objects-by-google-research-a-dataset-of-3d-scanned-common-household-items/): A Dataset of 3D-Scanned Common Household Items.
5 | - [RP1M](https://rp1m.github.io/): A Large-Scale Motion Dataset for Piano Playing with Bimanual Dexterous Robot Hands.
6 |
7 |
8 |
9 | ### Relevant Resources
10 |
11 | - [LeRobot](https://huggingface.co/lerobot) aims to provide models, datasets, and tools for real-world robotics in PyTorch.
12 |
--------------------------------------------------------------------------------
/data/bimanual/simulators.md:
--------------------------------------------------------------------------------
1 | ## Simulators
2 |
3 | - [RFUniverse](https://sites.google.com/view/rfuniverse): A Multiphysics Simulation Platform for Embodied AI.
4 | - [Humanoid-Gym](https://sites.google.com/view/humanoid-gym/): Reinforcement Learning for Humanoid Robot with Zero-Shot Sim2Real Transfer.
--------------------------------------------------------------------------------
/data/bipedal/humanoid-control/benchmarks.md:
--------------------------------------------------------------------------------
1 | ## Benchmarks
2 |
3 | - [HumanoidBench](https://humanoid-bench.github.io/): Simulated Humanoid Benchmark for Whole-Body Locomotion and Manipulation.
4 | - [loco-mujoco](https://github.com/robfiras/loco-mujoco): An **imitation learning benchmark** specifically targeted towards **locomotion**.
5 |
6 |
7 |
--------------------------------------------------------------------------------
/data/bipedal/humanoid-control/datatsets.md:
--------------------------------------------------------------------------------
1 | ## Datasets
2 |
3 | - [AMASS](https://amass.is.tue.mpg.de/): A large and varied database of human motion that unifies 15 different optical marker-based mocap datasets by representing them within a common framework and parameterization (a minimal loading sketch follows this list).
4 | - [CMU mocap](http://mocap.cs.cmu.edu/): CMU Graphics Lab Motion Capture Database.
5 | - [OmniH2O-6](https://omni.human2humanoid.com/): A humanoid whole-body control dataset containing six everyday tasks, demonstrating humanoid whole-body skill learning from teleoperated data.
6 | - [SFU mocap](https://mocap.cs.sfu.ca/): SFU Motion Capture Database.
7 |
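Mocap datasets such as AMASS distribute each clip as a NumPy `.npz` archive. Below is a minimal, illustrative sketch of inspecting one clip: the file name is a placeholder, and the key names (`poses`, `trans`, `mocap_framerate`) are the commonly used ones, so verify them against the file you actually download.

```python
# Minimal sketch (illustrative, not official dataset code): inspect one AMASS clip.
import numpy as np

clip = np.load("amass_clip.npz")               # placeholder path to a downloaded clip
print(list(clip.keys()))                       # see what the archive actually contains

poses = clip["poses"]                          # per-frame SMPL-family pose parameters
trans = clip["trans"]                          # per-frame root translation
fps = float(clip["mocap_framerate"])           # capture rate in Hz
print(f"{poses.shape[0]} frames at {fps:.0f} Hz, pose dim {poses.shape[1]}")
```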
--------------------------------------------------------------------------------
/data/bipedal/simulated-humanoid-control/datatsets.md:
--------------------------------------------------------------------------------
1 | ## Datasets
2 |
3 | - [MoCapAct](https://microsoft.github.io/MoCapAct/): A Multi-Task Dataset for Simulated Humanoid Control.
4 |
5 |
6 |
7 | ### Text to Motion
8 |
9 | - [BABEL](https://arxiv.org/abs/2106.09696): Bodies, Action and Behavior with English Labels
10 | - [TEMOS](https://arxiv.org/abs/2204.14109): Generating diverse human motions from textual descriptions
--------------------------------------------------------------------------------
/data/dexterous/benchmarks.md:
--------------------------------------------------------------------------------
1 | ## Benchmarks
2 |
3 | - [DexArt](https://arxiv.org/abs/2305.05706): Benchmarking Generalizable Dexterous Manipulation with Articulated Objects.
4 | - [DexMV](https://arxiv.org/abs/2108.05877): Benchmarking complex and generalizable dexterous manipulation.
5 |
--------------------------------------------------------------------------------
/data/dexterous/datasets.md:
--------------------------------------------------------------------------------
1 | ## Datasets
2 |
3 | - [DexGraspNet](https://pku-epic.github.io/DexGraspNet/): A Large-Scale Robotic Dexterous Grasp Dataset for General Objects Based on Simulation
--------------------------------------------------------------------------------
/data/manipulation/benchmarks.md:
--------------------------------------------------------------------------------
1 | ## Benchmarks
2 |
3 | - [ClevrSkills](https://arxiv.org/abs/2411.09052): A benchmark suite for compositional reasoning in robotics.
4 | - [FurnitureBench](https://clvrai.github.io/furniture-bench/): Real-World Furniture Assembly Benchmark.
5 | - [FMB](https://functional-manipulation-benchmark.github.io/): A Functional Manipulation Benchmark for Generalizable Robotic Learning.
6 | - [ManiSkill-HAB](https://arxiv.org/abs/2412.13211): A Benchmark for Low-Level Manipulation in Home Rearrangement Tasks.
7 | - [RoboCAS-v0](https://github.com/notFoundThisPerson/RoboCAS-v0): Benchmark for Robotic Manipulation in Complex Object Arrangement Scenarios.
8 | - [RLBench](https://sites.google.com/view/rlbench): An ambitious large-scale benchmark and learning environment featuring 100 unique, hand-designed tasks, tailored to facilitate research in a number of vision-guided manipulation research areas.
9 |
10 | ---
11 |
12 | :speaker: ***If you would like to acquire benchmarks for vision-language-action models, please click [here](https://github.com/Evan-wyl/robotlearning/tree/master/fm/data/vla).***
13 |
--------------------------------------------------------------------------------
/data/manipulation/datasets.md:
--------------------------------------------------------------------------------
1 | # Datasets
2 |
3 | - [BC-Z](https://sites.google.com/view/bc-z/home): A large-scale VR-teleoperated dataset of demonstrations for 100 manipulation tasks, used to train a convolutional neural network that imitates closed-loop actions from RGB pixel observations.
4 | - [Block-Push](https://diffusion-policy.cs.columbia.edu/data/training/): Pushing two blocks into two squares in any order.
5 | - [DROID](https://droid-dataset.github.io/): A diverse robot manipulation dataset with 76k demonstration trajectories or 350h of interaction data, collected across 564 scenes and 86 tasks by 50 data collectors in North America, Asia, and Europe over the course of 12 months.
6 | - [Franka Kitchen](https://robotics.farama.org/envs/franka_kitchen/) or [Kitchen](https://diffusion-policy.cs.columbia.edu/data/training/): Multitask environment in which a 9-DoF Franka robot is placed in a kitchen containing several common household items.
7 | - [Grasp-Anything](https://arxiv.org/abs/2309.09818): Large-scale Grasp Dataset from Foundation Models.
8 | - [MIME](https://sites.google.com/view/mimedataset): The largest available robotic-demonstration dataset (MIME), containing 8260 human-robot demonstrations over 20 different robotic tasks.
9 | - [MimicGen](https://mimicgen.github.io/): A system for automatically synthesizing large-scale, rich datasets from only a small number of human demonstrations by adapting them to new contexts.
10 | - [Push-T](https://diffusion-policy.cs.columbia.edu/data/training/): Pushing a T-shaped block (gray) to a fixed target (red) with a circular end-effector, with 50 environment initializations.
11 | - [RoboMimic](https://robomimic.github.io/docs/datasets/overview.html): A large-scale, diverse collection of task demonstrations spanning multiple human demonstrators of varying quality, multiple robot manipulation tasks of varying difficulty, and both simulated and real data (a minimal loading sketch follows this list).
12 | - [RoboTurk](https://roboturk.stanford.edu/): A Crowdsourcing Platform for Robotic Skill Learning through Imitation.
13 | - [RoboNet](https://www.robonet.wiki/): An open database for sharing robotic experience, which provides an initial pool of 15 million video frames, from 7 different robot platforms, and study how it can be used to learn generalizable models for vision-based robotic manipulation.
14 | - [RH20T](https://arxiv.org/abs/2307.00595): A Comprehensive Robotic Dataset for Learning Diverse Skills in One-Shot
15 |
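Several of the demonstration datasets above, RoboMimic in particular, ship as HDF5 files. The sketch below only assumes a RoboMimic-style layout (a `data` group holding `demo_*` groups with `actions` and `obs`); the path is a placeholder and the keys should be checked against the file you actually download.

```python
# Minimal sketch: walk a RoboMimic-style HDF5 demonstration file with h5py.
# The file path and group/key names are illustrative assumptions.
import h5py

with h5py.File("demo.hdf5", "r") as f:          # placeholder path
    demos = sorted(f["data"].keys())            # e.g. ["demo_0", "demo_1", ...]
    print(f"{len(demos)} demonstrations")
    first = f["data"][demos[0]]
    print("actions:", first["actions"].shape)   # (T, action_dim)
    for name, dset in first["obs"].items():     # one array per observation modality
        print(name, dset.shape)
```
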
16 | ### Relevant Resources
17 |
18 | - [LeRobot](https://huggingface.co/lerobot) aims to provide models, datasets, and tools for real-world robotics in PyTorch.
19 |
20 | ---
21 |
22 | :speaker: ***If you would like to acquire datasets for vision-language-action models, please click [here](https://github.com/Evan-wyl/robotlearning/tree/master/fm/data/vla).***
23 |
24 |
--------------------------------------------------------------------------------
/data/manipulation/deformable-object-manipulation/benchmarks.md:
--------------------------------------------------------------------------------
1 | ## Benchmarks
2 |
3 | - [PlasticineLab](https://plasticinelab.csail.mit.edu/): A Soft-Body Manipulation Benchmark with Differentiable Physics.
4 | - [SoftGym](https://sites.google.com/view/softgym): Benchmarking Deep Reinforcement Learning for Deformable Object Manipulation.
--------------------------------------------------------------------------------
/data/quadruped/benchmarks.md:
--------------------------------------------------------------------------------
1 | ## Benchmarks
2 |
3 | - [Barkour](https://sites.google.com/view/barkour): Benchmarking Animal-level Agility with Quadruped Robots.
4 | - [loco-mujoco](https://github.com/robfiras/loco-mujoco): Imitation learning benchmark focusing on complex locomotion tasks using MuJoCo.
5 |
6 |
--------------------------------------------------------------------------------
/data/reinforcement-learning/benchmarks.md:
--------------------------------------------------------------------------------
1 | ## Benchmarks
2 |
3 | - [CoinRun](https://arxiv.org/abs/1812.02341): Benchmarking and Quantifying Generalization in Reinforcement Learning.
4 | - [Crafter](https://danijar.com/project/crafter/): An open world survival game with visual inputs that evaluates a wide range of general abilities within a single environment.
5 | - [Meta-World](https://meta-world.github.io/): A Benchmark and Evaluation for Multi-Task and Meta Reinforcement Learning.
6 | - [Procgen](https://arxiv.org/abs/1912.01588): A suite of 16 procedurally generated game-like environments designed to benchmark both sample efficiency and generalization in reinforcement learning.
7 |
8 |
9 |
10 | ### Vision Reinforcement Learning
11 |
12 | - [RL-ViGen](https://gemcollector.github.io/RL-ViGen/): A Reinforcement Learning Benchmark for Visual Generalization.
13 |
14 |
15 |
16 | ### Offline Reinforcement Learning
17 |
18 | - [D4RL](https://sites.google.com/view/d4rl-anonymous/): A collection of benchmarks and datasets for offline reinforcement learning (a minimal loading sketch follows this list).
19 | - [D5RL](https://sites.google.com/view/d5rl/): Diverse Datasets for Data-Driven Deep Reinforcement Learning.
20 |
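As a quick usage sketch for the D4RL entry above: installing `d4rl` registers its offline datasets as Gym environments, and each environment exposes its dataset as NumPy arrays. The environment ID below is just one example; this is an illustrative sketch, not the benchmark's reference code.

```python
# Minimal sketch: load a D4RL offline dataset through its Gym wrapper.
import gym
import d4rl  # noqa: F401 -- importing registers the offline datasets/environments

env = gym.make("halfcheetah-medium-v2")          # example task ID
dataset = env.get_dataset()                      # dict of NumPy arrays
print(dataset["observations"].shape, dataset["actions"].shape)

# Convenience view with (s, a, r, s', done) keys, handy for offline Q-learning.
transitions = d4rl.qlearning_dataset(env)
print(sorted(transitions.keys()))
```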
21 |
--------------------------------------------------------------------------------
/data/simulators.md:
--------------------------------------------------------------------------------
1 | ## Simulators
2 |
3 | - [DM-Control](https://github.com/google-deepmind/dm_control): Google DeepMind's software stack for physics-based simulation and Reinforcement Learning environments, using MuJoCo physics (a minimal usage sketch follows this list).
4 | - [Gym-Gazebo2](https://github.com/AcutronicRobotics/gym-gazebo2): a toolkit for developing and comparing reinforcement learning algorithms using ROS 2 and Gazebo.
5 | - [Isaac Gym](https://github.com/NVIDIA-Omniverse/IsaacGymEnvs): Isaac Gym Reinforcement Learning Environments.
6 | - [Legged Gym](https://github.com/leggedrobotics/legged_gym): Isaac Gym Environments for Legged Robots.
7 | - [MuJoCo](https://mujoco.org/) is a free and open source physics engine that aims to facilitate research and development in robotics, biomechanics, graphics and animation, and other areas where fast and accurate simulation is needed.
8 | - [RaiSim](https://raisim.com/): A cross-platform multi-body physics engine for robotics and AI.
9 |
10 | - [RoboCasa](https://github.com/robocasa/robocasa): Large-Scale Simulation of Everyday Tasks for Generalist Robots.
11 |
12 | - [RoboGen](https://github.com/Genesis-Embodied-AI/RoboGen): Towards Unleashing Infinite Data for Automated Robot Learning via Generative Simulation.
13 |
14 | - [RFUniverse](https://sites.google.com/view/rfuniverse): A Multiphysics Simulation Platform for Embodied AI.
15 |
16 | - [UBSoft](https://arxiv.org/abs/2411.12711): A Simulation Platform for Robotic Skill Learning in Unbounded Soft Environments
17 |
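As a minimal usage sketch for the DM-Control entry above, the snippet below loads a standard suite task and steps it with random actions. The domain/task names are examples from the suite; this is an illustrative loop, not reference code for any specific benchmark.

```python
# Minimal sketch: load and step a DM-Control suite task with random actions.
import numpy as np
from dm_control import suite

env = suite.load(domain_name="walker", task_name="walk")   # example suite task
spec = env.action_spec()

timestep = env.reset()
while not timestep.last():
    action = np.random.uniform(spec.minimum, spec.maximum, size=spec.shape)
    timestep = env.step(action)                 # returns reward, discount, observation
print("final reward:", timestep.reward)
```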
18 |
19 |
20 | ### Task Planning in Manipulation
21 |
22 | - [MuBlE](https://arxiv.org/abs/2503.02834): MuJoCo and Blender simulation Environment and Benchmark for Task Planning in Robot Manipulation
23 |
24 |
25 |
26 | ### Tactile Simulation
27 |
28 | - [TacEx](https://arxiv.org/abs/2411.04776): GelSight Tactile Simulation in Isaac Sim -- Combining Soft-Body and Visuotactile Simulators
29 |
30 |
31 |
32 |
33 | ### Relevant Resources
34 |
35 | - [Simulately](https://github.com/geng-haoran/Simulately): A universal summary of current robotics simulators.
36 | - [Transic-Envs](https://github.com/transic-robot/transic-envs) is a collection of simulation environments built on [IsaacGym](https://developer.nvidia.com/isaac-gym) to support sim-to-real transfer of contact-rich robotic arm manipulation tasks.
37 |
--------------------------------------------------------------------------------
/fm/README.md:
--------------------------------------------------------------------------------
1 | # Foundation Model
2 |
3 | [Awesome](https://awesome.re) [MIT License](https://opensource.org/licenses/MIT) [Anti-996 License](https://github.com/996icu/996.ICU/blob/master/LICENSE)
4 |
5 | Papers, codes, datasets, tasks, applications, tutorials.
6 |
7 | **Covering top conferences, journals, and workshops:**
8 |
9 | - Conferences: [[ICML](https://icml.cc/)] [[CVPR](https://cvpr.thecvf.com/)]
10 | - Workshops: [[MFM-EAI](https://icml-mfm-eai.github.io/)] [[Embodied-AI](https://embodied-ai.org/)]
11 |
12 |
13 |
14 | ## 0. Research Areas and Papers
15 |
16 | :speaker: ***Must-read [papers](https://github.com/Evan-wyl/robotlearning/tree/master/fm/papers) on Foundation Model***
17 |
18 | - [Vision Language Action Model](https://github.com/Evan-wyl/robotlearning/tree/master/fm/papers/01-Vision-Language-Action)
19 | - [Robotic Transformer](https://github.com/Evan-wyl/robotlearning/tree/master/fm/papers/02-Robotic-Transformer)
20 | - [Large Language Models' Application](https://github.com/Evan-wyl/robotlearning/tree/master/fm/papers/03-LLM-Application)
21 | - [Multi-Modal Large Models' Application](https://github.com/Evan-wyl/robotlearning/tree/master/fm/papers/04-Multimodal-Large-Model-Application)
22 |
23 | - [Embodied AI](https://github.com/Evan-wyl/robotlearning/tree/master/fm/papers/06-Embodiment-AI)
24 | - [World Model and its Application](https://github.com/Evan-wyl/robotlearning/tree/master/fm/papers/07-World-Models-and-Application)
25 | - [Generative AI's Application](https://github.com/Evan-wyl/robotlearning/tree/master/fm/papers/08-Generative-AI-Application)
26 | - [Vision Foundation Models' Application](https://github.com/Evan-wyl/robotlearning/tree/master/fm/papers/09-Vision-Foundation-Model-Application)
27 | - [Navigation](https://github.com/Evan-wyl/robotlearning/tree/master/fm/papers/10-Navigation)
28 |
29 |
30 |
31 | ## 1. Datasets and Benchmarks
32 |
33 | Please see [Here](https://github.com/Evan-wyl/Robot-Learning/tree/master/fm/data) for popular robot learning **datasets and benchmarks**.
34 |
35 | ------
36 |
37 | ***Copyright notice***
38 |
39 | > ***[Note] This GitHub repo can be used under the corresponding licenses. I want to emphasize that it may contain some PDFs or theses that were downloaded by me and can only be used for academic purposes. The copyrights of these materials are owned by the corresponding publishers or organizations. All of this is for better academic research. If any of the authors or publishers have concerns, please contact me to delete or replace the material.***
--------------------------------------------------------------------------------
/fm/data/benchmarks.md:
--------------------------------------------------------------------------------
1 | ## Benchmarks
2 |
3 | - [ALFRED](https://askforalfred.com/): A Benchmark for Interpreting Grounded Instructions for Everyday Tasks.
4 | - [ARNOLD](https://arnold-benchmark.github.io/): A Benchmark for Language-Grounded Task Learning With Continuous States in Realistic 3D Scenes.
5 | - [BEHAVIOR](https://behavior.stanford.edu/): Benchmark for Everyday Household Activities in Virtual, Interactive, and Ecological Environments
6 | - [KitchenShift](https://openreview.net/pdf?id=DdglKo8hBq0): Evaluating Zero-Shot Generalization of Imitation-Based Policy Learning Under Domain Shifts.
7 | - [LoTa-Bench](https://github.com/lbaa2022/LLMTaskPlanning): Benchmarking Language-oriented Task Planners for Embodied Agents.
8 | - [ManiSkill2](https://maniskill2.github.io/): A Unified Benchmark for Generalizable Manipulation Skills.
9 | - [VLMbench](https://sites.google.com/ucsc.edu/vlmbench/home): A Compositional Benchmark for Vision-and-Language Manipulation.
10 |
11 |
--------------------------------------------------------------------------------
/fm/data/datasets.md:
--------------------------------------------------------------------------------
1 | ## Datasets
2 |
3 | - [Ego4D](https://ego4d-data.org/): A massive-scale, egocentric dataset and benchmark suite collected across 74 worldwide locations and 9 countries, with over **3,670** hours of daily-life activity video.
4 | - [Language-Table](https://interactive-language.github.io/): Nearly 600,000 language-labeled trajectories, an order of magnitude larger than prior available datasets.
5 | - [Something-Something v2](https://developer.qualcomm.com/software/ai-datasets/something-something): A collection of 220,847 labeled video clips of humans performing pre-defined, basic actions with everyday objects.
6 |
7 |
--------------------------------------------------------------------------------
/fm/data/eai/benchmarks.md:
--------------------------------------------------------------------------------
1 | ## Benchmarks
2 |
3 | - [ReALFRED](https://twoongg.github.io/projects/realfred/): An Embodied Instruction Following Benchmark in Photo-Realistic Environments
4 |
5 | - [OpenEQA](https://open-eqa.github.io/): Embodied Question Answering in the Era of Foundation Models
--------------------------------------------------------------------------------
/fm/data/eai/datasets.md:
--------------------------------------------------------------------------------
1 | ## Datasets
2 |
3 | - [ARIO](https://imaei.github.io/project_pages/ario/): A New Standard and Unified Dataset for Versatile, General-Purpose Embodied Agents.
--------------------------------------------------------------------------------
/fm/data/eai/simulators.md:
--------------------------------------------------------------------------------
1 | ## Simulators
2 |
3 | - [ManiSkill3](https://arxiv.org/abs/2410.00425): GPU Parallelized Robotics Simulation and Rendering for Generalizable Embodied AI
--------------------------------------------------------------------------------
/fm/data/multimodal-understanding-and-reasoning/benchmarks.md:
--------------------------------------------------------------------------------
1 | ## Benchmarks
2 |
3 | ### Spatial Understanding and Reasoning
4 |
5 | - [VSI-Bench](https://arxiv.org/abs/2412.14171): video-based visual-spatial intelligence benchmark
6 | - [PhysBench](https://arxiv.org/abs/2501.16411): Benchmarking and Enhancing Vision-Language Models for Physical World Understanding
7 |
8 |
9 |
10 | ### Video Understanding
11 |
12 | - [Mmbench-video](https://arxiv.org/abs/2406.14515): A long-form multi-shot benchmark for holistic video understanding
13 | - [Video-MME](https://arxiv.org/abs/2405.21075): The First-Ever Comprehensive Evaluation Benchmark of Multi-modal LLMs in Video Analysis
14 | - [MVBench](https://arxiv.org/abs/2311.17005): A Comprehensive Multi-modal Video Understanding Benchmark
15 | - [EgoSchema](https://arxiv.org/abs/2308.09126): A Diagnostic Benchmark for Very Long-form Video Language Understanding
--------------------------------------------------------------------------------
/fm/data/simulators.md:
--------------------------------------------------------------------------------
1 | ## Simulators
2 |
3 | - [iGibson](https://svl.stanford.edu/igibson/): Interactive Simulation of Large Scale Virtualized Realistic Scenes for Robot Learning.
4 | - [Habitat](https://aihabitat.org/): An Embodied AI simulator for studying human-robot interaction at scale.
5 | - [RLBench](https://sites.google.com/view/rlbench): The Robot Learning Benchmark & Learning Environment.
6 | - [RoboTHOR](https://ai2thor.allenai.org/robothor/): an environment within the AI2-THOR framework, designed to develop embodied AI agents.
7 | - [ThreeDWorld (TDW)](https://www.threedworld.org/#about): A High-Fidelity, Multi-Modal Platform for Interactive Physical Simulation.
8 | - [VirtualHome](http://virtual-home.org/): A Multi-Agent Household Simulator.
9 |
--------------------------------------------------------------------------------
/fm/data/vla/benchmarks.md:
--------------------------------------------------------------------------------
1 | ## Benchmarks
2 |
3 | - [Calvin](http://calvin.cs.uni-freiburg.de/): A Benchmark for Language-conditioned Policy Learning for Long-horizon Robot Manipulation Tasks.
4 |
5 | - [VIMA-Bench](https://github.com/vimalabs/VIMABench): Benchmark for Multimodal Robot Learning.
6 |
--------------------------------------------------------------------------------
/fm/data/vla/datasets.md:
--------------------------------------------------------------------------------
1 | ## Datasets
2 |
3 | - [BridgeData V2](https://rail-berkeley.github.io/bridgedata/): A large and diverse dataset of robotic manipulation behaviors designed to facilitate research in scalable robot learning.
4 | - [Open X-Embodiment Dataset](https://robotics-transformer-x.github.io/): The largest open-source real robot dataset to date. It contains 1M+ real robot trajectories spanning 22 robot embodiments, from single robot arms to bi-manual robots and quadrupeds (a minimal RLDS loading sketch follows this list).
5 | - [RoboSet](https://robopen.github.io/): Diverse multi-skill multi-task multi-modal dataset.
6 | - [RT-1](https://robotics-transformer1.github.io/): A large dataset of real-world robotic experiences that consists of over 130k episodes, which contain over 700 tasks, and was collected with a fleet of 13 robots over 17 months.
7 | - [RH20T](https://rh20t.github.io/): A Comprehensive Robotic Dataset for Learning Diverse Skills in One-Shot.
8 | - [VIMA-Data](https://huggingface.co/datasets/VIMA/VIMA-Data): This is the official dataset used to train general robot manipulation agents with multimodal prompts.
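
Most Open X-Embodiment subsets are released in RLDS format and can be read with TensorFlow Datasets. The sketch below is illustrative only: the builder directory is a placeholder, and the per-step feature names differ across subsets, so consult the Open X-Embodiment documentation for the actual paths and schemas.

```python
# Minimal sketch: read an RLDS-formatted robot dataset (e.g. an Open X-Embodiment
# subset) with TensorFlow Datasets. The builder directory is a placeholder.
import tensorflow_datasets as tfds

builder = tfds.builder_from_directory(builder_dir="gs://<bucket>/<dataset>/<version>")
ds = builder.as_dataset(split="train[:1]")

for episode in ds:                 # RLDS: one element per episode
    for step in episode["steps"]:  # each episode carries a nested dataset of steps
        print(list(step.keys()))   # typically observation / action / reward / is_last ...
        break
    break
```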
--------------------------------------------------------------------------------
/fm/papers/00-Latest/CVPR2025.md:
--------------------------------------------------------------------------------
1 | ## CVPR2025
2 |
3 | [2025] [Multi-Step Guided Diffusion for Image Restoration on Edge Devices: Toward Lightweight Perception in Embodied AI](https://arxiv.org/abs/2506.07286)
--------------------------------------------------------------------------------
/fm/papers/00-Latest/CoRL2025.md:
--------------------------------------------------------------------------------
1 | ## CoRL2025
2 |
3 | [2025] [OG-VLA: 3D-Aware Vision Language Action Model via Orthographic Image Generation](https://arxiv.org/abs/2506.01196)
--------------------------------------------------------------------------------
/fm/papers/00-Latest/ICML2025.md:
--------------------------------------------------------------------------------
1 | ## ICML2025
2 |
3 | [2025] [Teaching Physical Awareness to LLMs through Sounds](https://arxiv.org/abs/2506.08524)
4 |
5 | [2025] [General agents need world models](https://arxiv.org/abs/2506.01622)
--------------------------------------------------------------------------------
/fm/papers/00-Latest/spatial-ai.md:
--------------------------------------------------------------------------------
1 | ## Spatial AI
2 |
3 | [2025] [Towards Visuospatial Cognition via Hierarchical Fusion of Visual Experts](https://arxiv.org/abs/2505.12363)
--------------------------------------------------------------------------------
/fm/papers/00-Latest/vla.md:
--------------------------------------------------------------------------------
1 | ## Vision Language Action
2 |
3 | [2025] [ChatVLA-2: Vision-Language-Action Model with Open-World Embodied Reasoning from Pretrained Knowledge](https://arxiv.org/abs/2505.21906)
4 |
5 | [2025] [Interactive Post-Training for Vision-Language-Action Models](https://arxiv.org/abs/2505.17016)
6 |
7 | [2025] [UniVLA: Learning to Act Anywhere with Task-centric Latent Actions](https://arxiv.org/abs/2505.06111)
8 |
9 | [2025] [Imagine, Verify, Execute: Memory-Guided Agentic Exploration with Vision-Language Models](https://arxiv.org/abs/2505.07815)
10 |
11 | [2025] [Pixel Motion as Universal Representation for Robot Control](https://arxiv.org/abs/2505.07817)
12 |
13 | [2025] [π0.5: a Vision-Language-Action Model with Open-World Generalization](https://arxiv.org/abs/2504.16054)
14 |
15 | [2025] [SAFE: Multitask Failure Detection for Vision-Language-Action Models](https://arxiv.org/abs/2506.09937)
16 |
17 | [2025] [SwitchVLA: Execution-Aware Task Switching for Vision-Language-Action Models](https://arxiv.org/abs/2506.03574)
--------------------------------------------------------------------------------
/fm/papers/00-Latest/world-model.md:
--------------------------------------------------------------------------------
1 | ### World Model
2 |
3 | [2025] [WorldEval: World Model as Real-World Robot Policies Evaluator](https://arxiv.org/abs/2505.19017)
4 |
5 | [2025] [FLARE: Robot Learning with Implicit World Modeling](https://arxiv.org/abs/2505.15659)
6 |
7 | [2025] [FlowDreamer: A RGB-D World Model with Flow-based Motion Representations for Robot Manipulation](https://arxiv.org/abs/2505.10075)
8 |
9 | [2025] [TesserAct: Learning 4D Embodied World Models](https://arxiv.org/abs/2504.20995)
10 |
11 |
12 |
13 | ### Benchmark
14 |
15 | [2025] [EWMBench: Evaluating Scene, Motion, and Semantic Quality in Embodied World Models](https://arxiv.org/abs/2505.09694)
--------------------------------------------------------------------------------
/fm/papers/00-Surveys/foundation-model.md:
--------------------------------------------------------------------------------
1 | ## Foundation Models
2 |
3 | [2021] [On the Opportunities and Risks of Foundation Models](https://arxiv.org/abs/2108.07258)
4 |
5 | [2023] [Large language models for human–robot interaction: A review](https://www.sciencedirect.com/science/article/pii/S2667379723000451)
6 |
7 | [2023] [Foundation Models in Robotics: Applications, Challenges, and the Future](https://arxiv.org/abs/2312.07843)
8 |
9 | [2023] [Robot Learning in the Era of Foundation Models: A Survey](https://arxiv.org/abs/2311.14379)
10 |
11 | [2023] [Toward General-Purpose Robots via Foundation Models: A Survey and Meta-Analysis](https://arxiv.org/abs/2312.08782)
12 |
13 | [2023] [Foundation Models for Decision Making: Problems, Methods, and Opportunities](https://arxiv.org/abs/2303.04129)
14 |
15 | [2024] [Large Language Models for Robotics: Opportunities, Challenges, and Perspectives](https://arxiv.org/abs/2401.04334)
16 |
17 | [2024] [Real-World Robot Applications of Foundation Models: A Review](https://arxiv.org/abs/2402.05741)
18 |
19 | [2024] [A Survey on Vision-Language-Action Models for Embodied AI](https://arxiv.org/abs/2405.14093)
20 |
21 | [2024] [Aligning Cyber Space with Physical World: A Comprehensive Survey on Embodied AI](https://arxiv.org/abs/2407.06886)
--------------------------------------------------------------------------------
/fm/papers/00-Surveys/navigation.md:
--------------------------------------------------------------------------------
1 | ## Navigation
2 |
3 | [2024] [Vision-and-Language Navigation Today and Tomorrow: A Survey in the Era of Foundation Models](https://arxiv.org/abs/2407.07035)
--------------------------------------------------------------------------------
/fm/papers/01-Vision-Language-Action/3d-vla.md:
--------------------------------------------------------------------------------
1 | ## 3D Vision Language Action Model
2 |
3 | [2024] [3D-VLA: A 3D Vision-Language-Action Generative World Model](https://arxiv.org/abs/2403.09631)
4 |
5 | [2025] [SpatialVLA: Exploring Spatial Representations for Visual-Language-Action Model](https://arxiv.org/abs/2501.15830)
6 |
7 | [2025] [FP3: A 3D Foundation Policy for Robotic Manipulation](https://arxiv.org/abs/2503.08950)
8 |
--------------------------------------------------------------------------------
/fm/papers/01-Vision-Language-Action/general-robotic-manipulation.md:
--------------------------------------------------------------------------------
1 | ## General Robotic Manipulation
2 |
3 | [2025] [RoBridge: A Hierarchical Architecture Bridging Cognition and Execution for General Robotic Manipulation](https://arxiv.org/abs/2505.01709)
--------------------------------------------------------------------------------
/fm/papers/01-Vision-Language-Action/vla-for-bimanual.md:
--------------------------------------------------------------------------------
1 | ## VLA for Bimanual
2 |
3 | [2024] [RDT-1B: a Diffusion Foundation Model for Bimanual Manipulation](https://arxiv.org/abs/2410.07864)
4 |
5 | [2024] [Towards Synergistic, Generalized, and Efficient Dual-System for Robotic Manipulation](https://arxiv.org/abs/2410.08001)
6 |
7 | [2025] [AgiBot World Colosseo: A Large-scale Manipulation Platform for Scalable and Intelligent Embodied Systems](https://opendrivelab.com/assets/file/AgiBot_World_Colosseo.pdf)
--------------------------------------------------------------------------------
/fm/papers/01-Vision-Language-Action/vla-for-mobile-manipulation.md:
--------------------------------------------------------------------------------
1 | ## VLA for Mobile Manipulation
2 |
3 | [2025] [MoManipVLA: Transferring Vision-language-action Models for General Mobile Manipulation](https://arxiv.org/abs/2503.13446)
--------------------------------------------------------------------------------
/fm/papers/01-Vision-Language-Action/vla.md:
--------------------------------------------------------------------------------
1 | ## Vision Language Action
2 |
3 | [2022] [VIMA: General Robot Manipulation with Multimodal Prompts](https://arxiv.org/abs/2210.03094)
4 |
5 | [2024] [Octo: An Open-Source Generalist Robot Policy](https://arxiv.org/abs/2405.12213)
6 |
7 | [2024] [OpenVLA: An Open-Source Vision-Language-Action Model](https://arxiv.org/abs/2406.09246)
8 |
9 | [2024] [TinyVLA: Towards Fast, Data-Efficient Vision-Language-Action Models for Robotic Manipulation](https://arxiv.org/abs/2409.12514)
10 |
11 | [2024] [RDT-1B: a Diffusion Foundation Model for Bimanual Manipulation](https://arxiv.org/abs/2410.07864)
12 |
13 | [2024] [Scaling Diffusion Policy in Transformer to 1 Billion Parameters for Robotic Manipulation](https://arxiv.org/abs/2409.14411)
14 |
15 | [2024] [π0: A Vision-Language-Action Flow Model for General Robot Control](https://www.physicalintelligence.company/download/pi0.pdf)
16 |
17 | [2024] [DeeR-VLA: Dynamic Inference of Multimodal Large Language Models for Efficient Robot Execution](https://arxiv.org/abs/2411.02359)
18 |
19 | [2024] [Steering Your Generalists: Improving Robotic Foundation Models via Value Guidance](https://arxiv.org/abs/2410.13816)
20 |
21 | [2024] [CogACT: A Foundational Vision-Language-Action Model for Synergizing Cognition and Action in Robotic Manipulation](https://arxiv.org/abs/2411.19650)
22 |
23 | [2024] [GRAPE: Generalizing Robot Policy via Preference Alignment](https://arxiv.org/abs/2411.19309)
24 |
25 | [2024] [Diffusion-VLA: Scaling Robot Foundation Models via Unified Diffusion and Autoregression](https://arxiv.org/abs/2412.03293)
26 |
27 | [2024] [Towards Generalist Robot Policies: What Matters in Building Vision-Language-Action Models](https://arxiv.org/abs/2412.14058)
28 |
29 |
30 |
31 | [2025] [VLAS: Vision-Language-Action Model With Speech Instructions For Customized Robot Manipulation](https://arxiv.org/abs/2502.13508)
32 |
33 | [2025] [Fine-Tuning Vision-Language-Action Models: Optimizing Speed and Success](https://arxiv.org/abs/2502.19645)
34 |
35 | [2025] [ObjectVLA: End-to-End Open-World Object Manipulation Without Demonstration](https://arxiv.org/abs/2502.19250)
36 |
37 | [2025] [GEVRM: Goal-Expressive Video Generation Model For Robust Visual Manipulation](https://arxiv.org/abs/2502.09268)
38 |
39 | [2025] [CoT-VLA: Visual Chain-of-Thought Reasoning for Vision-Language-Action Models](https://arxiv.org/abs/2503.22020)
40 |
41 | [2025] [DyWA: Dynamics-adaptive World Action Model for Generalizable Non-prehensile Manipulation](https://arxiv.org/abs/2503.16806)
42 |
43 | [2025] [CrayonRobo: Object-Centric Prompt-Driven Vision-Language-Action Model for Robotic Manipulation](https://arxiv.org/abs/2505.02166)
44 |
45 | [2025] [Interleave-VLA: Enhancing Robot Manipulation with Interleaved Image-Text Instructions](https://arxiv.org/abs/2505.02152)
46 |
47 |
48 |
49 | ### Latent Action
50 |
51 | [2024] [Latent Action Pretraining from Videos](https://arxiv.org/abs/2410.11758)
52 |
53 | [2024] [Moto: Latent Motion Token as the Bridging Language for Robot Manipulation](https://arxiv.org/abs/2412.04445)
54 |
55 |
56 |
57 | ### Tokenization
58 |
59 | [2025] [FAST: Efficient Action Tokenization for Vision-Language-Action Models](https://arxiv.org/abs/2501.09747)
--------------------------------------------------------------------------------
/fm/papers/02-Robotic-Transformer/robotic-graph-transformer.md:
--------------------------------------------------------------------------------
1 | ## Robotic Graph Transformer
2 |
3 | [2022] [MetaMorph: Learning Universal Controllers with Transformers](https://arxiv.org/abs/2203.11931)
4 |
5 | [2024] [Body Transformer: Leveraging Robot Embodiment for Policy Learning](https://arxiv.org/abs/2408.06316)
6 |
7 |
8 |
9 | ### Graph Transformer
10 |
11 | [2020] [On the Bottleneck of Graph Neural Networks and its Practical Implications](https://arxiv.org/abs/2006.05205)
12 |
13 | [2021] [Do Transformers Really Perform Bad for Graph Representation?](https://arxiv.org/abs/2106.05234)
14 |
15 | [2022] [GRPE: Relative Positional Encoding for Graph Transformer](https://arxiv.org/abs/2408.06316)
16 |
17 | [2024] [Masked Attention is All You Need for Graphs](https://arxiv.org/abs/2402.10793)
18 |
--------------------------------------------------------------------------------
/fm/papers/02-Robotic-Transformer/robotic-transformer.md:
--------------------------------------------------------------------------------
1 | ## Robotic Transformer
2 |
3 | [2022] [MetaMorph: Learning Universal Controllers with Transformers](https://arxiv.org/abs/2203.11931)
4 |
5 | [2022] [RT-1: Robotics Transformer for Real-World Control at Scale](https://arxiv.org/abs/2212.06817)
6 |
7 | [2022] [PACT: Perception-Action Causal Transformer for Autoregressive Robotics Pre-Training](https://arxiv.org/abs/2209.11133)
8 |
9 | [2022] [Masked Visual Pre-training for Motor Control](https://arxiv.org/abs/2203.06173)
10 |
11 | [2022] [Real-World Robot Learning with Masked Visual Pre-training](https://arxiv.org/abs/2210.03109)
12 |
13 | [2023] [RT-2: Vision-Language-Action Models Transfer Web Knowledge to Robotic Control](https://arxiv.org/abs/2307.15818)
14 |
15 | [2023] [Open X-Embodiment: Robotic Learning Datasets and RT-X Models](https://arxiv.org/abs/2310.08864)
16 |
17 | [2024] [RT-Sketch: Goal-Conditioned Imitation Learning from Hand-Drawn Sketches](https://arxiv.org/abs/2403.02709)
18 |
19 | [2024] [RT-H: Action Hierarchies Using Language](https://arxiv.org/abs/2403.01823)
20 |
--------------------------------------------------------------------------------
/fm/papers/03-LLM-Application/high-level-task-planning.md:
--------------------------------------------------------------------------------
1 | ## High-Level Task Planning
2 |
3 | [2022] [Do As I Can, Not As I Say: Grounding Language in Robotic Affordances](https://arxiv.org/abs/2204.01691)
4 |
5 | [2023] [NL2TL: Transforming Natural Languages to Temporal Logics using Large Language Models](https://arxiv.org/abs/2305.07766)
6 |
7 | [2023] [AutoTAMP: Autoregressive Task and Motion Planning with LLMs as Translators and Checkers](https://arxiv.org/abs/2306.06531)
8 |
9 | [2023] [VoxPoser: Composable 3D Value Maps for Robotic Manipulation with Language Models](https://arxiv.org/abs/2307.05973)
10 |
11 | [2023] [Plan Diffuser: Grounding LLM Planners with Diffusion Models for Robotic Manipulation](https://openreview.net/forum?id=2a3sgm5YeX)
12 |
13 | [2023] [TidyBot: Personalized Robot Assistance with Large Language Models](https://arxiv.org/abs/2305.05658)
14 |
15 | [2024] [Grounding Language Plans in Demonstrations Through Counterfactual Perturbations](https://arxiv.org/abs/2403.17124)
16 |
17 | [2024] [Interpreting and learning voice commands with a Large Language Model for a robot system](https://arxiv.org/abs/2407.21512)
18 |
19 | [2024] [LLM as BT-Planner: Leveraging LLMs for Behavior Tree Generation in Robot Task Planning](https://arxiv.org/abs/2409.10444)
20 |
21 | [2024] [Human-Object Interaction from Human-Level Instructions](https://arxiv.org/abs/2406.17840)
22 |
23 |
24 |
25 | ### Theory
26 |
27 | [2022] [Language Models as Zero-Shot Planners: Extracting Actionable Knowledge for Embodied Agents](https://arxiv.org/abs/2201.07207)
28 |
--------------------------------------------------------------------------------
/fm/papers/03-LLM-Application/llm-based-code-generation.md:
--------------------------------------------------------------------------------
1 | ## LLM-Based Code Generation
2 |
3 | [2022] [ProgPrompt: Generating Situated Robot Task Plans using Large Language Models](https://arxiv.org/abs/2209.11302)
4 |
5 | [2022] [Code as Policies: Language Model Programs for Embodied Control](https://arxiv.org/abs/2209.07753)
6 |
7 | [2023] [ChatGPT for Robotics: Design Principles and Model Abilities](https://arxiv.org/abs/2306.17582)
8 |
9 | [2024] [RoboScript: Code Generation for Free-Form Manipulation Tasks across Real and Simulation](https://arxiv.org/abs/2402.14623)
10 |
11 | [2024] [Programming-by-Demonstration for Long-Horizon Robot Tasks](https://dl.acm.org/doi/pdf/10.1145/3632860)
--------------------------------------------------------------------------------
/fm/papers/04-Multimodal-Large-Model-Application/finetuning-mmlm-to-vla.md:
--------------------------------------------------------------------------------
1 | ## Finetuning Multimodal Large Model to VLA
2 |
3 | [2023] [Vision-Language Foundation Models as Effective Robot Imitators](https://arxiv.org/abs/2311.01378)
4 |
5 | [2024] [MotIF: Motion Instruction Fine-tuning](https://arxiv.org/abs/2409.10683)
6 |
7 | [2024] [AHA: A Vision-Language-Model for Detecting and Reasoning Over Failures in Robotic Manipulation](https://arxiv.org/abs/2410.00371)
8 |
9 | [2025] [DexVLA: Vision-Language Model with Plug-In Diffusion Expert for General Robot Control](https://arxiv.org/abs/2502.05855)
--------------------------------------------------------------------------------
/fm/papers/04-Multimodal-Large-Model-Application/generating-representation.md:
--------------------------------------------------------------------------------
1 | ## Generating Representation
2 |
3 | [2024] [ReKep: Spatio-Temporal Reasoning of Relational Keypoint Constraints for Robotic Manipulation](https://rekep-robot.github.io/)
4 |
5 | [2024] [Lift3D Foundation Policy: Lifting 2D Large-Scale Pretrained Models for Robust 3D Robotic Manipulation](https://arxiv.org/abs/2411.18623)
--------------------------------------------------------------------------------
/fm/papers/04-Multimodal-Large-Model-Application/multimal-reasoning-task-planning.md:
--------------------------------------------------------------------------------
1 | ## Multi-Modal Reasoning and Task Planning
2 |
3 | [2024] [Closed-Loop Open-Vocabulary Mobile Manipulation with GPT-4V](https://arxiv.org/abs/2404.10220)
4 |
5 | [2024] [Towards Open-World Grasping with Large Vision-Language Models](https://arxiv.org/abs/2406.18722)
6 |
7 | [2024] [Keypoint Abstraction using Large Models for Object-Relative Imitation Learning](https://arxiv.org/abs/2410.23254)
8 |
9 | [2025] [SoFar: Language-Grounded Orientation Bridges Spatial Reasoning and Object Manipulation](https://arxiv.org/abs/2502.13143)
10 |
11 | [2025] [KUDA: Keypoints to Unify Dynamics Learning and Visual Prompting for Open-Vocabulary Robotic Manipulation](https://arxiv.org/abs/2503.10546)
12 |
--------------------------------------------------------------------------------
/fm/papers/04-Multimodal-Large-Model-Application/vision-audio-tactile.md:
--------------------------------------------------------------------------------
1 | ## Vision Audio Tactile Action
2 |
3 | [2024] [Hearing Touch: Audio-Visual Pretraining for Contact-Rich Manipulation](https://arxiv.org/abs/2405.08576)
4 |
5 |
--------------------------------------------------------------------------------
/fm/papers/06-Embodiment-AI/embodied-ai.md:
--------------------------------------------------------------------------------
1 | ## Embodied AI
2 |
3 | ### Survey
4 |
5 | [2024] [Aligning Cyber Space with Physical World: A Comprehensive Survey on Embodied AI](https://arxiv.org/abs/2407.06886)
6 |
7 | [2025] [Embodied Intelligent Industrial Robotics: Concepts and Techniques](https://arxiv.org/abs/2505.09305)
8 |
9 |
10 |
11 | ### Great Papers
12 |
13 | [2022] [Language Models as Zero-Shot Planners: Extracting Actionable Knowledge for Embodied Agents](https://arxiv.org/abs/2201.07207)
14 |
15 | [2022] [Video PreTraining (VPT): Learning to Act by Watching Unlabeled Online Videos](https://arxiv.org/abs/2206.11795)
16 |
17 | [2022] [MineDojo: Building Open-Ended Embodied Agents with Internet-Scale Knowledge](https://arxiv.org/abs/2206.08853)
18 |
19 | [2023] [Statler: State-Maintaining Language Models for Embodied Reasoning](https://arxiv.org/abs/2306.17840)
20 |
21 | [2023] [EmbodiedGPT: Vision-Language Pre-Training via Embodied Chain of Thought](https://arxiv.org/abs/2305.15021)
22 |
23 | [2023] [Voyager: An Open-Ended Embodied Agent with Large Language Models](https://arxiv.org/abs/2305.16291)
24 |
27 | [2023] [ManipLLM: Embodied Multimodal Large Language Model for Object-Centric Robotic Manipulation](https://arxiv.org/abs/2312.16217)
28 |
29 | [2024] [Robotic Control via Embodied Chain-of-Thought Reasoning](https://arxiv.org/abs/2407.08693)
30 |
31 | [2024] [BadRobot: Jailbreaking LLM-based Embodied AI in the Physical World](https://arxiv.org/abs/2407.20242)
32 |
33 | [2024] [Emma-X: An Embodied Multimodal Action Model with Grounded Chain of Thought and Look-ahead Spatial Reasoning](https://arxiv.org/abs/2412.11974)
34 |
35 |
36 |
37 | ### Fine-Tuning
38 |
39 | [2024] [Embodied Instruction Following in Unknown Environments](https://arxiv.org/abs/2406.11818)
40 |
--------------------------------------------------------------------------------
/fm/papers/06-Embodiment-AI/multimodal-understanding-and-reasoning/spatial-understanding-and-reasoning.md:
--------------------------------------------------------------------------------
1 | ## Spatial Understanding
2 |
3 | [2024] [SpatialBot: Precise Spatial Understanding with Vision Language Models](https://arxiv.org/abs/2406.13642)
4 |
5 | [2024] [SpatialVLM: Endowing Vision-Language Models with Spatial Reasoning Capabilities](https://arxiv.org/abs/2401.12168)
6 |
7 | [2024] [Coarse Correspondences Boost Spatial-Temporal Reasoning in Multimodal Language Model](https://arxiv.org/abs/2408.00754)
8 |
9 | [2025] [Towards Ambiguity-Free Spatial Foundation Model: Rethinking and Decoupling Depth Ambiguity](https://arxiv.org/abs/2503.06014)
10 |
11 |
12 |
13 |
14 |
15 | ## Spatial Reasoning
16 |
17 | [2024] [Multi-modal Situated Reasoning in 3D Scenes](https://arxiv.org/abs/2409.02389)
18 |
19 | [2024] [SpatialRGPT: Grounded Spatial Reasoning in Vision Language Models](https://arxiv.org/abs/2406.01584)
20 |
21 | [2024] [TopViewRS: Vision-Language Models as Top-View Spatial Reasoners](https://arxiv.org/abs/2406.02537)
22 |
23 | [2024] [Mind's Eye of LLMs: Visualization-of-Thought Elicits Spatial Reasoning in Large Language Models](https://arxiv.org/abs/2404.03622)
24 |
25 | [2024] [Sparkle: Mastering Basic Spatial Capabilities in Vision Language Models Elicits Generalization to Composite Spatial Reasoning](https://arxiv.org/abs/2410.16162)
26 |
27 |
--------------------------------------------------------------------------------
/fm/papers/06-Embodiment-AI/multimodal-understanding-and-reasoning/video-understanding.md:
--------------------------------------------------------------------------------
1 | ## Video Understanding
2 |
3 | [2024] [HourVideo: 1-Hour Video-Language Understanding](https://arxiv.org/abs/2412.14171)
4 |
5 | [2024] [TempCompass: Do Video LLMs Really Understand Videos?](https://arxiv.org/abs/2403.00476)
6 |
7 |
--------------------------------------------------------------------------------
/fm/papers/06-Embodiment-AI/multimodal-understanding-and-reasoning/vision-understanding.md:
--------------------------------------------------------------------------------
1 | ## Vision Understanding
2 |
3 | [2024] [MetaMorph: Multimodal Understanding and Generation via Instruction Tuning](https://arxiv.org/abs/2412.14164)
--------------------------------------------------------------------------------
/fm/papers/06-Embodiment-AI/platform.md:
--------------------------------------------------------------------------------
1 | ## Platform
2 |
3 | [2024] [LEGENT: Open Platform for Embodied Agents](https://arxiv.org/abs/2404.18243)
--------------------------------------------------------------------------------
/fm/papers/06-Embodiment-AI/replica-creation.md:
--------------------------------------------------------------------------------
1 | ## Replica Creation
2 |
3 | [2025] [MetaScenes: Towards Automated Replica Creation for Real-world 3D Scans](https://arxiv.org/abs/2505.02388)
--------------------------------------------------------------------------------
/fm/papers/06-Embodiment-AI/visual-representation-learning.md:
--------------------------------------------------------------------------------
1 | ## Vision Representation Learning
2 |
3 | [2023] [Where are we in the search for an Artificial Visual Cortex for Embodied Intelligence?](https://arxiv.org/abs/2303.18240)
4 |
5 | [2024] [DecisionNCE: Embodied Multimodal Representations via Implicit Preference Learning](https://arxiv.org/abs/2402.18137)
--------------------------------------------------------------------------------
/fm/papers/07-World-Models-and-Application/application/application.md:
--------------------------------------------------------------------------------
1 | ## Application
2 |
3 | [2020] [Planning to Explore via Self-Supervised World Models](https://arxiv.org/abs/2005.05960)
4 |
5 | [2021] [Discovering and Achieving Goals via World Models](https://arxiv.org/abs/2110.09514)
6 |
7 | [2024] [TD-MPC2: Scalable, Robust World Models for Continuous Control](https://arxiv.org/abs/2310.16828)
8 |
--------------------------------------------------------------------------------
/fm/papers/07-World-Models-and-Application/application/video-wm-for-evaluation.md:
--------------------------------------------------------------------------------
1 | ## Video World Model for Evaluation
2 |
3 | [2025] [Learning Real-World Action-Video Dynamics with Heterogeneous Masked Autoregression](https://arxiv.org/abs/2502.04296)
--------------------------------------------------------------------------------
/fm/papers/07-World-Models-and-Application/application/wm-for-grasping.md:
--------------------------------------------------------------------------------
1 | ## World Model for Grasping
2 |
3 | [2024] [World Models for General Surgical Grasping](https://arxiv.org/abs/2405.17940)
--------------------------------------------------------------------------------
/fm/papers/07-World-Models-and-Application/application/wm-for-huamnoid.md:
--------------------------------------------------------------------------------
1 | ## World Model for Humanoid
2 |
3 | [2023] [Gradient-based Planning with World Models](https://jyothirsv.github.io/pdfs/Gradient_based_Planning.pdf)
--------------------------------------------------------------------------------
/fm/papers/07-World-Models-and-Application/application/wm-for-manipulation.md:
--------------------------------------------------------------------------------
1 | ## World Model for Manipulation
2 |
3 | [2024] [ManiGaussian: Dynamic Gaussian Splatting for Multi-task Robotic Manipulation](https://arxiv.org/abs/2403.08321)
4 |
5 | [2024] [Multi-Task Interactive Robot Fleet Learning with Visual World Models](https://arxiv.org/abs/2410.22689)
6 |
7 | [2024] [PIVOT-R: Primitive-Driven Waypoint-Aware World Model for Robotic Manipulation](https://arxiv.org/abs/2410.10394)
8 |
9 | [2025] [Strengthening Generative Robot Policies through Predictive World Modeling](https://arxiv.org/abs/2502.00622)
--------------------------------------------------------------------------------
/fm/papers/07-World-Models-and-Application/application/wm-for-navigation.md:
--------------------------------------------------------------------------------
1 | ## World Model for Navigation
2 |
3 | [2024] [Navigation World Models](https://arxiv.org/abs/2412.03572)
--------------------------------------------------------------------------------
/fm/papers/07-World-Models-and-Application/world-model.md:
--------------------------------------------------------------------------------
1 | ## World Model
2 |
3 | ### Survey
4 |
5 | [2024] [Is Sora a World Simulator? A Comprehensive Survey on General World Models and Beyond](https://arxiv.org/abs/2405.03520)
6 |
7 |
8 |
9 | ### Models
10 |
11 | [2015] [On Learning to Think: Algorithmic Information Theory for Novel Combinations of Reinforcement Learning Controllers and Recurrent Neural World Models](https://arxiv.org/abs/1511.09249)
12 |
13 | [2018] [World Models](https://arxiv.org/abs/1803.10122)
14 |
15 | [2023] [Structured World Models from Human Videos](https://arxiv.org/abs/2308.10901)
16 |
17 | [2023] [Transformers are Sample-Efficient World Models](https://arxiv.org/abs/2209.00588)
18 |
19 | [2024] [Physically Embodied Gaussian Splatting: A Realtime Correctable World Model for Robotics](https://arxiv.org/abs/2406.10788)
20 |
21 | [2024] [PIVOT-R: Primitive-Driven Waypoint-Aware World Model for Robotic Manipulation](https://arxiv.org/abs/2410.10394)
22 |
23 | [2024] [WHALE: Towards Generalizable and Scalable World Models for Embodied Decision-making](https://arxiv.org/abs/2411.05619)
24 |
25 | [2025] [Aether: Geometric-Aware Unified World Modeling](https://arxiv.org/abs/2503.18945)
26 |
27 | #### Diffusion Model and World Model
28 |
29 | [2023] [World Models via Policy-Guided Trajectory Diffusion](https://arxiv.org/abs/2312.08533v2)
30 |
31 | [2024] [Diffusion World Model](https://arxiv.org/abs/2402.03570v1)
32 |
33 |
34 |
35 | ### Motion Dynamic World Model
36 |
37 | [2025] [Neural Motion Simulator: Pushing the Limit of World Models in Reinforcement Learning](https://arxiv.org/abs/2504.07095)
38 |
39 |
40 |
41 | ### Latent Action
42 |
43 | [2024] [Latent Action Pretraining from Videos](https://arxiv.org/abs/2410.11758)
44 |
45 | [2025] [AdaWorld: Learning Adaptable World Models with Latent Actions](https://arxiv.org/abs/2503.18938)
46 |
--------------------------------------------------------------------------------
/fm/papers/08-Generative-AI-Application/finetuning-image-editing-for-action.md:
--------------------------------------------------------------------------------
1 | ## Finetuning Image Editing Model for Action
2 |
3 | [2023] [SuSIE: Subgoal Synthesis via Image Editing](https://rail-berkeley.github.io/susie/)
--------------------------------------------------------------------------------
/fm/papers/08-Generative-AI-Application/finetuning-text-to-image-for-action.md:
--------------------------------------------------------------------------------
1 | ## Finetuning Text-to-Image Model for Action
2 |
3 | [2024] [Generative Image as Action Models](https://arxiv.org/abs/2407.07875)
--------------------------------------------------------------------------------
/fm/papers/08-Generative-AI-Application/generative-planning.md:
--------------------------------------------------------------------------------
1 | ## Generative Planning
2 |
3 | [2024] [FLIP: Flow-Centric Generative Planning for General-Purpose Manipulation Tasks](https://arxiv.org/abs/2412.08261)
--------------------------------------------------------------------------------
/fm/papers/08-Generative-AI-Application/text-to-video-for-action/combine-video-and-action-generation.md:
--------------------------------------------------------------------------------
1 | ## Combining Video and Action Generation
2 |
3 | [2025] [Unified Video Action Model](https://arxiv.org/abs/2503.00200)
--------------------------------------------------------------------------------
/fm/papers/08-Generative-AI-Application/text-to-video-for-action/finetuning-video-generation-for-action.md:
--------------------------------------------------------------------------------
1 | ## Finetuning Video Generation for Action
2 |
3 | [2023] [Unleashing Large-Scale Video Generative Pre-training for Visual Robot Manipulation](https://arxiv.org/abs/2312.13139)
4 |
5 | [2024] [GR-2: A Generative Video-Language-Action Model with Web-Scale Knowledge for Robot Manipulation](https://gr2-manipulation.github.io/)
--------------------------------------------------------------------------------
/fm/papers/08-Generative-AI-Application/text-to-video-for-action/planning-by-video-generation-model.md:
--------------------------------------------------------------------------------
1 | ## Planning by Video Generation Model
2 |
3 | [2023] [Compositional Foundation Models for Hierarchical Planning](https://arxiv.org/abs/2309.08587)
4 |
5 | [2024] [Closed-Loop Visuomotor Control with Generative Expectation for Robotic Manipulation](https://arxiv.org/abs/2409.09016)
--------------------------------------------------------------------------------
/fm/papers/08-Generative-AI-Application/text-to-video-for-action/vision-representation-from-vdm.md:
--------------------------------------------------------------------------------
1 | ## Vision Representation from VDM
2 |
3 | [2024] [Video Prediction Policy: A Generalist Robot Policy with Predictive Visual Representations](https://arxiv.org/abs/2412.14803)
--------------------------------------------------------------------------------
/fm/papers/09-Vision-Foundation-Model-Application/vfm-for-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Vision Foundation Model for Manipulation
2 |
3 | [2024] [Scaling Proprioceptive-Visual Learning with Heterogeneous Pre-trained Transformers](https://arxiv.org/abs/2409.20537)
4 |
5 | [2024] [STRAP: Robot Sub-Trajectory Retrieval for Augmented Policy Learning](https://arxiv.org/abs/2412.15182)
6 |
7 | [2024] [Predictive Inverse Dynamics Models are Scalable Learners for Robotic Manipulation](https://arxiv.org/abs/2412.15109)
8 |
9 | [2025] [SAM2Act: Integrating Visual Foundation Model with A Memory Architecture for Robotic Manipulation](https://arxiv.org/abs/2501.18564)
--------------------------------------------------------------------------------
/fm/papers/10-Navigation/navigation.md:
--------------------------------------------------------------------------------
1 | ## Navigation
2 |
3 | ### Algorithms
4 |
5 | [2024] [PoliFormer: Scaling On-Policy RL with Transformers Results in Masterful Navigators](https://arxiv.org/abs/2406.20083)
6 |
7 | [2024] [Visual-Geometry GP-based Navigable Space for Autonomous Navigation](https://arxiv.org/abs/2407.06545)
8 |
9 | [2024] [Fast-Slow Test-Time Adaptation for Online Vision-and-Language Navigation](https://openreview.net/pdf?id=Zos5wsaB5r)
10 |
11 | [2024] [Neural Control Barrier Functions for Safe Navigation](https://arxiv.org/abs/2407.19907)
12 |
13 | [2024] [TrustNavGPT: Modeling Uncertainty to Improve Trustworthiness of Audio-Guided LLM-Based Robot Navigation](https://arxiv.org/abs/2408.01867)
14 |
15 | [2024] [FloNa: Floor Plan Guided Embodied Visual Navigation](https://arxiv.org/abs/2412.18335)
16 |
17 |
18 |
19 | ### Vision and Language Navigation
20 |
21 | [2021] [Episodic Transformer for Vision-and-Language Navigation](https://arxiv.org/abs/2105.06453)
22 |
23 | [2024] [NavGPT-2: Unleashing Navigational Reasoning Capability for Large Vision-Language Models](https://arxiv.org/abs/2407.12366)
24 |
25 | [2024] [Navigating Beyond Instructions: Vision-and-Language Navigation in Obstructed Environments](https://arxiv.org/abs/2407.21452)
26 |
27 | [2024] [ET tu, CLIP? Addressing Common Object Errors for Unseen Environments](https://arxiv.org/abs/2406.17876)
28 |
29 | [2024] [NaVILA: Legged Robot Vision-Language-Action Model for Navigation](https://arxiv.org/abs/2412.04453)
30 |
31 | [2024] [Enhancing Multi-Robot Semantic Navigation Through Multimodal Chain-of-Thought Score Collaboration](https://arxiv.org/abs/2412.18292)
32 |
33 |
34 |
35 | ### Offline RL and RL
36 |
37 | [2023] [GNM: A General Navigation Model to Drive Any Robot](https://sites.google.com/view/drive-any-robot)
--------------------------------------------------------------------------------
/fm/papers/README.md:
--------------------------------------------------------------------------------
1 | ## Must-Read Papers on Foundation Models
2 |
3 |
--------------------------------------------------------------------------------
/fm/papers/instructions.md:
--------------------------------------------------------------------------------
1 | ## Instructions
2 |
3 | [2025] [Robotic Visual Instruction](https://arxiv.org/abs/2505.00693)
--------------------------------------------------------------------------------
/fm/papers/learning-from-video.md:
--------------------------------------------------------------------------------
1 | ## Learning from Video
2 |
3 | [2023] [Learning to Act without Actions](https://arxiv.org/abs/2312.10812)
4 |
5 | [2023] [Any-point Trajectory Modeling for Policy Learning](https://arxiv.org/abs/2401.00025)
6 |
7 | [2024] [General Flow as Foundation Affordance for Scalable Robot Learning](https://arxiv.org/abs/2401.11439)
8 |
9 | [2024] [Track2Act: Predicting Point Tracks from Internet Videos enables Diverse Zero-shot Robot Manipulation](https://arxiv.org/abs/2405.01527)
10 |
11 | [2024] [Grounding Video Models to Actions through Goal Conditioned Exploration](https://arxiv.org/abs/2411.07223)
12 |
13 | [2024] [VidMan: Exploiting Implicit Dynamics from Video Diffusion Model for Effective Robot Manipulation](https://arxiv.org/abs/2411.09153)
14 |
--------------------------------------------------------------------------------
/papers/00-Latest/CVPR2025.md:
--------------------------------------------------------------------------------
1 | ## CVPR2025
2 |
3 | [2025] [Code-as-Monitor: Constraint-aware Visual Programming for Reactive and Proactive Robotic Failure Detection](https://arxiv.org/abs/2412.04455)
4 |
5 | [2025] [MobileH2R: Learning Generalizable Human to Mobile Robot Handover Exclusively from Scalable and Diverse Synthetic Data](https://arxiv.org/abs/2501.04595)
6 |
7 | [2025] [Phoenix: A Motion-based Self-Reflection Framework for Fine-grained Robotic Action Correction](https://arxiv.org/abs/2504.14588)
8 |
9 | [2025] [Robotic Visual Instruction](https://arxiv.org/abs/2505.00693)
10 |
11 |
12 |
13 | ### Humanoid
14 |
15 | [2025] [Let Humanoids Hike! Integrative Skill Development on Complex Trails](https://arxiv.org/abs/2505.06218)
16 |
17 |
18 |
19 | ### Manipulation
20 |
21 | [2025] [Mitigating the Human-Robot Domain Discrepancy in Visual Pre-training for Robotic Manipulation](https://arxiv.org/abs/2406.14235)
22 |
23 | [2025] DynScene: Scalable Generation of Dynamic Robotic Manipulation Scenes for Embodied AI
24 |
25 | [2025] [TASTE-Rob: Advancing Video Generation of Task-Oriented Hand-Object Interaction for Generalizable Robotic Manipulation](https://arxiv.org/abs/2503.11423)
26 |
27 | [2025] [VidBot: Learning Generalizable 3D Actions from In-the-Wild 2D Human Videos for Zero-Shot Robotic Manipulation](https://arxiv.org/abs/2503.07135)
28 |
29 | [2025] PDFactor: Learning Tri-Perspective View Policy Diffusion Field for Multi-Task Robotic Manipulation
30 |
31 | [2025] FlowRAM: Grounding Flow Matching Policy with Region-Aware Mamba Framework for Robotic Manipulation
32 |
35 | [2025] [OmniManip: Towards General Robotic Manipulation via Object-Centric Interaction Primitives as Spatial Constraints](https://arxiv.org/abs/2501.03841)
36 |
37 | [2025] [Lift3D Foundation Policy: Lifting 2D Large-Scale Pretrained Models for Robust 3D Robotic Manipulation](https://arxiv.org/abs/2411.18623)
38 |
39 | [2025] [RoboGround: Robotic Manipulation with Grounded Vision-Language Priors](https://arxiv.org/abs/2504.21530)
40 |
41 | [2025] [Think Small, Act Big: Primitive Prompt Learning for Lifelong Robot Manipulation](https://arxiv.org/abs/2504.00420)
42 |
43 | [2025] [RoboBrain: A Unified Brain Model for Robotic Manipulation from Abstract to Concrete](https://arxiv.org/abs/2502.21257)
44 |
45 |
46 |
47 | ### Bimanual
48 |
49 | [2025] [Spatial-Temporal Graph Diffusion Policy with Kinematic Modeling for Bimanual Robotic Manipulation](https://arxiv.org/abs/2503.10743)
50 |
51 | [2025] [BG-HOP: A Bimanual Generative Hand-Object Prior](https://arxiv.org/abs/2506.09068)
52 |
53 |
54 |
55 | ### Grasping
56 |
57 | [2025] [ZeroGrasp: Zero-Shot Shape Reconstruction Enabled Robotic Grasping](https://arxiv.org/abs/2504.10857)
58 |
59 | [2025] [DexGrasp Anything: Towards Universal Robotic Dexterous Grasping with Physics Awareness](https://arxiv.org/abs/2503.08257)
60 |
61 | [2025] [UniGraspTransformer: Simplified Policy Distillation for Scalable Dexterous Robotic Grasping](https://arxiv.org/abs/2412.02699)
62 |
63 |
64 |
65 | ### VLM
66 |
67 | [2025] [RoboSpatial: Teaching Spatial Understanding to 2D and 3D Vision-Language Models for Robotics](https://arxiv.org/abs/2411.16537)
68 |
69 | [2025] [PhysVLM: Enabling Visual Language Models to Understand Robotic Physical Reachability](https://arxiv.org/abs/2503.08481)
70 |
71 |
72 |
73 | ### VLA
74 |
75 | [2025] [CrayonRobo: Object-Centric Prompt-Driven Vision-Language-Action Model for Robotic Manipulation](https://arxiv.org/abs/2505.02166)
76 |
77 |
78 |
79 | ### Pose Estimation
80 |
81 | [2025] [RoboPEPP: Vision-Based Robot Pose and Joint Angle Estimation through Embedding Predictive Pre-Training](https://arxiv.org/abs/2411.17662)
82 |
83 |
84 |
85 | ### Benchmark
86 |
87 | [2025] [RoboTwin: Dual-Arm Robot Benchmark with Generative Digital Twins](https://arxiv.org/abs/2409.02920)
88 |
89 | [2025] [RoboSense: Large-scale Dataset and Benchmark for Egocentric Robot Perception and Navigation in Crowded and Unstructured Environments](https://arxiv.org/abs/2408.15503)
90 |
91 | [2025] [CheckManual: A New Challenge and Benchmark for Manual-based Appliance Manipulation](https://arxiv.org/abs/2506.09343)
--------------------------------------------------------------------------------
/papers/00-Latest/CoRL2025.md:
--------------------------------------------------------------------------------
1 | ## CoRL2025
2 |
3 | [2025] [DiffusionRL: Efficient Training of Diffusion Policies for Robotic Grasping Using RL-Adapted Large-Scale Datasets](https://arxiv.org/abs/2505.18876)
--------------------------------------------------------------------------------
/papers/00-Latest/ICLR2025.md:
--------------------------------------------------------------------------------
1 | ## ICLR2025
2 |
3 | [2025] [Offline Learning of Controllable Diverse Behaviors](https://arxiv.org/abs/2504.18160)
4 |
5 | [2025] [Dynamic Contrastive Skill Learning with State-Transition Based Skill Clustering and Dynamic Length Adjustment](https://arxiv.org/abs/2504.14805)
6 |
7 |
8 |
9 | ### RL
10 |
11 | [2025] [Mitigating Information Loss in Tree-Based Reinforcement Learning via Direct Optimization](https://arxiv.org/abs/2408.08761)
12 |
13 | [2025] [UTILITY: Utilizing Explainable Reinforcement Learning to Improve Reinforcement Learning](https://openreview.net/forum?id=Tk1VQDadfL)
14 |
15 | [2025] [ActSafe: Active Exploration with Safety Constraints for Reinforcement Learning](https://arxiv.org/abs/2410.09486)
16 |
17 | [2025] [Neuroplastic Expansion in Deep Reinforcement Learning](https://arxiv.org/abs/2410.07994)
18 |
19 | [2025] [On the Geometry of Reinforcement Learning in Continuous State and Action Spaces](https://arxiv.org/abs/2301.00009)
20 |
21 | [2025] [Flat Reward in Policy Parameter Space Implies Robust Reinforcement Learning](https://openreview.net/forum?id=4OaO3GjP7k)
22 |
23 | [2025] [Highly Efficient Self-Adaptive Reward Shaping for Reinforcement Learning](https://arxiv.org/abs/2408.03029)
24 |
25 |
26 |
27 | ### Inverse RL
28 |
29 | [2025] [Understanding Constraint Inference in Safety-Critical Inverse Reinforcement Learning](https://openreview.net/forum?id=B2RXwASSpy)
30 |
31 | [2025] [Non-Adversarial Inverse Reinforcement Learning via Successor Feature Matching](https://arxiv.org/abs/2411.07007)
32 |
33 |
34 |
35 | ### Safe RL
36 |
37 | [2025] [Latent Safety-Constrained Policy Approach for Safe Offline Reinforcement Learning](https://openreview.net/forum?id=bDt5qc7TfO)
38 |
39 | [2025] [HASARD: A Benchmark for Vision-Based Safe Reinforcement Learning in Embodied Agents](https://arxiv.org/abs/2503.08241)
40 |
41 |
42 |
43 | ### Goal-Conditioned RL
44 |
45 | [2025] [Accelerating Goal-Conditioned RL Algorithms and Research](https://arxiv.org/abs/2408.11052)
46 |
47 |
48 |
49 | ### Offline RL
50 |
51 | [2025] [Efficient Online Reinforcement Learning Fine-Tuning Need Not Retain Offline Data](https://arxiv.org/abs/2412.07762)
52 |
53 | [2025] [Preference Elicitation for Offline Reinforcement Learning](https://arxiv.org/abs/2406.18450)
54 |
55 | [2025] [Behavioral Entropy-Guided Dataset Generation for Offline Reinforcement Learning](https://arxiv.org/abs/2502.04141)
56 |
57 | [2025] [Tackling Data Corruption in Offline Reinforcement Learning via Sequence Modeling](https://arxiv.org/abs/2407.04285)
58 |
59 | [2025] [Value-aligned Behavior Cloning for Offline Reinforcement Learning via Bi-level Optimization](https://openreview.net/forum?id=elTJBP7Fbv)
60 |
61 | [2025] [Constraint-Conditioned Actor-Critic for Offline Safe Reinforcement Learning](https://openreview.net/forum?id=nrRkAAAufl)
62 |
63 |
64 |
65 | ### In-Context RL
66 |
67 | [2025] [XLand-100B: A Large-Scale Multi-Task Dataset for In-Context Reinforcement Learning](https://arxiv.org/abs/2406.08973)
68 |
69 |
70 |
71 | ### Multi-Objective RL
72 |
73 | [2025] [Conflict-Averse Gradient Aggregation for Constrained Multi-Objective Reinforcement Learning](https://arxiv.org/abs/2403.00282)
74 |
75 |
76 |
77 | ### Visual RL
78 |
79 | [2025] [Learning Robust Representations with Long-Term Information for Generalization in Visual Reinforcement Learning](https://openreview.net/forum?id=PDtMrogheZ)
80 |
81 |
82 |
83 | ### RL Theory
84 |
85 | [2025] [Reinforcement learning with combinatorial actions for coupled restless bandits](https://arxiv.org/abs/2503.01919)
86 |
87 | [2025] [Online Reinforcement Learning in Non-Stationary Context-Driven Environments](https://openreview.net/forum?id=l6QnSQizmN)
88 |
89 | [2025] [Minimax Optimal Reinforcement Learning with Quasi-Optimism](https://openreview.net/forum?id=i8LCUpKvAz)
90 |
91 |
92 |
93 | ### Manipulation
94 |
95 | [2025] [Predictive Inverse Dynamics Models are Scalable Learners for Robotic Manipulation](https://arxiv.org/abs/2412.15109)
96 |
97 | [2025] [RoboCat: A Self-Improving Generalist Agent for Robotic Manipulation](https://arxiv.org/abs/2306.11706)
98 |
99 | [2025] [HAMSTER: Hierarchical Action Models for Open-World Robot Manipulation](https://arxiv.org/abs/2502.05485)
100 |
101 | [2025] [Dream to Manipulate: Compositional World Models Empowering Robot Imitation Learning with Imagination](https://arxiv.org/abs/2412.14957)
102 |
103 | [2025] [6D Object Pose Tracking in Internet Videos for Robotic Manipulation](https://arxiv.org/abs/2503.10307)
104 |
105 | [2025] [Learning Geometric Reasoning Networks For Robot Task And Motion Planning](https://openreview.net/forum?id=ajxAJ8GUX4)
106 |
107 | [2025] [Physics-informed Temporal Difference Metric Learning for Robot Motion Planning](https://openreview.net/forum?id=TOiageVNru)
108 |
109 | [2025] [Robots Pre-train Robots: Manipulation-Centric Robotic Representation from Large-Scale Robot Dataset](https://arxiv.org/abs/2410.22325)
110 |
111 | [2025] [Learning View-invariant World Models for Visual Robotic Manipulation](https://openreview.net/forum?id=vJwjWyt4Ed)
112 |
113 |
114 |
115 | ### Datasets
116 |
117 | [2025] [VTDexManip: A Dataset and Benchmark for Visual-tactile Pretraining and Dexterous Manipulation with Reinforcement Learning](https://openreview.net/forum?id=jf7C7EGw21)
118 |
119 | [2025] [Data Scaling Laws in Imitation Learning for Robotic Manipulation](https://arxiv.org/abs/2410.18647)
120 |
121 | [2025] [LLaRA: Supercharging Robot Learning Data for Vision-Language Policy](https://arxiv.org/abs/2406.20095)
122 |
123 | [2025] [What Matters in Learning from Large-Scale Datasets for Robot Manipulation](https://openreview.net/forum?id=LqhorpRLIm)
124 |
125 |
126 |
127 | ### VLA
128 |
129 | [2025] [TraceVLA: Visual Trace Prompting Enhances Spatial-Temporal Awareness for Generalist Robotic Policies](https://arxiv.org/abs/2412.10345)
130 |
131 | [2025] [AHA: A Vision-Language-Model for Detecting and Reasoning Over Failures in Robotic Manipulation](https://arxiv.org/abs/2410.00371)
132 |
133 | [2025] [VLAS: Vision-Language-Action Model with Speech Instructions for Customized Robot Manipulation](https://arxiv.org/abs/2502.13508)
134 |
135 | [2025] [Solving New Tasks by Adapting Internet Video Knowledge](https://arxiv.org/abs/2504.15369)
--------------------------------------------------------------------------------
/papers/00-Latest/ICML2025.md:
--------------------------------------------------------------------------------
1 | ## ICML2025
2 |
3 | [2025] [Efficient Robotic Policy Learning via Latent Space Backward Planning](https://arxiv.org/abs/2505.06861)
4 |
5 | [2025] [Video Prediction Policy: A Generalist Robot Policy with Predictive Visual Representations](https://arxiv.org/abs/2412.14803)
6 |
7 | [2025] [STAR: Learning Diverse Robot Skill Abstractions through Rotation-Augmented Vector Quantization](https://arxiv.org/abs/2506.03863)
8 |
9 |
10 |
11 | ### Imitation Learning
12 |
13 | [2025] [Robot-Gated Interactive Imitation Learning with Adaptive Intervention Mechanism](https://arxiv.org/abs/2506.09176)
--------------------------------------------------------------------------------
/papers/00-Latest/ICRA2025.md:
--------------------------------------------------------------------------------
1 | ## ICRA2025
2 |
3 | [2025] [Learning IMU Bias with Diffusion Model](https://arxiv.org/abs/2505.11763)
4 |
5 | [2025] [Integrating Learning-Based Manipulation and Physics-Based Locomotion for Whole-Body Badminton Robot Control](https://arxiv.org/abs/2504.17771)
6 |
7 |
8 |
9 | ### Manipulation
10 |
11 | [2025] [Streaming Flow Policy: Simplifying diffusion/flow-matching policies by treating action trajectories as flow trajectories](https://arxiv.org/abs/2505.21851)
12 |
13 | [2025] [Beyond Static Perception: Integrating Temporal Context into VLMs for Cloth Folding](https://arxiv.org/abs/2505.07600)
14 |
15 | #### Assembly
16 |
17 | [2025] [Learning the Contact Manifold for Accurate Pose Estimation During Peg-in-Hole Insertion of Complex Geometries](https://arxiv.org/abs/2505.19215)
18 |
19 | #### Contact-rich Manipulation
20 |
21 | [2025] [PolyTouch: A Robust Multi-Modal Tactile Sensor for Contact-rich Manipulation Using Tactile-Diffusion Policies](https://arxiv.org/abs/2504.19341)
22 |
23 |
24 |
25 | ### Hardware Design
26 |
27 | [2025] [BiDexHand: Design and Evaluation of an Open-Source 16-DoF Biomimetic Dexterous Hand](https://arxiv.org/abs/2504.14712)
--------------------------------------------------------------------------------
/papers/00-Latest/IJRR2025.md:
--------------------------------------------------------------------------------
1 | ## IJRR2025
2 |
3 | [2025] [Growable and Interpretable Neural Control with Online Continual Learning for Autonomous Lifelong Locomotion Learning Machines](https://arxiv.org/abs/2505.12029)
--------------------------------------------------------------------------------
/papers/00-Latest/Nvidia.md:
--------------------------------------------------------------------------------
1 | ## Nvidia
2 |
3 | [2025] [DexMachina: Functional Retargeting for Bimanual Dexterous Manipulation](https://arxiv.org/abs/2505.24853)
--------------------------------------------------------------------------------
/papers/00-Latest/RSS2025.md:
--------------------------------------------------------------------------------
1 | ## RSS2025
2 |
3 | ### Imitation Learning
4 |
5 | [2025] [Curating Demonstrations using Online Experience](https://roboticsconference.org/program/papers/71/)
6 |
7 | [2025] [CodeDiffuser: Attention-Enhanced Diffusion Policy via VLM-Generated Code for Instruction Ambiguity](https://roboticsconference.org/program/papers/72/)
8 |
9 | [2025] [Can We Detect Failures Without Failure Data? Uncertainty-Aware Runtime Failure Detection for Imitation Learning Policies](https://roboticsconference.org/program/papers/73/)
10 |
11 | [2025] [Unified Video Action Model](https://roboticsconference.org/program/papers/74/)
12 |
13 | [2025] [From Foresight to Forethought: VLM-In-the-Loop Policy Steering via Latent Alignment](https://roboticsconference.org/program/papers/76/)
14 |
15 | [2025] [Is Your Imitation Learning Policy Better than Mine? Policy Comparison with Near-Optimal Stopping](https://roboticsconference.org/program/papers/77/)
16 |
17 | [2025] [DDAT: Diffusion Policies Enforcing Dynamically Admissible Robot Trajectories](https://roboticsconference.org/program/papers/78/)
18 |
19 | [2025] [FACTR: Force-Attending Curriculum Training for Contact-Rich Policy Learning](https://roboticsconference.org/program/papers/79/)
20 |
21 | [2025] [Bridging Perception and Action: Spatially-Grounded Mid-Level Representations for Robot Generalization](https://roboticsconference.org/program/papers/155/)
22 |
23 | [2025] [DemoGen: Synthetic Demonstration Generation for Data-Efficient Visuomotor Policy Learning](https://roboticsconference.org/program/papers/157/)
24 |
25 | [2025] [IMLE Policy: Fast and Sample Efficient Visuomotor Policy Learning via Implicit Maximum Likelihood Estimation](https://roboticsconference.org/program/papers/158/)
26 |
27 | [2025] [Dynamic Rank Adjustment in Diffusion Policies for Efficient and Flexible Training](https://roboticsconference.org/program/papers/159/)
28 |
29 | [2025] [Diffeomorphic Obstacle Avoidance for Contractive Dynamical Systems via Implicit Representations](https://roboticsconference.org/program/papers/162/)
30 |
31 | [2025] [Learning Interpretable Features from Interventions](https://roboticsconference.org/program/papers/163/)
32 |
33 |
34 |
35 | ### Scaling Robot Learning
36 |
37 | [2025] [Robot Data Curation with Mutual Information Estimators](https://roboticsconference.org/program/papers/23/)
38 |
39 | [2025] [Robot Learning with Super-Linear Scaling](https://roboticsconference.org/program/papers/25/)
40 |
41 | [2025] [Action Flow Matching for Lifelong Learning](https://roboticsconference.org/program/papers/26/)
42 |
43 | [2025] [RLDG: Robotic Generalist Policy Distillation via Reinforcement Learning](https://roboticsconference.org/program/papers/28/)
44 |
45 |
46 |
47 | ### Humanoid
48 |
49 | [2025] [Learning Getting-Up Policies for Real-World Humanoid Robots](https://arxiv.org/abs/2502.12152)
50 |
51 | [2025] [Learning Humanoid Standing-up Control across Diverse Postures](https://arxiv.org/abs/2502.08378)
52 |
53 | #### Whole Body Control
54 |
55 | [2025] [AMO: Adaptive Motion Optimization for Hyper-Dexterous Humanoid Whole-Body Control](https://arxiv.org/abs/2505.03738)
56 |
57 | [2025] [LangWBC: Language-directed Humanoid Whole-Body Control via End-to-end Learning](https://arxiv.org/abs/2504.21738)
58 |
59 | [2025] [ASAP: Aligning Simulation and Real-World Physics for Learning Agile Humanoid Whole-Body Skills](https://arxiv.org/abs/2502.01143)
60 |
61 | [2025] [A Unified and General Humanoid Whole-Body Controller for Versatile Locomotion](https://arxiv.org/abs/2502.03206)
62 |
63 | [2025] [BeamDojo: Learning Agile Humanoid Locomotion on Sparse Footholds](https://arxiv.org/abs/2502.10363)
64 |
65 | [2025] [HOMIE: Humanoid Loco-Manipulation with Isomorphic Exoskeleton Cockpit](https://arxiv.org/abs/2502.13013)
66 |
67 |
68 |
69 | ### Mobile Manipulation and Locomotion
70 |
71 | [2025] [Human2LocoMan: Learning Versatile Quadrupedal Manipulation with Human Pretraining](https://roboticsconference.org/program/papers/122/)
72 |
73 | [2025] [SafeMimic: Towards Safe and Autonomous Human-to-Robot Imitation for Mobile Manipulation](https://roboticsconference.org/program/papers/128/)
74 |
75 | [2025] [Gain Tuning Is Not What You Need: Reward Gain Adaptation for Constrained Locomotion Learning](https://roboticsconference.org/program/papers/123/)
76 |
77 | [2025] [SATA: Safe and Adaptive Torque-Based Locomotion Policies Inspired by Animal Learning](https://roboticsconference.org/program/papers/124/)
78 |
79 | [2025] [Bridging the Sim-to-Real Gap for Athletic Loco-Manipulation](https://roboticsconference.org/program/papers/125/)
80 |
81 | [2025] [Adaptive Locomotion on Mud through Proprioceptive Sensing of Substrate Properties](https://roboticsconference.org/program/papers/126/)
82 |
83 | [2025] [Discrete-Time Hybrid Automata Learning: Legged Locomotion Meets Skateboarding](https://roboticsconference.org/program/papers/127/)
84 |
85 | [2025] [DVS: Dynamic Virtual-Real Simulation Platform for Mobile Robotic Tasks](https://roboticsconference.org/program/papers/129/)
86 |
87 | [2025] [Flying Hand: End-Effector-Centric Framework for Versatile Aerial Manipulation Teleoperation and Policy Learning](https://roboticsconference.org/program/papers/130/)
88 |
89 | [2025] [DexWild: Dexterous Human Interactions for In-the-Wild Robot Policies](https://arxiv.org/abs/2505.07813)
90 |
91 | [2025] [STDArm: Transfer Visuomotor Policy From Static Data Training to Dynamic Robot Manipulation](https://roboticsconference.org/program/papers/147/)
92 |
93 |
94 |
95 | ### Manipulation
96 |
97 | [2025] [Sketch-to-Skill: Bootstrapping Robot Learning with Human Drawn Trajectory Sketches](https://roboticsconference.org/program/papers/151/)
98 |
99 | [2025] [Reactive Diffusion Policy: Slow-Fast Visual-Tactile Policy Learning for Contact-Rich Manipulation](https://roboticsconference.org/program/papers/52/)
100 |
101 | [2025] [Physics-Driven Data Generation for Contact-Rich Manipulation via Trajectory Optimization](https://roboticsconference.org/program/papers/53/)
102 |
103 | [2025] [ViTaSCOPE: Visuo-tactile Implicit Representation for In-hand Pose and Extrinsic Contact Estimation](https://roboticsconference.org/program/papers/54/)
104 |
105 | [2025] [A low-cost and lightweight 6 DoF bimanual arm for dynamic and contact-rich manipulation](https://roboticsconference.org/program/papers/55/)
106 |
107 | [2025] [Robust Peg-in-Hole Assembly under Uncertainties via Compliant and Interactive Contact-Rich Manipulation](https://roboticsconference.org/program/papers/60/)
108 |
109 | [2025] [ArticuBot: Learning Universal Articulated Object Manipulation Policy via Large Scale Simulation](https://roboticsconference.org/program/papers/156/)
110 |
111 | [2025] [SKIL: Semantic Keypoint Imitation Learning for Generalizable Data-efficient Manipulation](https://roboticsconference.org/program/papers/161/)
112 |
113 | [2025] [Novel Demonstration Generation with Gaussian Splatting Enables Robust One-Shot Manipulation](https://roboticsconference.org/program/papers/146/)
114 |
115 | [2025] [PartInstruct: Part-level Instruction Following for Fine-grained Robot Manipulation](https://roboticsconference.org/program/papers/148/)
116 |
119 | [2025] [PIN-WM: Learning Physics-INformed World Models for Non-Prehensile Manipulation](https://roboticsconference.org/program/papers/153/)
120 |
121 | [2025] [Hierarchical and Modular Network on Non-prehensile Manipulation in General Environments](https://roboticsconference.org/program/papers/154/)
122 |
123 | [2025] [Sim-and-Real Co-Training: A Simple Recipe for Vision-Based Robotic Manipulation](https://roboticsconference.org/program/papers/109/)
124 |
125 | [2025] [Behavior Synthesis via Contact-Aware Fisher Information Maximization](https://arxiv.org/abs/2505.12214)
126 |
127 | [2025] [Flow Matching Ergodic Coverage](https://arxiv.org/abs/2504.17872)
128 |
129 |
130 |
131 | ### Bimanual
132 |
133 | [2025] [Gripper Pose and Object Pointflow as Interfaces for Robotic Bimanual Manipulation](https://roboticsconference.org/program/papers/160/)
134 |
135 | [2025] [You Only Teach Once: Learn One-Shot Bimanual Robotic Manipulation from Video Demonstrations](https://roboticsconference.org/program/papers/149/)
136 |
137 |
138 |
139 | ### Pick
140 |
141 | [2025] [Demonstrating Multi-Suction Item Picking at Scale via Multi-Modal Learning of Pick Success](https://roboticsconference.org/program/papers/107/)
142 |
143 |
144 |
145 | ### Dexterous
146 |
147 | [2025] [DexWild: Dexterous Human Interactions for In-the-Wild Robot Policies](https://www.arxiv.org/abs/2505.07813)
148 |
149 | [2025] [PP-Tac: Paper Picking Using Omnidirectional Tactile Feedback in Dexterous Robotic Hands](https://roboticsconference.org/program/papers/56/)
150 |
151 | [2025] [GeoDEx: A Unified Geometric Framework for Tactile Dexterous and Extrinsic Manipulation under Force Uncertainty](https://roboticsconference.org/program/papers/57/)
152 |
153 | [2025] [DexterityGen: Foundation Controller for Unprecedented Dexterity](https://roboticsconference.org/program/papers/103/)
154 |
155 | [2025] [DOGlove: Dexterous Manipulation with a Low-Cost Open-Source Haptic Force Feedback Glove](https://roboticsconference.org/program/papers/104/)
156 |
157 | [2025] [Dexonomy: Synthesizing All Dexterous Grasp Types in a Grasp Taxonomy](https://roboticsconference.org/program/papers/105/)
158 |
159 | [2025] [Dex1B: Learning with 1B Demonstrations for Dexterous Manipulation](https://roboticsconference.org/program/papers/106/)
160 |
161 | [2025] [CordViP: Correspondence-based Visuomotor Policy for Dexterous Manipulation in Real-World](https://roboticsconference.org/program/papers/110/)
162 |
163 | [2025] [Complementarity-Free Multi-Contact Modeling and Optimization for Dexterous Manipulation](https://roboticsconference.org/program/papers/111/)
164 |
165 |
166 |
167 | ### Assembly
168 |
169 | [2025] [Demonstrating REASSEMBLE: A Multimodal Dataset for Contact-rich Robotic Assembly and Disassembly](https://roboticsconference.org/program/papers/59/)
170 |
171 |
172 |
173 | ### VTL
174 |
175 | [2025] [Demonstrating the Octopi-1.5 Visual-Tactile-Language Model](https://roboticsconference.org/program/papers/58/)
176 |
177 |
178 |
179 | ### VLA
180 |
181 | [2025] [π₀: A Vision-Language-Action Flow Model for General Robot Control](https://roboticsconference.org/program/papers/10/)
182 |
183 | [2025] [SpatialVLA: Exploring Spatial Representations for Visual-Language-Action Models](https://roboticsconference.org/program/papers/11/)
184 |
185 | [2025] [FAST: Efficient Action Tokenization for Vision-Language-Action Models](https://roboticsconference.org/program/papers/12/)
186 |
187 | [2025] [Uni-NaVid: A Video-based Vision-Language-Action Model for Unifying Embodied Navigation Tasks](https://roboticsconference.org/program/papers/13/)
188 |
189 | [2025] [Learning to Act Anywhere with Task-centric Latent Actions](https://roboticsconference.org/program/papers/14/)
190 |
191 | [2025] [Unified World Models: Coupling Video and Action Diffusion for Pretraining on Large Robotic Datasets](https://roboticsconference.org/program/papers/15/)
192 |
193 | [2025] [CLIP-RT: Learning Language-Conditioned Robotic Policies from Natural Language Supervision](https://roboticsconference.org/program/papers/16/)
194 |
195 | [2025] [Fine-Tuning Vision-Language-Action Models: Optimizing Speed and Success](https://roboticsconference.org/program/papers/17/)
196 |
197 | [2025] [NaVILA: Legged Robot Vision-Language-Action Model for Navigation](https://roboticsconference.org/program/papers/18/)
198 |
199 | [2025] [ConRFT: A Reinforced Fine-tuning Method for VLA Models via Consistency Policy](https://roboticsconference.org/program/papers/19/)
200 |
201 | [2025] [Manual2Skill: Learning to Read Manuals and Acquire Robotic Skills for Furniture Assembly Using Vision-Language Models](https://roboticsconference.org/program/papers/150/)
202 |
203 | [2025] [UniVLA: Learning to Act Anywhere with Task-centric Latent Actions](https://arxiv.org/abs/2505.06111)
204 |
205 |
206 |
207 | ### Benchmark
208 |
209 | [2025] [RoboMIND: Benchmark on Multi-embodiment Intelligence Normative Data for Robot Manipulation](https://roboticsconference.org/program/papers/152/)
210 |
211 |
212 |
213 | ### Robot Design
214 |
215 | [2025] [RUKA: Rethinking the Design of Humanoid Hands with Learning](https://roboticsconference.org/program/papers/131/)
216 |
217 | [2025] [Demonstrating Berkeley Humanoid Lite: An Open-source, Accessible, and Customizable 3D-printed Humanoid Robot](https://arxiv.org/abs/2504.17249)
218 |
219 |
220 |
221 | ### Human Robot Interaction
222 |
223 | [2025] [Demonstrating a Control Framework for Physical Human-Robot Interaction Toward Industrial Applications](https://arxiv.org/abs/2502.02967)
224 |
225 | [2025] [Users and Wizards in Conversations: How WoZ Interface Choices Define Human-Robot Interactions](https://roboticsconference.org/program/papers/85/)
226 |
227 | [2025] [Demonstrating Shared Force-Language Embeddings for Natural Human-Robot Communication](https://roboticsconference.org/program/papers/86/)
228 |
229 | [2025] [Safety with Agency: Human-Centered Safety Filter with Application to AI-Assisted Motorsports](https://roboticsconference.org/program/papers/93/)
230 |
231 | [2025] [Morpheus: A Neural-driven Animatronic Face with Hybrid Actuation and Diverse Emotion Control](https://roboticsconference.org/program/papers/80/)
232 |
233 | [2025] [Interface-level Intent Inference for Environment-agnostic Robot Teleoperation Assistance](https://roboticsconference.org/program/papers/81/)
234 |
235 | [2025] [Optimal Interactive Learning on the Job via Facility Location Planning](https://roboticsconference.org/program/papers/87/)
236 |
237 | [2025] [Interruption Handling for Conversational Robots](https://roboticsconference.org/program/papers/89/)
238 |
239 | [2025] [Towards Uncertainty Unification: A Case Study for Preference Learning](https://roboticsconference.org/program/papers/91/)
240 |
243 |
244 |
245 | ### State Estimation
246 |
247 | [2025] [Boxi: Design Decisions in the Context of Algorithmic Performance for Robotics](https://arxiv.org/abs/2504.18500)
--------------------------------------------------------------------------------
/papers/00-Latest/SIGGRAPH2025.md:
--------------------------------------------------------------------------------
1 | ## SIGGRAPH2025
2 |
3 | [2025] [AMOR: Adaptive Character Control through Multi-Objective Reinforcement Learning](https://arxiv.org/abs/2505.23708)
--------------------------------------------------------------------------------
/papers/00-Latest/bipedal.md:
--------------------------------------------------------------------------------
1 | ### Whole-Body Control
2 |
3 | [2025] [AMO: Adaptive Motion Optimization for Hyper-Dexterous Humanoid Whole-Body Control](https://arxiv.org/abs/2505.03738)
4 |
5 | [2025] [Visual Imitation Enables Contextual Humanoid Control](https://arxiv.org/abs/2505.03729)
6 |
7 | [2025] [HuB: Learning Extreme Humanoid Balance](https://arxiv.org/abs/2505.07294)
8 |
9 | [2025] [Let Humanoids Hike! Integrative Skill Development on Complex Trails](https://arxiv.org/abs/2505.06218)
10 |
11 |
12 |
13 | ### Data Collection
14 |
15 | [2025] [DreamGen: Unlocking Generalization in Robot Learning through Neural Trajectories](https://arxiv.org/abs/2505.12705)
16 |
17 |
18 |
19 | ### Large Scale Training
20 |
21 | [2025] [FastTD3: Simple, Fast, and Capable Reinforcement Learning for Humanoid Control](https://arxiv.org/abs/2505.22642)
22 |
23 |
24 |
25 | ### Loco-Manipulation
26 |
27 | [2025] [Learning Unified Force and Position Control for Legged Loco-Manipulation](https://arxiv.org/abs/2505.20829)
28 |
29 |
30 |
31 | ### Human-Humanoid Interaction
32 |
33 | [2025] [H2-COMPACT: Human-Humanoid Co-Manipulation via Adaptive Contact Trajectory Policies](https://arxiv.org/abs/2505.17627)
--------------------------------------------------------------------------------
/papers/00-Latest/human-robot-interaction.md:
--------------------------------------------------------------------------------
1 | ## Human-Robot Interaction
2 |
3 | [2025] [Context-aware collaborative pushing of heavy objects using skeleton-based intention prediction](https://arxiv.org/abs/2505.10239)
4 |
5 | [2025] [Towards Balancing Preference and Performance through Adaptive Personalized Explainability](https://arxiv.org/abs/2504.13856)
--------------------------------------------------------------------------------
/papers/00-Latest/imtation-learning.md:
--------------------------------------------------------------------------------
1 | ## Imitation Learning
2 |
3 | [2025] [BEAST: Efficient Tokenization of B-Splines Encoded Action Sequences for Imitation Learning](https://arxiv.org/abs/2506.06072)
--------------------------------------------------------------------------------
/papers/00-Latest/manipulation.md:
--------------------------------------------------------------------------------
1 | ## Manipulation
2 |
3 | [2025] [Learning Compositional Behaviors from Demonstration and Language](https://arxiv.org/abs/2505.21981)
4 |
5 | [2025] [MTIL: Encoding Full History with Mamba for Temporal Imitation Learning](https://arxiv.org/abs/2505.12410)
6 |
7 | [2025] [UniSkill: Imitating Human Videos via Cross-Embodiment Skill Representations](https://arxiv.org/abs/2505.08787)
8 |
9 | [2025] [MOSAIC: A Skill-Centric Algorithmic Framework for Long-Horizon Manipulation Planning](https://arxiv.org/abs/2504.16738)
10 |
11 | [2025] [Eye, Robot: Learning to Look to Act with a BC-RL Perception-Action Loop](https://arxiv.org/abs/2506.10968)
12 |
13 | [2025] [Real-Time Execution of Action Chunking Flow Policies](https://arxiv.org/abs/2506.07339)
14 |
15 |
16 |
17 | ### Perception
18 |
19 | [2025] [UAD: Unsupervised Affordance Distillation for Generalization in Robotic Manipulation](https://arxiv.org/abs/2506.09284)
20 |
21 |
22 |
23 | ### Diffusion Policy
24 |
25 | [2025] [Learning Long-Context Diffusion Policies via Past-Token Prediction](https://arxiv.org/abs/2505.09561)
26 |
27 | [2025] [H3DP: Triply-Hierarchical Diffusion Policy for Visuomotor Learning](https://arxiv.org/abs/2505.07819)
28 |
29 |
30 |
31 | ### Mobile Policy
32 |
33 | [2025] [Mobi-π: Mobilizing Your Robot Learning Policy](https://arxiv.org/abs/2505.23692)
34 |
35 |
36 |
37 | ### Evaluation
38 |
39 | [2025] [EnerVerse-AC: Envisioning Embodied Environments with Action Condition](https://arxiv.org/abs/2505.09723)
40 |
41 |
42 |
43 | ### Data Collection
44 |
45 | [2025] [Guiding Data Collection via Factored Scaling Curves](https://arxiv.org/abs/2505.07728)
--------------------------------------------------------------------------------
/papers/00-Latest/physical-based-character-control.md:
--------------------------------------------------------------------------------
1 | ## Physical-based Character Control
2 |
3 | [2025] [Emergent Active Perception and Dexterity of Simulated Humanoids from Visual Reinforcement Learning](https://arxiv.org/abs/2505.12278)
--------------------------------------------------------------------------------
/papers/00-Latest/quadruped.md:
--------------------------------------------------------------------------------
1 | ## Quadruped
2 |
3 | [2025] [LocoTouch: Learning Dexterous Quadrupedal Transport with Tactile Sensing](https://arxiv.org/abs/2505.23175)
--------------------------------------------------------------------------------
/papers/00-Surveys/survey.md:
--------------------------------------------------------------------------------
1 | ## Surveys
2 |
3 | [2021] [Robot Learning from Randomized Simulations: A Review](https://arxiv.org/abs/2111.00956)
4 |
5 | [2022] [Guided reinforcement learning: A review and evaluation for efficient and effective real-world robotics](https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9926159)
6 |
7 | [2023] [Transfer Learning in Robotics: An Upcoming Breakthrough? A Review of Promises and Challenges](https://arxiv.org/abs/2311.18044)
8 |
9 | [2024] [Survey of Learning Approaches for Robotic In-Hand Manipulation](https://arxiv.org/abs/2401.07915)
10 |
11 | [2024] [A Survey of Robotic Language Grounding: Tradeoffs Between Symbols and Embeddings](https://arxiv.org/abs/2405.13245)
12 |
13 | [2024] [Benchmarking Neural Radiance Fields for Autonomous Robots: An Overview](https://arxiv.org/abs/2405.05526)
14 |
15 | [2024] [Deep Reinforcement Learning for Robotics: A Survey of Real-World Successes](https://www.arxiv.org/abs/2408.03539)
16 |
17 | [2024] [Deep Generative Models in Robotics: A Survey on Learning from Multimodal Demonstrations](https://arxiv.org/abs/2408.04380)
18 |
19 | [2022] Teleoperation methods and enhancement techniques for mobile robots: A comprehensive survey
20 |
21 |
22 |
23 | ### Bipedal
24 |
25 | [2024] [AI Robots and Humanoid AI: Review, Perspectives and Directions](https://arxiv.org/abs/2405.15775)
26 |
27 | [2025] [Humanoid Locomotion and Manipulation: Current Progress and Challenges in Control, Planning, and Learning](https://arxiv.org/abs/2501.02116)
--------------------------------------------------------------------------------
/papers/01-Robot-Learning-Theory/latent-action.md:
--------------------------------------------------------------------------------
1 | ## Latent Action Model
2 |
3 | [2025] [CLAM: Continuous Latent Action Models for Robot Learning from Unlabeled Demonstrations](https://arxiv.org/abs/2505.04999)
--------------------------------------------------------------------------------
/papers/01-Robot-Learning-Theory/lifelong-learning-for-robot-learning.md:
--------------------------------------------------------------------------------
1 | ## Lifelong Learning for Robot Learning
2 |
3 | [2025] [Parental Guidance: Efficient Lifelong Learning through Evolutionary Distillation](https://arxiv.org/abs/2503.18531)
--------------------------------------------------------------------------------
/papers/01-Robot-Learning-Theory/lifelong-rl-for-robot-learning.md:
--------------------------------------------------------------------------------
1 | ## Lifelong RL for Robot Learning
2 |
3 | [2025] [Preserving and combining knowledge in robotic lifelong reinforcement learning](https://www.nature.com/articles/s42256-025-00983-2)
--------------------------------------------------------------------------------
/papers/01-Robot-Learning-Theory/meta-learning-for-robot-learning.md:
--------------------------------------------------------------------------------
1 | ## Meta Learning for Robot Learning
2 |
3 | [2025] [Efficient Continual Adaptation of Pretrained Robotic Policy with Online Meta-Learned Adapters](https://arxiv.org/abs/2503.18684)
--------------------------------------------------------------------------------
/papers/01-Robot-Learning-Theory/reinforcement-learning-for-continuous-control.md:
--------------------------------------------------------------------------------
1 | ## Reinforcement Learning for Continuous Control
2 |
3 | ### Q Learning
4 |
5 | [2021] [Is Bang-Bang Control All You Need? Solving Continuous Control with Bernoulli Policies](https://arxiv.org/abs/2111.02552)
6 |
7 | [2022] [Solving Continuous Control via Q-learning](https://arxiv.org/abs/2210.12566)
8 |
9 |
10 |
11 | ### Policy-Gradient Based Method
12 |
13 | [2015] [Continuous control with deep reinforcement learning](https://arxiv.org/abs/1509.02971)
14 |
15 | [2015] [Trust Region Policy Optimization](https://arxiv.org/abs/1502.05477)
16 |
17 | [2017] [Proximal Policy Optimization Algorithms](https://arxiv.org/abs/1707.06347)
18 |
19 | [2018] [Maximum a Posteriori Policy Optimisation](https://arxiv.org/abs/1806.06920)
20 |
21 | [2018] [Boosting Trust Region Policy Optimization by Normalizing Flows Policy](https://arxiv.org/abs/1809.10326)
22 |
23 | [2024] [Continuous Control with Coarse-to-fine Reinforcement Learning](https://arxiv.org/abs/2407.07787)
24 |
25 |
26 |
27 | ### Actor Critic Method
28 |
29 | [2016] [Asynchronous Methods for Deep Reinforcement Learning](https://arxiv.org/abs/1602.01783)
30 |
31 | [2018] [Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor](https://arxiv.org/abs/1801.01290)
32 |
33 | [2018] [Soft Actor-Critic Algorithms and Applications](https://arxiv.org/abs/1812.05905)
34 |
35 | [2018] [Addressing Function Approximation Error in Actor-Critic Methods](https://arxiv.org/abs/1802.09477)
36 |
37 |
38 |
39 | ### Discretizing Action Space
40 |
41 | [2019] [Discretizing Continuous Action Space for On-Policy Optimization](https://arxiv.org/abs/1901.10500)
--------------------------------------------------------------------------------
/papers/01-Robot-Learning-Theory/reinforcement-learning-for-robot-learning.md:
--------------------------------------------------------------------------------
1 | ## Reinforcement Learning for Robot Learning
2 |
3 | [2017] [Asymmetric Actor Critic for Image-Based Robot Learning](https://arxiv.org/abs/1710.06542)
4 |
5 | [2024] [Reinforcement Learning with Foundation Priors: Let the Embodied Agent Efficiently Learn on Its Own](https://arxiv.org/abs/2310.02635)
6 |
7 | [2024] [Reinforcement Learning with Action Sequence for Data-Efficient Robot Learning](https://arxiv.org/abs/2411.12155)
8 |
9 |
--------------------------------------------------------------------------------
/papers/01-Robot-Learning-Theory/singal-temporal-logical.md:
--------------------------------------------------------------------------------
1 | ## Signal Temporal Logic Planning
2 |
3 | [2025] [TeLoGraF: Temporal Logic Planning via Graph-encoded Flow Matching](https://arxiv.org/abs/2505.00562)
--------------------------------------------------------------------------------
/papers/01-Robot-Learning-Theory/theory.md:
--------------------------------------------------------------------------------
1 | ## Robot Learning Theory
2 |
3 | [2025] [Skill Expansion and Composition in Parameter Space](https://arxiv.org/abs/2502.05932)
--------------------------------------------------------------------------------
/papers/02-Data-Acquisition/bimanual.md:
--------------------------------------------------------------------------------
1 | ## Data Collection for Bimanual Manipulation
2 |
3 | [2024] [DexMimicGen: Automated Data Generation for Bimanual Dexterous Manipulation via Imitation Learning](https://arxiv.org/abs/2410.24185)
--------------------------------------------------------------------------------
/papers/02-Data-Acquisition/manipulation.md:
--------------------------------------------------------------------------------
1 | ## Data Collection for Manipulation
2 |
3 | [2024] [ARCap: Collecting High-quality Human Demonstrations for Robot Learning with Augmented Reality Feedback](https://arxiv.org/abs/2410.08464)
--------------------------------------------------------------------------------
/papers/02-Data-Acquisition/sim.md:
--------------------------------------------------------------------------------
1 | ## Simulator
2 |
3 | [2024] [ACDC: Automated Creation of Digital Cousins for Robust Policy Learning](https://digital-cousins.github.io/)
--------------------------------------------------------------------------------
/papers/02-Data-Acquisition/synthetic-data.md:
--------------------------------------------------------------------------------
1 | ## Synthetic Data
2 |
3 | [2023] [GRADE: Generating Realistic And Dynamic Environments for Robotics Research with Isaac Sim](https://arxiv.org/abs/2303.04466)
--------------------------------------------------------------------------------
/papers/02-Data-Acquisition/theory.md:
--------------------------------------------------------------------------------
1 | ## Theory
2 |
3 | [2024] [DsDm: Model-Aware Dataset Selection with Datamodels](https://arxiv.org/abs/2401.12926)
4 |
5 | [2024] [Attribute-to-Delete: Machine Unlearning via Datamodel Matching](https://arxiv.org/abs/2410.23232)
6 |
7 |
8 |
9 | ### Survey
10 |
11 | [2022] [Training Data Influence Analysis and Estimation: A Survey](https://arxiv.org/abs/2212.04612)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/hardware-design.md:
--------------------------------------------------------------------------------
1 | ## Humanoid Hardware Design
2 |
3 | [2024] [Berkeley Humanoid: A Research Platform for Learning-based Control](https://arxiv.org/abs/2407.21781)
4 |
5 | [2024] [The NING Humanoid: The Concurrent Design and Development of a Dynamic and Agile Platform](https://arxiv.org/abs/2408.01056)
6 |
7 | [2025] [ToddlerBot: Open-Source ML-Compatible Humanoid Platform for Loco-Manipulation](https://arxiv.org/abs/2502.00893)
8 |
9 | [2025] [BodyGen: Advancing Towards Efficient Embodiment Co-Design](https://arxiv.org/abs/2503.00533)
10 |
11 | [2025] [Demonstrating Berkeley Humanoid Lite: An Open-source, Accessible, and Customizable 3D-printed Humanoid Robot](https://arxiv.org/abs/2504.17249)
12 |
13 |
14 |
15 | ### Humanoid Hands
16 |
17 | [2025] [RUKA: Rethinking the Design of Humanoid Hands with Learning](https://arxiv.org/abs/2504.13165)
18 |
19 |
20 |
21 | ### Sensors
22 |
23 | [2024] [ARMOR: Egocentric Perception for Humanoid Robot Collision Avoidance and Motion Planning](https://arxiv.org/abs/2412.00396)
24 |
25 |
26 |
27 | ### Humanoid Teleoperation
28 |
29 | [2025] [HOMIE: Humanoid Loco-Manipulation with Isomorphic Exoskeleton Cockpit](https://arxiv.org/abs/2502.13013)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/humanoid-creator.md:
--------------------------------------------------------------------------------
1 | ## Humanoid Creator
2 |
3 | [2022] [From Universal Humanoid Control to Automatic Physically Valid Character Creation](https://arxiv.org/abs/2206.09286)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/lower-body-control/gaits-control.md:
--------------------------------------------------------------------------------
1 | ## Gaits Control
2 |
3 | [2020] [Sim-to-Real Learning of All Common Bipedal Gaits via Periodic Reward Composition](https://arxiv.org/abs/2011.01387)
4 |
5 | [2025] [A Unified and General Humanoid Whole-Body Controller for Fine-Grained Locomotion](https://arxiv.org/abs/2502.03206)
6 |
7 |
8 |
9 | ### Jumping
10 |
11 | [2023] [Robust and Versatile Bipedal Jumping Control through Reinforcement Learning](https://arxiv.org/abs/2302.09450)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/lower-body-control/humanoid-control.md:
--------------------------------------------------------------------------------
1 | ## Humanoid Control
2 |
3 | ### Survey
4 |
5 | [2024] [Humanoid Robots at work: where are we ?](https://arxiv.org/abs/2404.04249)
6 |
7 | [2024] [Deep Reinforcement Learning for Bipedal Locomotion: A Brief Survey](https://arxiv.org/abs/2404.17070)
8 |
9 |
10 |
11 | ### Reinforcement Learning
12 |
13 | [2021] [Reward-Adaptive Reinforcement Learning: Dynamic Policy Gradient Optimization for Bipedal Locomotion](https://arxiv.org/abs/2107.01908)
14 |
15 | [2023] Robust and versatile bipedal jumping control through multi-task reinforcement learning
16 |
17 | [2024] [Real-world humanoid locomotion with reinforcement learning](https://arxiv.org/abs/2303.03381v2)
18 |
19 | [2024] [I-CTRL: Imitation to Control Humanoid Robots Through Constrained Reinforcement Learning](https://arxiv.org/abs/2405.08726)
20 |
21 | [2024] [CrossLoco: Human Motion Driven Control of Legged Robots via Guided Unsupervised Reinforcement Learning](https://arxiv.org/abs/2309.17046)
22 |
23 | [2024] [Humanoid Parkour Learning](https://arxiv.org/abs/2406.10759)
24 |
25 | [2024] [Learning Smooth Humanoid Locomotion through Lipschitz-Constrained Policies](https://arxiv.org/abs/2410.11825)
26 |
27 | [2024] [Reinforcement Learning for Versatile, Dynamic, and Robust Bipedal Locomotion Control](https://arxiv.org/abs/2401.16889)
28 |
29 |
30 |
31 | ### Multi-Objective Reinforcement Learning
32 |
33 | [2024] [Stage-Wise Reward Shaping for Acrobatic Robots: A Constrained Multi-Objective Reinforcement Learning Approach](https://arxiv.org/abs/2409.15755)
34 |
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/lower-body-control/locomotion-over-challenging-terrain.md:
--------------------------------------------------------------------------------
1 | ## Locomotion over Challenging Terrain
2 |
3 | [2024] [Learning Generic and Dynamic Locomotion of Humanoids Across Discrete Terrains](https://arxiv.org/abs/2405.17227)
4 |
5 | [2024] [Advancing Humanoid Locomotion: Mastering Challenging Terrains with Denoising World Model Learning](https://arxiv.org/abs/2408.14472)
6 |
7 | [2025] [BeamDojo: Learning Agile Humanoid Locomotion on Sparse Footholds](https://arxiv.org/abs/2502.10363)
8 |
9 |
10 |
11 | ### Stair Traversal
12 |
13 | [2021] [Blind Bipedal Stair Traversal via Sim-to-Real Reinforcement Learning](https://arxiv.org/abs/2105.08328)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/motion-generation.md:
--------------------------------------------------------------------------------
1 | ## Motion Generation
2 |
3 | [2024] [Harmon: Whole-Body Motion Generation of Humanoid Robots from Language Descriptions](https://openreview.net/pdf?id=UUZ4Yw3lt0)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/resources.md:
--------------------------------------------------------------------------------
1 | ## Hardware
2 |
3 | - [Red Rabbit Robotics](https://www.redrabbitrobotics.cc/)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/survey.md:
--------------------------------------------------------------------------------
1 | ## Survey
2 |
3 | [2025] [Humanoid Locomotion and Manipulation: Current Progress and Challenges in Control, Planning, and Learning](https://arxiv.org/abs/2501.02116)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/teleoperation/dexterous-teleoperation.md:
--------------------------------------------------------------------------------
1 | ## Dexterous Teleoperation
2 |
3 | [2024] [Bunny-VisionPro: Real-Time Bimanual Dexterous Teleoperation for Imitation Learning](https://arxiv.org/abs/2407.03162)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/teleoperation/human2humanoid-controller.md:
--------------------------------------------------------------------------------
1 | ## Human-to-Humanoid Controller
2 |
3 | [2024] [Learning Human-to-Humanoid Real-Time Whole-Body Teleoperation](https://arxiv.org/abs/2403.04436)
4 |
5 | [2024] [OmniH2O: Universal and Dexterous Human-to-Humanoid Whole-Body Teleoperation and Learning](https://arxiv.org/abs/2406.08858)
6 |
7 | [2025] [ASAP: Aligning Simulation and Real-World Physics for Learning Agile Humanoid Whole-Body Skills](https://arxiv.org/abs/2502.01143)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/teleoperation/pose-estimation.md:
--------------------------------------------------------------------------------
1 | ## Pose Estimation
2 |
3 | [2022] [Embodied Scene-aware Human Pose Estimation](https://arxiv.org/abs/2206.09106)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/teleoperation/whole-body-teleoperation.md:
--------------------------------------------------------------------------------
1 | ## Whole Body Teleoperation
2 |
3 | ### Survey
4 |
5 | [2023] [Teleoperation of Humanoid Robots: A Survey](https://arxiv.org/abs/2301.04317)
6 |
7 |
8 |
9 | ### Method
10 |
11 | [2023] [Deep Imitation Learning for Humanoid Loco-manipulation through Human Teleoperation](https://arxiv.org/abs/2309.01952)
12 |
13 | [2024] [Learning Human-to-Humanoid Real-Time Whole-Body Teleoperation](https://arxiv.org/abs/2403.04436)
14 |
15 | [2024] [OmniH2O: Universal and Dexterous Human-to-Humanoid Whole-Body Teleoperation and Learning](https://arxiv.org/abs/2406.08858)
16 |
17 | [2024] [Open-TeleVision: Teleoperation with Immersive Active Visual Feedback](https://arxiv.org/abs/2407.01512)
18 |
19 | [2024] [Mobile-TeleVision: Predictive Motion Priors for Humanoid Whole-Body Control](https://arxiv.org/abs/2412.07773)
20 |
21 | [2025] [TWIST: Teleoperated Whole-Body Imitation System](https://arxiv.org/abs/2505.02833)
22 |
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/upper-body-control/dexterous-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Dexterous Manipulation
2 |
3 | [2024] [Generalizable Humanoid Manipulation with Improved 3D Diffusion Policies](https://arxiv.org/abs/2410.10803)
4 |
5 | [2024] [OKAMI: Teaching Humanoid Robots Manipulation Skills through Single Video Imitation](https://arxiv.org/abs/2410.11792)
6 |
7 | [2024] [Bimanual Dexterity for Complex Tasks](https://openreview.net/pdf?id=55tYfHvanf)
8 |
9 | [2024] [AsymDex: Leveraging Asymmetry and Relative Motion in Learning Bimanual Dexterity](https://arxiv.org/abs/2411.13020)
10 |
11 | [2025] [Sim-to-Real Reinforcement Learning for Vision-Based Dexterous Manipulation on Humanoids](https://arxiv.org/abs/2502.20396)
12 |
13 |
14 |
15 | ### Cross Embodiment
16 |
17 | [2025] [Humanoid Policy ~ Human Policy](https://arxiv.org/abs/2503.13441)
18 |
19 |
20 |
21 | ### High-Precision Manipulation
22 |
23 | [2025] [High-Precision Transformer-Based Visual Servoing for Humanoid Robots in Aligning Tiny Objects](https://arxiv.org/abs/2503.04862)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/whole-body-control/advanced-controller/humanoid-control-with-llm.md:
--------------------------------------------------------------------------------
1 | ## Humanoid Control with LLM
2 |
3 | [2023] [Words into Action: Learning Diverse Humanoid Robot Behaviors using Language Guided Iterative Motion Refinement](https://arxiv.org/abs/2310.06226)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/whole-body-control/advanced-controller/motion-prior-controller.md:
--------------------------------------------------------------------------------
1 | ## Motion Prior Controller
2 |
3 | [2022] [Imitate and Repurpose: Learning Reusable Robot Movement Skills From Human and Animal Behaviors](https://arxiv.org/abs/2203.17138)
4 |
5 | [2024] [HumanMimic: Learning Natural Locomotion and Transitions for Humanoid Robot via Wasserstein Adversarial Imitation](https://arxiv.org/abs/2309.14225)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/whole-body-control/human2humanoid-controller.md:
--------------------------------------------------------------------------------
1 | ## Human-to-Humanoid Controller
2 |
3 | [2024] [Learning Human-to-Humanoid Real-Time Whole-Body Teleoperation](https://arxiv.org/abs/2403.04436)
4 |
5 | [2024] [OmniH2O: Universal and Dexterous Human-to-Humanoid Whole-Body Teleoperation and Learning](https://arxiv.org/abs/2406.08858)
6 |
7 | [2025] [ASAP: Aligning Simulation and Real-World Physics for Learning Agile Humanoid Whole-Body Skills](https://arxiv.org/abs/2502.01143)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/whole-body-control/humanoid-loco-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Humanoid Loco-Manipulation
2 |
3 | [2023] [Deep Imitation Learning for Humanoid Loco-manipulation through Human Teleoperation](https://arxiv.org/abs/2309.01952)
4 |
5 | [2023] [Sim-to-Real Learning for Humanoid Box Loco-Manipulation](https://arxiv.org/abs/2310.03191)
6 |
7 | [2024] [Learning Decentralized Multi-Biped Control for Payload Transport](https://arxiv.org/abs/2406.17279)
8 |
9 | [2024] [Autonomous Behavior Planning For Humanoid Loco-manipulation Through Grounded Language Model](https://arxiv.org/abs/2408.08282)
10 |
11 | [2024] [SkillBlender: Towards Versatile Humanoid Whole-Body Control via Skill Blending](https://openreview.net/pdf?id=4MUgd8EN0F)
12 |
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/whole-body-control/multi-task.md:
--------------------------------------------------------------------------------
1 | ## Multi-Task Controller
2 |
3 | [2024] [HOVER: Versatile Neural Whole-Body Controller for Humanoid Robots](https://arxiv.org/abs/2410.21229)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/whole-body-control/posture-control.md:
--------------------------------------------------------------------------------
1 | ## Posture Control
2 |
3 | [2024] [Leveraging Symmetry in RL-based Legged Locomotion Control](https://arxiv.org/abs/2403.17320)
4 |
5 | [2025] [A Unified and General Humanoid Whole-Body Controller for Fine-Grained Locomotion](https://arxiv.org/abs/2502.03206)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/whole-body-control/standing-up.md:
--------------------------------------------------------------------------------
1 | ## Standing Up
2 |
3 | [2025] [Learning Humanoid Standing-up Control across Diverse Postures](https://www.arxiv.org/abs/2502.08378)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/bipedal/whole-body-control/whole-body-control.md:
--------------------------------------------------------------------------------
1 | ## Whole Body Control
2 |
3 | [2024] [Expressive Whole-Body Control for Humanoid Robots](https://arxiv.org/abs/2402.16796)
4 |
5 | [2024] [HumanPlus: Humanoid Shadowing and Imitation from Humans](https://humanoid-ai.github.io/)
6 |
7 | [2024] [Learning Multi-Modal Whole-Body Control for Real-World Humanoid Robots](https://masked-humanoid.github.io/mhc/)
8 |
9 | [2024] [HOVER: Versatile Neural Whole-Body Controller for Humanoid Robots](https://arxiv.org/abs/2410.21229)
10 |
11 | [2024] [ExBody2: Advanced Expressive Humanoid Whole-Body Control](https://arxiv.org/abs/2412.13196)
12 |
13 | [2024] [WoCoCo: Learning Whole-Body Humanoid Control with Sequential Contacts](https://arxiv.org/abs/2406.06005)
14 |
15 | [2025] [ASAP: Aligning Simulation and Real-World Physics for Learning Agile Humanoid Whole-Body Skills](https://arxiv.org/abs/2502.01143)
16 |
17 | [2025] [A Unified and General Humanoid Whole-Body Controller for Fine-Grained Locomotion](https://arxiv.org/abs/2502.03206)
18 |
19 | [2025] [HOMIE: Humanoid Loco-Manipulation with Isomorphic Exoskeleton Cockpit](https://arxiv.org/abs/2502.13013)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/mpc-locomotion-control.md:
--------------------------------------------------------------------------------
1 | ## MPC for Locomotion Control
2 |
3 | [2024] [Full-Order Sampling-Based MPC for Torque-Level Locomotion Control via Diffusion-Style Annealing](https://arxiv.org/abs/2409.15610)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/quadruped/advanced-controller/motion-prior-controller.md:
--------------------------------------------------------------------------------
1 | ## Motion Prior Controller
2 |
3 | [2022] [Learning Agile Skills via Adversarial Imitation of Rough Partial Demonstrations](https://arxiv.org/abs/2206.11693)
4 |
5 | [2023] [Lifelike Agility and Play in Quadrupedal Robots using Reinforcement Learning and Generative Pre-trained Models](https://arxiv.org/abs/2308.15143)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/quadruped/differentiable-simulator-for-quadruped.md:
--------------------------------------------------------------------------------
1 | ## Differentiable Simulator for Quadruped
2 |
3 | [2024] [DiffSim2Real: Deploying Quadrupedal Locomotion Policies Purely Trained in Differentiable Simulation](https://arxiv.org/abs/2411.02189)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/quadruped/gaits-control.md:
--------------------------------------------------------------------------------
1 | ## Gait Control
2 |
3 | [2022] [Walk These Ways: Tuning Robot Control for Generalization with Multiplicity of Behavior](https://arxiv.org/abs/2212.03238)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/quadruped/hardware-design.md:
--------------------------------------------------------------------------------
1 | ## Hardware Design
2 |
3 | [2024] [Robust Ladder Climbing with a Quadrupedal Robot](https://arxiv.org/abs/2409.17731v1)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/quadruped/locomotion-and-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Locomotion and Manipulation
2 |
3 | [2022] [Deep Whole-Body Control: Learning a Unified Policy for Manipulation and Locomotion](https://arxiv.org/abs/2210.10044)
4 |
5 | [2022] [Efficient multitask learning with an embodied predictive model for door opening and entry with whole-body control](https://www.science.org/doi/abs/10.1126/scirobotics.aax8177)
6 |
7 | [2022] [Hierarchical Reinforcement Learning for Precise Soccer Shooting Skills using a Quadrupedal Robot](https://arxiv.org/abs/2208.01160)
8 |
9 | [2023] [Legs as Manipulator: Pushing Quadrupedal Agility Beyond Locomotion](https://arxiv.org/abs/2303.11330)
10 |
11 | [2023] [Learning Whole-body Manipulation for Quadrupedal Robot](https://arxiv.org/abs/2308.16820)
12 |
13 | [2023] [Curiosity-Driven Learning of Joint Locomotion and Manipulation Tasks](https://openreview.net/pdf?id=QG_ERxtDAP-)
14 |
15 | [2023] [DribbleBot: Dynamic Legged Manipulation in the Wild](https://arxiv.org/abs/2304.01159)
16 |
17 | [2024] [HiLMa-Res: A General Hierarchical Framework via Residual RL for Combining Quadrupedal Locomotion and Manipulation](https://arxiv.org/abs/2407.06584)
18 |
19 | [2024] [Helpful DoggyBot: Open-World Object Fetching using Legged Robots and Vision-Language Models](https://arxiv.org/abs/2410.00231)
20 |
21 | [2024] [Continuously Improving Mobile Manipulation with Autonomous Real-World RL](https://arxiv.org/abs/2409.20568)
22 |
23 | [2024] [Guided Reinforcement Learning for Robust Multi-Contact Loco-Manipulation](https://arxiv.org/abs/2410.13817)
24 |
25 | [2024] [WildLMa: Long Horizon Loco-Manipulation in the Wild](https://arxiv.org/abs/2411.15131)
26 |
27 | [2024] [Combining Planning and Diffusion for Mobility with Unknown Dynamics](https://arxiv.org/abs/2410.06911)
28 |
29 | [2025] [Bridging the Sim-to-Real Gap for Athletic Loco-Manipulation](https://arxiv.org/abs/2502.10894)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/quadruped/locomotion-over-challenging-terrain.md:
--------------------------------------------------------------------------------
1 | ## Locomotion over Challenging Terrain
2 |
3 | [2020] [Learning Quadrupedal Locomotion over Challenging Terrain](https://arxiv.org/abs/2010.11251)
4 |
5 | [2022] [Legged Locomotion in Challenging Terrains using Egocentric Vision](https://arxiv.org/abs/2211.07638)
6 |
7 | [2023] [DreamWaQ: Learning Robust Quadrupedal Locomotion With Implicit Terrain Imagination via Deep Reinforcement Learning](https://arxiv.org/abs/2301.10602)
8 |
9 | [2024] [Agile Continuous Jumping in Discontinuous Terrains](https://arxiv.org/abs/2409.10923)
10 |
11 |
12 |
13 | ### In the Wild
14 |
15 | [2023] [Learning Robust, Agile, Natural Legged Locomotion Skills in the Wild](https://arxiv.org/abs/2304.10888)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/quadruped/locomotion.md:
--------------------------------------------------------------------------------
1 | ## Quadruped Locomotion
2 |
3 | ### Reinforcement Learning
4 |
5 | [2017] [Preparing for the Unknown: Learning a Universal Policy with Online System Identification](https://arxiv.org/abs/1702.02453)
6 |
7 | [2020] [Dynamics Randomization Revisited: A Case Study for Quadrupedal Locomotion](https://arxiv.org/abs/2011.02404)
8 |
9 | [2021] [RMA: Rapid Motor Adaptation for Legged Robots](https://arxiv.org/abs/2107.04034)
10 |
11 | [2022] [Rapid Locomotion via Reinforcement Learning](https://arxiv.org/abs/2205.02824)
12 |
13 | [2022] [Walk These Ways: Tuning Robot Control for Generalization with Multiplicity of Behavior](https://arxiv.org/abs/2212.03238)
14 |
15 | [2022] [Learning Visual Locomotion with Cross-Modal Supervision](https://arxiv.org/abs/2211.03785)
16 |
17 | [2022] [Concurrent Training of a Control Policy and a State Estimator for Dynamic and Robust Legged Locomotion](https://arxiv.org/abs/2202.05481)
18 |
19 | [2023] [Learning Robust, Agile, Natural Legged Locomotion Skills in the Wild](https://arxiv.org/abs/2304.10888)
20 |
21 | [2023] [Extreme Parkour with Legged Robots](https://arxiv.org/abs/2309.14341)
22 |
23 | [2023] [Learning and Adapting Agile Locomotion Skills by Transferring Experience](https://arxiv.org/abs/2304.09834)
24 |
25 | [2024] [Agile But Safe: Learning Collision-Free High-Speed Legged Locomotion](https://arxiv.org/abs/2401.17583)
26 |
27 | [2024] [Hybrid Internal Model: Learning Agile Legged Locomotion with Simulated Robot Response](https://arxiv.org/abs/2312.11460)
28 |
29 | [2024] [Rethinking Robustness Assessment: Adversarial Attacks on Learning-based Quadrupedal Locomotion Controllers](https://arxiv.org/abs/2405.12424)
30 |
31 | [2024] [Quadruped robot traversing 3D complex environments with limited perception](https://arxiv.org/abs/2404.18225)
32 |
35 | [2024] [PA-LOCO: Learning Perturbation-Adaptive Locomotion for Quadruped Robots](https://arxiv.org/abs/2407.04224)
36 |
37 | [2024] [SoloParkour: Constrained Reinforcement Learning for Visual Locomotion from Privileged Experience](https://arxiv.org/abs/2409.13678)
38 |
39 | [2024] [Obstacle-Aware Quadrupedal Locomotion With Resilient Multi-Modal Reinforcement Learning](https://arxiv.org/abs/2409.19709)
40 |
41 | [2024] [Reinforcement Learning For Quadrupedal Locomotion: Current Advancements And Future Perspectives](https://arxiv.org/abs/2410.10438)
42 |
43 | [2024] [Reinforcement Learning from Wild Animal Videos](https://arxiv.org/abs/2412.04273)
44 |
45 | [2024] [RobotKeyframing: Learning Locomotion with High-Level Objectives via Mixture of Dense and Sparse Rewards](https://arxiv.org/abs/2407.11562)
46 |
47 | [2025] [SATA: Safe and Adaptive Torque-Based Locomotion Policies Inspired by Animal Learning](https://arxiv.org/abs/2502.12674)
48 |
49 | [2025] [DFM: Deep Fourier Mimic for Expressive Dance Motion Learning](https://arxiv.org/abs/2502.10980)
50 |
51 | [2025] [Unified Locomotion Transformer with Simultaneous Sim-to-Real Transfer for Quadrupeds](https://arxiv.org/abs/2503.08997)
52 |
53 |
54 |
55 | ### Multi-Objective Reinforcement Learning
56 |
57 | [2024] [Stage-Wise Reward Shaping for Acrobatic Robots: A Constrained Multi-Objective Reinforcement Learning Approach](https://arxiv.org/abs/2409.15755)
58 |
59 |
60 |
61 | ### Imitation Learning
62 |
63 | [2020] [Learning Agile Robotic Locomotion Skills by Imitating Animals](https://arxiv.org/abs/2004.00784)
64 |
65 |
66 |
67 | ### Meta Reinforcement Learning
68 |
69 | [2024] [Meta-Reinforcement Learning for Universal Quadrupedal Locomotion Control](https://arxiv.org/abs/2407.17502)
70 |
71 |
72 |
73 | ### Control Theory
74 |
75 | [2024] [DTC: Deep Tracking Control](https://www.science.org/doi/abs/10.1126/scirobotics.adh5401)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/quadruped/multi-task-controller.md:
--------------------------------------------------------------------------------
1 | ## Multi-task Controller
2 |
3 | [2025] [MoE-Loco: Mixture of Experts for Multitask Locomotion](https://arxiv.org/abs/2503.08564)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/quadruped/navigation-and-locomotion.md:
--------------------------------------------------------------------------------
1 | ## Navigation and Locomotion
2 |
3 | [2025] [VR-Robo: A Real-to-Sim-to-Real Framework for Visual Robot Navigation and Locomotion](https://arxiv.org/abs/2502.01536)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/quadruped/physical-based-control.md:
--------------------------------------------------------------------------------
1 | ## Physical-based Control
2 |
3 | [2020] [CARL: Controllable Agent with Reinforcement Learning for Quadruped Locomotion](https://arxiv.org/abs/2005.03288)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/quadruped/recovery-controller.md:
--------------------------------------------------------------------------------
1 | ## Recovery Controller
2 |
3 | [2019] [Robust Recovery Controller for a Quadrupedal Robot using Deep Reinforcement Learning](https://arxiv.org/abs/1901.07517)
--------------------------------------------------------------------------------
/papers/03-Legged-Robot/wheeled-legged-robot.md:
--------------------------------------------------------------------------------
1 | ## Wheeled Legged Robots
2 |
3 | [2023] [Curiosity-Driven Learning of Joint Locomotion and Manipulation Tasks](https://openreview.net/forum?id=QG_ERxtDAP-)
--------------------------------------------------------------------------------
/papers/04-Manipulation/00-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Manipulation
2 |
3 | ### Reinforcement Learning
4 |
5 | [2018] [QT-Opt: Scalable Deep Reinforcement Learning for Vision-Based Robotic Manipulation](https://arxiv.org/abs/1806.10293)
6 |
7 | [2021] [Learning Visible Connectivity Dynamics for Cloth Smoothing](https://arxiv.org/abs/2105.10389)
8 |
9 | [2022] [Monte Carlo Augmented Actor-Critic for Sparse Reward Deep Reinforcement Learning from Suboptimal Demonstrations](https://arxiv.org/abs/2210.07432)
10 |
11 | [2022] [Inferring Smooth Control: Monte Carlo Posterior Policy Iteration with Gaussian Processes](https://arxiv.org/abs/2210.03512)
12 |
13 | [2024] [FOTS: A Fast Optical Tactile Simulator for Sim2Real Learning of Tactile-motor Robot Manipulation Skills](https://arxiv.org/abs/2404.19217)
14 |
15 | [2024] [Stabilizing Contrastive RL: Techniques for Robotic Goal Reaching from Offline Data](https://openreview.net/pdf?id=Xkf2EBj4w3)
16 |
17 | [2024] [Value-Penalized Auxiliary Control from Examples for Learning without Rewards or Demonstrations](https://arxiv.org/abs/2407.03311)
18 |
19 | [2024] [Continuous Control with Coarse-to-fine Reinforcement Learning](https://arxiv.org/abs/2407.07787)
20 |
21 | [2025] [Multi-Stage Manipulation with Demonstration-Augmented Reward, Policy, and World Model Learning](https://arxiv.org/abs/2503.01837)
22 |
23 |
24 |
25 | ### Imitation Learning
26 |
27 | [2021] [Implicit Behavioral Cloning](https://arxiv.org/abs/2109.00137)
28 |
29 | [2022] [Temporal Logic Imitation: Learning Plan-Satisficing Motion Policies from Demonstrations](https://arxiv.org/abs/2206.04632)
30 |
31 | [2023] [What Matters in Learning from Offline Human Demonstrations for Robot Manipulation](https://arxiv.org/abs/2108.03298)
32 |
33 | [2023] [Multimodal and Force-Matched Imitation Learning with a See-Through Visuotactile Sensor](https://arxiv.org/abs/2311.01248)
34 |
35 | [2024] [Redundancy-aware Action Spaces for Robot Learning](https://arxiv.org/abs/2406.04144)
36 |
37 | [2024] [Scaling Manipulation Learning with Visual Kinematic Chain Prediction](https://arxiv.org/abs/2406.07837)
38 |
39 | [2024] [Bidirectional Decoding: Improving Action Chunking via Closed-Loop Resampling](https://arxiv.org/abs/2408.17355)
40 |
41 | [2024] [Data Scaling Laws in Imitation Learning for Robotic Manipulation](https://arxiv.org/abs/2410.18647)
42 |
43 | [2024] [LEGATO: Cross-Embodiment Imitation Using a Grasping Tool](https://arxiv.org/abs/2411.03682)
44 |
45 | [2024] [Prediction with Action: Visual Policy Learning via Joint Denoising Process](https://arxiv.org/abs/2411.18179)
46 |
47 | [2024] [Fast and Robust Visuomotor Riemannian Flow Matching Policy](https://arxiv.org/abs/2412.10855)
48 |
49 | [2025] [IMLE Policy: Fast and Sample Efficient Visuomotor Policy Learning via Implicit Maximum Likelihood Estimation](https://arxiv.org/abs/2502.12371)
50 |
51 | [2025] [AdaManip: Adaptive Articulated Object Manipulation Environments and Policy Learning](https://arxiv.org/abs/2502.11124)
52 |
53 | [2025] [Pick-and-place Manipulation Across Grippers Without Retraining: A Learning-optimization Diffusion Policy Approach](https://arxiv.org/abs/2502.15613)
54 |
55 | [2025] [DemoGen: Synthetic Demonstration Generation for Data-Efficient Visuomotor Policy Learning](https://arxiv.org/abs/2502.16932)
56 |
57 | [2025] [Phantom: Training Robots Without Robots Using Only Human Videos](https://arxiv.org/abs/2503.00779)
58 |
59 | [2025] [Train Robots in a JIF: Joint Inverse and Forward Dynamics with Human and Robot Demonstrations](https://arxiv.org/abs/2503.12297)
60 |
61 | [2025] [Sim-and-Real Co-Training: A Simple Recipe for Vision-Based Robotic Manipulation](https://arxiv.org/abs/2503.24361)
62 |
63 | #### Universal Policy
64 |
65 | [2024] [One-Shot Imitation Learning with Invariance Matching for Robotic Manipulation](https://arxiv.org/abs/2405.13178)
66 |
67 | #### Sample Efficiency
68 |
69 | [2024] [Leveraging Locality to Boost Sample Efficiency in Robotic Manipulation](https://arxiv.org/abs/2406.10615)
70 |
71 |
72 |
73 | ### Imitation Learning and Reinforcement Learning
74 |
75 | [2024] [Policy Decorator: Model-Agnostic Online Refinement for Large Policy Model](https://arxiv.org/abs/2412.13630)
76 |
77 | [2024] [From Imitation to Refinement -- Residual RL for Precise Visual Assembly](https://arxiv.org/abs/2407.16677)
78 |
79 |
80 |
81 | ### In-Context Learning
82 |
83 | [2024] [Instant Policy: In-Context Imitation Learning via Graph Diffusion](https://arxiv.org/abs/2411.12633)
84 |
85 |
86 |
87 | ### One-Shot Learning
88 |
89 | [2024] [One-Shot Manipulation Strategy Learning by Making Contact Analogies](https://arxiv.org/abs/2411.09627)
90 |
91 |
92 |
93 | ### Flow and Imitation Learning
94 |
95 | [2024] [Flow as the Cross-Domain Manipulation Interface](https://arxiv.org/abs/2407.15208)
96 |
97 |
98 |
99 | ### Long-Horizon Task
100 |
101 | [2024] [A Backbone for Long-Horizon Robot Task Understanding](https://arxiv.org/abs/2408.01334)
102 |
--------------------------------------------------------------------------------
/papers/04-Manipulation/01-manipulation-with-3d.md:
--------------------------------------------------------------------------------
1 | # Manipulation with 3D Vision
2 |
3 | [2022] [Perceiver-Actor: A Multi-Task Transformer for Robotic Manipulation](https://arxiv.org/abs/2209.05451)
4 |
5 | [2023] [GNFactor: Multi-Task Real Robot Learning with Generalizable Neural Feature Fields](https://arxiv.org/abs/2308.16891)
6 |
7 | [2023] [Act3D: 3D Feature Field Transformers for Multi-Task Robotic Manipulation](https://arxiv.org/abs/2306.17817)
8 |
9 | [2023] [NeRFuser: Diffusion Guided Multi-Task 3D Policy Learning](https://openreview.net/forum?id=8GmPLkO0oR)
10 |
11 | [2023] [RVT: Robotic View Transformer for 3D Object Manipulation](https://arxiv.org/abs/2306.14896)
12 |
13 | [2024] [RVT-2: Learning Precise Manipulation from Few Demonstrations](https://arxiv.org/abs/2406.08545)
14 |
15 |
16 |
17 | ## Point Cloud
18 |
19 | [2020] [Goal-Auxiliary Actor-Critic for 6D Robotic Grasping with Point Clouds](https://arxiv.org/abs/2010.00824)
20 |
21 | [2023] [LangSplat: 3D Language Gaussian Splatting](https://arxiv.org/abs/2312.16084)
22 |
23 | [2024] [3D Diffusion Policy](https://arxiv.org/abs/2403.03954)
24 |
25 | [2024] [3D Diffuser Actor: Policy Diffusion with 3D Scene Representations](https://arxiv.org/abs/2402.10885)
26 |
27 | [2024] [Learning Robotic Manipulation Policies from Point Clouds with Conditional Flow Matching](https://arxiv.org/abs/2409.07343)
28 |
29 | ### Closed-Loop Grasping
30 |
31 | [2020] [Grasping in the Wild: Learning 6DoF Closed-Loop Grasping from Low-Cost Demonstrations](https://arxiv.org/abs/1912.04344)
32 |
33 |
34 |
35 | ## Manipulation with NeRF
36 |
37 | [2023] [Language Embedded Radiance Fields for Zero-Shot Task-Oriented Grasping](https://arxiv.org/abs/2309.07970)
38 |
39 | ### Open-Loop Grasping
40 |
41 | [2023] [Affordance-Driven Next-Best-View Planning for Robotic Grasping](https://arxiv.org/abs/2309.09556)
42 |
43 | ### Closed-Loop Grasping
44 |
45 | #### Imitation Learning
46 |
47 | [2023] [NeRF in the Palm of Your Hand: Corrective Augmentation for Robotics via Novel-View Synthesis](https://arxiv.org/abs/2301.08556)
48 |
49 | #### Reinforcement Learning
50 |
51 | [2022] [Reinforcement Learning with Neural Radiance Fields](https://arxiv.org/abs/2206.01634)
52 |
53 | [2023] [SNeRL: Semantic-aware Neural Radiance Fields for Reinforcement Learning](https://arxiv.org/abs/2301.11520)
54 |
55 | [2023] [Language Embedded Radiance Fields for Zero-Shot Task-Oriented Grasping](https://arxiv.org/abs/2309.07970)
56 |
57 |
58 |
59 | ## Manipulation with 3D Gaussian Splatting
60 |
61 | [2024] [MANUS: Markerless Grasp Capture using Articulated 3D Gaussians](https://arxiv.org/abs/2312.02137)
62 |
63 | [2024] [ManiGaussian: Dynamic Gaussian Splatting for Multi-task Robotic Manipulation](https://arxiv.org/abs/2403.08321)
64 |
65 | [2024] [GraspSplats: Efficient Manipulation with 3D Feature Splatting](https://arxiv.org/abs/2409.02084)
66 |
67 | [2024] [D3Fields: Dynamic 3D Descriptor Fields for Zero-Shot Generalizable Robotic Manipulation](https://arxiv.org/abs/2309.16118)
68 |
69 | [2025] [Novel Demonstration Generation with Gaussian Splatting Enables Robust One-Shot Manipulation](https://arxiv.org/abs/2504.13175)
70 |
--------------------------------------------------------------------------------
/papers/04-Manipulation/02-manipulation-with-multimodal-sensing.md:
--------------------------------------------------------------------------------
1 | ## Manipulation with Multimodal Sensing
2 |
3 | [2024] [3D-ViTac: Learning Fine-Grained Manipulation with Visuo-Tactile Sensing](https://arxiv.org/abs/2410.24091)
--------------------------------------------------------------------------------
/papers/04-Manipulation/03-mobile-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Mobile Manipulation
2 |
3 | [2024] [Combining Planning and Diffusion for Mobility with Unknown Dynamics](https://arxiv.org/abs/2410.06911)
4 |
5 |
6 |
7 | ### TAMP
8 |
9 | [2023] [Sequence-Based Plan Feasibility Prediction for Efficient Task and Motion Planning](https://arxiv.org/abs/2211.01576)
10 |
11 | [2024] [Guiding Long-Horizon Task and Motion Planning with Vision Language Models](https://arxiv.org/abs/2410.02193)
--------------------------------------------------------------------------------
/papers/04-Manipulation/04-visual-rl-for-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Visual RL for Manipulation
2 |
3 | [2024] [Learning to Manipulate Anywhere: A Visual Generalizable Framework For Reinforcement Learning](https://arxiv.org/abs/2407.15815v1)
4 |
5 | [2024] [Precise and Dexterous Robotic Manipulation via Human-in-the-Loop Reinforcement Learning](https://hil-serl.github.io/)
6 |
7 | [2024] [When Should We Prefer State-to-Visual DAgger Over Visual Reinforcement Learning?](https://arxiv.org/abs/2412.13662)
8 |
9 | [2025] [Merging and Disentangling Views in Visual Reinforcement Learning for Robotic Manipulation](https://arxiv.org/abs/2505.04619)
--------------------------------------------------------------------------------
/papers/04-Manipulation/05-interactive-learning-for-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Interactive Learning for Manipulation
2 |
3 | [2024] [Precise and Dexterous Robotic Manipulation via Human-in-the-Loop Reinforcement Learning](https://hil-serl.github.io/)
4 |
5 | [2025] [Optimal Interactive Learning on the Job via Facility Location Planning](https://arxiv.org/abs/2505.00490)
--------------------------------------------------------------------------------
/papers/04-Manipulation/06-lifelong-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Lifelong Manipulation
2 |
3 | [2025] [Think Small, Act Big: Primitive Prompt Learning for Lifelong Robot Manipulation](https://arxiv.org/abs/2504.00420)
--------------------------------------------------------------------------------
/papers/04-Manipulation/07-force-control-for-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Force Control for Manipulation
2 |
3 | [2025] [GeoDEx: A Unified Geometric Framework for Tactile Dexterous and Extrinsic Manipulation under Force Uncertainty](https://arxiv.org/abs/2505.00647)
--------------------------------------------------------------------------------
/papers/04-Manipulation/08-data-augmentation.md:
--------------------------------------------------------------------------------
1 | ## Data Augmentation
2 |
3 | [2024] [Causal Action Influence Aware Counterfactual Data Augmentation](https://arxiv.org/abs/2405.18917)
4 |
5 | [2024] [View-Invariant Policy Learning via Zero-Shot Novel View Synthesis](https://arxiv.org/abs/2409.03685)
6 |
7 | [2025] [RoboEngine: Plug-and-Play Robot Data Augmentation with Semantic Robot Segmentation and Background Generation](https://arxiv.org/abs/2503.18738)
--------------------------------------------------------------------------------
/papers/04-Manipulation/09-hardware-design.md:
--------------------------------------------------------------------------------
1 | ## Hardware Design
2 |
3 | [2025] [RoboPanoptes: The All-seeing Robot with Whole-body Dexterity](https://arxiv.org/abs/2501.05420)
--------------------------------------------------------------------------------
/papers/04-Manipulation/base-model-for-manipulation/diffusion-model-for-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Diffusion Model for Manipulation
2 |
3 | [2022] [Planning with Diffusion for Flexible Behavior Synthesis](https://arxiv.org/abs/2205.09991)
4 |
5 | [2023] [Diffusion Policy: Visuomotor Policy Learning via Action Diffusion](https://arxiv.org/abs/2303.04137)
6 |
7 | [2024] [3D Diffusion Policy: Generalizable Visuomotor Policy Learning via Simple 3D Representations](https://arxiv.org/abs/2403.03954)
8 |
9 | [2024] [Consistency Policy: Accelerated Visuomotor Policies via Consistency Distillation](https://arxiv.org/abs/2405.07503)
10 |
11 | [2024] [Multimodal Diffusion Transformer: Learning Versatile Behavior from Multimodal Goals](https://intuitive-robots.github.io/mdt_policy/)
12 |
13 | [2024] [Hierarchical Diffusion Policy for Kinematics-Aware Multi-Task Robotic Manipulation](https://arxiv.org/abs/2403.03890)
14 |
15 | [2024] [Variational Distillation of Diffusion Policies into Mixture of Experts](https://arxiv.org/abs/2406.12538)
16 |
17 | [2024] [EquiBot: SIM(3)-Equivariant Diffusion Policy for Generalizable and Data Efficient Learning](https://arxiv.org/abs/2407.01479)
18 |
19 | [2024] [Precise Pick-and-Place using Score-Based Diffusion Networks](https://arxiv.org/abs/2409.09725)
20 |
21 | [2024] [Diff-Control: A Stateful Diffusion-based Policy for Imitation Learning](https://diff-control.github.io/)
22 |
23 | [2024] [ET-SEED: Efficient Trajectory-Level SE(3) Equivariant Diffusion Policy](https://arxiv.org/abs/2411.03990)
--------------------------------------------------------------------------------
/papers/04-Manipulation/base-model-for-manipulation/dit-for-manipulation.md:
--------------------------------------------------------------------------------
1 | ## DiTs for Manipulation
2 |
3 | [2024] [The Ingredients for Robotic Diffusion Transformers](https://arxiv.org/abs/2410.10088)
--------------------------------------------------------------------------------
/papers/04-Manipulation/base-model-for-manipulation/flow-for-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Flow Model for Manipulation
2 |
3 | [2024] [Learning Robotic Manipulation Policies from Point Clouds with Conditional Flow Matching](https://arxiv.org/abs/2409.07343)
--------------------------------------------------------------------------------
/papers/04-Manipulation/base-model-for-manipulation/generative-models-for-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Generative Models for Manipulation
2 |
3 | [2024] [G3Flow: Generative 3D Semantic Flow for Pose-aware and Generalizable Object Manipulation](https://arxiv.org/abs/2411.18369)
--------------------------------------------------------------------------------
/papers/04-Manipulation/base-model-for-manipulation/mamba-for-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Mamba for Manipulation
2 |
3 | [2024] [RoboMamba: Multimodal State Space Model for Efficient Robot Reasoning and Manipulation](https://arxiv.org/abs/2406.04339)
--------------------------------------------------------------------------------
/papers/04-Manipulation/bimanual/bimanual-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Bimanual Manipulation
2 |
3 | [2017] [Dex-Net 2.0: Deep Learning to Plan Robust Grasps with Synthetic Point Clouds and Analytic Grasp Metrics](https://arxiv.org/abs/1703.09312)
4 |
5 | [2020] [Deep Imitation Learning for Bimanual Robotic Manipulation](https://arxiv.org/abs/2010.05134)
6 |
7 | [2022] [Evo-NeRF: Evolving NeRF for Sequential Robot Grasping of Transparent Objects](https://openreview.net/pdf?id=Bxr45keYrf)
8 |
9 | [2024] [Yell At Your Robot: Improving On-the-Fly from Language Corrections](https://arxiv.org/abs/2403.12910)
10 |
11 | [2024] [Learning Visuotactile Skills with Two Multifingered Hands](https://arxiv.org/abs/2404.16823)
12 |
13 | [2024] [Bi-KVIL: Keypoints-based Visual Imitation Learning of Bimanual Manipulation Tasks](https://arxiv.org/abs/2403.03270)
14 |
15 | [2024] [BiKC: Keypose-Conditioned Consistency Policy for Bimanual Robotic Manipulation](https://arxiv.org/abs/2406.10093)
16 |
17 | [2024] [Open-TeleVision: Teleoperation with Immersive Active Visual Feedback](https://arxiv.org/abs/2407.01512)
18 |
19 | [2024] [ScrewMimic: Bimanual Imitation from Human Videos with Screw Space Projection](https://arxiv.org/abs/2405.03666)
20 |
21 | [2024] [Robot See Robot Do: Imitating Articulated Object Manipulation with Monocular 4D Reconstruction](https://arxiv.org/abs/2409.18121)
22 |
23 | [2025] [FACTR: Force-Attending Curriculum Training for Contact-Rich Policy Learning](https://arxiv.org/abs/2502.17432)
24 |
25 | [2025] [Spatial-Temporal Graph Diffusion Policy with Kinematic Modeling for Bimanual Robotic Manipulation](https://arxiv.org/abs/2503.10743)
26 |
27 | [2025] [Learning Coordinated Bimanual Manipulation Policies using State Diffusion and Inverse Dynamics Models](https://arxiv.org/abs/2503.23271)
28 |
29 |
30 |
31 | ### ALOHAs
32 |
33 | [2023] [Learning Fine-Grained Bimanual Manipulation with Low-Cost Hardware](https://arxiv.org/abs/2304.13705)
34 |
35 | [2024] [Mobile ALOHA: Learning Bimanual Mobile Manipulation with Low-Cost Whole-Body Teleoperation](https://arxiv.org/abs/2401.02117)
36 |
37 | [2024] [ALOHA 2: An Enhanced Low-Cost Hardware for Bimanual Teleoperation](https://aloha-2.github.io/)
38 |
39 | [2024] [InterACT: Inter-dependency Aware Action Chunking with Hierarchical Attention Transformers for Bimanual Manipulation](https://arxiv.org/abs/2409.07914)
40 |
41 |
42 |
43 | ### Flow and Imitation Learning
44 |
45 | [2024] [Flow Matching Imitation Learning for Multi-Support Manipulation](https://arxiv.org/abs/2407.12381)
--------------------------------------------------------------------------------
/papers/04-Manipulation/bimanual/data-collecting.md:
--------------------------------------------------------------------------------
1 | ## Data Collection
2 |
3 | [2024] [Universal Manipulation Interface: In-The-Wild Robot Teaching Without In-The-Wild Robots](https://arxiv.org/abs/2402.10329)
4 |
5 | [2025] [ViTaMIn: Learning Contact-Rich Tasks Through Robot-Free Visuo-Tactile Manipulation Interface](https://arxiv.org/abs/2504.06156)
--------------------------------------------------------------------------------
/papers/04-Manipulation/bimanual/data-generation.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Evan-wyl/robotlearning/44a54f4dc66b12455ed40c36d319a85aae09e2f6/papers/04-Manipulation/bimanual/data-generation.md
--------------------------------------------------------------------------------
/papers/04-Manipulation/bimanual/dexterous-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Dexterous Manipulation
2 |
3 | [2025] [ManipTrans: Efficient Dexterous Bimanual Manipulation Transfer via Residual Learning](https://arxiv.org/abs/2503.21860)
--------------------------------------------------------------------------------
/papers/04-Manipulation/bimanual/mobile-bimanual.md:
--------------------------------------------------------------------------------
1 | ## Bimanual Mobile Manipulation
2 |
3 | ### Manipulator
4 |
5 | [2025] [AhaRobot: A Low-Cost Open-Source Bimanual Mobile Manipulator for Embodied AI](https://arxiv.org/abs/2503.10070)
6 |
7 | [2025] [BEHAVIOR Robot Suite: Streamlining Real-World Whole-Body Manipulation for Everyday Household Activities](https://arxiv.org/abs/2503.05652)
8 |
9 | [2025] [AgiBot World Colosseo: A Large-scale Manipulation Platform for Scalable and Intelligent Embodied Systems](https://arxiv.org/abs/2503.06669)
--------------------------------------------------------------------------------
/papers/04-Manipulation/dexterous/dexterous-hand-grasping.md:
--------------------------------------------------------------------------------
1 | ## Dexterous Hand Grasping
2 |
3 | [2023] [Dexterous Functional Grasping](https://arxiv.org/abs/2312.02975)
4 |
5 | [2023] [UniDexGrasp++: Improving Dexterous Grasping Policy Learning via Geometry-aware Curriculum and Iterative Generalist-Specialist Learning](https://arxiv.org/abs/2304.00464)
6 |
7 | [2023] [GenDexGrasp: Generalizable Dexterous Grasping](https://arxiv.org/abs/2210.00722)
8 |
9 | [2024] [FFHFlow: A Flow-based Variational Approach for Multi-fingered Grasp Synthesis in Real Time](https://arxiv.org/abs/2407.15002)
10 |
11 | [2024] [A Surprisingly Efficient Representation for Multi-Finger Grasping](https://arxiv.org/abs/2408.02455)
12 |
13 | [2025] [AnyDexGrasp: General Dexterous Grasping for Different Hands with Human-level Learning Efficiency](https://arxiv.org/abs/2502.16420)
14 |
15 | [2025] [Multi-Keypoint Affordance Representation for Functional Dexterous Grasping](https://arxiv.org/abs/2502.20018)
16 |
17 | [2025] [DexGrasp Anything: Towards Universal Robotic Dexterous Grasping with Physics Awareness](https://arxiv.org/abs/2503.08257)
18 |
19 | [2025] [RobustDexGrasp: Robust Dexterous Grasping of General Objects from Single-view Perception](https://arxiv.org/abs/2504.05287)
--------------------------------------------------------------------------------
/papers/04-Manipulation/dexterous/dexterous-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Dexterous Manipulation
2 |
3 | [2022] [Learning Continuous Grasping Function with a Dexterous Hand from Human Demonstrations](https://arxiv.org/abs/2207.05053)
4 |
5 | [2023] [Teach a Robot to FISH: Versatile Imitation from One Minute of Demonstrations](https://arxiv.org/abs/2303.01497)
6 |
7 | [2023] [Learning Dexterous Manipulation from Exemplar Object Trajectories and Pre-Grasps](https://arxiv.org/abs/2209.11221)
8 |
9 | [2023] [Learning a Universal Human Prior for Dexterous Manipulation from Human Preference](https://arxiv.org/abs/2304.04602)
10 |
11 | [2023] [RoboPianist: Dexterous Piano Playing with Deep Reinforcement Learning](https://arxiv.org/abs/2304.04150)
12 |
13 | [2024] [SparseDFF: Sparse-View Feature Distillation for One-Shot Dexterous Manipulation](https://arxiv.org/abs/2310.16838)
14 |
15 | [2024] [Stable Tool-Use with Flexible Musculoskeletal Hands by Learning the Predictive Model of Sensor State Transition](https://arxiv.org/abs/2406.17136)
16 |
17 | [2024] [GET-Zero: Graph Embodiment Transformer for Zero-shot Embodiment Generalization](https://arxiv.org/abs/2407.15002)
18 |
19 | [2024] [DexForce: Extracting Force-informed Actions from Kinesthetic Demonstrations for Dexterous Manipulation](https://arxiv.org/abs/2501.10356)
20 |
21 | [2025] [DexterityGen: Foundation Controller for Unprecedented Dexterity](https://arxiv.org/abs/2502.04307)
22 |
23 | [2025] [DexTrack: Towards Generalizable Neural Tracking Control for Dexterous Manipulation from Human References](https://arxiv.org/abs/2502.09614)
24 |
25 |
26 |
27 | ### Reinforcement Learning
28 |
29 | [2025] [DexterityGen: Foundation Controller for Unprecedented Dexterity](https://arxiv.org/abs/2502.04307)
30 |
--------------------------------------------------------------------------------
/papers/04-Manipulation/dexterous/dexterous-teleoperation.md:
--------------------------------------------------------------------------------
1 | ## Dexterous Teleoperation
2 |
3 | [2019] [DexPilot: Vision Based Teleoperation of Dexterous Robotic Hand-Arm System](https://arxiv.org/abs/1910.03135)
4 |
5 | [2024] [ACE: A Cross-Platform Visual-Exoskeletons System for Low-Cost Dexterous Teleoperation](https://arxiv.org/abs/2408.11805)
6 |
7 | [2024] [DexCap: Scalable and Portable Mocap Data Collection System for Dexterous Manipulation](https://arxiv.org/abs/2403.07788)
8 |
9 | [2024] [ResPilot: Teleoperated Finger Gaiting via Gaussian Process Residual Learning](https://arxiv.org/abs/2409.09140)
10 |
11 | [2025] [AMO: Adaptive Motion Optimization for Hyper-Dexterous Humanoid Whole-Body Control](https://arxiv.org/abs/2505.03738)
--------------------------------------------------------------------------------
/papers/04-Manipulation/dexterous/force-control.md:
--------------------------------------------------------------------------------
1 | ## Force Control
2 |
3 | [2025] [ForceGrip: Data-Free Curriculum Learning for Realistic Grip Force Control in VR Hand Manipulation](https://arxiv.org/abs/2503.08061)
--------------------------------------------------------------------------------
/papers/04-Manipulation/dexterous/in-hand-manipulation.md:
--------------------------------------------------------------------------------
1 | ## In-Hand Manipulation
2 |
3 | [2025] [Variable-Friction In-Hand Manipulation for Arbitrary Objects via Diffusion-Based Imitation Learning](https://arxiv.org/abs/2503.02738)
4 |
5 |
6 |
7 | ### In-Hand Reorientation
8 |
9 | [2022] [In-Hand Object Rotation via Rapid Motor Adaptation](https://arxiv.org/abs/2210.04887)
10 |
11 | [2025] [From Simple to Complex Skills: The Case of In-Hand Object Reorientation](https://arxiv.org/abs/2501.05439)
--------------------------------------------------------------------------------
/papers/04-Manipulation/perception/3d-visual-representation.md:
--------------------------------------------------------------------------------
1 | ## 3D Vision Representation
2 |
3 | [2024] [SUGAR: Pre-training 3D Visual Representations for Robotics](https://arxiv.org/abs/2404.01491)
4 |
5 | [2024] [RAM: Retrieval-Based Affordance Transfer for Generalizable Zero-Shot Robotic Manipulation](https://arxiv.org/abs/2407.04689)
--------------------------------------------------------------------------------
/papers/04-Manipulation/perception/pose-estimation.md:
--------------------------------------------------------------------------------
1 | ## Pose Estimation
2 |
3 | [2025] [6D Object Pose Tracking in Internet Videos for Robotic Manipulation](https://arxiv.org/abs/2503.10307)
--------------------------------------------------------------------------------
/papers/04-Manipulation/perception/representation-learning.md:
--------------------------------------------------------------------------------
1 | ## Representation Learning for Manipulation
2 |
3 | [2024] [QueST: Self-Supervised Skill Abstractions for Learning Continuous Control](https://arxiv.org/abs/2407.15840)
4 |
5 |
6 |
7 | ### Vision Representation
8 |
9 | [2022] [R3M: A Universal Visual Representation for Robot Manipulation](https://arxiv.org/abs/2203.12601)
10 |
11 | [2023] [Language-Driven Representation Learning for Robotics](https://arxiv.org/abs/2302.12766)
12 |
13 | [2023] [Inverse Dynamics Pretraining Learns Good Representations for Multitask Imitation](https://arxiv.org/abs/2305.16985)
14 |
15 | [2024] [Recasting Generic Pretrained Vision Transformers As Object-Centric Scene Encoders For Manipulation Policies](https://arxiv.org/abs/2405.15916)
16 |
17 | [2024] [Learning Manipulation by Predicting Interaction](https://arxiv.org/abs/2406.00439)
18 |
19 | [2024] [Adapting Pretrained ViTs with Convolution Injector for Visuo-Motor Control](https://arxiv.org/abs/2406.06072)
20 |
21 | [2025] [Bridging the Sim2Real Gap: Vision Encoder Pre-Training for Visuomotor Policy Transfer](https://arxiv.org/abs/2501.16389)
22 |
23 |
24 |
25 | ### Dense Correspondence Learning
26 |
27 | [2018] [Dense Object Nets: Learning Dense Visual Object Descriptors By and For Robotic Manipulation](https://arxiv.org/abs/1806.08756)
28 |
29 | [2024] [UniGarmentManip: A Unified Framework for Category-Level Garment Manipulation via Dense Visual Correspondence](https://arxiv.org/abs/2405.06903)
30 |
31 |
32 |
33 | ### Affordance
34 |
35 | [2023] [Affordances from Human Videos as a Versatile Representation for Robotics](https://arxiv.org/abs/2304.08488)
36 |
37 | [2023] [Learning Foresightful Dense Visual Affordance for Deformable Object Manipulation](https://arxiv.org/abs/2303.11057)
38 |
39 | [2024] [Learning Precise Affordances from Egocentric Videos for Robotic Manipulation](https://arxiv.org/abs/2408.10123)
--------------------------------------------------------------------------------
/papers/04-Manipulation/perception/tactile-representation.md:
--------------------------------------------------------------------------------
1 | ## Tactile Representation
2 |
3 | [2024] [UniT: Unified Tactile Representation for Robot Learning](https://arxiv.org/abs/2408.06481)
--------------------------------------------------------------------------------
/papers/04-Manipulation/tasks/deformable-object-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Deformable Object Manipulation
2 |
3 | ### Survey
4 |
5 | [2023] [A Survey on Robotic Manipulation of Deformable Objects: Recent Advances, Open Challenges and New Frontiers](https://arxiv.org/abs/2312.10419v1)
6 |
7 |
8 |
9 | ### Method
10 |
11 | [2024] [DeformPAM: Data-Efficient Learning for Long-horizon Deformable Object Manipulation via Preference-based Action Alignment](https://arxiv.org/abs/2410.11584)
12 |
13 | [2025] [One-Shot Affordance Grounding of Deformable Objects in Egocentric Organizing Scenes](https://www.arxiv.org/abs/2503.01092)
14 |
15 |
16 |
17 | ### Representation Learning
18 |
19 | [2023] [Learning Foresightful Dense Visual Affordance for Deformable Object Manipulation](https://arxiv.org/abs/2303.11057)
20 |
21 | [2024] [DeformGS: Scene Flow in Highly Deformable Scenes for Deformable Object Manipulation](https://deformgs.github.io/)
22 |
23 | [2024] [UniGarmentManip: A Unified Framework for Category-Level Garment Manipulation via Dense Visual Correspondence](https://arxiv.org/abs/2405.06903)
24 |
25 |
26 |
27 | ### Unfolding
28 |
29 | [2024] [SIS: Seam-Informed Strategy for T-shirt Unfolding](https://arxiv.org/abs/2409.06990)
30 |
31 |
32 |
33 | ### Benchmark
34 |
35 | [2024] [GarmentLab: A Unified Simulation and Benchmark for Garment Manipulation](https://arxiv.org/abs/2411.01200)
--------------------------------------------------------------------------------
/papers/04-Manipulation/tasks/grasping.md:
--------------------------------------------------------------------------------
1 | ## Grasping
2 |
3 | [2025] [ZeroGrasp: Zero-Shot Shape Reconstruction Enabled Robotic Grasping](https://arxiv.org/abs/2504.10857)
4 |
5 |
6 |
7 | ### Dexterous
8 |
9 | [2024] [DexGANGrasp: Dexterous Generative Adversarial Grasping Synthesis for Task-Oriented Manipulation](https://arxiv.org/abs/2407.17348)
10 |
11 | [2024] [LAC-Net: Linear-Fusion Attention-Guided Convolutional Network for Accurate Robotic Grasping Under the Occlusion](https://arxiv.org/abs/2408.03238)
12 |
13 | [2024] [Adapting Skills to Novel Grasps: A Self-Supervised Approach](https://arxiv.org/abs/2408.00178)
14 |
15 | [2024] [Target-Oriented Object Grasping via Multimodal Human Guidance](https://arxiv.org/abs/2408.11138)
16 |
17 | [2024] [FunGrasp: Functional Grasping for Diverse Dexterous Hands](https://arxiv.org/abs/2411.16755)
18 |
19 | [2024] [UniGraspTransformer: Simplified Policy Distillation for Scalable Dexterous Robotic Grasping](https://arxiv.org/abs/2412.02699)
--------------------------------------------------------------------------------
/papers/04-Manipulation/tasks/long-horizon-manipulation.md:
--------------------------------------------------------------------------------
1 | ## Long-Horizon Manipulation
2 |
3 | [2021] [Adversarial Skill Chaining for Long-Horizon Robot Manipulation via Terminal State Regularization](https://arxiv.org/abs/2111.07999)
--------------------------------------------------------------------------------
/papers/04-Manipulation/tasks/packing.md:
--------------------------------------------------------------------------------
1 | ## Packing
2 |
3 | [2024] [Learning Physically Realizable Skills for Online Packing of General 3D Shapes](https://arxiv.org/abs/2212.02094)
--------------------------------------------------------------------------------
/papers/04-Manipulation/tasks/planning.md:
--------------------------------------------------------------------------------
1 | ## Planning
2 |
3 | [2022] [Planning with Diffusion for Flexible Behavior Synthesis](https://arxiv.org/abs/2205.09991)
4 |
5 | [2024] [Language-Augmented Symbolic Planner for Open-World Task Planning](https://arxiv.org/abs/2407.09792)
6 |
7 | [2024] [Configuration Space Distance Fields for Manipulation Planning](https://arxiv.org/abs/2406.01137)
8 |
9 | [2024] [Neural MP: A Generalist Neural Motion Planner](https://arxiv.org/abs/2409.05864)
--------------------------------------------------------------------------------
/papers/04-Manipulation/tasks/task-motion-planning.md:
--------------------------------------------------------------------------------
1 | ## Task and Motion Planning
2 |
3 | [2025] [Meta-Optimization and Program Search using Language Models for Task and Motion Planning](https://arxiv.org/abs/2505.03725)
--------------------------------------------------------------------------------
/papers/04-Manipulation/tasks/tool-usage.md:
--------------------------------------------------------------------------------
1 | ## Tool Usage
2 |
3 | [2025] [Tool-as-Interface: Learning Robot Policies from Human Tool Usage through Imitation Learning](https://arxiv.org/abs/2504.04612)
--------------------------------------------------------------------------------
/papers/05-Multi-Embodiment-Learning/locomotion.md:
--------------------------------------------------------------------------------
1 | ## Multi-embodiment Learning for Locomotion
2 |
3 | [2024] [One Policy to Run Them All: an End-to-end Learning Approach to Multi-Embodiment Locomotion](https://arxiv.org/abs/2409.06366)
--------------------------------------------------------------------------------
/papers/05-Multi-Embodiment-Learning/manipulation.md:
--------------------------------------------------------------------------------
1 | ## Multi-embodiment Learning for Manipulation
2 |
3 | [2024] [General Flow as Foundation Affordance for Scalable Robot Learning](https://arxiv.org/abs/2401.11439)
4 |
5 | [2024] [Scaling Proprioceptive-Visual Learning with Heterogeneous Pre-trained Transformers](https://arxiv.org/abs/2409.20537)
6 |
7 | [2024] [QueST: Self-Supervised Skill Abstractions for Learning Continuous Control](https://arxiv.org/abs/2407.15840)
8 |
9 | [2024] [LEGATO: Cross-Embodiment Imitation Using a Grasping Tool](https://arxiv.org/abs/2411.03682)
10 |
11 | [2025] [Shadow: Leveraging Segmentation Masks for Cross-Embodiment Policy Transfer](https://arxiv.org/abs/2503.00774)
12 |
--------------------------------------------------------------------------------
/papers/05-Multi-Embodiment-Learning/representation-learning.md:
--------------------------------------------------------------------------------
1 | ## Representation Learning
2 |
3 | [2024] [Meta-Controller: Few-Shot Imitation of Unseen Embodiments and Tasks in Continuous Control](https://arxiv.org/abs/2412.12147)
--------------------------------------------------------------------------------
/papers/06-Sim-to-Real/imitation-gap.md:
--------------------------------------------------------------------------------
1 | ## Imitation Gap
2 |
3 | [2020] [Bridging the Imitation Gap by Adaptive Insubordination](https://arxiv.org/abs/2007.12173)
--------------------------------------------------------------------------------
/papers/06-Sim-to-Real/transfer-method/adaption-method.md:
--------------------------------------------------------------------------------
1 | ## Adaptation Method
2 |
3 | ### Domain Adaptation
4 |
5 | [2017] [Few-Shot Adversarial Domain Adaptation](https://arxiv.org/abs/1711.02536)
6 |
7 | [2017] [Adversarial Discriminative Domain Adaptation](https://arxiv.org/abs/1702.05464)
8 |
9 | [2019] [Active Domain Randomization](https://arxiv.org/abs/1904.04762)
10 |
11 | [2024] [Domain Adaptation of Visual Policies with a Single Demonstration](https://arxiv.org/abs/2407.16820)
12 |
13 |
14 |
15 | ### Rapid Motor Adaptation
16 |
17 | [2021] [RMA: Rapid Motor Adaptation for Legged Robots](https://arxiv.org/abs/2107.04034)
18 |
19 | [2021] [Learning to Jump from Pixels](https://arxiv.org/abs/2110.15344)
20 |
21 | [2020] [Learning Quadrupedal Locomotion over Challenging Terrain](https://arxiv.org/abs/2010.11251)
22 |
23 | [2022] [Learning robust perceptive locomotion for quadrupedal robots in the wild](https://arxiv.org/abs/2201.08117)
24 |
25 |
26 |
27 | ### Regularized Online Adaptation
28 |
29 | [2022] [Deep Whole-Body Control: Learning a Unified Policy for Manipulation and Locomotion](https://arxiv.org/abs/2210.10044)
30 |
--------------------------------------------------------------------------------
/papers/06-Sim-to-Real/transfer-method/advanced-method.md:
--------------------------------------------------------------------------------
1 | ## Advanced Method
2 |
3 | ### LLM-Guided Transfer
4 |
5 | [2023] [Language to Rewards for Robotic Skill Synthesis](https://arxiv.org/abs/2306.08647)
6 |
7 | [2023] [Text2Reward: Automated Dense Reward Function Generation for Reinforcement Learning](https://arxiv.org/abs/2309.11489)
8 |
9 | [2023] [Eureka: Human-Level Reward Design via Coding Large Language Models](https://arxiv.org/abs/2310.12931)
10 |
11 | [2024] [DrEureka: Language Model Guided Sim-To-Real Transfer](https://eureka-research.github.io/dr-eureka/)
12 |
13 | [2024] [Natural Language Can Help Bridge the Sim2Real Gap](https://arxiv.org/abs/2405.10020)
14 |
15 |
16 |
17 | ### Curriculum Learning
18 |
19 | [2020] [Self-Paced Deep Reinforcement Learning](https://arxiv.org/abs/2004.11812)
20 |
21 | [2022] [Curriculum reinforcement learning via constrained optimal transport](https://proceedings.mlr.press/v162/klink22a/klink22a.pdf)
22 |
23 | [2022] [Curriculum Reinforcement Learning using Optimal Transport via Gradual Domain Adaptation](https://arxiv.org/abs/2210.10195)
24 |
25 | [2023] [Outcome-directed Reinforcement Learning by Uncertainty & Temporal Distance-Aware Curriculum Goal Generation](https://arxiv.org/abs/2301.11741)
26 |
27 | [2023] [Reward-Machine-Guided, Self-Paced Reinforcement Learning](https://arxiv.org/abs/2305.16505)
28 |
29 |
30 |
31 | ### System Identification
32 |
33 | [2017] [Fast Model Identification via Physics Engines for Data-Efficient Policy Search](https://arxiv.org/abs/1710.08893)
34 |
35 | [2024] [Dynamics as Prompts: In-Context Learning for Sim-to-Real System Identifications](https://arxiv.org/abs/2410.20357)
36 |
37 |
38 |
39 | ### Evolutionary Method
40 |
41 | [2024] [Enabling Adaptive Agent Training in Open-Ended Simulators by Targeting Diversity](https://arxiv.org/abs/2411.04466)
42 |
43 |
44 |
45 | ### NeRF2Real
46 |
47 | [2023] [NeRF2Real: Sim2real Transfer of Vision-guided Bipedal Motion Skills using Neural Radiance Fields](https://arxiv.org/abs/2210.04932)
--------------------------------------------------------------------------------
/papers/06-Sim-to-Real/transfer-method/randomization-menthod.md:
--------------------------------------------------------------------------------
1 | ## Randomization Method
2 |
3 | ### Randomization
4 |
5 | [2022] [Analysis of Randomization Effects on Sim2Real Transfer in Reinforcement Learning for Robotic Manipulation Tasks](https://arxiv.org/abs/2206.06282)
6 |
7 | [2024] [Overcoming the Sim-to-Real Gap: Leveraging Simulation to Learn to Explore for Real-World RL](https://arxiv.org/abs/2410.20254)
8 |
9 |
10 |
11 | ### Dynamics Randomization
12 |
13 | [2018] [Sim-to-Real Transfer of Robotic Control with Dynamics Randomization](https://arxiv.org/abs/1710.06537)
14 |
15 | [2020] [Dynamics Randomization Revisited: A Case Study for Quadrupedal Locomotion](https://arxiv.org/abs/2011.02404)
16 |
17 |
18 |
19 | ### Domain Randomization
20 |
21 | [2018] [Policy Transfer with Strategy Optimization](https://arxiv.org/abs/1810.05751)
22 |
23 | [2019] [How to pick the domain randomization parameters for sim-to-real transfer of reinforcement learning policies?](https://arxiv.org/abs/1903.11774)
24 |
25 | [2019] [Solving Rubik's Cube with a Robot Hand](https://arxiv.org/abs/1910.07113)
26 |
27 | [2019] [Learning Domain Randomization Distributions for Training Robust Locomotion Policies](https://arxiv.org/abs/1906.00410)
28 |
29 | [2019] [Domain Randomization and Pyramid Consistency: Simulation-to-Real Generalization without Accessing Target Domain Data](https://arxiv.org/abs/1909.00889)
30 |
31 | [2021] [Understanding Domain Randomization for Sim-to-real Transfer](https://arxiv.org/abs/2110.03239)
32 |
33 | [2022] [Online vs. Offline Adaptive Domain Randomization Benchmark](https://arxiv.org/abs/2206.14661)
34 |
35 | [2023] [DROPO: Sim-to-Real Transfer with Offline Domain Randomization](https://arxiv.org/abs/2201.08434)
36 |
37 | [2024] [Domain Randomization via Entropy Maximization](https://arxiv.org/abs/2311.01885)
38 |
39 | [2024] [Continual Domain Randomization](https://continual-dr.github.io/)
--------------------------------------------------------------------------------
/papers/06-Sim-to-Real/transfer-method/survey.md:
--------------------------------------------------------------------------------
1 | ## Survey
2 |
3 | [2020] [Sim-to-Real Transfer in Deep Reinforcement Learning for Robotics: a Survey](https://arxiv.org/abs/2009.13303)
--------------------------------------------------------------------------------
/papers/06-Sim-to-Real/transfer-method/system-identification.md:
--------------------------------------------------------------------------------
1 | ## System Identification
2 |
3 | [2025] [Sampling-Based System Identification with Active Exploration for Legged Robot Sim2Real Learning](https://arxiv.org/abs/2505.14266)
--------------------------------------------------------------------------------
/papers/07-Navigation/legged-robot.md:
--------------------------------------------------------------------------------
1 | ## Legged Robot
2 |
3 | [2025] [NaVILA: Legged Robot Vision-Language-Action Model for Navigation](https://arxiv.org/abs/2412.04453)
4 |
5 |
6 |
7 | ### Quadruped
8 |
9 | [2025] [Learned Perceptive Forward Dynamics Model for Safe and Platform-aware Robotic Navigation](https://arxiv.org/abs/2504.19322)
10 |
--------------------------------------------------------------------------------
/papers/07-Navigation/navigation.md:
--------------------------------------------------------------------------------
1 | ## Navigation
2 |
3 | [2022] [LM-Nav: Robotic Navigation with Large Pre-Trained Models of Language, Vision, and Action](https://arxiv.org/abs/2207.04429)
4 |
5 | [2024] [Mobility VLA: Multimodal Instruction Navigation with Long-Context VLMs and Topological Graphs](https://arxiv.org/abs/2407.07775)
--------------------------------------------------------------------------------
/papers/08-Physical-based-Character-Control/behavior-foundation-model.md:
--------------------------------------------------------------------------------
1 | ## Behavior Foundation Model
2 |
3 | [2024] [Zero-Shot Whole-Body Humanoid Control via Behavioral Foundation Models](https://ai.meta.com/research/publications/zero-shot-whole-body-humanoid-control-via-behavioral-foundation-models/)
4 |
5 | [2024] [MaskedMimic: Unified Physics-Based Character Control Through Masked Motion Inpainting](https://arxiv.org/abs/2409.14393)
6 |
7 | [2024] [H-GAP: Humanoid Control with a Generalist Planner](https://arxiv.org/abs/2312.02682)
8 |
9 |
10 |
11 | ### Composite Motion
12 |
13 | [2023] [Composite Motion Learning with Task Control](https://arxiv.org/abs/2305.03286)
14 |
15 | [2019] [MCP: Learning Composable Hierarchical Control with Multiplicative Compositional Policies](https://arxiv.org/abs/1905.09808)
16 |
--------------------------------------------------------------------------------
/papers/08-Physical-based-Character-Control/challenge-terrain-traversal.md:
--------------------------------------------------------------------------------
1 | ## Challenging Terrain Traversal
2 |
3 | [2025] [PARC: Physics-based Augmentation with Reinforcement Learning for Character Controllers](https://arxiv.org/abs/2505.04002)
--------------------------------------------------------------------------------
/papers/08-Physical-based-Character-Control/human-object-interaction.md:
--------------------------------------------------------------------------------
1 | ## Human Object Interaction
2 |
3 | [2025] [InterMimic: Towards Universal Whole-Body Control for Physics-Based Human-Object Interactions](https://arxiv.org/abs/2502.20390)
--------------------------------------------------------------------------------
/papers/08-Physical-based-Character-Control/humanoid-scene-interaction.md:
--------------------------------------------------------------------------------
1 | ## Humanoid Scene Interaction
2 |
3 | [2023] [Synthesizing Physical Character-Scene Interactions](https://arxiv.org/abs/2302.00883)
4 |
5 | [2023] [Trace and Pace: Controllable Pedestrian Animation via Guided Trajectory Diffusion](https://arxiv.org/abs/2304.01893)
6 |
7 | [2023] [PhysHOI: Physics-Based Imitation of Dynamic Human-Object Interaction](https://arxiv.org/abs/2312.04393)
8 |
9 | [2023] [Unified Human-Scene Interaction via Prompted Chain-of-Contacts](https://arxiv.org/abs/2309.07918)
10 |
11 | [2023] [Synthesizing Physically Plausible Human Motions in 3D Scenes](https://arxiv.org/abs/2308.09036)
12 |
13 | [2024] [PACER+: On-Demand Pedestrian Animation Controller in Driving Scenarios](https://arxiv.org/abs/2404.19722)
14 |
17 | [2025] [TokenHSI: Unified Synthesis of Physical Human-Scene Interactions through Task Tokenization](https://arxiv.org/abs/2503.19901)
--------------------------------------------------------------------------------
/papers/08-Physical-based-Character-Control/motion-generation.md:
--------------------------------------------------------------------------------
1 | ## Motion Generation
2 |
3 | [2025] [Generating Physically Realistic and Directable Human Motions from Multi-Modal Inputs](https://arxiv.org/abs/2502.05641)
4 |
5 | [2025] [UniPhys: Unified Planner and Controller with Diffusion for Flexible Physics-Based Character Control](https://arxiv.org/abs/2504.12540)
6 |
7 |
8 |
9 | ### Text to Motion
10 |
11 | [2022] [Human Motion Diffusion Model](https://arxiv.org/abs/2209.14916)
12 |
13 | [2023] [OmniControl: Control Any Joint at Any Time for Human Motion Generation](https://arxiv.org/abs/2310.08580)
14 |
15 | [2024] [SuperPADL: Scaling Language-Directed Physics-Based Control with Progressive Supervised Distillation](https://arxiv.org/abs/2407.10481)
16 |
17 |
18 |
19 | ### Motion Transition
20 |
21 | [2021] [Robust Motion In-betweening](https://arxiv.org/abs/2102.04942)
22 |
23 |
24 |
25 | ### Motion Diffusion Model
26 |
27 | [2024] [Robot Motion Diffusion Model: Motion Generation for Robotic Characters](https://la.disneyresearch.com/wp-content/uploads/RobotMDM_red.pdf)
--------------------------------------------------------------------------------
/papers/08-Physical-based-Character-Control/motion-imitation/motion-prior.md:
--------------------------------------------------------------------------------
1 | ## Motion Prior
2 |
3 | [2024] [Grasping Diverse Objects with Simulated Humanoids](https://arxiv.org/abs/2407.11385)
4 |
5 |
6 |
7 | ### Adversarial Method
8 |
9 | [2021] [AMP: Adversarial Motion Priors for Stylized Physics-Based Character Control](https://arxiv.org/abs/2104.02180)
10 |
11 | [2022] [ASE: Large-Scale Reusable Adversarial Skill Embeddings for Physically Simulated Characters](https://arxiv.org/abs/2205.01906)
12 |
13 | [2023] [C⋅ASE: Learning Conditional Adversarial Skill Embeddings for Physics-based Characters](https://arxiv.org/abs/2309.11351)
14 |
15 | [2023] [CALM: Conditional Adversarial Latent Models for Directable Virtual Characters](https://arxiv.org/abs/2305.02195)
16 |
17 | [2024] [Learning to Walk and Fly with Adversarial Motion Priors](https://arxiv.org/abs/2309.12784)
18 |
19 | [2025] [ADD: Physics-Based Motion Imitation with Adversarial Differential Discriminators](https://arxiv.org/abs/2505.04961)
20 |
21 |
22 |
23 | ### Variational Autoencoder Method
24 |
25 | [2022] [Physics-based Character Controllers Using Conditional VAEs](https://research.facebook.com/publications/physics-based-character-controllers-using-conditional-vaes/)
26 |
27 | [2022] [ControlVAE: Model-Based Learning of Generative Controllers for Physics-Based Characters](https://arxiv.org/abs/2210.06063)
28 |
29 | [2022] [Leveraging Demonstrations with Latent Space Priors](https://arxiv.org/abs/2210.14685)
30 |
31 | [2023] [Neural Categorical Priors for Physics-Based Character Control](https://arxiv.org/abs/2308.07200)
32 |
33 | [2023] [Universal Humanoid Motion Representations for Physics-Based Control](https://arxiv.org/abs/2310.04582)
34 |
35 | [2024] [MoConVQ: Unified Physics-Based Motion Control via Scalable Discrete Representations](https://arxiv.org/abs/2310.10198)
36 |
37 |
38 |
39 | ### Versatile Motion Priors
40 |
41 | [2024] [VMP: Versatile Motion Priors for Robustly Tracking Motion on Physical Characters](https://la.disneyresearch.com/wp-content/uploads/VMP_paper.pdf)
42 |
--------------------------------------------------------------------------------
/papers/08-Physical-based-Character-Control/motion-imitation/motion-tracking.md:
--------------------------------------------------------------------------------
1 | ## Motion Tracking
2 |
3 | [2018] [DeepMimic: Example-Guided Deep Reinforcement Learning of Physics-Based Character Skill](https://arxiv.org/abs/1804.02717)
4 |
5 | [2018] [SFV: Reinforcement Learning of Physical Skills from Videos](https://arxiv.org/abs/1810.03599)
6 |
7 |
8 |
9 | ### Adversarial Method
10 |
11 | [2021] [AMP: Adversarial Motion Priors for Stylized Physics-Based Character Control](https://arxiv.org/abs/2104.02180)
12 |
13 |
14 |
15 | ### Variational Autoencoder Method
16 |
17 | [2024] [VMP: Versatile Motion Priors for Robustly Tracking Motion on Physical Characters](https://la.disneyresearch.com/wp-content/uploads/VMP_paper.pdf)
18 |
19 |
20 |
21 | ### Diffusion Method
22 |
23 | [2024] [PDP: Physics-Based Character Animation via Diffusion Policy](https://arxiv.org/abs/2406.00960)
24 |
25 | [2024] [Robot Motion Diffusion Model: Motion Generation for Robotic Characters](https://la.disneyresearch.com/wp-content/uploads/RobotMDM_2.pdf)
26 |
--------------------------------------------------------------------------------
/papers/08-Physical-based-Character-Control/multi-objective-rl-method.md:
--------------------------------------------------------------------------------
1 | ## Multi-Objective Reinforcement Learning Method
2 |
3 | [2024] [Composite Motion Learning with Task Control](https://arxiv.org/abs/2305.03286)
4 |
5 | [2025] [AMOR: Adaptive Character Control through Multi-Objective Reinforcement Learning](https://arxiv.org/abs/2505.23708)
--------------------------------------------------------------------------------
/papers/08-Physical-based-Character-Control/multi-task-controller.md:
--------------------------------------------------------------------------------
1 | ## Multi-task Controller
2 |
3 | [2023] [AdaptNet: Policy Adaptation for Physics-Based Character Control](https://arxiv.org/abs/2310.00239)
4 |
5 | [2024] [CLoSD: Closing the Loop between Simulation and Diffusion for multi-task character control](https://arxiv.org/abs/2410.03441)
6 |
--------------------------------------------------------------------------------
/papers/08-Physical-based-Character-Control/realtime-controller.md:
--------------------------------------------------------------------------------
1 | ## Realtime Controller
2 |
3 | [2023] [Perpetual Humanoid Control for Real-time Simulated Avatars](https://arxiv.org/abs/2305.06456)
4 |
5 | [2023] [Universal Humanoid Motion Representations for Physics-Based Control](https://arxiv.org/abs/2310.04582)
6 |
7 | [2024] [Real-Time Simulated Avatar from Head-Mounted Sensors](https://arxiv.org/abs/2403.06862)
--------------------------------------------------------------------------------
/papers/08-Physical-based-Character-Control/survey.md:
--------------------------------------------------------------------------------
1 | ## Survey
2 |
3 | [2022] [A Survey on Reinforcement Learning Methods in Character Animation](https://arxiv.org/abs/2203.04735)
--------------------------------------------------------------------------------
/papers/09-Industrial-Application/assembly.md:
--------------------------------------------------------------------------------
1 | ## Assembly
2 |
3 | [2024] [Blox-Net: Generative Design-for-Robot-Assembly Using VLM Supervision, Physics Simulation, and a Robot with Reset](https://arxiv.org/abs/2409.17126)
4 |
5 | [2024] [From Imitation to Refinement -- Residual RL for Precise Assembly](https://arxiv.org/abs/2407.16677)
6 |
7 | [2025] [Two by Two: Learning Multi-Task Pairwise Objects Assembly for Generalizable Robot Manipulation](https://arxiv.org/abs/2504.06961)
--------------------------------------------------------------------------------
/papers/09-Industrial-Application/cooking.md:
--------------------------------------------------------------------------------
1 | ## Cooking
2 |
3 | [2024] [Real-World Cooking Robot System from Recipes Based on Food State Recognition Using Foundation Models and PDDL](https://arxiv.org/abs/2410.02874)
--------------------------------------------------------------------------------
/papers/10-AI-System-on-Robotics/deepmind.md:
--------------------------------------------------------------------------------
1 | ## DeepMind
2 |
3 | ### Go
4 |
5 | [2016] [Mastering the game of Go with deep neural networks and tree search](https://eecs.csuohio.edu/~sschung/CIS601/Paper_AlhpaGo_deep_neural_networks_and_tree_search.pdf)
6 |
7 | [2017] [Mastering chess and shogi by self-play with a general reinforcement learning algorithm](https://arxiv.org/abs/1712.01815)
8 |
9 | [2019] [Mastering Atari, Go, Chess and Shogi by Planning with a Learned Model](https://arxiv.org/abs/1911.08265)
10 |
11 |
12 |
13 | ### StarCraft
14 |
15 | [2019] [Grandmaster level in StarCraft II using multi-agent reinforcement learning](https://www.seas.upenn.edu/~cis520/papers/RL_for_starcraft.pdf)
16 |
17 |
18 |
19 | ### AlphaFold
20 |
21 | [2020] [AlphaFold: Improved protein structure prediction using potentials from deep learning](https://discovery.ucl.ac.uk/id/eprint/10089234/1/343019_3_art_0_py4t4l_convrt.pdf)
22 |
23 |
24 |
25 | ### AlphaGeometry
26 |
27 | [2024] [Solving olympiad geometry without human demonstrations](https://www.nature.com/articles/s41586-023-06747-5)
28 |
29 |
30 |
31 | ### Humanoid
32 |
33 | [2021] [From Motor Control to Team Play in Simulated Humanoid Football](https://arxiv.org/abs/2105.12196)
34 |
35 | [2022] [Imitate and Repurpose: Learning Reusable Robot Movement Skills From Human and Animal Behaviors](https://arxiv.org/abs/2203.17138)
36 |
37 | [2023] [NeRF2Real: Sim2real Transfer of Vision-guided Bipedal Motion Skills using Neural Radiance Fields](https://arxiv.org/abs/2210.04932)
38 |
39 | [2024] [Learning Agile Soccer Skills for a Bipedal Robot with Deep Reinforcement Learning](https://arxiv.org/abs/2304.13653)
40 |
41 |
42 |
43 | ### Table Tennis
44 |
45 | [2020] [Robotic Table Tennis with Model-Free Reinforcement Learning](https://arxiv.org/abs/2003.14398)
46 |
47 | [2023] [i-Sim2Real: Reinforcement Learning of Robotic Policies in Tight Human-Robot Interaction Loops](https://arxiv.org/abs/2207.06572)
48 |
49 | [2023] [Robotic Table Tennis: A Case Study into a High Speed Learning System](https://arxiv.org/abs/2309.03315)
50 |
51 | [2024] [Achieving Human Level Competitive Robot Table Tennis](https://www.arxiv.org/abs/2408.03906)
52 |
53 |
54 |
55 | ### Large Model for Robotics
56 |
57 | [2024] [Vision Language Models are In-Context Value Learners](https://arxiv.org/abs/2411.04549)
58 |
59 | [2024] [STEER: Flexible Robotic Manipulation via Dense Language Grounding](https://arxiv.org/abs/2411.03409)
60 |
61 |
62 |
63 | ### Video Generation
64 |
65 | [2024] [Motion Prompting: Controlling Video Generation with Motion Trajectories](https://arxiv.org/abs/2412.02700)
66 |
67 |
68 |
69 | ### Data Processing
70 |
71 | [2025] [Robot Data Curation with Mutual Information Estimators](https://arxiv.org/abs/2502.08623)
72 |
73 |
74 |
75 | ### RL
76 |
77 | [2025] [Stop Regressing: Training Value Functions via Classification for Scalable Deep RL](https://arxiv.org/abs/2403.03950)
78 |
--------------------------------------------------------------------------------
/papers/10-AI-System-on-Robotics/google.md:
--------------------------------------------------------------------------------
1 | ## Robotics at Google
2 |
3 | [2023] [Deep RL at Scale: Sorting Waste in Office Buildings with a Fleet of Mobile Manipulators](https://arxiv.org/abs/2305.03270)
--------------------------------------------------------------------------------
/papers/10-AI-System-on-Robotics/intel.md:
--------------------------------------------------------------------------------
1 | ## Intel EAI Lab
2 |
3 | ### Humanoid
4 |
5 | [2023] [Hierarchical generative modelling for autonomous robots](https://www.nature.com/articles/s42256-023-00752-z.pdf)
--------------------------------------------------------------------------------
/papers/10-AI-System-on-Robotics/meta.md:
--------------------------------------------------------------------------------
1 | ## Meta
2 |
3 | [2024] [DynaMem: Online Dynamic Spatio-Semantic Memory for Open World Mobile Manipulation](https://arxiv.org/abs/2411.04999)
--------------------------------------------------------------------------------
/papers/10-AI-System-on-Robotics/nvidia.md:
--------------------------------------------------------------------------------
1 | ## Nvidia
2 |
3 | [2025] [GR00T N1: An Open Foundation Model for Generalist Humanoid Robots](https://arxiv.org/abs/2503.14734)
4 |
5 |
6 |
7 | ### Cosmos
8 |
9 | [2025] [Cosmos-Reason1: From Physical Common Sense To Embodied Reasoning](https://arxiv.org/abs/2503.15558)
10 |
11 | [2025] [Cosmos-Transfer1: Conditional World Generation with Adaptive Multimodal Control](https://arxiv.org/abs/2503.14492)
--------------------------------------------------------------------------------
/papers/10-AI-System-on-Robotics/tencent.md:
--------------------------------------------------------------------------------
1 | ## Tencent
2 |
3 | ### Quadruped
4 |
5 | [2024] [Lifelike Agility and Play in Quadrupedal Robots using Reinforcement Learning and Generative Pre-trained Models](https://arxiv.org/abs/2308.15143)
6 |
--------------------------------------------------------------------------------
/papers/10-AI-System-on-Robotics/unitree.md:
--------------------------------------------------------------------------------
1 | ## Unitree
2 |
3 | ### Humanoid
4 |
5 | [2024] [SkillMimic: Learning Reusable Basketball Skills from Demonstrations](https://arxiv.org/abs/2408.15270v1)
--------------------------------------------------------------------------------
/papers/11-Human-Robot-Interaction/handover.md:
--------------------------------------------------------------------------------
1 | ## Handover
2 |
3 | [2025] [Learning-based Dynamic Robot-to-Human Handover](https://arxiv.org/abs/2502.12602)
--------------------------------------------------------------------------------
/papers/11-Human-Robot-Interaction/human-robot-interaction.md:
--------------------------------------------------------------------------------
1 | ## Human Robot Interaction
2 |
3 | ### Survey
4 |
5 | [2024] [Advancements in Gesture Recognition Techniques and Machine Learning for Enhanced Human-Robot Interaction: A Comprehensive Review](https://arxiv.org/abs/2409.06503)
6 |
7 |
8 |
9 | ### Methods
10 |
11 | [2024] [Vocal Sandbox: Continual Learning and Adaptation for Situated Human-Robot Collaboration](https://arxiv.org/abs/2411.02599)
12 |
13 | [2025] [FABG: End-to-end Imitation Learning for Embodied Affective Human-Robot Interaction](https://arxiv.org/abs/2503.01363)
14 |
15 |
16 |
17 | ### Benchmark
18 |
19 | [2025] [PARTNR: A Benchmark for Planning and Reasoning in Embodied Multi-agent Tasks](https://ai.meta.com/research/publications/partnr-a-benchmark-for-planning-and-reasoning-in-embodied-multi-agent-tasks/)
--------------------------------------------------------------------------------
/papers/11-Human-Robot-Interaction/humanoid-human-interaction.md:
--------------------------------------------------------------------------------
1 | ## Humanoid Human Interaction
2 |
3 | [2023] [NOIR: Neural Signal Operated Intelligent Robots for Everyday Activities](https://arxiv.org/abs/2311.01454)
4 |
5 | [2025] [RHINO: Learning Real-Time Humanoid-Human-Object Interaction from Human Demonstrations](https://arxiv.org/abs/2502.13134)
6 |
7 | [2025] [Joint Decision-Making in Robot Teleoperation: When are Two Heads Better Than One?](https://arxiv.org/abs/2503.15510)
8 |
9 | [2025] [ImageInThat: Manipulating Images to Convey User Instructions to Robots](https://arxiv.org/abs/2503.15500)
10 |
11 | [2025] [Think-Then-React: Towards Unconstrained Human Action-to-Reaction Generation](https://arxiv.org/abs/2503.16451)
12 |
13 | [2025] [Enhancing Explainability with Multimodal Context Representations for Smarter Robots](https://arxiv.org/abs/2503.16467)
14 |
15 | [2025] [H2-COMPACT: Human-Humanoid Co-Manipulation via Adaptive Contact Trajectory Policies](https://arxiv.org/abs/2505.17627)
16 |
17 | ### Datasets
18 |
19 | [2025] [NatSGLD: A Dataset with Speech, Gesture, Logic, and Demonstration for Robot Learning in Natural Human-Robot Interaction](https://arxiv.org/abs/2502.16718)
20 |
--------------------------------------------------------------------------------
/papers/README.md:
--------------------------------------------------------------------------------
1 | ## Must-read papers on Robot Learning
2 |
3 | 00-Surveys
4 |
5 | 01-Robot-Learning-Theory
6 |
7 | 02-Data-Acquisition
8 |
9 | 03-Legged-Robot
10 |
11 | 04-Manipulation
12 |
13 | 05-Multi-Embodiment-Learning
14 |
15 | 06-Sim-to-Real
16 |
17 | 07-Navigation
18 |
19 | 08-Physical-based-Character-Control
20 |
21 | 09-Industrial-Application
22 |
23 | 10-AI-System-on-Robotics
24 |
25 | 11-Human-Robot-Interaction
--------------------------------------------------------------------------------
/workshop/CoRL/2024CoRL.md:
--------------------------------------------------------------------------------
1 | ## Workshops at CoRL 2024
2 |
3 | - [Language and Robot Learning: Language as an Interface](https://sites.google.com/view/langrob-corl24/)
4 | - [Learning Robot Fine and Dexterous Manipulation: Perception and Control](https://dex-manipulation.github.io/corl2024/index.html)
5 | - [Mastering Robot Manipulation in a World of Abundant Data](https://www.dynsyslab.org/mastering-robot-manipulation-in-a-world-of-abundant-data/)
6 | - [CoRoboLearn: Advancing Learning for Human-Centered Collaborative Robots](https://sites.google.com/view/corobolearn)
7 | - [X-Embodiment Robot Learning](https://sites.google.com/view/xembodimentworkshop)
8 | - [Lifelong Learning for Home Robots](https://llhomerobots.github.io/)
9 | - [A World Built by Robots: Workshop on Learning Robotic Assembly of Industrial and Everyday Objects](https://roboassembly.github.io/)
10 | - [Morphology-Aware Policy and Design Learning Workshop](https://sites.google.com/view/corl-mapodel-workshop/home)
--------------------------------------------------------------------------------
/workshop/RSS/2025RSS.md:
--------------------------------------------------------------------------------
1 | ## Workshops at RSS 2025
2 |
3 | - [Workshop on Whole-body Control and Bimanual Manipulation: Applications in Humanoids and Beyond](https://wcbm-workshop.github.io/#about)
--------------------------------------------------------------------------------