├── VLA.png
├── vlas.png
├── Plot_script
│   ├── .gitkeep
│   ├── plots
│   │   ├── .gitkeep
│   │   ├── forest_plot.png
│   │   ├── VLA_FEB_Hist.png
│   │   ├── factor_analysis.png
│   │   ├── scale_analysis_4panel.png
│   │   ├── decoder_analysis_2panel.png
│   │   ├── encoder_analysis_4panel.png
│   │   ├── encoder_domain_faceted.png
│   │   ├── merged_decoder_encoder_6panel.png
│   │   ├── scale_analysis_adjusted_4panel.png
│   │   ├── domain_component_analysis_4panel.png
│   │   ├── vla_fusion_theory_visualization_3panel.pdf
│   │   ├── vla_fusion_theory_visualization_3panel.png
│   │   ├── factor_loadings.csv
│   │   ├── factor_analysis.svg
│   │   ├── decoder_analysis_2panel.svg
│   │   ├── forest_plot.svg
│   │   ├── scale_analysis_4panel.svg
│   │   └── scale_analysis_adjusted_4panel.svg
│   ├── requirements.txt
│   ├── README.md
│   ├── dataset_plot.py
│   └── top75.csv
├── benchmarkdataset.png
├── dataset_plot.py
└── README.md
/VLA.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Muhayyuddin/VLAs/HEAD/VLA.png
--------------------------------------------------------------------------------
/vlas.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Muhayyuddin/VLAs/HEAD/vlas.png
--------------------------------------------------------------------------------
/Plot_script/.gitkeep:
--------------------------------------------------------------------------------
1 | # Placeholder file to maintain empty directory structure
--------------------------------------------------------------------------------
/Plot_script/plots/.gitkeep:
--------------------------------------------------------------------------------
1 | # Placeholder file to maintain empty directory structure
--------------------------------------------------------------------------------
/benchmarkdataset.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Muhayyuddin/VLAs/HEAD/benchmarkdataset.png
--------------------------------------------------------------------------------
/Plot_script/plots/forest_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Muhayyuddin/VLAs/HEAD/Plot_script/plots/forest_plot.png
--------------------------------------------------------------------------------
/Plot_script/plots/VLA_FEB_Hist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Muhayyuddin/VLAs/HEAD/Plot_script/plots/VLA_FEB_Hist.png
--------------------------------------------------------------------------------
/Plot_script/plots/factor_analysis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Muhayyuddin/VLAs/HEAD/Plot_script/plots/factor_analysis.png
--------------------------------------------------------------------------------
/Plot_script/plots/scale_analysis_4panel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Muhayyuddin/VLAs/HEAD/Plot_script/plots/scale_analysis_4panel.png
--------------------------------------------------------------------------------
/Plot_script/plots/decoder_analysis_2panel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Muhayyuddin/VLAs/HEAD/Plot_script/plots/decoder_analysis_2panel.png
--------------------------------------------------------------------------------
/Plot_script/plots/encoder_analysis_4panel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Muhayyuddin/VLAs/HEAD/Plot_script/plots/encoder_analysis_4panel.png
--------------------------------------------------------------------------------
/Plot_script/plots/encoder_domain_faceted.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Muhayyuddin/VLAs/HEAD/Plot_script/plots/encoder_domain_faceted.png
--------------------------------------------------------------------------------
/Plot_script/plots/merged_decoder_encoder_6panel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Muhayyuddin/VLAs/HEAD/Plot_script/plots/merged_decoder_encoder_6panel.png
--------------------------------------------------------------------------------
/Plot_script/plots/scale_analysis_adjusted_4panel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Muhayyuddin/VLAs/HEAD/Plot_script/plots/scale_analysis_adjusted_4panel.png
--------------------------------------------------------------------------------
/Plot_script/plots/domain_component_analysis_4panel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Muhayyuddin/VLAs/HEAD/Plot_script/plots/domain_component_analysis_4panel.png
--------------------------------------------------------------------------------
/Plot_script/plots/vla_fusion_theory_visualization_3panel.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Muhayyuddin/VLAs/HEAD/Plot_script/plots/vla_fusion_theory_visualization_3panel.pdf
--------------------------------------------------------------------------------
/Plot_script/plots/vla_fusion_theory_visualization_3panel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Muhayyuddin/VLAs/HEAD/Plot_script/plots/vla_fusion_theory_visualization_3panel.png
--------------------------------------------------------------------------------
/Plot_script/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.24.0
2 | pandas>=2.0.0
3 | matplotlib>=3.7.0
4 | seaborn>=0.12.0
5 | scipy>=1.10.0
6 | statsmodels>=0.14.0
7 | scikit-learn>=1.3.0
8 |
--------------------------------------------------------------------------------
/Plot_script/plots/factor_loadings.csv:
--------------------------------------------------------------------------------
1 | ,Factor1_Architecture,Factor2_Scale,Factor3_Performance
2 | Fusion Depth,0.394,0.183,-0.222
3 | Vision Model Size,0.669,0.175,-0.051
4 | Language Model Size,0.706,-0.153,0.099
5 | Task Difficulty,0.148,-0.519,-0.143
6 | Sensor Modalities,0.185,0.114,0.263
7 | Dataset Size,-0.03,0.143,-0.24
8 |
--------------------------------------------------------------------------------
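For readers who want to query the loadings table above programmatically, here is a minimal sketch. It assumes the command is run from the repository root so that `Plot_script/plots/factor_loadings.csv` resolves; the path and the reporting format are illustrative choices, not part of the original analysis code.

```python
import pandas as pd

# Load the loadings shown above; the first (unnamed) column holds the variable names.
loadings = pd.read_csv("Plot_script/plots/factor_loadings.csv", index_col=0)

# For each factor, report the variable with the largest absolute loading.
for factor in loadings.columns:
    top_var = loadings[factor].abs().idxmax()
    print(f"{factor}: strongest loading on '{top_var}' ({loadings.loc[top_var, factor]:+.3f})")
```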
/Plot_script/README.md:
--------------------------------------------------------------------------------
1 | # VLA Models Evaluation & Visualization
2 |
3 | Comprehensive analysis and visualization suite for Vision-Language-Action (VLA) model evaluation.
4 |
5 | ## Quick Start
6 |
7 | ### 1. Create Virtual Environment (Recommended)
8 | ```bash
9 | python3 -m venv .venv
10 | source .venv/bin/activate # On Windows: .venv\Scripts\activate
11 | ```
12 |
13 | ### 2. Install Dependencies
14 | ```bash
15 | pip install -r requirements.txt
16 | ```
17 |
18 | ### 3. Run Analysis
19 | ```bash
20 | python final_plots.py
21 | ```
22 |
23 | ## Files
24 |
25 | - **`final_plots.py`** - Main script generating all visualizations
26 | - **`new_vla_models.csv`** - Dataset with 101 VLA models and evaluation metrics
27 | - **`top75.csv`** - Subset with VLA-FEB component scores (CMAS, E_fusion, R2S, GI)
28 |
29 | ## Output
30 |
31 | Plots are saved to:
32 | - `plots/` - Publication-ready figures (PNG/SVG/PDF)
33 | - Main plots include: forest plot, encoder analysis, domain analysis, fusion theory, VLA-FEB histogram
34 |
35 | ## Key Metrics
36 |
37 | - **VLA-FEB Score**: Composite metric combining Cross-Modal Alignment (CMAS), Fusion Energy (E_fusion), Real-to-Sim Transfer (R2S), and Generalization Index (GI)
38 | - **Adjusted Success**: Normalized task success rates (0-1 scale)
39 | - **Generalization Index**: Multi-task capability measure
40 | - **Difficulty Index**: Task complexity metric
41 |
42 | ## Requirements
43 |
44 | - Python 3.10+
45 | - See `requirements.txt` for package dependencies
46 |
--------------------------------------------------------------------------------
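To make the "Key Metrics" description in the README above concrete, the sketch below loads `Plot_script/top75.csv` and recombines the normalized component columns into a composite score. In the rows reproduced later in this listing, the stored `VLA_FEB_Score` equals the simple average of `CMAS`, `E_fusion`, `R2S`, and `GI_actual`, so an equal-weight mean is used here; treat this as an illustration (the canonical computation lives in `final_plots.py`), and note that the path assumes the command is run from the repository root.

```python
import pandas as pd

# Evaluation table shipped alongside the plotting scripts.
df = pd.read_csv("Plot_script/top75.csv")

# Equal-weight mean of the four normalized components; in the rows shown in
# this listing it matches the stored VLA_FEB_Score column.
components = ["CMAS", "E_fusion", "R2S", "GI_actual"]
df["vla_feb_recomputed"] = df[components].mean(axis=1)

# Sanity-check against the published column and list the top-ranked models.
max_diff = (df["vla_feb_recomputed"] - df["VLA_FEB_Score"]).abs().max()
print(f"max deviation from stored VLA_FEB_Score: {max_diff:.2e}")
print(df.sort_values("VLA_FEB_Score", ascending=False)[["Model", "VLA_FEB_Score"]].head(10))
```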
/dataset_plot.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | from matplotlib.lines import Line2D
4 |
5 | # 1) Define per-dataset attributes (updated with corrected values and new datasets)
6 | # Descriptions:
7 | # T: Number of distinct tasks / skill types (higher is broader)
8 | # S: Scene diversity (number of unique environments)
9 | # D: Task difficulty (normalized, 0–1, higher is more challenging)
10 | # L: Task/episode length or complexity (normalized, higher = longer)
11 | # M: Number of modalities (vision, lang, proprioception, depth, audio, etc.)
12 | # Q: List of quality/success scores (per modality or overall)
13 | # A: Average annotation or benchmark score (0–1)
14 | # R: Real-robot validation (1 = yes, 0 = sim-only)
15 | dataset_attrs = {
16 | "DROID": {"T":10, "S":5, "D":0.2, "L":1.0, "M":3, "Q":[0.9,0.8,0.85], "A":0.9, "R":1},
17 | "Open X-Embodiment": {"T":15, "S":20, "D":0.5, "L":2.0, "M":4, "Q":[0.8,0.8,0.9,0.7], "A":0.8, "R":1},
18 | "ALFRED": {"T":30, "S":10, "D":0.8, "L":3.0, "M":4, "Q":[0.9,0.9,0.9,0.9], "A":0.95, "R":1},
19 | "RLBench": {"T":8, "S":6, "D":0.3, "L":1.5, "M":3, "Q":[0.7,0.8,0.7], "A":0.7, "R":0},
20 | "TEACh": {"T":12, "S":4, "D":0.6, "L":2.5, "M":3, "Q":[0.8,0.85,0.8], "A":0.85, "R":0},
21 | "DialFRED": {"T":25, "S":10, "D":0.75, "L":3.0, "M":4, "Q":[0.85,0.9,0.9,0.85], "A":0.9, "R":1},
22 | "EmbodiedQA": {"T":5, "S":2, "D":0.1, "L":1.0, "M":2, "Q":[0.7,0.7], "A":0.6, "R":0},
23 | "R2R": {"T":6, "S":3, "D":0.2, "L":1.2, "M":2, "Q":[0.8,0.75], "A":0.7, "R":0},
24 | "Ego4D": {"T":20, "S":0, "D":0.4, "L":1.0, "M":3, "Q":[0.9,0.9,0.8], "A":0.9, "R":0},
25 | "CVDN": {"T":15, "S":5, "D":0.5, "L":2.0, "M":3, "Q":[0.85,0.8,0.8], "A":0.85, "R":0},
26 | "CALVIN": {"T":35, "S":15, "D":0.9, "L":3.5, "M":4, "Q":[0.9,0.85,0.9,0.9], "A":0.9, "R":1},
27 | "RoboSpatial": {"T":4, "S":1, "D":0.1, "L":0.5, "M":2, "Q":[0.6,0.65], "A":0.5, "R":0},
28 | "CoVLA": {"T":18, "S":8, "D":0.7, "L":2.5, "M":4, "Q":[0.85,0.8,0.85,0.8], "A":0.9, "R":1},
29 | "AgiBot World": {"T":30, "S":25, "D":0.95, "L":4.0, "M":3, "Q":[0.8,0.8,0.8], "A":0.8, "R":1},
30 | "RoboData": {"T":25, "S":12, "D":0.7, "L":2.5, "M":4, "Q":[0.85,0.9,0.9,0.8], "A":0.9, "R":1},
31 | "Interleave-VLA": {"T":18, "S":8, "D":0.6, "L":2.0, "M":4, "Q":[0.8,0.85,0.8,0.75], "A":0.85, "R":1},
32 | "Iref-VLA": {"T":22, "S":10, "D":0.65, "L":3.0, "M":5, "Q":[0.9,0.9,0.85,0.9,0.8], "A":0.9, "R":1},
33 | "RH20T": {"T":10, "S":4, "D":0.3, "L":1.5, "M":2, "Q":[0.75,0.8], "A":0.8, "R":0},
34 | "Robo360": {"T":30, "S":15, "D":0.8, "L":3.5, "M":5, "Q":[0.9,0.85,0.9,0.85,0.9],"A":0.95, "R":1},
35 | "REASSEMBLE": {"T":28, "S":12, "D":0.7, "L":3.0, "M":4, "Q":[0.8,0.8,0.85,0.8], "A":0.9, "R":1},
36 | "RoboCerebra": {"T":12, "S":6, "D":0.4, "L":2.0, "M":3, "Q":[0.85,0.9,0.85], "A":0.9, "R":0},
37 | "TLA": {"T":35, "S":18, "D":0.85, "L":3.8, "M":4, "Q":[0.9,0.9,0.9,0.85], "A":0.9, "R":1},
38 | "Kaiwu": {"T":30, "S":20, "D":0.7, "L":4.0, "M":7, "Q":[0.9]*7, "A":0.9, "R":1}, # Source: arXiv:2503.05231
39 | "RefSpatial-Bench": {"T":2, "S":3, "D":1.0, "L":4.0, "M":2, "Q":[0.4696, 0.0582, 0.2287, 0.2191, 0.4577, 0.47, 0.52, 0.52, 0.2421, 0.0431, 0.0927, 0.1285, 0.1474, 0.48, 0.53, 0.54], "A":0.9, "R":1}, # Source: arXiv:2506.04308
40 | }
41 |
42 | # 2) Weights
43 | α1, α2, α3, α4 = 1.0, 1.0, 1.0, 1.0
44 | β1, β2, β3, β4 = 1.0, 1.0, 1.0, 1.0
45 |
46 | # 3) Compute raw task & modality scores
47 | c_task_raw = {}
48 | c_mod_raw = {}
49 | for name, a in dataset_attrs.items():
50 | T, S, D, L = a["T"], a["S"], a["D"], a["L"]
51 | c_task_raw[name] = α1 * np.log1p(T) + α2 * S + α3 * D + α4 * L
52 | M = a["M"]
53 | Qm = np.mean(a["Q"])
54 | A, R = a["A"], a["R"]
55 | c_mod_raw[name] = β1 * M + β2 * Qm + β3 * A + β4 * R
56 |
57 | # 4a) Normalize task to [1,5]
58 | def norm15(d):
59 | arr = np.array(list(d.values()))
60 | mn, mx = arr.min(), arr.max()
61 | return {k: 1 + 4*(v-mn)/(mx-mn) for k, v in d.items()}
62 |
63 | # 4b) Normalize modality to [2,5]
64 | def norm25(d):
65 | arr = np.array(list(d.values()))
66 | mn, mx = arr.min(), arr.max()
67 | return {k: 2 + 3*(v-mn)/(mx-mn) for k, v in d.items()}
68 |
69 | c_task = norm15(c_task_raw)
70 | c_mod = norm25(c_mod_raw)
71 |
72 | # 5) Point sizes by dataset scale
73 | raw_sizes = [
74 | 5000, 500000, 25025, 10000, 15000, 18000, 9000, 21000, 360000,
75 | 15000, 23000, 5000, 12000, 20000, 20000, 15000, 10000, 5000,
76 | 25000, 18000, 12000, 22000, 8000, 11664
77 | ]
78 | names = list(dataset_attrs.keys())
79 | smin, smax = min(raw_sizes), max(raw_sizes)
80 | sizes = {n: (800*(sz-smin)/(smax-smin) + 300)*6 for n, sz in zip(names, raw_sizes)}
81 |
82 | # 6) Color map
83 | colors = plt.cm.tab20(np.linspace(0, 1, len(names)))
84 | col_map = dict(zip(names, colors))
85 | # override to distinguish R2R and Ego4D
86 | col_map["R2R"] = "tab:red"
87 | col_map["Ego4D"] = "tab:purple"
88 |
89 | # 7) Plot (no text labels on bubbles)
90 | fig, ax = plt.subplots(figsize=(12, 8))
91 | for n in names:
92 | x, y = c_task[n], c_mod[n]
93 | ax.scatter(
94 | x, y,
95 | s=sizes[n],
96 | facecolor=col_map[n],
97 | edgecolor=col_map[n],
98 | alpha=0.7,
99 | linewidth=0.5,
100 | )
101 |
102 | # 8) Axes settings
103 | ax.set_xlim(0.75, 5.25)
104 | ax.set_ylim(1.75, 5.25)
105 | ax.set_title("Dataset & Benchmark Landscape", fontsize=16, weight='bold')
106 | ax.set_xlabel("Task Complexity", fontsize=12)
107 | ax.set_ylabel("Modality Richness", fontsize=12)
108 | ax.set_xticks([1, 2, 3, 4, 5])
109 | ax.set_xticklabels(["Very Low", "Low", "Medium", "High", "Very High"], rotation=25, ha="right")
110 | ax.set_yticks([2, 3, 4, 5])
111 | ax.set_yticklabels(["Minimal", "Moderate", "Rich", "Comprehensive"])
112 | ax.grid(True, linestyle='--', alpha=0.4)
113 |
114 | # 9) Legend
115 | handles = [
116 | Line2D([], [], marker='o', color='w',
117 | markerfacecolor=col_map[n], markeredgecolor=col_map[n],
118 | markersize=6, label=n, linestyle='')
119 | for n in names
120 | ]
121 | ax.legend(handles=handles,
122 | loc='upper center',
123 | bbox_to_anchor=(0.5, -0.15),
124 | ncol=4,
125 | fontsize=8,
126 | frameon=True)
127 |
128 | plt.tight_layout()
129 | plt.show()
130 |
--------------------------------------------------------------------------------
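As a quick sanity check on the scoring logic in `dataset_plot.py` above, the raw scores for a single entry can be reproduced by hand. The sketch below uses the DROID row, with values copied from `dataset_attrs`; since all α/β weights are 1.0, the task score reduces to log(1+T) + S + D + L and the modality score to M + mean(Q) + A + R.

```python
import numpy as np

# DROID attributes, copied from dataset_attrs in dataset_plot.py.
T, S, D, L = 10, 5, 0.2, 1.0
M, Q, A, R = 3, [0.9, 0.8, 0.85], 0.9, 1

# With all alpha/beta weights equal to 1.0:
c_task_raw = np.log1p(T) + S + D + L   # ~ 2.398 + 5 + 0.2 + 1.0 = 8.598
c_mod_raw = M + np.mean(Q) + A + R     # 3 + 0.85 + 0.9 + 1 = 5.75

print(f"DROID: c_task_raw = {c_task_raw:.3f}, c_mod_raw = {c_mod_raw:.3f}")
```

These raw values are then min-max normalized across all datasets (to [1, 5] on the task axis and [2, 5] on the modality axis) before plotting, so a single dataset's position in the bubble chart depends on the whole dictionary.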
/Plot_script/dataset_plot.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | from matplotlib.lines import Line2D
4 |
5 | # 1) Define per-dataset attributes (updated with corrected values and new datasets)
6 | # Descriptions:
7 | # T: Number of distinct tasks / skill types (higher is broader)
8 | # S: Scene diversity (number of unique environments)
9 | # D: Task difficulty (normalized, 0–1, higher is more challenging)
10 | # L: Task/episode length or complexity (normalized, higher = longer)
11 | # M: Number of modalities (vision, lang, proprioception, depth, audio, etc.)
12 | # Q: List of quality/success scores (per modality or overall)
13 | # A: Average annotation or benchmark score (0–1)
14 | # R: Real-robot validation (1 = yes, 0 = sim-only)
15 | dataset_attrs = {
16 | "DROID": {"T":10, "S":5, "D":0.2, "L":1.0, "M":3, "Q":[0.9,0.8,0.85], "A":0.9, "R":1},
17 | "Open X-Embodiment": {"T":15, "S":20, "D":0.5, "L":2.0, "M":4, "Q":[0.8,0.8,0.9,0.7], "A":0.8, "R":1},
18 | "ALFRED": {"T":30, "S":10, "D":0.8, "L":3.0, "M":4, "Q":[0.9,0.9,0.9,0.9], "A":0.95, "R":1},
19 | "RLBench": {"T":8, "S":6, "D":0.3, "L":1.5, "M":3, "Q":[0.7,0.8,0.7], "A":0.7, "R":0},
20 | "TEACh": {"T":12, "S":4, "D":0.6, "L":2.5, "M":3, "Q":[0.8,0.85,0.8], "A":0.85, "R":0},
21 | "DialFRED": {"T":25, "S":10, "D":0.75, "L":3.0, "M":4, "Q":[0.85,0.9,0.9,0.85], "A":0.9, "R":1},
22 | "EmbodiedQA": {"T":5, "S":2, "D":0.1, "L":1.0, "M":2, "Q":[0.7,0.7], "A":0.6, "R":0},
23 | "R2R": {"T":6, "S":3, "D":0.2, "L":1.2, "M":2, "Q":[0.8,0.75], "A":0.7, "R":0},
24 | "Ego4D": {"T":20, "S":0, "D":0.4, "L":1.0, "M":3, "Q":[0.9,0.9,0.8], "A":0.9, "R":0},
25 | "CVDN": {"T":15, "S":5, "D":0.5, "L":2.0, "M":3, "Q":[0.85,0.8,0.8], "A":0.85, "R":0},
26 | "CALVIN": {"T":35, "S":15, "D":0.9, "L":3.5, "M":4, "Q":[0.9,0.85,0.9,0.9], "A":0.9, "R":1},
27 | "RoboSpatial": {"T":4, "S":1, "D":0.1, "L":0.5, "M":2, "Q":[0.6,0.65], "A":0.5, "R":0},
28 | "CoVLA": {"T":18, "S":8, "D":0.7, "L":2.5, "M":4, "Q":[0.85,0.8,0.85,0.8], "A":0.9, "R":1},
29 | "AgiBot World": {"T":30, "S":25, "D":0.95, "L":4.0, "M":3, "Q":[0.8,0.8,0.8], "A":0.8, "R":1},
30 | "RoboData": {"T":25, "S":12, "D":0.7, "L":2.5, "M":4, "Q":[0.85,0.9,0.9,0.8], "A":0.9, "R":1},
31 | "Interleave-VLA": {"T":18, "S":8, "D":0.6, "L":2.0, "M":4, "Q":[0.8,0.85,0.8,0.75], "A":0.85, "R":1},
32 | "Iref-VLA": {"T":22, "S":10, "D":0.65, "L":3.0, "M":5, "Q":[0.9,0.9,0.85,0.9,0.8], "A":0.9, "R":1},
33 | "RH20T": {"T":10, "S":4, "D":0.3, "L":1.5, "M":2, "Q":[0.75,0.8], "A":0.8, "R":0},
34 | "Robo360": {"T":30, "S":15, "D":0.8, "L":3.5, "M":5, "Q":[0.9,0.85,0.9,0.85,0.9],"A":0.95, "R":1},
35 | "REASSEMBLE": {"T":28, "S":12, "D":0.7, "L":3.0, "M":4, "Q":[0.8,0.8,0.85,0.8], "A":0.9, "R":1},
36 | "RoboCerebra": {"T":12, "S":6, "D":0.4, "L":2.0, "M":3, "Q":[0.85,0.9,0.85], "A":0.9, "R":0},
37 | "TLA": {"T":35, "S":18, "D":0.85, "L":3.8, "M":4, "Q":[0.9,0.9,0.9,0.85], "A":0.9, "R":1},
38 | "Kaiwu": {"T":30, "S":20, "D":0.7, "L":4.0, "M":7, "Q":[0.9]*7, "A":0.9, "R":1}, # Source: arXiv:2503.05231
39 | "RefSpatial-Bench": {"T":2, "S":3, "D":1.0, "L":4.0, "M":2, "Q":[0.4696, 0.0582, 0.2287, 0.2191, 0.4577, 0.47, 0.52, 0.52, 0.2421, 0.0431, 0.0927, 0.1285, 0.1474, 0.48, 0.53, 0.54], "A":0.9, "R":1}, # Source: arXiv:2506.04308
40 | }
41 |
42 | # 2) Weights
43 | α1, α2, α3, α4 = 1.0, 1.0, 1.0, 1.0
44 | β1, β2, β3, β4 = 1.0, 1.0, 1.0, 1.0
45 |
46 | # 3) Compute raw task & modality scores
47 | c_task_raw = {}
48 | c_mod_raw = {}
49 | for name, a in dataset_attrs.items():
50 | T, S, D, L = a["T"], a["S"], a["D"], a["L"]
51 | c_task_raw[name] = α1 * np.log1p(T) + α2 * S + α3 * D + α4 * L
52 | M = a["M"]
53 | Qm = np.mean(a["Q"])
54 | A, R = a["A"], a["R"]
55 | c_mod_raw[name] = β1 * M + β2 * Qm + β3 * A + β4 * R
56 |
57 | # 4a) Normalize task to [1,5]
58 | def norm15(d):
59 | arr = np.array(list(d.values()))
60 | mn, mx = arr.min(), arr.max()
61 | return {k: 1 + 4*(v-mn)/(mx-mn) for k, v in d.items()}
62 |
63 | # 4b) Normalize modality to [2,5]
64 | def norm25(d):
65 | arr = np.array(list(d.values()))
66 | mn, mx = arr.min(), arr.max()
67 | return {k: 2 + 3*(v-mn)/(mx-mn) for k, v in d.items()}
68 |
69 | c_task = norm15(c_task_raw)
70 | c_mod = norm25(c_mod_raw)
71 |
72 | # 5) Point sizes by dataset scale
73 | raw_sizes = [
74 | 5000, 500000, 25025, 10000, 15000, 18000, 9000, 21000, 360000,
75 | 15000, 23000, 5000, 12000, 20000, 20000, 15000, 10000, 5000,
76 | 25000, 18000, 12000, 22000, 8000, 11664
77 | ]
78 | names = list(dataset_attrs.keys())
79 | smin, smax = min(raw_sizes), max(raw_sizes)
80 | sizes = {n: (800*(sz-smin)/(smax-smin) + 300)*6 for n, sz in zip(names, raw_sizes)}
81 |
82 | # 6) Color map
83 | colors = plt.cm.tab20(np.linspace(0, 1, len(names)))
84 | col_map = dict(zip(names, colors))
85 | # override to distinguish R2R and Ego4D
86 | col_map["R2R"] = "tab:red"
87 | col_map["Ego4D"] = "tab:purple"
88 |
89 | # 7) Plot (no text labels on bubbles)
90 | fig, ax = plt.subplots(figsize=(12, 8))
91 | for n in names:
92 | x, y = c_task[n], c_mod[n]
93 | ax.scatter(
94 | x, y,
95 | s=sizes[n],
96 | facecolor=col_map[n],
97 | edgecolor=col_map[n],
98 | alpha=0.7,
99 | linewidth=0.5,
100 | )
101 |
102 | # 8) Axes settings
103 | ax.set_xlim(0.75, 5.25)
104 | ax.set_ylim(1.75, 5.25)
105 | ax.set_title("Dataset & Benchmark Landscape", fontsize=16, weight='bold')
106 | ax.set_xlabel("Task Complexity", fontsize=12)
107 | ax.set_ylabel("Modality Richness", fontsize=12)
108 | ax.set_xticks([1, 2, 3, 4, 5])
109 | ax.set_xticklabels(["Very Low", "Low", "Medium", "High", "Very High"], rotation=25, ha="right")
110 | ax.set_yticks([2, 3, 4, 5])
111 | ax.set_yticklabels(["Minimal", "Moderate", "Rich", "Comprehensive"])
112 | ax.grid(True, linestyle='--', alpha=0.4)
113 |
114 | # 9) Legend
115 | handles = [
116 | Line2D([], [], marker='o', color='w',
117 | markerfacecolor=col_map[n], markeredgecolor=col_map[n],
118 | markersize=6, label=n, linestyle='')
119 | for n in names
120 | ]
121 | ax.legend(handles=handles,
122 | loc='upper center',
123 | bbox_to_anchor=(0.5, -0.15),
124 | ncol=4,
125 | fontsize=8,
126 | frameon=True)
127 |
128 | plt.tight_layout()
129 | plt.show()
130 |
--------------------------------------------------------------------------------
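Both copies of `dataset_plot.py` end with `plt.show()`, which needs an interactive display. If you would rather write the figure into the `plots/` directory (for example on a headless machine), one possible adaptation is sketched below; the Agg backend, output directory, and filename are choices made for this sketch, not something the original script specifies.

```python
import matplotlib
matplotlib.use("Agg")            # render without a display
import matplotlib.pyplot as plt
from pathlib import Path

# ...build the figure exactly as in dataset_plot.py, then replace plt.show() with:
out_dir = Path("plots")
out_dir.mkdir(exist_ok=True)
plt.savefig(out_dir / "dataset_landscape.png", dpi=300, bbox_inches="tight")
plt.close()
```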
/README.md:
--------------------------------------------------------------------------------
1 | # Vision–Language–Action (VLA) Models in Robotics
2 |
3 | This repository was developed alongside the paper [Vision Language Action Models in Robotic Manipulation: A Systematic Review](https://muhayyuddin.github.io/VLAs/) and provides a living catalog of:
4 |
5 | - **Dataset Benchmarking Code**
6 |   Code to benchmark the datasets by task complexity and modality richness.
7 |
8 | - **VLA Models**
9 | Key vision–language–action models that are used in the review, with links to the original papers.
10 | - **Datasets**
11 | Major benchmarks and large‑scale collections used to train and evaluate VLA systems, including QA/navigation datasets, manipulation demonstrations, and multimodal embodiment data.
12 | - **Simulators**
13 | Widely adopted simulation platforms for generating VLA data—spanning photorealistic navigation, dexterous manipulation, multi‑robot coordination, and more—each linked to its official website.
14 |
15 | We aim to keep this list up to date as new VLA models, datasets, and simulation tools emerge. Contributions and pull requests adding recently published work or tooling are most welcome!
16 |
17 | ---
18 |
19 | ## Table of Contents
20 | - [Dataset Benchmarking Code](#dataset-benchmarking-code)
21 | - [VLA Models](#vla-models)
22 | - [Datasets](#datasets)
23 | - [Simulators](#simulators)
24 | - [Reference for Citation](#reference)
25 |
26 |
27 |
28 | ---
29 | # Dataset Benchmarking Code
30 | Benchmarking VLA Datasets by Task Complexity and Modality Richness. Each bubble represents a VLA dataset, positioned according to its normalized task-complexity score (x-axis) and its modality-richness score (y-axis). The bubble area is proportional to the dataset scale, i.e., the number of annotated episodes or interactions.
31 |
32 | 
33 |
34 | [Code](https://github.com/Muhayyuddin/VLAs/blob/main/dataset_plot.py)
35 | # VLA Models
36 |
37 | 
38 | The top row presents major VLA models introduced each year, alongside their associated
39 | institutions. The bottom row displays key datasets used to train and evaluate VLA models,
40 | grouped by release year. The figure highlights the increasing scale and diversity of datasets
41 | and institutional involvement, with contributions from academic labs (e.g., CMU, CNRS, UC,
42 | Peking University) and industrial labs (e.g., Google, NVIDIA, Microsoft), underscoring the
43 | rapid pace of advances in VLA research.
44 |
45 | Below is the list of VLA models reviewed in the paper:
46 |
47 | [2022][Cliport: What and where pathways for robotic manipulation](https://proceedings.mlr.press/v164/shridhar22a/shridhar22a.pdf)
48 | [2022][Rt-1: Robotics transformer for real‑world control at scale](https://arxiv.org/abs/2212.06817)
49 | [2022][A Generalist Agent](https://arxiv.org/abs/2205.06175)
50 | [2022][VIMA: General Robot Manipulation with Multimodal Prompts](https://arxiv.org/abs/2210.03094)
51 | [2022][PERCEIVER-ACTOR: A Multi-Task Transformer for Robotic Manipulation](https://peract.github.io/paper/peract_corl2022.pdf)
52 | [2022][Do As I Can, Not As I Say: Grounding Language in Robotic Affordances](https://arxiv.org/abs/2204.01691)
53 | [2023][RoboAgent: Generalist Robot Agent with Semantic and Temporal Understanding](https://arxiv.org/abs/2310.08560)
54 | [2023][Robotic Task Generalization via Hindsight Trajectory Sketches](https://arxiv.org/abs/2311.01977)
55 | [2023][Learning fine‑grained bimanual manipulation with low‑cost hardware](https://arxiv.org/abs/2304.13705)
56 | [2023][Rt-2: Vision‑language‑action models transfer web knowledge to robotic control](Link TBD)
57 | [2023][Voxposer: Composable 3D value maps for robotic manipulation with language models](https://arxiv.org/abs/2307.05973)
58 | [2024][CLIP‑RT: Learning Language‑Conditioned Robotic Policies with Natural Language Supervision](https://arxiv.org/abs/2411.00508)
59 | [2023][Diffusion Policy: Visuomotor policy learning via action diffusion](https://arxiv.org/pdf/2303.04137)
60 | [2024][Octo: An open‑source generalist robot policy](https://arxiv.org/abs/2405.12213)
61 | [2024][Towards testing and evaluating vision‑language manipulation: An empirical study](https://arxiv.org/abs/2409.12894)
62 | [2024][NaVILA: Legged robot vision‑language‑action model for navigation](https://arxiv.org/abs/2412.04453)
63 | [2024][RoboNurse‑VLA: Real‑time voice‑to‑action pipeline for surgical instrument handover](https://arxiv.org/pdf/2409.19590)
64 | [2024][Mobility VLA: Multimodal instruction navigation with topological mapping](https://arxiv.org/pdf/2407.07775)
65 | [2024][ReVLA: Domain adaptation adapters for robotic foundation models](https://arxiv.org/pdf/2409.15250.pdf)
66 | [2024][Uni‑NaVid: Video‑based VLA unifying embodied navigation tasks](https://arxiv.org/pdf/2412.06224.pdf)
67 | [2024][RDT‑1B: 1.2B‑parameter diffusion foundation model for manipulation](https://arxiv.org/pdf/2410.07864.pdf)
68 | [2024][RoboMamba: Mamba‑based unified VLA with linear‑time inference](https://arxiv.org/pdf/2406.04339.pdf)
69 | [2024][Chain‑of‑Affordance: Sequential affordance reasoning for spatial planning](https://arxiv.org/pdf/2412.20451.pdf)
70 | [2024][Edge VLA: Self-Adapting Large Visual-Language Models to Edge Devices across Visual Modalities](https://arxiv.org/pdf/2403.04908)
71 | [2024][OpenVLA: LORA‑fine‑tuned open‑source VLA with high‑success transfer](https://arxiv.org/pdf/2406.09246.pdf)
72 | [2024][CogACT: Componentized diffusion action transformer for VLA](https://arxiv.org/pdf/2411.19650.pdf)
73 | [2024][ShowUI‑2B: GUI/web navigation via screenshot grounding and token selection](https://arxiv.org/pdf/2411.17465)
74 | [2024][HiRT: Hierarchical planning/control separation for VLA](https://arxiv.org/pdf/2410.05273)
75 | [2024][Pi‑0: General robot control flow model for open‑world tasks](https://arxiv.org/pdf/2410.24164.pdf)
76 | [2024][A3VLM: Articulation‑aware affordance grounding from RGB video](https://arxiv.org/pdf/2406.07549.pdf)
77 | [2024][SVLR: Modular “segment‑to‑action” pipeline using visual prompt retrieval](https://arxiv.org/pdf/2502.01071.pdf)
78 | [2024][Bi‑VLA: Dual‑arm instruction‑to‑action planner for recipe demonstrations](https://arxiv.org/pdf/2405.06039.pdf)
79 | [2024][QUAR‑VLA: Quadruped‑specific VLA with adaptive gait mapping](https://arxiv.org/pdf/2312.14457.pdf)
80 | [2024][3D‑VLA: Integrating 3D generative diffusion heads for world reconstruction](https://arxiv.org/pdf/2403.09631)
81 | [2024][RoboMM: MIM‑based multimodal decoder unifying 3D perception and language](https://arxiv.org/pdf/2412.07215.pdf)
82 | [2025][FAST: Frequency‑space action tokenization for faster inference](https://arxiv.org/pdf/2501.09747.pdf)
83 | [2025][OpenVLA‑OFT: Optimized fine‑tuning of OpenVLA with parallel decoding](https://arxiv.org/pdf/2502.19645.pdf)
84 | [2025][CoVLA: Autonomous driving VLA trained on annotated scene data](https://arxiv.org/pdf/2408.10845.pdf)
85 | [2025][ORION: Holistic end‑to‑end driving VLA with semantic trajectory control](https://arxiv.org/pdf/2503.19755.pdf)
86 | [2025][UAV‑VLA: Zero‑shot aerial mission VLA combining satellite/UAV imagery](https://arxiv.org/pdf/2501.05014.pdf)
87 | [2025][Combat VLA: Ultra‑fast tactical reasoning in 3D environments](https://arxiv.org/pdf/2503.09527.pdf)
88 | [2025][HybridVLA: Ensemble decoding combining diffusion and autoregressive policies](https://arxiv.org/pdf/2503.10631.pdf)
89 | [2025][NORA: Low‑overhead VLA with integrated visual reasoning and FAST decoding](https://arxiv.org/pdf/2504.19854.pdf)
90 | [2025][SpatialVLA: 3D spatial encoding and adaptive action discretization](https://arxiv.org/pdf/2501.15830.pdf)
91 | [2025][MoLe‑VLA: Selective layer activation for faster inference](https://arxiv.org/pdf/2503.20384.pdf)
92 | [2025][JARVIS‑VLA: Open‑world instruction following in 3D games with keyboard/mouse](https://arxiv.org/pdf/2503.16365.pdf)
93 | [2025][UP‑VLA: Unified understanding and prediction model for embodied agents](https://arxiv.org/pdf/2501.18867.pdf)
94 | [2025][Shake‑VLA: Modular bimanual VLA for cocktail‑mixing tasks](https://arxiv.org/pdf/2501.06919.pdf)
95 | [2025][MORE: Scalable mixture‑of‑experts RL for VLA models](https://arxiv.org/pdf/2503.08007.pdf)
96 | [2025][DexGraspVLA: Diffusion‑based dexterous grasping framework](https://arxiv.org/pdf/2502.20900.pdf)
97 | [2025][DexVLA: Cross‑embodiment diffusion expert for rapid adaptation](https://arxiv.org/pdf/2502.05855.pdf)
98 | [2025][Humanoid‑VLA: Hierarchical full‑body humanoid control VLA](https://arxiv.org/pdf/2502.14795.pdf)
99 | [2025][ObjectVLA: End‑to‑end open‑world object manipulation](https://arxiv.org/pdf/2502.19250.pdf)
100 | [2025][Gemini Robotics: Bringing AI into the Physical World](https://arxiv.org/pdf/2503.20020.pdf)
101 | [2025][ECoT: Robotic Control via Embodied Chain‑of‑Thought Reasoning](https://arxiv.org/pdf/2407.08693.pdf)
102 | [2025][OTTER: A Vision‑Language‑Action Model with Text‑Aware Visual Feature Extraction](https://arxiv.org/pdf/2503.03734.pdf)
103 | [2025][π‑0.5: A VLA Model with Open‑World Generalization](https://arxiv.org/pdf/2504.16054.pdf)
104 | [2025][OneTwoVLA: A Unified Model with Adaptive Reasoning](https://arxiv.org/pdf/2505.11917.pdf)
105 | [2025][Helix: A Vision-Language-Action Model for Generalist Humanoid Control](https://www.figure.ai/news/helix)
106 | [2025][SmolVLA: A Vision‑Language‑Action Model for Affordable and Efficient Robotics](https://arxiv.org/pdf/2506.01844.pdf)
107 | [2025][EF‑VLA: Vision‑Language‑Action Early Fusion with Causal Transformers](https://openreview.net/pdf/32c153a3b16174884cf62b285adbfbdcc57b163e.pdf)
108 | [2025][PD‑VLA: Accelerating vision‑language‑action inference via parallel decoding](https://arxiv.org/pdf/2503.02310.pdf)
109 | [2025][LeVERB: Humanoid Whole‑Body Control via Latent Verb Generation](https://arxiv.org/pdf/2506.13751.pdf)
110 | [2025][TLA: Tactile‑Language‑Action Model for High‑Precision Contact Tasks](https://arxiv.org/pdf/2503.08548.pdf)
111 | [2025][Interleave‑VLA: Enhancing VLM‑LLM interleaved instruction processing](https://arxiv.org/pdf/2505.02152.pdf)
112 | [2025][iRe‑VLA: Iterative reinforcement and supervised fine‑tuning for robust VLA](https://arxiv.org/pdf/2501.16664.pdf)
113 | [2025][TraceVLA: Visual trace prompting for spatio‑temporal manipulation cues](https://arxiv.org/pdf/2412.10345.pdf)
114 | [2025][OpenDrive VLA: End‑to‑End Driving with Semantic Scene Alignment](https://arxiv.org/pdf/2503.23463.pdf)
115 | [2025][V‑JEPA 2: Dual‑Stream Video JEPA for Predictive Robotic Planning](https://arxiv.org/pdf/2506.09985.pdf)
116 | [2025][Knowledge Insulating VLA: Insulation Layers for Modular VLA Training](https://arxiv.org/pdf/2505.23705.pdf)
117 | [2025][GR00T N1: Diffusion Foundation Model for Humanoid Control](https://arxiv.org/pdf/2503.14734.pdf)
118 | [2025][AgiBot World Colosseo: Unified Embodied Dataset Platform](https://arxiv.org/pdf/2503.06669.pdf)
119 | [2025][Hi Robot: Hierarchical Planning and Control for Complex Environments](https://arxiv.org/pdf/2502.19417.pdf)
120 | [2025][EnerVerse: World‑Model LLM for Long‑Horizon Manipulation](https://arxiv.org/pdf/2501.01895.pdf)
121 | [2024][FLaRe: Large-Scale RL Fine-Tuning for Adaptive Robotic Policies](https://arxiv.org/pdf/2409.16578.pdf)
122 | [2025][Beyond Sight: Sensor Fusion via Language-Grounded Attention](https://arxiv.org/pdf/2501.04693.pdf)
123 | [2025][GeoManip: Geometric Constraint Encoding for Robust Manipulation](https://arxiv.org/pdf/2501.09783.pdf)
124 | [2025][Universal Actions: Standardizing Action Dictionaries for Transfer](https://arxiv.org/pdf/2501.10105.pdf)
125 | [2025][RoboHorizon: Multi-View Environment Modeling with LLM Planning](https://arxiv.org/pdf/2501.06605.pdf)
126 | [2025][SAM2Act: Segmentation‑Augmented Memory for Object‑Centric Manipulation](https://arxiv.org/pdf/2501.18564.pdf)
127 | [2025][VLA‑Cache: Token Caching for Efficient VLA Inference](https://arxiv.org/pdf/2502.02175.pdf)
128 | [2025][Forethought VLA: Latent Alignment for Foresight‑Driven Policies](https://arxiv.org/pdf/2502.01828.pdf)
129 | [2024][GRAPE: Preference‑Guided Policy Adaptation via Feedback](https://arxiv.org/pdf/2409.16578.pdf)
130 | [2025][HAMSTER: Hierarchical Skill Decomposition for Multi‑Step Manipulation](https://arxiv.org/pdf/2502.05485.pdf)
131 | [2025][TempoRep VLA: Successor Representation for Temporal Planning](https://arxiv.org/pdf/2507.10672v1)
132 | [2025][ConRFT: Consistency Regularized Fine‑Tuning with Reinforcement](https://arxiv.org/pdf/2502.05450.pdf)
133 | [2025][RoboBERT: Unified Multimodal Transformer for Manipulation](https://arxiv.org/pdf/2502.07837.pdf)
134 | [2024][Diffusion Transformer Policy: Robust Multimodal Action Sampling](https://arxiv.org/pdf/2410.15959.pdf)
135 | [2025][GEVRM: Generative Video Modeling for Goal‑Oriented Planning](https://arxiv.org/pdf/2502.09268.pdf)
136 | [2025][SoFar: Successor‑Feature Orientation Representations](https://arxiv.org/pdf/2502.13143.pdf)
137 | [2025][ARM4R: Auto‑Regressive 4D Transition Modeling for Trajectories](https://arxiv.org/pdf/2502.13142.pdf)
138 | [2025][Magma: Foundation Multimodal Agent Model for Control](https://arxiv.org/pdf/2502.13130.pdf)
139 | [2025][An Atomic Skill Library: Modular Skill Composition for Robotics](https://arxiv.org/pdf/2501.15068.pdf)
140 | [2025][RoboBrain: Knowledge‑Grounded Policy Brain for Multimodal Tasks](https://arxiv.org/pdf/2502.21257.pdf)
141 | [2025][SafeVLA: Safety‑Aware Vision‑Language‑Action Policies](https://arxiv.org/pdf/2503.03480.pdf)
142 | [2025][CognitiveDrone: Embodied Reasoning VLA for UAV Planning](https://arxiv.org/pdf/2503.01378.pdf)
143 | [2025][VLAS: Voice‑Driven Vision‑Language‑Action Control](https://arxiv.org/pdf/2502.13508.pdf)
144 | [2025][ChatVLA: Conversational VLA for Interactive Control](https://arxiv.org/pdf/2502.14420.pdf)
145 | [2024][Diffusion‑VLA: Diffusion‑Based Policy for Generalizable Manipulation](https://arxiv.org/pdf/2412.03293.pdf)
146 | [2025][RoboRefer: Towards Spatial Referring with Reasoning in Vision-Language Models for Robotics](https://arxiv.org/pdf/2506.04308.pdf)
147 | [2025][Cross-Platform Scaling of Vision-Language-Action Models from Edge to Cloud GPUs](https://arxiv.org/pdf/2509.11480)
148 | [2025][VOTE: Vision-Language-Action Optimization with Trajectory Ensemble Voting](https://arxiv.org/pdf/2507.05116)
149 |
150 | # Datasets
151 | [2018][EmbodiedQA: Embodied Question Answering](https://openaccess.thecvf.com/content_cvpr_2018/papers_backup/Das_Embodied_Question_Answering_CVPR_2018_paper.pdf)
152 | [2018][R2R: Vision‑and‑Language Navigation: Interpreting Visually‑Grounded Navigation Instructions in Real Environments](https://openaccess.thecvf.com/content_cvpr_2018/papers/Anderson_Vision-and-Language_Navigation_Interpreting_CVPR_2018_paper.pdf)
153 | [2020][ALFRED](https://arxiv.org/abs/1912.01734)
154 | [2020][RLBench: The Robot Learning Benchmark & Learning Environment](https://arxiv.org/pdf/1909.12271.pdf)
155 | [2019][Vision‑and‑Dialog Navigation](https://arxiv.org/abs/1907.04957)
156 | [2021][TEACh: Task‑driven Embodied Agents that Chat](https://arxiv.org/abs/2110.00534)
157 | [2022][DialFRED: Dialogue‑Enabled Agents for Embodied Instruction Following](https://arxiv.org/pdf/2202.13330.pdf)
158 | [2022][Ego4D: Around the World in 3,000 Hours of Egocentric Video](https://arxiv.org/abs/2110.07058)
159 | [2022][CALVIN: A Benchmark for Language‑Conditioned Long‑Horizon Robot Manipulation Tasks](https://arxiv.org/abs/2112.03227)
160 | [2024][DROID: A Large‑Scale In‑The‑Wild Robot Manipulation Dataset](https://droid-dataset.github.io/)
161 | [2025][Open X-Embodiment: Robotic Learning Datasets and RT‑X Models](https://arxiv.org/abs/2310.08864)
162 | [2025][RoboSpatial: Teaching Spatial Understanding via Vision‑Language Models for Robotics](https://arxiv.org/abs/2411.16537)
163 | [2024][CoVLA: Comprehensive Vision‑Language‑Action Dataset for Autonomous Driving](https://arxiv.org/abs/2408.10845)
164 | [2025][TLA: Tactile‑Language‑Action Model for Contact‑Rich Manipulation](https://arxiv.org/abs/2503.08548)
165 | [2023][BridgeData V2: A Dataset for Robot Learning at Scale](https://proceedings.mlr.press/v229/walke23a/walke23a.pdf)
166 | [2023][LIBERO: Benchmarking Knowledge Transfer for Lifelong Robot Learning](https://proceedings.neurips.cc/paper_files/paper/2023/file/8c3c666820ea055a77726d66fc7d447f-Paper-Datasets_and_Benchmarks.pdf)
167 | [2025][Kaiwu: A Multimodal Manipulation Dataset and Framework for Robotic Perception and Interaction](https://arxiv.org/abs/2503.05231)
168 | [2025][PLAICraft: Large‑Scale Time‑Aligned Vision‑Speech‑Action Dataset for Embodied AI](https://arxiv.org/abs/2505.12707)
169 | [2025][AgiBot World Colosseo: A Large‑Scale Manipulation Dataset for Intelligent Embodied Systems](https://arxiv.org/abs/2503.06669)
170 | [2023][Robo360: A 3D Omnispective Multi‑Modal Robotic Manipulation Dataset](https://arxiv.org/abs/2312.06686)
171 | [2025][REASSEMBLE: A Multimodal Dataset for Contact‑Rich Robotic Assembly and Disassembly](https://arxiv.org/abs/2502.05086)
172 | [2025][RoboCerebra: A Large‑Scale Benchmark for Long‑Horizon Robotic Manipulation Evaluation](https://arxiv.org/abs/2506.06677)
173 | [2025][IRef‑VLA: A Benchmark for Interactive Referential Grounding with Imperfect Language in 3D Scenes](https://arxiv.org/abs/2503.17406)
174 | [2025][Interleave‑VLA: Enhancing Robot Manipulation with Interleaved Image‑Text Instructions](https://arxiv.org/abs/2406.07000)
175 | [2024][RoboMM: All‑in‑One Multimodal Large Model for Robotic Manipulation](https://arxiv.org/abs/2412.07215)
176 | [2024][All Robots in One: A New Standard and Unified Dataset for Versatile, General‑Purpose Embodied Agents](https://arxiv.org/abs/2408.10899)
177 | [2025][RoboRefer: Towards Spatial Referring with Reasoning in Vision-Language Models for Robotics](https://arxiv.org/pdf/2506.04308.pdf)
178 |
179 | # Simulators
180 | [2017][AI2-THOR](https://ai2thor.allenai.org)
181 | [2019][Habitat](https://aihabitat.org)
182 | [2020][NVIDIA Isaac Sim](https://developer.nvidia.com/isaac-sim)
183 | [2004][Gazebo](http://gazebosim.org)
184 | [2016][PyBullet](https://pybullet.org)
185 | [2013][CoppeliaSim](https://www.coppeliarobotics.com)
186 | [2004][Webots](https://cyberbotics.com)
187 | [2018][Unity ML‑Agents](https://unity-technologies.github.io/ml-agents/)
188 | [2012][MuJoCo](https://mujoco.org)
189 | [2020][iGibson](https://svl.stanford.edu/igibson)
190 | [2023][UniSim](https://universal-simulator.github.io/unisim/)
191 | [2020][SAPIEN](https://sapien.ucsd.edu)
192 |
193 | # Reference
194 | ```
195 | @article{din2025multimodal,
196 | title={Multimodal Fusion with Vision-Language-Action Models for Robotic Manipulation: A Systematic Review},
197 | author={Muhayy, Ud Din and Akram, Waseem and Saoud, Lyes Saad and Rosell, Jan and Hussain, Irfan},
198 | journal={Information Fusion},
199 | year={2025},
200 | publisher={Elsevier}
201 | }
202 | ```
203 |
204 |
205 |
--------------------------------------------------------------------------------
/Plot_script/plots/factor_analysis.svg:
--------------------------------------------------------------------------------
(SVG markup not included in this listing)
--------------------------------------------------------------------------------
/Plot_script/plots/decoder_analysis_2panel.svg:
--------------------------------------------------------------------------------
(SVG markup not included in this listing)
--------------------------------------------------------------------------------
/Plot_script/plots/forest_plot.svg:
--------------------------------------------------------------------------------
(SVG markup not included in this listing)
--------------------------------------------------------------------------------
/Plot_script/plots/scale_analysis_4panel.svg:
--------------------------------------------------------------------------------
(SVG markup not included in this listing)
--------------------------------------------------------------------------------
/Plot_script/top75.csv:
--------------------------------------------------------------------------------
1 | Model,End_to_End,Component_Focused,Main_Contribution,Dataset,Vision_Encoder,Language_Encoder,Action_Decoder,FusionDepth,FusionType,DecoderFamily,Domain,VisionParams,LLMParams,CTask,CMod,LogN,Success,Adjusted_Success_0to1,Success_Provenance,Difficulty_Index,Generalization_Index_0to1,flag_zero_shot,flag_cross_embodiment,flag_sim2real,GInew_0to1,CMAS_raw,E_fusion_raw,R2S_raw,GI_actual,CMAS,E_fusion,R2S,VLA_FEB_Score
2 | CLIPort,Yes,Yes,Pioneered the semantic grounding of visuomotor policies by integrating CLIP features into dense transport maps for precise pick-and-place.,Self-collected visuomotor demos,CLIP-ResNet50 + Transporter-ResNet,CLIP text encoder,LingUNet,early,additive,autoregressive,manipulation,7.0,7.0,0.7127598674935811,0.5237479914465233,3.580369559845169,0.1173249066721428,0.3525304888455273,proxy_existing,0.0,0.12,False,False,False,0.2115182933073163,0.3888888888888889,0.3733065489834531,2.0,0.2115182933073163,0.0,0.380879563654579,1.0,0.3980994642404738
3 | RT-1,Yes,Yes,Introduced a discretized action transformer for scalable multi-task kitchen manipulation.,Self-collected RT-1-Kitchen,EfficientNet CNN,Universal Sentence Encoder,Discretized action transformer head,late,additive,autoregressive,manipulation,7.0,7.5,0.7229760638914537,0.7925844786523562,4.914731676171138,0.0811252643996296,0.3151200916749392,proxy_existing,0.0,0.12,False,False,False,0.1890720550049635,0.4027777777777778,0.5730196066775404,2.0,0.1890720550049635,0.125,0.7089581308826494,1.0,0.5057575464719032
4 | Gato,Yes,Yes,"Demonstrated a unified tokenization scheme across vision, language, and control tasks, achieving zero-shot transfer across domains.",Self-collected multi-domain tasks,custom ViT,Sentence Piece tokenizer,Autoregressive Transformer,mid,additive,autoregressive,manipulation,8.0,7.5,0.8974576185571919,0.8359025511407572,3.3239110568563475,-0.1100576750589249,0.1299884343793348,proxy_existing,0.44999999999999996,0.12,True,False,True,0.4279930606276009,0.4305555555555556,0.7501871128926653,0.0,0.4279930606276009,0.375,1.0,0.0,0.4507482651569002
5 | VIMA,Yes,Yes,Handled six distinct vision-language grounding tasks via a prompt-based multimodal policy.,VIMA self-collected,Mask R-CNN,T5-base,Transformer policy head,late,additive,autoregressive,manipulation,7.0,8.0,0.7991721913128478,0.8380069757685371,6.399450830221941,0.0118810966338154,0.2435599420368611,proxy_existing,0.0,0.12,False,False,False,0.1461359652221166,0.4166666666666667,0.6697118711603943,2.0,0.1461359652221166,0.25,0.8677993201361028,1.0,0.5659838213395548
6 | PerAct,Yes,No,Uses voxel-based representation with language conditioning for high-precision manipulation; operates directly on point cloud voxels.,RLBench,Perceiver Transformer + voxel grid encoder,CLIP text encoder,Transformer voxel policy head,late,additive,autoregressive,manipulation,7.5,7.0,0.4591508171188656,0.6574843244062099,6.160410617474682,-0.0523324853515879,0.177198634161614,proxy_existing,0.0,0.12,False,False,False,0.1063191804969684,0.4027777777777778,0.3018844647939565,0.0,0.1063191804969684,0.125,0.2635509557013374,0.0,0.1237175340495764
7 | RoboAgent,Yes,No,MT-ACT: multi-task transformer policy with semantically augmented CVAE encoding and action-chunking for strong real-world generalization.,RoboSet teleop demos,Multi-view CNN encoder,Semantic transformer encoder,CVAE + Chunked trajectory predictor,late,additive,autoregressive,manipulation,7.5,7.5,0.8449101813042241,0.7252329205938499,4.560081470746022,0.1842666858742209,0.4217112428541688,proxy_existing,0.0,0.12,False,False,False,0.2530267457125013,0.4166666666666667,0.6127566784267418,2.0,0.2530267457125013,0.25,0.7742361938988714,1.0,0.5693157349028432
8 | RT-Trajectory,Yes,No,Conditioned policies on user-sketched trajectories to improve generalization to novel layouts and paths.,RT-1 dataset,EfficientNet-B3,,Sketch-conditioned behavioral cloning policy,late,additive,autoregressive,manipulation,7.0,7.5,0.6418766670542628,0.7605357478913903,3.8338340004908176,0.0241846148409261,0.2834099680141348,proxy_existing,0.0,0.12,False,False,False,0.1700459808084808,0.4027777777777778,0.4881701510321467,2.0,0.1700459808084808,0.125,0.5695717123898609,1.0,0.4661544232995854
9 | ACT,Yes,Yes,Applied temporal ensembling to achieve smooth bimanual manipulation with 0.1 mm precision.,self-collected demos on ALOHA,ResNet-18,none,CVAE-Transformer head,late,additive,autoregressive,manipulation,7.0,7.5,0.5831591106725926,0.7786948181947049,6.689238755674514,-0.1450580825488966,0.0813716811514936,proxy_existing,0.0,0.12,False,False,False,0.0488230086908961,0.4027777777777778,0.4541029776637802,0.0,0.0488230086908961,0.125,0.5136078732836368,0.0,0.1718577204936332
10 | RT-2,Yes,Yes,"First large VLA co-finetuned on Internet VQA and robot data, unlocking emergent multi-robot zero-shot capabilities.",Internet VQA + RT-1-Kitchen,PaLI-X/PaLM-E ViT,PaLI-X/PaLM-E text encoder,Symbol-tuning transformer,late,additive,autoregressive,manipulation,8.0,7.5,0.8284117485771623,0.7932610098464953,3.5127330152894785,0.0846149942550634,0.3693478153018754,proxy_existing,0.5,0.64,True,True,False,0.6216086891811252,0.4305555555555556,0.6571467402450207,2.0,0.6216086891811252,0.375,0.8471579549928013,1.0,0.7109416610434817
11 | VoxPoser,Yes,Yes,Achieved zero-shot constraint-aware motion planning by composing a frozen VLM and LLM without additional training.,Self-collected motion demos+RLBench,OWL-ViT,GPT-4,MPC optimizer,late,additive,autoregressive,manipulation,8.0,7.5,0.3400593849515136,0.713772575414488,4.99268770790803,0.2066548795114432,0.5469016664199399,proxy_existing,0.3,0.44,True,False,False,0.578140999851964,0.4305555555555556,0.2427250629907086,2.0,0.578140999851964,0.375,0.1663668656584663,1.0,0.5298769663776075
12 | Diffusion Policy,Yes,Yes,Introduced diffusion-based policy modeling for multimodal visuomotor action distributions.,Self-collected demos,ResNet-18,,diffusion policy network,late,diffusion,diffusion,manipulation,7.0,7.5,0.6004810936459812,0.6980436623114735,5.503005149695753,0.5318835886685056,0.7809547486005879,proxy_existing,0.0,0.12,False,False,False,0.4685728491603527,0.4027777777777778,0.4191620217574396,1.4682813405760389,0.4685728491603527,0.125,0.4562086281682243,0.7341406702880194,0.445980536904149
13 | Octo,Yes,Yes,"First generalist diffusion policy trained on 4 M+ trajectories across 22 robot platforms, demonstrating broad transfer.",Open X-Embodiment,CNN encoder,T5-base,Diffusion Transformer head,late,diffusion,diffusion,manipulation,7.5,8.0,0.3227090520727584,0.8295345763188728,4.458287302375932,0.4658772778695578,0.7504740851197964,proxy_existing,0.15,0.8,False,False,True,0.5502844510718778,0.4305555555555556,0.2676983167854407,1.6108836398969508,0.5502844510718778,0.375,0.2073916710397382,0.8054418199484754,0.4845294855150229
14 | RevLA,Yes,No,Domain adaptation adapters to improve the generalization of robotic foundation models across visual domains.,Open X-Embodiment (OXE),DINO-v2 + SigLIP,LLama-7B,"Llama head, outputs 7 discrete action tokens",mid,additive,mlp,manipulation,8.0,8.0,0.7481729443270889,0.728190135386235,4.388740632873995,0.0352284522771958,0.281859914872038,proxy_existing,0.0,0.4,False,False,False,0.1691159489232228,0.4444444444444444,0.544812157621861,2.0,0.1691159489232228,0.5,0.6626203521805607,1.0,0.5829340752759459
15 | RDT-1B,Yes,Yes,1.2B-parameter diffusion foundation model excelling at bimanual manipulation and zero-shot generalization.,self-collected 6K ALOHA episodes,SigLIP,T5-XXL,Diffusion Transformer + MLP decoder,late,diffusion,diffusion,manipulation,8.0,8.0,0.6720586562785535,0.7670028742560422,5.147391144169445,0.5449403929944054,0.8785663095298731,proxy_existing,0.3,0.12,True,False,False,0.7771397857179239,0.4444444444444444,0.515470921034304,1.612224604423649,0.7771397857179239,0.5,0.6144200443414478,0.8061123022118245,0.6744180330677991
16 | RoboMamba,Yes,No,Mamba-based unified VLA with linear-time inference for real-time robotic reasoning.,SAPIEN sim benchmarks + real-world demos,Mamba VLM visual backbone,Mamba VLM text backbone,MLP policy head for SE(3) pose predicting,late,additive,mlp,manipulation,9.0,9.0,0.5989190983038765,0.7379112010598219,6.041328838185546,0.0706039411423014,0.304246865943765,proxy_existing,0.0,0.24,False,False,False,0.182548119566259,0.5,0.441949111167079,2.0,0.182548119566259,1.0,0.4936421126440558,1.0,0.6690475580525787
17 | Edge VLA,Yes,Yes,"Lightweight, edge-optimized VLA for low-power real-time inference.",OXE + Bridge robotics set,SigLIP + DINOV2,Qwen2,Non-autoregressive control head,late,additive,autoregressive,manipulation,8.0,9.0,0.8094491324648676,0.802111645826587,4.244493746892606,0.275430868818484,0.5159245576157531,proxy_existing,0.0,0.24,False,False,False,0.3095547345694518,0.4722222222222222,0.649268575854298,1.873154450076402,0.3095547345694518,0.75,0.834216102739682,0.9365772250382012,0.7075870155868338
18 | OpenVLA,Yes,Yes,LORA-fine-tuned open-source VLA achieving efficient transfer and high success.,OXE + DROID robot data,DINOv2 + SigLIP,Llama 2,Llama 2 output head (predicts discretized action tokens as output),late,additive,mlp,manipulation,8.0,8.0,0.8710812584877896,0.777229394845665,3.9601407565225455,0.1550862384637974,0.3915548087374619,proxy_existing,0.15,0.24,False,False,True,0.3349328852424771,0.4444444444444444,0.677029959395865,2.0,0.3349328852424771,0.5,0.8798211074486751,1.0,0.6786884981727881
19 | CogACT,Yes,Yes,"Componentized diffusion action transformer, +59.1% success over OpenVLA with specialized adaptation.",OXE subset + real trials,DINOv2 + SigLIP,LLaMA-2,Diffusion Transformer head,late,diffusion,diffusion,manipulation,8.0,8.0,0.4683348176971607,0.6784157112355569,4.188797063286583,0.5986942506852001,0.85,proxy_existing,0.0,0.24,False,False,False,0.51,0.4444444444444444,0.3177256984443941,1.41975640993242,0.51,0.5,0.2895741376563275,0.70987820496621,0.5023630856556345
20 | Pi-0,Yes,No,"General robot control flow model for high-frequency, open-world tasks.",Extended OXE called Pi-Cross-Embodiment,PaliGemma (SigLIP),PaliGemma (Gemma-2B),diffusion-based Flow matching action expert head,late,diffusion,diffusion,manipulation,8.0,8.0,0.8615764604594311,0.7188576246242815,6.057246465169008,0.3782902147459149,0.6222241987802719,proxy_existing,0.5,0.12,True,True,False,0.7733345192681632,0.4444444444444444,0.6193508077980628,1.6448329206670056,0.7733345192681632,0.5,0.7850686979986953,0.8224164603335028,0.7202049194000903
21 | HiRT,Yes,Yes,"Hierarchical planning/control separation, doubling execution speed and improving dynamic task success.",Self collected Real-world data,InstructBLIP,LLaMA-2,Latent-conditioned policy head (MLP),hierarch,additive,mlp,manipulation,8.0,8.0,0.7424706261681373,0.6183283851437078,3.651017809805437,0.3866988128803661,0.7088504756510811,proxy_existing,0.0,0.32,False,False,False,0.4253102853906486,0.4444444444444444,0.4590906632951819,1.83308159228919,0.4253102853906486,0.5,0.5218013923986502,0.916540796144595,0.5909131184834735
22 | QUAR-VLA,Yes,No,"Quadruped-specific VLA with adaptive gait and body command mapping, strong sim-to-real transfer.",QUART locomotion + manipulation,EfficientNet-B3,FiLM / VLM tokenizer,Transformer decoder (discrete tokens),late,additive,autoregressive,manipulation,7.0,7.5,0.8789528921693748,0.5517622731134857,6.378896212281935,-0.0875016671504077,0.1507957390644215,proxy_existing,0.15,0.24,False,False,True,0.1904774434386529,0.4027777777777778,0.4849730457430467,0.0,0.1904774434386529,0.125,0.5643196686094694,0.0,0.2199492780120305
23 | 3D-VLA,Yes,Yes,"Integrates 3D generative diffusion heads for world reconstruction, enabling planning in RGB+D and point-cloud spaces.",3D-language-action pairs,3D-aware transformer,3D-LLM,Multi-head diffusion planner,hierarch,diffusion,diffusion,manipulation,7.5,7.5,0.4384030747261169,0.7369067048032512,6.391823052221341,0.4038028464384491,0.7287100998409468,proxy_existing,0.0,0.32,False,False,False,0.4372260599045681,0.4166666666666667,0.3230621651720363,1.8046185317121648,0.4372260599045681,0.25,0.2983406168153762,0.9023092658560824,0.4719689856440067
24 | FAST,Yes,Yes,Frequency-space action tokenization for up to 15 times faster inference on general robot control.,DROID,PaliGemma (SigLIP),PaliGemma (Gemma-2B),FAST token generator,late,additive,mlp,manipulation,8.0,8.0,0.830380567853223,0.6192107215333876,5.878904525335356,0.073376766305158,0.3071124326763355,proxy_existing,0.0,0.12,False,False,False,0.1842674596058013,0.4444444444444444,0.5141805505676984,2.0,0.1842674596058013,0.5,0.6123002886341077,1.0,0.5741419370599772
25 | OpenVLA-OFT,Yes,Yes,"Optimized fine-tuning of OpenVLA with parallel chunked decoding, achieving 97.1 % success on LIBERO dataset and 26 time speed-up.",LIBERO,SigLIP + DINOv2,LLaMA-27B,Llama 2 Parallel chunking head,late,additive,autoregressive,manipulation,8.0,8.0,0.4998374778745145,0.7382447093624558,3.0696131738583787,-0.1304647427412237,0.8253499999999999,paper_numeric,0.0,0.12,False,False,False,0.4952099999999999,0.4444444444444444,0.3690023735819339,0.0,0.4952099999999999,0.5,0.3738088807701791,0.0,0.3422547201925447
26 | HybridVLA,Yes,No,Adaptive ensemble decoding that combines diffusion and autoregressive policies for robust multi-task generalization.,RT-X trajectories + synthetic task fusion,CLIP ViT + DINOV2,LLaMA-2,Diffusion policy head,late,diffusion,diffusion,manipulation,7.0,8.0,0.6050528944718216,0.461431006893817,5.769134126612075,0.2864995796461315,0.5273634791694917,proxy_existing,0.0,0.24,False,False,False,0.316418087501695,0.4166666666666667,0.279190166320151,1.8407129246781515,0.316418087501695,0.25,0.2262699035082603,0.9203564623390758,0.4282611133372578
27 | NORA,Yes,No,Low-overhead VLA with integrated visual reasoning and FAST token decoding for real-time performance.,OXE,Qwen-2.5-VL,Qwen-2.5-VL,FAST tokenizer head,late,additive,mlp,manipulation,9.0,9.0,0.3313439391961579,0.7295266372278755,5.418017977476206,0.1491638344176242,0.3854343204214691,proxy_existing,0.0,0.12,False,False,False,0.2312605922528814,0.5,0.2417242297276107,2.0,0.2312605922528814,1.0,0.1647227471076687,1.0,0.5989958348401374
28 | SpatialVLA,Yes,No,3D spatial encoding and adaptive action discretization to improve cross-robot manipulation generality.,OXE,SigLIP,PaliGemma (Gemma-2B),Adaptive action grid head,mid,additive,autoregressive,manipulation,8.0,8.0,0.7828334685797784,0.5129886648940307,3.2933772307593845,0.0140378213852738,0.2457888018949863,proxy_existing,0.0,0.12,False,False,False,0.1474732811369918,0.4444444444444444,0.4015846958811037,2.0,0.1474732811369918,0.5,0.4273334812159282,1.0,0.51870169058823
29 | MoLe-VLA,Yes,No,Selective layer activation in a multi-stage ViT yields 5.6 time faster inference and +8% task success.,RLBench + real-world trials,"DINOv2, SigLIP",LLaMA-2,Diffusion head,late,diffusion,diffusion,manipulation,8.0,8.0,0.3381384853626232,0.7531654120903784,5.878160849259938,0.0668179041229193,0.3003341988645883,proxy_existing,0.0,0.24,False,False,False,0.1802005193187529,0.4444444444444444,0.2546742116717564,2.0,0.1802005193187529,0.5,0.1859963261661426,1.0,0.4665492113712238
30 | UP-VLA,Yes,No,"Precise 3D spatial reasoning, achieving +33 % success on the CALVIN benchmark.",CALVIN,CLIP-ViT,Phi-1.5,MLP policy head,late,additive,mlp,manipulation,7.0,7.5,0.5181920159644303,0.6334131185569574,4.196988220821242,-0.0902076214084636,0.1380567036389008,proxy_existing,0.0,0.12,False,False,False,0.0828340221833404,0.4027777777777778,0.3282296208433464,0.0,0.0828340221833404,0.125,0.3068294531109721,0.0,0.1286658688235781
31 | Shake-VLA,Yes,Yes,Modular bimanual VLA achieving 100% success on cluttered cocktail-mixing tasks.,Cocktail mixing demos,"YOLOv8, EasyOCR","GPT-4o,Whisper-1",Bimanual arm controller,late,additive,autoregressive,humanoid,7.5,7.5,0.4448401886745534,0.6276009231406356,6.762219965790784,-0.0394644513160593,0.91,paper_numeric,0.0,0.12,False,False,False,0.546,0.4166666666666667,0.2791821130622042,0.0,0.546,0.25,0.2262566740211191,0.0,0.2555641685052798
32 | DexGraspVLA,Yes,No,Diffusion-based dexterous grasping with $\geq90\%$ zero-shot success across diverse objects.,Self-collected Dexterous grasp data,DINOv2,"Qwen-VL, Qwen2.5-VL",Diffusion policy head,late,diffusion,diffusion,manipulation,8.0,9.0,0.6385237394884777,0.7751252151945127,4.630556554737348,0.352775558805779,0.846,paper_numeric,0.3,0.12,True,False,False,0.7575999999999999,0.4722222222222222,0.4949358509778113,2.0,0.7575999999999999,0.75,0.5806860640006037,1.0,0.772071516000151
33 | DexVLA,Yes,No,Cross-embodiment diffusion expert enabling rapid adaptation without per-task tuning.,"OXE, RLBench","Qwen2-VL (ViT), ResNet-50","Qwen2-VL,DistilBERT",Diffusion Transformer head,late,diffusion,diffusion,manipulation,7.0,9.0,0.5241957388462013,0.7614303867871333,4.111684899578842,0.4168179284148171,0.6620405334163396,proxy_existing,0.2,0.24,False,True,False,0.5472243200498037,0.4444444444444444,0.3991385641818301,1.5883206750105938,0.5472243200498037,0.5,0.4233150990810048,0.7941603375052969,0.5661749391590263
34 | Humanoid-VLA,Yes,No,"Hierarchical VLA for full-body humanoid control, integrating perception and latent action planning.",Self-collected humanoid robot episodes,"Video Visual Encoder,Cross-Attention",Llama3-70B,Token-based Motion Decoder + RL Whole-Body Ctrlr,hierarch,additive,mlp,humanoid,7.5,8.0,0.7293618801983466,0.5517167898798925,4.90020348437308,0.3507757032082798,0.6671398782416281,proxy_existing,0.0,0.32,False,False,False,0.4002839269449769,0.4305555555555556,0.4024011952037944,1.901898769326965,0.4002839269449769,0.375,0.4286747852399314,0.9509493846634828,0.5387270242120977
35 | Gemini Robotics,Yes,Yes,"General-purpose VLA built on the Gemini 2.0 foundation, enabling long-horizon dexterous manipulation across diverse robot embodiments with zero-shot adaptability.",Self-collected ALOHA2 demos + web-scale VL Dataset,Gemini 2.0 vision component,Gemini 2.0 language component,Local zero-shot policy head,late,additive,mlp,manipulation,9.0,9.0,0.5024443708913685,0.4136220004578811,5.073136838466522,0.1122884523933304,0.4453939820120637,proxy_existing,0.3,0.8400000000000001,True,False,False,0.5172363892072382,0.5,0.2078220458068894,2.0,0.5172363892072382,1.0,0.109029944368401,1.0,0.6565665833939098
36 | ECoT,Yes,Yes,"Embodied chain-of-thought planning for interpretable, stepwise VLA control.",Bridge v2,"SigLIP, DINOv2",LLaMA-2 7B,Autoregressive VLA decoder with CoT module,hierarch,additive,autoregressive,manipulation,8.0,8.0,0.6121139121150764,0.4207910090872726,5.305822709931176,-0.163697706863153,0.0697808666961356,proxy_existing,0.0,0.32,False,False,False,0.0418685200176813,0.4444444444444444,0.2575720307552611,0.0,0.0418685200176813,0.5,0.1907567176196579,0.0,0.1831563094093348
37 | OTTER,Yes,Yes,Zero-shot generalization via a frozen CLIP backbone and causal transformer action decoding.,LIBERO,Frozen CLIP ViT,CLIP text encoder,Causal transformer delta-trajectory head,early,additive,autoregressive,manipulation,7.0,7.0,0.3873813757354211,0.4030068262562456,3.787310338808125,0.0114451996703417,0.2688504680641592,proxy_existing,0.3,0.12,True,False,False,0.4113102808384955,0.3888888888888889,0.1561173387859102,2.0,0.4113102808384955,0.0,0.0240920520029775,1.0,0.3588505832103682
38 | OneTwoVLA,Yes,Yes,Unified reasoning-acting framework that dynamically toggles between planning and control via decision tokens.,Self-collected 16K reasoning-augmented robot episodes,same as pi-0 vla,same as pi-0 vla,Diffusion policy head,late,diffusion,diffusion,manipulation,7.5,7.5,0.6857394471873495,0.4018821796952228,6.351023520346339,-0.1212285745536378,0.1190921010592182,proxy_existing,0.0,0.32,False,False,False,0.0714552606355309,0.4166666666666667,0.2755864637386491,0.0,0.0714552606355309,0.25,0.2203499221444665,0.0,0.1354512956949993
39 | Helix,Yes,Yes,"First 200 Hz VLA for full humanoid control on embedded systems, enabling zero-shot task transfer.",self-collected 200Hz teleop + sim logs,Pretrained VLM,Pretrained VLM,Fast transformer policy,hierarch,additive,autoregressive,humanoid,7.5,7.5,0.4485294028643533,0.8468204737367604,4.251979757421267,0.0445625847673235,0.3409583152292508,proxy_existing,0.44999999999999996,0.44,True,False,True,0.5545749891375504,0.4166666666666667,0.3798238814184579,2.0,0.5545749891375504,0.25,0.3915859096096872,1.0,0.5490402246868094
40 | Gemini Robotics On-Device,Yes,Yes,"On-device optimized variant of Gemini VLA, delivering low-latency dual-arm and humanoid control on embedded hardware.",Self-collected ALOHA2 + few-shot adaptation demos,Gemini SDK vision module,Gemini SDK language module,On-device optimized policy head,hierarch,additive,mlp,manipulation,9.0,9.0,0.6737207967711781,0.7512978084654224,4.58960465341014,0.0578192683198506,0.3269859209233182,proxy_existing,0.0,0.44,False,False,False,0.1961915525539909,0.5,0.5061649581317643,2.0,0.1961915525539909,1.0,0.5991326764998347,1.0,0.6988310572634564
41 | OE-VLA,Yes,Yes,Curriculum-tuned LLaVA backbone with interleaved multimodal prompting for improved generalization across vision-language-action tasks.,CALVIN,SigLIP-400M ViT,Qwen-1.5 language module,MLP token generator,mid,additive,mlp,manipulation,8.0,9.0,0.4023315440944844,0.5080506459769691,5.5938358251813005,-0.0791967599127408,0.149435840997341,proxy_existing,0.0,0.12,False,False,False,0.0896615045984045,0.4722222222222222,0.2044048008741142,0.0,0.0896615045984045,0.75,0.1034162662526238,0.0,0.2357694427127571
42 | SmolVLA,Yes,Yes,"Ultra-lightweight VLA trained on community-contributed robot demonstrations, capable of real-time inference on CPU.",22.9K community episodes,SigLIP (VLM-2) visual backbone,SmolVLM2 text backbone,Chunked flow-matching head,late,flow,flow,manipulation,8.0,7.5,0.4471432034565596,0.563694615968473,5.88563264010242,0.159064194319152,0.3956658136848474,proxy_existing,0.0,0.12,False,False,False,0.2373994882109084,0.4305555555555556,0.2520522163553582,2.0,0.2373994882109084,0.375,0.1816890441255245,1.0,0.4485221330841082
43 | EF-VLA,Yes,Yes,"Early fusion of fine-grained CLIP visual tokens into the language-action pipeline, boosting zero-shot generalization.",Self-collected real and simulated tasks,Frozen CLIP ViT,Frozen CLIP text encoder,causal transformer,early,additive,autoregressive,manipulation,7.0,7.0,0.5388591101212976,0.4685851285083851,3.5869546434921373,0.0809525659268737,0.348288376475499,proxy_existing,0.3,0.24,True,False,False,0.4589730258852994,0.3888888888888889,0.2525013653641023,2.0,0.4589730258852994,0.0,0.1824268835285263,1.0,0.4103499773534564
44 | PD-VLA,Yes,No,"First parallel decoding method with action chunking for VLA, achieving a 2.52 times speed-up without sacrificing control fidelity.",Chunked trajectory demonstrations,CLIP-ViT-Large-Patch14-336 (LLaVA),Vicuna-7B-v1.5 (LLaVA),Fixed-point token predictor,late,additive,mlp,manipulation,7.0,8.0,0.7531608497915556,0.5928706949108107,5.624121359444812,0.0230567439223281,0.2551093767123281,proxy_existing,0.0,0.12,False,False,False,0.1530656260273968,0.4166666666666667,0.4465269963955363,2.0,0.1530656260273968,0.25,0.5011624322667585,1.0,0.4760570145735388
45 | LeVERB,Yes,Yes,"Dual-process latent VLA for whole-body humanoid control, achieving 58.5 % success on sim-to-real humanoid demos.",sim-to-real humanoid demos,SigLIP ViT,SigLIP text encoder,Latent CVAE verb + transformer policy,hierarch,additive,autoregressive,humanoid,8.0,8.0,0.4677651620303111,0.8752518873022228,5.220993618710439,0.0662573231807932,0.593775,paper_numeric,0.15,0.32,False,False,True,0.456265,0.4444444444444444,0.4094123408812598,2.0,0.456265,0.5,0.4401923427549412,1.0,0.5991143356887353
46 | TLA,Yes,Yes,"First language-grounded tactile-action model for high-precision contact tasks, with 85 % success on peg-in-hole task.",TLA Data,ViT (Qwen2-VL),Qwen2-VL,Multimodal $,late,additive,autoregressive,manipulation,8.0,9.0,0.7669484351875,0.4149663161823027,4.457390751517612,0.03072178458818,0.7224999999999999,paper_numeric,0.0,0.12,False,False,False,0.4334999999999999,0.4722222222222222,0.3182577668515384,2.0,0.4334999999999999,0.75,0.290448192876846,1.0,0.6184870482192115
47 | iRe-VLA,Yes,Yes,Iterative RL and supervised fine-tuning pipeline for robust control and rapid generalization across embodiments.,"Franka-Kitchen, real Panda robot demos",BLIP-2 (pre-trained VLM),BLIP-2,MLP action head after token learner,mid,additive,mlp,manipulation,8.0,8.0,0.5783937533742227,0.5723963531739436,3.638683175672992,0.1105055446043245,0.3454830423131388,proxy_existing,0.2,0.4,False,True,False,0.3572898253878833,0.4444444444444444,0.3310704751299944,2.0,0.3572898253878833,0.5,0.3114962656610799,1.0,0.5421965227622407
48 | TraceVLA,Yes,Yes,"Visual trace prompting to incorporate spatio-temporal cues, boosting task success by 3.5 times over OpenVLA.",OXE + 150K trace-annotated demos,Phi-3-Vision with trace overlay,Phi-3 LLM,Quantized delta-motion tokens,late,additive,mlp,manipulation,7.5,7.5,0.8747681572690535,0.5723812407690863,3.399380958916897,-0.0145488699954366,0.2162459829274784,proxy_existing,0.0,0.24,False,False,False,0.129747589756487,0.4166666666666667,0.500700883242948,0.0,0.129747589756487,0.25,0.5901565690712678,0.0,0.2424760397069387
49 | V-JEPA 2,Yes,No,Dual-stream self-supervised video JEPA enabling predictive planning in vision-language-action tasks.,Droid video data,ViT (self-supervised),LLM for QA/alignment,Action-conditioned transformer predictor head,late,additive,autoregressive,manipulation,8.0,7.5,0.7273879472297227,0.78388197737624,4.94189810601112,0.023234551454188,0.2868293416385221,proxy_existing,0.0,0.32,False,False,False,0.1720976049831132,0.4305555555555556,0.5701863023940791,2.0,0.1720976049831132,0.375,0.7043037210980725,1.0,0.5628503315202964
50 | Knowledge Insulating VLA,Yes,No,"Implements insulation layers between vision-language and action modules, accelerating training and inference while maintaining generalization.",Multi-domain VL datasets,PaliGemma (SigLIP),PaliGemma (Gemma-2B) encoder,Diffusion Modular policy head,late,diffusion,diffusion,manipulation,8.0,8.0,0.8562742460021622,0.701619200843473,4.50872085098678,0.1539494004845021,0.3903799473948033,proxy_existing,0.0,0.12,False,False,False,0.2342279684368819,0.4444444444444444,0.6007784521828844,2.0,0.2342279684368819,0.5,0.7545589662333779,1.0,0.622196733667565
51 | GR00T N1,Yes,No,Diffusion-based foundation model enabling unified humanoid control with policy tokenization.,Multi-modal humanoid demonstrations,SigLIP-2 ViT (Eagle-2 VLM),SmolLM2 (Eagle-2 VLM),Generative diffusion transformer based planner,hierarch,diffusion,diffusion,humanoid,8.0,9.0,0.8496329592839016,0.7549440183147114,4.811012121791057,0.4343490004782238,0.7641774729383877,proxy_existing,0.0,0.32,False,False,False,0.4585064837630326,0.4722222222222222,0.6414253203744082,1.759362798341929,0.4585064837630326,0.75,0.8213315970896437,0.8796813991709646,0.7273798700059102
52 | AgiBot World Colosseo,Yes,No,Integrates multiple embodied datasets into a unified platform for scalable training and evaluation of VLA models.,AgiBot World Data,PaliGemma (SigLIP),PaliGemma (Gemma-2B),Latent action planner + policy head,hierarch,additive,mlp,manipulation,8.0,8.0,0.7626189584378686,0.6755677872681651,6.665700127592268,0.0166972498376189,0.2792388307289179,proxy_existing,0.0,0.32,False,False,False,0.1675432984373507,0.4444444444444444,0.5152008022806237,2.0,0.1675432984373507,0.5,0.6139763068376907,1.0,0.5703799013187603
53 | Hi Robot,Yes,No,Hierarchical separation of planning and control for open-ended instruction following in complex environments.,Self-collected Instruction-following data,PaliGemma-3B (SigLIP),PaliGemma-3B (Gemma-2B),Flow-Matching Action Expert,hierarch,flow,flow,manipulation,8.0,8.0,0.58582712374957,0.6379285578026657,4.897091309085307,0.2584483445529367,0.595117218901334,proxy_existing,0.0,0.32,False,False,False,0.3570703313408003,0.4444444444444444,0.3737158521752469,2.0,0.3570703313408003,0.5,0.3815519463534172,1.0,0.5596555694235544
54 | EnerVerse,Yes,No,"World-model LLM for predictive future-space modeling, enabling long-horizon manipulation planning.",self-collected Synthetic task fusion data,Pretrained VAE + Diffusion Generator,Tokenized instruction prompt,Diffusion Policy Head,late,diffusion,diffusion,manipulation,7.5,7.5,0.5328958801276293,0.6145952370943397,5.688954969783967,0.177573106395872,0.4660330182882503,proxy_existing,0.0,0.32,False,False,False,0.2796198109729501,0.4166666666666667,0.3275152697936372,2.0,0.2796198109729501,0.25,0.3056559531325908,1.0,0.4588189410263852
55 | FLaRe,Yes,No,"Large-scale RL fine-tuning framework generating robust, adaptive robot policies across domains.",Multi-domain RL demonstrations,DinoV2,Transformer policy (language tokens),RL policy head,late,additive,mlp,manipulation,8.0,7.5,0.3776007567066373,0.880133513833798,6.637642281000272,-0.0427204702940124,0.1871321384268374,proxy_existing,0.0,0.12,False,False,False,0.1122792830561024,0.4305555555555556,0.3323390808265137,0.0,0.1122792830561024,0.375,0.3135802672987143,0.0,0.2002148875887041
56 | Beyond Sight,Yes,No,Fuses heterogeneous sensor modalities via language-grounded attention to improve VLA generalization.,self-collected Multi-sensor data,Multi-modal ViT,"Transformer (shared, task language input)",Transformer action head,late,additive,autoregressive,manipulation,8.0,7.5,0.5914017715629405,0.5134154999230425,4.789740088954402,-0.223796240162281,0.0,proxy_existing,0.0,0.12,False,False,False,0.0,0.4305555555555556,0.30363483620236,0.0,0.0,0.375,0.2664263778215356,0.0,0.1603565944553839
57 | GeoManip,Yes,No,"Encodes geometric constraints as model interfaces, enhancing robustness and precision in manipulation.",Self-collected Simulated geometry tasks,VLM (GPT-4o) + Grounding-DINO,GPT-4o,Constraint solver head,late,additive,autoregressive,manipulation,8.0,7.5,0.8592215756878294,0.7057043991085642,5.49446633883572,-0.0042860136644593,0.226852096892811,proxy_existing,0.0,0.12,False,False,False,0.1361112581356865,0.4305555555555556,0.6063564457718934,0.0,0.1361112581356865,0.375,0.7637222135734322,0.0,0.3187083679272797
58 | Universal Actions,Yes,No,Defines a universal action dictionary to standardize policy transfer and improve cross-task adaptability.,Self-collected Cross-domain manipulation demos,Shared VLM (LLaVA-OneVision-0.5B),LLaVA,Unified action tokenizer head,mid,additive,mlp,manipulation,7.5,7.5,0.5080775514611926,0.7479414564288672,6.123886852408592,0.049215753645793,0.2821433163291065,proxy_existing,0.15,0.12,False,False,True,0.2692859897974639,0.4166666666666667,0.3800122638186971,2.0,0.2692859897974639,0.25,0.3918953747426889,1.0,0.4777953411350382
59 | RoboHorizon,Yes,No,LLM-enhanced multi-view environment modeling for robust long-horizon task planning.,Self-collected Multi-view robot trajectories,Multi-view transformer (ViT),GPT-based planner,DreamerV2 Actor-Critic RL Head,late,additive,autoregressive,manipulation,8.0,7.5,0.358775347039228,0.844277061541385,3.855277454890607,-0.0192404348836518,0.2375113101183754,proxy_existing,0.0,0.32,False,False,False,0.1425067860710252,0.4305555555555556,0.30290579575177,0.0,0.1425067860710252,0.375,0.2652287468341456,0.0,0.1956838832262927
60 | SAM2Act,Yes,No,Utilizes SAM-based segmentation prompts with memory-augmented VLA for improved object-centric manipulation.,SAM-labeled manipulation tasks,SAM2 segmentation encoder,CLIP text encoder,Memory-augmented policy head,late,additive,mlp,manipulation,7.5,7.0,0.3454384014982286,0.4094844458593771,6.974959026767888,-0.0011549529406109,0.2300878809473218,proxy_existing,0.0,0.12,False,False,False,0.138052728568393,0.4027777777777778,0.1414516524160511,0.0,0.138052728568393,0.125,0.0,0.0,0.0657631821420982
61 | LMM Planner Integration,Yes,No,Merges LMM-based strategic planning with 3D skill policies for generalizable manipulation.,skill library demos,DINO (2D semantics) + PointNext (3D),CLIP Language Encoder,3D Transformer head,late,additive,autoregressive,manipulation,8.0,7.0,0.4179610818764548,0.8514658040981538,6.7964056674593,-0.0368246936538394,0.2170940927615814,proxy_existing,0.0,0.32,False,False,False,0.1302564556569488,0.4166666666666667,0.3558795686616698,0.0,0.1302564556569488,0.25,0.3522513968181363,0.0,0.1831269631187712
62 | VLA-Cache,Yes,No,"Introduces token-caching to reuse computation across time steps, boosting inference efficiency.",LIBERO,CLIP ViT,LLaMA-2,Cached inference head,mid,additive,autoregressive,manipulation,7.0,8.0,0.6572118829346982,0.77285784951399,6.574836967372796,0.0654751969282156,0.2989465826815463,proxy_existing,0.0,0.12,False,False,False,0.1793679496089277,0.4166666666666667,0.507931362519951,2.0,0.1793679496089277,0.25,0.6020344367928915,1.0,0.5078505966004547
63 | HAMSTER,Yes,No,Hierarchical skill decomposition to sequence multi-step manipulation actions.,Self-collected Decomposed manipulation tasks,VILA-1.5-13B,VILA-1.5-13B,Robotic View Transformer Skill execution head,hierarch,additive,autoregressive,manipulation,7.5,7.5,0.5872725958857852,0.6122794789388659,5.155750814495258,0.274606334705625,0.5786990418678443,proxy_existing,0.0,0.32,False,False,False,0.3472194251207065,0.4166666666666667,0.3595749590040237,2.0,0.3472194251207065,0.25,0.3583219982243045,1.0,0.4888853558362528
64 | TempoRep VLA,Yes,No,Uses successor representation temporal encoding for compositional action planning.,Self-collected Temporal demonstration sequences,ResNet-34 CNN,Pretrained transformer (CLIP-style),MLP (3x256) head on ResNet feature,late,additive,mlp,manipulation,7.0,7.0,0.3710916043128029,0.8319275862941342,5.733365924994738,-0.1718310310918341,0.0603372017239277,proxy_existing,0.0,0.32,False,False,False,0.0362023210343566,0.3888888888888889,0.308721342669968,0.0,0.0362023210343566,0.0,0.27478223483638,0.0,0.0777461389676841
65 | ConRFT,Yes,No,Applies consistency regularized fine-tuning with reinforcement for stable policy learning.,Self-collected data for fine-tuning,same as in octo,same as in octo,Reinforced policy head,late,additive,mlp,manipulation,7.5,7.5,0.4512695045083348,0.4605643162990613,5.266195097623432,-0.20518428655428,0.0192344601461592,proxy_existing,0.0,0.12,False,False,False,0.0115406760876955,0.4166666666666667,0.2078386308104974,0.0,0.0115406760876955,0.25,0.1090571893782367,0.0,0.092649466366483
66 | RoboBERT,Yes,No,"Unified multimodal Transformer for end-to-end vision-language-action manipulation, pre-trained on diverse robot and language data.",Self-collected Multi-domain robot demos,CLIP ViT,BERT-base,CNN-based Diffusion Policy Head,late,diffusion,diffusion,manipulation,7.0,7.5,0.73429447311975,0.5937061404903248,3.310947712946261,0.1093259225687766,0.3442639659330793,proxy_existing,0.0,0.12,False,False,False,0.2065583795598475,0.4027777777777778,0.4359551376193033,2.0,0.2065583795598475,0.125,0.4837955143481676,1.0,0.4538384734770038
67 | Diffusion Transformer Policy,Yes,No,"Adapts diffusion-based transformer architectures to VLA policy learning, enabling robust multimodal action sampling.",LIBERO + CALVIN,DINOv2,CLIP Text Encoder,Diffusion generator head,late,diffusion,diffusion,manipulation,8.0,7.0,0.586296483646221,0.5789505084739845,3.1318136039628754,0.4095749160585827,0.6545552669344674,proxy_existing,0.0,0.24,False,False,False,0.3927331601606804,0.4166666666666667,0.3394366473234888,1.598133189486742,0.3927331601606804,0.25,0.3252397925897463,0.799066594743371,0.4417598868734494
68 | GEVRM,Yes,No,Generative video modeling of goal-oriented tasks to enhance planning for visual manipulation.,CALVIN,ResNet-34,T5 Encoder,Diffusion Policy,late,diffusion,diffusion,manipulation,7.0,8.0,0.6167495034897343,0.8037992235722431,3.906151656571228,-0.0823315495698903,0.1642557342839666,proxy_existing,0.0,0.32,False,False,False,0.0985534405703799,0.4166666666666667,0.4957427720436149,0.0,0.0985534405703799,0.25,0.5820116333459017,0.0,0.2326412684790704
69 | SoFar,Yes,No,Introduces successor-feature orientation representations bridging spatial reasoning and robotic manipulation.,Self-collected Orientation task demonstrations,"Florence-2 (ViT-style), SAM",CLIP Text Encoder,"VLM (e.g., LLaVA or GPT-4o) for 6D goal pose, then motion planner",hierarch,additive,planner,manipulation,8.0,7.0,0.3489170139299367,0.8896439013460229,5.111842673628743,0.0156524486050095,0.2780257040262446,proxy_existing,0.0,0.32,False,False,False,0.1668154224157467,0.4166666666666667,0.3104118935186334,2.0,0.1668154224157467,0.25,0.2775593867495308,1.0,0.4235937022913194
70 | ARM4R,Yes,No,Auto-regressive 4D transition model for predicting and planning manipulator trajectories.,76K videos from the Epic-Kitchens100 dataset,ViT-Base,CLIP text encoder,2-layer MLP,late,additive,mlp,manipulation,8.0,7.0,0.8529551794647776,0.6817319807119595,4.234840949051182,0.0130965168287242,0.2750579920909524,proxy_existing,0.0,0.32,False,False,False,0.1650347952545714,0.4166666666666667,0.5814868239550477,2.0,0.1650347952545714,0.25,0.7228676495935815,1.0,0.5344756112120382
71 | Magma,Yes,No,"Foundation multimodal agent model unifying vision, language, and action domains for end-to-end control.",Self-collected Multimodal interaction dataset,ConvNeXt-XXlarge,LLaMA-3-8B (decoder-only LLM),Decoder-Only LLM Head (LLaMA-3-8B),late,additive,autoregressive,manipulation,7.5,8.0,0.6613608069549721,0.6110504529653061,4.490546076315089,0.1325030261465944,0.3682162647868279,proxy_existing,0.0,0.12,False,False,False,0.2209297588720967,0.4305555555555556,0.4041248206633361,2.0,0.2209297588720967,0.375,0.4315062704604443,1.0,0.5068590073331353
72 | An Atomic Skill Library,Yes,No,"Constructs an atomic skill library for modular, data-efficient composition of robotic actions.",Self-collected Skill primitive demonstrations,"Prismatic VLM (scene description), DINO-X (object detection), SAM-2 (segmentation)","Prismatic, GPT-4 (for planning)",Skill executor module,late,additive,autoregressive,manipulation,8.0,7.5,0.368608015236915,0.8657175772923327,3.2336294870169024,0.1079079756737837,0.3427985935377072,proxy_existing,0.0,0.12,False,False,False,0.2056791561226243,0.4305555555555556,0.3191104379214373,2.0,0.2056791561226243,0.375,0.2918489180280164,1.0,0.4681320185376602
73 | RoboBrain,Yes,No,Knowledge-grounded policy brain that maps abstract high-level plans to concrete multimodal actions across diverse tasks.,Multi-domain robot and plan data,SigLIP,Qwen2.5-7B-Instruct (decoder-only LLM),LoRA adapters for skill,mid,additive,autoregressive,manipulation,8.0,9.0,0.724751161058266,0.7818470241237657,5.372459221373023,-0.0810376702748838,0.1475333584455862,proxy_existing,0.0,0.24,False,False,False,0.0885200150673517,0.4722222222222222,0.5666445385036493,0.0,0.0885200150673517,0.75,0.6984854895009568,0.0,0.3842513761420771
74 | SafeVLA,Yes,No,"Safety-aware VLA integrating constraint feedback through safe RL to ensure collision-free, reliable manipulation.",Safety-scenario demonstrations,"Modular (DINOv2, SigLIP, CLIP)","LLM (model-agnostic, e.g., T5, LLaMA, Qwen)",Safety-constraint policy head,late,additive,mlp,manipulation,7.0,8.0,0.62882291172083,0.606375032211623,4.553138862416713,-0.2176644525965327,0.0063368750020631,proxy_existing,0.0,0.12,False,False,False,0.0038021250012378,0.4166666666666667,0.3813025133501249,0.0,0.0038021250012378,0.25,0.3940149317838008,0.0,0.1619542641962596
75 | Diffusion-VLA,Yes,Yes,"Multimodal VLA framework unifying vision-language reasoning with diffusion-based policy for robust, generalizable manipulation across diverse robot embodiments.",Multi-embodiment manipulation suites,SigLIP,Qwen2-VL (2B/7B/72B),Latent diffusion policy head + MLP,late,diffusion,diffusion,manipulation,8.0,9.0,0.3855532100040863,0.881294561592872,3.682033560676781,0.3702238675017123,0.6463880157586654,proxy_existing,0.2,0.52,False,True,False,0.5378328094551993,0.4722222222222222,0.3397859471812757,1.745938261950861,0.5378328094551993,0.75,0.3258136048291605,0.8729691309754305,0.6216538863149476
76 |
--------------------------------------------------------------------------------
/Plot_script/plots/scale_analysis_adjusted_4panel.svg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Muhayyuddin/VLAs/HEAD/Plot_script/plots/scale_analysis_adjusted_4panel.svg
--------------------------------------------------------------------------------