├── wonderbread ├── collect │ └── qa │ │ ├── __init__.py │ │ └── trace_cleanup │ │ └── config.py └── benchmark │ ├── tasks │ ├── knowledge_transfer │ │ └── demo_validation │ │ │ └── demo_2_config │ │ │ ├── 117 @ 2023-12-31-23-34-18.json │ │ │ ├── 127 @ 2024-01-08-19-59-09.json │ │ │ ├── 156 @ 2023-12-27-12-13-15.json │ │ │ ├── 157 @ 2023-12-27-22-58-25.json │ │ │ ├── 163 @ 2024-01-13-17-57-52.json │ │ │ ├── 188 @ 2024-01-01-22-56-49.json │ │ │ ├── 205 @ 2023-12-30-13-02-59.json │ │ │ ├── 21 @ 2024-01-05-02-16-47.json │ │ │ ├── 235 @ 2024-01-01-00-13-13.json │ │ │ ├── 258 @ 2023-12-30-14-42-44.json │ │ │ ├── 260 @ 2024-01-01-14-11-28.json │ │ │ ├── 276 @ 2023-12-26-22-12-50.json │ │ │ ├── 322 @ 2024-01-02-16-28-59.json │ │ │ ├── 332 @ 2024-01-02-15-52-51.json │ │ │ ├── 357 @ 2024-01-06-14-24-18.json │ │ │ ├── 42 @ 2024-01-13-05-03-47.json │ │ │ ├── 44 @ 2024-01-05-03-59-01.json │ │ │ ├── 45 @ 2024-01-08-14-51-24.json │ │ │ ├── 47 @ 2023-12-31-18-24-54.json │ │ │ ├── 519 @ 2023-12-26-23-48-13.json │ │ │ ├── 652 @ 2024-01-03-16-23-50.json │ │ │ └── 94 @ 2024-01-08-18-24-10.json │ ├── documentation │ │ └── demo_segmentation │ │ │ └── demo_2_config │ │ │ ├── 0 @ 2023-12-25-15-10-58.json │ │ │ ├── 1 @ 2023-12-25-15-44-04.json │ │ │ ├── 3 @ 2023-12-25-15-31-51.json │ │ │ ├── 4 @ 2023-12-25-17-18-04.json │ │ │ ├── 5 @ 2024-01-05-02-02-06.json │ │ │ ├── 114 @ 2024-01-13-05-31-58.json │ │ │ ├── 115 @ 2024-01-13-05-34-10.json │ │ │ ├── 118 @ 2024-01-08-01-14-57.json │ │ │ ├── 122 @ 2024-01-13-06-01-40.json │ │ │ ├── 131 @ 2024-01-13-06-11-26.json │ │ │ ├── 14 @ 2023-12-25-17-33-45.json │ │ │ ├── 188 @ 2024-01-01-22-56-49.json │ │ │ ├── 21 @ 2024-01-05-02-16-47.json │ │ │ ├── 245 @ 2024-01-13-07-01-31.json │ │ │ ├── 279 @ 2024-01-09-02-08-52.json │ │ │ ├── 29 @ 2023-12-25-17-51-26.json │ │ │ ├── 319 @ 2024-01-01-00-19-57.json │ │ │ ├── 333 @ 2024-01-01-13-19-13.json │ │ │ ├── 410 @ 2023-12-31-20-27-12.json │ │ │ ├── 42 @ 2024-01-13-05-03-47.json │ │ │ ├── 44 @ 
2024-01-05-03-59-01.json │ │ │ ├── 45 @ 2024-01-08-14-51-24.json │ │ │ ├── 47 @ 2023-12-31-18-24-54.json │ │ │ ├── 499 @ 2024-01-07-16-03-13.json │ │ │ ├── 512 @ 2023-12-26-23-44-03.json │ │ │ ├── 545 @ 2024-01-28-09-10-43.json │ │ │ ├── 607 @ 2023-12-31-13-30-17.json │ │ │ ├── 62 @ 2023-12-26-23-59-25.json │ │ │ ├── 622 @ 2024-01-01-19-21-28.json │ │ │ ├── 63 @ 2023-12-27-00-04-48.json │ │ │ ├── 64 @ 2023-12-27-00-12-03.json │ │ │ ├── 652 @ 2024-01-03-16-23-50.json │ │ │ ├── 69 @ 2024-01-05-04-25-33.json │ │ │ ├── 709 @ 2024-01-07-20-50-00.json │ │ │ ├── 711 @ 2023-12-31-03-10-51.json │ │ │ ├── 721 @ 2024-01-06-15-15-58.json │ │ │ ├── 769 @ 2024-01-06-12-39-15.json │ │ │ ├── 79 @ 2023-12-27-22-50-34.json │ │ │ ├── 94 @ 2024-01-08-18-24-10.json │ │ │ ├── 106 @ 2023-12-30-12-35-44.json │ │ │ ├── 107 @ 2024-01-13-05-19-45.json │ │ │ ├── 116 @ 2024-01-16-09-09-51.json │ │ │ ├── 117 @ 2023-12-31-23-34-18.json │ │ │ ├── 125 @ 2023-12-27-12-53-48.json │ │ │ ├── 127 @ 2024-01-08-19-59-09.json │ │ │ ├── 133 @ 2024-01-14-06-02-33.json │ │ │ ├── 143 @ 2024-01-08-01-57-37.json │ │ │ ├── 147 @ 2024-01-08-02-17-03.json │ │ │ ├── 156 @ 2023-12-27-12-13-15.json │ │ │ ├── 157 @ 2023-12-27-22-58-25.json │ │ │ ├── 162 @ 2024-01-08-02-53-13.json │ │ │ ├── 163 @ 2024-01-13-17-57-52.json │ │ │ ├── 171 @ 2023-12-31-01-42-52.json │ │ │ ├── 177 @ 2023-12-28-09-18-21.json │ │ │ ├── 179 @ 2023-12-28-11-13-06.json │ │ │ ├── 185 @ 2023-12-28-17-09-28.json │ │ │ ├── 194 @ 2024-01-13-06-23-00.json │ │ │ ├── 203 @ 2024-01-13-06-37-19.json │ │ │ ├── 205 @ 2023-12-30-13-02-59.json │ │ │ ├── 212 @ 2023-12-31-15-14-14.json │ │ │ ├── 216 @ 2023-12-31-16-12-31.json │ │ │ ├── 226 @ 2023-12-27-12-59-37.json │ │ │ ├── 228 @ 2023-12-26-22-02-47.json │ │ │ ├── 235 @ 2024-01-01-00-13-13.json │ │ │ ├── 240 @ 2024-01-08-03-34-34.json │ │ │ ├── 258 @ 2023-12-30-14-42-44.json │ │ │ ├── 259 @ 2023-12-30-14-47-53.json │ │ │ ├── 260 @ 2024-01-01-14-11-28.json │ │ │ ├── 276 @ 2023-12-26-22-12-50.json │ │ │ ├── 280 
@ 2024-01-09-02-10-40.json │ │ │ ├── 281 @ 2024-02-18-06-05-33.json │ │ │ ├── 282 @ 2024-01-09-02-16-45.json │ │ │ ├── 285 @ 2024-01-02-19-53-59.json │ │ │ ├── 296 @ 2024-01-14-06-32-18.json │ │ │ ├── 299 @ 2024-01-09-02-51-58.json │ │ │ ├── 317 @ 2023-12-31-23-22-24.json │ │ │ ├── 322 @ 2024-01-02-16-28-59.json │ │ │ ├── 323 @ 2024-01-02-16-31-55.json │ │ │ ├── 327 @ 2023-12-26-22-37-29.json │ │ │ ├── 332 @ 2024-01-02-15-52-51.json │ │ │ ├── 336 @ 2024-01-04-15-02-17.json │ │ │ ├── 343 @ 2023-12-30-17-04-17.json │ │ │ ├── 345 @ 2023-12-29-18-38-39.json │ │ │ ├── 350 @ 2024-01-04-18-16-40.json │ │ │ ├── 357 @ 2024-01-06-14-24-18.json │ │ │ ├── 360 @ 2023-12-31-18-49-53.json │ │ │ ├── 374 @ 2024-01-07-00-51-38.json │ │ │ ├── 384 @ 2024-01-05-15-10-54.json │ │ │ ├── 385 @ 2023-12-28-13-47-48.json │ │ │ ├── 386 @ 2024-01-05-15-23-56.json │ │ │ ├── 393 @ 2023-12-30-17-40-03.json │ │ │ ├── 402 @ 2024-01-06-18-26-07.json │ │ │ ├── 407 @ 2023-12-30-20-51-23.json │ │ │ ├── 412 @ 2023-12-25-10-32-50.json │ │ │ ├── 413 @ 2023-12-25-10-36-22.json │ │ │ ├── 417 @ 2023-12-29-08-28-22.json │ │ │ ├── 422 @ 2023-12-25-12-33-35.json │ │ │ ├── 438 @ 2024-01-05-10-41-27.json │ │ │ ├── 446 @ 2024-01-06-21-47-36.json │ │ │ ├── 450 @ 2023-12-25-13-19-39.json │ │ │ ├── 457 @ 2023-12-28-09-01-06.json │ │ │ ├── 461 @ 2023-12-28-09-48-06.json │ │ │ ├── 464 @ 2023-12-28-10-08-46.json │ │ │ ├── 468 @ 2024-01-06-15-26-01.json │ │ │ ├── 472 @ 2024-01-07-01-30-10.json │ │ │ ├── 477 @ 2023-12-26-09-10-42.json │ │ │ ├── 480 @ 2023-12-26-09-12-33.json │ │ │ ├── 483 @ 2023-12-26-09-27-35.json │ │ │ ├── 489 @ 2023-12-28-10-38-09.json │ │ │ ├── 503 @ 2023-12-28-11-00-33.json │ │ │ ├── 508 @ 2023-12-26-14-46-44.json │ │ │ ├── 510 @ 2023-12-26-23-32-50.json │ │ │ ├── 519 @ 2023-12-26-23-48-13.json │ │ │ ├── 521 @ 2024-01-07-20-51-07.json │ │ │ ├── 522 @ 2024-01-05-16-49-40.json │ │ │ ├── 524 @ 2023-12-25-08-31-30.json │ │ │ ├── 534 @ 2023-12-25-08-48-07.json │ │ │ ├── 539 @ 2024-01-07-17-04-44.json │ │ 
│ ├── 547 @ 2023-12-28-14-24-32.json │ │ │ ├── 571 @ 2023-12-27-09-20-35.json │ │ │ ├── 578 @ 2023-12-25-09-23-43.json │ │ │ ├── 579 @ 2023-12-25-09-25-08.json │ │ │ ├── 580 @ 2024-01-03-13-39-07.json │ │ │ ├── 585 @ 2024-01-09-20-18-54.json │ │ │ ├── 593 @ 2023-12-25-09-31-55.json │ │ │ ├── 595 @ 2024-01-06-19-48-54.json │ │ │ ├── 601 @ 2023-12-30-22-56-49.json │ │ │ ├── 613 @ 2023-12-31-13-36-38.json │ │ │ ├── 627 @ 2024-01-01-19-39-28.json │ │ │ ├── 630 @ 2023-12-31-16-19-07.json │ │ │ ├── 637 @ 2024-01-01-19-56-36.json │ │ │ ├── 644 @ 2024-01-01-09-28-58.json │ │ │ ├── 645 @ 2024-01-03-16-11-31.json │ │ │ ├── 646 @ 2024-01-03-16-13-26.json │ │ │ ├── 647 @ 2024-01-01-09-41-54.json │ │ │ ├── 648 @ 2024-01-01-09-42-54.json │ │ │ ├── 649 @ 2024-01-01-09-44-07.json │ │ │ ├── 654 @ 2023-12-27-23-19-13.json │ │ │ ├── 661 @ 2023-12-25-09-45-59.json │ │ │ ├── 669 @ 2023-12-25-09-59-18.json │ │ │ ├── 677 @ 2023-12-28-20-24-49.json │ │ │ ├── 693 @ 2023-12-29-03-39-36.json │ │ │ ├── 695 @ 2023-12-28-16-52-02.json │ │ │ ├── 706 @ 2024-01-02-11-32-59.json │ │ │ ├── 733 @ 2024-01-04-20-18-46.json │ │ │ ├── 736 @ 2023-12-31-03-41-32.json │ │ │ ├── 742 @ 2024-01-06-20-59-19.json │ │ │ ├── 743 @ 2024-01-07-12-20-29.json │ │ │ ├── 744 @ 2023-12-31-03-47-20.json │ │ │ ├── 745 @ 2024-01-07-12-24-30.json │ │ │ ├── 746 @ 2024-01-14-04-09-43.json │ │ │ ├── 747 @ 2024-01-06-15-28-29.json │ │ │ ├── 753 @ 2024-01-14-04-26-12.json │ │ │ ├── 768 @ 2023-12-31-04-23-22.json │ │ │ ├── 770 @ 2024-01-13-03-25-04.json │ │ │ ├── 771 @ 2024-01-16-05-32-23.json │ │ │ ├── 775 @ 2024-01-16-05-29-03.json │ │ │ ├── 777 @ 2024-01-14-11-29-03.json │ │ │ ├── 787 @ 2024-01-14-04-41-43.json │ │ │ ├── 800 @ 2024-01-14-04-46-53.json │ │ │ ├── 806 @ 2024-01-07-02-19-21.json │ │ │ └── 810 @ 2024-01-03-01-50-10.json │ ├── improvement │ │ └── sop_ranking │ │ │ └── run_experiments.py │ └── README.md │ └── webarena │ ├── 44.json │ ├── 258.json │ ├── 157.json │ ├── 259.json │ ├── 275.json │ ├── 274.json │ ├── 
72.json │ ├── 156.json │ ├── 71.json │ ├── 73.json │ ├── 357.json │ ├── 46.json │ ├── 298.json │ ├── 299.json │ ├── 300.json │ ├── 70.json │ ├── 276.json │ ├── 277.json │ ├── 301.json │ ├── 302.json │ ├── 313.json │ ├── 41.json │ ├── 45.json │ ├── 260.json │ ├── 287.json │ ├── 368.json │ ├── 380.json │ ├── 117.json │ ├── 261.json │ ├── 278.json │ ├── 377.json │ ├── 379.json │ ├── 381.json │ ├── 262.json │ ├── 375.json │ ├── 378.json │ ├── 374.json │ ├── 227.json │ ├── 234.json │ ├── 235.json │ ├── 341.json │ ├── 344.json │ ├── 189.json │ ├── 190.json │ ├── 263.json │ ├── 340.json │ ├── 346.json │ ├── 348.json │ ├── 94.json │ ├── 95.json │ ├── 188.json │ ├── 232.json │ ├── 339.json │ ├── 345.json │ ├── 231.json │ ├── 233.json │ ├── 254.json │ ├── 266.json │ ├── 329.json │ ├── 349.json │ ├── 1.json │ ├── 192.json │ ├── 206.json │ ├── 224.json │ ├── 255.json │ ├── 331.json │ ├── 338.json │ ├── 5.json │ ├── 60.json │ ├── 93.json │ ├── 153.json │ ├── 223.json │ ├── 330.json │ ├── 335.json │ ├── 336.json │ ├── 337.json │ ├── 58.json │ ├── 59.json │ ├── 92.json │ └── 229.json ├── data ├── Process Mining Task Demonstrations.xlsx └── experimental_results │ ├── sop_generation │ ├── f1_hist.png │ ├── recall_hist.png │ ├── ordering_hist.png │ ├── precision_hist.png │ ├── GPT4 - precision_vs_recall.pdf │ ├── n_lines_gold_sop_vs_recall.pdf │ ├── n_lines_pred_sop_vs_recall.pdf │ ├── Claude3 - precision_vs_recall.pdf │ ├── n_lines_gold_sop_vs_precision.pdf │ ├── n_lines_pred_sop_vs_precision.pdf │ ├── GeminiPro - precision_vs_recall.pdf │ ├── sop_generation_precision_vs_recall.pdf │ ├── human_correlation │ │ └── human_v_llm_scatter_plot.png │ ├── metrics.txt │ └── results.tex │ ├── sop_ranking │ ├── confusion_matrix.pdf │ ├── kendall_corr_hist.pdf │ ├── spearman_corr_hist.pdf │ ├── kendall_corr_vs_spearman_corr.pdf │ ├── sop_ranking_kendall_corr_hist.pdf │ ├── metrics.txt │ ├── sop_ranking_kendall_corr_vs_spearman_corr.pdf │ └── sop_ranking.tex │ ├── sop_improvement │ ├── 
sop_improvement_radar.png │ └── sop_improvement_scores.tex │ ├── demo_validation │ ├── n_screenshots_vs_is_correct.pdf │ ├── df_task_trajectory.tex │ ├── df_task_type_task_trajectory.tex │ ├── df_task_type_task_completion.tex │ └── df_task_completion.tex │ ├── question_answering │ ├── question_answering_radar.png │ ├── human_correlation │ │ ├── clarity_density_plot.png │ │ ├── soundness_density_plot.png │ │ ├── causability_density_plot.png │ │ ├── compactness_density_plot.png │ │ └── completeness_density_plot.png │ └── question_answering_scores.tex │ └── demo_segmentation │ ├── n_tasks=3 │ ├── GPT4--3--True--False--True--False--True--False--False--False--1 │ │ ├── metrics.txt │ │ ├── v_measure_hist.pdf │ │ ├── adjusted_rand_hist.pdf │ │ ├── is_correct_line_plot.pdf │ │ ├── hist_distance_to_transition.pdf │ │ └── v_measure_vs_adjusted_rand.pdf │ ├── GPT4--3--True--False--True--False--True--False--True--False--1 │ │ ├── metrics.txt │ │ ├── v_measure_hist.pdf │ │ ├── adjusted_rand_hist.pdf │ │ ├── is_correct_line_plot.pdf │ │ ├── hist_distance_to_transition.pdf │ │ └── v_measure_vs_adjusted_rand.pdf │ ├── GPT4--3--True--False--True--True--True--False--True--False--1 │ │ ├── metrics.txt │ │ ├── v_measure_hist.pdf │ │ ├── adjusted_rand_hist.pdf │ │ ├── is_correct_line_plot.pdf │ │ ├── v_measure_vs_adjusted_rand.pdf │ │ └── hist_distance_to_transition.pdf │ ├── Claude3--3--True--False--True--False--True--False--False--False--1 │ │ ├── metrics.txt │ │ ├── v_measure_hist.pdf │ │ ├── adjusted_rand_hist.pdf │ │ ├── is_correct_line_plot.pdf │ │ ├── v_measure_vs_adjusted_rand.pdf │ │ └── hist_distance_to_transition.pdf │ ├── GeminiPro--3--True--False--True--False--True--False--False--False--1 │ │ ├── metrics.txt │ │ ├── v_measure_hist.pdf │ │ ├── adjusted_rand_hist.pdf │ │ ├── is_correct_line_plot.pdf │ │ ├── hist_distance_to_transition.pdf │ │ └── v_measure_vs_adjusted_rand.pdf │ ├── GeminiPro--3--True--False--True--False--True--False--True--False--1 │ │ ├── metrics.txt │ │ 
├── v_measure_hist.pdf │ │ ├── adjusted_rand_hist.pdf │ │ ├── is_correct_line_plot.pdf │ │ ├── hist_distance_to_transition.pdf │ │ └── v_measure_vs_adjusted_rand.pdf │ ├── GeminiPro--3--True--False--True--True--True--False--True--False--1 │ │ ├── metrics.txt │ │ ├── v_measure_hist.pdf │ │ ├── adjusted_rand_hist.pdf │ │ ├── is_correct_line_plot.pdf │ │ ├── v_measure_vs_adjusted_rand.pdf │ │ └── hist_distance_to_transition.pdf │ └── task_segmentation.tex │ └── demo_segmentation.tex ├── setup.py ├── requirements.txt └── Dockerfile /wonderbread/collect/qa/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/Process Mining Task Demonstrations.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/Process Mining Task Demonstrations.xlsx -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="wonderbread", 5 | version="0.1", 6 | packages=find_packages(), 7 | ) -------------------------------------------------------------------------------- /data/experimental_results/sop_generation/f1_hist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_generation/f1_hist.png -------------------------------------------------------------------------------- /data/experimental_results/sop_generation/recall_hist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_generation/recall_hist.png 
-------------------------------------------------------------------------------- /data/experimental_results/sop_generation/ordering_hist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_generation/ordering_hist.png -------------------------------------------------------------------------------- /data/experimental_results/sop_generation/precision_hist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_generation/precision_hist.png -------------------------------------------------------------------------------- /data/experimental_results/sop_ranking/confusion_matrix.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_ranking/confusion_matrix.pdf -------------------------------------------------------------------------------- /data/experimental_results/sop_ranking/kendall_corr_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_ranking/kendall_corr_hist.pdf -------------------------------------------------------------------------------- /data/experimental_results/sop_ranking/spearman_corr_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_ranking/spearman_corr_hist.pdf -------------------------------------------------------------------------------- /data/experimental_results/sop_improvement/sop_improvement_radar.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_improvement/sop_improvement_radar.png -------------------------------------------------------------------------------- /data/experimental_results/sop_generation/GPT4 - precision_vs_recall.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_generation/GPT4 - precision_vs_recall.pdf -------------------------------------------------------------------------------- /data/experimental_results/sop_generation/n_lines_gold_sop_vs_recall.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_generation/n_lines_gold_sop_vs_recall.pdf -------------------------------------------------------------------------------- /data/experimental_results/sop_generation/n_lines_pred_sop_vs_recall.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_generation/n_lines_pred_sop_vs_recall.pdf -------------------------------------------------------------------------------- /data/experimental_results/sop_ranking/kendall_corr_vs_spearman_corr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_ranking/kendall_corr_vs_spearman_corr.pdf -------------------------------------------------------------------------------- /data/experimental_results/sop_ranking/sop_ranking_kendall_corr_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_ranking/sop_ranking_kendall_corr_hist.pdf 
-------------------------------------------------------------------------------- /data/experimental_results/demo_validation/n_screenshots_vs_is_correct.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_validation/n_screenshots_vs_is_correct.pdf -------------------------------------------------------------------------------- /data/experimental_results/question_answering/question_answering_radar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/question_answering/question_answering_radar.png -------------------------------------------------------------------------------- /data/experimental_results/sop_generation/Claude3 - precision_vs_recall.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_generation/Claude3 - precision_vs_recall.pdf -------------------------------------------------------------------------------- /data/experimental_results/sop_generation/n_lines_gold_sop_vs_precision.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_generation/n_lines_gold_sop_vs_precision.pdf -------------------------------------------------------------------------------- /data/experimental_results/sop_generation/n_lines_pred_sop_vs_precision.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_generation/n_lines_pred_sop_vs_precision.pdf -------------------------------------------------------------------------------- 
/data/experimental_results/sop_generation/GeminiPro - precision_vs_recall.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_generation/GeminiPro - precision_vs_recall.pdf -------------------------------------------------------------------------------- /data/experimental_results/sop_generation/sop_generation_precision_vs_recall.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_generation/sop_generation_precision_vs_recall.pdf -------------------------------------------------------------------------------- /data/experimental_results/sop_ranking/metrics.txt: -------------------------------------------------------------------------------- 1 | Mean spearman_corr: 0.057571560480147735 2 | Std spearman_corr: 0.567719324664298 3 | Mean kendall_corr: 0.04216682056017236 4 | Std kendall_corr: 0.4801280126910653 5 | -------------------------------------------------------------------------------- /data/experimental_results/sop_ranking/sop_ranking_kendall_corr_vs_spearman_corr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_ranking/sop_ranking_kendall_corr_vs_spearman_corr.pdf -------------------------------------------------------------------------------- /wonderbread/collect/qa/trace_cleanup/config.py: -------------------------------------------------------------------------------- 1 | path_to_dir: str = '/Users/michaelwornow/Desktop/demonstration-collection/data/demos' 2 | path_to_df_valid: str = '/Users/michaelwornow/Desktop/demonstration-collection/data/df_valid.csv' -------------------------------------------------------------------------------- 
/data/experimental_results/question_answering/human_correlation/clarity_density_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/question_answering/human_correlation/clarity_density_plot.png -------------------------------------------------------------------------------- /data/experimental_results/question_answering/human_correlation/soundness_density_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/question_answering/human_correlation/soundness_density_plot.png -------------------------------------------------------------------------------- /data/experimental_results/sop_generation/human_correlation/human_v_llm_scatter_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/sop_generation/human_correlation/human_v_llm_scatter_plot.png -------------------------------------------------------------------------------- /data/experimental_results/question_answering/human_correlation/causability_density_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/question_answering/human_correlation/causability_density_plot.png -------------------------------------------------------------------------------- /data/experimental_results/question_answering/human_correlation/compactness_density_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/question_answering/human_correlation/compactness_density_plot.png 
-------------------------------------------------------------------------------- /data/experimental_results/question_answering/human_correlation/completeness_density_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/question_answering/human_correlation/completeness_density_plot.png -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--False--False--1/metrics.txt: -------------------------------------------------------------------------------- 1 | Mean adjusted_rand: 0.7981321435060027 2 | Std adjusted_rand: 0.22777458677639104 3 | Mean v_measure: 0.8597297246066645 4 | Std v_measure: 0.1482124253877677 5 | -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--True--False--1/metrics.txt: -------------------------------------------------------------------------------- 1 | Mean adjusted_rand: 0.845691015101685 2 | Std adjusted_rand: 0.15394137650950593 3 | Mean v_measure: 0.8725953029626818 4 | Std v_measure: 0.11352231779114468 5 | -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--True--True--False--True--False--1/metrics.txt: -------------------------------------------------------------------------------- 1 | Mean adjusted_rand: 0.8535955244298248 2 | Std adjusted_rand: 0.1629115083554615 3 | Mean v_measure: 0.8784769633438149 4 | Std v_measure: 0.1216570344625597 5 | -------------------------------------------------------------------------------- 
/data/experimental_results/demo_segmentation/n_tasks=3/Claude3--3--True--False--True--False--True--False--False--False--1/metrics.txt: -------------------------------------------------------------------------------- 1 | Mean adjusted_rand: 0.8104828334704159 2 | Std adjusted_rand: 0.12602127147451064 3 | Mean v_measure: 0.8409656709029293 4 | Std v_measure: 0.09293845680446348 5 | -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--False--False--1/metrics.txt: -------------------------------------------------------------------------------- 1 | Mean adjusted_rand: 0.5805595907082329 2 | Std adjusted_rand: 0.26699196667208136 3 | Mean v_measure: 0.6861342267546585 4 | Std v_measure: 0.20456702768553417 5 | -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--True--False--1/metrics.txt: -------------------------------------------------------------------------------- 1 | Mean adjusted_rand: 0.5338798186727085 2 | Std adjusted_rand: 0.27048587342131 3 | Mean v_measure: 0.6464203673194973 4 | Std v_measure: 0.22562326271691147 5 | -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--True--True--False--True--False--1/metrics.txt: -------------------------------------------------------------------------------- 1 | Mean adjusted_rand: 0.5524814045960297 2 | Std adjusted_rand: 0.2836130351076145 3 | Mean v_measure: 0.6562083660636884 4 | Std v_measure: 0.24147381695476888 5 | -------------------------------------------------------------------------------- /data/experimental_results/sop_ranking/sop_ranking.tex: -------------------------------------------------------------------------------- 1 | 
\begin{tabular}{lrr} 2 | \toprule 3 | Model & Spearman Corr. & Kendall Tau \\ 4 | \midrule 5 | Claude3 & 0.068006 & 0.042013 \\ 6 | GPT4 & 0.067313 & 0.054017 \\ 7 | GeminiPro & 0.037396 & 0.030471 \\ 8 | \bottomrule 9 | \end{tabular} 10 | -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--True--True--False--True--False--1/v_measure_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--True--True--False--True--False--1/v_measure_hist.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--False--False--1/v_measure_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--False--False--1/v_measure_hist.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--True--False--1/v_measure_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--True--False--1/v_measure_hist.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/Claude3--3--True--False--True--False--True--False--False--False--1/v_measure_hist.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/Claude3--3--True--False--True--False--True--False--False--False--1/v_measure_hist.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--True--False--1/adjusted_rand_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--True--False--1/adjusted_rand_hist.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--True--True--False--True--False--1/adjusted_rand_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--True--True--False--True--False--1/adjusted_rand_hist.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--True--True--False--True--False--1/v_measure_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--True--True--False--True--False--1/v_measure_hist.pdf -------------------------------------------------------------------------------- 
/data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--False--False--1/adjusted_rand_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--False--False--1/adjusted_rand_hist.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--True--False--1/is_correct_line_plot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--True--False--1/is_correct_line_plot.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--True--True--False--True--False--1/is_correct_line_plot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--True--True--False--True--False--1/is_correct_line_plot.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--False--False--1/v_measure_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--False--False--1/v_measure_hist.pdf 
-------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--True--False--1/v_measure_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--True--False--1/v_measure_hist.pdf -------------------------------------------------------------------------------- /data/experimental_results/sop_generation/metrics.txt: -------------------------------------------------------------------------------- 1 | Mean Precision: 0.5935267701222054 2 | Std Precision: 0.24317559704086214 3 | Mean Recall: 0.6582822500850658 4 | Std Recall: 0.271171818389596 5 | Mean F1: 0.5969117705830557 6 | Std F1: 0.23510602646667236 7 | Mean Ordering: 0.5139548404335015 8 | Std Ordering: 0.2433239510883539 9 | -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/Claude3--3--True--False--True--False--True--False--False--False--1/adjusted_rand_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/Claude3--3--True--False--True--False--True--False--False--False--1/adjusted_rand_hist.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--False--False--1/is_correct_line_plot.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--False--False--1/is_correct_line_plot.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--True--False--1/adjusted_rand_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--True--False--1/adjusted_rand_hist.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--True--True--False--True--False--1/adjusted_rand_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--True--True--False--True--False--1/adjusted_rand_hist.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/Claude3--3--True--False--True--False--True--False--False--False--1/is_correct_line_plot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/Claude3--3--True--False--True--False--True--False--False--False--1/is_correct_line_plot.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--True--True--False--True--False--1/v_measure_vs_adjusted_rand.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--True--True--False--True--False--1/v_measure_vs_adjusted_rand.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--False--False--1/adjusted_rand_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--False--False--1/adjusted_rand_hist.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--True--False--1/is_correct_line_plot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--True--False--1/is_correct_line_plot.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--True--True--False--True--False--1/is_correct_line_plot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--True--True--False--True--False--1/is_correct_line_plot.pdf -------------------------------------------------------------------------------- 
/data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--False--False--1/hist_distance_to_transition.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--False--False--1/hist_distance_to_transition.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--False--False--1/v_measure_vs_adjusted_rand.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--False--False--1/v_measure_vs_adjusted_rand.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--True--False--1/hist_distance_to_transition.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--True--False--1/hist_distance_to_transition.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--True--False--1/v_measure_vs_adjusted_rand.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--False--True--False--True--False--1/v_measure_vs_adjusted_rand.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--True--True--False--True--False--1/hist_distance_to_transition.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GPT4--3--True--False--True--True--True--False--True--False--1/hist_distance_to_transition.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--False--False--1/is_correct_line_plot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--False--False--1/is_correct_line_plot.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/Claude3--3--True--False--True--False--True--False--False--False--1/v_measure_vs_adjusted_rand.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/Claude3--3--True--False--True--False--True--False--False--False--1/v_measure_vs_adjusted_rand.pdf -------------------------------------------------------------------------------- 
/data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--True--True--False--True--False--1/v_measure_vs_adjusted_rand.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--True--True--False--True--False--1/v_measure_vs_adjusted_rand.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/Claude3--3--True--False--True--False--True--False--False--False--1/hist_distance_to_transition.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/Claude3--3--True--False--True--False--True--False--False--False--1/hist_distance_to_transition.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--False--False--1/hist_distance_to_transition.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--False--False--1/hist_distance_to_transition.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--False--False--1/v_measure_vs_adjusted_rand.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--False--False--1/v_measure_vs_adjusted_rand.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--True--False--1/hist_distance_to_transition.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--True--False--1/hist_distance_to_transition.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--True--False--1/v_measure_vs_adjusted_rand.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--False--True--False--True--False--1/v_measure_vs_adjusted_rand.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--True--True--False--True--False--1/hist_distance_to_transition.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HazyResearch/wonderbread/HEAD/data/experimental_results/demo_segmentation/n_tasks=3/GeminiPro--3--True--False--True--True--True--False--True--False--1/hist_distance_to_transition.pdf -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/n_tasks=3/task_segmentation.tex: 
-------------------------------------------------------------------------------- 1 | \begin{tabular}{lll} 2 | \toprule 3 | Ablation & V-Measure & ARI \\ 4 | \midrule 5 | GPT4V - TD+KF+SOP - n=3 & 0.87 & 0.84 \\ 6 | GPT4V - KF+SOP - n=3 & 0.86 & 0.83 \\ 7 | GPT4V - TD+KF+ACT+SOP - n=3 & 0.82 & 0.81 \\ 8 | GPT4V - KF - n=3 & 0.81 & 0.72 \\ 9 | \bottomrule 10 | \end{tabular} 11 | -------------------------------------------------------------------------------- /data/experimental_results/demo_validation/df_task_trajectory.tex: -------------------------------------------------------------------------------- 1 | \begin{tabular}{llllllllr} 2 | \toprule 3 | model & is_td & is_kf & is_act & is_include_sop & Precision & Recall & F1 & Count \\ 4 | \midrule 5 | GeminiPro & \checkmark & \checkmark & & \checkmark & 0.94 & 0.14 & 0.25 & 973 \\ 6 | GPT4 & \checkmark & \checkmark & & \checkmark & 0.52 & 0.18 & 0.27 & 980 \\ 7 | \bottomrule 8 | \end{tabular} 9 | -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/117 @ 2023-12-31-23-34-18.json: -------------------------------------------------------------------------------- 1 | { 2 | "117 @ 2023-12-31-23-34-18": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/127 @ 2024-01-08-19-59-09.json: -------------------------------------------------------------------------------- 1 | { 2 | "127 @ 2024-01-08-19-59-09": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } 
-------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/156 @ 2023-12-27-12-13-15.json: -------------------------------------------------------------------------------- 1 | { 2 | "156 @ 2023-12-27-12-13-15": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/157 @ 2023-12-27-22-58-25.json: -------------------------------------------------------------------------------- 1 | { 2 | "157 @ 2023-12-27-22-58-25": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/163 @ 2024-01-13-17-57-52.json: -------------------------------------------------------------------------------- 1 | { 2 | "163 @ 2024-01-13-17-57-52": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/188 @ 2024-01-01-22-56-49.json: -------------------------------------------------------------------------------- 1 | { 2 | "188 @ 2024-01-01-22-56-49": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": 
null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/205 @ 2023-12-30-13-02-59.json: -------------------------------------------------------------------------------- 1 | { 2 | "205 @ 2023-12-30-13-02-59": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/21 @ 2024-01-05-02-16-47.json: -------------------------------------------------------------------------------- 1 | { 2 | "21 @ 2024-01-05-02-16-47": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/235 @ 2024-01-01-00-13-13.json: -------------------------------------------------------------------------------- 1 | { 2 | "235 @ 2024-01-01-00-13-13": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/258 @ 2023-12-30-14-42-44.json: -------------------------------------------------------------------------------- 1 | { 2 | "258 @ 2023-12-30-14-42-44": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | 
"truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/260 @ 2024-01-01-14-11-28.json: -------------------------------------------------------------------------------- 1 | { 2 | "260 @ 2024-01-01-14-11-28": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/276 @ 2023-12-26-22-12-50.json: -------------------------------------------------------------------------------- 1 | { 2 | "276 @ 2023-12-26-22-12-50": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/322 @ 2024-01-02-16-28-59.json: -------------------------------------------------------------------------------- 1 | { 2 | "322 @ 2024-01-02-16-28-59": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/332 @ 2024-01-02-15-52-51.json: -------------------------------------------------------------------------------- 1 | { 2 | "332 @ 2024-01-02-15-52-51": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 
| "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/357 @ 2024-01-06-14-24-18.json: -------------------------------------------------------------------------------- 1 | { 2 | "357 @ 2024-01-06-14-24-18": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/42 @ 2024-01-13-05-03-47.json: -------------------------------------------------------------------------------- 1 | { 2 | "42 @ 2024-01-13-05-03-47": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/44 @ 2024-01-05-03-59-01.json: -------------------------------------------------------------------------------- 1 | { 2 | "44 @ 2024-01-05-03-59-01": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/45 @ 2024-01-08-14-51-24.json: -------------------------------------------------------------------------------- 1 | { 2 | "45 @ 2024-01-08-14-51-24": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 
7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/47 @ 2023-12-31-18-24-54.json: -------------------------------------------------------------------------------- 1 | { 2 | "47 @ 2023-12-31-18-24-54": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/519 @ 2023-12-26-23-48-13.json: -------------------------------------------------------------------------------- 1 | { 2 | "519 @ 2023-12-26-23-48-13": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/652 @ 2024-01-03-16-23-50.json: -------------------------------------------------------------------------------- 1 | { 2 | "652 @ 2024-01-03-16-23-50": { 3 | "3": { 4 | "skip": { 5 | "state_ids": null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/knowledge_transfer/demo_validation/demo_2_config/94 @ 2024-01-08-18-24-10.json: -------------------------------------------------------------------------------- 1 | { 2 | "94 @ 2024-01-08-18-24-10": { 3 | "3": { 4 | "skip": { 5 | "state_ids": 
null, 6 | "action_ids": null 7 | }, 8 | "shuffle": { 9 | "state_ids": null 10 | }, 11 | "truncate": { 12 | "state_ids": null 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /data/experimental_results/sop_improvement/sop_improvement_scores.tex: -------------------------------------------------------------------------------- 1 | \begin{tabular}{llllll} 2 | \toprule 3 | Model & completeness & soundness & clarity & compactness & average \\ 4 | \midrule 5 | Claude3 & 1.35 & 1.60 & 2.35 & 2.90 & 2.05 \\ 6 | GPT4 & 2.25 & 2.40 & 3.00 & 2.95 & 2.65 \\ 7 | GeminiPro & 1.75 & 1.80 & 2.75 & 2.95 & 2.31 \\ 8 | Human & 3.00 & 3.00 & 2.25 & 2.95 & 2.80 \\ 9 | \bottomrule 10 | \end{tabular} 11 | -------------------------------------------------------------------------------- /data/experimental_results/question_answering/question_answering_scores.tex: -------------------------------------------------------------------------------- 1 | \begin{tabular}{llllll} 2 | \toprule 3 | Model & completeness & soundness & clarity & compactness & average \\ 4 | \midrule 5 | Claude3 & 1.56 & 1.83 & 2.18 & 2.61 & 2.05 \\ 6 | GPT4 & 2.20 & 2.51 & 2.96 & 2.85 & 2.63 \\ 7 | GeminiPro & 1.81 & 2.15 & 2.83 & 2.95 & 2.44 \\ 8 | Human & 3.00 & 3.00 & 2.64 & 2.88 & 2.88 \\ 9 | \bottomrule 10 | \end{tabular} 11 | -------------------------------------------------------------------------------- /data/experimental_results/demo_validation/df_task_type_task_trajectory.tex: -------------------------------------------------------------------------------- 1 | \begin{tabular}{lll} 2 | \toprule 3 | Ablation & Setting & Accuracy \\ 4 | \midrule 5 | GeminiPro - TD+KF+SOP & true & 0.94 \\ 6 | GPT4 - TD+KF+SOP & shuffle & 0.65 \\ 7 | GPT4 - TD+KF+SOP & skip & 0.56 \\ 8 | GPT4 - TD+KF+SOP & true & 0.52 \\ 9 | GeminiPro - TD+KF+SOP & shuffle & 0.05 \\ 10 | GeminiPro - TD+KF+SOP & skip & 0.04 \\ 11 | \bottomrule 12 | \end{tabular} 13 | 
-------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/0 @ 2023-12-25-15-10-58.json: -------------------------------------------------------------------------------- 1 | { 2 | "0 @ 2023-12-25-15-10-58": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "0 @ 2023-12-25-15-10-58", 8 | "704 @ 2024-01-07-20-32-15", 9 | "494 @ 2024-01-07-17-31-39" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/1 @ 2023-12-25-15-44-04.json: -------------------------------------------------------------------------------- 1 | { 2 | "1 @ 2023-12-25-15-44-04": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "1 @ 2023-12-25-15-44-04", 8 | "120 @ 2023-12-28-01-20-43", 9 | "494 @ 2023-12-30-23-48-17" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/3 @ 2023-12-25-15-31-51.json: -------------------------------------------------------------------------------- 1 | { 2 | "3 @ 2023-12-25-15-31-51": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "3 @ 2023-12-25-15-31-51", 8 | "545 @ 2023-12-28-00-29-57", 9 | "129 @ 2023-12-28-21-41-38" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/4 @ 2023-12-25-17-18-04.json: -------------------------------------------------------------------------------- 1 | { 2 | "4 @ 2023-12-25-17-18-04": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "4 @ 2023-12-25-17-18-04", 8 | "79 @ 
2024-01-05-04-31-57", 9 | "204 @ 2024-01-14-00-54-20" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/5 @ 2024-01-05-02-02-06.json: -------------------------------------------------------------------------------- 1 | { 2 | "5 @ 2024-01-05-02-02-06": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "5 @ 2024-01-05-02-02-06", 8 | "122 @ 2023-12-28-01-25-17", 9 | "112 @ 2024-01-08-19-07-04" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/114 @ 2024-01-13-05-31-58.json: -------------------------------------------------------------------------------- 1 | { 2 | "114 @ 2024-01-13-05-31-58": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "114 @ 2024-01-13-05-31-58", 8 | "1 @ 2023-12-25-17-08-00", 9 | "4 @ 2024-01-05-02-01-20" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/115 @ 2024-01-13-05-34-10.json: -------------------------------------------------------------------------------- 1 | { 2 | "115 @ 2024-01-13-05-34-10": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "115 @ 2024-01-13-05-34-10", 8 | "15 @ 2023-12-25-16-13-48", 9 | "501 @ 2024-01-07-18-57-10" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/118 @ 2024-01-08-01-14-57.json: -------------------------------------------------------------------------------- 1 | { 2 | "118 @ 2024-01-08-01-14-57": { 3 | "3": { 4 | 
"is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "118 @ 2024-01-08-01-14-57", 8 | "50 @ 2024-01-11-08-38-54", 9 | "233 @ 2024-01-08-03-29-03" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/122 @ 2024-01-13-06-01-40.json: -------------------------------------------------------------------------------- 1 | { 2 | "122 @ 2024-01-13-06-01-40": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "122 @ 2024-01-13-06-01-40", 8 | "215 @ 2024-01-13-06-52-53", 9 | "63 @ 2023-12-27-00-04-48" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/131 @ 2024-01-13-06-11-26.json: -------------------------------------------------------------------------------- 1 | { 2 | "131 @ 2024-01-13-06-11-26": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "131 @ 2024-01-13-06-11-26", 8 | "78 @ 2023-12-27-22-47-56", 9 | "706 @ 2024-01-02-11-32-59" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/14 @ 2023-12-25-17-33-45.json: -------------------------------------------------------------------------------- 1 | { 2 | "14 @ 2023-12-25-17-33-45": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "14 @ 2023-12-25-17-33-45", 8 | "487 @ 2023-12-28-10-35-29", 9 | "195 @ 2024-01-30-17-37-27" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/188 @ 2024-01-01-22-56-49.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "188 @ 2024-01-01-22-56-49": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "188 @ 2024-01-01-22-56-49", 8 | "468 @ 2024-01-07-21-10-46", 9 | "51 @ 2023-12-26-23-53-42" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/21 @ 2024-01-05-02-16-47.json: -------------------------------------------------------------------------------- 1 | { 2 | "21 @ 2024-01-05-02-16-47": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "21 @ 2024-01-05-02-16-47", 8 | "385 @ 2023-12-28-13-47-48", 9 | "279 @ 2024-02-16-23-22-15" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/245 @ 2024-01-13-07-01-31.json: -------------------------------------------------------------------------------- 1 | { 2 | "245 @ 2024-01-13-07-01-31": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "245 @ 2024-01-13-07-01-31", 8 | "130 @ 2023-12-28-21-45-33", 9 | "1 @ 2023-12-25-17-08-00" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/279 @ 2024-01-09-02-08-52.json: -------------------------------------------------------------------------------- 1 | { 2 | "279 @ 2024-01-09-02-08-52": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "279 @ 2024-01-09-02-08-52", 8 | "49 @ 2024-01-05-04-06-16", 9 | "338 @ 2024-01-01-14-22-32" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } 
-------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/29 @ 2023-12-25-17-51-26.json: -------------------------------------------------------------------------------- 1 | { 2 | "29 @ 2023-12-25-17-51-26": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "29 @ 2023-12-25-17-51-26", 8 | "731 @ 2024-01-08-03-17-56", 9 | "649 @ 2024-01-03-16-18-01" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/319 @ 2024-01-01-00-19-57.json: -------------------------------------------------------------------------------- 1 | { 2 | "319 @ 2024-01-01-00-19-57": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "319 @ 2024-01-01-00-19-57", 8 | "48 @ 2023-12-26-23-44-04", 9 | "514 @ 2023-12-28-19-22-03" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/333 @ 2024-01-01-13-19-13.json: -------------------------------------------------------------------------------- 1 | { 2 | "333 @ 2024-01-01-13-19-13": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "333 @ 2024-01-01-13-19-13", 8 | "188 @ 2024-01-13-18-07-49", 9 | "51 @ 2023-12-26-23-53-42" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/410 @ 2023-12-31-20-27-12.json: -------------------------------------------------------------------------------- 1 | { 2 | "410 @ 2023-12-31-20-27-12": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "410 @ 
2023-12-31-20-27-12", 8 | "27 @ 2024-01-05-02-22-11", 9 | "636 @ 2023-12-31-16-31-59" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/42 @ 2024-01-13-05-03-47.json: -------------------------------------------------------------------------------- 1 | { 2 | "42 @ 2024-01-13-05-03-47": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "42 @ 2024-01-13-05-03-47", 8 | "703 @ 2024-01-05-19-56-50", 9 | "121 @ 2024-01-30-15-37-51" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/44 @ 2024-01-05-03-59-01.json: -------------------------------------------------------------------------------- 1 | { 2 | "44 @ 2024-01-05-03-59-01": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "44 @ 2024-01-05-03-59-01", 8 | "578 @ 2023-12-31-17-45-58", 9 | "182 @ 2023-12-28-11-17-34" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/45 @ 2024-01-08-14-51-24.json: -------------------------------------------------------------------------------- 1 | { 2 | "45 @ 2024-01-08-14-51-24": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "45 @ 2024-01-08-14-51-24", 8 | "592 @ 2023-12-25-09-30-51", 9 | "659 @ 2023-12-25-09-41-21" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/47 @ 2023-12-31-18-24-54.json: -------------------------------------------------------------------------------- 1 | { 2 | "47 @ 
2023-12-31-18-24-54": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "47 @ 2023-12-31-18-24-54", 8 | "51 @ 2023-12-26-23-53-42", 9 | "467 @ 2023-12-26-09-53-15" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/499 @ 2024-01-07-16-03-13.json: -------------------------------------------------------------------------------- 1 | { 2 | "499 @ 2024-01-07-16-03-13": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "499 @ 2024-01-07-16-03-13", 8 | "209 @ 2024-01-11-06-49-21", 9 | "94 @ 2024-01-08-18-24-10" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/512 @ 2023-12-26-23-44-03.json: -------------------------------------------------------------------------------- 1 | { 2 | "512 @ 2023-12-26-23-44-03": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "512 @ 2023-12-26-23-44-03", 8 | "515 @ 2023-12-28-19-23-18", 9 | "51 @ 2023-12-31-18-35-38" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/545 @ 2024-01-28-09-10-43.json: -------------------------------------------------------------------------------- 1 | { 2 | "545 @ 2024-01-28-09-10-43": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "545 @ 2024-01-28-09-10-43", 8 | "217 @ 2024-01-11-07-23-30", 9 | "42 @ 2023-12-27-13-30-23" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/607 @ 
2023-12-31-13-30-17.json: -------------------------------------------------------------------------------- 1 | { 2 | "607 @ 2023-12-31-13-30-17": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "607 @ 2023-12-31-13-30-17", 8 | "28 @ 2023-12-25-21-04-23", 9 | "601 @ 2023-12-30-22-56-49" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/62 @ 2023-12-26-23-59-25.json: -------------------------------------------------------------------------------- 1 | { 2 | "62 @ 2023-12-26-23-59-25": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "62 @ 2023-12-26-23-59-25", 8 | "244 @ 2023-12-29-16-24-12", 9 | "208 @ 2023-12-31-15-19-52" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/622 @ 2024-01-01-19-21-28.json: -------------------------------------------------------------------------------- 1 | { 2 | "622 @ 2024-01-01-19-21-28": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "622 @ 2024-01-01-19-21-28", 8 | "30 @ 2023-12-25-21-44-29", 9 | "727 @ 2024-01-16-04-10-03" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/63 @ 2023-12-27-00-04-48.json: -------------------------------------------------------------------------------- 1 | { 2 | "63 @ 2023-12-27-00-04-48": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "63 @ 2023-12-27-00-04-48", 8 | "202 @ 2023-12-29-15-21-47", 9 | "12 @ 2024-01-13-05-00-49" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } 
-------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/64 @ 2023-12-27-00-12-03.json: -------------------------------------------------------------------------------- 1 | { 2 | "64 @ 2023-12-27-00-12-03": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "64 @ 2023-12-27-00-12-03", 8 | "679 @ 2024-01-07-18-09-46", 9 | "713 @ 2024-01-13-02-38-35" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/652 @ 2024-01-03-16-23-50.json: -------------------------------------------------------------------------------- 1 | { 2 | "652 @ 2024-01-03-16-23-50": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "652 @ 2024-01-03-16-23-50", 8 | "27 @ 2023-12-25-17-40-36", 9 | "631 @ 2023-12-31-16-20-17" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/69 @ 2024-01-05-04-25-33.json: -------------------------------------------------------------------------------- 1 | { 2 | "69 @ 2024-01-05-04-25-33": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "69 @ 2024-01-05-04-25-33", 8 | "730 @ 2024-01-04-20-06-32", 9 | "625 @ 2024-01-03-21-28-54" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/709 @ 2024-01-07-20-50-00.json: -------------------------------------------------------------------------------- 1 | { 2 | "709 @ 2024-01-07-20-50-00": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "709 @ 
2024-01-07-20-50-00", 8 | "497 @ 2024-01-07-01-50-19", 9 | "14 @ 2023-12-25-16-12-37" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/711 @ 2023-12-31-03-10-51.json: -------------------------------------------------------------------------------- 1 | { 2 | "711 @ 2023-12-31-03-10-51": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "711 @ 2023-12-31-03-10-51", 8 | "544 @ 2024-02-04-14-41-19", 9 | "2 @ 2023-12-25-15-27-33" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/721 @ 2024-01-06-15-15-58.json: -------------------------------------------------------------------------------- 1 | { 2 | "721 @ 2024-01-06-15-15-58": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "721 @ 2024-01-06-15-15-58", 8 | "67 @ 2024-01-13-23-36-04", 9 | "729 @ 2024-01-08-03-11-29" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/769 @ 2024-01-06-12-39-15.json: -------------------------------------------------------------------------------- 1 | { 2 | "769 @ 2024-01-06-12-39-15": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "769 @ 2024-01-06-12-39-15", 8 | "700 @ 2024-01-02-10-36-20", 9 | "79 @ 2023-12-27-22-50-34" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/79 @ 2023-12-27-22-50-34.json: -------------------------------------------------------------------------------- 1 | { 2 | "79 @ 
2023-12-27-22-50-34": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "79 @ 2023-12-27-22-50-34", 8 | "289 @ 2023-12-29-17-06-13", 9 | "775 @ 2024-01-07-15-27-19" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/94 @ 2024-01-08-18-24-10.json: -------------------------------------------------------------------------------- 1 | { 2 | "94 @ 2024-01-08-18-24-10": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "94 @ 2024-01-08-18-24-10", 8 | "1 @ 2024-01-05-01-57-42", 9 | "344 @ 2024-01-07-17-41-10" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/106 @ 2023-12-30-12-35-44.json: -------------------------------------------------------------------------------- 1 | { 2 | "106 @ 2023-12-30-12-35-44": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "106 @ 2023-12-30-12-35-44", 8 | "415 @ 2024-02-20-21-29-42", 9 | "417 @ 2023-12-29-08-28-22" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/107 @ 2024-01-13-05-19-45.json: -------------------------------------------------------------------------------- 1 | { 2 | "107 @ 2024-01-13-05-19-45": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "107 @ 2024-01-13-05-19-45", 8 | "457 @ 2023-12-31-20-09-45", 9 | "202 @ 2024-01-14-00-45-14" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/116 @ 
2024-01-16-09-09-51.json: -------------------------------------------------------------------------------- 1 | { 2 | "116 @ 2024-01-16-09-09-51": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "116 @ 2024-01-16-09-09-51", 8 | "709 @ 2023-12-31-03-06-35", 9 | "503 @ 2023-12-28-11-00-33" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/117 @ 2023-12-31-23-34-18.json: -------------------------------------------------------------------------------- 1 | { 2 | "117 @ 2023-12-31-23-34-18": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "117 @ 2023-12-31-23-34-18", 8 | "693 @ 2023-12-29-03-39-36", 9 | "143 @ 2024-01-08-01-57-37" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/125 @ 2023-12-27-12-53-48.json: -------------------------------------------------------------------------------- 1 | { 2 | "125 @ 2023-12-27-12-53-48": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "125 @ 2023-12-27-12-53-48", 8 | "438 @ 2024-02-21-09-39-29", 9 | "146 @ 2023-12-28-23-05-07" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/127 @ 2024-01-08-19-59-09.json: -------------------------------------------------------------------------------- 1 | { 2 | "127 @ 2024-01-08-19-59-09": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "127 @ 2024-01-08-19-59-09", 8 | "543 @ 2024-01-07-02-15-29", 9 | "344 @ 2023-12-29-18-33-02" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } 
-------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/133 @ 2024-01-14-06-02-33.json: -------------------------------------------------------------------------------- 1 | { 2 | "133 @ 2024-01-14-06-02-33": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "133 @ 2024-01-14-06-02-33", 8 | "349 @ 2024-01-06-18-57-30", 9 | "389 @ 2023-12-31-21-16-55" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/143 @ 2024-01-08-01-57-37.json: -------------------------------------------------------------------------------- 1 | { 2 | "143 @ 2024-01-08-01-57-37": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "143 @ 2024-01-08-01-57-37", 8 | "333 @ 2024-02-09-18-38-11", 9 | "690 @ 2024-01-06-13-45-40" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/147 @ 2024-01-08-02-17-03.json: -------------------------------------------------------------------------------- 1 | { 2 | "147 @ 2024-01-08-02-17-03": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "147 @ 2024-01-08-02-17-03", 8 | "361 @ 2024-01-05-15-01-56", 9 | "141 @ 2024-01-08-01-51-42" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/156 @ 2023-12-27-12-13-15.json: -------------------------------------------------------------------------------- 1 | { 2 | "156 @ 2023-12-27-12-13-15": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "156 @ 
2023-12-27-12-13-15", 8 | "169 @ 2023-12-31-01-35-46", 9 | "593 @ 2023-12-25-09-31-55" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/157 @ 2023-12-27-22-58-25.json: -------------------------------------------------------------------------------- 1 | { 2 | "157 @ 2023-12-27-22-58-25": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "157 @ 2023-12-27-22-58-25", 8 | "695 @ 2024-01-05-19-33-52", 9 | "110 @ 2023-12-28-00-59-44" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/162 @ 2024-01-08-02-53-13.json: -------------------------------------------------------------------------------- 1 | { 2 | "162 @ 2024-01-08-02-53-13": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "162 @ 2024-01-08-02-53-13", 8 | "283 @ 2024-02-02-13-15-48", 9 | "167 @ 2024-01-13-18-04-58" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/163 @ 2024-01-13-17-57-52.json: -------------------------------------------------------------------------------- 1 | { 2 | "163 @ 2024-01-13-17-57-52": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "163 @ 2024-01-13-17-57-52", 8 | "520 @ 2024-01-07-20-49-03", 9 | "281 @ 2024-02-18-06-05-33" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/171 @ 2023-12-31-01-42-52.json: -------------------------------------------------------------------------------- 1 | { 2 | "171 
@ 2023-12-31-01-42-52": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "171 @ 2023-12-31-01-42-52", 8 | "207 @ 2024-01-14-06-27-55", 9 | "475 @ 2024-02-05-21-25-34" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/177 @ 2023-12-28-09-18-21.json: -------------------------------------------------------------------------------- 1 | { 2 | "177 @ 2023-12-28-09-18-21": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "177 @ 2023-12-28-09-18-21", 8 | "171 @ 2024-01-30-18-11-31", 9 | "294 @ 2024-01-06-17-45-29" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/179 @ 2023-12-28-11-13-06.json: -------------------------------------------------------------------------------- 1 | { 2 | "179 @ 2023-12-28-11-13-06": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "179 @ 2023-12-28-11-13-06", 8 | "483 @ 2023-12-26-09-27-35", 9 | "590 @ 2024-01-08-02-35-26" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/185 @ 2023-12-28-17-09-28.json: -------------------------------------------------------------------------------- 1 | { 2 | "185 @ 2023-12-28-17-09-28": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "185 @ 2023-12-28-17-09-28", 8 | "204 @ 2023-12-31-16-30-47", 9 | "706 @ 2024-01-02-11-32-59" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/194 @ 
2024-01-13-06-23-00.json: -------------------------------------------------------------------------------- 1 | { 2 | "194 @ 2024-01-13-06-23-00": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "194 @ 2024-01-13-06-23-00", 8 | "782 @ 2024-01-13-04-10-24", 9 | "215 @ 2024-02-04-17-30-11" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/203 @ 2024-01-13-06-37-19.json: -------------------------------------------------------------------------------- 1 | { 2 | "203 @ 2024-01-13-06-37-19": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "203 @ 2024-01-13-06-37-19", 8 | "454 @ 2024-02-05-16-34-32", 9 | "540 @ 2024-02-05-20-55-46" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/205 @ 2023-12-30-13-02-59.json: -------------------------------------------------------------------------------- 1 | { 2 | "205 @ 2023-12-30-13-02-59": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "205 @ 2023-12-30-13-02-59", 8 | "420 @ 2024-01-06-21-39-29", 9 | "311 @ 2023-12-31-23-06-53" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/212 @ 2023-12-31-15-14-14.json: -------------------------------------------------------------------------------- 1 | { 2 | "212 @ 2023-12-31-15-14-14": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "212 @ 2023-12-31-15-14-14", 8 | "212 @ 2023-12-29-15-51-41", 9 | "545 @ 2024-01-28-09-10-43" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } 
-------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/216 @ 2023-12-31-16-12-31.json: -------------------------------------------------------------------------------- 1 | { 2 | "216 @ 2023-12-31-16-12-31": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "216 @ 2023-12-31-16-12-31", 8 | "215 @ 2024-01-11-07-13-15", 9 | "486 @ 2023-12-28-10-33-57" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/226 @ 2023-12-27-12-59-37.json: -------------------------------------------------------------------------------- 1 | { 2 | "226 @ 2023-12-27-12-59-37": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "226 @ 2023-12-27-12-59-37", 8 | "147 @ 2024-01-01-00-00-32", 9 | "467 @ 2023-12-26-09-53-15" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/228 @ 2023-12-26-22-02-47.json: -------------------------------------------------------------------------------- 1 | { 2 | "228 @ 2023-12-26-22-02-47": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "228 @ 2023-12-26-22-02-47", 8 | "355 @ 2024-01-01-14-54-14", 9 | "159 @ 2024-01-13-11-13-30" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/235 @ 2024-01-01-00-13-13.json: -------------------------------------------------------------------------------- 1 | { 2 | "235 @ 2024-01-01-00-13-13": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "235 @ 
2024-01-01-00-13-13", 8 | "322 @ 2024-02-08-11-46-16", 9 | "362 @ 2023-12-31-18-56-59" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/240 @ 2024-01-08-03-34-34.json: -------------------------------------------------------------------------------- 1 | { 2 | "240 @ 2024-01-08-03-34-34": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "240 @ 2024-01-08-03-34-34", 8 | "274 @ 2023-12-27-09-09-52", 9 | "326 @ 2024-02-08-11-54-47" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/258 @ 2023-12-30-14-42-44.json: -------------------------------------------------------------------------------- 1 | { 2 | "258 @ 2023-12-30-14-42-44": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "258 @ 2023-12-30-14-42-44", 8 | "522 @ 2024-01-06-22-02-23", 9 | "567 @ 2024-01-07-16-35-13" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/259 @ 2023-12-30-14-47-53.json: -------------------------------------------------------------------------------- 1 | { 2 | "259 @ 2023-12-30-14-47-53": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "259 @ 2023-12-30-14-47-53", 8 | "452 @ 2024-02-07-20-03-30", 9 | "411 @ 2023-12-25-10-30-07" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/260 @ 2024-01-01-14-11-28.json: -------------------------------------------------------------------------------- 1 | { 2 | "260 
@ 2024-01-01-14-11-28": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "260 @ 2024-01-01-14-11-28", 8 | "264 @ 2024-02-22-07-37-35", 9 | "322 @ 2024-02-08-11-46-16" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/276 @ 2023-12-26-22-12-50.json: -------------------------------------------------------------------------------- 1 | { 2 | "276 @ 2023-12-26-22-12-50": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "276 @ 2023-12-26-22-12-50", 8 | "163 @ 2024-01-13-17-57-52", 9 | "319 @ 2024-01-01-00-19-57" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/280 @ 2024-01-09-02-10-40.json: -------------------------------------------------------------------------------- 1 | { 2 | "280 @ 2024-01-09-02-10-40": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "280 @ 2024-01-09-02-10-40", 8 | "586 @ 2023-12-27-11-21-14", 9 | "360 @ 2024-01-05-14-58-44" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/281 @ 2024-02-18-06-05-33.json: -------------------------------------------------------------------------------- 1 | { 2 | "281 @ 2024-02-18-06-05-33": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "281 @ 2024-02-18-06-05-33", 8 | "323 @ 2024-01-01-16-35-18", 9 | "239 @ 2024-01-08-03-33-42" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/282 @ 
2024-01-09-02-16-45.json: -------------------------------------------------------------------------------- 1 | { 2 | "282 @ 2024-01-09-02-16-45": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "282 @ 2024-01-09-02-16-45", 8 | "274 @ 2024-02-07-15-51-13", 9 | "689 @ 2023-12-29-03-36-01" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/285 @ 2024-01-02-19-53-59.json: -------------------------------------------------------------------------------- 1 | { 2 | "285 @ 2024-01-02-19-53-59": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "285 @ 2024-01-02-19-53-59", 8 | "334 @ 2024-01-02-14-57-29", 9 | "261 @ 2024-01-08-03-38-00" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/296 @ 2024-01-14-06-32-18.json: -------------------------------------------------------------------------------- 1 | { 2 | "296 @ 2024-01-14-06-32-18": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "296 @ 2024-01-14-06-32-18", 8 | "174 @ 2024-01-08-12-04-28", 9 | "175 @ 2024-01-08-12-10-16" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/299 @ 2024-01-09-02-51-58.json: -------------------------------------------------------------------------------- 1 | { 2 | "299 @ 2024-01-09-02-51-58": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "299 @ 2024-01-09-02-51-58", 8 | "436 @ 2024-02-21-09-25-00", 9 | "161 @ 2024-01-13-17-53-56" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } 
-------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/317 @ 2023-12-31-23-22-24.json: -------------------------------------------------------------------------------- 1 | { 2 | "317 @ 2023-12-31-23-22-24": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "317 @ 2023-12-31-23-22-24", 8 | "445 @ 2023-12-25-12-46-03", 9 | "296 @ 2023-12-30-15-17-42" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/322 @ 2024-01-02-16-28-59.json: -------------------------------------------------------------------------------- 1 | { 2 | "322 @ 2024-01-02-16-28-59": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "322 @ 2024-01-02-16-28-59", 8 | "280 @ 2024-01-09-02-10-40", 9 | "519 @ 2023-12-28-19-42-44" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/323 @ 2024-01-02-16-31-55.json: -------------------------------------------------------------------------------- 1 | { 2 | "323 @ 2024-01-02-16-31-55": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "323 @ 2024-01-02-16-31-55", 8 | "572 @ 2023-12-27-09-22-10", 9 | "144 @ 2023-12-31-23-46-12" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/327 @ 2023-12-26-22-37-29.json: -------------------------------------------------------------------------------- 1 | { 2 | "327 @ 2023-12-26-22-37-29": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "327 @ 
2023-12-26-22-37-29", 8 | "143 @ 2024-01-12-23-46-06", 9 | "528 @ 2024-02-22-20-27-46" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/332 @ 2024-01-02-15-52-51.json: -------------------------------------------------------------------------------- 1 | { 2 | "332 @ 2024-01-02-15-52-51": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "332 @ 2024-01-02-15-52-51", 8 | "466 @ 2023-12-26-09-52-38", 9 | "519 @ 2024-01-07-20-48-36" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/336 @ 2024-01-04-15-02-17.json: -------------------------------------------------------------------------------- 1 | { 2 | "336 @ 2024-01-04-15-02-17": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "336 @ 2024-01-04-15-02-17", 8 | "328 @ 2023-12-26-22-39-55", 9 | "325 @ 2024-01-01-17-07-42" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/343 @ 2023-12-30-17-04-17.json: -------------------------------------------------------------------------------- 1 | { 2 | "343 @ 2023-12-30-17-04-17": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "343 @ 2023-12-30-17-04-17", 8 | "747 @ 2024-01-07-12-29-06", 9 | "810 @ 2024-01-14-05-46-56" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/345 @ 2023-12-29-18-38-39.json: -------------------------------------------------------------------------------- 1 | { 2 | "345 
@ 2023-12-29-18-38-39": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "345 @ 2023-12-29-18-38-39", 8 | "709 @ 2023-12-31-03-06-35", 9 | "775 @ 2024-02-15-01-14-54" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/350 @ 2024-01-04-18-16-40.json: -------------------------------------------------------------------------------- 1 | { 2 | "350 @ 2024-01-04-18-16-40": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "350 @ 2024-01-04-18-16-40", 8 | "594 @ 2023-12-28-03-50-23", 9 | "750 @ 2024-01-06-21-43-28" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/357 @ 2024-01-06-14-24-18.json: -------------------------------------------------------------------------------- 1 | { 2 | "357 @ 2024-01-06-14-24-18": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "357 @ 2024-01-06-14-24-18", 8 | "445 @ 2024-01-06-21-46-59", 9 | "663 @ 2023-12-29-03-13-37" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/360 @ 2023-12-31-18-49-53.json: -------------------------------------------------------------------------------- 1 | { 2 | "360 @ 2023-12-31-18-49-53": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "360 @ 2023-12-31-18-49-53", 8 | "240 @ 2023-12-29-09-15-43", 9 | "656 @ 2024-01-03-00-27-44" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/374 @ 
2024-01-07-00-51-38.json: -------------------------------------------------------------------------------- 1 | { 2 | "374 @ 2024-01-07-00-51-38": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "374 @ 2024-01-07-00-51-38", 8 | "773 @ 2024-01-16-05-39-20", 9 | "454 @ 2023-12-31-19-55-06" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/384 @ 2024-01-05-15-10-54.json: -------------------------------------------------------------------------------- 1 | { 2 | "384 @ 2024-01-05-15-10-54": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "384 @ 2024-01-05-15-10-54", 8 | "276 @ 2024-02-07-15-52-28", 9 | "333 @ 2024-01-01-19-58-42" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/385 @ 2023-12-28-13-47-48.json: -------------------------------------------------------------------------------- 1 | { 2 | "385 @ 2023-12-28-13-47-48": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "385 @ 2023-12-28-13-47-48", 8 | "329 @ 2024-01-02-15-37-56", 9 | "150 @ 2024-01-13-10-55-39" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/386 @ 2024-01-05-15-23-56.json: -------------------------------------------------------------------------------- 1 | { 2 | "386 @ 2024-01-05-15-23-56": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "386 @ 2024-01-05-15-23-56", 8 | "241 @ 2024-01-08-03-36-07", 9 | "573 @ 2023-12-28-23-27-25" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } 
-------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/393 @ 2023-12-30-17-40-03.json: -------------------------------------------------------------------------------- 1 | { 2 | "393 @ 2023-12-30-17-40-03": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "393 @ 2023-12-30-17-40-03", 8 | "785 @ 2024-01-06-21-06-28", 9 | "135 @ 2024-01-08-12-59-49" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/402 @ 2024-01-06-18-26-07.json: -------------------------------------------------------------------------------- 1 | { 2 | "402 @ 2024-01-06-18-26-07": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "402 @ 2024-01-06-18-26-07", 8 | "636 @ 2023-12-31-16-31-59", 9 | "609 @ 2024-01-03-20-32-48" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/407 @ 2023-12-30-20-51-23.json: -------------------------------------------------------------------------------- 1 | { 2 | "407 @ 2023-12-30-20-51-23": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "407 @ 2023-12-30-20-51-23", 8 | "625 @ 2024-01-01-19-33-24", 9 | "637 @ 2024-01-01-19-56-36" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/412 @ 2023-12-25-10-32-50.json: -------------------------------------------------------------------------------- 1 | { 2 | "412 @ 2023-12-25-10-32-50": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "412 @ 
2023-12-25-10-32-50", 8 | "294 @ 2023-12-30-15-07-58", 9 | "305 @ 2023-12-30-15-31-37" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/413 @ 2023-12-25-10-36-22.json: -------------------------------------------------------------------------------- 1 | { 2 | "413 @ 2023-12-25-10-36-22": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "413 @ 2023-12-25-10-36-22", 8 | "442 @ 2023-12-25-12-41-40", 9 | "753 @ 2024-01-07-13-01-24" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/417 @ 2023-12-29-08-28-22.json: -------------------------------------------------------------------------------- 1 | { 2 | "417 @ 2023-12-29-08-28-22": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "417 @ 2023-12-29-08-28-22", 8 | "390 @ 2024-01-06-14-41-16", 9 | "450 @ 2024-01-06-13-00-03" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/422 @ 2023-12-25-12-33-35.json: -------------------------------------------------------------------------------- 1 | { 2 | "422 @ 2023-12-25-12-33-35": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "422 @ 2023-12-25-12-33-35", 8 | "744 @ 2023-12-31-03-47-20", 9 | "316 @ 2024-02-17-14-28-33" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/438 @ 2024-01-05-10-41-27.json: -------------------------------------------------------------------------------- 1 | { 2 | "438 
@ 2024-01-05-10-41-27": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "438 @ 2024-01-05-10-41-27", 8 | "353 @ 2024-01-01-22-40-26", 9 | "319 @ 2024-01-06-13-22-12" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/446 @ 2024-01-06-21-47-36.json: -------------------------------------------------------------------------------- 1 | { 2 | "446 @ 2024-01-06-21-47-36": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "446 @ 2024-01-06-21-47-36", 8 | "443 @ 2023-12-25-12-42-53", 9 | "483 @ 2024-02-09-10-45-06" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/450 @ 2023-12-25-13-19-39.json: -------------------------------------------------------------------------------- 1 | { 2 | "450 @ 2023-12-25-13-19-39": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "450 @ 2023-12-25-13-19-39", 8 | "579 @ 2023-12-25-09-25-08", 9 | "102 @ 2023-12-27-00-57-01" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/457 @ 2023-12-28-09-01-06.json: -------------------------------------------------------------------------------- 1 | { 2 | "457 @ 2023-12-28-09-01-06": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "457 @ 2023-12-28-09-01-06", 8 | "503 @ 2024-01-01-11-01-44", 9 | "456 @ 2024-01-28-08-49-23" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/461 @ 
2023-12-28-09-48-06.json: -------------------------------------------------------------------------------- 1 | { 2 | "461 @ 2023-12-28-09-48-06": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "461 @ 2023-12-28-09-48-06", 8 | "472 @ 2024-01-07-18-49-32", 9 | "115 @ 2023-12-31-10-24-48" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/464 @ 2023-12-28-10-08-46.json: -------------------------------------------------------------------------------- 1 | { 2 | "464 @ 2023-12-28-10-08-46": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "464 @ 2023-12-28-10-08-46", 8 | "292 @ 2024-02-18-07-00-54", 9 | "244 @ 2024-01-06-23-50-16" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/468 @ 2024-01-06-15-26-01.json: -------------------------------------------------------------------------------- 1 | { 2 | "468 @ 2024-01-06-15-26-01": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "468 @ 2024-01-06-15-26-01", 8 | "333 @ 2024-01-01-13-19-13", 9 | "466 @ 2024-02-09-14-31-19" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/472 @ 2024-01-07-01-30-10.json: -------------------------------------------------------------------------------- 1 | { 2 | "472 @ 2024-01-07-01-30-10": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "472 @ 2024-01-07-01-30-10", 8 | "701 @ 2023-12-31-02-48-17", 9 | "462 @ 2024-02-05-17-10-26" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } 
-------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/477 @ 2023-12-26-09-10-42.json: -------------------------------------------------------------------------------- 1 | { 2 | "477 @ 2023-12-26-09-10-42": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "477 @ 2023-12-26-09-10-42", 8 | "310 @ 2023-12-31-12-32-17", 9 | "412 @ 2024-01-08-00-03-39" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/480 @ 2023-12-26-09-12-33.json: -------------------------------------------------------------------------------- 1 | { 2 | "480 @ 2023-12-26-09-12-33": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "480 @ 2023-12-26-09-12-33", 8 | "303 @ 2024-02-06-23-11-24", 9 | "658 @ 2023-12-31-18-41-07" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/483 @ 2023-12-26-09-27-35.json: -------------------------------------------------------------------------------- 1 | { 2 | "483 @ 2023-12-26-09-27-35": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "483 @ 2023-12-26-09-27-35", 8 | "752 @ 2024-01-06-22-02-01", 9 | "182 @ 2023-12-26-21-48-56" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/489 @ 2023-12-28-10-38-09.json: -------------------------------------------------------------------------------- 1 | { 2 | "489 @ 2023-12-28-10-38-09": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "489 @ 
2023-12-28-10-38-09", 8 | "116 @ 2024-01-16-09-09-51", 9 | "157 @ 2023-12-31-10-02-20" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/503 @ 2023-12-28-11-00-33.json: -------------------------------------------------------------------------------- 1 | { 2 | "503 @ 2023-12-28-11-00-33": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "503 @ 2023-12-28-11-00-33", 8 | "120 @ 2024-01-08-19-46-04", 9 | "496 @ 2024-02-17-14-48-36" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/508 @ 2023-12-26-14-46-44.json: -------------------------------------------------------------------------------- 1 | { 2 | "508 @ 2023-12-26-14-46-44": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "508 @ 2023-12-26-14-46-44", 8 | "437 @ 2024-02-21-09-33-06", 9 | "338 @ 2024-01-04-15-35-31" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/510 @ 2023-12-26-23-32-50.json: -------------------------------------------------------------------------------- 1 | { 2 | "510 @ 2023-12-26-23-32-50": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "510 @ 2023-12-26-23-32-50", 8 | "164 @ 2024-01-16-06-25-25", 9 | "124 @ 2023-12-27-12-39-02" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/519 @ 2023-12-26-23-48-13.json: -------------------------------------------------------------------------------- 1 | { 2 | "519 
@ 2023-12-26-23-48-13": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "519 @ 2023-12-26-23-48-13", 8 | "657 @ 2023-12-29-12-34-37", 9 | "324 @ 2024-02-22-12-20-23" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/521 @ 2024-01-07-20-51-07.json: -------------------------------------------------------------------------------- 1 | { 2 | "521 @ 2024-01-07-20-51-07": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "521 @ 2024-01-07-20-51-07", 8 | "264 @ 2024-02-16-22-48-20", 9 | "519 @ 2024-02-22-19-06-38" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/522 @ 2024-01-05-16-49-40.json: -------------------------------------------------------------------------------- 1 | { 2 | "522 @ 2024-01-05-16-49-40": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "522 @ 2024-01-05-16-49-40", 8 | "304 @ 2024-01-30-18-14-09", 9 | "594 @ 2023-12-25-09-33-33" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/524 @ 2023-12-25-08-31-30.json: -------------------------------------------------------------------------------- 1 | { 2 | "524 @ 2023-12-25-08-31-30": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "524 @ 2023-12-25-08-31-30", 8 | "477 @ 2024-01-06-21-54-29", 9 | "419 @ 2024-01-07-20-34-53" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/534 @ 
2023-12-25-08-48-07.json: -------------------------------------------------------------------------------- 1 | { 2 | "534 @ 2023-12-25-08-48-07": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "534 @ 2023-12-25-08-48-07", 8 | "533 @ 2024-01-06-22-11-08", 9 | "106 @ 2024-01-14-06-00-24" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/539 @ 2024-01-07-17-04-44.json: -------------------------------------------------------------------------------- 1 | { 2 | "539 @ 2024-01-07-17-04-44": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "539 @ 2024-01-07-17-04-44", 8 | "680 @ 2023-12-28-20-27-04", 9 | "543 @ 2024-01-07-02-15-29" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/547 @ 2023-12-28-14-24-32.json: -------------------------------------------------------------------------------- 1 | { 2 | "547 @ 2023-12-28-14-24-32": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "547 @ 2023-12-28-14-24-32", 8 | "499 @ 2023-12-31-20-37-14", 9 | "505 @ 2024-01-07-19-01-31" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/571 @ 2023-12-27-09-20-35.json: -------------------------------------------------------------------------------- 1 | { 2 | "571 @ 2023-12-27-09-20-35": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "571 @ 2023-12-27-09-20-35", 8 | "516 @ 2023-12-28-19-24-43", 9 | "277 @ 2024-01-06-13-07-30" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } 
-------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/578 @ 2023-12-25-09-23-43.json: -------------------------------------------------------------------------------- 1 | { 2 | "578 @ 2023-12-25-09-23-43": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "578 @ 2023-12-25-09-23-43", 8 | "784 @ 2023-12-31-04-33-28", 9 | "104 @ 2024-01-14-05-57-58" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/579 @ 2023-12-25-09-25-08.json: -------------------------------------------------------------------------------- 1 | { 2 | "579 @ 2023-12-25-09-25-08": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "579 @ 2023-12-25-09-25-08", 8 | "451 @ 2024-02-16-20-02-14", 9 | "414 @ 2024-01-08-00-08-23" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/580 @ 2024-01-03-13-39-07.json: -------------------------------------------------------------------------------- 1 | { 2 | "580 @ 2024-01-03-13-39-07": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "580 @ 2024-01-03-13-39-07", 8 | "636 @ 2024-02-05-14-11-46", 9 | "633 @ 2024-02-05-14-07-41" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/585 @ 2024-01-09-20-18-54.json: -------------------------------------------------------------------------------- 1 | { 2 | "585 @ 2024-01-09-20-18-54": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "585 @ 
2024-01-09-20-18-54", 8 | "240 @ 2023-12-29-09-15-43", 9 | "189 @ 2023-12-29-09-48-19" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/593 @ 2023-12-25-09-31-55.json: -------------------------------------------------------------------------------- 1 | { 2 | "593 @ 2023-12-25-09-31-55": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "593 @ 2023-12-25-09-31-55", 8 | "482 @ 2024-02-09-10-43-08", 9 | "395 @ 2023-12-30-18-11-43" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/595 @ 2024-01-06-19-48-54.json: -------------------------------------------------------------------------------- 1 | { 2 | "595 @ 2024-01-06-19-48-54": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "595 @ 2024-01-06-19-48-54", 8 | "581 @ 2024-01-03-13-44-30", 9 | "627 @ 2024-01-08-01-14-35" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/601 @ 2023-12-30-22-56-49.json: -------------------------------------------------------------------------------- 1 | { 2 | "601 @ 2023-12-30-22-56-49": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "601 @ 2023-12-30-22-56-49", 8 | "647 @ 2024-01-01-09-41-54", 9 | "642 @ 2024-01-08-01-51-36" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/613 @ 2023-12-31-13-36-38.json: -------------------------------------------------------------------------------- 1 | { 2 | "613 
@ 2023-12-31-13-36-38": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "613 @ 2023-12-31-13-36-38", 8 | "646 @ 2024-01-04-12-06-45", 9 | "652 @ 2024-01-03-16-23-50" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/627 @ 2024-01-01-19-39-28.json: -------------------------------------------------------------------------------- 1 | { 2 | "627 @ 2024-01-01-19-39-28": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "627 @ 2024-01-01-19-39-28", 8 | "734 @ 2024-01-04-20-21-44", 9 | "652 @ 2024-01-03-16-23-50" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/630 @ 2023-12-31-16-19-07.json: -------------------------------------------------------------------------------- 1 | { 2 | "630 @ 2023-12-31-16-19-07": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "630 @ 2023-12-31-16-19-07", 8 | "719 @ 2024-01-04-23-27-19", 9 | "401 @ 2024-02-08-23-28-18" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/637 @ 2024-01-01-19-56-36.json: -------------------------------------------------------------------------------- 1 | { 2 | "637 @ 2024-01-01-19-56-36": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "637 @ 2024-01-01-19-56-36", 8 | "714 @ 2024-01-04-13-30-33", 9 | "607 @ 2024-01-03-20-29-09" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/644 @ 
2024-01-01-09-28-58.json: -------------------------------------------------------------------------------- 1 | { 2 | "644 @ 2024-01-01-09-28-58": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "644 @ 2024-01-01-09-28-58", 8 | "648 @ 2024-01-01-20-12-53", 9 | "605 @ 2024-02-05-14-05-58" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/645 @ 2024-01-03-16-11-31.json: -------------------------------------------------------------------------------- 1 | { 2 | "645 @ 2024-01-03-16-11-31": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "645 @ 2024-01-03-16-11-31", 8 | "406 @ 2024-02-17-14-39-43", 9 | "731 @ 2024-01-16-08-17-15" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/646 @ 2024-01-03-16-13-26.json: -------------------------------------------------------------------------------- 1 | { 2 | "646 @ 2024-01-03-16-13-26": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "646 @ 2024-01-03-16-13-26", 8 | "624 @ 2024-01-06-19-37-28", 9 | "614 @ 2023-12-31-13-37-15" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/647 @ 2024-01-01-09-41-54.json: -------------------------------------------------------------------------------- 1 | { 2 | "647 @ 2024-01-01-09-41-54": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "647 @ 2024-01-01-09-41-54", 8 | "715 @ 2024-01-04-23-17-58", 9 | "409 @ 2023-12-30-21-06-19" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } 
-------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/648 @ 2024-01-01-09-42-54.json: -------------------------------------------------------------------------------- 1 | { 2 | "648 @ 2024-01-01-09-42-54": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "648 @ 2024-01-01-09-42-54", 8 | "629 @ 2024-01-01-19-50-08", 9 | "400 @ 2023-12-31-20-24-32" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/649 @ 2024-01-01-09-44-07.json: -------------------------------------------------------------------------------- 1 | { 2 | "649 @ 2024-01-01-09-44-07": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "649 @ 2024-01-01-09-44-07", 8 | "600 @ 2024-01-01-18-15-53", 9 | "630 @ 2024-01-08-01-22-43" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/654 @ 2023-12-27-23-19-13.json: -------------------------------------------------------------------------------- 1 | { 2 | "654 @ 2023-12-27-23-19-13": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "654 @ 2023-12-27-23-19-13", 8 | "146 @ 2023-12-28-23-05-07", 9 | "319 @ 2023-12-30-20-22-58" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/661 @ 2023-12-25-09-45-59.json: -------------------------------------------------------------------------------- 1 | { 2 | "661 @ 2023-12-25-09-45-59": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "661 @ 
2023-12-25-09-45-59", 8 | "175 @ 2024-01-08-12-10-16", 9 | "801 @ 2024-01-14-04-48-36" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/669 @ 2023-12-25-09-59-18.json: -------------------------------------------------------------------------------- 1 | { 2 | "669 @ 2023-12-25-09-59-18": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "669 @ 2023-12-25-09-59-18", 8 | "350 @ 2024-02-08-22-39-41", 9 | "592 @ 2024-01-08-02-38-47" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/677 @ 2023-12-28-20-24-49.json: -------------------------------------------------------------------------------- 1 | { 2 | "677 @ 2023-12-28-20-24-49": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "677 @ 2023-12-28-20-24-49", 8 | "775 @ 2024-01-02-15-10-49", 9 | "459 @ 2024-01-07-18-38-30" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/693 @ 2023-12-29-03-39-36.json: -------------------------------------------------------------------------------- 1 | { 2 | "693 @ 2023-12-29-03-39-36": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "693 @ 2023-12-29-03-39-36", 8 | "332 @ 2024-01-06-13-40-29", 9 | "338 @ 2024-01-02-15-29-33" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/695 @ 2023-12-28-16-52-02.json: -------------------------------------------------------------------------------- 1 | { 2 | "695 
@ 2023-12-28-16-52-02": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "695 @ 2023-12-28-16-52-02", 8 | "699 @ 2024-01-02-10-28-50", 9 | "546 @ 2024-01-07-19-22-35" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/706 @ 2024-01-02-11-32-59.json: -------------------------------------------------------------------------------- 1 | { 2 | "706 @ 2024-01-02-11-32-59": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "706 @ 2024-01-02-11-32-59", 8 | "423 @ 2024-02-26-19-11-17", 9 | "347 @ 2023-12-29-18-49-29" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/733 @ 2024-01-04-20-18-46.json: -------------------------------------------------------------------------------- 1 | { 2 | "733 @ 2024-01-04-20-18-46": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "733 @ 2024-01-04-20-18-46", 8 | "638 @ 2024-02-05-14-13-47", 9 | "642 @ 2024-01-01-20-06-37" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/736 @ 2023-12-31-03-41-32.json: -------------------------------------------------------------------------------- 1 | { 2 | "736 @ 2023-12-31-03-41-32": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "736 @ 2023-12-31-03-41-32", 8 | "578 @ 2023-12-31-17-45-58", 9 | "396 @ 2023-12-30-18-22-05" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/742 @ 
2024-01-06-20-59-19.json: -------------------------------------------------------------------------------- 1 | { 2 | "742 @ 2024-01-06-20-59-19": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "742 @ 2024-01-06-20-59-19", 8 | "808 @ 2024-01-04-07-34-04", 9 | "442 @ 2023-12-25-12-41-40" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/743 @ 2024-01-07-12-20-29.json: -------------------------------------------------------------------------------- 1 | { 2 | "743 @ 2024-01-07-12-20-29": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "743 @ 2024-01-07-12-20-29", 8 | "525 @ 2024-01-07-21-26-19", 9 | "174 @ 2023-12-31-01-53-08" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/744 @ 2023-12-31-03-47-20.json: -------------------------------------------------------------------------------- 1 | { 2 | "744 @ 2023-12-31-03-47-20": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "744 @ 2023-12-31-03-47-20", 8 | "392 @ 2023-12-31-21-20-54", 9 | "785 @ 2024-01-06-21-06-28" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/745 @ 2024-01-07-12-24-30.json: -------------------------------------------------------------------------------- 1 | { 2 | "745 @ 2024-01-07-12-24-30": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "745 @ 2024-01-07-12-24-30", 8 | "665 @ 2024-01-03-01-07-18", 9 | "420 @ 2024-01-06-21-39-29" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } 
-------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/746 @ 2024-01-14-04-09-43.json: -------------------------------------------------------------------------------- 1 | { 2 | "746 @ 2024-01-14-04-09-43": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "746 @ 2024-01-14-04-09-43", 8 | "670 @ 2023-12-25-10-00-06", 9 | "751 @ 2024-01-06-15-52-16" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/747 @ 2024-01-06-15-28-29.json: -------------------------------------------------------------------------------- 1 | { 2 | "747 @ 2024-01-06-15-28-29": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "747 @ 2024-01-06-15-28-29", 8 | "747 @ 2023-12-31-03-51-24", 9 | "103 @ 2024-01-14-05-56-24" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/753 @ 2024-01-14-04-26-12.json: -------------------------------------------------------------------------------- 1 | { 2 | "753 @ 2024-01-14-04-26-12": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "753 @ 2024-01-14-04-26-12", 8 | "104 @ 2023-12-28-09-37-55", 9 | "787 @ 2024-01-14-04-41-43" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/768 @ 2023-12-31-04-23-22.json: -------------------------------------------------------------------------------- 1 | { 2 | "768 @ 2023-12-31-04-23-22": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "768 @ 
2023-12-31-04-23-22", 8 | "128 @ 2024-01-08-20-03-37", 9 | "461 @ 2024-01-07-18-42-47" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/770 @ 2024-01-13-03-25-04.json: -------------------------------------------------------------------------------- 1 | { 2 | "770 @ 2024-01-13-03-25-04": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "770 @ 2024-01-13-03-25-04", 8 | "194 @ 2023-12-29-14-32-16", 9 | "112 @ 2024-01-08-19-07-04" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/771 @ 2024-01-16-05-32-23.json: -------------------------------------------------------------------------------- 1 | { 2 | "771 @ 2024-01-16-05-32-23": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "771 @ 2024-01-16-05-32-23", 8 | "702 @ 2023-12-28-18-57-43", 9 | "701 @ 2023-12-28-18-56-33" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/775 @ 2024-01-16-05-29-03.json: -------------------------------------------------------------------------------- 1 | { 2 | "775 @ 2024-01-16-05-29-03": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "775 @ 2024-01-16-05-29-03", 8 | "456 @ 2023-12-31-20-07-58", 9 | "771 @ 2024-01-06-12-54-47" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/777 @ 2024-01-14-11-29-03.json: -------------------------------------------------------------------------------- 1 | { 2 | "777 
@ 2024-01-14-11-29-03": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "777 @ 2024-01-14-11-29-03", 8 | "245 @ 2023-12-31-18-49-58", 9 | "461 @ 2024-01-07-18-42-47" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/787 @ 2024-01-14-04-41-43.json: -------------------------------------------------------------------------------- 1 | { 2 | "787 @ 2024-01-14-04-41-43": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "787 @ 2024-01-14-04-41-43", 8 | "660 @ 2024-01-05-21-36-05", 9 | "339 @ 2023-12-31-20-58-06" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/800 @ 2024-01-14-04-46-53.json: -------------------------------------------------------------------------------- 1 | { 2 | "800 @ 2024-01-14-04-46-53": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "800 @ 2024-01-14-04-46-53", 8 | "303 @ 2023-12-30-15-27-26", 9 | "784 @ 2024-02-01-23-05-15" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/806 @ 2024-01-07-02-19-21.json: -------------------------------------------------------------------------------- 1 | { 2 | "806 @ 2024-01-07-02-19-21": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "806 @ 2024-01-07-02-19-21", 8 | "396 @ 2024-01-07-20-29-04", 9 | "524 @ 2023-12-25-08-31-30" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/documentation/demo_segmentation/demo_2_config/810 @ 
2024-01-03-01-50-10.json: -------------------------------------------------------------------------------- 1 | { 2 | "810 @ 2024-01-03-01-50-10": { 3 | "3": { 4 | "is_same_site=True": { 5 | "concatenate": { 6 | "demo_names": [ 7 | "810 @ 2024-01-03-01-50-10", 8 | "170 @ 2023-12-26-11-30-46", 9 | "442 @ 2023-12-27-10-59-11" 10 | ] 11 | } 12 | } 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /data/experimental_results/demo_segmentation/demo_segmentation.tex: -------------------------------------------------------------------------------- 1 | \begin{tabular}{lrllr} 2 | \toprule 3 | Ablation & # of Tasks & ARI & V-Measure & Count \\ 4 | \midrule 5 | GeminiPro - TD+KF+SOP & 3 & 0.55 & 0.66 & 162 \\ 6 | GeminiPro - KF+SOP & 3 & 0.53 & 0.65 & 162 \\ 7 | GeminiPro - KF & 3 & 0.58 & 0.69 & 162 \\ 8 | GPT4 - TD+KF+SOP & 3 & 0.85 & 0.88 & 159 \\ 9 | GPT4 - KF+SOP & 3 & 0.85 & 0.87 & 162 \\ 10 | GPT4 - KF & 3 & 0.80 & 0.86 & 162 \\ 11 | \bottomrule 12 | \end{tabular} 13 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pynput 2 | selenium 3 | pywinctl 4 | mss 5 | pillow 6 | vidgear 7 | IPython 8 | lxml 9 | pyautogui 10 | numpy 11 | tqdm 12 | moviepy 13 | playwright 14 | gdown 15 | pydrive2 16 | pandas 17 | openpyxl 18 | python-Levenshtein 19 | openai 20 | scikit-learn 21 | dirtyjson 22 | matplotlib 23 | seaborn 24 | google-generativeai 25 | tiktoken 26 | einops 27 | opt_einsum 28 | nbformat 29 | plotly 30 | google-auth-oauthlib 31 | python-dotenv 32 | plotly 33 | kaleido 34 | anthropic 35 | together 36 | gdown 37 | -------------------------------------------------------------------------------- /data/experimental_results/demo_validation/df_task_type_task_completion.tex: -------------------------------------------------------------------------------- 1 | \begin{tabular}{lll} 2 | \toprule 3 
| Ablation & Setting & Accuracy \\ 4 | \midrule 5 | GPT4 - TD+KF+SOP & truncate & 0.97 \\ 6 | GeminiPro - TD+KF & true & 0.94 \\ 7 | GeminiPro - TD+KF+SOP & true & 0.94 \\ 8 | GPT4 - TD+KF & truncate & 0.92 \\ 9 | GPT4 - TD+KF+SOP & true & 0.89 \\ 10 | Claude3 - TD+KF & true & 0.85 \\ 11 | GPT4 - TD+KF & true & 0.84 \\ 12 | Claude3 - TD+KF & truncate & 0.72 \\ 13 | Claude3 - TD+KF+SOP & true & 0.58 \\ 14 | Claude3 - TD+KF+SOP & truncate & 0.57 \\ 15 | GeminiPro - TD+KF & truncate & 0.13 \\ 16 | GeminiPro - TD+KF+SOP & truncate & 0.07 \\ 17 | \bottomrule 18 | \end{tabular} 19 | -------------------------------------------------------------------------------- /data/experimental_results/demo_validation/df_task_completion.tex: -------------------------------------------------------------------------------- 1 | \begin{tabular}{llllllllr} 2 | \toprule 3 | model & is_td & is_kf & is_act & is_include_sop & Precision & Recall & F1 & Count \\ 4 | \midrule 5 | GeminiPro & \checkmark & \checkmark & & \checkmark & 0.94 & 0.25 & 0.40 & 556 \\ 6 | GeminiPro & \checkmark & \checkmark & & & 0.94 & 0.26 & 0.41 & 556 \\ 7 | GPT4 & \checkmark & \checkmark & & \checkmark & 0.89 & 0.90 & 0.90 & 560 \\ 8 | GPT4 & \checkmark & \checkmark & & & 0.84 & 0.77 & 0.81 & 560 \\ 9 | Claude3 & \checkmark & \checkmark & & \checkmark & 0.58 & 0.31 & 0.40 & 548 \\ 10 | Claude3 & \checkmark & \checkmark & & & 0.85 & 0.50 & 0.63 & 352 \\ 11 | \bottomrule 12 | \end{tabular} 13 | -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/44.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "gitlab" 4 | ], 5 | "task_id": 44, 6 | "require_login": true, 7 | "storage_state": "./.auth/gitlab_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023", 9 | "geolocation": null, 10 | "intent_template": "Check out my todos", 11 | "instantiation_dict": {}, 12 | 
"intent": "Check out my todos", 13 | "require_reset": false, 14 | "eval": { 15 | "eval_types": [ 16 | "url_match" 17 | ], 18 | "reference_answers": null, 19 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023/dashboard/todos", 20 | "program_html": [], 21 | "url_note": "EXACT" 22 | }, 23 | "intent_template_id": 303 24 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/258.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "gitlab" 4 | ], 5 | "task_id": 258, 6 | "require_login": true, 7 | "storage_state": "./.auth/gitlab_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023", 9 | "geolocation": null, 10 | "intent_template": "See all public projects", 11 | "instantiation_dict": {}, 12 | "intent": "See all public projects", 13 | "require_reset": false, 14 | "eval": { 15 | "eval_types": [ 16 | "url_match" 17 | ], 18 | "reference_answers": null, 19 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023/explore", 20 | "program_html": [], 21 | "url_note": "EXACT" 22 | }, 23 | "intent_template_id": 325 24 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/improvement/sop_ranking/run_experiments.py: -------------------------------------------------------------------------------- 1 | from wonderbread.benchmark.tasks.helpers import run_experiment 2 | from wonderbread.benchmark.tasks.improvement.sop_ranking.main import run 3 | 4 | import argparse 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument( "--model", type=str, default="GPT4", choices=["GPT4", "GeminiPro", "Claude3"] ) 7 | parser.add_argument("--is_debug", action='store_true', default=False, help="If set, run in debug mode (only 3 examples)") 8 | args = parser.parse_args() 9 | 10 | kwarg_settings = [ 11 | {}, 12 | ] 13 | 14 | 
run_experiment(run, __file__, kwarg_settings, n_threads=1, model=args.model, is_debug=args.is_debug, is_path_to_demo_folder=False, is_use_rank_1_df=True, is_skip_completed_ablations=True) 15 | -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/157.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping_admin" 4 | ], 5 | "task_id": 157, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_admin_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7780/admin", 9 | "geolocation": null, 10 | "intent_template": "Show all customers", 11 | "instantiation_dict": {}, 12 | "intent": "Show all customers", 13 | "require_reset": false, 14 | "eval": { 15 | "eval_types": [ 16 | "url_match" 17 | ], 18 | "reference_answers": null, 19 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7780/admin/customer/index/", 20 | "program_html": [], 21 | "url_note": "GOLD in PRED" 22 | }, 23 | "intent_template_id": 255 24 | } -------------------------------------------------------------------------------- /data/experimental_results/sop_generation/results.tex: -------------------------------------------------------------------------------- 1 | \begin{tabular}{lllllll} 2 | \toprule 3 | ablation_human & precision & recall & f1 & ordering & n_lines_pred_sop & n_lines_gold_sop \\ 4 | \midrule 5 | Claude3 - TD & 0.53 & 0.54 & 0.50 & 0.45 & 11.34 & 9.41 \\ 6 | Claude3 - TD+KF & 0.67 & 0.78 & 0.70 & 0.45 & 11.35 & 9.41 \\ 7 | Claude3 - TD+KF+ACT & 0.72 & 0.85 & 0.76 & 0.54 & 10.94 & 9.41 \\ 8 | GPT4 - TD & 0.48 & 0.59 & 0.49 & 0.44 & 13.10 & 9.41 \\ 9 | GPT4 - TD+KF & 0.69 & 0.79 & 0.71 & 0.64 & 10.32 & 9.41 \\ 10 | GPT4 - TD+KF+ACT & 0.80 & 0.88 & 0.82 & 0.77 & 10.26 & 9.41 \\ 11 | GeminiPro - TD & 0.40 & 0.36 & 0.34 & 0.37 & 7.31 & 9.43 \\ 12 | GeminiPro - TD+KF & 0.48 & 0.51 & 0.46 & 0.42 & 11.28 & 9.43 
\\ 13 | GeminiPro - TD+KF+ACT & 0.58 & 0.63 & 0.58 & 0.54 & 11.09 & 9.31 \\ 14 | \bottomrule 15 | \end{tabular} 16 | -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/259.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "gitlab" 4 | ], 5 | "task_id": 259, 6 | "require_login": true, 7 | "storage_state": "./.auth/gitlab_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023", 9 | "geolocation": null, 10 | "intent_template": "Get me my RSS feed token", 11 | "instantiation_dict": {}, 12 | "intent": "Get me my RSS feed token", 13 | "require_reset": false, 14 | "eval": { 15 | "eval_types": [ 16 | "string_match" 17 | ], 18 | "reference_answers": { 19 | "exact_match": "TMN_bBn9Z48qVbUFZV45" 20 | }, 21 | "reference_url": "", 22 | "program_html": [], 23 | "string_note": "", 24 | "reference_answer_raw_annotation": "TMN_bBn9Z48qVbUFZV45" 25 | }, 26 | "intent_template_id": 312 27 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/275.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 275, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Search for \"{{keyword}}\"", 11 | "instantiation_dict": { 12 | "keyword": "xbox" 13 | }, 14 | "intent": "Search for \"xbox\"", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "url_match" 19 | ], 20 | "reference_answers": null, 21 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770/catalogsearch/result/?q=xbox", 22 | "program_html": [], 23 | "url_note": "EXACT" 24 | }, 25 | "intent_template_id": 212 26 | } 
-------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/274.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 274, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Search for \"{{keyword}}\"", 11 | "instantiation_dict": { 12 | "keyword": "usb wifi" 13 | }, 14 | "intent": "Search for \"usb wifi\"", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "url_match" 19 | ], 20 | "reference_answers": null, 21 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770/catalogsearch/result/?q=usb+wifi", 22 | "program_html": [], 23 | "url_note": "EXACT" 24 | }, 25 | "intent_template_id": 212 26 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/72.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 72, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "What is the zip code of {{place}}?", 11 | "instantiation_dict": { 12 | "place": "Yale University" 13 | }, 14 | "intent": "What is the zip code of Yale University?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "exact_match": "06516" 22 | }, 23 | "reference_url": "", 24 | "program_html": [], 25 | "string_note": "", 26 | "reference_answer_raw_annotation": "06516" 27 | }, 28 | "intent_template_id": 70 29 | } -------------------------------------------------------------------------------- 
/wonderbread/benchmark/webarena/156.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "gitlab" 4 | ], 5 | "task_id": 156, 6 | "require_login": true, 7 | "storage_state": "./.auth/gitlab_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023", 9 | "geolocation": null, 10 | "intent_template": "Checkout merge requests assigned to me", 11 | "instantiation_dict": {}, 12 | "intent": "Checkout merge requests assigned to me", 13 | "require_reset": false, 14 | "eval": { 15 | "eval_types": [ 16 | "url_match" 17 | ], 18 | "reference_answers": null, 19 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023/dashboard/merge_requests?assignee_username=byteblaze", 20 | "program_html": [], 21 | "url_note": "EXACT" 22 | }, 23 | "intent_template_id": 290 24 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/71.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 71, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "What is the zip code of {{place}}?", 11 | "instantiation_dict": { 12 | "place": "Chatham University" 13 | }, 14 | "intent": "What is the zip code of Chatham University?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "exact_match": "15232" 22 | }, 23 | "reference_url": "", 24 | "program_html": [], 25 | "string_note": "", 26 | "reference_answer_raw_annotation": "15232" 27 | }, 28 | "intent_template_id": 70 29 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/73.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 73, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "What is the zip code of {{place}}?", 11 | "instantiation_dict": { 12 | "place": "Columbia University" 13 | }, 14 | "intent": "What is the zip code of Columbia University?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "exact_match": "10027" 22 | }, 23 | "reference_url": "", 24 | "program_html": [], 25 | "string_note": "", 26 | "reference_answer_raw_annotation": "10027" 27 | }, 28 | "intent_template_id": 70 29 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/357.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "gitlab" 4 | ], 5 | "task_id": 357, 6 | "require_login": true, 7 | "storage_state": "./.auth/gitlab_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023", 9 | "geolocation": null, 10 | "intent_template": "Checkout merge requests requiring my review", 11 | "instantiation_dict": {}, 12 | "intent": "Checkout merge requests requiring my review", 13 | "require_reset": false, 14 | "eval": { 15 | "eval_types": [ 16 | "url_match" 17 | ], 18 | "reference_answers": null, 19 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023/dashboard/merge_requests?reviewer_username=byteblaze", 20 | "program_html": [], 21 | "url_note": "EXACT" 22 | }, 23 | "intent_template_id": 291 24 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/46.json: -------------------------------------------------------------------------------- 1 | { 
2 | "sites": [ 3 | "gitlab" 4 | ], 5 | "task_id": 46, 6 | "require_login": true, 7 | "storage_state": "./.auth/gitlab_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023/primer/design", 9 | "geolocation": null, 10 | "intent_template": "Check out the most recent open issues", 11 | "instantiation_dict": {}, 12 | "intent": "Check out the most recent open issues", 13 | "require_reset": false, 14 | "eval": { 15 | "eval_types": [ 16 | "url_match" 17 | ], 18 | "reference_answers": null, 19 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023/primer/design/-/issues/?sort=created_date&state=opened", 20 | "program_html": [], 21 | "url_note": "EXACT" 22 | }, 23 | "intent_template_id": 300 24 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/298.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 298, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Show the most recent {{status}} order", 11 | "instantiation_dict": { 12 | "status": "completed" 13 | }, 14 | "intent": "Show the most recent completed order", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "url_match" 19 | ], 20 | "reference_answers": null, 21 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770/sales/order/view/order_id/180/", 22 | "program_html": [], 23 | "url_note": "EXACT" 24 | }, 25 | "intent_template_id": 180 26 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/299.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 299, 
6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Show the most recent {{status}} order", 11 | "instantiation_dict": { 12 | "status": "cancelled" 13 | }, 14 | "intent": "Show the most recent cancelled order", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "url_match" 19 | ], 20 | "reference_answers": null, 21 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770/sales/order/view/order_id/170/", 22 | "program_html": [], 23 | "url_note": "EXACT" 24 | }, 25 | "intent_template_id": 180 26 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/300.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 300, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Show the most recent {{status}} order", 11 | "instantiation_dict": { 12 | "status": "pending" 13 | }, 14 | "intent": "Show the most recent pending order", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "url_match" 19 | ], 20 | "reference_answers": null, 21 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770/sales/order/view/order_id/189/", 22 | "program_html": [], 23 | "url_note": "EXACT" 24 | }, 25 | "intent_template_id": 180 26 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/70.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 70, 6 | "require_login": true, 7 | "storage_state": null, 8 | 
"start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "What is the zip code of {{place}}?", 11 | "instantiation_dict": { 12 | "place": "Carnegie Mellon University" 13 | }, 14 | "intent": "What is the zip code of Carnegie Mellon University?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "exact_match": "15213" 22 | }, 23 | "reference_url": "", 24 | "program_html": [], 25 | "string_note": "", 26 | "reference_answer_raw_annotation": "15213" 27 | }, 28 | "intent_template_id": 70 29 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/276.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 276, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Search for \"{{keyword}}\"", 11 | "instantiation_dict": { 12 | "keyword": "switch accessories" 13 | }, 14 | "intent": "Search for \"switch accessories\"", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "url_match" 19 | ], 20 | "reference_answers": null, 21 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770/catalogsearch/result/?q=switch+accessories", 22 | "program_html": [], 23 | "url_note": "EXACT" 24 | }, 25 | "intent_template_id": 212 26 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/277.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 277, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": 
"http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Search for \"{{keyword}}\"", 11 | "instantiation_dict": { 12 | "keyword": "batteries for iphone 13" 13 | }, 14 | "intent": "Search for \"batteries for iphone 13\"", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "url_match" 19 | ], 20 | "reference_answers": null, 21 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770/catalogsearch/result/?q=iphone+13", 22 | "program_html": [], 23 | "url_note": "EXACT" 24 | }, 25 | "intent_template_id": 212 26 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/301.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 301, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Show the most recent {{status}} order", 11 | "instantiation_dict": { 12 | "status": "processing" 13 | }, 14 | "intent": "Show the most recent processing order", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": "N/A", 21 | "reference_url": "", 22 | "program_html": [], 23 | "string_note": "There is no order in the processing status", 24 | "reference_answer_raw_annotation": "N/A" 25 | }, 26 | "intent_template_id": 180 27 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/302.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 302, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": 
"http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Show the most recent {{status}} order", 11 | "instantiation_dict": { 12 | "status": "out of delivery" 13 | }, 14 | "intent": "Show the most recent out of delivery order", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": "N/A", 21 | "reference_url": "", 22 | "program_html": [], 23 | "string_note": "There is no order out of delivery", 24 | "reference_answer_raw_annotation": "N/A" 25 | }, 26 | "intent_template_id": 180 27 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/313.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 313, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Which number to call for the customer service?", 11 | "instantiation_dict": {}, 12 | "intent": "Which number to call for the customer service?", 13 | "require_reset": false, 14 | "eval": { 15 | "eval_types": [ 16 | "string_match" 17 | ], 18 | "reference_answers": { 19 | "exact_match": "N/A" 20 | }, 21 | "reference_url": "", 22 | "program_html": [], 23 | "string_note": "There is no phone number in the website", 24 | "reference_answer_raw_annotation": "N/A" 25 | }, 26 | "intent_template_id": 134 27 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/41.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping_admin" 4 | ], 5 | "task_id": 41, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_admin_state.json", 8 | "start_url": 
"http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7780/admin", 9 | "geolocation": null, 10 | "intent_template": "List the top {{n}} search terms in my store", 11 | "instantiation_dict": { 12 | "n": "1" 13 | }, 14 | "intent": "List the top 1 search terms in my store", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "exact_match": "hollister" 22 | }, 23 | "reference_url": "", 24 | "program_html": [], 25 | "string_note": "", 26 | "reference_answer_raw_annotation": "hollister" 27 | }, 28 | "intent_template_id": 285 29 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/45.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "gitlab" 4 | ], 5 | "task_id": 45, 6 | "require_login": true, 7 | "storage_state": "./.auth/gitlab_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023/a11yproject/a11yproject.com", 9 | "geolocation": null, 10 | "intent_template": "Check out the most recent open issues", 11 | "instantiation_dict": {}, 12 | "intent": "Check out the most recent open issues", 13 | "require_reset": false, 14 | "eval": { 15 | "eval_types": [ 16 | "url_match" 17 | ], 18 | "reference_answers": null, 19 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023/a11yproject/a11yproject.com/-/issues/?sort=created_asc&state=opened", 20 | "program_html": [], 21 | "url_note": "EXACT" 22 | }, 23 | "intent_template_id": 300 24 | } -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim 2 | 3 | RUN apt-get update && apt-get install -y \ 4 | git \ 5 | ffmpeg \ 6 | unzip \ 7 | # required for evdev 8 | linux-headers-generic \ 9 | gcc \ 10 | && apt-get clean \ 11 | && rm -rf 
/var/lib/apt/lists/* 12 | 13 | RUN git clone https://github.com/HazyResearch/wonderbread.git /app/wonderbread 14 | 15 | WORKDIR /app 16 | 17 | COPY . . 18 | 19 | RUN pip install --no-cache-dir -r requirements.txt 20 | RUN pip install --no-cache-dir -e . 21 | 22 | RUN mkdir -p /app/data/demos 23 | 24 | WORKDIR /app/data 25 | RUN wget https://zenodo.org/records/12671568/files/debug_demos.zip && \ 26 | unzip debug_demos.zip && \ 27 | rm debug_demos.zip && \ 28 | mv debug_demos/* /app/data/demos && \ 29 | rm -r debug_demos 30 | 31 | WORKDIR /app/wonderbread/benchmark/tasks 32 | 33 | ENTRYPOINT ["python3"] 34 | 35 | -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/260.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 260, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "I want to browse the products in the {{category}} category", 11 | "instantiation_dict": { 12 | "category": "Video Game" 13 | }, 14 | "intent": "I want to browse the products in the Video Game category", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "url_match" 19 | ], 20 | "reference_answers": null, 21 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770/video-games.html", 22 | "program_html": [], 23 | "url_note": "EXACT" 24 | }, 25 | "intent_template_id": 211 26 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/287.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 287, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": 
"http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "How much time does it take from Pittsburgh to Philadelphia by car?", 11 | "instantiation_dict": {}, 12 | "intent": "How much time does it take from Pittsburgh to Philadelphia by car?", 13 | "require_reset": false, 14 | "eval": { 15 | "eval_types": [ 16 | "string_match" 17 | ], 18 | "reference_answers": { 19 | "fuzzy_match": [ 20 | "5h 47min" 21 | ] 22 | }, 23 | "reference_url": "", 24 | "program_html": [], 25 | "string_note": "", 26 | "reference_answer_raw_annotation": "5h 47min" 27 | }, 28 | "intent_template_id": 47 29 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/368.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 368, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "find discounted items.", 11 | "instantiation_dict": {}, 12 | "intent": "find discounted items.", 13 | "require_reset": false, 14 | "eval": { 15 | "eval_types": [ 16 | "string_match" 17 | ], 18 | "reference_answers": { 19 | "exact_match": "N/A" 20 | }, 21 | "reference_url": "", 22 | "program_html": [], 23 | "string_note": "There is no function to show only discount items", 24 | "reference_answer_raw_annotation": "There is no function to show only discount items." 
25 | }, 26 | "intent_template_id": 188 27 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/380.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 380, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "Find the {{space}} around {{location}}", 11 | "instantiation_dict": { 12 | "location": "Carnegie Music Hall", 13 | "space": "bar" 14 | }, 15 | "intent": "Find the bar around Carnegie Music Hall", 16 | "require_reset": false, 17 | "eval": { 18 | "eval_types": [ 19 | "url_match" 20 | ], 21 | "reference_answers": null, 22 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000search?query=bars%20near%20Carnegie%20Music%20Hall", 23 | "program_html": [], 24 | "url_note": "GOLD in PRED" 25 | }, 26 | "intent_template_id": 59 27 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/117.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 117, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "What is the date when I made my first purchase on this site?", 11 | "instantiation_dict": {}, 12 | "intent": "What is the date when I made my first purchase on this site?", 13 | "require_reset": false, 14 | "eval": { 15 | "eval_types": [ 16 | "string_match" 17 | ], 18 | "reference_answers": { 19 | "fuzzy_match": [ 20 | "3/2/22" 21 | ] 22 | }, 23 | "reference_url": "", 24 | "program_html": [], 25 | "string_note": "", 26 | "reference_answer_raw_annotation": "3/2/22" 27 | 
}, 28 | "intent_template_id": 161 29 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/261.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 261, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "I want to browse the products in the {{category}} category", 11 | "instantiation_dict": { 12 | "category": "Headphones" 13 | }, 14 | "intent": "I want to browse the products in the Headphones category", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "url_match" 19 | ], 20 | "reference_answers": null, 21 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770/electronics/headphones.html", 22 | "program_html": [], 23 | "url_note": "EXACT" 24 | }, 25 | "intent_template_id": 211 26 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/278.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 278, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Search for \"{{keyword}}\"", 11 | "instantiation_dict": { 12 | "keyword": "green tea bag for weight loss" 13 | }, 14 | "intent": "Search for \"green tea bag for weight loss\"", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "url_match" 19 | ], 20 | "reference_answers": null, 21 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770/catalogsearch/result/?q=green+tea+bag+for+weight+loss", 22 | "program_html": [], 23 | 
"url_note": "EXACT" 24 | }, 25 | "intent_template_id": 212 26 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/377.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 377, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "Find the {{space}} around {{location}}", 11 | "instantiation_dict": { 12 | "location": "CMU ArtPark Lab", 13 | "space": "resturants" 14 | }, 15 | "intent": "Find the resturants around CMU ArtPark Lab", 16 | "require_reset": false, 17 | "eval": { 18 | "eval_types": [ 19 | "url_match" 20 | ], 21 | "reference_answers": null, 22 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000search?query=restaurants%20near%20CMU%20ArtPark%20Lab", 23 | "program_html": [], 24 | "url_note": "GOLD in PRED" 25 | }, 26 | "intent_template_id": 59 27 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/379.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 379, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "Find the {{space}} around {{location}}", 11 | "instantiation_dict": { 12 | "location": "CMU main campus", 13 | "space": "hotel" 14 | }, 15 | "intent": "Find the hotel around CMU main campus", 16 | "require_reset": false, 17 | "eval": { 18 | "eval_types": [ 19 | "url_match" 20 | ], 21 | "reference_answers": null, 22 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000search?query=hotels%20near%20carnegie%20mellon%20university", 23 | 
"program_html": [], 24 | "url_note": "GOLD in PRED" 25 | }, 26 | "intent_template_id": 59 27 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/381.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 381, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "Find the {{space}} around {{location}}", 11 | "instantiation_dict": { 12 | "location": "Carnegie Music Hall", 13 | "space": "hotel" 14 | }, 15 | "intent": "Find the hotel around Carnegie Music Hall", 16 | "require_reset": false, 17 | "eval": { 18 | "eval_types": [ 19 | "url_match" 20 | ], 21 | "reference_answers": null, 22 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000search?query=hotels%20near%20Carnegie%20Music%20Hall", 23 | "program_html": [], 24 | "url_note": "GOLD in PRED" 25 | }, 26 | "intent_template_id": 59 27 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/262.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 262, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "I want to browse the products in the {{category}} category", 11 | "instantiation_dict": { 12 | "category": "Men shoes" 13 | }, 14 | "intent": "I want to browse the products in the Men shoes category", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "url_match" 19 | ], 20 | "reference_answers": null, 21 | "reference_url": 
"http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770/clothing-shoes-jewelry/men/shoes.html", 22 | "program_html": [], 23 | "url_note": "EXACT" 24 | }, 25 | "intent_template_id": 211 26 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/375.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping_admin" 4 | ], 5 | "task_id": 375, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_admin_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7780/admin", 9 | "geolocation": null, 10 | "intent_template": "Preview the {{name}} theme for my shop", 11 | "instantiation_dict": { 12 | "name": "Magento Luma" 13 | }, 14 | "intent": "Preview the Magento Luma theme for my shop", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "url_match" 19 | ], 20 | "reference_answers": null, 21 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7780/admin/admin/system_design_theme/edit/id/3/key/", 22 | "program_html": [], 23 | "url_note": "GOLD in PRED" 24 | }, 25 | "intent_template_id": 266 26 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/378.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 378, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "Find the {{space}} around {{location}}", 11 | "instantiation_dict": { 12 | "location": "CMU main campus", 13 | "space": "parking" 14 | }, 15 | "intent": "Find the parking around CMU main campus", 16 | "require_reset": false, 17 | "eval": { 18 | "eval_types": [ 19 | "url_match" 20 | ], 21 | "reference_answers": null, 22 | 
"reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000search?query=parking%20near%20carnegie%20mellon%20university", 23 | "program_html": [], 24 | "url_note": "GOLD in PRED" 25 | }, 26 | "intent_template_id": 59 27 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/374.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping_admin" 4 | ], 5 | "task_id": 374, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_admin_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7780/admin", 9 | "geolocation": null, 10 | "intent_template": "Preview the {{name}} theme for my shop", 11 | "instantiation_dict": { 12 | "name": "Magento Blank" 13 | }, 14 | "intent": "Preview the Magento Blank theme for my shop", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "url_match" 19 | ], 20 | "reference_answers": null, 21 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7780/admin/admin/system_design_theme/edit/id/1/key/", 22 | "program_html": [], 23 | "url_note": "GOLD in PRED" 24 | }, 25 | "intent_template_id": 266 26 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/227.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 227, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "What is the price range for products from {{brand}}?", 11 | "instantiation_dict": { 12 | "brand": "EYZUTAK" 13 | }, 14 | "intent": "What is the price range for products from EYZUTAK?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | 
"string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "9.99" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "$9.99" 29 | }, 30 | "intent_template_id": 370 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/234.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 234, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Get the order number of my most recent {{status}} order ", 11 | "instantiation_dict": { 12 | "status": "on hold" 13 | }, 14 | "intent": "Get the order number of my most recent on hold order ", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "exact_match": "N/A" 22 | }, 23 | "reference_url": "", 24 | "program_html": [], 25 | "string_note": "there is no on hold order", 26 | "reference_answer_raw_annotation": "N/A" 27 | }, 28 | "intent_template_id": 213 29 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/235.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 235, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Get the order number of my most recent {{status}} order ", 11 | "instantiation_dict": { 12 | "status": "" 13 | }, 14 | "intent": "Get the order number of my most recent order ", 15 | "require_reset": false, 16 | "eval": { 17 | 
"eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "136" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "000000136" 29 | }, 30 | "intent_template_id": 213 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/341.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "gitlab" 4 | ], 5 | "task_id": 341, 6 | "require_login": true, 7 | "storage_state": "./.auth/gitlab_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023/root/metaseq", 9 | "geolocation": null, 10 | "intent_template": "List all opened issues {{description}}", 11 | "instantiation_dict": { 12 | "description": "requesting new features" 13 | }, 14 | "intent": "List all opened issues requesting new features", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "url_match" 19 | ], 20 | "reference_answers": null, 21 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023/root/metaseq/-/issues/?label_name%5B%5D=enhancement", 22 | "program_html": [], 23 | "url_note": "EXACT" 24 | }, 25 | "intent_template_id": 299 26 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/344.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping_admin" 4 | ], 5 | "task_id": 344, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_admin_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7780/admin", 9 | "geolocation": null, 10 | "intent_template": "How many reviews our shop received {{time}}?", 11 | "instantiation_dict": { 12 | "time": "by far" 13 | }, 14 | "intent": "How many reviews our shop received by far?", 15 | 
"require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "351" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "351" 29 | }, 30 | "intent_template_id": 248 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/189.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 189, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Tell me the total cost of my latest {{status}} order?", 11 | "instantiation_dict": { 12 | "status": "pending" 13 | }, 14 | "intent": "Tell me the total cost of my latest pending order?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "754.99" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "754.99" 29 | }, 30 | "intent_template_id": 214 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/190.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 190, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Tell me the total cost of my latest {{status}} order?", 11 | "instantiation_dict": { 12 | "status": "complete" 13 | }, 14 | "intent": "Tell me the total cost of my latest complete 
order?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "65.32" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "65.32" 29 | }, 30 | "intent_template_id": 214 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/263.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 263, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "I want to browse the products in the {{category}} category", 11 | "instantiation_dict": { 12 | "category": "Woman clothing" 13 | }, 14 | "intent": "I want to browse the products in the Woman clothing category", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "url_match" 19 | ], 20 | "reference_answers": null, 21 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770/clothing-shoes-jewelry/women/clothing.html", 22 | "program_html": [], 23 | "url_note": "EXACT" 24 | }, 25 | "intent_template_id": 211 26 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/340.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "gitlab" 4 | ], 5 | "task_id": 340, 6 | "require_login": true, 7 | "storage_state": "./.auth/gitlab_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023/primer/design", 9 | "geolocation": null, 10 | "intent_template": "List all opened issues {{description}}", 11 | "instantiation_dict": { 12 | "description": "that report bugs" 13 | }, 14 | 
"intent": "List all opened issues that report bugs", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "url_match" 19 | ], 20 | "reference_answers": null, 21 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023/primer/design/-/issues/?label_name%5B%5D=type%3A%20bug%20%F0%9F%90%9E", 22 | "program_html": [], 23 | "url_note": "EXACT" 24 | }, 25 | "intent_template_id": 299 26 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/346.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping_admin" 4 | ], 5 | "task_id": 346, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_admin_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7780/admin", 9 | "geolocation": null, 10 | "intent_template": "How many reviews our shop received {{time}}?", 11 | "instantiation_dict": { 12 | "time": "during 2022" 13 | }, 14 | "intent": "How many reviews our shop received during 2022?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "0" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "0" 29 | }, 30 | "intent_template_id": 248 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/348.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping_admin" 4 | ], 5 | "task_id": 348, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_admin_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7780/admin", 9 | "geolocation": null, 10 | "intent_template": "How many reviews our shop received {{time}}?", 11 | 
"instantiation_dict": { 12 | "time": "in May 2023" 13 | }, 14 | "intent": "How many reviews our shop received in May 2023?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "0" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "0" 29 | }, 30 | "intent_template_id": 248 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/94.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping_admin" 4 | ], 5 | "task_id": 94, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_admin_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7780/admin", 9 | "geolocation": null, 10 | "intent_template": "Telll me the grand total of invoice {{id}}.", 11 | "instantiation_dict": { 12 | "id": "000000001" 13 | }, 14 | "intent": "Telll me the grand total of invoice 000000001.", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "36.39" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "$36.39" 29 | }, 30 | "intent_template_id": 274 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/95.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping_admin" 4 | ], 5 | "task_id": 95, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_admin_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7780/admin", 9 | "geolocation": null, 10 | "intent_template": "Telll me the grand total of invoice 
{{id}}.", 11 | "instantiation_dict": { 12 | "id": "000000002" 13 | }, 14 | "intent": "Telll me the grand total of invoice 000000002.", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "39.64" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "$39.64" 29 | }, 30 | "intent_template_id": 274 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/188.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 188, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Tell me the total cost of my latest {{status}} order?", 11 | "instantiation_dict": { 12 | "status": "cancelled" 13 | }, 14 | "intent": "Tell me the total cost of my latest cancelled order?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "365.42" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "365.42" 29 | }, 30 | "intent_template_id": 214 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/232.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 232, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Get the order number of my most 
recent {{status}} order ", 11 | "instantiation_dict": { 12 | "status": "pending" 13 | }, 14 | "intent": "Get the order number of my most recent pending order ", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "189" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "000000189" 29 | }, 30 | "intent_template_id": 213 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/339.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "gitlab" 4 | ], 5 | "task_id": 339, 6 | "require_login": true, 7 | "storage_state": "./.auth/gitlab_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023/a11yproject/a11yproject.com", 9 | "geolocation": null, 10 | "intent_template": "List all opened issues {{description}}", 11 | "instantiation_dict": { 12 | "description": "that report bugs" 13 | }, 14 | "intent": "List all opened issues that report bugs", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "url_match" 19 | ], 20 | "reference_answers": null, 21 | "reference_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023/a11yproject/a11yproject.com/-/issues/?label_name%5B%5D=bug", 22 | "program_html": [], 23 | "url_note": "EXACT" 24 | }, 25 | "intent_template_id": 299 26 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/345.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping_admin" 4 | ], 5 | "task_id": 345, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_admin_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7780/admin", 9 | 
"geolocation": null, 10 | "intent_template": "How many reviews our shop received {{time}}?", 11 | "instantiation_dict": { 12 | "time": "in Apr 2023" 13 | }, 14 | "intent": "How many reviews our shop received in Apr 2023?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "351" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "351" 29 | }, 30 | "intent_template_id": 248 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/231.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 231, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Get the order number of my most recent {{status}} order ", 11 | "instantiation_dict": { 12 | "status": "cancelled" 13 | }, 14 | "intent": "Get the order number of my most recent cancelled order ", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "170" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "000000170" 29 | }, 30 | "intent_template_id": 213 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/233.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 233, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": 
"http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Get the order number of my most recent {{status}} order ", 11 | "instantiation_dict": { 12 | "status": "complete" 13 | }, 14 | "intent": "Get the order number of my most recent complete order ", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "180" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "000000180" 29 | }, 30 | "intent_template_id": 213 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/254.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 254, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "What is the {{information}} of {{location}}", 11 | "instantiation_dict": { 12 | "location": "Western Pennsylvania Hospital", 13 | "information": "phone number" 14 | }, 15 | "intent": "What is the phone number of Western Pennsylvania Hospital", 16 | "require_reset": false, 17 | "eval": { 18 | "eval_types": [ 19 | "string_match" 20 | ], 21 | "reference_answers": { 22 | "exact_match": "4125785000" 23 | }, 24 | "reference_url": "", 25 | "program_html": [], 26 | "string_note": "", 27 | "reference_answer_raw_annotation": "4125785000" 28 | }, 29 | "intent_template_id": 501 30 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/266.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "wikipedia", 4 | "map" 5 | ], 6 | "task_id": 266, 7 | "require_login": true, 8 | 
"storage_state": null, 9 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 10 | "geolocation": null, 11 | "intent_template": "What's the closest national park to {{city}}?", 12 | "instantiation_dict": { 13 | "city": "the largest city in Maine" 14 | }, 15 | "intent": "What's the closest national park to the largest city in Maine?", 16 | "require_reset": false, 17 | "eval": { 18 | "eval_types": [ 19 | "string_match" 20 | ], 21 | "reference_answers": { 22 | "exact_match": "Acadia National Park" 23 | }, 24 | "reference_url": "", 25 | "program_html": [], 26 | "string_note": "", 27 | "reference_answer_raw_annotation": "Acadia National Park" 28 | }, 29 | "intent_template_id": 85 30 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/329.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 329, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "How much I spend {{time}} on shopping at One Stop Market?", 11 | "instantiation_dict": { 12 | "time": "on 4/19/2023" 13 | }, 14 | "intent": "How much I spend on 4/19/2023 on shopping at One Stop Market?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "0" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "0" 29 | }, 30 | "intent_template_id": 147 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/349.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "gitlab" 4 | ], 5 | "task_id": 349, 6 
| "require_login": true, 7 | "storage_state": "./.auth/gitlab_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023", 9 | "geolocation": null, 10 | "intent_template": "Who else have access to my repo {{repo}}, show me their usernames", 11 | "instantiation_dict": { 12 | "repo": "gimmiethat.space" 13 | }, 14 | "intent": "Who else have access to my repo gimmiethat.space, show me their usernames", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "exact_match": "yjlou" 22 | }, 23 | "reference_url": "", 24 | "program_html": [], 25 | "string_note": "", 26 | "reference_answer_raw_annotation": "yjlou" 27 | }, 28 | "intent_template_id": 298 29 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping_admin" 4 | ], 5 | "task_id": 1, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_admin_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7780/admin", 9 | "geolocation": null, 10 | "intent_template": "What is the top-{{n}} best-selling brand in {{period}}", 11 | "instantiation_dict": { 12 | "n": 1, 13 | "period": "Quarter 1 2022" 14 | }, 15 | "intent": "What is the top-1 best-selling brand in Quarter 1 2022", 16 | "require_reset": false, 17 | "eval": { 18 | "eval_types": [ 19 | "string_match" 20 | ], 21 | "reference_answers": { 22 | "exact_match": "Sprite" 23 | }, 24 | "reference_url": "", 25 | "program_html": [], 26 | "string_note": "", 27 | "reference_answer_raw_annotation": "Sprite" 28 | }, 29 | "intent_template_id": 279 30 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/192.json: -------------------------------------------------------------------------------- 1 | 
{ 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 192, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Tell me the total cost of my latest {{status}} order?", 11 | "instantiation_dict": { 12 | "status": "non-cancelled" 13 | }, 14 | "intent": "Tell me the total cost of my latest non-cancelled order?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "754.99" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "754.99" 29 | }, 30 | "intent_template_id": 214 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/206.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "gitlab" 4 | ], 5 | "task_id": 206, 6 | "require_login": true, 7 | "storage_state": "./.auth/gitlab_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:8023/a11yproject/a11yproject.com", 9 | "geolocation": null, 10 | "intent_template": "How many commits did {{user}} make on {{date}}?", 11 | "instantiation_dict": { 12 | "user": "Eric", 13 | "date": "3/2" 14 | }, 15 | "intent": "How many commits did Eric make on 3/2?", 16 | "require_reset": false, 17 | "eval": { 18 | "eval_types": [ 19 | "string_match" 20 | ], 21 | "reference_answers": { 22 | "must_include": [ 23 | "2" 24 | ] 25 | }, 26 | "reference_url": "", 27 | "program_html": [], 28 | "string_note": "", 29 | "reference_answer_raw_annotation": "2" 30 | }, 31 | "intent_template_id": 320 32 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/224.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 224, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "I am at CMU Pittsburgh, how long it takes to drive to the nearest {{location}}", 11 | "instantiation_dict": { 12 | "location": "wendys" 13 | }, 14 | "intent": "I am at CMU Pittsburgh, how long it takes to drive to the nearest wendys", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "fuzzy_match": [ 22 | "3min" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "3min" 29 | }, 30 | "intent_template_id": 35 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/255.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 255, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "Who is the {{information}} of {{location}}", 11 | "instantiation_dict": { 12 | "location": "PIT airport", 13 | "information": "operator" 14 | }, 15 | "intent": "Who is the operator of PIT airport", 16 | "require_reset": false, 17 | "eval": { 18 | "eval_types": [ 19 | "string_match" 20 | ], 21 | "reference_answers": { 22 | "exact_match": "Allegheny County Airport Authority" 23 | }, 24 | "reference_url": "", 25 | "program_html": [], 26 | "string_note": "", 27 | "reference_answer_raw_annotation": "Allegheny County Airport Authority" 28 | }, 29 | "intent_template_id": 501 30 | } 
-------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/331.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 331, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "How much I spend {{time}} on shopping at One Stop Market?", 11 | "instantiation_dict": { 12 | "time": "in July 2022" 13 | }, 14 | "intent": "How much I spend in July 2022 on shopping at One Stop Market?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "40.16" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "40.16" 29 | }, 30 | "intent_template_id": 147 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/338.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 338, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Tell me when I last ordered my {{description}}?", 11 | "instantiation_dict": { 12 | "description": "toothpaste" 13 | }, 14 | "intent": "Tell me when I last ordered my toothpaste?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "fuzzy_match": [ 22 | "December 4th 2022" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "December 4th 2022" 
29 | }, 30 | "intent_template_id": 169 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/5.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping_admin" 4 | ], 5 | "task_id": 5, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_admin_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7780/admin", 9 | "geolocation": null, 10 | "intent_template": "What is the top-{{n}} best-selling product type in {{period}}", 11 | "instantiation_dict": { 12 | "n": 1, 13 | "period": "Jan 2023" 14 | }, 15 | "intent": "What is the top-1 best-selling product type in Jan 2023", 16 | "require_reset": false, 17 | "eval": { 18 | "eval_types": [ 19 | "string_match" 20 | ], 21 | "reference_answers": { 22 | "exact_match": "Duffle" 23 | }, 24 | "reference_url": "", 25 | "program_html": [], 26 | "string_note": "", 27 | "reference_answer_raw_annotation": "Duffle" 28 | }, 29 | "intent_template_id": 279 30 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/60.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 60, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", 11 | "instantiation_dict": { 12 | "place1": "restaurant", 13 | "place2": "CMU Posner Hall" 14 | }, 15 | "intent": "Tell me the closest restaurant(s) to CMU Posner Hall", 16 | "require_reset": false, 17 | "eval": { 18 | "eval_types": [ 19 | "string_match" 20 | ], 21 | "reference_answers": { 22 | "must_include": [ 23 | "The exchange" 24 | ] 25 | }, 26 | "reference_url": "", 27 | "program_html": [], 28 | "string_note": "", 
29 | "reference_answer_raw_annotation": "The exchange" 30 | }, 31 | "intent_template_id": 69 32 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/93.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 93, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "Which US states border {{state}}?", 11 | "instantiation_dict": { 12 | "state": "New Hampshire" 13 | }, 14 | "intent": "Which US states border New Hampshire?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "Massachusetts", 23 | "Vermont", 24 | "Maine" 25 | ] 26 | }, 27 | "reference_url": "", 28 | "program_html": [], 29 | "string_note": "", 30 | "reference_answer_raw_annotation": "Massachusetts, Vermont, Maine" 31 | }, 32 | "intent_template_id": 67 33 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/tasks/README.md: -------------------------------------------------------------------------------- 1 | # Experiments 2 | 3 | This folder contains our WONDERBREAD benchmark tasks. The tasks are divided into three folders: documentation, improvement, knowledge_transfer. 4 | 5 | - Each task has: 6 | - A `main.py` file that runs the experiment. 7 | - A `run_experiments.py` file that runs all experiments across all settings. 8 | 9 | ## Installation 10 | 11 | Follow the instructions in the main `README.md` to (1) install the conda env; (2) download the dataset. 12 | 13 | Move the dataset into the `data/` directory in this repo. You should now have many demonstration folders (potentially multiple per task). 
14 | 15 | ## How to Run 16 | 17 | You should be able to go into any task subfolder and simply run `python3 run_experiments.py` to generate all results. 18 | 19 | ```bash 20 | cd documentation/sop_generation/ 21 | python3 run_experiments.py --model 22 | ``` 23 | -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/153.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 153, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "What is the minimum travel time by car from {{location1}} to {{location2}}?", 11 | "instantiation_dict": { 12 | "location1": "REI", 13 | "location2": "CMU" 14 | }, 15 | "intent": "What is the minimum travel time by car from REI to CMU?", 16 | "require_reset": false, 17 | "eval": { 18 | "eval_types": [ 19 | "string_match" 20 | ], 21 | "reference_answers": { 22 | "fuzzy_match": [ 23 | "7min" 24 | ] 25 | }, 26 | "reference_url": "", 27 | "program_html": [], 28 | "string_note": "", 29 | "reference_answer_raw_annotation": "7min" 30 | }, 31 | "intent_template_id": 36 32 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/223.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 223, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "I am at CMU Pittsburgh, how long it takes to drive to the nearest {{location}}", 11 | "instantiation_dict": { 12 | "location": "Mcdonald's" 13 | }, 14 | "intent": "I am at CMU Pittsburgh, how long it takes to drive to the nearest Mcdonald's", 15 | "require_reset": 
false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "fuzzy_match": [ 22 | "4min" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "4min" 29 | }, 30 | "intent_template_id": 35 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/330.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 330, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "How much I spend {{time}} on shopping at One Stop Market?", 11 | "instantiation_dict": { 12 | "time": "in March 2023" 13 | }, 14 | "intent": "How much I spend in March 2023 on shopping at One Stop Market?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "81.31" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "81.31" 29 | }, 30 | "intent_template_id": 147 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/335.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 335, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Tell me when I last ordered my {{description}}?", 11 | "instantiation_dict": { 12 | "description": "body butter" 13 | }, 14 | "intent": "Tell me when I last ordered my body butter?", 
15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "fuzzy_match": [ 22 | "January 16th 2023" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "January 16th 2023" 29 | }, 30 | "intent_template_id": 169 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/336.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 336, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Tell me when I last ordered my {{description}}?", 11 | "instantiation_dict": { 12 | "description": "conditioner" 13 | }, 14 | "intent": "Tell me when I last ordered my conditioner?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "fuzzy_match": [ 22 | "January 16th 2023" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "January 16th 2023" 29 | }, 30 | "intent_template_id": 169 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/337.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 337, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "Tell me when I last ordered my {{description}}?", 11 | "instantiation_dict": { 12 | "description": "bread olive" 13 | }, 14 | 
"intent": "Tell me when I last ordered my bread olive?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "fuzzy_match": [ 22 | "December 12th 2022" 23 | ] 24 | }, 25 | "reference_url": "", 26 | "program_html": [], 27 | "string_note": "", 28 | "reference_answer_raw_annotation": "December 12th 2022" 29 | }, 30 | "intent_template_id": 169 31 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/58.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 58, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", 11 | "instantiation_dict": { 12 | "place1": "cafe", 13 | "place2": "CMU Hunt library" 14 | }, 15 | "intent": "Tell me the closest cafe(s) to CMU Hunt library", 16 | "require_reset": false, 17 | "eval": { 18 | "eval_types": [ 19 | "string_match" 20 | ], 21 | "reference_answers": { 22 | "must_include": [ 23 | "De Fer Coffee & Tea" 24 | ] 25 | }, 26 | "reference_url": "", 27 | "program_html": [], 28 | "string_note": "", 29 | "reference_answer_raw_annotation": "De Fer Coffee & Tea" 30 | }, 31 | "intent_template_id": 69 32 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/59.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 59, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "Tell me the closest {{place1}}(s) to {{place2}}", 11 | "instantiation_dict": { 12 | "place1": "restaurant", 
13 | "place2": "CMU Hunt library" 14 | }, 15 | "intent": "Tell me the closest restaurant(s) to CMU Hunt library", 16 | "require_reset": false, 17 | "eval": { 18 | "eval_types": [ 19 | "string_match" 20 | ], 21 | "reference_answers": { 22 | "must_include": [ 23 | "The exchange" 24 | ] 25 | }, 26 | "reference_url": "", 27 | "program_html": [], 28 | "string_note": "", 29 | "reference_answer_raw_annotation": "The exchange" 30 | }, 31 | "intent_template_id": 69 32 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/92.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "map" 4 | ], 5 | "task_id": 92, 6 | "require_login": true, 7 | "storage_state": null, 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:3000", 9 | "geolocation": null, 10 | "intent_template": "Which US states border {{state}}?", 11 | "instantiation_dict": { 12 | "state": "Vermont" 13 | }, 14 | "intent": "Which US states border Vermont?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "New York", 23 | "New Hampshire", 24 | "Massachusetts" 25 | ] 26 | }, 27 | "reference_url": "", 28 | "program_html": [], 29 | "string_note": "", 30 | "reference_answer_raw_annotation": "New York, New Hampshire, Massachusetts" 31 | }, 32 | "intent_template_id": 67 33 | } -------------------------------------------------------------------------------- /wonderbread/benchmark/webarena/229.json: -------------------------------------------------------------------------------- 1 | { 2 | "sites": [ 3 | "shopping" 4 | ], 5 | "task_id": 229, 6 | "require_login": true, 7 | "storage_state": "./.auth/shopping_state.json", 8 | "start_url": "http://ec2-3-130-83-246.us-east-2.compute.amazonaws.com:7770", 9 | "geolocation": null, 10 | "intent_template": "What is the price range for products from 
{{brand}}?", 11 | "instantiation_dict": { 12 | "brand": "ugreen" 13 | }, 14 | "intent": "What is the price range for products from ugreen?", 15 | "require_reset": false, 16 | "eval": { 17 | "eval_types": [ 18 | "string_match" 19 | ], 20 | "reference_answers": { 21 | "must_include": [ 22 | "6.99", 23 | "38.99" 24 | ] 25 | }, 26 | "reference_url": "", 27 | "program_html": [], 28 | "string_note": "", 29 | "reference_answer_raw_annotation": "$6.99 - $38.99" 30 | }, 31 | "intent_template_id": 370 32 | } --------------------------------------------------------------------------------