├── src └── productive_agents │ ├── __init__.py │ ├── env │ ├── officebench │ │ ├── apps │ │ │ ├── system_app │ │ │ │ ├── switch_app.py │ │ │ │ ├── __init__.py │ │ │ │ └── system_info.py │ │ │ ├── calculator_app │ │ │ │ └── __init__.py │ │ │ ├── ocr_app │ │ │ │ ├── __init__.py │ │ │ │ └── ocr_info.py │ │ │ ├── python_app │ │ │ │ ├── __init__.py │ │ │ │ └── python_info.py │ │ │ ├── scratchpad_app │ │ │ │ └── __init__.py │ │ │ ├── shell_app │ │ │ │ ├── __init__.py │ │ │ │ └── shell_info.py │ │ │ ├── llm_app │ │ │ │ └── __init__.py │ │ │ ├── email_app │ │ │ │ └── __init__.py │ │ │ ├── calendar_app │ │ │ │ └── __init__.py │ │ │ ├── pdf_app │ │ │ │ └── __init__.py │ │ │ ├── word_app │ │ │ │ └── __init__.py │ │ │ ├── exp_config.yaml │ │ │ └── excel_app │ │ │ │ └── __init__.py │ │ ├── __init__.py │ │ └── config.py │ ├── appworld │ │ └── __init__.py │ └── smolagents │ │ └── __init__.py │ ├── agents │ ├── officebench │ │ └── __init__.py │ └── appworld │ │ └── __init__.py │ └── utils.py ├── experiments ├── officebench │ ├── evaluation │ │ └── __init__.py │ ├── tasks │ │ ├── 1-8 │ │ │ ├── reference │ │ │ │ ├── score.xlsx │ │ │ │ ├── salary.xlsx │ │ │ │ ├── class_member.xlsx │ │ │ │ ├── company_budget.xlsx │ │ │ │ └── shopping_list.xlsx │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── salary.xlsx │ │ │ │ │ ├── score.xlsx │ │ │ │ │ ├── class_member.xlsx │ │ │ │ │ ├── company_budget.xlsx │ │ │ │ │ └── shopping_list.xlsx │ │ │ └── subtasks │ │ │ │ ├── 0.json │ │ │ │ ├── 3.json │ │ │ │ ├── 2.json │ │ │ │ ├── 4.json │ │ │ │ └── 1.json │ │ ├── 3-38 │ │ │ ├── reference │ │ │ │ └── sids.xlsx │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── students_ids.jpg │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 1-14 │ │ │ ├── reference │ │ │ │ ├── salary.xlsx │ │ │ │ ├── company_budget.xlsx │ │ │ │ ├── score_swap_row.xlsx │ │ │ │ ├── shopping_list.xlsx │ │ │ │ └── score_swap_column.xlsx │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── salary.xlsx │ │ │ │ │ ├── score.xlsx │ │ │ │ │ ├── 
class_member.xlsx │ │ │ │ │ ├── shopping_list.xlsx │ │ │ │ │ └── company_budget.xlsx │ │ │ └── subtasks │ │ │ │ ├── 2.json │ │ │ │ ├── 0.json │ │ │ │ ├── 1.json │ │ │ │ ├── 4.json │ │ │ │ └── 3.json │ │ ├── 1-3 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── file2.pdf │ │ │ │ │ ├── file4.jpg │ │ │ │ │ ├── file1.docx │ │ │ │ │ └── file3.xlsx │ │ │ └── subtasks │ │ │ │ ├── 3.json │ │ │ │ ├── 0.json │ │ │ │ ├── 1.json │ │ │ │ └── 2.json │ │ ├── 2-7 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── paper.pdf │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 3-1 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── task.xlsx │ │ ├── 3-36 │ │ │ ├── reference │ │ │ │ └── scores.xlsx │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── students_scores.jpg │ │ ├── 3-37 │ │ │ ├── reference │ │ │ │ └── scores.xlsx │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── students_scores.jpg │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 1-10 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── salary.xlsx │ │ │ │ │ ├── score.xlsx │ │ │ │ │ ├── class_member.xlsx │ │ │ │ │ ├── shopping_list.xlsx │ │ │ │ │ └── company_budget.xlsx │ │ │ └── subtasks │ │ │ │ ├── 0.json │ │ │ │ ├── 4.json │ │ │ │ ├── 2.json │ │ │ │ ├── 3.json │ │ │ │ └── 1.json │ │ ├── 1-11 │ │ │ ├── reference │ │ │ │ ├── midterm1.xlsx │ │ │ │ ├── midterm2.xlsx │ │ │ │ ├── company_budget.xlsx │ │ │ │ └── sorted_score.xlsx │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── salary.xlsx │ │ │ │ │ ├── score.xlsx │ │ │ │ │ ├── class_member.xlsx │ │ │ │ │ ├── shopping_list.xlsx │ │ │ │ │ └── company_budget.xlsx │ │ │ └── subtasks │ │ │ │ ├── 3.json │ │ │ │ ├── 2.json │ │ │ │ └── 0.json │ │ ├── 1-19 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── paper.pdf │ │ │ │ │ └── contract_agreement.pdf │ │ │ └── subtasks │ │ │ │ ├── 2.json │ │ │ │ └── 3.json │ │ ├── 1-4 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── salary.xlsx │ │ │ │ │ ├── score.xlsx │ │ │ │ │ ├── company_budget.xlsx │ │ │ │ │ └── shopping_list.xlsx │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ 
├── 1-5 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── salary.xlsx │ │ │ │ │ ├── score.xlsx │ │ │ │ │ ├── company_budget.xlsx │ │ │ │ │ └── shopping_list.xlsx │ │ │ └── subtasks │ │ │ │ ├── 0.json │ │ │ │ └── 1.json │ │ ├── 1-6 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── salary.xlsx │ │ │ │ ├── score.xlsx │ │ │ │ ├── class_member.xlsx │ │ │ │ ├── company_budget.xlsx │ │ │ │ └── shopping_list.xlsx │ │ ├── 1-7 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── salary.xlsx │ │ │ │ │ ├── score.xlsx │ │ │ │ │ ├── class_member.xlsx │ │ │ │ │ ├── company_budget.xlsx │ │ │ │ │ └── shopping_list.xlsx │ │ │ └── subtasks │ │ │ │ ├── 0.json │ │ │ │ ├── 1.json │ │ │ │ ├── 3.json │ │ │ │ └── 5.json │ │ ├── 1-9 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── salary.xlsx │ │ │ │ ├── score.xlsx │ │ │ │ ├── class_member.xlsx │ │ │ │ ├── company_budget.xlsx │ │ │ │ └── shopping_list.xlsx │ │ ├── 2-14 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── score.xlsx │ │ ├── 2-18 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── score.xlsx │ │ │ │ │ ├── class_member.xlsx │ │ │ │ │ └── shopping_list.xlsx │ │ │ └── subtasks │ │ │ │ ├── 0.json │ │ │ │ └── 2.json │ │ ├── 2-19 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── score.xlsx │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 2-22 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── score.xlsx │ │ │ │ │ ├── car_trading.xlsx │ │ │ │ │ └── house_price_index.xlsx │ │ │ └── subtasks │ │ │ │ ├── 0.json │ │ │ │ └── 1.json │ │ ├── 2-23 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── Invoice.pdf │ │ │ │ │ └── Invoice_2.pdf │ │ │ └── subtasks │ │ │ │ ├── 1.json │ │ │ │ └── 0.json │ │ ├── 2-24 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── score.xlsx │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 2-33 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── score.xlsx │ │ │ │ │ └── class_member.xlsx │ │ │ └── subtasks │ │ │ │ ├── 1.json │ │ │ │ └── 0.json │ │ ├── 2-38 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── party.pdf │ │ │ │ │ └── notification.docx │ │ 
│ └── subtasks │ │ │ │ ├── 1.json │ │ │ │ └── 0.json │ │ ├── 2-6 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── homework.jpg │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 2-8 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── Bob_card.png │ │ │ │ │ ├── Tom_card.png │ │ │ │ │ └── John_card.jpg │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 3-15 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── resume.pdf │ │ ├── 3-18 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── resume.pdf │ │ ├── 3-24 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── paper.pdf │ │ ├── 3-26 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── paper.pdf │ │ ├── 3-3 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── manual.docx │ │ ├── 3-46 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── score.xlsx │ │ ├── 3-47 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── score.xlsx │ │ ├── 3-5 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── Invoice.pdf │ │ │ │ └── Invoice_2.pdf │ │ ├── 3-54 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── score.xlsx │ │ ├── 3-55 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── score.xlsx │ │ ├── 3-59 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── score.xlsx │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 3-62 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── score.xlsx │ │ │ │ └── target.pdf │ │ ├── 3-63 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── score.xlsx │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 3-68 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── Invoice.pdf │ │ │ │ └── Invoice_2.pdf │ │ ├── 3-8 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── score.xlsx │ │ │ │ └── company_revenues.xlsx │ │ ├── 3-82 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── score.xlsx │ │ ├── 3-92 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── party.pdf │ │ ├── 3-22 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── homework.jpg │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 3-23 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── homework.jpg │ │ ├── 3-25 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── homework.jpg │ │ │ └── 
subtasks │ │ │ │ └── 0.json │ │ ├── 3-27 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── Bob_card.png │ │ │ │ ├── John_card.jpg │ │ │ │ └── Tom_card.png │ │ ├── 3-28 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── Bob_card.png │ │ │ │ ├── John_card.jpg │ │ │ │ └── Tom_card.png │ │ ├── 3-29 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── Bob_card.png │ │ │ │ ├── John_card.jpg │ │ │ │ └── Tom_card.png │ │ ├── 3-30 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── Bob_card.png │ │ │ │ ├── John_card.jpg │ │ │ │ └── Tom_card.png │ │ ├── 1-15 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── House_creak.docx │ │ │ │ │ ├── project_report.docx │ │ │ │ │ ├── sample_syllabus.docx │ │ │ │ │ ├── academic_integrity.docx │ │ │ │ │ └── reference.txt │ │ │ └── subtasks │ │ │ │ ├── 0.json │ │ │ │ └── 1.json │ │ ├── 1-18 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── concert_post.pdf │ │ │ └── subtasks │ │ │ │ ├── 0.json │ │ │ │ └── 1.json │ │ ├── 2-10 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── students_ids.jpg │ │ │ │ └── students_scores.jpg │ │ ├── 2-11 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── car_trading.png │ │ │ │ └── company_trading_records.jpg │ │ ├── 2-2 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── concert_post.pdf │ │ │ └── subtasks │ │ │ │ ├── 1.json │ │ │ │ └── 0.json │ │ ├── 2-3 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── google_form.pdf │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 2-31 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── car_trading.png │ │ │ │ │ ├── house_price_index.jpg │ │ │ │ │ └── students_scores.jpg │ │ │ └── subtasks │ │ │ │ ├── 3.json │ │ │ │ ├── 0.json │ │ │ │ ├── 1.json │ │ │ │ ├── 4.json │ │ │ │ └── 2.json │ │ ├── 2-36 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── sales_report.pdf │ │ │ │ │ └── transcripts.png │ │ │ └── subtasks │ │ │ │ ├── 0.json │ │ │ │ ├── 2.json │ │ │ │ ├── 3.json │ │ │ │ └── 1.json │ │ ├── 2-5 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── school_policy.jpg │ │ │ │ │ ├── school_fellowship.jpg │ │ │ │ │ 
└── fellowship_application.pdf │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 3-16 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── concert_post.pdf │ │ ├── 3-17 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── google_form.pdf │ │ ├── 3-39 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── student_ids.png │ │ ├── 3-40 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── car_trading.png │ │ ├── 3-66 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── car_trading.xlsx │ │ ├── 3-67 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── car_trading.xlsx │ │ ├── 3-69 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── transcripts.pdf │ │ ├── 3-76 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── car_trading.png │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 3-86 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── transcripts.png │ │ ├── 3-87 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── transcripts.png │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 3-88 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── sales_report.pdf │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 3-89 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── sales_report.pdf │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 2-12 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── meeting_agenda.pdf │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 2-17 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── class_member.xlsx │ │ ├── 2-27 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── meeting_agenda.pdf │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 2-32 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── company_budget.jpg │ │ │ │ │ └── company_trading_records.jpg │ │ │ └── subtasks │ │ │ │ ├── 0.json │ │ │ │ └── 1.json │ │ ├── 2-35 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── survey_report.pdf │ │ │ │ │ └── financial_report.pdf │ │ │ └── subtasks │ │ │ │ ├── 0.json │ │ │ │ └── 2.json │ │ ├── 2-45 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── daily_schedule.jpg │ │ │ │ └── business_travel.jpg │ │ ├── 3-19 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── 
school_policy.jpg │ │ │ │ └── fellowship_application.pdf │ │ ├── 3-20 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── school_policy.jpg │ │ │ │ └── fellowship_application.pdf │ │ ├── 3-21 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── school_policy.jpg │ │ │ │ └── fellowship_application.pdf │ │ ├── 3-32 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── car_inventory.pdf │ │ ├── 3-33 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── car_inventory.pdf │ │ ├── 3-34 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── car_inventory.pdf │ │ ├── 3-43 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── meeting_agenda.pdf │ │ ├── 3-44 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── daily_schedule.jpg │ │ ├── 3-53 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── class_member.xlsx │ │ ├── 3-56 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── class_member.xlsx │ │ ├── 3-57 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── class_member.xlsx │ │ ├── 3-58 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── shopping_list.xlsx │ │ ├── 3-71 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── meeting_agenda.pdf │ │ ├── 3-72 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── meeting_agenda.pdf │ │ ├── 3-78 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── company_budget.jpg │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 3-79 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── company_budget.jpg │ │ ├── 3-83 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── class_member.xlsx │ │ ├── 2-1 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── notification_image.jpg │ │ │ │ │ ├── meeting_notification.jpg │ │ │ │ │ ├── combined_notifications.jpg │ │ │ │ │ └── power_failure_notification.jpg │ │ │ └── subtasks │ │ │ │ ├── 1.json │ │ │ │ ├── 2.json │ │ │ │ ├── 3.json │ │ │ │ └── 0.json │ │ ├── 2-15 │ │ │ └── testbed │ │ │ │ ├── data │ │ │ │ └── meeting_agenda_2.xlsx │ │ │ │ └── calendar │ │ │ │ └── Alice.ics │ │ ├── 2-16 │ │ │ └── testbed │ │ │ │ ├── data │ │ │ │ └── meeting_agenda_2.xlsx │ │ │ │ └── calendar │ │ │ │ └── Alice.ics │ │ ├── 2-25 │ 
│ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── A12345homework.docx │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 2-26 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── invitation_letter.pdf │ │ │ │ │ └── market_analysis.pdf │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 2-29 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── invitation_letter.pdf │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 2-34 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ ├── survey_responses.xlsx │ │ │ │ │ └── house_price_index.xlsx │ │ │ └── subtasks │ │ │ │ ├── 1.json │ │ │ │ └── 0.json │ │ ├── 2-43 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── company_revenues.xlsx │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 3-2 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── financial_report.pdf │ │ ├── 3-35 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── students_scores.jpg │ │ ├── 3-52 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── meeting_agenda_2.xlsx │ │ ├── 3-70 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── A12345homework.docx │ │ ├── 3-73 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── invitation_letter.pdf │ │ ├── 3-74 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── invitation_letter.pdf │ │ ├── 3-77 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── house_price_index.jpg │ │ ├── 3-84 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── survey_responses.xlsx │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 3-85 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── survey_responses.xlsx │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 3-9 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── company_revenues.xlsx │ │ │ └── subtasks │ │ │ │ ├── 0.json │ │ │ │ └── 1.json │ │ ├── 3-96 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── company_revenues.xlsx │ │ ├── 3-99 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── company_revenues.xlsx │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 1-12 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── CS161_Class_Roster.xlsx │ │ │ └── subtasks │ │ │ │ ├── 2.json │ │ │ │ ├── 0.json │ │ │ │ └── 
1.json │ │ ├── 1-23 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── CS161_Class_Roster.xlsx │ │ │ └── subtasks │ │ │ │ ├── 0.json │ │ │ │ └── 1.json │ │ ├── 2-20 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── CS161_Class_Roster.xlsx │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 2-21 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── Bill_course_lists.xlsx │ │ ├── 2-30 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── homework_withname.docx │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 3-100 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── company_revenues.xlsx │ │ ├── 3-4 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── financial_report_2.xlsx │ │ ├── 3-60 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── CS161_Class_Roster.xlsx │ │ ├── 3-64 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── house_price_index.xlsx │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 3-65 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── house_price_index.xlsx │ │ ├── 3-75 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── homework_withname.docx │ │ ├── 3-12 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── combined_notifications.jpg │ │ │ │ ├── meeting_notification.jpg │ │ │ │ └── power_failure_notification.jpg │ │ ├── 3-13 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── combined_notifications.jpg │ │ │ │ ├── meeting_notification.jpg │ │ │ │ └── power_failure_notification.jpg │ │ ├── 3-14 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ ├── combined_notifications.jpg │ │ │ │ ├── meeting_notification.jpg │ │ │ │ └── power_failure_notification.jpg │ │ ├── 3-6 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── team_training_schedule.xlsx │ │ ├── 3-7 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── UIC_Admitted_Students.xlsx │ │ ├── 2-13 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── team_training_schedule.xlsx │ │ ├── 3-41 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── company_trading_records.jpg │ │ ├── 3-42 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── company_trading_records.jpg │ │ ├── 3-45 │ │ │ └── testbed │ │ │ │ └── 
data │ │ │ │ └── team_training_schedule.xlsx │ │ ├── 3-48 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── team_training_schedule.xlsx │ │ ├── 3-49 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── team_training_schedule.xlsx │ │ ├── 3-80 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── company_trading_records.jpg │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 3-81 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── company_trading_records.jpg │ │ ├── 1-13 │ │ │ ├── testbed │ │ │ │ └── data │ │ │ │ │ └── Thomas_Robinson_Course_List.xlsx │ │ │ └── subtasks │ │ │ │ ├── 0.json │ │ │ │ └── 2.json │ │ ├── 3-11 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── business_travel_notification.jpg │ │ ├── 3-61 │ │ │ └── testbed │ │ │ │ └── data │ │ │ │ └── Thomas_Robinson_Course_List.xlsx │ │ ├── 2-41 │ │ │ ├── subtasks │ │ │ │ └── 0.json │ │ │ └── testbed │ │ │ │ └── emails │ │ │ │ └── Alice │ │ │ │ ├── work_overtime.eml │ │ │ │ └── body_test.eml │ │ ├── 1-2 │ │ │ └── subtasks │ │ │ │ ├── 3.json │ │ │ │ └── 4.json │ │ ├── 1-22 │ │ │ ├── subtasks │ │ │ │ ├── 1.json │ │ │ │ ├── 4.json │ │ │ │ ├── 2.json │ │ │ │ ├── 0.json │ │ │ │ └── 3.json │ │ │ └── testbed │ │ │ │ └── emails │ │ │ │ └── Bob │ │ │ │ ├── meeting.eml │ │ │ │ ├── gradescope.eml │ │ │ │ ├── scholarship approved.eml │ │ │ │ └── rental.eml │ │ ├── 3-90 │ │ │ └── testbed │ │ │ │ └── calendar │ │ │ │ └── Alice.ics │ │ ├── 1-20 │ │ │ ├── subtasks │ │ │ │ ├── 4.json │ │ │ │ ├── 2.json │ │ │ │ ├── 3.json │ │ │ │ └── 1.json │ │ │ └── testbed │ │ │ │ └── emails │ │ │ │ └── Bob │ │ │ │ ├── gradescope.eml │ │ │ │ ├── meeting.eml │ │ │ │ ├── scholarship approved.eml │ │ │ │ └── rental.eml │ │ ├── 1-21 │ │ │ ├── subtasks │ │ │ │ ├── 2.json │ │ │ │ ├── 1.json │ │ │ │ └── 0.json │ │ │ └── testbed │ │ │ │ └── emails │ │ │ │ └── Bob │ │ │ │ ├── gradescope.eml │ │ │ │ ├── meeting.eml │ │ │ │ ├── scholarship approved.eml │ │ │ │ └── rental.eml │ │ ├── 2-37 │ │ │ └── subtasks │ │ │ │ ├── 1.json │ │ │ │ └── 2.json │ │ ├── 2-46 │ │ │ ├── subtasks │ 
│ │ │ └── 0.json │ │ │ └── testbed │ │ │ │ └── emails │ │ │ │ └── Alice │ │ │ │ └── class_cancel.eml │ │ ├── 2-39 │ │ │ ├── testbed │ │ │ │ └── emails │ │ │ │ │ └── Bob │ │ │ │ │ ├── gradescope.eml │ │ │ │ │ ├── meeting.eml │ │ │ │ │ ├── scholarship approved.eml │ │ │ │ │ └── rental.eml │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ ├── 2-42 │ │ │ └── testbed │ │ │ │ └── emails │ │ │ │ ├── Bob │ │ │ │ ├── gradescope.eml │ │ │ │ ├── meeting.eml │ │ │ │ ├── scholarship approved.eml │ │ │ │ └── rental.eml │ │ │ │ └── Alice │ │ │ │ ├── mobile_bill.eml │ │ │ │ └── coupon.eml │ │ ├── 2-44 │ │ │ └── testbed │ │ │ │ └── emails │ │ │ │ └── Bob │ │ │ │ ├── gradescope.eml │ │ │ │ ├── meeting.eml │ │ │ │ ├── scholarship approved.eml │ │ │ │ └── rental.eml │ │ ├── 3-10 │ │ │ └── testbed │ │ │ │ └── emails │ │ │ │ └── Alice │ │ │ │ ├── gradescope.eml │ │ │ │ ├── mobile_bill.eml │ │ │ │ ├── meeting.eml │ │ │ │ ├── coupon.eml │ │ │ │ ├── scholarship.eml │ │ │ │ └── rental.eml │ │ ├── 3-95 │ │ │ └── testbed │ │ │ │ └── emails │ │ │ │ ├── Bob │ │ │ │ ├── gradescope.eml │ │ │ │ ├── meeting.eml │ │ │ │ ├── scholarship approved.eml │ │ │ │ └── rental.eml │ │ │ │ └── Alice │ │ │ │ ├── mobile_bill.eml │ │ │ │ └── coupon.eml │ │ ├── 1-1 │ │ │ └── subtasks │ │ │ │ ├── 1.json │ │ │ │ ├── 2.json │ │ │ │ ├── 0.json │ │ │ │ ├── 4.json │ │ │ │ └── 3.json │ │ ├── 2-40 │ │ │ ├── testbed │ │ │ │ └── calendar │ │ │ │ │ └── Tom.ics │ │ │ └── subtasks │ │ │ │ └── 0.json │ │ └── 1-16 │ │ │ └── testbed │ │ │ └── data │ │ │ └── random_paragraph.txt │ ├── configs │ │ └── context_opt │ │ │ ├── gpt-4.1_obs.yaml │ │ │ └── gpt-4.1_history.yaml │ ├── test_tasks.txt │ └── train_tasks.txt ├── smolagents │ ├── __init__.py │ ├── prompts │ │ └── context_opt │ │ │ └── system_prompt.jinja │ ├── configs │ │ └── context_opt │ │ │ ├── gpt-4.1_obs.yaml │ │ │ ├── llmlingua_obs.yaml │ │ │ ├── llmlingua_history.yaml │ │ │ ├── discard_history_keep5.yaml │ │ │ ├── gpt-4.1_history.yaml │ │ │ └── retrieval_history_keep5.yaml │ ├── 
data │ │ └── nq_multi_8 │ │ │ └── folds │ │ │ ├── train_history_tiny.txt │ │ │ └── train_obs_tiny.txt │ └── search │ │ └── download.py ├── __init__.py ├── prompt_optimizer │ ├── __init__.py │ └── common │ │ ├── aggregation.py │ │ └── __init__.py └── appworld │ ├── prompts │ ├── context_opt │ │ └── system_prompt.jinja │ └── prompts_v1.json │ ├── configs │ ├── context_opt │ │ ├── gpt-4.1_obs.yaml │ │ ├── gpt-4.1-mini_obs.yaml │ │ ├── gpt-4.1_history.yaml │ │ └── gpt-4.1-mini_history.yaml │ └── base_config.yaml │ └── data_copy │ └── datasets │ ├── train_obs_tiny.txt │ └── train_history_tiny.txt ├── configs └── private_config_dummy.yaml ├── assets └── concept.png ├── SUPPORT.md ├── SECURITY.md └── CODE_OF_CONDUCT.md /src/productive_agents/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/officebench/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /configs/private_config_dummy.yaml: -------------------------------------------------------------------------------- 1 | openai_key: -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/apps/system_app/switch_app.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/smolagents/__init__.py: -------------------------------------------------------------------------------- 1 | # Minimal Smolagents experiments package 2 | -------------------------------------------------------------------------------- /assets/concept.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/acon/main/assets/concept.png -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- 1 | # Marks experiments as a package for -m execution and imports 2 | -------------------------------------------------------------------------------- /experiments/prompt_optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | """Prompt optimizer experiment scripts package.""" 2 | -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/apps/system_app/__init__.py: -------------------------------------------------------------------------------- 1 | INTRO = "system: used when you want to switch between apps." -------------------------------------------------------------------------------- /src/productive_agents/agents/officebench/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .agent import OfficeBenchAgent 3 | from .config import OfficeBenchAgentConfig -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/__init__.py: -------------------------------------------------------------------------------- 1 | from .env import OfficeBenchEnv 2 | from .config import OfficeBenchEnvConfig 3 | from . 
import apps -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-8/reference/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-8/reference/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-38/reference/sids.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-38/reference/sids.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-14/reference/salary.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-14/reference/salary.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-3/testbed/data/file2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-3/testbed/data/file2.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-3/testbed/data/file4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-3/testbed/data/file4.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-8/reference/salary.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-8/reference/salary.xlsx 
-------------------------------------------------------------------------------- /experiments/officebench/tasks/2-7/testbed/data/paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-7/testbed/data/paper.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-1/testbed/data/task.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-1/testbed/data/task.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-36/reference/scores.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-36/reference/scores.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-37/reference/scores.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-37/reference/scores.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-10/testbed/data/salary.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-10/testbed/data/salary.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-10/testbed/data/score.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-10/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-11/reference/midterm1.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-11/reference/midterm1.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-11/reference/midterm2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-11/reference/midterm2.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-11/testbed/data/salary.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-11/testbed/data/salary.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-11/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-11/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-14/testbed/data/salary.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-14/testbed/data/salary.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-14/testbed/data/score.xlsx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-14/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-19/testbed/data/paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-19/testbed/data/paper.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-3/testbed/data/file1.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-3/testbed/data/file1.docx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-3/testbed/data/file3.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-3/testbed/data/file3.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-4/testbed/data/salary.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-4/testbed/data/salary.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-4/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-4/testbed/data/score.xlsx -------------------------------------------------------------------------------- 
/experiments/officebench/tasks/1-5/testbed/data/salary.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-5/testbed/data/salary.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-5/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-5/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-6/testbed/data/salary.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-6/testbed/data/salary.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-6/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-6/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-7/testbed/data/salary.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-7/testbed/data/salary.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-7/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-7/testbed/data/score.xlsx 
-------------------------------------------------------------------------------- /experiments/officebench/tasks/1-8/testbed/data/salary.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-8/testbed/data/salary.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-8/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-8/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-9/testbed/data/salary.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-9/testbed/data/salary.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-9/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-9/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-14/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-14/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-18/testbed/data/score.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-18/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-19/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-19/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-22/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-22/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-23/testbed/data/Invoice.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-23/testbed/data/Invoice.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-24/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-24/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-33/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-33/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-38/testbed/data/party.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-38/testbed/data/party.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-6/testbed/data/homework.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-6/testbed/data/homework.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-8/testbed/data/Bob_card.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-8/testbed/data/Bob_card.png -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-8/testbed/data/Tom_card.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-8/testbed/data/Tom_card.png -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-15/testbed/data/resume.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-15/testbed/data/resume.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-18/testbed/data/resume.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-18/testbed/data/resume.pdf -------------------------------------------------------------------------------- 
/experiments/officebench/tasks/3-24/testbed/data/paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-24/testbed/data/paper.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-26/testbed/data/paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-26/testbed/data/paper.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-3/testbed/data/manual.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-3/testbed/data/manual.docx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-46/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-46/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-47/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-47/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-5/testbed/data/Invoice.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-5/testbed/data/Invoice.pdf 
-------------------------------------------------------------------------------- /experiments/officebench/tasks/3-54/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-54/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-55/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-55/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-59/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-59/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-62/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-62/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-62/testbed/data/target.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-62/testbed/data/target.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-63/testbed/data/score.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-63/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-68/testbed/data/Invoice.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-68/testbed/data/Invoice.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-8/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-8/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-82/testbed/data/score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-82/testbed/data/score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-92/testbed/data/party.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-92/testbed/data/party.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-8/reference/class_member.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-8/reference/class_member.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-23/testbed/data/Invoice_2.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-23/testbed/data/Invoice_2.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-8/testbed/data/John_card.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-8/testbed/data/John_card.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-22/testbed/data/homework.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-22/testbed/data/homework.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-23/testbed/data/homework.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-23/testbed/data/homework.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-25/testbed/data/homework.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-25/testbed/data/homework.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-27/testbed/data/Bob_card.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-27/testbed/data/Bob_card.png -------------------------------------------------------------------------------- 
/experiments/officebench/tasks/3-27/testbed/data/John_card.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-27/testbed/data/John_card.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-27/testbed/data/Tom_card.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-27/testbed/data/Tom_card.png -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-28/testbed/data/Bob_card.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-28/testbed/data/Bob_card.png -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-28/testbed/data/John_card.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-28/testbed/data/John_card.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-28/testbed/data/Tom_card.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-28/testbed/data/Tom_card.png -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-29/testbed/data/Bob_card.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-29/testbed/data/Bob_card.png 
-------------------------------------------------------------------------------- /experiments/officebench/tasks/3-29/testbed/data/John_card.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-29/testbed/data/John_card.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-29/testbed/data/Tom_card.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-29/testbed/data/Tom_card.png -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-30/testbed/data/Bob_card.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-30/testbed/data/Bob_card.png -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-30/testbed/data/John_card.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-30/testbed/data/John_card.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-30/testbed/data/Tom_card.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-30/testbed/data/Tom_card.png -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-5/testbed/data/Invoice_2.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-5/testbed/data/Invoice_2.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-68/testbed/data/Invoice_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-68/testbed/data/Invoice_2.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-11/reference/company_budget.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-11/reference/company_budget.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-11/reference/sorted_score.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-11/reference/sorted_score.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-14/reference/company_budget.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-14/reference/company_budget.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-14/reference/score_swap_row.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-14/reference/score_swap_row.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-14/reference/shopping_list.xlsx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-14/reference/shopping_list.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-15/testbed/data/House_creak.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-15/testbed/data/House_creak.docx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-18/testbed/data/concert_post.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-18/testbed/data/concert_post.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-6/testbed/data/class_member.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-6/testbed/data/class_member.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-7/testbed/data/class_member.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-7/testbed/data/class_member.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-8/reference/company_budget.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-8/reference/company_budget.xlsx 
-------------------------------------------------------------------------------- /experiments/officebench/tasks/1-8/reference/shopping_list.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-8/reference/shopping_list.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-8/testbed/data/class_member.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-8/testbed/data/class_member.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-9/testbed/data/class_member.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-9/testbed/data/class_member.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-10/testbed/data/students_ids.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-10/testbed/data/students_ids.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-11/testbed/data/car_trading.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-11/testbed/data/car_trading.png -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-2/testbed/data/concert_post.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-2/testbed/data/concert_post.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-22/testbed/data/car_trading.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-22/testbed/data/car_trading.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-3/testbed/data/google_form.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-3/testbed/data/google_form.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-31/testbed/data/car_trading.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-31/testbed/data/car_trading.png -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-36/testbed/data/sales_report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-36/testbed/data/sales_report.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-36/testbed/data/transcripts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-36/testbed/data/transcripts.png -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-5/testbed/data/school_policy.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-5/testbed/data/school_policy.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-16/testbed/data/concert_post.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-16/testbed/data/concert_post.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-17/testbed/data/google_form.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-17/testbed/data/google_form.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-38/testbed/data/students_ids.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-38/testbed/data/students_ids.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-39/testbed/data/student_ids.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-39/testbed/data/student_ids.png -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-40/testbed/data/car_trading.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-40/testbed/data/car_trading.png 
-------------------------------------------------------------------------------- /experiments/officebench/tasks/3-66/testbed/data/car_trading.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-66/testbed/data/car_trading.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-67/testbed/data/car_trading.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-67/testbed/data/car_trading.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-69/testbed/data/transcripts.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-69/testbed/data/transcripts.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-76/testbed/data/car_trading.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-76/testbed/data/car_trading.png -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-86/testbed/data/transcripts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-86/testbed/data/transcripts.png -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-87/testbed/data/transcripts.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-87/testbed/data/transcripts.png -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-88/testbed/data/sales_report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-88/testbed/data/sales_report.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-89/testbed/data/sales_report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-89/testbed/data/sales_report.pdf -------------------------------------------------------------------------------- /src/productive_agents/env/appworld/__init__.py: -------------------------------------------------------------------------------- 1 | from .env import AppWorldEnv 2 | from .config import AppWorldEnvConfig 3 | 4 | __all__ = ['AppWorldEnv', 'AppWorldEnvConfig'] 5 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-10/testbed/data/class_member.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-10/testbed/data/class_member.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-10/testbed/data/shopping_list.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-10/testbed/data/shopping_list.xlsx -------------------------------------------------------------------------------- 
/experiments/officebench/tasks/1-11/testbed/data/class_member.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-11/testbed/data/class_member.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-11/testbed/data/shopping_list.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-11/testbed/data/shopping_list.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-14/testbed/data/class_member.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-14/testbed/data/class_member.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-14/testbed/data/shopping_list.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-14/testbed/data/shopping_list.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-4/testbed/data/company_budget.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-4/testbed/data/company_budget.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-4/testbed/data/shopping_list.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-4/testbed/data/shopping_list.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-5/testbed/data/company_budget.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-5/testbed/data/company_budget.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-5/testbed/data/shopping_list.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-5/testbed/data/shopping_list.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-6/testbed/data/company_budget.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-6/testbed/data/company_budget.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-6/testbed/data/shopping_list.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-6/testbed/data/shopping_list.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-7/testbed/data/company_budget.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-7/testbed/data/company_budget.xlsx -------------------------------------------------------------------------------- 
/experiments/officebench/tasks/1-7/testbed/data/shopping_list.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-7/testbed/data/shopping_list.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-8/testbed/data/company_budget.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-8/testbed/data/company_budget.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-8/testbed/data/shopping_list.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-8/testbed/data/shopping_list.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-9/testbed/data/company_budget.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-9/testbed/data/company_budget.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-9/testbed/data/shopping_list.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-9/testbed/data/shopping_list.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-12/testbed/data/meeting_agenda.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-12/testbed/data/meeting_agenda.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-17/testbed/data/class_member.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-17/testbed/data/class_member.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-18/testbed/data/class_member.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-18/testbed/data/class_member.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-18/testbed/data/shopping_list.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-18/testbed/data/shopping_list.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-27/testbed/data/meeting_agenda.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-27/testbed/data/meeting_agenda.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-32/testbed/data/company_budget.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-32/testbed/data/company_budget.jpg -------------------------------------------------------------------------------- 
/experiments/officebench/tasks/2-33/testbed/data/class_member.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-33/testbed/data/class_member.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-35/testbed/data/survey_report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-35/testbed/data/survey_report.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-38/testbed/data/notification.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-38/testbed/data/notification.docx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-45/testbed/data/daily_schedule.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-45/testbed/data/daily_schedule.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-19/testbed/data/school_policy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-19/testbed/data/school_policy.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-20/testbed/data/school_policy.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-20/testbed/data/school_policy.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-21/testbed/data/school_policy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-21/testbed/data/school_policy.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-32/testbed/data/car_inventory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-32/testbed/data/car_inventory.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-33/testbed/data/car_inventory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-33/testbed/data/car_inventory.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-34/testbed/data/car_inventory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-34/testbed/data/car_inventory.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-43/testbed/data/meeting_agenda.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-43/testbed/data/meeting_agenda.pdf -------------------------------------------------------------------------------- 
/experiments/officebench/tasks/3-44/testbed/data/daily_schedule.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-44/testbed/data/daily_schedule.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-53/testbed/data/class_member.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-53/testbed/data/class_member.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-56/testbed/data/class_member.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-56/testbed/data/class_member.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-57/testbed/data/class_member.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-57/testbed/data/class_member.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-58/testbed/data/shopping_list.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-58/testbed/data/shopping_list.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-71/testbed/data/meeting_agenda.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-71/testbed/data/meeting_agenda.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-72/testbed/data/meeting_agenda.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-72/testbed/data/meeting_agenda.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-78/testbed/data/company_budget.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-78/testbed/data/company_budget.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-79/testbed/data/company_budget.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-79/testbed/data/company_budget.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-83/testbed/data/class_member.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-83/testbed/data/class_member.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-10/testbed/data/company_budget.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-10/testbed/data/company_budget.xlsx -------------------------------------------------------------------------------- 
/experiments/officebench/tasks/1-11/testbed/data/company_budget.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-11/testbed/data/company_budget.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-14/reference/score_swap_column.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-14/reference/score_swap_column.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-14/testbed/data/company_budget.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-14/testbed/data/company_budget.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-15/testbed/data/project_report.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-15/testbed/data/project_report.docx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-15/testbed/data/sample_syllabus.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-15/testbed/data/sample_syllabus.docx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-1/testbed/data/notification_image.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-1/testbed/data/notification_image.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-10/testbed/data/students_scores.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-10/testbed/data/students_scores.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-15/testbed/data/meeting_agenda_2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-15/testbed/data/meeting_agenda_2.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-16/testbed/data/meeting_agenda_2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-16/testbed/data/meeting_agenda_2.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-25/testbed/data/A12345homework.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-25/testbed/data/A12345homework.docx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-26/testbed/data/invitation_letter.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-26/testbed/data/invitation_letter.pdf -------------------------------------------------------------------------------- 
/experiments/officebench/tasks/2-26/testbed/data/market_analysis.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-26/testbed/data/market_analysis.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-29/testbed/data/invitation_letter.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-29/testbed/data/invitation_letter.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-31/testbed/data/house_price_index.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-31/testbed/data/house_price_index.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-31/testbed/data/students_scores.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-31/testbed/data/students_scores.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-34/testbed/data/survey_responses.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-34/testbed/data/survey_responses.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-35/testbed/data/financial_report.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-35/testbed/data/financial_report.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-43/testbed/data/company_revenues.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-43/testbed/data/company_revenues.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-45/testbed/data/business_travel.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-45/testbed/data/business_travel.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-5/testbed/data/school_fellowship.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-5/testbed/data/school_fellowship.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-2/testbed/data/financial_report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-2/testbed/data/financial_report.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-35/testbed/data/students_scores.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-35/testbed/data/students_scores.jpg -------------------------------------------------------------------------------- 
/experiments/officebench/tasks/3-36/testbed/data/students_scores.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-36/testbed/data/students_scores.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-37/testbed/data/students_scores.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-37/testbed/data/students_scores.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-52/testbed/data/meeting_agenda_2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-52/testbed/data/meeting_agenda_2.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-70/testbed/data/A12345homework.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-70/testbed/data/A12345homework.docx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-73/testbed/data/invitation_letter.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-73/testbed/data/invitation_letter.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-74/testbed/data/invitation_letter.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-74/testbed/data/invitation_letter.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-77/testbed/data/house_price_index.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-77/testbed/data/house_price_index.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-8/testbed/data/company_revenues.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-8/testbed/data/company_revenues.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-84/testbed/data/survey_responses.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-84/testbed/data/survey_responses.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-85/testbed/data/survey_responses.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-85/testbed/data/survey_responses.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-9/testbed/data/company_revenues.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-9/testbed/data/company_revenues.xlsx -------------------------------------------------------------------------------- 
/experiments/officebench/tasks/3-96/testbed/data/company_revenues.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-96/testbed/data/company_revenues.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-99/testbed/data/company_revenues.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-99/testbed/data/company_revenues.xlsx -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/apps/calculator_app/__init__.py: -------------------------------------------------------------------------------- 1 | INTRO = "calculator: an app to evaluate mathematical expressions." 2 | 3 | from . import calculator_calculate 4 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-12/testbed/data/CS161_Class_Roster.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-12/testbed/data/CS161_Class_Roster.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-15/testbed/data/academic_integrity.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-15/testbed/data/academic_integrity.docx -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-19/testbed/data/contract_agreement.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-19/testbed/data/contract_agreement.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-23/testbed/data/CS161_Class_Roster.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-23/testbed/data/CS161_Class_Roster.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-1/testbed/data/meeting_notification.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-1/testbed/data/meeting_notification.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-20/testbed/data/CS161_Class_Roster.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-20/testbed/data/CS161_Class_Roster.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-21/testbed/data/Bill_course_lists.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-21/testbed/data/Bill_course_lists.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-22/testbed/data/house_price_index.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-22/testbed/data/house_price_index.xlsx 
-------------------------------------------------------------------------------- /experiments/officebench/tasks/2-30/testbed/data/homework_withname.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-30/testbed/data/homework_withname.docx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-34/testbed/data/house_price_index.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-34/testbed/data/house_price_index.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-100/testbed/data/company_revenues.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-100/testbed/data/company_revenues.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-4/testbed/data/financial_report_2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-4/testbed/data/financial_report_2.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-60/testbed/data/CS161_Class_Roster.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-60/testbed/data/CS161_Class_Roster.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-64/testbed/data/house_price_index.xlsx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-64/testbed/data/house_price_index.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-65/testbed/data/house_price_index.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-65/testbed/data/house_price_index.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-75/testbed/data/homework_withname.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-75/testbed/data/homework_withname.docx -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/apps/ocr_app/__init__.py: -------------------------------------------------------------------------------- 1 | INTRO = "ocr: an app to recognize text from images." 2 | 3 | from . import ocr_recognize_file 4 | from .ocr_info import OCRInfo -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/apps/python_app/__init__.py: -------------------------------------------------------------------------------- 1 | INTRO = "python: an app to run the python code." 2 | 3 | from . import python_run_code 4 | from .python_info import PythonInfo -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/apps/scratchpad_app/__init__.py: -------------------------------------------------------------------------------- 1 | INTRO = "scratchpad: an app to write and save notes, plans, or any text content." 2 | 3 | from . 
import scratchpad 4 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-1/testbed/data/combined_notifications.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-1/testbed/data/combined_notifications.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-5/testbed/data/fellowship_application.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-5/testbed/data/fellowship_application.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-12/testbed/data/combined_notifications.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-12/testbed/data/combined_notifications.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-12/testbed/data/meeting_notification.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-12/testbed/data/meeting_notification.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-13/testbed/data/combined_notifications.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-13/testbed/data/combined_notifications.jpg -------------------------------------------------------------------------------- 
/experiments/officebench/tasks/3-13/testbed/data/meeting_notification.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-13/testbed/data/meeting_notification.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-14/testbed/data/combined_notifications.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-14/testbed/data/combined_notifications.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-14/testbed/data/meeting_notification.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-14/testbed/data/meeting_notification.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-19/testbed/data/fellowship_application.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-19/testbed/data/fellowship_application.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-20/testbed/data/fellowship_application.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-20/testbed/data/fellowship_application.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-21/testbed/data/fellowship_application.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-21/testbed/data/fellowship_application.pdf -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-6/testbed/data/team_training_schedule.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-6/testbed/data/team_training_schedule.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-7/testbed/data/UIC_Admitted_Students.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-7/testbed/data/UIC_Admitted_Students.xlsx -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/apps/shell_app/__init__.py: -------------------------------------------------------------------------------- 1 | INTRO = "shell: an app to run shell commands in the system." 2 | 3 | from . 
import command 4 | 5 | from .shell_info import ShellInfo -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-11/testbed/data/company_trading_records.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-11/testbed/data/company_trading_records.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-13/testbed/data/team_training_schedule.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-13/testbed/data/team_training_schedule.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-32/testbed/data/company_trading_records.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-32/testbed/data/company_trading_records.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-41/testbed/data/company_trading_records.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-41/testbed/data/company_trading_records.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-42/testbed/data/company_trading_records.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-42/testbed/data/company_trading_records.jpg -------------------------------------------------------------------------------- 
/experiments/officebench/tasks/3-45/testbed/data/team_training_schedule.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-45/testbed/data/team_training_schedule.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-48/testbed/data/team_training_schedule.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-48/testbed/data/team_training_schedule.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-49/testbed/data/team_training_schedule.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-49/testbed/data/team_training_schedule.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-80/testbed/data/company_trading_records.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-80/testbed/data/company_trading_records.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-81/testbed/data/company_trading_records.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-81/testbed/data/company_trading_records.jpg -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/apps/llm_app/__init__.py: -------------------------------------------------------------------------------- 1 | 
INTRO = "llm: an app to interact with the large language model to answer questions, generate text, etc." 2 | 3 | from . import llm_query -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-1/testbed/data/power_failure_notification.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/2-1/testbed/data/power_failure_notification.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-12/testbed/data/power_failure_notification.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-12/testbed/data/power_failure_notification.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-13/testbed/data/power_failure_notification.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-13/testbed/data/power_failure_notification.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-14/testbed/data/power_failure_notification.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-14/testbed/data/power_failure_notification.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-13/testbed/data/Thomas_Robinson_Course_List.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/1-13/testbed/data/Thomas_Robinson_Course_List.xlsx -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-11/testbed/data/business_travel_notification.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-11/testbed/data/business_travel_notification.jpg -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-61/testbed/data/Thomas_Robinson_Course_List.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/acon/main/experiments/officebench/tasks/3-61/testbed/data/Thomas_Robinson_Course_List.xlsx -------------------------------------------------------------------------------- /experiments/appworld/prompts/context_opt/system_prompt.jinja: -------------------------------------------------------------------------------- 1 | You are an agent tasked with extracting and refining a concise and optimized version of the context based on the user instruction and other provided information. -------------------------------------------------------------------------------- /experiments/smolagents/prompts/context_opt/system_prompt.jinja: -------------------------------------------------------------------------------- 1 | You are an agent tasked with extracting and refining a concise and optimized version of the context based on the user instruction and other provided information. 
-------------------------------------------------------------------------------- /experiments/appworld/configs/context_opt/gpt-4.1_obs.yaml: -------------------------------------------------------------------------------- 1 | type: "obs" 2 | model: "gpt-4.1" 3 | compressor_type: "full" 4 | prompts: 5 | prompt_system: "system_prompt" 6 | prompt_user: "prompt_user" 7 | obs_summarization_threshold: 256 -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-15/testbed/data/reference.txt: -------------------------------------------------------------------------------- 1 | Martin Arjovsky, Amar Shah, and Yoshua Bengio. “Unitary Evolution Recurrent Neural Networks”. In: The International Conference on Machine Learning (ICML). 2016, pp. 1120–1128. -------------------------------------------------------------------------------- /experiments/officebench/configs/context_opt/gpt-4.1_obs.yaml: -------------------------------------------------------------------------------- 1 | type: "obs" 2 | model: "gpt-4.1" 3 | compressor_type: "full" 4 | prompts: 5 | prompt_system: "system_prompt" 6 | prompt_user: "prompt_user" 7 | obs_summarization_threshold: 256 -------------------------------------------------------------------------------- /experiments/smolagents/configs/context_opt/gpt-4.1_obs.yaml: -------------------------------------------------------------------------------- 1 | type: "obs" 2 | model: "gpt-4.1" 3 | compressor_type: "full" 4 | prompts: 5 | prompt_system: "system_prompt" 6 | prompt_user: "prompt_user" 7 | obs_summarization_threshold: 400 -------------------------------------------------------------------------------- /experiments/appworld/configs/context_opt/gpt-4.1-mini_obs.yaml: -------------------------------------------------------------------------------- 1 | type: "obs" 2 | model: "gpt-4.1-mini" 3 | compressor_type: "full" 4 | prompts: 5 | prompt_system: "system_prompt" 6 | prompt_user: "prompt_user" 7 | 
obs_summarization_threshold: 256 -------------------------------------------------------------------------------- /experiments/smolagents/configs/context_opt/llmlingua_obs.yaml: -------------------------------------------------------------------------------- 1 | type: "obs" 2 | model: "gpt-4.1" 3 | compressor_type: "full" 4 | prompts: 5 | prompt_system: "system_prompt" 6 | prompt_user: "prompt_user" 7 | obs_summarization_threshold: 400 8 | use_llmlingua: True -------------------------------------------------------------------------------- /src/productive_agents/env/smolagents/__init__.py: -------------------------------------------------------------------------------- 1 | from .env import SmolagentsEnv 2 | from .config import SmolagentsEnvConfig, create_smolagents_config 3 | 4 | __all__ = [ 5 | "SmolagentsEnv", 6 | "SmolagentsEnvConfig", 7 | "create_smolagents_config", 8 | ] 9 | -------------------------------------------------------------------------------- /experiments/officebench/configs/context_opt/gpt-4.1_history.yaml: -------------------------------------------------------------------------------- 1 | type: "history" 2 | model: "gpt-4.1" 3 | compressor_type: "full" 4 | prompts: 5 | prompt_system: "system_prompt" 6 | prompt_history_user: "prompt_history" 7 | history_summarization_threshold: 4096 8 | preserve_last_k_turns: 1 -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/apps/email_app/__init__.py: -------------------------------------------------------------------------------- 1 | INTRO = "email: an app to manage emails, such as sending and reading emails." 2 | 3 | from . import email_send_email 4 | from . import email_list_emails 5 | from . 
import email_read_email 6 | from .email_info import EmailInfo -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/apps/calendar_app/__init__.py: -------------------------------------------------------------------------------- 1 | INTRO = "calendar: an app to manage daily events on calendar." 2 | 3 | from . import calendar_create_event 4 | from . import calendar_delete_event 5 | from . import calendar_list_events 6 | from .calendar_info import CalendarInfo -------------------------------------------------------------------------------- /experiments/smolagents/configs/context_opt/llmlingua_history.yaml: -------------------------------------------------------------------------------- 1 | type: "history" 2 | model: "gpt-4.1" 3 | compressor_type: "full" 4 | prompts: 5 | prompt_system: "system_prompt" 6 | prompt_history_user: "prompt_history" 7 | history_summarization_threshold: 2048 8 | preserve_last_k_turns: 1 9 | use_llmlingua: True -------------------------------------------------------------------------------- /experiments/appworld/data_copy/datasets/train_obs_tiny.txt: -------------------------------------------------------------------------------- 1 | 229360a_1 2 | 229360a_3 3 | 22cc237_1 4 | 22cc237_3 5 | 27e1026_2 6 | 287e338_3 7 | 302c169_1 8 | 34d9492_1 9 | 771d8fc_1 10 | 771d8fc_2 11 | 7d7fbf6_2 12 | 82e2fac_1 13 | 82e2fac_2 14 | ccb4494_1 15 | ce359b5_3 16 | d0b1f43_2 17 | e3d6c94_1 18 | e85d92a_1 -------------------------------------------------------------------------------- /experiments/appworld/configs/context_opt/gpt-4.1_history.yaml: -------------------------------------------------------------------------------- 1 | type: "history" 2 | model: "gpt-4.1" 3 | compressor_type: "full" 4 | prompts: 5 | prompt_system: "system_prompt" 6 | prompt_history_user: "prompt_history_v2" 7 | history_summarization_threshold: 4096 8 | preserve_last_k_turns: 1 9 | history_summary_rule: "reset" 
-------------------------------------------------------------------------------- /experiments/smolagents/configs/context_opt/discard_history_keep5.yaml: -------------------------------------------------------------------------------- 1 | type: "history" 2 | model: "gpt-4.1" 3 | compressor_type: "full" 4 | prompts: 5 | prompt_system: "system_prompt" 6 | prompt_history_user: "prompt_history" 7 | history_summarization_threshold: 4096 8 | baseline_strategy: "discard" 9 | preserve_last_k_turns: 5 -------------------------------------------------------------------------------- /experiments/smolagents/configs/context_opt/gpt-4.1_history.yaml: -------------------------------------------------------------------------------- 1 | type: "history" 2 | model: "gpt-4.1" 3 | compressor_type: "full" 4 | prompts: 5 | prompt_system: "system_prompt" 6 | prompt_history_user: "prompt_history_v2" 7 | history_summarization_threshold: 2048 8 | preserve_last_k_turns: 1 9 | history_summary_rule: "reset" -------------------------------------------------------------------------------- /experiments/appworld/configs/context_opt/gpt-4.1-mini_history.yaml: -------------------------------------------------------------------------------- 1 | type: "history" 2 | model: "gpt-4.1-mini" 3 | compressor_type: "full" 4 | prompts: 5 | prompt_system: "system_prompt" 6 | prompt_history_user: "prompt_history_v2" 7 | history_summarization_threshold: 4096 8 | preserve_last_k_turns: 1 9 | history_summary_rule: "reset" -------------------------------------------------------------------------------- /experiments/appworld/prompts/prompts_v1.json: -------------------------------------------------------------------------------- 1 | { 2 | "system_message": "You are an AI assistant that writes Python code to complete tasks. 
You should respond with clear, executable Python code to interact with APIs and solve the given task completely autonomously.", 3 | "main_prompt_template": "./prompts/prompt_v1.jinja" 4 | } 5 | -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/apps/pdf_app/__init__.py: -------------------------------------------------------------------------------- 1 | INTRO = "pdf: an app to manipulate pdf files, including format conversion and file reading." 2 | 3 | # from . import image_convert_to_pdf 4 | from . import pdf_convert_to_image 5 | from . import pdf_convert_to_word 6 | from . import pdf_read_file 7 | from .pdf_info import PDFInfo -------------------------------------------------------------------------------- /src/productive_agents/agents/appworld/__init__.py: -------------------------------------------------------------------------------- 1 | from .agent import AppWorldAgent, create_appworld_agent 2 | from .config import AppWorldAgentConfig, create_appworld_agent_config 3 | 4 | __all__ = [ 5 | 'AppWorldAgent', 6 | 'create_appworld_agent', 7 | 'AppWorldAgentConfig', 8 | 'create_appworld_agent_config' 9 | ] 10 | -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/apps/system_app/system_info.py: -------------------------------------------------------------------------------- 1 | from .copy import DEMO as copy_demo 2 | from paste import DEMO as paste_demo 3 | 4 | class SystemInfo: 5 | def __init__(self): 6 | self.name = "shell" 7 | self.info = { 8 | "copy": copy_demo, 9 | "paste": paste_demo, 10 | } -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/apps/word_app/__init__.py: -------------------------------------------------------------------------------- 1 | INTRO = "word: an app to manipulate word files, including reading, writing, converting, etc." 2 | 3 | from . 
import word_convert_to_pdf 4 | from . import word_create_new_file 5 | from . import word_read_file 6 | from . import word_write_to_file 7 | from .word_info import WordInfo 8 | -------------------------------------------------------------------------------- /experiments/smolagents/configs/context_opt/retrieval_history_keep5.yaml: -------------------------------------------------------------------------------- 1 | type: "history" 2 | model: "gpt-4.1" 3 | compressor_type: "full" 4 | prompts: 5 | prompt_system: "system_prompt" 6 | prompt_history_user: "prompt_history" 7 | history_summarization_threshold: 2048 8 | baseline_strategy: "retrieve" 9 | preserve_last_k_turns: 1 10 | retrieve_turns: 4 -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-3/subtasks/3.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "delete file3.xlsx", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_not_exist", 10 | "args": { 11 | "file": "./data/file3.xlsx" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-3/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "change the docx file name to test_file1.docx", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "./data/test_file1.docx" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-3/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | 
"weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Rename the PDF file to match the name of the DOCX file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "./data/file1.pdf" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-3/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "read a pdf file google_form.pdf, make an image post welcome.jpg", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/welcome.jpg" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/prompt_optimizer/common/aggregation.py: -------------------------------------------------------------------------------- 1 | """Small list / aggregation helpers.""" 2 | from __future__ import annotations 3 | from typing import List 4 | 5 | 6 | def dedupe_list(values: List[str]) -> List[str]: 7 | seen = set() 8 | out: List[str] = [] 9 | for v in values: 10 | if v and v not in seen: 11 | seen.add(v) 12 | out.append(v) 13 | return out 14 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-36/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Read grade in transcripts and generate a grade report report.docx", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "./data/report.docx" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- 
/experiments/officebench/tasks/1-19/subtasks/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "extract paper abstract and save as an image file called 'abstract.png'", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/abstract.png" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-1/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "summarize content from meeting notification image and save in meeting.pdf", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/meeting.pdf" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-18/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "extract concert post from pdf file and save as image named 'concert_post.jpg'", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/concert_post.jpg" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-19/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Analyze Excel data of students' grade and generate a teaching report in teaching.docx", 7 
| "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/teaching.docx" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-18/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "extract concert post and summarize it in a new pdf file named concert_invitation.pdf", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/concert_invitation.pdf" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-12/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Scan handwritten notes from a meeting agenda, convert them to text, and add them to my calendar", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "calendar/Alice.ics" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-7/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Extract key points from PDF research papers, summarize them in paper.docx, and save it for reference", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/paper.docx" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-25/subtasks/0.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Convert student homework word documents to PDF and rename them into same name with .pdf suffix", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/A12345homework.docx" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-41/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "read work overtime email, save as recipient's calendar event using their names as file names", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "calendar/Bob.ics" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-22/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Generate a word report based on midterm1 and midterm2 scores, the filename is 'midterm_report.docx'.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/midterm_report.docx" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-30/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Extract the student ID from docx then rename this docx file to 
{ID}.docx where the ID is the student ID.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "./data/A12345678.docx" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /src/productive_agents/utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from contextlib import contextmanager 4 | 5 | @contextmanager 6 | def all_seed(seed): 7 | random_state = random.getstate() 8 | np_random_state = np.random.get_state() 9 | 10 | try: 11 | random.seed(seed) 12 | np.random.seed(seed) 13 | yield 14 | finally: 15 | random.setstate(random_state) 16 | np.random.set_state(np_random_state) -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-1/subtasks/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "summarize content from combined notification image into one notification pdf file named two_notification.pdf", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/two_notification.pdf" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-18/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Write a statement in docx file to the student with the highest score in the class, use the student's name as filename", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/Rachel.docx" 12 | } 13 | } 14 | ] 15 | } 
-------------------------------------------------------------------------------- /experiments/officebench/tasks/1-2/subtasks/3.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Who gets up earlier? Bob or Tom", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "ics", 12 | "file": "./data/answer.txt", 13 | "keywords": [ 14 | "Tom" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-33/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Which class has the least student?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "4" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-9/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "read company revenues, describe revenues in a report. I want the report converted into an image named report.jpg", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/report.jpg" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/prompt_optimizer/common/__init__.py: -------------------------------------------------------------------------------- 1 | """Common utilities package for prompt optimizer experiments. 
2 | 3 | Direct imports are intentionally not re-exported to avoid issues when scripts 4 | are executed as standalone modules (python path/namespace variability). 5 | Import needed helpers explicitly: 6 | 7 | from experiments.prompt_optimizer.common.paths import infer_paths_appworld 8 | 9 | """ 10 | 11 | __all__ = [ 12 | # kept empty for explicitness 13 | ] -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-22/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "who sent the most email to Bob?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "gradescope" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/test_tasks.txt: -------------------------------------------------------------------------------- 1 | 1-13 2 | 1-14 3 | 1-15 4 | 1-20 5 | 1-23 6 | 1-3 7 | 1-4 8 | 1-5 9 | 1-8 10 | 1-9 11 | 2-14 12 | 2-15 13 | 2-16 14 | 2-17 15 | 2-22 16 | 2-24 17 | 2-30 18 | 2-37 19 | 2-39 20 | 2-41 21 | 2-42 22 | 2-44 23 | 2-48 24 | 3-1 25 | 3-10 26 | 3-46 27 | 3-49 28 | 3-50 29 | 3-51 30 | 3-53 31 | 3-55 32 | 3-56 33 | 3-57 34 | 3-59 35 | 3-6 36 | 3-60 37 | 3-61 38 | 3-65 39 | 3-66 40 | 3-7 41 | 3-75 42 | 3-8 43 | 3-90 44 | 3-94 45 | 3-95 46 | 3-97 47 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-90/testbed/calendar/Alice.ics: -------------------------------------------------------------------------------- 1 | BEGIN:VCALENDAR 2 | VERSION:2.0 3 | BEGIN:VEVENT 4 | SUMMARY:meeting 5 | DESCRIPTION:weekly meeting 6 | DTSTART;TZID=America/Los_Angeles:20230120T100000 7 | 
DTEND;TZID=America/Los_Angeles:20230120T120000 8 | LOCATION:Event Location 9 | SEQUENCE:0 10 | STATUS:CONFIRMED 11 | TRANSP:OPAQUE 12 | BEGIN:VALARM 13 | TRIGGER:-PT15M 14 | DESCRIPTION:Reminder 15 | ACTION:DISPLAY 16 | END:VALARM 17 | END:VEVENT 18 | END:VCALENDAR 19 | -------------------------------------------------------------------------------- /experiments/officebench/train_tasks.txt: -------------------------------------------------------------------------------- 1 | 1-1 2 | 1-10 3 | 1-11 4 | 1-12 5 | 1-16 6 | 1-2 7 | 1-21 8 | 1-22 9 | 1-6 10 | 1-7 11 | 2-13 12 | 2-18 13 | 2-19 14 | 2-20 15 | 2-21 16 | 2-25 17 | 2-33 18 | 2-34 19 | 2-40 20 | 2-43 21 | 2-46 22 | 2-49 23 | 2-50 24 | 3-100 25 | 3-3 26 | 3-4 27 | 3-45 28 | 3-47 29 | 3-48 30 | 3-52 31 | 3-54 32 | 3-58 33 | 3-63 34 | 3-64 35 | 3-67 36 | 3-70 37 | 3-82 38 | 3-83 39 | 3-84 40 | 3-85 41 | 3-9 42 | 3-91 43 | 3-93 44 | 3-96 45 | 3-98 46 | 3-99 47 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-2/subtasks/4.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "who has more commitments on 5/1/2024? 
Tom or Bob", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "ics", 12 | "file": "./data/answer.txt", 13 | "keywords": [ 14 | "Tom" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-22/subtasks/4.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2022-11-01", 4 | "weekday": "Tuesday", 5 | "time": "10:00 AM", 6 | "task": "Check my emails, what is my score of project2?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "93" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-22/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "give some instruction and prediction in word file based on house_price_index.xlsx, and save the word file in the same filename", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/house_price_index.docx" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-20/subtasks/4.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2024-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "In which season does Bob receive most emails?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "spring" 15 | ] 16 | } 17 | } 18 | ] 19 | } 
-------------------------------------------------------------------------------- /experiments/officebench/tasks/2-31/subtasks/3.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "what is median score for student scores in image?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "272" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-21/subtasks/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2024-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "does Bob receive emails in last month? Answer yes or no.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "yes" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-31/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "What is the maximum prices of the cars sold in March?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "20000" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-36/subtasks/2.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "what the maximum sales revenue entry from sales report", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "78002" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-63/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Generate a file report.docx based on midterm1 and midterm2 scores, save in folder named report", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/report/report.docx" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-88/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Analyze relationship between revenues and sales unit, regions, save as an image post named analyze.jpg", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/analyze.jpg" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-22/subtasks/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2022-11-01", 4 | "weekday": "Tuesday", 5 | "time": "10:00 AM", 6 | "task": "Check the emails, and let me know if my rent is due. 
Yes or no?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "yes" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-35/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "what is 2024 total revenue from annual financial report?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "36455" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-37/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "for calendar data, which event has longest total time?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "txt", 12 | "file": "./data/answer.txt", 13 | "keywords": [ 14 | "Class" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-46/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "extract class canceling notification from email, save it into a doc file and convert the doc file into a pdf named class_canceling.pdf", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/class_canceling.pdf" 12 | } 13 | 
} 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-22/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "what is the subject of the email Alice sent to Bob?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "scheduled meeting" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-31/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "What is the maximum prices of the cars sold over all time?", 7 | "evaluation": [ 8 | 9 | { 10 | "function": "evaluate_contain", 11 | "args": { 12 | "file": "data/answer.txt", 13 | "doc_type": "txt", 14 | "keywords": [ 15 | "120000" 16 | ] 17 | } 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-32/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Does the reimbursement amount exceed the company budget? 
Yes or No?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "Yes" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-34/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Check satisfying score in the survey. What age has highest score.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "26" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-22/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Scan homework and save as homework.docx for editing, save homework file into folder named homework", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/homework/homework.docx" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-85/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Analyze relationship between sleeping hours and age, weight, gender from survey result, save report as report.jpg", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/report.jpg" 12 | } 13 | } 14 | ] 15 | } 
-------------------------------------------------------------------------------- /experiments/officebench/tasks/1-23/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "what is total hw score of Cixarim? Add his hw1, hw2, and hw3 together.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "txt", 12 | "file": "./data/answer.txt", 13 | "keywords": [ 14 | "153" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-9/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "read company revenues, how many years of revenues are higher than 4000000$, save results in an image revenues.jpg", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/revenues.jpg" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-21/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "what is the subject of the earliest email that send to Bob?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "Reminder Rent due" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-23/subtasks/1.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Check the total due in Invoice_2.pdf and save it in costs.xlsx", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/costs.xlsx", 12 | "doc_type": "xlsx", 13 | "keywords": [ 14 | "1389.99" 15 | ] 16 | 17 | } 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-84/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Analyze relationship between sleeping hours and age, weight, gender from survey result, convert report to report.docx file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/report.docx" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-10/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "find the lowest score and highest score of midterm 1, what is their difference?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "txt", 12 | "file": "./data/answer.txt", 13 | "keywords": [ 14 | "40" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-10/subtasks/4.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": 
"10:00 AM", 6 | "task": "What is the item that is bought the fewest amount in shopping list excel file?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "txt", 12 | "file": "./data/answer.txt", 13 | "keywords": [ 14 | "apple" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-14/subtasks/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "swap the lowest and highest incoming item in salary excel file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_exact_match", 10 | "args": { 11 | "doc_type": "xlsx", 12 | "result_file": "./data/salary.xlsx", 13 | "expected_file": "../../../../reference/salary.xlsx" 14 | } 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-8/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "delete all scores and the score-related headers in score excel file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_exact_match", 10 | "args": { 11 | "doc_type": "xlsx", 12 | "result_file": "./data/score.xlsx", 13 | "expected_file": "../../../../reference/score.xlsx" 14 | } 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-8/subtasks/3.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "delete everything in company budget excel file", 7 | "evaluation": [ 8 | { 9 | "function": 
"evaluate_exact_match", 10 | "args": { 11 | "doc_type": "xlsx", 12 | "result_file": "./data/company_budget.xlsx", 13 | "expected_file": "../../../../reference/company_budget.xlsx" 14 | } 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-5/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Does the submitted fellowship application PDF material satisfie the school policy?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "yes" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-11/subtasks/3.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "sort items in budget descently in budget excel file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_exact_match", 10 | "args": { 11 | "doc_type": "xlsx", 12 | "result_file": "./data/company_budget.xlsx", 13 | "expected_file": "../../../../reference/company_budget.xlsx" 14 | } 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-14/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Swap row of Alice and row of Bob in score excel file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_exact_match", 10 | "args": { 11 | "doc_type": "xlsx", 12 | "result_file": "./data/score.xlsx", 
13 | "expected_file": "../../../../reference/score_swap_row.xlsx" 14 | } 15 | } 16 | ] 17 | 18 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-14/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "swap column of midterm1 and midterm2 in score excel file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_exact_match", 10 | "args": { 11 | "doc_type": "xlsx", 12 | "result_file": "./data/score.xlsx", 13 | "expected_file": "../../../../reference/score_swap_column.xlsx" 14 | } 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-22/subtasks/3.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2022-11-01", 4 | "weekday": "Tuesday", 5 | "time": "10:00 AM", 6 | "task": "Check the emails, and let me know if my scholarship application is approved. 
Yes or no?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "yes" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-8/subtasks/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "delete all amounts and amounts-related header in salary excel file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_exact_match", 10 | "args": { 11 | "doc_type": "xlsx", 12 | "result_file": "./data/salary.xlsx", 13 | "expected_file": "../../../../reference/salary.xlsx" 14 | } 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-31/subtasks/4.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "what is median house price index based on house price index image? answer the question!", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "91" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-36/subtasks/3.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Analyze relationship between revenues and sales unit, regions. 
Save the results in relationship.txt in the folder /testbed/data/.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "./data/relationship.txt" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/apps/exp_config.yaml: -------------------------------------------------------------------------------- 1 | experiment: 2 | exp_id: "debug-code-tool_az-gpt-4o" 3 | model_id: "az-gpt-4o" 4 | evaluation: "online" 5 | memory: false 6 | use_thinking_tokens: true 7 | use_scratchpad: false 8 | env: 9 | max_iter: 50 10 | model: 11 | id: "az-gpt-4o" 12 | name: "azure/gpt-4o" 13 | ocr: "pytesseract" 14 | memory: 15 | mode: 16 | docker: 17 | docker_name: "officebench" 18 | dockerfile_path: "./docker/Dockerfile" 19 | prompt_file: "configs/prompts_v2.json" -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/apps/shell_app/shell_info.py: -------------------------------------------------------------------------------- 1 | from .command import DEMO as command_demo 2 | 3 | class ShellInfo: 4 | def __init__(self): 5 | self.name = "shell" 6 | self.info = { 7 | "shell command": command_demo, 8 | } 9 | 10 | def get_instruction(self) -> str: 11 | instructions = [f"Command to perform function: {key}:\n{demo}" for key, demo in self.info.items()] 12 | return f"## How to use the {self.name} app:\n\n" + "\n\n".join(instructions) -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-12/subtasks/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "What is Xihozun's HW2 score?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "txt", 12 | 
"file": "./data/answer.txt", 13 | "keywords": [ 14 | "54" 15 | ] 16 | } 17 | } 18 | ] 19 | 20 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-20/testbed/emails/Bob/gradescope.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============5072539883543392509==" 2 | MIME-Version: 1.0 3 | From: gradescope@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: Gradescope 6 | 7 | --===============5072539883543392509== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Your Project 2 has been graded. 13 | 14 | Sent at: 2024-01-02 03:00:00 15 | --===============5072539883543392509==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-21/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "what is the subject of the latest email that send to Bob?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "application", 15 | "approved" 16 | ] 17 | } 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-21/testbed/emails/Bob/gradescope.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============5072539883543392509==" 2 | MIME-Version: 1.0 3 | From: gradescope@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: Gradescope 6 | 7 | --===============5072539883543392509== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 
12 | Your Project 2 has been graded. 13 | 14 | Sent at: 2024-01-02 03:00:00 15 | --===============5072539883543392509==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-24/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Extract the score and name then calculate and write the average score in excel score.xlsx", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/score.xlsx", 12 | "doc_type": "xlsx", 13 | "keywords": [ 14 | "79" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-39/testbed/emails/Bob/gradescope.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============5072539883543392509==" 2 | MIME-Version: 1.0 3 | From: gradescope@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: Gradescope 6 | 7 | --===============5072539883543392509== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Your Project 2 has been graded. 
13 | 14 | Sent at: 2024-01-02 03:00:00 15 | --===============5072539883543392509==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-42/testbed/emails/Bob/gradescope.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============5072539883543392509==" 2 | MIME-Version: 1.0 3 | From: gradescope@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: Gradescope 6 | 7 | --===============5072539883543392509== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Your Project 2 has been graded. 13 | 14 | Sent at: 2024-01-02 03:00:00 15 | --===============5072539883543392509==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-44/testbed/emails/Bob/gradescope.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============5072539883543392509==" 2 | MIME-Version: 1.0 3 | From: gradescope@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: Gradescope 6 | 7 | --===============5072539883543392509== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Your Project 2 has been graded. 
13 | 14 | Sent at: 2024-01-02 03:00:00 15 | --===============5072539883543392509==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-10/testbed/emails/Alice/gradescope.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============5072539883543392509==" 2 | MIME-Version: 1.0 3 | From: gradescope@gmail.com 4 | To: Bob@gmail.com 5 | Subject: Gradescope 6 | 7 | --===============5072539883543392509== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Your Project 2 has been graded. 13 | 14 | Sent at: 2024-01-02 03:00:00 15 | --===============5072539883543392509==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-95/testbed/emails/Bob/gradescope.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============5072539883543392509==" 2 | MIME-Version: 1.0 3 | From: gradescope@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: Gradescope 6 | 7 | --===============5072539883543392509== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Your Project 2 has been graded. 
13 | 14 | Sent at: 2024-01-02 03:00:00 15 | --===============5072539883543392509==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-14/subtasks/4.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "swap the highest and second highest item in budget excel file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_exact_match", 10 | "args": { 11 | "doc_type": "xlsx", 12 | "result_file": "./data/company_budget.xlsx", 13 | "expected_file": "../../../../reference/company_budget.xlsx" 14 | } 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-23/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "how many students in CS161 is freshman?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "txt", 12 | "file": "./data/answer.txt", 13 | "keywords": [ 14 | "4" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-3/subtasks/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "duplicate the excel file and name temp.xlsx", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "./data/temp.xlsx" 12 | } 13 | }, 14 | { 15 | "function": "evaluate_file_exist", 16 | "args": { 17 | "file": "./data/file3.xlsx" 18 | } 19 | } 20 | ] 21 | } 
-------------------------------------------------------------------------------- /experiments/officebench/tasks/2-34/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Analyze housing price and print the lowest price in lowest.txt in the /testbed/data/ folder", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/lowest.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "16.91" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-42/testbed/emails/Alice/mobile_bill.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============5945798227130114692==" 2 | MIME-Version: 1.0 3 | From: 10086@gmail.com 4 | To: Alice@gmail.com 5 | Subject: mobile bill 6 | 7 | --===============5945798227130114692== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | please pay this month's bill 28.90$. 13 | 14 | Sent at: 2024-04-22 10:00:00 15 | --===============5945798227130114692==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-10/testbed/emails/Alice/mobile_bill.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============5945798227130114692==" 2 | MIME-Version: 1.0 3 | From: 10086@gmail.com 4 | To: Alice@gmail.com 5 | Subject: mobile bill 6 | 7 | --===============5945798227130114692== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | please pay this month's bill 28.90$. 
13 | 14 | Sent at: 2024-04-22 10:00:00 15 | --===============5945798227130114692==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-89/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Analyze relationship between revenues and sales unit, regions, save your results in the analysis.txt and put it in a folder named report", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/report/analysis.txt" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-95/testbed/emails/Alice/mobile_bill.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============5945798227130114692==" 2 | MIME-Version: 1.0 3 | From: 10086@gmail.com 4 | To: Alice@gmail.com 5 | Subject: mobile bill 6 | 7 | --===============5945798227130114692== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | please pay this month's bill 28.90$. 
13 | 14 | Sent at: 2024-04-22 10:00:00 15 | --===============5945798227130114692==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-1/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "remind Bob that he has a dinner at 5/18/2024 7:00 p.m", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "ics", 12 | "file": "./calendar/Bob.ics", 13 | "keywords": [ 14 | "dinner", 15 | "DTSTART:20240518T190000" 16 | ] 17 | } 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-12/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "what is total number of students in class CS161, and how many of them are senior?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "txt", 12 | "file": "./data/answer.txt", 13 | "keywords": [ 14 | "9", 15 | "2" 16 | ] 17 | } 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-14/subtasks/3.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "swap the fewest and the most shopped item in shopping list excel file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_exact_match", 10 | "args": { 11 | "doc_type": "xlsx", 12 | "result_file": "./data/shopping_list.xlsx", 13 | "expected_file": "../../../../reference/shopping_list.xlsx" 14 | } 15 | } 16 | ] 17 | } 
-------------------------------------------------------------------------------- /experiments/officebench/tasks/1-20/testbed/emails/Bob/meeting.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============0528119580747089718==" 2 | MIME-Version: 1.0 3 | From: Alice@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: scheduled meeting 6 | 7 | --===============0528119580747089718== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Can we have a zoom meeting later today? 13 | 14 | Sent at: 2024-03-23 10:00:00 15 | --===============0528119580747089718==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-21/testbed/emails/Bob/meeting.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============0528119580747089718==" 2 | MIME-Version: 1.0 3 | From: Alice@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: scheduled meeting 6 | 7 | --===============0528119580747089718== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Can we have a zoom meeting later today? 
13 | 14 | Sent at: 2024-03-23 10:00:00 15 | --===============0528119580747089718==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-22/testbed/emails/Bob/meeting.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============0528119580747089718==" 2 | MIME-Version: 1.0 3 | From: Alice@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: scheduled meeting 6 | 7 | --===============0528119580747089718== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Can we have a zoom meeting later today? 13 | 14 | Sent at: 2024-03-23 10:00:00 15 | --===============0528119580747089718==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-31/subtasks/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "What is the average score and variance for student scores shown in the image", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "264", 15 | "4004" 16 | ] 17 | } 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-39/testbed/emails/Bob/meeting.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============0528119580747089718==" 2 | MIME-Version: 1.0 3 | From: Alice@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: scheduled meeting 6 | 7 | --===============0528119580747089718== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 
11 | 12 | Can we have a zoom meeting later today? 13 | 14 | Sent at: 2024-03-23 10:00:00 15 | --===============0528119580747089718==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-42/testbed/emails/Bob/meeting.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============0528119580747089718==" 2 | MIME-Version: 1.0 3 | From: Alice@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: scheduled meeting 6 | 7 | --===============0528119580747089718== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Can we have a zoom meeting later today? 13 | 14 | Sent at: 2024-03-23 10:00:00 15 | --===============0528119580747089718==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-44/testbed/emails/Bob/meeting.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============0528119580747089718==" 2 | MIME-Version: 1.0 3 | From: Alice@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: scheduled meeting 6 | 7 | --===============0528119580747089718== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Can we have a zoom meeting later today? 
13 | 14 | Sent at: 2024-03-23 10:00:00 15 | --===============0528119580747089718==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-46/testbed/emails/Alice/class_cancel.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============2742940021967139810==" 2 | MIME-Version: 1.0 3 | From: samueli@gmail.com 4 | To: Alice@gmail.com 5 | Subject: class cancel 6 | 7 | --===============2742940021967139810== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | class cancel today due to protesting. 13 | 14 | Sent at: 2024-05-02 08:00:00 15 | --===============2742940021967139810==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-6/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Scan PDF homework to homework.docx for editing", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/homework.docx", 12 | "doc_type": "doc", 13 | "keywords": [ 14 | "CSE", 15 | "240A", 16 | "Homework" 17 | ] 18 | } 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-10/testbed/emails/Alice/meeting.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============0528119580747089718==" 2 | MIME-Version: 1.0 3 | From: Alice@gmail.com 4 | To: Bob@gmail.com 5 | Subject: scheduled meeting 6 | 7 | --===============0528119580747089718== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | 
Can we have a zoom meeting later today? 13 | 14 | Sent at: 2024-03-23 10:00:00 15 | --===============0528119580747089718==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-95/testbed/emails/Bob/meeting.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============0528119580747089718==" 2 | MIME-Version: 1.0 3 | From: Alice@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: scheduled meeting 6 | 7 | --===============0528119580747089718== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Can we have a zoom meeting later today? 13 | 14 | Sent at: 2024-03-23 10:00:00 15 | --===============0528119580747089718==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-11/subtasks/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "add all incoming in the salery excel file. This is the annual income. 
So what is the monthly income?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "txt", 12 | "file": "./data/answer.txt", 13 | "keywords": [ 14 | "35000" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-8/subtasks/4.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "delete all amounts and amounts-related header in shopping list excel file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_exact_match", 10 | "args": { 11 | "doc_type": "xlsx", 12 | "result_file": "./data/shopping_list.xlsx", 13 | "expected_file": "../../../../reference/shopping_list.xlsx" 14 | } 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-42/testbed/emails/Alice/coupon.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============2457938525952404095==" 2 | MIME-Version: 1.0 3 | From: amazon@gmail.com 4 | To: Alice@gmail.com 5 | Subject: black friday 6 | 7 | --===============2457938525952404095== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | here is a discount coupon for this year festival. 
13 | 14 | Sent at: 2023-10-22 10:00:00 15 | --===============2457938525952404095==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-10/testbed/emails/Alice/coupon.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============2457938525952404095==" 2 | MIME-Version: 1.0 3 | From: amazon@gmail.com 4 | To: Alice@gmail.com 5 | Subject: black friday 6 | 7 | --===============2457938525952404095== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | here is a discount coupon for this year festival. 13 | 14 | Sent at: 2023-10-22 10:00:00 15 | --===============2457938525952404095==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-95/testbed/emails/Alice/coupon.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============2457938525952404095==" 2 | MIME-Version: 1.0 3 | From: amazon@gmail.com 4 | To: Alice@gmail.com 5 | Subject: black friday 6 | 7 | --===============2457938525952404095== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | here is a discount coupon for this year festival. 
class PythonInfo:
    """Usage description for the python app: a name plus per-command demo text."""

    def __init__(self):
        # App name; it is interpolated into the instruction heading.
        self.name = "python"
        # Command label -> demo text (the DEMO constant imported by this module).
        self.info = {"run_code": run_code_demo}

    def get_instruction(self) -> str:
        """Return the usage text: a heading, then one section per command.

        Sections follow the insertion order of ``self.info`` and are
        separated by a blank line.
        """
        sections = []
        for command, usage in self.info.items():
            sections.append(f"Command to perform function: {command}:\n{usage}")
        body = "\n\n".join(sections)
        return f"## How to use the {self.name} app:\n\n" + body
class OCRInfo:
    """Usage description for the ocr app: a name plus per-command demo text."""

    def __init__(self):
        # App name; it is interpolated into the instruction heading.
        self.name = "ocr"
        # Command label -> demo text (the DEMO constant imported by this module).
        self.info = {"recognize_file": recognize_file_demo}

    def get_instruction(self) -> str:
        """Return the usage text: a heading, then one section per command.

        Sections follow the insertion order of ``self.info`` and are
        separated by a blank line.
        """
        sections = []
        for command, usage in self.info.items():
            sections.append(f"Command to perform function: {command}:\n{usage}")
        body = "\n\n".join(sections)
        return f"## How to use the {self.name} app:\n\n" + body
Read the contents as the answer.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "Your scholarship application has been approved." 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-10/testbed/emails/Alice/scholarship.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============0835243834727249202==" 2 | MIME-Version: 1.0 3 | From: office@gmail.com 4 | To: Bob@gmail.com 5 | Subject: application approved 6 | 7 | --===============0835243834727249202== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Your scholarship application has been approved. 13 | 14 | Sent at: 2024-04-02 10:00:00 15 | --===============0835243834727249202==-- 16 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Support 2 | 3 | ## How to file issues and get help 4 | 5 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 6 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 7 | feature request as a new Issue. 8 | 9 | For help and questions about using this project, please refer to the [README](./README.md) or leave issues. 10 | 11 | ## Microsoft Support Policy 12 | 13 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 
14 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-1/subtasks/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "I have to workout at 5/16/2024 5:00 p.m to 6:00 p.m, notify me on calendar", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "ics", 12 | "file": "./calendar/Bob.ics", 13 | "keywords": [ 14 | "workout", 15 | "DTSTART:20240516T170000" 16 | ] 17 | } 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-11/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "sort students score descently by midterm1, save as a new excel file as sorted_score.xlsx", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_exact_match", 10 | "args": { 11 | "doc_type": "xlsx", 12 | "result_file": "./data/sorted_score.xlsx", 13 | "expected_file": "../../../../reference/sorted_score.xlsx" 14 | } 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-20/subtasks/3.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2024-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "does Bob receive emails in last month? if does, how many? 
Answer with yes or no and a arabic number.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "yes", 15 | "1" 16 | ] 17 | } 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-23/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Extract data from PDF invoice (Invoice.pdf) and put the total due in file costs.xlsx", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/costs.xlsx", 12 | "doc_type": "xlsx", 13 | "keywords": [ 14 | "total due", 15 | "104.98" 16 | ] 17 | 18 | } 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-36/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "if Dean's list requires term average gpa to be higher than 3.9, how many terms can this students be awarded on Dean's list?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "11" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-20/testbed/emails/Bob/scholarship approved.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============0835243834727249202==" 2 | MIME-Version: 1.0 3 | From: office@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: application approved 6 | 7 | 
--===============0835243834727249202== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Your scholarship application has been approved. 13 | 14 | Sent at: 2024-04-02 10:00:00 15 | --===============0835243834727249202==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-21/testbed/emails/Bob/scholarship approved.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============0835243834727249202==" 2 | MIME-Version: 1.0 3 | From: office@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: application approved 6 | 7 | --===============0835243834727249202== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Your scholarship application has been approved. 13 | 14 | Sent at: 2024-04-02 10:00:00 15 | --===============0835243834727249202==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-39/testbed/emails/Bob/scholarship approved.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============0835243834727249202==" 2 | MIME-Version: 1.0 3 | From: office@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: application approved 6 | 7 | --===============0835243834727249202== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Your scholarship application has been approved. 
13 | 14 | Sent at: 2024-04-02 10:00:00 15 | --===============0835243834727249202==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-42/testbed/emails/Bob/scholarship approved.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============0835243834727249202==" 2 | MIME-Version: 1.0 3 | From: office@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: application approved 6 | 7 | --===============0835243834727249202== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Your scholarship application has been approved. 13 | 14 | Sent at: 2024-04-02 10:00:00 15 | --===============0835243834727249202==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-44/testbed/emails/Bob/scholarship approved.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============0835243834727249202==" 2 | MIME-Version: 1.0 3 | From: office@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: application approved 6 | 7 | --===============0835243834727249202== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Your scholarship application has been approved. 
13 | 14 | Sent at: 2024-04-02 10:00:00 15 | --===============0835243834727249202==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-38/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "extract students and their IDs, collect in excel file SIDs.xlsx with headers Names and IDs, save in folder IDs", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_exact_match", 10 | "args": { 11 | "doc_type": "xlsx", 12 | "result_file": "./data/IDs/SIDs.xlsx", 13 | "expected_file": "../../../../reference/sids.xlsx" 14 | } 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-95/testbed/emails/Bob/scholarship approved.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============0835243834727249202==" 2 | MIME-Version: 1.0 3 | From: office@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: application approved 6 | 7 | --===============0835243834727249202== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Your scholarship application has been approved. 13 | 14 | Sent at: 2024-04-02 10:00:00 15 | --===============0835243834727249202==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-20/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2024-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "What is the earliest email to Bob about? 
Read the contents as the answer.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "Just a friendly reminder that rent is due October 1, 2022." 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-22/testbed/emails/Bob/scholarship approved.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============0835243834727249202==" 2 | MIME-Version: 1.0 3 | From: gradescope@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: application approved 6 | 7 | --===============0835243834727249202== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Your scholarship application has been approved. 13 | 14 | Sent at: 2024-04-02 10:00:00 15 | --===============0835243834727249202==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-4/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "change Bob's midterm1 score 100 to score.xlsx excel file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_excel_cell_value", 10 | "args": { 11 | "file": "./data/score.xlsx", 12 | "matches": [ 13 | { 14 | "row": "12", 15 | "col": "2", 16 | "value": "100" 17 | } 18 | ] 19 | } 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-37/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Put 
student scores from scanned PDF file to scores.xlsx with headers Name and Scores, save it in folder scores", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_exact_match", 10 | "args": { 11 | "doc_type": "xlsx", 12 | "result_file": "./data/scores/scores.xlsx", 13 | "expected_file": "../../../../reference/scores.xlsx" 14 | } 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /experiments/appworld/configs/base_config.yaml: -------------------------------------------------------------------------------- 1 | # Base configuration for AppWorld experiments 2 | exp_id: "appworld_base" 3 | model_name: "gpt-4o" 4 | tag: "base" 5 | 6 | # Agent settings 7 | debug_mode: true 8 | max_iter: 50 9 | use_workflow_memory: false 10 | use_thinking_tokens: true 11 | 12 | # Prompt settings 13 | prompt_file: null 14 | 15 | # Environment settings 16 | experiment_name: "minimal_test" 17 | max_interactions: 50 18 | 19 | # Context optimization settings 20 | co_config: null 21 | 22 | # Output settings 23 | save_trajectory: true 24 | save_llm_history: true 25 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-15/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Add the sentence \"This is a test sentence for task 1-15-0.\" to the end of house_creak.docx.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "docx", 12 | "file": "./data/House_creak.docx", 13 | "keywords": [ 14 | "This is a test sentence for task 1-15-0." 
15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-19/subtasks/3.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "what is the effective date from agreement pdf file?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "2024-05-29" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-5/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "change Carol's midterm1 score to 98 in score excel file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_excel_cell_value", 10 | "args": { 11 | "file": "./data/score.xlsx", 12 | "matches": [ 13 | { 14 | "row": "21", 15 | "col": "2", 16 | "value": "98" 17 | } 18 | ] 19 | } 20 | } 21 | ] 22 | 23 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-5/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "reduce half amount of apple in shopping list excel file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_excel_cell_value", 10 | "args": { 11 | "file": "./data/shopping_list.xlsx", 12 | "matches": [ 13 | { 14 | "row": "2", 15 | "col": "2", 16 | "value": "1" 17 | } 18 | ] 19 | } 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- 
/experiments/officebench/tasks/2-32/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Compare income and expenditure based on company trading record. Larger or smaller? Answer the question with `larger` or `smaller`", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/answer.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "smaller" 15 | ] 16 | } 17 | } 18 | ] 19 | 20 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-33/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Print the top 3 students in midterm1 into a file named top3.txt in the /testbed/data/ folder. ", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/top3.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "Tina", 15 | "Rachel", 16 | "Sam" 17 | ] 18 | } 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-41/testbed/emails/Alice/work_overtime.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============3273973992312735059==" 2 | MIME-Version: 1.0 3 | From: office@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: work overtime 6 | 7 | --===============3273973992312735059== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Dear Bob, 13 | 14 | work overtime today from 8 p.m to 12 p.m to fix bug. 
15 | 16 | Sent at: 2024-04-02 10:00:00 17 | --===============3273973992312735059==-- 18 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-80/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Compare income and expenditure based on company trading record, write a short analysis and save it as trading_record_post.jpg in folder named trading_account", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/trading_account/company_trading_records.jpg" 12 | } 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-10/subtasks/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "find the highest incoming in salary excel file, duplicate it at bottom", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_excel_cell_value", 10 | "args": { 11 | "file": "data/salary.xlsx", 12 | "matches": [ 13 | { 14 | "row": 5, 15 | "col": 2, 16 | "value": "200000" 17 | } 18 | ] 19 | } 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-1/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Add a meeting to Bob's calendar at 5/17/2024 10:30 a.m to 11:00 a.m", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "ics", 12 | "file": "./calendar/Bob.ics", 13 | "keywords": [ 14 | "DTSTART:20240517T103000", 15 | 
"Meeting", 16 | "DTEND:20240517T110000" 17 | ] 18 | } 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-1/subtasks/4.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Bob has a travelling from 6/19/2024 to 6/23/2024 at Florida, set as a calendar event", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "ics", 12 | "file": "./calendar/Bob.ics", 13 | "keywords": [ 14 | "travel", 15 | "DTSTART:20240619", 16 | "DTEND:20240623" 17 | ] 18 | } 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-1/subtasks/3.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "extract text from combined notification image and split into two pdf files, named party.pdf and meeting.pdf", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/party.pdf" 12 | } 13 | }, 14 | { 15 | "function": "evaluate_file_exist", 16 | "args": { 17 | "file": "data/meeting.pdf" 18 | } 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-43/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "read company revenues and send an email revenues.eml containing all years' revenues to Bob for reporting", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "email", 12 | "username": 
"Bob", 13 | "keywords": [ 14 | "revenues", 15 | "4439044", 16 | "2212203" 17 | ] 18 | } 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which 6 | includes all source code repositories in our GitHub organizations. 7 | 8 | **Please do not report security vulnerabilities through public GitHub issues.** 9 | 10 | For security reporting information, locations, contact information, and policies, 11 | please review the latest guidance for Microsoft repositories at 12 | [https://aka.ms/SECURITY.md](https://aka.ms/SECURITY.md). 13 | 14 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-1/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "extract text from power failure notification image and save in power_failure.pdf", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/power_failure.pdf", 12 | "doc_type": "pdf", 13 | "keywords": [ 14 | "Electric", 15 | "Failure", 16 | "Power", 17 | "issue" 18 | ] 19 | } 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-18/subtasks/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "write all shopping item in docx file (shopping_list.docx) from shopping list (shopping_list.xlsx)", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": 
"data/shopping_list.docx", 12 | "doc_type": "docx", 13 | "keywords": [ 14 | "apple", 15 | "cabbage", 16 | "onion" 17 | ] 18 | 19 | } 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | - Employees can reach out at [aka.ms/opensource/moderation-support](https://aka.ms/opensource/moderation-support) 11 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-29/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Scan Conference Invitation PDF and search for conference date, add event to that date on calender, save in my calendar.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "calendar/Alice.ics", 12 | "doc_type": "ics", 13 | "keywords": [ 14 | "Conference", 15 | "DTSTART:20241115" 16 | ] 17 | } 18 | } 19 | ] 20 | 21 | } -------------------------------------------------------------------------------- /experiments/smolagents/data/nq_multi_8/folds/train_history_tiny.txt: -------------------------------------------------------------------------------- 1 | nq_multi8_train_10456 2 | nq_multi8_train_11392 3 | nq_multi8_train_12280 4 | nq_multi8_train_13232 5 | nq_multi8_train_13432 6 | 
nq_multi8_train_14592 7 | nq_multi8_train_21416 8 | nq_multi8_train_27648 9 | nq_multi8_train_28656 10 | nq_multi8_train_29864 11 | nq_multi8_train_30512 12 | nq_multi8_train_32096 13 | nq_multi8_train_3472 14 | nq_multi8_train_34992 15 | nq_multi8_train_36048 16 | nq_multi8_train_36456 17 | nq_multi8_train_4200 18 | nq_multi8_train_47400 19 | nq_multi8_train_60216 20 | nq_multi8_train_71424 21 | nq_multi8_train_9112 -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-8/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Use OCR to extract text from business cards and convert them into an Excel sheet with contact information, called 'contacts.xlsx'.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/contacts.xlsx", 12 | "doc_type": "xlsx", 13 | "keywords": [ 14 | "Bob", 15 | "John", 16 | "Tom" 17 | ] 18 | } 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /experiments/appworld/data_copy/datasets/train_history_tiny.txt: -------------------------------------------------------------------------------- 1 | 07b42fd_1 2 | 07b42fd_2 3 | 07b42fd_3 4 | 229360a_1 5 | 229360a_3 6 | 22cc237_1 7 | 22cc237_2 8 | 22cc237_3 9 | 27e1026_1 10 | 27e1026_2 11 | 287e338_2 12 | 34d9492_1 13 | 34d9492_2 14 | 34d9492_3 15 | 3c13f5a_1 16 | 3c13f5a_2 17 | 3c13f5a_3 18 | 60d0b5b_1 19 | 60d0b5b_2 20 | 76f2c72_2 21 | 771d8fc_1 22 | 771d8fc_2 23 | 771d8fc_3 24 | 7d7fbf6_2 25 | 7d7fbf6_3 26 | 82e2fac_1 27 | 82e2fac_2 28 | 82e2fac_3 29 | b7a9ee9_2 30 | b7a9ee9_3 31 | ccb4494_1 32 | ce359b5_3 33 | d0b1f43_2 34 | e3d6c94_1 35 | e3d6c94_3 36 | e7a10f8_1 37 | e7a10f8_2 38 | e7a10f8_3 39 | e85d92a_1 -------------------------------------------------------------------------------- 
/experiments/officebench/tasks/2-2/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "scan Concert Announcement pdf file, extract name and address saved it as contact card in doc file named contact.docx", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/contact.docx", 12 | "doc_type": "doc", 13 | "keywords": [ 14 | "John Smith", 15 | "123 Main Street", 16 | "Cityville" 17 | ] 18 | } 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-1/subtasks/3.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "at 5/15/2024 4:00 p.m to 6:00 p.m at Mathematic Science 4000A, Bob needs to have a class", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "ics", 12 | "file": "./calendar/Bob.ics", 13 | "keywords": [ 14 | "Mathematic Science 4000A", 15 | "DTSTART:20240515T160000", 16 | "DTEND:20240515T180000" 17 | ] 18 | } 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-7/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "delete student Alice and her score in score excel file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_diff_contain_text", 10 | "args": { 11 | "input_file": "../../../../cache/0/testbed/data/score.xlsx", 12 | "output_file": "./data/score.xlsx", 13 | "doc_type": "xlsx", 14 | "keywords": [ 15 | "Alice", 16 | "78", 17 | "75" 18 | ] 19 
| } 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-2/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "scan Concert Announcement pdf file, extract name and date saved it as contact card in doc file named contact.docx", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/contact.docx", 12 | "doc_type": "doc", 13 | "keywords": [ 14 | "John Smith", 15 | "July", 16 | "15", 17 | "2024" 18 | ] 19 | } 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-38/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Bob was invited to party hold by Jane Doe, send an email party.eml to Bob to notify Bob", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "email", 12 | "username": "Bob", 13 | "keywords": [ 14 | "party", 15 | "Jane Doe" 16 | ] 17 | } 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/config.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List, Dict 2 | from dataclasses import dataclass, field 3 | 4 | @dataclass 5 | class OfficeBenchEnvConfig: 6 | """Configuration for OfficeAgent environment""" 7 | env_id: str = 'null' 8 | local_workdir: str = "./local_workdir" 9 | output_dir: str = "./officebench_output" 10 | task: str = None 11 | task_dir: str = "tasks/officebench/tasks/1-1" 12 | prompt_file: str = 
"src/productive_agents/env/officebench/prompts_v2.json" 13 | app_root_dir: str = "src/productive_agents/env/officebench/apps" 14 | terminate_token_len: int = 7500 -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-13/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Given an Excel containing student's previous courses, does he/she satisfy the prerequiste of CS161 if prerequiste classes are CS101, CS 102, Math 151, and Math 152? Answer should be Yes or No.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "txt", 12 | "file": "./data/answer.txt", 13 | "keywords": [ 14 | "Yes" 15 | ] 16 | } 17 | } 18 | ] 19 | 20 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-7/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "remove the third student and his/her score in score excel file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_diff_contain_text", 10 | "args": { 11 | "input_file": "../../../../cache/1/testbed/data/score.xlsx", 12 | "output_file": "./data/score.xlsx", 13 | "doc_type": "xlsx", 14 | "keywords": [ 15 | "Alice", 16 | "78", 17 | "75" 18 | ] 19 | } 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /src/productive_agents/env/officebench/apps/excel_app/__init__.py: -------------------------------------------------------------------------------- 1 | INTRO = "excel: an app to manipulate excel files, including reading, writing, etc." 2 | 3 | # from . import excel_add_column 4 | # from . 
import excel_add_row 5 | # from . import excel_delete_cell 6 | # from . import excel_delete_column 7 | # from . import excel_delete_row 8 | from . import excel_read_file 9 | from . import excel_set_cell 10 | from . import excel_delete_cell 11 | from . import excel_create_new_file 12 | from . import excel_convert_to_pdf 13 | # from . import excel_write_column 14 | # from . import excel_write_row 15 | from .excel_info import ExcelInfo 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-35/subtasks/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Calculate variance of all participants' ages listed in the survey pdf. the answer needs to be saved in a file named var.txt in the /testbed/data/ folder.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/var.txt", 12 | "doc_type": "txt", 13 | "keywords": [ 14 | "170" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-10/subtasks/3.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "find the lowest cost in budget item, add it to highest cost in budget excel file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_excel_cell_comparator", 10 | "args": { 11 | "file": "data/company_budget.xlsx", 12 | "matches": [ 13 | { 14 | "row": 2, 15 | "col": 2, 16 | "comparator": "lambda x: x in ['2001000', '2,001,000']" 17 | } 18 | ] 19 | } 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-27/subtasks/0.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Extract schedule from PDF and add it to my calendar.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "ics", 12 | "file": "./calendar/Alice.ics", 13 | "keywords": [ 14 | "Welcome", 15 | "Fiscal", 16 | "vote", 17 | "advisory", 18 | "budget", 19 | "break", 20 | "business" 21 | ] 22 | } 23 | } 24 | ] 25 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-39/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "find Bob's latest email, save it in latest.docx", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/latest.docx" 12 | } 13 | }, 14 | { 15 | "function": "evaluate_contain", 16 | "args": { 17 | "doc_type": "docx", 18 | "file": "data/latest.docx", 19 | "keywords": [ 20 | "scholarship" 21 | ] 22 | } 23 | } 24 | ] 25 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-41/testbed/emails/Alice/body_test.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============2621726632538245186==" 2 | MIME-Version: 1.0 3 | From: your_email@gmail.com 4 | To: bob@example.com, alice@example.com, tom@example.com 5 | Subject: Body Test Email 6 | 7 | --===============2621726632538245186== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Dear Bob, Alice, and Tom, 13 | 14 | This is a test email to verify the body of the message for today. 
15 | 16 | Best regards, 17 | Your Name 18 | 19 | --===============2621726632538245186==-- 20 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-20/testbed/emails/Bob/rental.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============8013565661883809466==" 2 | MIME-Version: 1.0 3 | From: Glendon@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: Reminder Rent due 6 | 7 | --===============8013565661883809466== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Just a friendly reminder that rent is due October 1, 2022. Please remember to make your payment for rent and utilities on time to avoid a rent delinquency. 13 | 14 | Sent at: 2022-09-23 03:00:00 15 | --===============8013565661883809466==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-21/testbed/emails/Bob/rental.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============8013565661883809466==" 2 | MIME-Version: 1.0 3 | From: Glendon@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: Reminder Rent due 6 | 7 | --===============8013565661883809466== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Just a friendly reminder that rent is due October 1, 2022. Please remember to make your payment for rent and utilities on time to avoid a rent delinquency. 
13 | 14 | Sent at: 2022-09-23 03:00:00 15 | --===============8013565661883809466==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-22/testbed/emails/Bob/rental.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============8013565661883809466==" 2 | MIME-Version: 1.0 3 | From: Glendon@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: Reminder Rent due 6 | 7 | --===============8013565661883809466== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Just a friendly reminder that rent is due October 1, 2022. Please remember to make your payment for rent and utilities on time to avoid a rent delinquency. 13 | 14 | Sent at: 2022-09-23 03:00:00 15 | --===============8013565661883809466==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-7/subtasks/3.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "delete the stock and its line in salary excel file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_diff_contain_text", 10 | "file": "./data/salary.xlsx", 11 | "args": { 12 | "input_file": "../../../../cache/3/testbed/data/salary.xlsx", 13 | "output_file": "./data/salary.xlsx", 14 | "doc_type": "xlsx", 15 | "keywords": [ 16 | "stock", 17 | "100000" 18 | ] 19 | } 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-39/testbed/emails/Bob/rental.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============8013565661883809466==" 2 | MIME-Version: 1.0 3 | From: 
Glendon@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: Reminder Rent due 6 | 7 | --===============8013565661883809466== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Just a friendly reminder that rent is due October 1, 2022. Please remember to make your payment for rent and utilities on time to avoid a rent delinquency. 13 | 14 | Sent at: 2022-09-23 03:00:00 15 | --===============8013565661883809466==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-42/testbed/emails/Bob/rental.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============8013565661883809466==" 2 | MIME-Version: 1.0 3 | From: Glendon@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: Reminder Rent due 6 | 7 | --===============8013565661883809466== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Just a friendly reminder that rent is due October 1, 2022. Please remember to make your payment for rent and utilities on time to avoid a rent delinquency. 13 | 14 | Sent at: 2022-09-23 03:00:00 15 | --===============8013565661883809466==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-44/testbed/emails/Bob/rental.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============8013565661883809466==" 2 | MIME-Version: 1.0 3 | From: Glendon@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: Reminder Rent due 6 | 7 | --===============8013565661883809466== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Just a friendly reminder that rent is due October 1, 2022. 
Please remember to make your payment for rent and utilities on time to avoid a rent delinquency. 13 | 14 | Sent at: 2022-09-23 03:00:00 15 | --===============8013565661883809466==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-10/testbed/emails/Alice/rental.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============8013565661883809466==" 2 | MIME-Version: 1.0 3 | From: Glendon@gmail.com 4 | To: Bob@gmail.com 5 | Subject: Reminder Rent due 6 | 7 | --===============8013565661883809466== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Just a friendly reminder that rent is due October 1, 2022. Please remember to make your payment for rent and utilities on time to avoid a rent delinquency. 13 | 14 | Sent at: 2022-09-23 03:00:00 15 | --===============8013565661883809466==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-95/testbed/emails/Bob/rental.eml: -------------------------------------------------------------------------------- 1 | Content-Type: multipart/mixed; boundary="===============8013565661883809466==" 2 | MIME-Version: 1.0 3 | From: Glendon@gmail.com 4 | To: Bob123@gmail.com 5 | Subject: Reminder Rent due 6 | 7 | --===============8013565661883809466== 8 | Content-Type: text/plain; charset="us-ascii" 9 | MIME-Version: 1.0 10 | Content-Transfer-Encoding: 7bit 11 | 12 | Just a friendly reminder that rent is due October 1, 2022. Please remember to make your payment for rent and utilities on time to avoid a rent delinquency. 
13 | 14 | Sent at: 2022-09-23 03:00:00 15 | --===============8013565661883809466==-- 16 | -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-38/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Jack", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "read notification file and send email titled notification to Alice with the notification contents", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "email", 12 | "username": "Alice", 13 | "keywords": [ 14 | "notification", 15 | "July", 16 | "15", 17 | "2024", 18 | "Conference", 19 | "Room 1" 20 | ] 21 | } 22 | } 23 | ] 24 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-76/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "What is the maximum price of the cars sold in March, send the price and car model via email to the buyer Tom", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "email", 12 | "username": "Tom", 13 | "keywords": [ 14 | "Ford Mustang", 15 | "20000" 16 | ] 17 | } 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-15/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Retrieve academic integrity for research from academic_integrity.docx and add it to the end of the syllabus sample_syllabus.docx.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 
11 | "doc_type": "docx", 12 | "file": "./data/sample_syllabus.docx", 13 | "keywords": [ 14 | "ACADEMIC INTEGRITY FOR RESEARCH" 15 | ] 16 | } 17 | } 18 | ] 19 | 20 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-37/subtasks/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "What are conflicts in my calendar events? Tell me the conflict events.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "txt", 12 | "file": "data/answer.txt", 13 | "keywords": [ 14 | "workout", 15 | "class", 16 | "meeting" 17 | ] 18 | } 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-40/testbed/calendar/Tom.ics: -------------------------------------------------------------------------------- 1 | BEGIN:VCALENDAR 2 | VERSION:2.0 3 | PRODID:ics.py - http://git.io/lLljaA 4 | BEGIN:VEVENT 5 | DTEND:20240501T133000Z 6 | DTSTART:20240501T123000Z 7 | SUMMARY:lunch with David 8 | UID:f2c7bb64-face-48ff-a494-c23a5b31b6ad@f2c7.org 9 | END:VEVENT 10 | BEGIN:VEVENT 11 | DTEND:20240501T180000Z 12 | DTSTART:20240501T160000Z 13 | SUMMARY:class 14 | UID:2fc598fd-361b-46c0-8b12-426c4d7939af@2fc5.org 15 | END:VEVENT 16 | BEGIN:VEVENT 17 | DTEND:20240501T143000Z 18 | DTSTART:20240501T140000Z 19 | SUMMARY:report 20 | UID:33d7143a-de13-4041-aa20-a88866b3acad@33d7.org 21 | END:VEVENT 22 | END:VCALENDAR -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-25/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Scan homework to homework.docx for 
editing, and save homework word file to new_homework.pdf", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/homework.docx" 12 | } 13 | }, 14 | { 15 | "function": "evaluate_file_exist", 16 | "args": { 17 | "file": "data/new_homework.pdf" 18 | } 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-20/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Find all students who are taking CS161 and put the name and Student ID of students into students.docx. Each student should be on seperate lines, with the format `Student ID, name`.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/students.docx", 12 | "doc_type": "doc", 13 | "keywords": [ 14 | "Vufuh", 15 | "Pom", 16 | "S1006" 17 | ] 18 | } 19 | } 20 | ] 21 | 22 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-40/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "read Bob's calendar event and send email using event names to Bob for all events in calendar", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "email", 12 | "username": "Bob", 13 | "keywords": [ 14 | "class", 15 | "nap", 16 | "lunch", 17 | "dinner", 18 | "sleeping" 19 | ] 20 | } 21 | } 22 | ] 23 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-87/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | 
"date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "if Dean's list requires term average gpa to be higher than 3.9, how many terms can this students be awarded on Dean's list? Email to student about this result", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "email", 12 | "username": "Teddy", 13 | "keywords": [ 14 | "11" 15 | ] 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-99/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "extract revenue data and save as a table in excel file revenues.xlsx, write statements in revenues.docx", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/revenues.xlsx" 12 | } 13 | }, 14 | { 15 | "function": "evaluate_file_exist", 16 | "args": { 17 | "file": "data/revenues.docx" 18 | } 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-16/testbed/data/random_paragraph.txt: -------------------------------------------------------------------------------- 1 | In the heart of the bustling city, a small café stood as a tranquil oasis amidst the chaos. Its cozy interior, adorned with vintage furniture and soft lighting, created an inviting atmosphere that drew in locals and tourists alike. The aroma of freshly brewed coffee mingled with the scent of baked pastries, creating a comforting embrace for all who entered. Patrons sat at mismatched tables, engrossed in conversations or lost in their thoughts as they sipped their drinks. Outside, the city buzzed with activity, but within the café's walls, time seemed to slow down, offering a brief respite from the fast-paced world beyond. 
-------------------------------------------------------------------------------- /experiments/officebench/tasks/3-59/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Analyze Excel data of students' grade and generate a teaching report in teaching.docx, convert to teaching.pdf", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/teaching.docx" 12 | } 13 | }, 14 | { 15 | "function": "evaluate_file_exist", 16 | "args": { 17 | "file": "data/teaching.pdf" 18 | } 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-78/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Identify whether the reimbursement amount exceeds the company budget, report to manager Tom about reimbursement via email", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "email", 12 | "username": "Tom", 13 | "keywords": [ 14 | "reimbursement", 15 | "exceed" 16 | ] 17 | } 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-10/subtasks/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Which students have lower score in midterm2 than midterm1 in score excel file?", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "txt", 12 | "file": "./data/answer.txt", 13 | "keywords": [ 14 | "Liam", 15 | "Alice", 16 | "Sam", 17 | "David", 18 | 
"Bob", 19 | "Tina", 20 | "Rachel", 21 | "Frank" 22 | ] 23 | } 24 | } 25 | ] 26 | } -------------------------------------------------------------------------------- /experiments/smolagents/data/nq_multi_8/folds/train_obs_tiny.txt: -------------------------------------------------------------------------------- 1 | nq_multi8_train_12672 2 | nq_multi8_train_13232 3 | nq_multi8_train_20376 4 | nq_multi8_train_21312 5 | nq_multi8_train_27648 6 | nq_multi8_train_27864 7 | nq_multi8_train_28656 8 | nq_multi8_train_29256 9 | nq_multi8_train_32080 10 | nq_multi8_train_3472 11 | nq_multi8_train_41240 12 | nq_multi8_train_4200 13 | nq_multi8_train_46560 14 | nq_multi8_train_47400 15 | nq_multi8_train_47816 16 | nq_multi8_train_49608 17 | nq_multi8_train_49792 18 | nq_multi8_train_54984 19 | nq_multi8_train_58872 20 | nq_multi8_train_60584 21 | nq_multi8_train_70008 22 | nq_multi8_train_7328 23 | nq_multi8_train_73576 24 | nq_multi8_train_848 -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-26/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Extract text from PDF and insert it to a new Word file of the same name.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "file": "data/market_analysis.docx", 12 | "doc_type": "doc", 13 | "keywords": [ 14 | "Market", 15 | "Analysis", 16 | "Marketing", 17 | "Plan", 18 | "Business", 19 | "Customer", 20 | "How Large" 21 | ] 22 | 23 | } 24 | } 25 | ] 26 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/3-64/subtasks/0.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Bob", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | 
"task": "give some instruction and prediction in file house_price.docx based on house price index, convert to a post housing.jpg", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_file_exist", 10 | "args": { 11 | "file": "data/housing.jpg" 12 | } 13 | }, 14 | { 15 | "function": "evaluate_file_exist", 16 | "args": { 17 | "file": "data/house_price.docx" 18 | } 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /experiments/smolagents/search/download.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from huggingface_hub import hf_hub_download 3 | 4 | parser = argparse.ArgumentParser(description="Download files from a Hugging Face dataset repository.") 5 | parser.add_argument("--repo_id", type=str, default="PeterJinGo/wiki-18-e5-index", help="Hugging Face repository ID") 6 | parser.add_argument("--save_path", type=str, required=True, help="Local directory to save files") 7 | 8 | args = parser.parse_args() 9 | 10 | repo_id = "PeterJinGo/wiki-18-corpus" 11 | hf_hub_download( 12 | repo_id=repo_id, 13 | filename="wiki-18.jsonl.gz", 14 | repo_type="dataset", 15 | local_dir=args.save_path, 16 | ) -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-13/subtasks/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "Given an Excel containing student's previous courses and GPA, does he satisfy the graduate requirement if student needs to take at least 8 CS courses and 5 math courses? 
Answer should be Yes or No.", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_contain", 10 | "args": { 11 | "doc_type": "txt", 12 | "file": "./data/answer.txt", 13 | "keywords": [ 14 | "No" 15 | ] 16 | } 17 | } 18 | ] 19 | 20 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/1-7/subtasks/5.json: -------------------------------------------------------------------------------- 1 | { 2 | "username": "Alice", 3 | "date": "2020-05-01", 4 | "weekday": "Friday", 5 | "time": "10:00 AM", 6 | "task": "we already have onion at home, remove this entry from the shopping list excel file", 7 | "evaluation": [ 8 | { 9 | "function": "evaluate_diff_contain_text", 10 | "file": "./data/shopping_list.xlsx", 11 | "args": { 12 | "input_file": "../../../../cache/5/testbed/data/shopping_list.xlsx", 13 | "output_file": "./data/shopping_list.xlsx", 14 | "doc_type": "xlsx", 15 | "keywords": [ 16 | "onion", 17 | "2" 18 | ] 19 | } 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /experiments/officebench/tasks/2-15/testbed/calendar/Alice.ics: -------------------------------------------------------------------------------- 1 | BEGIN:VCALENDAR 2 | VERSION:2.0 3 | PRODID:ics.py - http://git.io/lLljaA 4 | BEGIN:VEVENT 5 | DTEND:20140101T143000Z 6 | DTSTART:20140101T141000Z 7 | SUMMARY:Strategy by Charlie 8 | UID:e0952a34-e8a8-4cbf-820f-652e7db5921e@e095.org 9 | END:VEVENT 10 | BEGIN:VEVENT 11 | DTEND:20140101T133000Z 12 | DTSTART:20140101T130000Z 13 | SUMMARY:Introductions by Linda 14 | UID:8cd53050-f81f-4b9b-884e-adaf16d411a0@8cd5.org 15 | END:VEVENT 16 | BEGIN:VEVENT 17 | DTEND:20140101T141000Z 18 | DTSTART:20140101T133000Z 19 | SUMMARY:Agenda by Davide 20 | UID:2a1c5d35-c9cc-463a-911e-becc29336468@2a1c.org 21 | END:VEVENT 22 | END:VCALENDAR -------------------------------------------------------------------------------- 
/experiments/officebench/tasks/2-16/testbed/calendar/Alice.ics: -------------------------------------------------------------------------------- 1 | BEGIN:VCALENDAR 2 | VERSION:2.0 3 | PRODID:ics.py - http://git.io/lLljaA 4 | BEGIN:VEVENT 5 | DTEND:20140101T143000Z 6 | DTSTART:20140101T141000Z 7 | SUMMARY:Strategy by Charlie 8 | UID:e0952a34-e8a8-4cbf-820f-652e7db5921e@e095.org 9 | END:VEVENT 10 | BEGIN:VEVENT 11 | DTEND:20140101T133000Z 12 | DTSTART:20140101T130000Z 13 | SUMMARY:Introductions by Linda 14 | UID:8cd53050-f81f-4b9b-884e-adaf16d411a0@8cd5.org 15 | END:VEVENT 16 | BEGIN:VEVENT 17 | DTEND:20140101T141000Z 18 | DTSTART:20140101T133000Z 19 | SUMMARY:Agenda by Davide 20 | UID:2a1c5d35-c9cc-463a-911e-becc29336468@2a1c.org 21 | END:VEVENT 22 | END:VCALENDAR --------------------------------------------------------------------------------