├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── docs ├── DATA_GENERATION.md ├── EVALUATION.md ├── KUBERNETES_DEPLOY.md └── MODEL_TRAINING.md ├── environment.yml ├── figures ├── codeact-comparison-perf.png ├── codeact-comparison-table.png ├── data-stats.png ├── json-text-comparison.png ├── model-performance.png └── overview.png ├── mint ├── agents │ ├── __init__.py │ ├── bard_agent.py │ ├── base.py │ ├── claude_agent.py │ ├── claude_feedback_agent.py │ ├── openai_feedback_agent.py │ ├── openai_lm_agent.py │ ├── vllm_agent.py │ └── vllm_feedback_agent.py ├── configs │ ├── __init__.py │ ├── config_variables.py │ └── generate_config.py ├── data_gen_configs │ ├── claude-2 │ │ └── F=None │ │ │ └── max5_p2+tool+cd │ │ │ ├── code_generation │ │ │ └── APPS │ │ │ │ └── train.json │ │ │ ├── decision_making │ │ │ └── alfworld │ │ │ │ └── train.json │ │ │ ├── reasoning │ │ │ ├── algebra │ │ │ │ └── train.json │ │ │ ├── counting_and_probability │ │ │ │ └── train.json │ │ │ ├── geometry │ │ │ │ └── train.json │ │ │ ├── hotpotqa │ │ │ │ └── train.json │ │ │ ├── intermediate_algebra │ │ │ │ └── train.json │ │ │ ├── number_theory │ │ │ │ └── train.json │ │ │ ├── prealgebra │ │ │ │ └── train.json │ │ │ ├── precalculus │ │ │ │ └── train.json │ │ │ └── strategyqa │ │ │ │ └── train.json │ │ │ └── tabular │ │ │ └── wiki_table_questions │ │ │ └── train.json │ ├── claude-instant-1 │ │ └── F=None │ │ │ └── max5_p2+tool+cd │ │ │ ├── code_generation │ │ │ └── APPS │ │ │ │ └── train.json │ │ │ ├── decision_making │ │ │ └── alfworld │ │ │ │ └── train.json │ │ │ ├── reasoning │ │ │ ├── algebra │ │ │ │ └── train.json │ │ │ ├── counting_and_probability │ │ │ │ └── train.json │ │ │ ├── geometry │ │ │ │ └── train.json │ │ │ ├── hotpotqa │ │ │ │ └── train.json │ │ │ ├── intermediate_algebra │ │ │ │ └── train.json │ │ │ ├── number_theory │ │ │ │ └── train.json │ │ │ ├── prealgebra │ │ │ │ └── train.json │ │ │ ├── precalculus │ │ │ │ └── train.json │ │ │ └── strategyqa │ │ │ │ └── 
train.json │ │ │ └── tabular │ │ │ └── wiki_table_questions │ │ │ └── train.json │ ├── gpt-3.5-turbo-0613 │ │ └── F=None │ │ │ └── max5_p2+tool+cd │ │ │ ├── code_generation │ │ │ └── APPS │ │ │ │ └── train.json │ │ │ ├── decision_making │ │ │ └── alfworld │ │ │ │ └── train.json │ │ │ ├── reasoning │ │ │ ├── algebra │ │ │ │ └── train.json │ │ │ ├── counting_and_probability │ │ │ │ └── train.json │ │ │ ├── geometry │ │ │ │ └── train.json │ │ │ ├── hotpotqa │ │ │ │ └── train.json │ │ │ ├── intermediate_algebra │ │ │ │ └── train.json │ │ │ ├── number_theory │ │ │ │ └── train.json │ │ │ ├── prealgebra │ │ │ │ └── train.json │ │ │ ├── precalculus │ │ │ │ └── train.json │ │ │ └── strategyqa │ │ │ │ └── train.json │ │ │ └── tabular │ │ │ └── wiki_table_questions │ │ │ └── train.json │ ├── gpt-3.5-turbo-16k-0613 │ │ └── F=None │ │ │ └── max5_p2+tool+cd │ │ │ ├── code_generation │ │ │ └── APPS │ │ │ │ └── train.json │ │ │ ├── decision_making │ │ │ └── alfworld │ │ │ │ └── train.json │ │ │ ├── reasoning │ │ │ ├── algebra │ │ │ │ └── train.json │ │ │ ├── counting_and_probability │ │ │ │ └── train.json │ │ │ ├── geometry │ │ │ │ └── train.json │ │ │ ├── hotpotqa │ │ │ │ └── train.json │ │ │ ├── intermediate_algebra │ │ │ │ └── train.json │ │ │ ├── number_theory │ │ │ │ └── train.json │ │ │ ├── prealgebra │ │ │ │ └── train.json │ │ │ ├── precalculus │ │ │ │ └── train.json │ │ │ └── strategyqa │ │ │ │ └── train.json │ │ │ └── tabular │ │ │ └── wiki_table_questions │ │ │ └── train.json │ └── gpt-4-0613 │ │ └── F=None │ │ └── max5_p2+tool+cd │ │ ├── code_generation │ │ └── APPS │ │ │ └── train.json │ │ ├── decision_making │ │ └── alfworld │ │ │ └── train.json │ │ ├── reasoning │ │ ├── algebra │ │ │ └── train.json │ │ ├── counting_and_probability │ │ │ └── train.json │ │ ├── geometry │ │ │ └── train.json │ │ ├── hotpotqa │ │ │ └── train.json │ │ ├── intermediate_algebra │ │ │ └── train.json │ │ ├── number_theory │ │ │ └── train.json │ │ ├── prealgebra │ │ │ └── train.json │ │ ├── 
precalculus │ │ │ └── train.json │ │ └── strategyqa │ │ │ └── train.json │ │ └── tabular │ │ └── wiki_table_questions │ │ └── train.json ├── datatypes.py ├── envs │ ├── __init__.py │ ├── alfworld_env.py │ ├── base.py │ └── general_env.py ├── main.py ├── prompt │ ├── __init__.py │ └── templates │ │ ├── template_feedback_agent.txt │ │ ├── template_with_tool.txt │ │ └── template_without_tool.txt ├── tasks │ ├── __init__.py │ ├── alfworld.py │ ├── base.py │ ├── codegen │ │ ├── APPS │ │ │ ├── __init__.py │ │ │ ├── testing_util.py │ │ │ └── utils.py │ │ └── __init__.py │ ├── in_context_examples │ │ ├── APPS │ │ │ ├── with_tool.txt │ │ │ └── with_tool_and_feedback.txt │ │ ├── README.md │ │ ├── alfworld │ │ │ ├── clean │ │ │ │ ├── with_tool.txt │ │ │ │ └── with_tool_and_feedback.txt │ │ │ ├── cool │ │ │ │ ├── with_tool.txt │ │ │ │ └── with_tool_and_feedback.txt │ │ │ ├── examine │ │ │ │ ├── with_tool.txt │ │ │ │ └── with_tool_and_feedback.txt │ │ │ ├── heat │ │ │ │ ├── with_tool.txt │ │ │ │ └── with_tool_and_feedback.txt │ │ │ ├── put │ │ │ │ ├── with_tool.txt │ │ │ │ └── with_tool_and_feedback.txt │ │ │ └── puttwo │ │ │ │ ├── with_tool.txt │ │ │ │ └── with_tool_and_feedback.txt │ │ ├── humaneval │ │ │ ├── with_tool.txt │ │ │ └── with_tool_and_feedback.txt │ │ ├── mbpp │ │ │ ├── with_tool.txt │ │ │ └── with_tool_and_feedback.txt │ │ ├── reasoning │ │ │ ├── with_tool.txt │ │ │ └── with_tool_and_feedback.txt │ │ └── tabular │ │ │ ├── with_tool.txt │ │ │ └── with_tool_and_feedback.txt │ ├── reasoning │ │ ├── __init__.py │ │ ├── grader.py │ │ └── math_normalize.py │ └── tabular │ │ ├── __init__.py │ │ └── evaluator.py ├── tools │ ├── __init__.py │ ├── alfworld.py │ ├── arxiv_search.py │ ├── base.py │ ├── python_tool.py │ ├── web_search.py │ └── wikipedia_search.py └── utils │ ├── __init__.py │ ├── exception.py │ └── exec.py ├── requirements.txt └── scripts ├── chat ├── code_execution │ ├── Dockerfile.api │ ├── Dockerfile.executor │ ├── api.py │ ├── jupyter.py │ └── 
start_jupyter_server.sh ├── demo.py ├── kubernetes │ ├── build_chat_ui_and_push.sh │ ├── build_code_execute_api_and_push.sh │ ├── build_code_executor_container_and_push.sh │ └── k8s.template.yml ├── run_chat_ui.sh ├── run_chat_ui_debug.sh ├── start_mongodb.sh └── start_vllm.sh ├── data ├── analyze_dataset.ipynb ├── create_hf_dataset.py ├── download_from_hf.py ├── general │ ├── process_capybara.py │ ├── process_general_traj.sh │ ├── process_openorca.py │ └── process_sharegpt.py ├── interaction │ ├── collection │ │ ├── convert_outputs.ipynb │ │ ├── process_trajectories.ipynb │ │ ├── run.sh │ │ └── run_mint_docker_interactive.sh │ ├── download_raw_data.sh │ ├── process_APPS.py │ ├── process_MATH.py │ ├── process_WikiTableQuestions.py │ └── process_hotpotQA.py └── megatron_conversion │ ├── process_mixture_llama.sh │ └── process_mixture_mistral.sh ├── docker ├── Dockerfile.megatron ├── Dockerfile.mint ├── run_megatron_interactive.sh └── run_megatron_interactive_slurm.sh ├── eval ├── aggregate_eval.py ├── api-bank │ ├── .gitignore │ ├── API-Bank-arxiv-version.pdf │ ├── LICENSE │ ├── README.md │ ├── api_bank_analysis.ipynb │ ├── api_call_extraction.py │ ├── apis │ │ ├── __init__.py │ │ ├── add_agenda.py │ │ ├── add_alarm.py │ │ ├── add_meeting.py │ │ ├── add_reminder.py │ │ ├── add_scene.py │ │ ├── api.py │ │ ├── appointment_registration.py │ │ ├── book_hotel.py │ │ ├── calculator.py │ │ ├── cancel_registration.py │ │ ├── cancel_timed_switch.py │ │ ├── check_token.py │ │ ├── delete_account.py │ │ ├── delete_agenda.py │ │ ├── delete_alarm.py │ │ ├── delete_meeting.py │ │ ├── delete_reminder.py │ │ ├── delete_scene.py │ │ ├── dictionary.py │ │ ├── document_qa.py │ │ ├── emergency_knowledge.py │ │ ├── forgot_password.py │ │ ├── get_today.py │ │ ├── get_user_token.py │ │ ├── image_caption.py │ │ ├── modify_agenda.py │ │ ├── modify_alarm.py │ │ ├── modify_meeting.py │ │ ├── modify_password.py │ │ ├── modify_registration.py │ │ ├── modify_reminder.py │ │ ├── modify_scene.py │ │ 
├── open_bank_account.py │ │ ├── play_music.py │ │ ├── query_agenda.py │ │ ├── query_alarm.py │ │ ├── query_balance.py │ │ ├── query_health_data.py │ │ ├── query_history_today.py │ │ ├── query_meeting.py │ │ ├── query_registration.py │ │ ├── query_reminder.py │ │ ├── query_scene.py │ │ ├── query_stock.py │ │ ├── record_health_data.py │ │ ├── register_user.py │ │ ├── search_engine.py │ │ ├── send_email.py │ │ ├── speech_recognition.py │ │ ├── symptom_search.py │ │ ├── timed_switch.py │ │ ├── tool_search.py │ │ ├── translate.py │ │ └── wiki.py │ ├── data │ │ ├── all_apis.csv │ │ └── exceptions.json │ ├── demo.py │ ├── evaluator.py │ ├── evaluator_by_json.py │ ├── figures │ │ ├── multi-agent.png │ │ └── three_ability.png │ ├── init_database │ │ ├── Account.json │ │ ├── Agenda.json │ │ ├── Alarm.json │ │ ├── Appointments.json │ │ ├── Bank.json │ │ ├── HealthData.json │ │ ├── History.json │ │ ├── Hotel.json │ │ ├── ImageCaptioning.json │ │ ├── Meeting.json │ │ ├── QuestionAnswering.json │ │ ├── Reminder.json │ │ ├── Scenes.json │ │ ├── SearchEngine.json │ │ ├── SpeechRecognition.json │ │ ├── Stock.json │ │ ├── Symptom.json │ │ ├── TimeSwitch.json │ │ └── Wiki.json │ ├── lv1-lv2-samples │ │ ├── level-1-given-desc │ │ │ ├── AddAgenda-AddAlarm-GetUserToken-level-2-1.jsonl │ │ │ ├── AddAgenda-AddMeeting-GetUserToken-level-2-2.jsonl │ │ │ ├── AddAgenda-AddMeeting-GetUserToken-level-2-3.jsonl │ │ │ ├── AddAgenda-AddMeeting-GetUserToken-level-2-4.jsonl │ │ │ ├── AddAgenda-level-1-1.jsonl │ │ │ ├── AddAgenda-level-1-2.jsonl │ │ │ ├── AddAlarm-GetReminder-ModifyAgenda-GetUserToken-level-2-1.jsonl │ │ │ ├── AddAlarm-level-1-1.jsonl │ │ │ ├── AddMeeting-level-1-1.jsonl │ │ │ ├── AddReminder-level-1-1.jsonl │ │ │ ├── AddReminder-level-1-2.jsonl │ │ │ ├── AddReminder-level-1-3.jsonl │ │ │ ├── AppointmentRegistration-level-1-1.jsonl │ │ │ ├── AppointmentRegistration-level-1-2.jsonl │ │ │ ├── AppointmentRegistration-level-1-3.jsonl │ │ │ ├── BookHotel-level-1-1.jsonl │ │ │ ├── 
BookHotel-level-1-2.jsonl │ │ │ ├── BookHotel-level-1-3.jsonl │ │ │ ├── BookHotel-level-1-4.jsonl │ │ │ ├── BookHotel-level-1-5.jsonl │ │ │ ├── Calculator-QueryHistoryToday-level-2-1.jsonl │ │ │ ├── Calculator-QueryHistoryToday-level-2-2.jsonl │ │ │ ├── Calculator-level-1-1.jsonl │ │ │ ├── Calculator-level-1-2.jsonl │ │ │ ├── Calculator-level-1-3.jsonl │ │ │ ├── CancelRegistration-RecordHealthData-QueryRegistration-level-2-1.jsonl │ │ │ ├── CancelRegistration-RecordHealthData-QueryRegistration-level-2-2.jsonl │ │ │ ├── CancelRegistration-RecordHealthData-level-2-1.jsonl │ │ │ ├── CancelRegistration-RecordHealthData-level-2-2.jsonl │ │ │ ├── CancelRegistration-RecordHealthData-level-2-3.jsonl │ │ │ ├── CancelRegistration-RecordHealthData-level-2-4.jsonl │ │ │ ├── CancelRegistration-RecordHealthData-level-2-5.jsonl │ │ │ ├── CancelRegistration-level-1-1.jsonl │ │ │ ├── CancelRegistration-level-1-2.jsonl │ │ │ ├── CancelRegistration-level-1-3.jsonl │ │ │ ├── CancelTimedSwitch-level-1-1.jsonl │ │ │ ├── CancelTimedSwitch-level-1-2.jsonl │ │ │ ├── CancelTimedSwitch-level-1-3.jsonl │ │ │ ├── CancelTimedSwitch-level-1-4.jsonl │ │ │ ├── CancelTimedSwitch-level-1-5.jsonl │ │ │ ├── DeleteAccount-ModifyPassword-GetUserToken-level-2-1.jsonl │ │ │ ├── DeleteAccount-ModifyPassword-GetUserToken-level-2-2.jsonl │ │ │ ├── DeleteAccount-ModifyPassword-GetUserToken-level-2-3.jsonl │ │ │ ├── DeleteAccount-ModifyPassword-GetUserToken-level-2-4.jsonl │ │ │ ├── DeleteAccount-ModifyPassword-GetUserToken-level-2-5.jsonl │ │ │ ├── DeleteAccount-RegisterUser-ForgotPassword-GetUserToken-level-2-1.jsonl │ │ │ ├── DeleteAccount-RegisterUser-ForgotPassword-GetUserToken-level-2-2.jsonl │ │ │ ├── DeleteAccount-level-1-1.jsonl │ │ │ ├── DeleteAccount-level-1-2.jsonl │ │ │ ├── DeleteAccount-level-1-3.jsonl │ │ │ ├── DeleteAgenda-AddAlarm-DeleteMeeting-GetUserToken-level-2-1.jsonl │ │ │ ├── DeleteAgenda-AddAlarm-DeleteMeeting-GetUserToken-level-2-2.jsonl │ │ │ ├── DeleteAgenda-level-1-1.jsonl │ │ │ 
├── DeleteAlarm-AddAgenda-GetUserToken-level-2-1.jsonl │ │ │ ├── DeleteAlarm-AddAgenda-GetUserToken-level-2-2.jsonl │ │ │ ├── DeleteAlarm-level-1-1.jsonl │ │ │ ├── DeleteAlarm-level-1-2.jsonl │ │ │ ├── DeleteMeeting-level-1-1.jsonl │ │ │ ├── DeleteReminder-level-1-1.jsonl │ │ │ ├── DeleteScene-level-1-1.jsonl │ │ │ ├── DeleteScene-level-1-2.jsonl │ │ │ ├── DeleteScene-level-1-3.jsonl │ │ │ ├── Dictionary-level-1-1.jsonl │ │ │ ├── Dictionary-level-1-2.jsonl │ │ │ ├── Dictionary-level-1-3.jsonl │ │ │ ├── Dictionary-level-1-4.jsonl │ │ │ ├── DocumentQA-level-1-1.jsonl │ │ │ ├── DocumentQA-level-1-2.jsonl │ │ │ ├── DocumentQA-level-1-3.jsonl │ │ │ ├── DocumentQA-level-1-4.jsonl │ │ │ ├── EmergencyKnowledge-ModifyRegistration-RecordHealthData-level-2-1.jsonl │ │ │ ├── EmergencyKnowledge-ModifyRegistration-RecordHealthData-level-2-2.jsonl │ │ │ ├── EmergencyKnowledge-ModifyRegistration-RecordHealthData-level-2-3.jsonl │ │ │ ├── EmergencyKnowledge-level-1-1.jsonl │ │ │ ├── EmergencyKnowledge-level-1-2.jsonl │ │ │ ├── ForgotPassword-ModifyPassword-GetUserToken-level-2-1.jsonl │ │ │ ├── ForgotPassword-RegisterUser-DeleteAccount-GetUserToken-level-2-1.jsonl │ │ │ ├── ForgotPassword-level-1-1.jsonl │ │ │ ├── GetMeeting-level-1-1.jsonl │ │ │ ├── GetMeeting-level-1-2.jsonl │ │ │ ├── GetReminder-level-1-1.jsonl │ │ │ ├── GetReminder-level-1-2.jsonl │ │ │ ├── GetToday-ModifyMeeting-ModifyAlarm-GetUserToken-level-2-1.jsonl │ │ │ ├── GetToday-ModifyMeeting-ModifyAlarm-GetUserToken-level-2-2.jsonl │ │ │ ├── GetToday-level-1-1.jsonl │ │ │ ├── GetToday-level-1-2.jsonl │ │ │ ├── GetToday-level-1-3.jsonl │ │ │ ├── GetToday-level-1-4.jsonl │ │ │ ├── ImageCaption-level-1-1.jsonl │ │ │ ├── ModifyAgenda-level-1-1.jsonl │ │ │ ├── ModifyAgenda-level-1-2.jsonl │ │ │ ├── ModifyAlarm-AddAgenda-GetUserToken-level-2-1.jsonl │ │ │ ├── ModifyAlarm-AddAgenda-GetUserToken-level-2-2.jsonl │ │ │ ├── ModifyAlarm-level-1-1.jsonl │ │ │ ├── ModifyAlarm-level-1-2.jsonl │ │ │ ├── ModifyMeeting-level-1-1.jsonl 
│ │ │ ├── ModifyMeeting-level-1-2.jsonl │ │ │ ├── ModifyMeeting-level-1-3.jsonl │ │ │ ├── ModifyPassword-ForgotPassword-DeleteAccount-GetUserToken-level-2-1.jsonl │ │ │ ├── ModifyPassword-ForgotPassword-DeleteAccount-GetUserToken-level-2-2.jsonl │ │ │ ├── ModifyPassword-ForgotPassword-DeleteAccount-GetUserToken-level-2-3.jsonl │ │ │ ├── ModifyPassword-ForgotPassword-DeleteAccount-GetUserToken-level-2-4.jsonl │ │ │ ├── ModifyPassword-RegisterUser-GetUserToken-level-2-1.jsonl │ │ │ ├── ModifyPassword-level-1-1.jsonl │ │ │ ├── ModifyPassword-level-1-2.jsonl │ │ │ ├── ModifyRegistration-EmergencyKnowledge-level-2-1.jsonl │ │ │ ├── ModifyRegistration-EmergencyKnowledge-level-2-2.jsonl │ │ │ ├── ModifyRegistration-EmergencyKnowledge-level-2-3.jsonl │ │ │ ├── ModifyRegistration-EmergencyKnowledge-level-2-4.jsonl │ │ │ ├── ModifyRegistration-QueryHealthData-CancelRegistration-level-2-1.jsonl │ │ │ ├── ModifyRegistration-QueryHealthData-CancelRegistration-level-2-2.jsonl │ │ │ ├── ModifyRegistration-level-1-1.jsonl │ │ │ ├── ModifyRegistration-level-1-2.jsonl │ │ │ ├── ModifyRegistration-level-1-3.jsonl │ │ │ ├── ModifyRegistration-level-1-4.jsonl │ │ │ ├── ModifyRegistration-level-1-5.jsonl │ │ │ ├── ModifyReminder-AddAgenda-DeleteAgenda-GetUserToken-level-2-1.jsonl │ │ │ ├── ModifyReminder-AddAgenda-DeleteAgenda-GetUserToken-level-2-2.jsonl │ │ │ ├── ModifyReminder-AddAgenda-GetUserToken-level-2-1.jsonl │ │ │ ├── ModifyReminder-AddAgenda-GetUserToken-level-2-2.jsonl │ │ │ ├── ModifyReminder-AddAgenda-GetUserToken-level-2-3.jsonl │ │ │ ├── ModifyReminder-level-1-1.jsonl │ │ │ ├── ModifyReminder-level-1-2.jsonl │ │ │ ├── ModifyReminder-level-1-3.jsonl │ │ │ ├── OpenBankAccount-QueryBalance-QueryStock-GetUserToken-level-2-2.jsonl │ │ │ ├── OpenBankAccount-QueryStock-QueryBalance-GetUserToken-level-2-1.jsonl │ │ │ ├── OpenBankAccount-QueryStock-level-2-1.jsonl │ │ │ ├── OpenBankAccount-level-1-1.jsonl │ │ │ ├── OpenBankAccount-level-1-2.jsonl │ │ │ ├── 
OpenBankAccount-level-1-3.jsonl │ │ │ ├── OpenBankAccount-level-1-4.jsonl │ │ │ ├── OpenBankAccount-level-1-5.jsonl │ │ │ ├── PlayMusic-level-1-1.jsonl │ │ │ ├── PlayMusic-level-1-2.jsonl │ │ │ ├── PlayMusic-level-1-3.jsonl │ │ │ ├── QueryAgenda-level-1-1.jsonl │ │ │ ├── QueryAlarm-level-1-1.jsonl │ │ │ ├── QueryBalance-OpenBankAccount-QueryStock-GetUserToken-level-2-1.jsonl │ │ │ ├── QueryBalance-QueryStock-OpenBankAccount-GetUserToken-level-2-1.jsonl │ │ │ ├── QueryBalance-QueryStock-OpenBankAccount-GetUserToken-level-2-2.jsonl │ │ │ ├── QueryBalance-level-1-1.jsonl │ │ │ ├── QueryBalance-level-1-2.jsonl │ │ │ ├── QueryHealthData-CancelRegistration-SymptomSearch-level-2-1.jsonl │ │ │ ├── QueryHealthData-level-1-1.jsonl │ │ │ ├── QueryHealthData-level-1-2.jsonl │ │ │ ├── QueryHistoryToday-Calculator-level-2-1.jsonl │ │ │ ├── QueryHistoryToday-Calculator-level-2-2.jsonl │ │ │ ├── QueryHistoryToday-Calculator-level-2-3.jsonl │ │ │ ├── QueryHistoryToday-Calculator-level-2-4.jsonl │ │ │ ├── QueryHistoryToday-level-1-1.jsonl │ │ │ ├── QueryHistoryToday-level-1-2.jsonl │ │ │ ├── QueryHistoryToday-level-1-3.jsonl │ │ │ ├── QueryMeeting-level-1-1.jsonl │ │ │ ├── QueryRegistration-AppointmentRegistration-level-2-1.jsonl │ │ │ ├── QueryRegistration-EmergencyKnowledge-level-2-1.jsonl │ │ │ ├── QueryRegistration-EmergencyKnowledge-level-2-2.jsonl │ │ │ ├── QueryRegistration-EmergencyKnowledge-level-2-3.jsonl │ │ │ ├── QueryRegistration-EmergencyKnowledge-level-2-4.jsonl │ │ │ ├── QueryRegistration-SymptomSearch-RecordHealthData-level-2-1.jsonl │ │ │ ├── QueryRegistration-SymptomSearch-RecordHealthData-level-2-2.jsonl │ │ │ ├── QueryRegistration-level-1-1.jsonl │ │ │ ├── QueryRegistration-level-1-2.jsonl │ │ │ ├── QueryRegistration-level-1-3.jsonl │ │ │ ├── QueryRegistration-level-1-4.jsonl │ │ │ ├── QueryRegistration-level-1-5.jsonl │ │ │ ├── QueryReminder-level-1-1.jsonl │ │ │ ├── QueryReminder-level-1-2.jsonl │ │ │ ├── QueryScene-level-1-1.jsonl │ │ │ ├── 
QueryScene-level-1-2.jsonl │ │ │ ├── QueryScene-level-1-3.jsonl │ │ │ ├── QueryScene-level-1-4.jsonl │ │ │ ├── QueryScene-level-1-5.jsonl │ │ │ ├── QueryStock-OpenBankAccount-QueryBalance-GetUserToken-level-2-1.jsonl │ │ │ ├── QueryStock-OpenBankAccount-QueryBalance-GetUserToken-level-2-2.jsonl │ │ │ ├── QueryStock-OpenBankAccount-level-2-1.jsonl │ │ │ ├── QueryStock-QueryBalance-GetUserToken-level-2-1.jsonl │ │ │ ├── QueryStock-QueryBalance-OpenBankAccount-GetUserToken-level-2-1.jsonl │ │ │ ├── QueryStock-QueryBalance-OpenBankAccount-GetUserToken-level-2-2.jsonl │ │ │ ├── QueryStock-level-1-1.jsonl │ │ │ ├── QueryStock-level-1-2.jsonl │ │ │ ├── QueryStock-level-1-3.jsonl │ │ │ ├── QueryStock-level-1-4.jsonl │ │ │ ├── QueryStock-level-1-5.jsonl │ │ │ ├── RecordHealthData-AppointmentRegistration-EmergencyKnowledge-level-2-1.jsonl │ │ │ ├── RecordHealthData-AppointmentRegistration-EmergencyKnowledge-level-2-2.jsonl │ │ │ ├── RecordHealthData-AppointmentRegistration-EmergencyKnowledge-level-2-3.jsonl │ │ │ ├── RecordHealthData-AppointmentRegistration-EmergencyKnowledge-level-2-4.jsonl │ │ │ ├── RecordHealthData-AppointmentRegistration-EmergencyKnowledge-level-2-5.jsonl │ │ │ ├── RecordHealthData-AppointmentRegistration-QueryRegistration-level-2-1.jsonl │ │ │ ├── RecordHealthData-AppointmentRegistration-QueryRegistration-level-2-2.jsonl │ │ │ ├── RecordHealthData-level-1-1.jsonl │ │ │ ├── RecordHealthData-level-1-2.jsonl │ │ │ ├── RegisterUser-ForgotPassword-DeleteAccount-GetUserToken-level-2-1.jsonl │ │ │ ├── RegisterUser-ForgotPassword-level-2-1.jsonl │ │ │ ├── RegisterUser-ForgotPassword-level-2-2.jsonl │ │ │ ├── RegisterUser-ForgotPassword-level-2-3.jsonl │ │ │ ├── RegisterUser-level-1-1.jsonl │ │ │ ├── RegisterUser-level-1-2.jsonl │ │ │ ├── RegisterUser-level-1-3.jsonl │ │ │ ├── SearchEngine-level-1-1.jsonl │ │ │ ├── SearchEngine-level-1-2.jsonl │ │ │ ├── SendEmail-level-1-1.jsonl │ │ │ ├── SpeechRecognition-level-1-1.jsonl │ │ │ ├── 
SymptomSearch-AppointmentRegistration-level-2-1.jsonl │ │ │ ├── SymptomSearch-level-1-1.jsonl │ │ │ ├── SymptomSearch-level-1-2.jsonl │ │ │ ├── TimedSwitch-level-1-1.jsonl │ │ │ ├── TimedSwitch-level-1-2.jsonl │ │ │ ├── TimedSwitch-level-1-3.jsonl │ │ │ ├── Translate-level-1-1.jsonl │ │ │ ├── Translate-level-1-2.jsonl │ │ │ ├── Translate-level-1-3.jsonl │ │ │ ├── Translate-level-1-4.jsonl │ │ │ └── Wiki-level-1-1.jsonl │ │ └── level-2-toolsearcher │ │ │ ├── AddAgenda-level-3-1.jsonl │ │ │ ├── AddAgenda-level-3-2.jsonl │ │ │ ├── AddAlarm-level-3-1.jsonl │ │ │ ├── AddAlarm-level-3-2.jsonl │ │ │ ├── AddMeeting-level-3-1.jsonl │ │ │ ├── AddMeeting-level-3-2.jsonl │ │ │ ├── AddReminder-level-3-1.jsonl │ │ │ ├── AppointmentRegistration-level-3-1.jsonl │ │ │ ├── BookHotel-level-3-1.jsonl │ │ │ ├── BookHotel-level-3-2.jsonl │ │ │ ├── BookHotel-level-3-3.jsonl │ │ │ ├── Calculator-level-3-1.jsonl │ │ │ ├── Calculator-level-3-2.jsonl │ │ │ ├── DeleteAccount-level-3-1.jsonl │ │ │ ├── DeleteAccount-level-3-2.jsonl │ │ │ ├── DeleteAccount-level-3-3.jsonl │ │ │ ├── DeleteAgenda-level-3-1.jsonl │ │ │ ├── DeleteAgenda-level-3-2.jsonl │ │ │ ├── DeleteAlarm-level-3-1.jsonl │ │ │ ├── DeleteMeeting-level-3-1.jsonl │ │ │ ├── DeleteMeeting-level-3-2.jsonl │ │ │ ├── DeleteReminder-level-3-1.jsonl │ │ │ ├── EmergencyKnowledge-level-3-1.jsonl │ │ │ ├── GetToday-level-3-1.jsonl │ │ │ ├── ModifyAgenda-level-3-1.jsonl │ │ │ ├── ModifyMeeting-level-3-1.jsonl │ │ │ ├── ModifyRegistration-level-3-1.jsonl │ │ │ ├── ModifyReminder-level-3-1.jsonl │ │ │ ├── QueryAlarm-level-3-1.jsonl │ │ │ ├── QueryBalance-level-3-1.jsonl │ │ │ ├── QueryBalance-level-3-2.jsonl │ │ │ ├── QueryHealthData-level-3-1.jsonl │ │ │ ├── QueryHistoryToday-level-3-2.jsonl │ │ │ ├── QueryHistoryToday-level-3-3.jsonl │ │ │ ├── QueryMeeting-level-3-1.jsonl │ │ │ ├── QueryMeeting-level-3-2.jsonl │ │ │ ├── QueryRegistration-level-3-1.jsonl │ │ │ ├── QueryRegistration-level-3-2.jsonl │ │ │ ├── QueryReminder-level-3-1.jsonl │ │ │ 
├── QueryReminder-level-3-2.jsonl │ │ │ ├── QueryReminder-level-3-3.jsonl │ │ │ ├── QueryStock-level-3-1.jsonl │ │ │ ├── RecordHealthData-level-3-1.jsonl │ │ │ ├── RecordHealthData-level-3-2.jsonl │ │ │ ├── RecordHealthData-level-3-3.jsonl │ │ │ ├── SymptomSearch-level-3-1.jsonl │ │ │ ├── SymptomSearch-level-3-2.jsonl │ │ │ ├── SymptomSearch-level-3-3.jsonl │ │ │ ├── SymptomSearch-level-3-4.jsonl │ │ │ └── SymptomSearch-level-3-5.jsonl │ ├── lv3-samples │ │ ├── Financial Management.txt │ │ ├── Health Management.txt │ │ ├── Hospital.txt │ │ ├── Hotel Booking.txt │ │ ├── Meeting Schedule.txt │ │ ├── Personal Assistant.txt │ │ ├── Reminder.txt │ │ └── Shopping.txt │ ├── lv3_apis │ │ ├── account_info.py │ │ ├── add_meeting.py │ │ ├── calculator.py │ │ ├── clothing_recommandation.py │ │ ├── email_reminder.py │ │ ├── flight_search.py │ │ ├── geocoding.py │ │ ├── get_occupation_salary.py │ │ ├── get_weather.py │ │ ├── hotel_availability.py │ │ ├── like_count.py │ │ ├── movie_recommandation.py │ │ ├── nearby_restaurants.py │ │ ├── organization_members.py │ │ ├── query_meeting.py │ │ ├── tax_calculator.py │ │ ├── tool_search.py │ │ ├── travel_status.py │ │ ├── update_account_info.py │ │ ├── user_posts.py │ │ ├── user_watched_movies.py │ │ └── users_movie_preference.py │ ├── lv3_evaluator.py │ ├── requirements.txt │ ├── run.sh │ ├── simulator.py │ ├── tool_manager.py │ └── utils.py ├── gsm8k │ ├── evaluate_gsm8k.py │ └── gsm8k.sh ├── human_eval │ ├── HumanEval.jsonl │ ├── evaluate_human_eval.py │ └── human_eval.sh ├── m3tooleval │ ├── m3tooleval_analysis.ipynb │ ├── main.py │ ├── run.sh │ └── tasks │ │ ├── __init__.py │ │ ├── base.py │ │ ├── impl │ │ ├── __init__.py │ │ ├── dna_sequencer.py │ │ ├── message_decoder.py │ │ ├── trade_calculator.py │ │ ├── travel_itinerary_planning.py │ │ └── web_browsing │ │ │ ├── __init__.py │ │ │ ├── generate_web_pages.py │ │ │ ├── metadata.json │ │ │ └── web_pages.jsonl │ │ └── repl.py ├── miniwob++ │ ├── .gitignore │ ├── LICENSE │ ├── 
README.md │ ├── available_tasks.txt │ ├── computergym │ │ ├── .gitignore │ │ ├── README.md │ │ ├── computergym │ │ │ ├── __init__.py │ │ │ └── miniwob │ │ │ │ ├── base_env.py │ │ │ │ └── miniwob_interface │ │ │ │ ├── __init__.py │ │ │ │ ├── action.py │ │ │ │ ├── environment.py │ │ │ │ ├── fields.py │ │ │ │ ├── html │ │ │ │ ├── .gitignore │ │ │ │ ├── common │ │ │ │ │ ├── shapes.js │ │ │ │ │ ├── special │ │ │ │ │ │ ├── book-flight │ │ │ │ │ │ │ └── domestic.js │ │ │ │ │ │ ├── checkbox-numbers │ │ │ │ │ │ │ ├── ch_0.png │ │ │ │ │ │ │ ├── ch_1.png │ │ │ │ │ │ │ ├── ch_2.png │ │ │ │ │ │ │ ├── ch_3.png │ │ │ │ │ │ │ ├── ch_4.png │ │ │ │ │ │ │ ├── ch_5.png │ │ │ │ │ │ │ ├── ch_6.png │ │ │ │ │ │ │ ├── ch_7.png │ │ │ │ │ │ │ ├── ch_8.png │ │ │ │ │ │ │ └── ch_9.png │ │ │ │ │ │ ├── click-pie │ │ │ │ │ │ │ ├── raphael.icons.min.js │ │ │ │ │ │ │ ├── raphael.min.js │ │ │ │ │ │ │ └── wheelnav.min.js │ │ │ │ │ │ ├── drag-cube │ │ │ │ │ │ │ ├── blank.png │ │ │ │ │ │ │ ├── cube.css │ │ │ │ │ │ │ └── cube.js │ │ │ │ │ │ ├── email-inbox-nl │ │ │ │ │ │ │ └── templates.js │ │ │ │ │ │ ├── email-inbox │ │ │ │ │ │ │ ├── delete.png │ │ │ │ │ │ │ ├── email-inbox.css │ │ │ │ │ │ │ ├── forward.png │ │ │ │ │ │ │ ├── left-arrow-white.png │ │ │ │ │ │ │ ├── left-arrow.png │ │ │ │ │ │ │ ├── reply.png │ │ │ │ │ │ │ ├── search.png │ │ │ │ │ │ │ ├── send.png │ │ │ │ │ │ │ ├── star-clicked.png │ │ │ │ │ │ │ └── star.png │ │ │ │ │ │ ├── navigate-tree │ │ │ │ │ │ │ ├── images │ │ │ │ │ │ │ │ ├── ajax-loader.gif │ │ │ │ │ │ │ │ ├── file.gif │ │ │ │ │ │ │ │ ├── folder-closed.gif │ │ │ │ │ │ │ │ ├── folder.gif │ │ │ │ │ │ │ │ ├── minus.gif │ │ │ │ │ │ │ │ ├── plus.gif │ │ │ │ │ │ │ │ ├── treeview-black-line.gif │ │ │ │ │ │ │ │ ├── treeview-black.gif │ │ │ │ │ │ │ │ ├── treeview-default-line.gif │ │ │ │ │ │ │ │ ├── treeview-default.gif │ │ │ │ │ │ │ │ ├── treeview-famfamfam-line.gif │ │ │ │ │ │ │ │ ├── treeview-famfamfam.gif │ │ │ │ │ │ │ │ ├── treeview-gray-line.gif │ │ │ │ │ │ │ │ ├── treeview-gray.gif │ │ 
│ │ │ │ │ │ ├── treeview-red-line.gif │ │ │ │ │ │ │ │ └── treeview-red.gif │ │ │ │ │ │ │ ├── jquery.treeview.css │ │ │ │ │ │ │ └── jquery.treeview.min.js │ │ │ │ │ │ ├── search-engine │ │ │ │ │ │ │ └── jquery.twbsPagination.min.js │ │ │ │ │ │ ├── social-media │ │ │ │ │ │ │ ├── like-hover.png │ │ │ │ │ │ │ ├── like.png │ │ │ │ │ │ │ ├── more-hover.png │ │ │ │ │ │ │ ├── more.png │ │ │ │ │ │ │ ├── reply-hover.png │ │ │ │ │ │ │ ├── reply.png │ │ │ │ │ │ │ ├── retweet-hover.png │ │ │ │ │ │ │ ├── retweet.png │ │ │ │ │ │ │ ├── share-hover.png │ │ │ │ │ │ │ └── share.png │ │ │ │ │ │ ├── text-editor │ │ │ │ │ │ │ ├── quill.min.js │ │ │ │ │ │ │ └── quill.snow.css │ │ │ │ │ │ └── tic-tac-toe │ │ │ │ │ │ │ ├── o.png │ │ │ │ │ │ │ └── x.png │ │ │ │ │ └── ui_utils.js │ │ │ │ ├── core │ │ │ │ │ ├── core.css │ │ │ │ │ ├── core.js │ │ │ │ │ ├── d3.v3.min.js │ │ │ │ │ ├── jquery-ui │ │ │ │ │ │ ├── external │ │ │ │ │ │ │ └── jquery │ │ │ │ │ │ │ │ └── jquery.js │ │ │ │ │ │ ├── images │ │ │ │ │ │ │ ├── ui-icons_444444_256x240.png │ │ │ │ │ │ │ ├── ui-icons_555555_256x240.png │ │ │ │ │ │ │ ├── ui-icons_777620_256x240.png │ │ │ │ │ │ │ ├── ui-icons_777777_256x240.png │ │ │ │ │ │ │ ├── ui-icons_cc0000_256x240.png │ │ │ │ │ │ │ └── ui-icons_ffffff_256x240.png │ │ │ │ │ │ ├── jquery-ui.min.css │ │ │ │ │ │ ├── jquery-ui.min.js │ │ │ │ │ │ ├── jquery-ui.structure.min.css │ │ │ │ │ │ └── jquery-ui.theme.min.css │ │ │ │ │ ├── jscolor.min.js │ │ │ │ │ └── record.js │ │ │ │ ├── flight │ │ │ │ │ ├── AA │ │ │ │ │ │ ├── apps │ │ │ │ │ │ │ └── common │ │ │ │ │ │ │ │ └── js │ │ │ │ │ │ │ │ ├── aacom.js │ │ │ │ │ │ │ │ ├── aacomDevice.js │ │ │ │ │ │ │ │ ├── airportcode.js │ │ │ │ │ │ │ │ ├── cookieconsent.js │ │ │ │ │ │ │ │ ├── jquery │ │ │ │ │ │ │ │ └── aacom │ │ │ │ │ │ │ │ │ ├── plugins │ │ │ │ │ │ │ │ │ ├── aaAirportAutoComplete.js │ │ │ │ │ │ │ │ │ ├── aaCache.js │ │ │ │ │ │ │ │ │ ├── aaCookie.js │ │ │ │ │ │ │ │ │ ├── aaCountryLanSelect.js │ │ │ │ │ │ │ │ │ ├── aaDropdownPanel.js │ │ │ │ │ │ │ │ 
│ ├── aaFooterAds.js │ │ │ │ │ │ │ │ │ └── aaTextBoxMessage.js │ │ │ │ │ │ │ │ │ └── utilities │ │ │ │ │ │ │ │ │ ├── aaUtilities-2.1.js │ │ │ │ │ │ │ │ │ └── aaUtils.js │ │ │ │ │ │ │ │ └── wa.js │ │ │ │ │ │ ├── content │ │ │ │ │ │ │ ├── common │ │ │ │ │ │ │ │ └── css │ │ │ │ │ │ │ │ │ └── v3 │ │ │ │ │ │ │ │ │ ├── core.css │ │ │ │ │ │ │ │ │ ├── jquery-ui-1.10-aa.css │ │ │ │ │ │ │ │ │ ├── reservation │ │ │ │ │ │ │ │ │ └── findFlights │ │ │ │ │ │ │ │ │ │ └── mobile │ │ │ │ │ │ │ │ │ │ └── findFlights.css │ │ │ │ │ │ │ │ │ └── responsive.css │ │ │ │ │ │ │ ├── fonts │ │ │ │ │ │ │ │ ├── american-v2 │ │ │ │ │ │ │ │ │ ├── americansans-bold.woff │ │ │ │ │ │ │ │ │ ├── americansans-light.woff │ │ │ │ │ │ │ │ │ ├── americansans-medium.woff │ │ │ │ │ │ │ │ │ └── americansans-regular.woff │ │ │ │ │ │ │ │ └── icons │ │ │ │ │ │ │ │ │ └── american-icons-v4-4.woff │ │ │ │ │ │ │ └── images │ │ │ │ │ │ │ │ ├── chrome │ │ │ │ │ │ │ │ ├── icons │ │ │ │ │ │ │ │ │ └── loading.gif │ │ │ │ │ │ │ │ └── rebrand │ │ │ │ │ │ │ │ │ ├── aa-flight-icon.png │ │ │ │ │ │ │ │ │ ├── aa-icons-flags-sprite.png │ │ │ │ │ │ │ │ │ ├── aa-logo.png │ │ │ │ │ │ │ │ │ ├── down-arrow.png │ │ │ │ │ │ │ │ │ ├── favicon.png │ │ │ │ │ │ │ │ │ ├── oneworld.png │ │ │ │ │ │ │ │ │ ├── shadow-down.png │ │ │ │ │ │ │ │ │ ├── shadow-vertical-150.png │ │ │ │ │ │ │ │ │ ├── shadow12-down.png │ │ │ │ │ │ │ │ │ ├── shadow12-up.png │ │ │ │ │ │ │ │ │ └── shadow3-down.png │ │ │ │ │ │ │ │ └── graphics │ │ │ │ │ │ │ │ └── icons │ │ │ │ │ │ │ │ └── aa-jqueryUIicons-sprite.png │ │ │ │ │ │ ├── dataset-AA.js │ │ │ │ │ │ ├── index.html │ │ │ │ │ │ ├── js │ │ │ │ │ │ │ ├── aa │ │ │ │ │ │ │ │ ├── common │ │ │ │ │ │ │ │ │ ├── aa-utility-menu.js │ │ │ │ │ │ │ │ │ ├── aacom-ui-1.0.0.js │ │ │ │ │ │ │ │ │ └── core-2.0.0.js │ │ │ │ │ │ │ │ ├── modules │ │ │ │ │ │ │ │ │ ├── airportLookup.js │ │ │ │ │ │ │ │ │ ├── ajax.js │ │ │ │ │ │ │ │ │ ├── browserdetect.js │ │ │ │ │ │ │ │ │ ├── commonsetup.js │ │ │ │ │ │ │ │ │ ├── mobileDatePicker.js │ │ │ │ │ │ 
│ │ │ ├── utilities.js │ │ │ │ │ │ │ │ │ └── widgets.js │ │ │ │ │ │ │ │ ├── plugins │ │ │ │ │ │ │ │ │ └── noBounce.js │ │ │ │ │ │ │ │ └── shopping │ │ │ │ │ │ │ │ │ └── mobileSearchFlights.js │ │ │ │ │ │ │ └── libs │ │ │ │ │ │ │ │ ├── jquery │ │ │ │ │ │ │ │ ├── jquery-1.11.1.min.js │ │ │ │ │ │ │ │ ├── jquery-migrate-1.2.1.min.js │ │ │ │ │ │ │ │ └── ui │ │ │ │ │ │ │ │ │ └── 1.10 │ │ │ │ │ │ │ │ │ ├── i18n │ │ │ │ │ │ │ │ │ └── jquery.ui.datepicker-en-aa.js │ │ │ │ │ │ │ │ │ └── jquery-ui.min.js │ │ │ │ │ │ │ │ └── modernizr-2.8.1.js │ │ │ │ │ │ ├── original.html │ │ │ │ │ │ ├── surrogate │ │ │ │ │ │ │ ├── airportLookup.js │ │ │ │ │ │ │ └── airports.json │ │ │ │ │ │ └── wrapper.html │ │ │ │ │ ├── Alaska-auto-medium │ │ │ │ │ │ ├── images │ │ │ │ │ │ │ ├── aura.png │ │ │ │ │ │ │ ├── cal3.png │ │ │ │ │ │ │ ├── chkboxes3.png │ │ │ │ │ │ │ ├── clear_text2.png │ │ │ │ │ │ │ ├── collapse.png │ │ │ │ │ │ │ ├── expand.png │ │ │ │ │ │ │ ├── flight_arrow.png │ │ │ │ │ │ │ ├── geo.png │ │ │ │ │ │ │ ├── home.png │ │ │ │ │ │ │ ├── info2.png │ │ │ │ │ │ │ ├── leftright.png │ │ │ │ │ │ │ ├── logo2.png │ │ │ │ │ │ │ └── logos │ │ │ │ │ │ │ │ ├── AA.png │ │ │ │ │ │ │ │ ├── AS.png │ │ │ │ │ │ │ │ ├── DL.png │ │ │ │ │ │ │ │ └── VX.png │ │ │ │ │ │ ├── index.html │ │ │ │ │ │ ├── mobileweb-v3-28-6227-21813.css │ │ │ │ │ │ ├── scripts │ │ │ │ │ │ │ ├── datepickr.js │ │ │ │ │ │ │ ├── main.js │ │ │ │ │ │ │ └── shopbook.js │ │ │ │ │ │ ├── stylesheets │ │ │ │ │ │ │ └── circular │ │ │ │ │ │ │ │ ├── ASCircularWeb-Bold.woff │ │ │ │ │ │ │ │ └── ASCircularWeb-Book.woff │ │ │ │ │ │ ├── surrogate │ │ │ │ │ │ │ ├── airportLookup.js │ │ │ │ │ │ │ └── airports.json │ │ │ │ │ │ └── wrapper.html │ │ │ │ │ ├── Alaska-auto │ │ │ │ │ │ ├── images │ │ │ │ │ │ │ ├── aura.png │ │ │ │ │ │ │ ├── cal3.png │ │ │ │ │ │ │ ├── chkboxes3.png │ │ │ │ │ │ │ ├── clear_text2.png │ │ │ │ │ │ │ ├── collapse.png │ │ │ │ │ │ │ ├── expand.png │ │ │ │ │ │ │ ├── flight_arrow.png │ │ │ │ │ │ │ ├── geo.png │ │ │ │ │ │ │ ├── 
home.png │ │ │ │ │ │ │ ├── info2.png │ │ │ │ │ │ │ ├── leftright.png │ │ │ │ │ │ │ ├── logo2.png │ │ │ │ │ │ │ └── logos │ │ │ │ │ │ │ │ ├── AA.png │ │ │ │ │ │ │ │ ├── AS.png │ │ │ │ │ │ │ │ ├── DL.png │ │ │ │ │ │ │ │ └── VX.png │ │ │ │ │ │ ├── index.html │ │ │ │ │ │ ├── mobileweb-v3-28-6227-21813.css │ │ │ │ │ │ ├── scripts │ │ │ │ │ │ │ ├── datepickr.js │ │ │ │ │ │ │ ├── main.js │ │ │ │ │ │ │ └── shopbook.js │ │ │ │ │ │ ├── stylesheets │ │ │ │ │ │ │ └── circular │ │ │ │ │ │ │ │ ├── ASCircularWeb-Bold.woff │ │ │ │ │ │ │ │ └── ASCircularWeb-Book.woff │ │ │ │ │ │ ├── surrogate │ │ │ │ │ │ │ ├── airportLookup.js │ │ │ │ │ │ │ └── airports.json │ │ │ │ │ │ └── wrapper.html │ │ │ │ │ ├── Alaska │ │ │ │ │ │ ├── dataset-Alaska.js │ │ │ │ │ │ ├── images │ │ │ │ │ │ │ ├── aura.png │ │ │ │ │ │ │ ├── cal3.png │ │ │ │ │ │ │ ├── chkboxes3.png │ │ │ │ │ │ │ ├── clear_text2.png │ │ │ │ │ │ │ ├── collapse.png │ │ │ │ │ │ │ ├── expand.png │ │ │ │ │ │ │ ├── flight_arrow.png │ │ │ │ │ │ │ ├── geo.png │ │ │ │ │ │ │ ├── home.png │ │ │ │ │ │ │ ├── info2.png │ │ │ │ │ │ │ ├── leftright.png │ │ │ │ │ │ │ ├── logo2.png │ │ │ │ │ │ │ └── logos │ │ │ │ │ │ │ │ ├── AA.png │ │ │ │ │ │ │ │ ├── AS.png │ │ │ │ │ │ │ │ ├── DL.png │ │ │ │ │ │ │ │ └── VX.png │ │ │ │ │ │ ├── index.html │ │ │ │ │ │ ├── mobileweb-v3-28-6227-21813.css │ │ │ │ │ │ ├── original.html │ │ │ │ │ │ ├── scripts │ │ │ │ │ │ │ ├── datepickr.js │ │ │ │ │ │ │ ├── main.js │ │ │ │ │ │ │ └── shopbook.js │ │ │ │ │ │ ├── stylesheets │ │ │ │ │ │ │ └── circular │ │ │ │ │ │ │ │ ├── ASCircularWeb-Bold.woff │ │ │ │ │ │ │ │ └── ASCircularWeb-Book.woff │ │ │ │ │ │ ├── surrogate │ │ │ │ │ │ │ ├── airportLookup.js │ │ │ │ │ │ │ └── airports.json │ │ │ │ │ │ └── wrapper.html │ │ │ │ │ └── flight-common │ │ │ │ │ │ ├── inject.js │ │ │ │ │ │ ├── wrapper.css │ │ │ │ │ │ └── wrapper.js │ │ │ │ └── miniwob │ │ │ │ │ ├── bisect-angle.html │ │ │ │ │ ├── book-flight-nodelay.html │ │ │ │ │ ├── book-flight.html │ │ │ │ │ ├── chase-circle.html │ │ │ │ │ 
├── choose-date-easy.html │ │ │ │ │ ├── choose-date-medium.html │ │ │ │ │ ├── choose-date-nodelay.html │ │ │ │ │ ├── choose-date.html │ │ │ │ │ ├── choose-list.html │ │ │ │ │ ├── circle-center.html │ │ │ │ │ ├── click-button-sequence.html │ │ │ │ │ ├── click-button.html │ │ │ │ │ ├── click-checkboxes-large.html │ │ │ │ │ ├── click-checkboxes-soft.html │ │ │ │ │ ├── click-checkboxes-transfer.html │ │ │ │ │ ├── click-checkboxes.html │ │ │ │ │ ├── click-collapsible-2-nodelay.html │ │ │ │ │ ├── click-collapsible-2.html │ │ │ │ │ ├── click-collapsible-nodelay.html │ │ │ │ │ ├── click-collapsible.html │ │ │ │ │ ├── click-color.html │ │ │ │ │ ├── click-dialog-2.html │ │ │ │ │ ├── click-dialog.html │ │ │ │ │ ├── click-link.html │ │ │ │ │ ├── click-menu-2.html │ │ │ │ │ ├── click-menu.html │ │ │ │ │ ├── click-option.html │ │ │ │ │ ├── click-pie-nodelay.html │ │ │ │ │ ├── click-pie.html │ │ │ │ │ ├── click-scroll-list.html │ │ │ │ │ ├── click-shades.html │ │ │ │ │ ├── click-shape.html │ │ │ │ │ ├── click-tab-2-easy.html │ │ │ │ │ ├── click-tab-2-hard.html │ │ │ │ │ ├── click-tab-2-medium.html │ │ │ │ │ ├── click-tab-2.html │ │ │ │ │ ├── click-tab.html │ │ │ │ │ ├── click-test-2.html │ │ │ │ │ ├── click-test-transfer.html │ │ │ │ │ ├── click-test.html │ │ │ │ │ ├── click-widget.html │ │ │ │ │ ├── copy-paste-2.html │ │ │ │ │ ├── copy-paste.html │ │ │ │ │ ├── count-shape.html │ │ │ │ │ ├── count-sides.html │ │ │ │ │ ├── drag-box.html │ │ │ │ │ ├── drag-cube.html │ │ │ │ │ ├── drag-item.html │ │ │ │ │ ├── drag-items-grid.html │ │ │ │ │ ├── drag-items.html │ │ │ │ │ ├── drag-shapes.html │ │ │ │ │ ├── drag-sort-numbers.html │ │ │ │ │ ├── email-inbox-delete.html │ │ │ │ │ ├── email-inbox-forward-nl-turk.html │ │ │ │ │ ├── email-inbox-forward-nl.html │ │ │ │ │ ├── email-inbox-forward.html │ │ │ │ │ ├── email-inbox-important.html │ │ │ │ │ ├── email-inbox-nl-turk.html │ │ │ │ │ ├── email-inbox-noscroll.html │ │ │ │ │ ├── email-inbox-reply.html │ │ │ │ │ ├── 
email-inbox-star-reply.html │ │ │ │ │ ├── email-inbox.html │ │ │ │ │ ├── enter-date.html │ │ │ │ │ ├── enter-password.html │ │ │ │ │ ├── enter-text-2.html │ │ │ │ │ ├── enter-text-dynamic.html │ │ │ │ │ ├── enter-text.html │ │ │ │ │ ├── enter-time.html │ │ │ │ │ ├── find-midpoint.html │ │ │ │ │ ├── find-word.html │ │ │ │ │ ├── focus-text-2.html │ │ │ │ │ ├── focus-text.html │ │ │ │ │ ├── grid-coordinate.html │ │ │ │ │ ├── guess-number.html │ │ │ │ │ ├── highlight-text-2.html │ │ │ │ │ ├── highlight-text.html │ │ │ │ │ ├── identify-shape.html │ │ │ │ │ ├── login-user-popup.html │ │ │ │ │ ├── login-user.html │ │ │ │ │ ├── moving-items.html │ │ │ │ │ ├── multi-layouts.html │ │ │ │ │ ├── multi-orderings.html │ │ │ │ │ ├── navigate-tree.html │ │ │ │ │ ├── number-checkboxes.html │ │ │ │ │ ├── read-table-2.html │ │ │ │ │ ├── read-table.html │ │ │ │ │ ├── resize-textarea.html │ │ │ │ │ ├── right-angle.html │ │ │ │ │ ├── scroll-text-2.html │ │ │ │ │ ├── scroll-text.html │ │ │ │ │ ├── search-engine.html │ │ │ │ │ ├── simon-says.html │ │ │ │ │ ├── simple-algebra.html │ │ │ │ │ ├── simple-arithmetic.html │ │ │ │ │ ├── social-media-all.html │ │ │ │ │ ├── social-media-some.html │ │ │ │ │ ├── social-media.html │ │ │ │ │ ├── terminal.html │ │ │ │ │ ├── terminal_reproduce.html │ │ │ │ │ ├── text-editor.html │ │ │ │ │ ├── text-transform.html │ │ │ │ │ ├── tic-tac-toe.html │ │ │ │ │ ├── unicode-test.html │ │ │ │ │ ├── use-autocomplete-nodelay.html │ │ │ │ │ ├── use-autocomplete.html │ │ │ │ │ ├── use-colorwheel-2.html │ │ │ │ │ ├── use-colorwheel.html │ │ │ │ │ ├── use-slider-2.html │ │ │ │ │ ├── use-slider.html │ │ │ │ │ ├── use-spinner.html │ │ │ │ │ └── visual-addition.html │ │ │ │ ├── instance.py │ │ │ │ ├── reward.py │ │ │ │ ├── screenshot.py │ │ │ │ ├── state.py │ │ │ │ └── utils.py │ │ └── setup.py │ ├── llm_agent.py │ ├── main.py │ ├── metrics.py │ ├── miniwob++.sh │ ├── prompt.py │ ├── prompt │ │ ├── action.txt │ │ ├── base.txt │ │ ├── choose-list │ │ │ ├── action.txt │ │ │ 
├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-button-sequence │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-button │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-checkboxes-large │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-checkboxes-soft │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-checkboxes-transfer │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-checkboxes │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-collapsible-2 │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-collapsible │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-dialog-2 │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-dialog │ │ │ ├── action.txt │ │ │ ├── 
base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-menu │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-option │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-scroll-list │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-shades │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-shape │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-tab-2-hard │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-tab-2 │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-tab │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-test │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── click-widget │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── 
initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── count-shape │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── email-inbox-forward-nl-turk │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── email-inbox-forward-nl │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── email-inbox-nl-turk │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── email-inbox │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── enter-date │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── enter-time │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── example.txt │ │ ├── few-shot.json │ │ ├── first_action.txt │ │ ├── focus-text │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── grid-coordinate │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── check_finish.txt │ │ │ ├── done.txt │ │ │ ├── edit_plan.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── new_plan.txt │ │ │ ├── rci_action.txt │ │ │ ├── rci_answer.txt │ │ │ 
└── update_action.txt │ │ ├── identify-shape │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── initialize_plan.txt │ │ ├── login-user-popup │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── multi-layouts │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── navigate-tree │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── rci_action.txt │ │ ├── search-engine │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── simple-algebra │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── check_finish.txt │ │ │ ├── done.txt │ │ │ ├── edit_plan.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── new_plan.txt │ │ │ ├── rci_action.txt │ │ │ ├── rci_answer.txt │ │ │ └── update_action.txt │ │ ├── social-media-all │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── social-media-some │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── social-media │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── terminal │ │ │ ├── action.txt │ │ │ ├── 
base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── tic-tac-toe │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ ├── update_action.txt │ │ ├── use-autocomplete │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ │ └── use-spinner │ │ │ ├── action.txt │ │ │ ├── base.txt │ │ │ ├── example.txt │ │ │ ├── first_action.txt │ │ │ ├── initialize_plan.txt │ │ │ ├── rci_action.txt │ │ │ └── update_action.txt │ └── requirements.txt ├── mint-bench │ ├── config_variables.py │ ├── convert_outputs.py │ ├── mint-bench-run.sh │ ├── mint.sh │ └── remove_dups.py ├── mmlu │ ├── evaluate_mmlu.py │ └── mmlu.sh ├── mt-bench │ ├── mt-bench.sh │ └── process_micro_avg.py ├── run_all.sh ├── science-world │ ├── .gitignore │ ├── README.md │ ├── code_actions.py │ ├── data_utils │ │ ├── READMD.md │ │ ├── __init__.py │ │ ├── data_convert.py │ │ ├── data_utils.py │ │ ├── demos.json │ │ └── goldpaths-all.zip │ ├── eval.py │ ├── eval_code_as_action.py │ ├── eval_utils.py │ ├── fast_agent │ │ ├── deepspeed_reqs.txt │ │ ├── ds_train.py │ │ ├── ds_train.sh │ │ └── zero_2_bf16.json │ ├── metrics.py │ ├── prompts │ │ ├── README.md │ │ ├── convert.py │ │ ├── prompt.json │ │ ├── prompt_code_action.json │ │ └── prompt_orig.json │ ├── science-world.sh │ └── slow_agent │ │ ├── local_llm.py │ │ ├── run_gradio.py │ │ └── utils.py ├── serve │ └── serve_ckpt.sh ├── setup_data.sh ├── setup_env.sh └── source.sh ├── models ├── add_chat_format.py └── megatron │ ├── convert_llama.sh │ ├── convert_mistral.sh │ ├── convert_sharded_to_hf.sh │ ├── finetune_4xA100_4tp_mixture_llama.sh │ ├── finetune_4xA100_4tp_mixture_mistral.sh │ ├── shard_model_4tp.sh │ └── source.sh └── slurm ├── 
configs ├── eval_2xA100.slurm └── finetune_4xA100_4tp.slurm ├── run_4gpu_interactive.sh ├── run_gpu_interactive.sh ├── run_gpu_interactive_A100.sh └── run_megatron_gpu_interactive.sh /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "scripts/eval/human_eval/human-eval"] 2 | path = scripts/eval/human_eval/human-eval 3 | url = https://github.com/openai/human-eval.git 4 | [submodule "scripts/eval/mt-bench/FastChat"] 5 | path = scripts/eval/mt-bench/FastChat 6 | url = https://github.com/xingyaoww/FastChat.git 7 | [submodule "scripts/eval/mint-bench/mint-bench"] 8 | path = scripts/eval/mint-bench/mint-bench 9 | url = https://github.com/xingyaoww/mint-bench.git 10 | [submodule "chat-ui"] 11 | path = chat-ui 12 | url = https://github.com/xingyaoww/chat-ui.git 13 | [submodule "Megatron-LLM"] 14 | path = Megatron-LLM 15 | url = https://github.com/xingyaoww/Megatron-LLM.git 16 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: code-act 2 | channels: 3 | - defaults 4 | - pytorch 5 | - nvidia/label/cuda-11.8.0 6 | - conda-forge 7 | dependencies: 8 | - python=3.9 9 | - pytorch-cuda=11.8 10 | - cuda-toolkit 11 | - pip 12 | - pip: 13 | - -r requirements.txt 14 | -------------------------------------------------------------------------------- /figures/codeact-comparison-perf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/figures/codeact-comparison-perf.png -------------------------------------------------------------------------------- /figures/codeact-comparison-table.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/figures/codeact-comparison-table.png -------------------------------------------------------------------------------- /figures/data-stats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/figures/data-stats.png -------------------------------------------------------------------------------- /figures/json-text-comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/figures/json-text-comparison.png -------------------------------------------------------------------------------- /figures/model-performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/figures/model-performance.png -------------------------------------------------------------------------------- /figures/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/figures/overview.png -------------------------------------------------------------------------------- /mint/agents/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import LMAgent 2 | from .openai_lm_agent import OpenAILMAgent 3 | from .bard_agent import BardLMAgent 4 | from .openai_feedback_agent import OpenAIFeedbackAgent 5 | from .claude_feedback_agent import ClaudeFeedbackAgent 6 | from .vllm_feedback_agent import VLLMFeedbackAgent 7 | from .vllm_agent import VLLMAgent 8 | from .claude_agent import ClaudeLMAgent 9 | 
-------------------------------------------------------------------------------- /mint/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/mint/configs/__init__.py -------------------------------------------------------------------------------- /mint/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseEnv 2 | from .general_env import GeneralEnv 3 | from .alfworld_env import AlfworldEnv 4 | -------------------------------------------------------------------------------- /mint/envs/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from mint.datatypes import State, Action 3 | 4 | 5 | class BaseEnv(ABC): 6 | @abstractmethod 7 | def step(self, action: Action) -> State: 8 | pass 9 | 10 | @abstractmethod 11 | def reset(self) -> State: 12 | pass 13 | -------------------------------------------------------------------------------- /mint/prompt/templates/template_feedback_agent.txt: -------------------------------------------------------------------------------- 1 | You are an expert tasked with evaluating and providing feedback on an assistant's performance. 2 | 3 | --- 4 | Here is an example. Please follow the format as the following expert acts. 5 | 6 | {in_context_example} 7 | 8 | --- 9 | 10 | {tool_desc} 11 | 12 | {trajectory} 13 | 14 | {correct_solution} 15 | 16 | Please provide concise and constructive feedback. Remember, your role is similar to a teacher. Rather than giving away the solution or details about the answer, guide the assistant toward understanding how to arrive at the correct answer. Your feedback should focus on enhancing the assistant's ability to think critically and respond accurately. Now provide your feedback. 
17 | Expert feedback: 18 | -------------------------------------------------------------------------------- /mint/prompt/templates/template_without_tool.txt: -------------------------------------------------------------------------------- 1 | You are a helpful assistant assigned with the task of problem-solving. 2 | 3 | At each turn, you should first provide your step-by-step thinking for solving the task. Your thought process should be enclosed using "<thought>" tag, for example: <thought> I need to print "Hello World!" </thought>. 4 | 5 | After that, you can directly provide a solution that adheres to the required format for the given task. Your solution should be enclosed using "<solution>" tag, for example: The answer is <solution> A </solution>. 6 | 7 | You can only propose a solution {max_propose_solution} times. 8 | 9 | {tool_desc} 10 | 11 | --- 12 | 13 | {in_context_example} 14 | 15 | --- 16 | 17 | {task_prompt} 18 | -------------------------------------------------------------------------------- /mint/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Task 2 | from .reasoning import ReasoningTask, MATHTask 3 | from .codegen import CodeGenTask, HumanEvalTask, MBPPTask, APPSTask 4 | from .alfworld import AlfWorldTask 5 | from .tabular import WikiTableQuestionsTask 6 | -------------------------------------------------------------------------------- /mint/tasks/codegen/APPS/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/mint/tasks/codegen/APPS/__init__.py -------------------------------------------------------------------------------- /mint/tasks/in_context_examples/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Code Generation 3 | 4 | The one-shot example for MBPP is obtained from the "prompt" split of the MBPP dataset 
([link](https://huggingface.co/datasets/mbpp/viewer/sanitized/prompt?row=0)). Since HumanEval does not provide a one-shot prompt, we re-format the MBPP prompt for HumanEval. 5 | -------------------------------------------------------------------------------- /mint/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Tool 2 | from typing import List 3 | import warnings 4 | 5 | warnings.filterwarnings("ignore") 6 | 7 | 8 | def get_toolset_description(tools: List[Tool]) -> str: 9 | if len(tools) == 0: 10 | return "" 11 | 12 | output = "Tool function available (already imported in environment):\n" 13 | for i, tool in enumerate(tools): 14 | output += f"[{i + 1}] {tool.signature}\n" 15 | output += f"{tool.description}\n" 16 | 17 | return output 18 | -------------------------------------------------------------------------------- /mint/tools/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Any 3 | 4 | 5 | class Tool(ABC): 6 | """Abstract class for a tool.""" 7 | 8 | name: str 9 | signature: str 10 | description: str 11 | 12 | @abstractmethod 13 | def __call__(self, *args: Any, **kwds: Any) -> str: 14 | """Execute the tool with the given args and return the output.""" 15 | # execute tool with arbitrary args 16 | pass 17 | 18 | def reset(self) -> None: 19 | """Reset the tool to its initial state.""" 20 | pass 21 | -------------------------------------------------------------------------------- /mint/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | # use cache to avoid loading the same file multiple times 4 | # which can lead to a "too many open files" error 5 | @functools.lru_cache(maxsize=128) 6 | def load_file(filepath: str) -> str: 7 | with open(filepath, "r") as f: 8 | content = f.read() 9 | return content 10 | 
-------------------------------------------------------------------------------- /mint/utils/exception.py: -------------------------------------------------------------------------------- 1 | class ParseError(Exception): 2 | pass 3 | 4 | 5 | class ToolExecutionError(Exception): 6 | pass 7 | 8 | 9 | class LMExecutionTimeoutError(Exception): 10 | pass 11 | -------------------------------------------------------------------------------- /scripts/chat/code_execution/Dockerfile.api: -------------------------------------------------------------------------------- 1 | FROM python:3.9 2 | 3 | EXPOSE 8081 4 | RUN pip install --upgrade pip 5 | RUN pip install tornado 6 | RUN pip install docker 7 | RUN pip install kubernetes 8 | 9 | # Install ping for debugging 10 | RUN apt-get update && apt-get install -y iputils-ping 11 | 12 | # copy over file 13 | RUN mkdir -p /app 14 | WORKDIR /app 15 | COPY . /app 16 | 17 | # docker build -t xingyaoww/codeact-execute-api -f Dockerfile.api . 18 | # docker push xingyaoww/codeact-execute-api 19 | -------------------------------------------------------------------------------- /scripts/chat/code_execution/Dockerfile.executor: -------------------------------------------------------------------------------- 1 | FROM quay.io/jupyter/datascience-notebook:2023-10-20 2 | 3 | # install the kernel gateway 4 | RUN pip install jupyter_kernel_gateway 5 | RUN pip install langchain google-api-python-client 6 | RUN pip install boilerpy3 7 | RUN pip install pandas datasets sympy 8 | RUN pip install transformers[torch] 9 | RUN pip install scikit-learn matplotlib seaborn 10 | RUN pip install torch --index-url https://download.pytorch.org/whl/cpu 11 | # run kernel gateway on container start, not notebook server 12 | EXPOSE 8888 13 | CMD ["jupyter", "kernelgateway", "--KernelGatewayApp.ip=0.0.0.0", "--KernelGatewayApp.port=8888"] 14 | # docker build -t xingyaoww/codeact-executor -f Dockerfile.executor . 
15 | # docker push xingyaoww/codeact-executor 16 | -------------------------------------------------------------------------------- /scripts/chat/code_execution/start_jupyter_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | JUPYTER_API_PORT=$1 3 | echo "JUPYTER_API_PORT=$JUPYTER_API_PORT" 4 | 5 | pushd scripts/chat/code_execution 6 | export PYTHONPATH=`pwd`:$PYTHONPATH 7 | # gunicorn -w 1 api:app --bind localhost:$JUPYTER_API_PORT 8 | # TODO: fix the issue of sharing data across worker before enabling multiple workers 9 | python3 api.py --port $JUPYTER_API_PORT 10 | 11 | popd 12 | -------------------------------------------------------------------------------- /scripts/chat/kubernetes/build_chat_ui_and_push.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "NOTE: You should configure ./chat-ui/.env.local first before running this script." 4 | 5 | YOUR_DOCKER_IMAGE_PATH=$1 # e.g., xingyaoww/chat-ui for docker.io 6 | if [ -z "$YOUR_DOCKER_IMAGE_PATH" ]; then 7 | YOUR_DOCKER_IMAGE_PATH=xingyaoww/chat-ui 8 | fi 9 | echo "YOUR_DOCKER_IMAGE_PATH=$YOUR_DOCKER_IMAGE_PATH" 10 | 11 | pushd chat-ui 12 | docker build -t $YOUR_DOCKER_IMAGE_PATH --secret id=DOTENV_LOCAL,src=.env.local . 13 | docker push $YOUR_DOCKER_IMAGE_PATH 14 | popd 15 | -------------------------------------------------------------------------------- /scripts/chat/kubernetes/build_code_execute_api_and_push.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | YOUR_DOCKER_IMAGE_PATH=$1 # e.g., xingyaoww/codeact-execute-api for docker.io 4 | if [ -z "$YOUR_DOCKER_IMAGE_PATH" ]; then 5 | YOUR_DOCKER_IMAGE_PATH=xingyaoww/codeact-execute-api 6 | fi 7 | echo "YOUR_DOCKER_IMAGE_PATH=$YOUR_DOCKER_IMAGE_PATH" 8 | pushd scripts/chat/code_execution 9 | docker build -f Dockerfile.api -t $YOUR_DOCKER_IMAGE_PATH . 
10 | docker push $YOUR_DOCKER_IMAGE_PATH 11 | popd 12 | -------------------------------------------------------------------------------- /scripts/chat/kubernetes/build_code_executor_container_and_push.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | YOUR_DOCKER_IMAGE_PATH=$1 # e.g., xingyaoww/codeact-executor for docker.io 4 | if [ -z "$YOUR_DOCKER_IMAGE_PATH" ]; then 5 | YOUR_DOCKER_IMAGE_PATH=xingyaoww/codeact-executor 6 | fi 7 | echo "YOUR_DOCKER_IMAGE_PATH=$YOUR_DOCKER_IMAGE_PATH" 8 | pushd scripts/chat/code_execution 9 | docker build -f Dockerfile.executor -t $YOUR_DOCKER_IMAGE_PATH . 10 | docker push $YOUR_DOCKER_IMAGE_PATH 11 | popd 12 | -------------------------------------------------------------------------------- /scripts/chat/run_chat_ui.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "NOTE: You should configure ./chat-ui/.env.local first before running this script." 4 | 5 | pushd chat-ui 6 | 7 | docker build \ 8 | -t chat-ui \ 9 | --secret id=DOTENV_LOCAL,src=.env.local \ 10 | . 11 | 12 | docker run \ 13 | --rm \ 14 | --env PORT=80 \ 15 | --name chat-ui \ 16 | --network host \ 17 | chat-ui 18 | 19 | popd 20 | -------------------------------------------------------------------------------- /scripts/chat/run_chat_ui_debug.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pushd chat-ui 4 | npm install 5 | npm run dev -- --port 3000 --host 0.0.0.0 6 | -------------------------------------------------------------------------------- /scripts/chat/start_mongodb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DB_DIR=`pwd`/data/mongodb 3 | # Make sure the directory exists 4 | if [ ! 
-d "$DB_DIR" ]; then 5 | mkdir -p $DB_DIR 6 | fi 7 | echo "Starting mongodb in $DB_DIR" 8 | 9 | USERNAME=codeactagent 10 | PASSWORD=$1 11 | echo "USERNAME=$USERNAME" 12 | echo "PASSWORD=$PASSWORD" 13 | docker run \ 14 | --rm \ 15 | -p 27017:27017 \ 16 | -v $DB_DIR:/data/db \ 17 | --user $(id -u):$(id -g) \ 18 | -e MONGODB_INITDB_ROOT_USERNAME=$USERNAME \ 19 | -e MONGODB_INITDB_ROOT_PASSWORD=$PASSWORD \ 20 | --name mongo-chat-ui \ 21 | -d mongodb/mongodb-community-server:latest 22 | -------------------------------------------------------------------------------- /scripts/data/download_from_hf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | from datasets import load_dataset 4 | 5 | ds = load_dataset("xingyaoww/code-act") 6 | 7 | if not os.path.exists("data/datasets"): 8 | pathlib.Path("data/datasets").mkdir(parents=True, exist_ok=True) 9 | print("Created data/datasets") 10 | 11 | codeact_ds = ds["codeact"] 12 | codeact_df = codeact_ds.to_pandas() 13 | codeact_df.to_json("data/datasets/codeact.jsonl", orient="records", lines=True) 14 | print(f"Saved {len(codeact_df)} examples to data/datasets/codeact.jsonl") 15 | 16 | general_ds = ds["general"] 17 | general_df = general_ds.to_pandas() 18 | general_df.to_json("data/datasets/general.jsonl", orient="records", lines=True) 19 | print(f"Saved {len(general_df)} examples to data/datasets/general.jsonl") 20 | print("Done") 21 | -------------------------------------------------------------------------------- /scripts/docker/Dockerfile.megatron: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/pytorch:23.09-py3 2 | 3 | COPY Megatron-LLM/requirements.txt . 
4 | 5 | RUN pip install -r requirements.txt 6 | RUN pip install --upgrade pip 7 | RUN pip install ninja wandb 8 | RUN pip install -U flash-attn 9 | RUN pip install accelerate 10 | # docker build -t xingyaoww/pt-megatron-llm:v1.0 -f scripts/docker/Dockerfile.megatron . 11 | # docker build -t xingyaoww/pt-megatron-llm:v1.1.1 -f scripts/docker/Dockerfile.megatron . 12 | -------------------------------------------------------------------------------- /scripts/docker/run_megatron_interactive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DOCKER_IMG=xingyaoww/pt-megatron-llm:v1.0 3 | 4 | WORK_DIR=`pwd` 5 | 6 | docker run \ 7 | -e UID=$(id -u) \ 8 | -e WANDB_API_KEY \ 9 | -e HUGGING_FACE_HUB_TOKEN \ 10 | --gpus all -it \ 11 | --rm \ 12 | --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 \ 13 | -v $WORK_DIR:/workspace \ 14 | -v /data/shared:/models \ 15 | $DOCKER_IMG \ 16 | bash -c "useradd --shell /bin/bash -u $UID -o -c '' -m code-act-agent && cd /workspace && su code-act-agent -c 'git config --global credential.helper store' && su code-act-agent" 17 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/.gitignore: -------------------------------------------------------------------------------- 1 | prompt\ examples/ 2 | __pycache__/ 3 | .vscode 4 | .DS_Store -------------------------------------------------------------------------------- /scripts/eval/api-bank/API-Bank-arxiv-version.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/api-bank/API-Bank-arxiv-version.pdf -------------------------------------------------------------------------------- /scripts/eval/api-bank/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .api import API 2 | 3 | import os 4 | 
import importlib 5 | 6 | # Get the directory path of the "apis" folder 7 | apis_dir = os.path.dirname(os.path.abspath(__file__)) 8 | 9 | # Get a list of all the files in the "apis" folder 10 | api_files = [f[:-3] for f in os.listdir(apis_dir) if f.endswith(".py") and f != "__init__.py"] 11 | 12 | # Import all classes in the files in the "apis" folder 13 | for api_file in api_files: 14 | try: 15 | module = importlib.import_module(f".{api_file}", package="apis") 16 | globals().update({k: v for k, v in module.__dict__.items() if not k.startswith("__")}) 17 | except ImportError: 18 | # Handle ImportError, if necessary 19 | pass 20 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/figures/multi-agent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/api-bank/figures/multi-agent.png -------------------------------------------------------------------------------- /scripts/eval/api-bank/figures/three_ability.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/api-bank/figures/three_ability.png -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/Calculator-level-1-1.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you calculate (5+6)*3 for me?"} 2 | {"role": "AI", "text": "Sure, let me handle that."} 3 | {"role": "API", "api_name": "Calculator", "param_dict": {"formula": "(5+6)*3"}, "result": {"api_name": "Calculator", "input": {"formula": "(5+6)*3"}, "output": 33.0, "exception": null}} 4 | {"role": "AI", "text": "The result is 33."} 5 | 
-------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/Calculator-level-1-2.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you perform some basic arithmetic calculation for me?"} 2 | {"role": "AI", "text": "Sure, what calculation do you want me to perform?"} 3 | {"role": "User", "text": "What is the result of (5+3)*6?"} 4 | {"role": "API", "api_name": "Calculator", "param_dict": {"formula": "(5+3)*6"}, "result": {"api_name": "Calculator", "input": {"formula": "(5+3)*6"}, "output": 48.0, "exception": null}} 5 | {"role": "AI", "text": "The result of (5+3)*6 is 48."} 6 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/CancelRegistration-RecordHealthData-level-2-5.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you please cancel my appointment for David Brown with Dr. Davis on April 10th, 2023 at 10:00 am?"} 2 | {"role": "AI", "text": "Sure, I can help you with that. What's your appointment ID?"} 3 | {"role": "User", "text": "56789012"} 4 | {"role": "AI", "text": "I'm cancelling your appointment with ID 56789012."} 5 | {"role": "API", "api_name": "CancelRegistration", "param_dict": {"appointment_id": "56789012"}, "result": {"api_name": "CancelRegistration", "input": {"appointment_id": "56789012"}, "output": "success", "exception": null}} 6 | {"role": "AI", "text": "Your appointment with ID 56789012 has been successfully cancelled. 
Is there anything else I can assist you with?"} 7 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/CancelRegistration-level-1-1.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you cancel my registration for appointment ID 90123456?"} 2 | {"role": "AI", "text": "Sure, may I have the patient name, appointment date, and doctor's name for verification?"} 3 | {"role": "User", "text": "Olivia Davis, October 10th, Dr. Smith."} 4 | {"role": "API", "api_name": "CancelRegistration", "param_dict": {"appointment_id": "90123456"}, "result": {"api_name": "CancelRegistration", "input": {"appointment_id": "90123456"}, "output": "success", "exception": null}} 5 | {"role": "AI", "text": "Your appointment has been successfully canceled. Is there anything else I can assist you with?"} 6 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/CancelRegistration-level-1-2.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Hi, can you cancel my appointment on October 10th with Dr. Smith? My appointment ID is 90123456."} 2 | {"role": "AI", "text": "Sure, I can help you with that. I just need your appointment ID which is 90123456."} 3 | {"role": "API", "api_name": "CancelRegistration", "param_dict": {"appointment_id": "90123456"}, "result": {"api_name": "CancelRegistration", "input": {"appointment_id": "90123456"}, "output": "success", "exception": null}} 4 | {"role": "AI", "text": "Your appointment with Dr. 
Smith on October 10th has been cancelled."} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/CancelRegistration-level-1-3.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "I need to cancel an appointment with Dr. Smith for Olivia Davis on October 10th, 2023. The appointment ID is 90123456."} 2 | {"role": "AI", "text": "I can help you with that."} 3 | {"role": "API", "api_name": "CancelRegistration", "param_dict": {"appointment_id": "90123456"}, "result": {"api_name": "CancelRegistration", "input": {"appointment_id": "90123456"}, "output": "success", "exception": null}} 4 | {"role": "AI", "text": "The status of cancellation is confirmed. Is there anything else you need help with?"} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/CancelTimedSwitch-level-1-1.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you cancel a timed switch for my smart device with ID 10000025 for March 19th at 9:30 AM?"} 2 | {"role": "AI", "text": "Sure, I can help you with that."} 3 | {"role": "API", "api_name": "CancelTimedSwitch", "param_dict": {"device_id": "10000025", "time": "2023-03-19 09:30:00"}, "result": {"api_name": "CancelTimedSwitch", "input": {"device_id": "10000025", "time": "2023-03-19 09:30:00"}, "output": "success", "exception": null}} 4 | {"role": "AI", "text": "Your timed switch has been canceled successfully."} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/CancelTimedSwitch-level-1-3.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you help me cancel a timed switch for one of my 
smart devices?"} 2 | {"role": "AI", "text": "Sure, I can assist you with that. Can you please provide me with the device ID and the time for the switch?"} 3 | {"role": "User", "text": "The device ID is 10000025 and the time is 2023-03-19 09:30:00."} 4 | {"role": "API", "api_name": "CancelTimedSwitch", "param_dict": {"device_id": "10000025", "time": "2023-03-19 09:30:00"}, "result": {"api_name": "CancelTimedSwitch", "input": {"device_id": "10000025", "time": "2023-03-19 09:30:00"}, "output": "success", "exception": null}} 5 | {"role": "AI", "text": "Okay, I have cancelled the timed switch for your smart device. Is there anything else I can help you with?"} 6 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/CancelTimedSwitch-level-1-5.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Please cancel the timed switch of my device with ID 10000025 set for March 19th at 9:30 AM."} 2 | {"role": "AI", "text": "Alright, let me check and cancel the timed switch for that device."} 3 | {"role": "API", "api_name": "CancelTimedSwitch", "param_dict": {"device_id": "10000025", "time": "2023-03-19 09:30:00"}, "result": {"api_name": "CancelTimedSwitch", "input": {"device_id": "10000025", "time": "2023-03-19 09:30:00"}, "output": "success", "exception": null}} 4 | {"role": "AI", "text": "The timed switch has been successfully canceled."} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/DeleteScene-level-1-1.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you help me delete a scene called \"Bedtime\"?"} 2 | {"role": "AI", "text": "Sure, let me handle that for you."} 3 | {"role": "API", "api_name": "DeleteScene", "param_dict": {"name": "Bedtime"}, "result": 
{"api_name": "DeleteScene", "input": {"name": "Bedtime"}, "output": "success", "exception": null}} 4 | {"role": "AI", "text": "The scene called \"Bedtime\" has been deleted successfully."} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/DeleteScene-level-1-2.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you help me delete a scene named \"Relaxing Time\"?"} 2 | {"role": "AI", "text": "Sure, I can do that. Just to confirm, you want me to delete the scene named \"Relaxing Time\". Is that correct?"} 3 | {"role": "User", "text": "Yes, that's correct."} 4 | {"role": "API", "api_name": "DeleteScene", "param_dict": {"name": "Relaxing Time"}, "result": {"api_name": "DeleteScene", "input": {"name": "Relaxing Time"}, "output": "success", "exception": null}} 5 | {"role": "AI", "text": "The \"Relaxing Time\" scene has been deleted successfully. Is there anything else I can help you with?"} 6 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/DeleteScene-level-1-3.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you help me delete a scene named \"Movie Time\"?"} 2 | {"role": "AI", "text": "Sure, I can assist with that. Deleting scene named \"Movie Time\"."} 3 | {"role": "API", "api_name": "DeleteScene", "param_dict": {"name": "Movie Time"}, "result": {"api_name": "DeleteScene", "input": {"name": "Movie Time"}, "output": "success", "exception": null}} 4 | {"role": "AI", "text": "The scene named \"Movie Time\" has been successfully deleted. 
Is there anything else I can help you with?"} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/GetToday-level-1-1.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you tell me today's date?"} 2 | {"role": "AI", "text": "Sure, let me check."} 3 | {"role": "API", "api_name": "GetToday", "param_dict": {}, "result": {"api_name": "GetToday", "input": null, "output": "2023-03-31", "exception": null}} 4 | {"role": "AI", "text": "Today's date is 2023-03-31. How else can I assist you?"} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/GetToday-level-1-2.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you tell me what's the current date?"} 2 | {"role": "AI", "text": "Sure, I can help you with that."} 3 | {"role": "API", "api_name": "GetToday", "param_dict": {}, "result": {"api_name": "GetToday", "input": null, "output": "2023-03-31", "exception": null}} 4 | {"role": "AI", "text": "The current date is 2023-03-31."} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/GetToday-level-1-3.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you tell me the current date?"} 2 | {"role": "AI", "text": "Sure, I can do that."} 3 | {"role": "API", "api_name": "GetToday", "param_dict": {}, "result": {"api_name": "GetToday", "input": null, "output": "2023-03-31", "exception": null}} 4 | {"role": "AI", "text": "Today is 2023-03-31."} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/GetToday-level-1-4.jsonl: 
-------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you please tell me the current date?"} 2 | {"role": "AI", "text": "Sure, I can help you with that."} 3 | {"role": "API", "api_name": "GetToday", "param_dict": {}, "result": {"api_name": "GetToday", "input": null, "output": "2023-03-31", "exception": null}} 4 | {"role": "AI", "text": "Today's date is 2023-03-31. How can I assist you further?"} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/PlayMusic-level-1-1.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you play \"Shape of You\" by Ed Sheeran?"} 2 | {"role": "AI", "text": "Sure, playing \"Shape of You\" by Ed Sheeran now."} 3 | {"role": "API", "api_name": "PlayMusic", "param_dict": {"music_name": "Shape of You"}, "result": {"api_name": "PlayMusic", "input": {"music_name": "Shape of You"}, "output": "music://shape of you", "exception": null}} 4 | {"role": "AI", "text": "Enjoy your music!"} -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/PlayMusic-level-1-2.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you play the song \"Shape of You\" for me?"} 2 | {"role": "AI", "text": "Sure, playing \"Shape of You\" for you now."} 3 | {"role": "API", "api_name": "PlayMusic", "param_dict": {"music_name": "Shape of You"}, "result": {"api_name": "PlayMusic", "input": {"music_name": "Shape of You"}, "output": "music://shape of you", "exception": null}} 4 | {"role": "AI", "text": "Enjoy your music!"} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/PlayMusic-level-1-3.jsonl: 
-------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you play the music \"Shape of You\"?"} 2 | {"role": "AI", "text": "Sure, I can play \"Shape of You\"."} 3 | {"role": "API", "api_name": "PlayMusic", "param_dict": {"music_name": "Shape of You"}, "result": {"api_name": "PlayMusic", "input": {"music_name": "Shape of You"}, "output": "music://shape of you", "exception": null}} 4 | {"role": "AI", "text": "Playing \"Shape of You\" now. Enjoy your music!"} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/QueryRegistration-level-1-1.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you please check if there is any appointment for Olivia Davis with Dr. Smith on 10th Oct 2023?"} 2 | {"role": "AI", "text": "Sure. Let me check."} 3 | {"role": "API", "api_name": "QueryRegistration", "param_dict": {"patient_name": "Olivia Davis", "date": "2023-10-10"}, "result": {"api_name": "QueryRegistration", "input": {"patient_name": "Olivia Davis", "date": "2023-10-10"}, "output": {"90123456": {"patient_name": "Olivia Davis", "date": "2023-10-10", "doctor_name": "Dr. Smith"}}, "exception": null}} 4 | {"role": "AI", "text": "According to our records, there is an appointment for Olivia Davis with Dr. Smith on October 10th, 2023."} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/QueryRegistration-level-1-2.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you help me find out my next appointment with Dr. Smith on October 10th, 2023? 
My patient ID is 90123456 and my name is Olivia Davis."} 2 | {"role": "AI", "text": "Sure, let me check that for you."} 3 | {"role": "API", "api_name": "QueryRegistration", "param_dict": {"patient_name": "Olivia Davis", "date": "2023-10-10"}, "result": {"api_name": "QueryRegistration", "input": {"patient_name": "Olivia Davis", "date": "2023-10-10"}, "output": {"90123456": {"patient_name": "Olivia Davis", "date": "2023-10-10", "doctor_name": "Dr. Smith"}}, "exception": null}} 4 | {"role": "AI", "text": "According to our records, your next appointment with Dr. Smith is on October 10th, 2023."} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/QueryRegistration-level-1-4.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you help me find my upcoming appointment with Dr. Smith on October 10th, 2023 under the name Olivia Davis with patient ID 90123456?"} 2 | {"role": "AI", "text": "Sure, let me check for you."} 3 | {"role": "API", "api_name": "QueryRegistration", "param_dict": {"patient_name": "Olivia Davis", "date": "2023-10-10"}, "result": {"api_name": "QueryRegistration", "input": {"patient_name": "Olivia Davis", "date": "2023-10-10"}, "output": {"90123456": {"patient_name": "Olivia Davis", "date": "2023-10-10", "doctor_name": "Dr. Smith"}}, "exception": null}} 4 | {"role": "AI", "text": "According to our record, you have an appointment with Dr. Smith on October 10th, 2023."} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/QueryRegistration-level-1-5.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you tell me the appointments for Olivia Davis on October 10th, 2023 with Dr. Smith?"} 2 | {"role": "AI", "text": "Sure! 
Let me check."} 3 | {"role": "API", "api_name": "QueryRegistration", "param_dict": {"patient_name": "Olivia Davis", "date": "2023-10-10"}, "result": {"api_name": "QueryRegistration", "input": {"patient_name": "Olivia Davis", "date": "2023-10-10"}, "output": {"90123456": {"patient_name": "Olivia Davis", "date": "2023-10-10", "doctor_name": "Dr. Smith"}}, "exception": null}} 4 | {"role": "AI", "text": "According to our system, Olivia Davis has an appointment with Dr. Smith at 2pm on October 10th, 2023."} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/QueryScene-level-1-4.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you please query the Morning Routine scene for me?"} 2 | {"role": "AI", "text": "Sure, let me check."} 3 | {"role": "API", "api_name": "QueryScene", "param_dict": {"name": "Morning Routine"}, "result": {"api_name": "QueryScene", "input": {"name": "Morning Routine"}, "output": [{"name": "Light", "description": "Smart light in the living room"}, {"name": "Coffee Maker", "description": "Smart coffee maker in the kitchen"}, {"name": "Blinds", "description": "Smart blinds in the living room"}], "exception": null}} 4 | {"role": "AI", "text": "These are the devices in the Morning Routine scene: Light, Coffee Maker, and Blinds."} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/QueryStock-level-1-1.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "What is the stock price of SQ on March 14th, 2022?"} 2 | {"role": "AI", "text": "Let me check for you."} 3 | {"role": "API", "api_name": "QueryStock", "param_dict": {"stock_code": "SQ", "date": "2022-03-14"}, "result": {"api_name": "QueryStock", "input": {"stock_code": "SQ", "date": 
"2022-03-14"}, "output": 248.9, "exception": null}} 4 | {"role": "AI", "text": "The stock price of SQ on March 14th, 2022 is 248.9."} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/QueryStock-level-1-2.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you tell me about the stock price of SQ on March 15th, 2022?"} 2 | {"role": "AI", "text": "Sure, I can help with that."} 3 | {"role": "API", "api_name": "QueryStock", "param_dict": {"stock_code": "SQ", "date": "2022-03-15"}, "result": {"api_name": "QueryStock", "input": {"stock_code": "SQ", "date": "2022-03-15"}, "output": 250.3, "exception": null}} 4 | {"role": "AI", "text": "The stock price of SQ on March 15th, 2022 was 250.3."} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/QueryStock-level-1-3.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you tell me the stock price of SQ on March 14th, 2022?"} 2 | {"role": "AI", "text": "Sure, to query the stock price of a given stock, I need the stock code and the date. 
The stock code for SQ is SQ and the date you're looking for is March 14th."} 3 | {"role": "API", "api_name": "QueryStock", "param_dict": {"stock_code": "SQ", "date": "2022-03-14"}, "result": {"api_name": "QueryStock", "input": {"stock_code": "SQ", "date": "2022-03-14"}, "output": 248.9, "exception": null}} 4 | {"role": "AI", "text": "The stock price of SQ on March 14th was 248.9."} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/QueryStock-level-1-4.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you please tell me the stock price of SQ on March 14th, 2022?"} 2 | {"role": "AI", "text": "Sure, let me check that for you."} 3 | {"role": "API", "api_name": "QueryStock", "param_dict": {"stock_code": "SQ", "date": "2022-03-14"}, "result": {"api_name": "QueryStock", "input": {"stock_code": "SQ", "date": "2022-03-14"}, "output": 248.9, "exception": null}} 4 | {"role": "AI", "text": "The stock price of SQ on March 14th, 2022 was 248.9."} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/QueryStock-level-1-5.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you tell me the stock price of SQ on March 14, 2022?"} 2 | {"role": "AI", "text": "Sure, let me check that for you."} 3 | {"role": "API", "api_name": "QueryStock", "param_dict": {"stock_code": "SQ", "date": "2022-03-14"}, "result": {"api_name": "QueryStock", "input": {"stock_code": "SQ", "date": "2022-03-14"}, "output": 248.9, "exception": null}} 4 | {"role": "AI", "text": "The stock price of SQ on March 14, 2022 was 248.9."} 5 | -------------------------------------------------------------------------------- 
/scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/RegisterUser-ForgotPassword-level-2-2.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you help me register on this app?"} 2 | {"role": "AI", "text": "Sure. Please provide your username, password, and email address."} 3 | {"role": "User", "text": "My username is xyz, password is abc123, and email is xyz@example.com."} 4 | {"role": "API", "api_name": "RegisterUser", "param_dict": {"username": "xyz", "password": "abc123", "email": "xyz@example.com"}, "result": {"input": {"username": "xyz", "password": "abc123", "email": "xyz@example.com"}, "output": {"token": "krp4ejpwc6wepfxoo3vi64lckwkakj8h"}, "exception": null}} 5 | {"role": "AI", "text": "You have been registered successfully. What would you like to do next?"} 6 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/RegisterUser-ForgotPassword-level-2-3.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you help me register as a user?"} 2 | {"role": "AI", "text": "Sure, what's your desired username, password and email?"} 3 | {"role": "User", "text": "My username is johnsmith123, password is 1234password and email is johnsmith123@example.com."} 4 | {"role": "API", "api_name": "RegisterUser", "param_dict": {"username": "johnsmith123", "password": "1234password", "email": "johnsmith123@example.com"}, "result": {"input": {"username": "johnsmith123", "password": "1234password", "email": "johnsmith123@example.com"}, "output": {"token": "o21h1snglvh8zvugvxm10gktcm4tqb6h"}, "exception": null}} 5 | {"role": "AI", "text": "Your registration is successful. 
You can now start exploring our services."} 6 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/SearchEngine-level-1-1.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/SearchEngine-level-1-1.jsonl -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/SendEmail-level-1-1.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you help me send an email to john@example.com with subject \"Meeting Reminder\" and content \"Don't forget our meeting tomorrow at 2pm.\"?"} 2 | {"role": "AI", "text": "Sure, I can help you with that."} 3 | {"role": "API", "api_name": "SendEmail", "param_dict": {"receiver": "john@example.com", "subject": "Meeting Reminder", "content": "Don"}, "result": {"api_name": "SendEmail", "input": {"receiver": "john@example.com", "subject": "Meeting Reminder", "content": "Don"}, "output": "success", "exception": null}} 4 | {"role": "AI", "text": "Your email has been sent."} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/SpeechRecognition-level-1-1.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you recognize speech from an audio URL?"} 2 | {"role": "AI", "text": "Yes, I can. 
Which URL should I use?"} 3 | {"role": "User", "text": "https://example.com/voice22.wav."} 4 | {"role": "AI", "text": "Okay, let me recognize the speech."} 5 | {"role": "API", "api_name": "SpeechRecognition", "param_dict": {"url": "https://example.com/voice22.wav"}, "result": {"api_name": "SpeechRecognition", "input": {"url": "https://example.com/voice22.wav"}, "output": "What's the best way to get to the beach from here?", "exception": null}} 6 | {"role": "AI", "text": "The transcript of the audio is \"What's the best way to get to the beach from here?\"."} 7 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/Translate-level-1-1.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you translate \"Hello, how are you?\" to French?"} 2 | {"role": "AI", "text": "Sure, I can do that."} 3 | {"role": "API", "api_name": "Translate", "param_dict": {"src": "Hello, how are you?", "tgt_lang": "fr"}, "result": {"api_name": "Translate", "input": {"src": "Hello, how are you?", "tgt_lang": "fr"}, "output": "Bonjour comment allez-vous?", "exception": null}} 4 | {"role": "AI", "text": "The translation is \"Bonjour comment allez-vous?\""} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/Translate-level-1-2.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you translate \"Hello\" to Spanish?"} 2 | {"role": "AI", "text": "Sure, I can help with that."} 3 | {"role": "API", "api_name": "Translate", "param_dict": {"src": "Hello", "tgt_lang": "es"}, "result": {"api_name": "Translate", "input": {"src": "Hello", "tgt_lang": "es"}, "output": "Hola", "exception": null}} 4 | {"role": "AI", "text": "Here is your translated text: \"Hola!\""} 5 | 
-------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/Translate-level-1-3.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you please help me translate this text to Spanish?"} 2 | {"role": "AI", "text": "Sure, please tell me the text you would like me to translate."} 3 | {"role": "User", "text": "The quick brown fox jumps over the lazy dog."} 4 | {"role": "API", "api_name": "Translate", "param_dict": {"src": "The quick brown fox jumps over the lazy dog.", "tgt_lang": "es"}, "result": {"api_name": "Translate", "input": {"src": "The quick brown fox jumps over the lazy dog.", "tgt_lang": "es"}, "output": "El veloz zorro marr\u00f3n salta sobre el perro perezoso.", "exception": null}} 5 | {"role": "AI", "text": "The translated text is \"El veloz zorro marr\u00f3n salta sobre el perro perezoso.\". Is there anything else I can help you with?"} 6 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv1-lv2-samples/level-1-given-desc/Translate-level-1-4.jsonl: -------------------------------------------------------------------------------- 1 | {"role": "User", "text": "Can you translate \"hello\" to French?"} 2 | {"role": "AI", "text": "Sure, I can help with that."} 3 | {"role": "API", "api_name": "Translate", "param_dict": {"src": "hello", "tgt_lang": "fr"}, "result": {"api_name": "Translate", "input": {"src": "hello", "tgt_lang": "fr"}, "output": "Bonjour", "exception": null}} 4 | {"role": "AI", "text": "The translated text is \"Bonjour\"."} 5 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/lv3-samples/Financial Management.txt: -------------------------------------------------------------------------------- 1 | Scene: Financial Management 2 | 3 | First Utterance: Help me check the remaining money in the 
bank this month. 4 | 5 | 6 | Key Info: 7 | 8 | "JohnDoe":{ 9 | "password":"pass123", 10 | "token":"a9s8d7f6g5h4j3k2l1", 11 | "email":"johndoe@example.com" 12 | }, 13 | 14 | - Bank of China 15 | - Current balance: 5000 USD 16 | - Monthly salary: 3000 USD 17 | - Monthly expenses: 2000 USD 18 | - Credit card Time: 2023-04-15 00:00:00 19 | 20 | API Call: 21 | GetUserToken(username="JohnDoe", password="pass123") 22 | QueryBalance(token="a1b2c3d4e5f6") 23 | Calculator(formula="5000+3000-2000") 24 | AddReminder(token="a9s8d7f6g5h4j3k2l1", content="Pay credit card bill", time="2023-04-15 00:00:00") 25 | -------------------------------------------------------------------------------- /scripts/eval/api-bank/requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.25.1 2 | sentence_transformers==2.2.2 3 | tqdm==4.64.1 4 | nltk==3.6.5 5 | googletrans==3.1.0a0 6 | selenium 7 | bs4 8 | rank_bm25 9 | -------------------------------------------------------------------------------- /scripts/eval/gsm8k/gsm8k.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source scripts/eval/source.sh 3 | check_conda_env_and_activate code-act-agent 4 | echo_status 5 | 6 | OUTPUT_DIR=$1 # "data/ckpts/Llama-2-7b-megatron-tp2-pp2/hf/mint_agent_iter_52" 7 | MODEL_NAME="code-act-agent" 8 | 9 | OUTPUT_DIR=$OUTPUT_DIR/eval/gsm8k 10 | check_is_done $OUTPUT_DIR 11 | mkdir -p $OUTPUT_DIR 12 | 13 | check_var "OPENAI_API_BASE" 14 | 15 | set -xe 16 | python3 scripts/eval/gsm8k/evaluate_gsm8k.py \ 17 | --model $MODEL_NAME \ 18 | --sample-input-file data/eval/gsm8k \ 19 | --sample-output-file $OUTPUT_DIR/gsm8k_res.jsonl \ 20 | 21 | # Mark the evaluation as finished 22 | touch $OUTPUT_DIR/DONE 23 | -------------------------------------------------------------------------------- /scripts/eval/human_eval/human_eval.sh: -------------------------------------------------------------------------------- 1 | 
#!/bin/bash 2 | source scripts/eval/source.sh 3 | check_conda_env_and_activate code-act-agent 4 | echo_status 5 | 6 | OUTPUT_DIR=$1 # "data/ckpts/Llama-2-7b-megatron-tp2-pp2/hf/mint_agent_iter_52" 7 | MODEL_NAME="code-act-agent" 8 | 9 | OUTPUT_DIR=$OUTPUT_DIR/eval/human_eval 10 | check_is_done $OUTPUT_DIR 11 | mkdir -p $OUTPUT_DIR 12 | 13 | check_var "OPENAI_API_BASE" 14 | 15 | set -xe 16 | python3 scripts/eval/human_eval/evaluate_human_eval.py \ 17 | --model $MODEL_NAME \ 18 | --save_dir $OUTPUT_DIR 19 | 20 | evaluate_functional_correctness $OUTPUT_DIR/samples.jsonl > $OUTPUT_DIR/result.txt 21 | 22 | # Mark the evaluation as finished 23 | touch $OUTPUT_DIR/DONE 24 | -------------------------------------------------------------------------------- /scripts/eval/m3tooleval/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Task, ToolType, ActionMode, get_task_iterator 2 | from .impl import ( 3 | message_decoder, 4 | dna_sequencer, 5 | trade_calculator, 6 | travel_itinerary_planning, 7 | web_browsing 8 | ) 9 | -------------------------------------------------------------------------------- /scripts/eval/m3tooleval/tasks/impl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/m3tooleval/tasks/impl/__init__.py -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/.gitignore: -------------------------------------------------------------------------------- 1 | # Distribution / packaging 2 | .Python 3 | build/ 4 | develop-eggs/ 5 | dist/ 6 | downloads/ 7 | eggs/ 8 | .eggs/ 9 | lib/ 10 | lib64/ 11 | parts/ 12 | sdist/ 13 | var/ 14 | wheels/ 15 | share/python-wheels/ 16 | *.egg-info/ 17 | .installed.cfg 18 | *.egg 19 | MANIFEST 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/README.md: -------------------------------------------------------------------------------- 1 | # Computergym 2 | 3 | ## Install 4 | ```sh 5 | pip install -e . 6 | ``` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | from gym.envs.registration import register 5 | 6 | sys.path.append(os.path.dirname(os.path.abspath(__file__))) 7 | 8 | 9 | _AVAILABLE_ENVS = { 10 | "MiniWoBEnv-v0": { 11 | "entry_point": "computergym.miniwob.base_env:MiniWoBEnv", 12 | "discription": "MiniWoB++ environments", 13 | }, 14 | } 15 | 16 | for env_id, val in _AVAILABLE_ENVS.items(): 17 | register(id=env_id, entry_point=val.get("entry_point")) 18 | -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/__init__.py -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/.gitignore: -------------------------------------------------------------------------------- 1 | twistd.pid 2 | -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_0.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_0.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_1.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_2.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_3.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_4.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_5.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_6.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_7.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_8.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_8.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/checkbox-numbers/ch_9.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/drag-cube/blank.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/drag-cube/blank.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/delete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/delete.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/forward.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/forward.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/left-arrow-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/left-arrow-white.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/left-arrow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/left-arrow.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/reply.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/reply.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/search.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/search.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/send.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/send.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/star-clicked.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/star-clicked.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/star.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/email-inbox/star.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/ajax-loader.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/ajax-loader.gif -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/file.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/file.gif -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/folder-closed.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/folder-closed.gif -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/folder.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/folder.gif -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/minus.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/minus.gif -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/plus.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/plus.gif -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-black-line.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-black-line.gif -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-black.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-black.gif -------------------------------------------------------------------------------- 
/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-default-line.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-default-line.gif -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-default.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-default.gif -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-famfamfam-line.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-famfamfam-line.gif -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-famfamfam.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-famfamfam.gif -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-gray-line.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-gray-line.gif -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-gray.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-gray.gif -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-red-line.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-red-line.gif -------------------------------------------------------------------------------- 
/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-red.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/navigate-tree/images/treeview-red.gif -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/like-hover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/like-hover.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/like.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/like.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/more-hover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/more-hover.png 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/more.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/more.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/reply-hover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/reply-hover.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/reply.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/reply.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/retweet-hover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/retweet-hover.png 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/retweet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/retweet.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/share-hover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/share-hover.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/share.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/social-media/share.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/tic-tac-toe/o.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/tic-tac-toe/o.png 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/tic-tac-toe/x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/common/special/tic-tac-toe/x.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_444444_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_444444_256x240.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_555555_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_555555_256x240.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_777620_256x240.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_777620_256x240.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_777777_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_777777_256x240.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_cc0000_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_cc0000_256x240.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_ffffff_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/core/jquery-ui/images/ui-icons_ffffff_256x240.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/apps/common/js/jquery/aacom/plugins/aaCache.js: 
-------------------------------------------------------------------------------- 1 | var aaCache=(function($j){var cache={};function _get(key){if(!cache[key]){cache[key]=$j(key);}return cache[key];}function _remove(key){if(cache.hasOwnProperty(key)){return(delete cache[key]);}return true;}return{get:_get,remove:_remove};}(jQuery)); -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/american-v2/americansans-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/american-v2/americansans-bold.woff -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/american-v2/americansans-light.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/american-v2/americansans-light.woff -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/american-v2/americansans-medium.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/american-v2/americansans-medium.woff 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/american-v2/americansans-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/american-v2/americansans-regular.woff -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/icons/american-icons-v4-4.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/fonts/icons/american-icons-v4-4.woff -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/icons/loading.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/icons/loading.gif -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/aa-flight-icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/aa-flight-icon.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/aa-icons-flags-sprite.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/aa-icons-flags-sprite.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/aa-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/aa-logo.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/down-arrow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/down-arrow.png -------------------------------------------------------------------------------- 
/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/favicon.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/oneworld.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/oneworld.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow-down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow-down.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow-vertical-150.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow-vertical-150.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow12-down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow12-down.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow12-up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow12-up.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow3-down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/chrome/rebrand/shadow3-down.png -------------------------------------------------------------------------------- 
/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/graphics/icons/aa-jqueryUIicons-sprite.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/content/images/graphics/icons/aa-jqueryUIicons-sprite.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/AA/js/aa/modules/commonsetup.js: -------------------------------------------------------------------------------- 1 | AAcom.modules.commonsetup=function(AAUI){/* NOT NEEDED */}; 2 | -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/aura.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/aura.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/cal3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/cal3.png -------------------------------------------------------------------------------- 
/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/chkboxes3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/chkboxes3.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/clear_text2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/clear_text2.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/collapse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/collapse.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/expand.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/expand.png 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/flight_arrow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/flight_arrow.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/geo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/geo.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/home.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/home.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/info2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/info2.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/leftright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/leftright.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logo2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logo2.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logos/AA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logos/AA.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logos/AS.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logos/AS.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logos/DL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logos/DL.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logos/VX.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/images/logos/VX.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/stylesheets/circular/ASCircularWeb-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/stylesheets/circular/ASCircularWeb-Bold.woff -------------------------------------------------------------------------------- 
/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/stylesheets/circular/ASCircularWeb-Book.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto-medium/stylesheets/circular/ASCircularWeb-Book.woff -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/aura.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/aura.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/cal3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/cal3.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/chkboxes3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/chkboxes3.png 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/clear_text2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/clear_text2.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/collapse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/collapse.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/expand.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/expand.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/flight_arrow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/flight_arrow.png 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/geo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/geo.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/home.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/home.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/info2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/info2.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/leftright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/leftright.png 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logo2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logo2.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logos/AA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logos/AA.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logos/AS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logos/AS.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logos/DL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logos/DL.png 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logos/VX.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/images/logos/VX.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/stylesheets/circular/ASCircularWeb-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/stylesheets/circular/ASCircularWeb-Bold.woff -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/stylesheets/circular/ASCircularWeb-Book.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska-auto/stylesheets/circular/ASCircularWeb-Book.woff -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/aura.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/aura.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/cal3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/cal3.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/chkboxes3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/chkboxes3.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/clear_text2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/clear_text2.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/collapse.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/collapse.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/expand.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/expand.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/flight_arrow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/flight_arrow.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/geo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/geo.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/home.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/home.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/info2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/info2.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/leftright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/leftright.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logo2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logo2.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logos/AA.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logos/AA.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logos/AS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logos/AS.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logos/DL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logos/DL.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logos/VX.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/images/logos/VX.png -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/stylesheets/circular/ASCircularWeb-Bold.woff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/stylesheets/circular/ASCircularWeb-Bold.woff -------------------------------------------------------------------------------- /scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/stylesheets/circular/ASCircularWeb-Book.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/computergym/computergym/miniwob/miniwob_interface/html/flight/Alaska/stylesheets/circular/ASCircularWeb-Book.woff -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 3 | 4 | First, given the instruction that matches the regular expression, "{type}", it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/choose-list/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/choose-list/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 3 | 4 | First, given the instruction that matches the regular expression, "{type}", it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/choose-list/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/choose-list/example.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/choose-list/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/choose-list/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/choose-list/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/choose-list/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/choose-list/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-button-sequence/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-button-sequence/base.txt: 
-------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 3 | 4 | First, given the instruction that matches the regular expression, "{type}", it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-button-sequence/example.txt: -------------------------------------------------------------------------------- 1 | task: Click button ONE, then click button TWO. 2 | plan: 3 | 1. Click the button with xpath "//button[@id='subbtn']". 4 | 2. Click the button with xpath "//button[@id='subbtn2']". 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-button-sequence/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-button-sequence/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-button-sequence/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-button-sequence/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, the single specific instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-button/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-button/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. 
This instruction should specify the target keyboard input for the agent to type. Note that pressing enter should be a separate instruction from typing. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. 7 | -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-button/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is to click "Ok" button. 3 | Here is a plan to solve this example task on the same webpage with the autonomous agent. 4 | 1. clickxpath //button[text()="Ok"] 5 | -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-button/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-button/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-button/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-button/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, the single specific instruction for solving the task should be ` 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-large/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-large/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 3 | 4 | First, given the instruction that matches the regular expression, "{type}", it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-large/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is to click the checkboxes labeled "PtM", "H0t1p0w", "fKWS", "moTD0Z", "LC", "1QDslel", "LCVeO5", "PqN" and then click the "Submit" button. 3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 4 | 1. clickxpath //label[text() = 'PtM']/input 5 | 2. clickxpath //label[text() = 'H0t1p0w']/input 6 | 3. clickxpath //label[text() = 'fKWS']/input 7 | 4. clickxpath //label[text() = 'moTD0Z']/input 8 | 5. clickxpath //label[text() = 'LC']/input 9 | 6. 
clickxpath //label[text() = '1QDslel']/input 10 | 7. clickxpath //label[text() = 'LCVeO5']/input 11 | 8. clickxpath //label[text() = 'PqN']/input 12 | 9. clickxpath //button[@id='subbtn'] -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-large/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-large/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-large/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-large/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, the single specific instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-soft/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-soft/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control 
computers. There are two types of instructions it can execute. 3 | 4 | First, given the instruction that matches the regular expression, "{type}", it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-soft/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | task: select the words similar to bunnies, tiny, scared, swine and click the Submit button. 3 | plan: we need to select synonyms of the given words and the words themselves. 4 | 1. clickxpath //label[text() = 'rabbit']/input // rabbit is a synonym of bunnies 5 | 2. clickxpath //label[text() = 'pig']/input // pig is a synonym of swine 6 | 3. clickxpath //label[text() = 'panicked']/input // panicked is a synonym of scared 7 | 4. clickxpath //label[text() = 'tiny']/input 8 | 5. 
clickxpath //button[@id='subbtn'] 9 | -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-soft/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-soft/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-soft/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-soft/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, this instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-transfer/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-transfer/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 
3 | 4 | First, given the instruction that matches the regular expression, "{type}", it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-transfer/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is to select the checkboxes with the values 5h, KbtHKF, jg2aSq, and chVBED, and then click the Submit button. 3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 4 | 1. clickxpath //label[text() = '5h']/input 5 | 2. clickxpath //label[text() = 'KbtHKF']/input 6 | 3. clickxpath //label[text() = 'jg2aSq']/input 7 | 4. clickxpath //label[text() = 'chVBED']/input 8 | 5. clickxpath //button[@id='subbtn'] 9 | 10 | The task is to select nothing and click the Submit button on the webpage. 11 | Here is a plan to solve this example task on the webpage with the autonomous agent. 12 | 1. 
clickxpath //button[@id='subbtn'] -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-transfer/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-transfer/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-transfer/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes-transfer/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, the single specific instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 
3 | 4 | First, given the instruction that matches the regular expression, "{type}", it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is to select the checkboxes with the values 5h, KbtHKF, jg2aSq, and chVBED, and then click the Submit button. Here is a plan to solve this example task on the webpage with the autonomous agent. 3 | 1. clickxpath //label[text() = '5h']/input 4 | 2. clickxpath //label[text() = 'KbtHKF']/input 5 | 3. clickxpath //label[text() = 'jg2aSq']/input 6 | 4. clickxpath //label[text() = 'chVBED']/input 7 | 5. clickxpath //button[@id='subbtn'] 8 | 9 | The task is to select nothing and click the Submit button on the webpage. 10 | Here is a plan to solve this example task on the webpage with the autonomous agent. 11 | 1. 
clickxpath //button[@id='subbtn'] -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-checkboxes/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, the single specific instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-collapsible-2/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-collapsible-2/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 
3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-collapsible-2/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | task: find a proper section that includes the text "adipiscing" and click on the link "adipiscing" on the webpage. 3 | plan: 4 | 1. clickxpath //*[@id="ui-id-3"] // click the tab that includes the link with a text "adipiscing" 5 | 2. clickxpath //*[@class="alink" and text()="adipiscing"] 6 | 7 | task: find a proper section that includes the text "lectus" and click on the link "lectus" on the webpage. 8 | plan: 9 | 1. clickxpath //*[@id="ui-id-19"] // click the tab that includes the link with a text "lectus" 10 | 2. 
clickxpath //span[@class='alink' and contains(text(), 'lectus')] -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-collapsible-2/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-collapsible-2/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-collapsible-2/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-collapsible-2/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, this instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-collapsible/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-collapsible/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 
3 | 4 | First, given the instruction that matches the regular expression, "{type}", it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-collapsible/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is to expand the section below and click the submit button. 3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 4 | 1. Click the expanded section by typing the xpath: "//*[@id="ui-id-1"]". 5 | 2. Click the submit button by typing the xpath: "//button[@id='subbtn']". 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-collapsible/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-collapsible/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-collapsible/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-collapsible/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, the single specific instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-dialog-2/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-dialog-2/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. 
This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-dialog-2/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is to close the dialog box by clicking the "x". 3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 4 | 1. Clickxpath /html/body/div[4]/div[1]/button 5 | 6 | The task is to click the button in the dialog box labeled "OK". 7 | Here is a plan to solve this example task on the webpage with the autonomous agent. 8 | 1. clickxpath //div[@class='ui-dialog-buttonset']/button[2] 9 | 10 | The task is to click the button in the dialog box labeled "Cancel". 11 | Here is a plan to solve this example task on the webpage with the autonomous agent. 12 | 1. 
Clickxpath //div[@class='ui-dialog-buttonpane ui-widget-content ui-helper-clearfix']/div/button[text()='Cancel'] 13 | -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-dialog-2/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-dialog-2/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-dialog-2/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-dialog-2/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/click-dialog-2/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-dialog/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-dialog/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 
3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-dialog/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is to close the dialog box by clicking the "x". 3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 4 | 5 | 1. Clickxpath //button[@class='ui-button ui-corner-all ui-widget ui-button-icon-only ui-dialog-titlebar-close'] 6 | -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-dialog/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-dialog/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-dialog/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- 
/scripts/eval/miniwob++/prompt/click-dialog/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/click-dialog/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-menu/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-menu/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is for the agent to select Leonelle > Jane > Amabel from the HTML code provided. 3 | Here is a plan to solve this example task on the webpage with the autonomous agent. You must use "movemouse" not "clickxpath" to expand option. 4 | 1. movemouse //*[text()="Leonelle"] 5 | 2. movemouse //*[text()="Jane"] 6 | 3. 
clickxpath //*[text()="Amabel"] 7 | -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-menu/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-menu/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-menu/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-menu/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/click-menu/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-option/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-option/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 
3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Note that pressing enter should be a separate instruction from typing. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. 7 | -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-option/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is to select the radio button labeled "Rf" and click the "Submit" button. 3 | Here is a plan to solve this example task on the same webpage with the autonomous agent. 4 | 1. clickxpath //label[input[@id='ch4']] 5 | 2. clickxpath //button[@id='subbtn'] -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-option/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-option/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-option/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-option/update_action.txt: 
-------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, the single specific instruction for solving the task should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-scroll-list/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-scroll-list/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is to select Canada, Bosnia and Herzegovina from the scroll list and click Submit. 3 | Here is a plan to solve this example task on the same webpage with the autonomous agent. 4 | 5 | 1. clickoption //option[text() = 'Canada'] 6 | 2. clickoption //option[text() = 'Bosnia'] 7 | 3. clickoption //option[text() = 'Herzegovina'] 8 | 4. 
Clickxpath //*[@class="secondary-action"] -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-scroll-list/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-scroll-list/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-scroll-list/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-scroll-list/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, the single specific instruction for solving the task should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-shades/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-shades/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 
3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-shades/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is to select all the shades of red and press the Submit button. 3 | Here is a plan to solve a task on the webpage with the autonomous agent. The first instruction will click all the shades of red. 4 | 5 | 1. Clickxpath //span[@data-color="red"] 6 | 2. 
Clickxpath //*[@id="submit"] -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-shades/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-shades/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-shades/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-shades/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, the single specific instruction for solving the task should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-shape/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-shape/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is to click on the magenta digit on the webpage. 3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 4 | 1. Clickxpath //*[name()='svg']//*[name()='text' and @fill='magenta'] 5 | 6 | The task is to click on the large 4 on the webpage. 
7 | Here is a plan to solve this example task on the webpage with the autonomous agent. 8 | 1. clickxpath //*[name()='svg']//*[name()='text' and @fill="aqua" and @font-size="20px" and text()="4"] -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-shape/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-shape/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-shape/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-shape/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, this instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab-2-hard/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab-2-hard/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 
3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab-2-hard/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | task: switch between the tabs to find and click on the link "amet". 3 | plan: 4 | 1. Clickxpath //a[text()="1"] // first, open the proper tab where the link with a text "amet" is in 5 | 2. Clickxpath //*[@id="tabs-1"]/p/span[text()="amet"] 6 | 7 | task: switch between the tabs to find and click on the link "gur". 8 | plan: 9 | 1. Clickxpath //a[text()="4"] // first, open the proper tab where the link with a text "gur" is in 10 | 2. 
Clickxpath //*[@id="tabs-4"]/p/span[text()="gur"] 11 | -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab-2-hard/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab-2-hard/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab-2-hard/rci_action.txt: -------------------------------------------------------------------------------- 1 | This action does not match the regular expressions. The updated instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab-2-hard/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, this instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab-2/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab-2/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 
3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab-2/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | task: switch between the tabs to find and click on the link "vitae." 3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 4 | 1. clickxpath //a[text()="Tab #2"] // first, open the proper tab where the link with a text "vitae." is in 5 | 2. 
clickxpath //*[@id="tabs-2"]/p/span[text()="vitae."] 6 | -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab-2/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab-2/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab-2/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab-2/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, this instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. 
This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is to click on Tab #3 on the webpage. 3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 4 | 1. clickxpath //a[text()="Tab #3"] -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-tab/update_action.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/click-tab/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-test/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-test/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-test/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is to click the button with the id "subbtn" on the webpage. 3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 4 | 1. 
Clickxpath //*[@id="subbtn"] -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-test/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-test/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-test/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-test/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, this single specific instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-widget/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-widget/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. 
This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-widget/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is to click on the "checkbox" widget on the webpage. 3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 4 | 1. clickxpath //input[@data-type="checkbox"] 5 | 6 | The task is for the agent to click on the "text" widget on the webpage. 7 | Here is a plan to solve this example task on the webpage with the autonomous agent. 8 | 1. clickxpath //input[@data-type="text"] // You must click text not textarea 9 | 10 | The task is to click on the "button" widget. 11 | Here is a plan to solve this example task on the webpage with the autonomous agent. 12 | 1. 
clickxpath //button[@data-type="button"] 13 | -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-widget/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-widget/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-widget/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/click-widget/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, this instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/count-shape/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/count-shape/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 3 | 4 | First, given the instruction that matches the regular expression, "{type}", it can type a list of characters via the keyboard. 
This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/count-shape/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | task: How many small letters are there? 3 | plan: we need to count the number of small letters in the svg area. 4 | clickxpath //*[@id="count-buttons"]/button[5] 5 | task: How many red items are there? 6 | plan: we need to count the number of red items in the svg area. 7 | clickxpath //*[@id="count-buttons"]/button[2] -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/count-shape/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/count-shape/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/count-shape/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/count-shape/update_action.txt: 
-------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, this instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-forward-nl-turk/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-forward-nl-turk/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-forward-nl-turk/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-forward-nl-turk/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-forward-nl-turk/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-forward-nl-turk/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/email-inbox-forward-nl-turk/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-forward-nl/action.txt: -------------------------------------------------------------------------------- 1 | the next instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-forward-nl/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 
3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-forward-nl/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-forward-nl/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-forward-nl/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-forward-nl/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/email-inbox-forward-nl/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-nl-turk/action.txt: 
-------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-nl-turk/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 3 | 4 | First, given the instruction that matches the regular expression, "{type}", it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-nl-turk/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-nl-turk/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-nl-turk/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox-nl-turk/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/email-inbox-nl-turk/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 
3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/email-inbox/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/email-inbox/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/enter-date/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction to 
solve the task should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/enter-date/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/enter-date/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan for the task on the above webpage: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/enter-date/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/enter-date/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, this instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/enter-time/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/enter-time/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. 
This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/enter-time/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is to enter 9:01 AM as the time in the input box and press the submit button. 3 | Here is a plan to solve this example task on the webpage with the autonomous agent. 4 | 1. clickxpath //*[@id="tt"] 5 | 2. type 0901AM 6 | 3. clickxpath //*[@id="subbtn"] 7 | 8 | The task is to enter 10:07 PM as the time in the input box and press the submit button. 9 | Here is a plan to solve this example task on the webpage with the autonomous agent. 10 | 1. clickxpath //*[@id="tt"] 11 | 2. type 1007PM 12 | 3. 
clickxpath //*[@id="subbtn"] -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/enter-time/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/enter-time/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/enter-time/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/enter-time/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/enter-time/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/example.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/focus-text/action.txt: 
-------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/focus-text/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is for the agent to focus into the textbox on the webpage. 3 | Here is a plan to solve the example task. 4 | 1. clickxpath //*[@id="area"]/input[@id="tt"] -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/focus-text/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/focus-text/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/focus-text/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/focus-text/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/focus-text/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/grid-coordinate/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/grid-coordinate/check_finish.txt: -------------------------------------------------------------------------------- 1 | Do you think you have completed the current plan? -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/grid-coordinate/done.txt: -------------------------------------------------------------------------------- 1 | Do you think that you accomplished the task on this webpage with the above list of instructions? Answer Yes or No. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/grid-coordinate/edit_plan.txt: -------------------------------------------------------------------------------- 1 | The agent is following the above plan. Do you think that you need to edit the current plan for the agent to solve the task on this webpage? Answer yes or no and explain why. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/grid-coordinate/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is to click on the grid coordinate (1,2) on the webpage. 3 | Here is a plan to solve the example task. 4 | 1. 
clickxpath //*[@id="(1,2)"] -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/grid-coordinate/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/grid-coordinate/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/grid-coordinate/new_plan.txt: -------------------------------------------------------------------------------- 1 | You failed with the above plans to solve a task on the below webpage. Based on your answer, what is your new plan to solve the task on this webpage? Note that the new plan should be able to start from the current webpage state. You should not assume that the autonomous agent is in the initial state of the webpage. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/grid-coordinate/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/grid-coordinate/rci_answer.txt: -------------------------------------------------------------------------------- 1 | Answer Yes or No. 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/grid-coordinate/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/grid-coordinate/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/identify-shape/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/identify-shape/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/identify-shape/example.txt: -------------------------------------------------------------------------------- 1 | Example plan) 2 | task: click the button that best describes the figure below, which is a number (1). 
3 | plan: note that the number 0 is a digit, not a circle, and number text inside a tag is a digit, not a letter. 4 | 1. clickxpath //button[@data-type="digit"] 5 | 6 | task: click the button that best describes the figure below. 7 | plan: note that a single character 0 is a digit, not a circle, and a single number inside a tag is a digit, not a letter. 8 | 1. clickxpath //button[@data-type="triangle"] -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/identify-shape/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/identify-shape/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/identify-shape/rci_action.txt: -------------------------------------------------------------------------------- 1 | This action does not match the regular expressions. 
The updated instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/identify-shape/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/identify-shape/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/login-user-popup/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/login-user-popup/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/login-user-popup/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/login-user-popup/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/login-user-popup/rci_action.txt: -------------------------------------------------------------------------------- 1 | This action does not match the regular expressions. Notice that the instruction shouldn't contain quotation marks. The updated instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/login-user-popup/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/login-user-popup/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/multi-layouts/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/multi-layouts/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 
3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/multi-layouts/example.txt: -------------------------------------------------------------------------------- 1 | Example plan) 2 | task: search for crime movies directed by Mckenzie from year 1970. 3 | plan: 4 | 1. click the input box labeled with "genre" 5 | 2. type crime 6 | 3. click the input box labeled with "director name" 7 | 4. type Mckenzie 8 | 5. click the input box labeled with "year" 9 | 6. type 1970 10 | 7. click the search button -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/multi-layouts/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/multi-layouts/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/multi-layouts/rci_action.txt: -------------------------------------------------------------------------------- 1 | This action does not match the regular expressions. 
The updated instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/multi-layouts/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/multi-layouts/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/navigate-tree/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/navigate-tree/base.txt: -------------------------------------------------------------------------------- 1 | 2 | We have an autonomous computer control agent that can perform atomic instructions specified by natural language to control computers. There are two types of instructions it can execute. 3 | 4 | First, given the instruction that matches the regular expression, “{type}”, it can type a list of characters via the keyboard. This instruction should specify the target keyboard input for the agent to type. Before this typing instruction, you should first locate the cursor by clicking the input box with click instruction. 5 | 6 | Second, given the instruction that matches the regular expression, "{clickxpath}", it can click an HTML element with an xpath that is visible on the webpage. The target of this instruction should be a valid xpath. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/navigate-tree/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | task: navigate through the file tree. 
Find and click on the folder or file named "Riley". 3 | plan: 4 | 1. clickxpath //span[text()='Briana'] // Click the expandable hitarea of the folder "Briana" 5 | 2. clickxpath //span[text()='Riley'] -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/navigate-tree/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/navigate-tree/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/navigate-tree/rci_action.txt: -------------------------------------------------------------------------------- 1 | This action does not match the regular expressions. The updated instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/navigate-tree/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/navigate-tree/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/rci_action.txt: -------------------------------------------------------------------------------- 1 | This action does not match the regular expressions. 
The updated instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/search-engine/action.txt: -------------------------------------------------------------------------------- 1 | the next instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/search-engine/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/search-engine/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/search-engine/rci_action.txt: -------------------------------------------------------------------------------- 1 | This action does not match the regular expressions. The updated instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/search-engine/update_action.txt: -------------------------------------------------------------------------------- 1 | The target of click should be a valid XPath. 
Therefore, considering the output on the webpage, this instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/simple-algebra/action.txt: -------------------------------------------------------------------------------- 1 | the next instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/simple-algebra/check_finish.txt: -------------------------------------------------------------------------------- 1 | Do you think you complete the current plan? -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/simple-algebra/done.txt: -------------------------------------------------------------------------------- 1 | Do you think that you accomplished the task on this webpage with the above list of instructions? Answer Yes or No. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/simple-algebra/edit_plan.txt: -------------------------------------------------------------------------------- 1 | The agent is following the above plan. Do you think that you need to edit the current plan for the agent to solve the task on this webpage? Answer yes or no and explain why. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/simple-algebra/example.txt: -------------------------------------------------------------------------------- 1 | task: Solve for x and type your answer into the textbox. Press Submit when done. 2 | plan: 3 | 1. Locate the input box by clicking on it with a click instruction using its ID "math-answer". 4 | 2. Type the answer using a type instruction. 5 | 3. Press the "Enter" key using a press instruction. 6 | 4. Click the "Submit" button using a clickxpath instruction with the xpath "//button[@id='subbtn']". 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/simple-algebra/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/simple-algebra/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/simple-algebra/new_plan.txt: -------------------------------------------------------------------------------- 1 | You failed with the above plans to solve a task on the below webpage. Based on your answer, what is your new plan to solve the task on this webpage? Note that the new plan should be able to start from the current webpage state. You should not assume that the autonomous agent is in the initial state of the webpage. -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/simple-algebra/rci_action.txt: -------------------------------------------------------------------------------- 1 | Notice that the instruction shouldn't contain quotation marks. Without explanation, the single instruction that matches one of the regular expressions is ' -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/simple-algebra/rci_answer.txt: -------------------------------------------------------------------------------- 1 | Answer Yes or No. 
-------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/simple-algebra/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/simple-algebra/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/social-media-all/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/social-media-all/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/social-media-all/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/social-media-all/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/social-media-all/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the information on the webpage, this instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/social-media-some/action.txt: 
-------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/social-media-some/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/social-media-some/example.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/social-media-some/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/social-media-some/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/social-media-some/rci_action.txt: -------------------------------------------------------------------------------- 1 | This action does not match the regular expressions. 
The updated instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/social-media-some/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/social-media-some/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/social-media/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/social-media/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | The task is to click on the "Report" button for the user @briana. 3 | Here is a plan to solve a task on the webpage with the autonomous agent. 4 | 1. click the "more" button for a tweet from @briana (e.g., clickxpath //div[@data-result="i"]//span[@class="more"]) 5 | 2. click the report button (e.g., clickxpath //div[@data-result="i"]//li[@class="report"]) 6 | 7 | The task is, for the user @ultricies, to click on the "Retweet" button. 8 | Here is a plan to solve a task on the webpage with the autonomous agent. 9 | 1. 
click the "more" button for a tweet from @ultricies (e.g., clickxpath //div[@data-result="i"]//span[@class="retweet"]) -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/social-media/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/social-media/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/social-media/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/social-media/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, this single specific instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/terminal/action.txt: -------------------------------------------------------------------------------- 1 | the next instruction that the agent needs to execute is -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/terminal/example.txt: -------------------------------------------------------------------------------- 1 | Example plans) 2 | task: use the terminal below to delete a file ending with the extension .gpg 3 | plan: 4 | 1. Type "ls" to list all files in the terminal with "type ls" instruction. 5 | 2. Press enter with "press enter" instruction 6 | 3. 
Identify the filename ending with ".gpg" and type "rm [filename].gpg" to delete the identified file with type instruction. 7 | 4. Press enter everytime after typing a command on terminal. 8 | -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/terminal/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/terminal/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/terminal/rci_action.txt: -------------------------------------------------------------------------------- 1 | This action does not match the regular expressions. The updated instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/terminal/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the output on the webpage, this instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/tic-tac-toe/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/tic-tac-toe/example.txt: -------------------------------------------------------------------------------- 1 | task: for the agent to play as 'X' and win a game of tic-tac-toe. 2 | plan: 3 | 1. 
Check the board and click the proper position which is empty to newly put X mark on the board to win the game (e.g., clickxpath clickxpath //*[@id="ttt-i"]) 4 | 2. Start in the middle, play defensively, play offensively, and block your opponent to improve your chances of winning 5 | 3. You should select the position if you can directly win by doing that 6 | 3. Repeat these until win -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/tic-tac-toe/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/tic-tac-toe/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/tic-tac-toe/rci_action.txt: -------------------------------------------------------------------------------- 1 | This action does not match the regular expressions. 
The updated instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/tic-tac-toe/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/tic-tac-toe/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/update_action.txt: -------------------------------------------------------------------------------- 1 | Therefore, considering the information on the webpage, this instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/use-autocomplete/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/use-autocomplete/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/use-autocomplete/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/use-autocomplete/rci_action.txt: -------------------------------------------------------------------------------- 1 | This action does not match the regular expressions. 
The updated instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/use-autocomplete/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/use-autocomplete/update_action.txt -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/use-spinner/action.txt: -------------------------------------------------------------------------------- 1 | the next proper instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/use-spinner/first_action.txt: -------------------------------------------------------------------------------- 1 | the first instruction should be ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/use-spinner/initialize_plan.txt: -------------------------------------------------------------------------------- 1 | plan: -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/use-spinner/rci_action.txt: -------------------------------------------------------------------------------- 1 | Without explanation, the single instruction that matches one of the regular expressions is ` -------------------------------------------------------------------------------- /scripts/eval/miniwob++/prompt/use-spinner/update_action.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/miniwob++/prompt/use-spinner/update_action.txt -------------------------------------------------------------------------------- 
/scripts/eval/miniwob++/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | fschat 3 | gym 4 | openai 5 | selenium 6 | transformers 7 | Pillow 8 | regex -------------------------------------------------------------------------------- /scripts/eval/mmlu/mmlu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source scripts/eval/source.sh 3 | check_conda_env_and_activate code-act-agent 4 | echo_status 5 | 6 | OUTPUT_DIR=$1 # "data/ckpts/Llama-2-7b-megatron-tp2-pp2/hf/mint_agent_iter_52" 7 | MODEL_NAME="code-act-agent" 8 | 9 | OUTPUT_DIR=$OUTPUT_DIR/eval/mmlu 10 | check_is_done $OUTPUT_DIR 11 | mkdir -p $OUTPUT_DIR 12 | 13 | check_var "OPENAI_API_BASE" 14 | 15 | set -xe 16 | python3 -u scripts/eval/mmlu/evaluate_mmlu.py \ 17 | --model $MODEL_NAME \ 18 | --data_dir data/eval/mmlu \ 19 | --save_dir $OUTPUT_DIR 20 | 21 | # Mark the evaluation as finished 22 | touch $OUTPUT_DIR/DONE 23 | -------------------------------------------------------------------------------- /scripts/eval/science-world/.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .DS_Store 3 | logs/ 4 | *.pyc 5 | .vscode/ 6 | __pycache__/ -------------------------------------------------------------------------------- /scripts/eval/science-world/data_utils/READMD.md: -------------------------------------------------------------------------------- 1 | ```bash 2 | # wget https://github.com/allenai/ScienceWorld/raw/main/goldpaths/goldpaths-all.zip 3 | wget https://github.com/allenai/ScienceWorld/raw/exhaustivevalidactions/goldpaths/goldpaths-all.zip 4 | unzip goldpaths-all.zip 5 | ``` 6 | 7 | 8 | -------------------------------------------------------------------------------- /scripts/eval/science-world/data_utils/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/science-world/data_utils/__init__.py -------------------------------------------------------------------------------- /scripts/eval/science-world/data_utils/goldpaths-all.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyaoww/code-act/d607f56c9cfe9e8632ebaf65dcaf2b4b7fe1c6f8/scripts/eval/science-world/data_utils/goldpaths-all.zip -------------------------------------------------------------------------------- /scripts/eval/science-world/metrics.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import re 3 | import json 4 | import argparse 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument("--input_dir", type=str, default="logs") 8 | args = parser.parse_args() 9 | 10 | scores = [] 11 | for i in range(30): 12 | file = Path(args.input_dir) / f'task{i}-score.txt' 13 | try: 14 | s = file.open().read() 15 | except: 16 | print(f'Warning: {file} not found') 17 | continue 18 | score = re.search(r'Average score: ([0-9\.]*)', s)[1] 19 | x = float(score) 20 | scores.append(x) 21 | 22 | with open(Path(args.input_dir) / 'results.json', 'w') as f: 23 | json.dump({ 24 | "scores": scores, 25 | "average": sum(scores) / len(scores), 26 | }, f) 27 | -------------------------------------------------------------------------------- /scripts/eval/science-world/prompts/README.md: -------------------------------------------------------------------------------- 1 | # Few-shot Prompts 2 | 3 | `prompt_orig.json` contains the original few-shot prompts provided by [SwiftSage](https://github.com/yuchenlin/SwiftSage). We use `convert.py` to convert it to suit our conversation format and save the result in `prompt.json`, which will be used in our evaluation. 
-------------------------------------------------------------------------------- /scripts/eval/science-world/slow_agent/utils.py: -------------------------------------------------------------------------------- 1 | 2 | # from openai_key import OPENAI_KEY 3 | import openai 4 | import os 5 | import json 6 | import random 7 | from tenacity import ( 8 | retry, 9 | stop_after_attempt, 10 | wait_random_exponential, 11 | ) # for exponential backoff 12 | 13 | openai.api_key = os.getenv("OPENAI_API_KEY") 14 | 15 | 16 | @retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(10)) 17 | def completion_with_backoff(**kwargs): 18 | return openai.ChatCompletion.create(**kwargs) 19 | 20 | 21 | triplets_by_task = load_triplets() 22 | prompt = sample_few_shot(triplets_by_task, "0") 23 | print(prompt) -------------------------------------------------------------------------------- /scripts/eval/setup_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source scripts/eval/source.sh 4 | 5 | # MMLU 6 | wget https://people.eecs.berkeley.edu/~hendrycks/data.tar -P data/eval/mmlu 7 | tar -xvf data/eval/mmlu/data.tar -C data/eval/mmlu 8 | rm data/eval/mmlu/data.tar 9 | mv data/eval/mmlu/data/* data/eval/mmlu 10 | rm -r data/eval/mmlu/data 11 | 12 | # MATH 13 | wget https://people.eecs.berkeley.edu/~hendrycks/MATH.tar -P data/eval/math 14 | tar -xvf data/eval/math/MATH.tar -C data/eval/math 15 | rm data/eval/math/MATH.tar 16 | mv data/eval/math/MATH/* data/eval/math 17 | rm -r data/eval/math/MATH 18 | 19 | # GSM8K 20 | check_conda_env_and_activate code-act 21 | python3 -c "import datasets; dataset = datasets.load_dataset('gsm8k', 'main'); dataset.save_to_disk('data/eval/gsm8k')" 22 | -------------------------------------------------------------------------------- /scripts/models/megatron/convert_llama.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 
RAW_MODEL_WEIGHT_DIR=data/models/raw_hf/Llama-2-7b-hf/ 4 | OUTPUT_DIR=data/models/raw/Llama-2-7b-megatron 5 | 6 | python Megatron-LLM/weights_conversion/hf_to_megatron.py llama2 \ 7 | --size=7 \ 8 | --out=$OUTPUT_DIR \ 9 | --model-path=$RAW_MODEL_WEIGHT_DIR \ 10 | -------------------------------------------------------------------------------- /scripts/models/megatron/convert_mistral.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | RAW_MODEL_WEIGHT_DIR=data/models/raw_hf/Mistral-7B-v0.1/ 4 | OUTPUT_DIR=data/models/raw/Mistral-7b-megatron 5 | 6 | python Megatron-LLM/weights_conversion/hf_to_megatron.py mistral \ 7 | --size=7 \ 8 | --out=$OUTPUT_DIR \ 9 | --model-path=$RAW_MODEL_WEIGHT_DIR \ 10 | -------------------------------------------------------------------------------- /scripts/slurm/run_4gpu_interactive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | srun \ 3 | -A TODO_YOUR_ACCOUNT \ 4 | --time=00:30:00 \ 5 | --nodes=1 \ 6 | --ntasks-per-node=16 \ 7 | --tasks=1 \ 8 | --cpus-per-task=16 \ 9 | --partition=gpuA40x4 \ 10 | --gpus=4 \ 11 | --mem=240g \ 12 | --pty /bin/bash 13 | -------------------------------------------------------------------------------- /scripts/slurm/run_gpu_interactive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | srun \ 3 | -A TODO_YOUR_ACCOUNT \ 4 | --time=00:30:00 \ 5 | --nodes=1 \ 6 | --ntasks-per-node=16 \ 7 | --tasks=1 \ 8 | --cpus-per-task=16 \ 9 | --partition=gpuA40x4 \ 10 | --gpus=1 \ 11 | --mem=64g \ 12 | --pty /bin/bash 13 | -------------------------------------------------------------------------------- /scripts/slurm/run_gpu_interactive_A100.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | srun \ 3 | -A TODO_YOUR_ACCOUNT \ 4 | --time=00:30:00 \ 5 | --nodes=1 \ 6 | --ntasks-per-node=16 \ 7 | --tasks=1 
\ 8 | --cpus-per-task=16 \ 9 | --partition=gpuA100x4 \ 10 | --gpus=1 \ 11 | --mem=64g \ 12 | --pty /bin/bash 13 | -------------------------------------------------------------------------------- /scripts/slurm/run_megatron_gpu_interactive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | srun \ 3 | -A YOUR_CLUSTER \ 4 | --time=00:30:00 \ 5 | --nodes=1 \ 6 | --ntasks-per-node=16 \ 7 | --tasks=1 \ 8 | --cpus-per-task=16 \ 9 | --partition=gpuA40x4 \ 10 | --gpus=1 \ 11 | --mem=60g \ 12 | --pty scripts/docker/run_megatron_interactive_slurm.sh 13 | --------------------------------------------------------------------------------