├── .gitignore ├── LICENSE ├── README.md ├── data ├── figures │ ├── eval_framework.png │ ├── full_pipeline.png │ ├── pull.png │ └── templates.png ├── plots │ ├── answer_action_lengths.png │ ├── error_breakdown_all.png │ ├── number_of_tools_accuracy_comparison.png │ └── percentage_correct_per_template.png ├── processed │ ├── analytics_data.csv │ ├── calendar_events.csv │ ├── customer_relationship_manager_data.csv │ ├── emails.csv │ ├── project_tasks.csv │ └── queries_and_answers │ │ ├── analytics_queries_and_answers.csv │ │ ├── calendar_queries_and_answers.csv │ │ ├── customer_relationship_manager_queries_and_answers.csv │ │ ├── email_queries_and_answers.csv │ │ ├── multi_domain_queries_and_answers.csv │ │ └── project_management_queries_and_answers.csv ├── raw │ ├── email_addresses.csv │ ├── email_content_pairs.csv │ └── events.csv └── results │ ├── analytics │ ├── claude-2_all_2024-03-23_23-37-01.csv │ ├── claude-2_domains_2024-03-23_17-31-35.csv │ ├── gpt-3.5_domains_2024-03-23_13-56-43.csv │ ├── gpt-4_all_2024-03-23_21-56-59.csv │ ├── gpt-4_domains_2024-03-23_13-00-36.csv │ ├── llama2-70b_domains_2024-03-23_18-36-29.csv │ ├── mistral-8x7B_all_2024-03-24_05-59-31.csv │ └── mistral-8x7B_domains_2024-03-23_20-08-02.csv │ ├── calendar │ ├── claude-2_all_2024-03-24_01-00-33.csv │ ├── claude-2_domains_2024-03-20_14-44-45.csv │ ├── gpt-3.5_domains_2024-03-20_13-03-15.csv │ ├── gpt-4_all_2024-03-21_14-57-26.csv │ ├── gpt-4_domains_2024-03-19_22-36-21.csv │ ├── llama2-70b_domains_2024-03-20_16-24-35.csv │ ├── mistral-8x7B_all_2024-03-24_06-19-20.csv │ └── mistral-8x7B_domains_2024-03-20_17-29-35.csv │ ├── customer_relationship_manager │ ├── claude-2_all_2024-03-25_16-38-22.csv │ ├── claude-2_domains_2024-03-20_23-07-37.csv │ ├── gpt-3.5_domains_2024-03-20_22-31-06.csv │ ├── gpt-4_all_2024-03-19_11-18-10.csv │ ├── gpt-4_domains_2024-03-18_12-50-37.csv │ ├── llama2-70b_domains_2024-03-20_23-36-39.csv │ ├── mistral-8x7B_all_2024-03-25_16-48-26.csv │ └── mistral-8x7B_domains_2024-03-20_23-47-42.csv │ ├── email │ ├── claude-2_all_2024-03-24_01-57-07.csv │ ├── claude-2_domains_2024-03-20_15-36-46.csv │ ├── gpt-3.5_domains_2024-03-20_13-16-18.csv │ ├── gpt-4_all_2024-03-21_15-21-12.csv │ ├── gpt-4_domains_2024-03-20_12-48-43.csv │ ├── llama2-70b_domains_2024-03-20_17-01-34.csv │ ├── mistral-8x7B_all_2024-03-24_06-47-26.csv │ └── mistral-8x7B_domains_2024-03-20_18-12-43.csv │ ├── multi_domain │ ├── claude-2_all_2024-03-24_05-37-15.csv │ ├── claude-2_domains_2024-03-23_16-15-05.csv │ ├── gpt-3.5_domains_2024-03-23_13-32-46.csv │ ├── gpt-4_all_2024-03-23_21-17-32.csv │ ├── gpt-4_domains_2024-03-23_12-12-02.csv │ ├── llama2-70b_domains_2024-03-23_18-00-54.csv │ ├── mistral-8x7B_all_2024-03-24_07-55-30.csv │ └── mistral-8x7B_domains_2024-03-23_19-42-41.csv │ └── project_management │ ├── claude-2_all_2024-03-24_02-39-42.csv │ ├── claude-2_domains_2024-03-20_21-21-58.csv │ ├── gpt-3.5_domains_2024-03-20_20-52-32.csv │ ├── gpt-4_all_2024-03-21_15-40-36.csv │ ├── gpt-4_domains_2024-03-19_23-06-51.csv │ ├── llama2-70b_domains_2024-03-20_22-01-40.csv │ ├── mistral-8x7B_all_2024-03-24_07-01-18.csv │ └── mistral-8x7B_domains_2024-03-20_22-26-13.csv ├── requirements.txt ├── scripts ├── data_generation │ ├── mocked_data │ │ ├── generate_all_mocked_data.py │ │ ├── generate_analytics_data.py │ │ ├── generate_calendar_data.py │ │ ├── generate_customer_relationship_manager_data.py │ │ ├── generate_email_content_pairs.py │ │ ├── generate_email_data.py │ │ └── generate_project_management_data.py │ └── query_answer_generation │ │ ├── generate_all_query_and_answer.py │ │ ├── generate_analytics_query_and_answer.py │ │ ├── generate_calendar_query_and_answer.py │ │ ├── generate_customer_relationship_manager_query_and_answer.py │ │ ├── generate_email_query_and_answer.py │ │ ├── generate_multi_domain_query_and_answer.py │ │ └── generate_project_management_query_and_answer.py ├── evals │ ├── calculate_all_metrics.py │ └── calculate_metrics_for_single_file.py ├── inference │ ├── generate_all_results.py │ └── generate_results.py └── plotting │ ├── plot_answer_action_lengths.py │ ├── plot_error_breakdown.py │ ├── plot_errors_per_template.py │ ├── plot_number_of_tools.py │ └── print_number_of_queries.py ├── src ├── __init__.py ├── data_generation │ ├── __init__.py │ └── data_generation_utils.py ├── evals │ └── utils.py └── tools │ ├── analytics.py │ ├── calendar.py │ ├── company_directory.py │ ├── customer_relationship_manager.py │ ├── email.py │ ├── project_management.py │ └── toolkits.py └── tests ├── data_generation ├── processed │ ├── test_logic.py │ ├── test_processed_calendar_data.py │ ├── test_processed_customer_relationship_manager_data.py │ ├── test_processed_email_data.py │ └── test_processed_project_management_data.py └── raw │ └── test_raw_data.py ├── evals └── test_evals.py └── tools ├── test_analytics_tools.py ├── test_calendar_tools.py ├── test_customer_relationship_manager_tools.py ├── test_email_tools.py └── test_project_management_tools.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/README.md -------------------------------------------------------------------------------- /data/figures/eval_framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/figures/eval_framework.png -------------------------------------------------------------------------------- /data/figures/full_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/figures/full_pipeline.png -------------------------------------------------------------------------------- /data/figures/pull.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/figures/pull.png -------------------------------------------------------------------------------- /data/figures/templates.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/figures/templates.png -------------------------------------------------------------------------------- /data/plots/answer_action_lengths.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/plots/answer_action_lengths.png -------------------------------------------------------------------------------- /data/plots/error_breakdown_all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/plots/error_breakdown_all.png -------------------------------------------------------------------------------- /data/plots/number_of_tools_accuracy_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/plots/number_of_tools_accuracy_comparison.png -------------------------------------------------------------------------------- /data/plots/percentage_correct_per_template.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/plots/percentage_correct_per_template.png -------------------------------------------------------------------------------- /data/processed/analytics_data.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/processed/analytics_data.csv -------------------------------------------------------------------------------- /data/processed/calendar_events.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/processed/calendar_events.csv -------------------------------------------------------------------------------- /data/processed/customer_relationship_manager_data.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/processed/customer_relationship_manager_data.csv -------------------------------------------------------------------------------- /data/processed/emails.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/processed/emails.csv -------------------------------------------------------------------------------- /data/processed/project_tasks.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/processed/project_tasks.csv -------------------------------------------------------------------------------- /data/processed/queries_and_answers/analytics_queries_and_answers.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/processed/queries_and_answers/analytics_queries_and_answers.csv -------------------------------------------------------------------------------- /data/processed/queries_and_answers/calendar_queries_and_answers.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/processed/queries_and_answers/calendar_queries_and_answers.csv -------------------------------------------------------------------------------- /data/processed/queries_and_answers/customer_relationship_manager_queries_and_answers.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/processed/queries_and_answers/customer_relationship_manager_queries_and_answers.csv -------------------------------------------------------------------------------- /data/processed/queries_and_answers/email_queries_and_answers.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/processed/queries_and_answers/email_queries_and_answers.csv -------------------------------------------------------------------------------- /data/processed/queries_and_answers/multi_domain_queries_and_answers.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/processed/queries_and_answers/multi_domain_queries_and_answers.csv -------------------------------------------------------------------------------- /data/processed/queries_and_answers/project_management_queries_and_answers.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/processed/queries_and_answers/project_management_queries_and_answers.csv -------------------------------------------------------------------------------- /data/raw/email_addresses.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/raw/email_addresses.csv -------------------------------------------------------------------------------- /data/raw/email_content_pairs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/raw/email_content_pairs.csv -------------------------------------------------------------------------------- /data/raw/events.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/raw/events.csv -------------------------------------------------------------------------------- /data/results/analytics/claude-2_all_2024-03-23_23-37-01.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/analytics/claude-2_all_2024-03-23_23-37-01.csv -------------------------------------------------------------------------------- /data/results/analytics/claude-2_domains_2024-03-23_17-31-35.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/analytics/claude-2_domains_2024-03-23_17-31-35.csv -------------------------------------------------------------------------------- /data/results/analytics/gpt-3.5_domains_2024-03-23_13-56-43.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/analytics/gpt-3.5_domains_2024-03-23_13-56-43.csv -------------------------------------------------------------------------------- /data/results/analytics/gpt-4_all_2024-03-23_21-56-59.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/analytics/gpt-4_all_2024-03-23_21-56-59.csv -------------------------------------------------------------------------------- /data/results/analytics/gpt-4_domains_2024-03-23_13-00-36.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/analytics/gpt-4_domains_2024-03-23_13-00-36.csv -------------------------------------------------------------------------------- /data/results/analytics/llama2-70b_domains_2024-03-23_18-36-29.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/analytics/llama2-70b_domains_2024-03-23_18-36-29.csv -------------------------------------------------------------------------------- /data/results/analytics/mistral-8x7B_all_2024-03-24_05-59-31.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/analytics/mistral-8x7B_all_2024-03-24_05-59-31.csv -------------------------------------------------------------------------------- /data/results/analytics/mistral-8x7B_domains_2024-03-23_20-08-02.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/analytics/mistral-8x7B_domains_2024-03-23_20-08-02.csv -------------------------------------------------------------------------------- /data/results/calendar/claude-2_all_2024-03-24_01-00-33.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/calendar/claude-2_all_2024-03-24_01-00-33.csv -------------------------------------------------------------------------------- /data/results/calendar/claude-2_domains_2024-03-20_14-44-45.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/calendar/claude-2_domains_2024-03-20_14-44-45.csv -------------------------------------------------------------------------------- /data/results/calendar/gpt-3.5_domains_2024-03-20_13-03-15.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/calendar/gpt-3.5_domains_2024-03-20_13-03-15.csv -------------------------------------------------------------------------------- /data/results/calendar/gpt-4_all_2024-03-21_14-57-26.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/calendar/gpt-4_all_2024-03-21_14-57-26.csv -------------------------------------------------------------------------------- /data/results/calendar/gpt-4_domains_2024-03-19_22-36-21.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/calendar/gpt-4_domains_2024-03-19_22-36-21.csv -------------------------------------------------------------------------------- /data/results/calendar/llama2-70b_domains_2024-03-20_16-24-35.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/calendar/llama2-70b_domains_2024-03-20_16-24-35.csv -------------------------------------------------------------------------------- /data/results/calendar/mistral-8x7B_all_2024-03-24_06-19-20.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/calendar/mistral-8x7B_all_2024-03-24_06-19-20.csv -------------------------------------------------------------------------------- /data/results/calendar/mistral-8x7B_domains_2024-03-20_17-29-35.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/calendar/mistral-8x7B_domains_2024-03-20_17-29-35.csv -------------------------------------------------------------------------------- /data/results/customer_relationship_manager/claude-2_all_2024-03-25_16-38-22.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/customer_relationship_manager/claude-2_all_2024-03-25_16-38-22.csv -------------------------------------------------------------------------------- /data/results/customer_relationship_manager/claude-2_domains_2024-03-20_23-07-37.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/customer_relationship_manager/claude-2_domains_2024-03-20_23-07-37.csv -------------------------------------------------------------------------------- /data/results/customer_relationship_manager/gpt-3.5_domains_2024-03-20_22-31-06.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/customer_relationship_manager/gpt-3.5_domains_2024-03-20_22-31-06.csv -------------------------------------------------------------------------------- /data/results/customer_relationship_manager/gpt-4_all_2024-03-19_11-18-10.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/customer_relationship_manager/gpt-4_all_2024-03-19_11-18-10.csv -------------------------------------------------------------------------------- /data/results/customer_relationship_manager/gpt-4_domains_2024-03-18_12-50-37.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/customer_relationship_manager/gpt-4_domains_2024-03-18_12-50-37.csv -------------------------------------------------------------------------------- /data/results/customer_relationship_manager/llama2-70b_domains_2024-03-20_23-36-39.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/customer_relationship_manager/llama2-70b_domains_2024-03-20_23-36-39.csv -------------------------------------------------------------------------------- /data/results/customer_relationship_manager/mistral-8x7B_all_2024-03-25_16-48-26.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/customer_relationship_manager/mistral-8x7B_all_2024-03-25_16-48-26.csv -------------------------------------------------------------------------------- /data/results/customer_relationship_manager/mistral-8x7B_domains_2024-03-20_23-47-42.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/customer_relationship_manager/mistral-8x7B_domains_2024-03-20_23-47-42.csv -------------------------------------------------------------------------------- /data/results/email/claude-2_all_2024-03-24_01-57-07.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/email/claude-2_all_2024-03-24_01-57-07.csv -------------------------------------------------------------------------------- /data/results/email/claude-2_domains_2024-03-20_15-36-46.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/email/claude-2_domains_2024-03-20_15-36-46.csv -------------------------------------------------------------------------------- /data/results/email/gpt-3.5_domains_2024-03-20_13-16-18.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/email/gpt-3.5_domains_2024-03-20_13-16-18.csv -------------------------------------------------------------------------------- /data/results/email/gpt-4_all_2024-03-21_15-21-12.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/email/gpt-4_all_2024-03-21_15-21-12.csv -------------------------------------------------------------------------------- /data/results/email/gpt-4_domains_2024-03-20_12-48-43.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/email/gpt-4_domains_2024-03-20_12-48-43.csv -------------------------------------------------------------------------------- /data/results/email/llama2-70b_domains_2024-03-20_17-01-34.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/email/llama2-70b_domains_2024-03-20_17-01-34.csv -------------------------------------------------------------------------------- /data/results/email/mistral-8x7B_all_2024-03-24_06-47-26.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/email/mistral-8x7B_all_2024-03-24_06-47-26.csv -------------------------------------------------------------------------------- /data/results/email/mistral-8x7B_domains_2024-03-20_18-12-43.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/email/mistral-8x7B_domains_2024-03-20_18-12-43.csv -------------------------------------------------------------------------------- /data/results/multi_domain/claude-2_all_2024-03-24_05-37-15.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/multi_domain/claude-2_all_2024-03-24_05-37-15.csv -------------------------------------------------------------------------------- /data/results/multi_domain/claude-2_domains_2024-03-23_16-15-05.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/multi_domain/claude-2_domains_2024-03-23_16-15-05.csv -------------------------------------------------------------------------------- /data/results/multi_domain/gpt-3.5_domains_2024-03-23_13-32-46.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/multi_domain/gpt-3.5_domains_2024-03-23_13-32-46.csv -------------------------------------------------------------------------------- /data/results/multi_domain/gpt-4_all_2024-03-23_21-17-32.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/multi_domain/gpt-4_all_2024-03-23_21-17-32.csv -------------------------------------------------------------------------------- /data/results/multi_domain/gpt-4_domains_2024-03-23_12-12-02.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/multi_domain/gpt-4_domains_2024-03-23_12-12-02.csv -------------------------------------------------------------------------------- /data/results/multi_domain/llama2-70b_domains_2024-03-23_18-00-54.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/multi_domain/llama2-70b_domains_2024-03-23_18-00-54.csv -------------------------------------------------------------------------------- /data/results/multi_domain/mistral-8x7B_all_2024-03-24_07-55-30.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/multi_domain/mistral-8x7B_all_2024-03-24_07-55-30.csv -------------------------------------------------------------------------------- /data/results/multi_domain/mistral-8x7B_domains_2024-03-23_19-42-41.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/multi_domain/mistral-8x7B_domains_2024-03-23_19-42-41.csv -------------------------------------------------------------------------------- /data/results/project_management/claude-2_all_2024-03-24_02-39-42.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/project_management/claude-2_all_2024-03-24_02-39-42.csv -------------------------------------------------------------------------------- /data/results/project_management/claude-2_domains_2024-03-20_21-21-58.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/project_management/claude-2_domains_2024-03-20_21-21-58.csv -------------------------------------------------------------------------------- /data/results/project_management/gpt-3.5_domains_2024-03-20_20-52-32.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/project_management/gpt-3.5_domains_2024-03-20_20-52-32.csv -------------------------------------------------------------------------------- /data/results/project_management/gpt-4_all_2024-03-21_15-40-36.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/project_management/gpt-4_all_2024-03-21_15-40-36.csv -------------------------------------------------------------------------------- /data/results/project_management/gpt-4_domains_2024-03-19_23-06-51.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/project_management/gpt-4_domains_2024-03-19_23-06-51.csv -------------------------------------------------------------------------------- /data/results/project_management/llama2-70b_domains_2024-03-20_22-01-40.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/project_management/llama2-70b_domains_2024-03-20_22-01-40.csv -------------------------------------------------------------------------------- /data/results/project_management/mistral-8x7B_all_2024-03-24_07-01-18.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/project_management/mistral-8x7B_all_2024-03-24_07-01-18.csv -------------------------------------------------------------------------------- /data/results/project_management/mistral-8x7B_domains_2024-03-20_22-26-13.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/data/results/project_management/mistral-8x7B_domains_2024-03-20_22-26-13.csv -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/data_generation/mocked_data/generate_all_mocked_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/data_generation/mocked_data/generate_all_mocked_data.py -------------------------------------------------------------------------------- /scripts/data_generation/mocked_data/generate_analytics_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/data_generation/mocked_data/generate_analytics_data.py -------------------------------------------------------------------------------- /scripts/data_generation/mocked_data/generate_calendar_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/data_generation/mocked_data/generate_calendar_data.py -------------------------------------------------------------------------------- /scripts/data_generation/mocked_data/generate_customer_relationship_manager_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/data_generation/mocked_data/generate_customer_relationship_manager_data.py -------------------------------------------------------------------------------- /scripts/data_generation/mocked_data/generate_email_content_pairs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/data_generation/mocked_data/generate_email_content_pairs.py -------------------------------------------------------------------------------- /scripts/data_generation/mocked_data/generate_email_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/data_generation/mocked_data/generate_email_data.py -------------------------------------------------------------------------------- /scripts/data_generation/mocked_data/generate_project_management_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/data_generation/mocked_data/generate_project_management_data.py -------------------------------------------------------------------------------- /scripts/data_generation/query_answer_generation/generate_all_query_and_answer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/data_generation/query_answer_generation/generate_all_query_and_answer.py -------------------------------------------------------------------------------- /scripts/data_generation/query_answer_generation/generate_analytics_query_and_answer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/data_generation/query_answer_generation/generate_analytics_query_and_answer.py -------------------------------------------------------------------------------- /scripts/data_generation/query_answer_generation/generate_calendar_query_and_answer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/data_generation/query_answer_generation/generate_calendar_query_and_answer.py -------------------------------------------------------------------------------- /scripts/data_generation/query_answer_generation/generate_customer_relationship_manager_query_and_answer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/data_generation/query_answer_generation/generate_customer_relationship_manager_query_and_answer.py -------------------------------------------------------------------------------- /scripts/data_generation/query_answer_generation/generate_email_query_and_answer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/data_generation/query_answer_generation/generate_email_query_and_answer.py -------------------------------------------------------------------------------- /scripts/data_generation/query_answer_generation/generate_multi_domain_query_and_answer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/data_generation/query_answer_generation/generate_multi_domain_query_and_answer.py -------------------------------------------------------------------------------- /scripts/data_generation/query_answer_generation/generate_project_management_query_and_answer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/data_generation/query_answer_generation/generate_project_management_query_and_answer.py -------------------------------------------------------------------------------- /scripts/evals/calculate_all_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/evals/calculate_all_metrics.py -------------------------------------------------------------------------------- /scripts/evals/calculate_metrics_for_single_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/evals/calculate_metrics_for_single_file.py -------------------------------------------------------------------------------- /scripts/inference/generate_all_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/inference/generate_all_results.py -------------------------------------------------------------------------------- /scripts/inference/generate_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/inference/generate_results.py -------------------------------------------------------------------------------- /scripts/plotting/plot_answer_action_lengths.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/plotting/plot_answer_action_lengths.py -------------------------------------------------------------------------------- /scripts/plotting/plot_error_breakdown.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/plotting/plot_error_breakdown.py -------------------------------------------------------------------------------- /scripts/plotting/plot_errors_per_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/plotting/plot_errors_per_template.py -------------------------------------------------------------------------------- /scripts/plotting/plot_number_of_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/plotting/plot_number_of_tools.py -------------------------------------------------------------------------------- /scripts/plotting/print_number_of_queries.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/scripts/plotting/print_number_of_queries.py -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/data_generation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/data_generation/data_generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/src/data_generation/data_generation_utils.py -------------------------------------------------------------------------------- /src/evals/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/src/evals/utils.py -------------------------------------------------------------------------------- /src/tools/analytics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/src/tools/analytics.py -------------------------------------------------------------------------------- /src/tools/calendar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/src/tools/calendar.py -------------------------------------------------------------------------------- /src/tools/company_directory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/src/tools/company_directory.py -------------------------------------------------------------------------------- /src/tools/customer_relationship_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/src/tools/customer_relationship_manager.py -------------------------------------------------------------------------------- /src/tools/email.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/src/tools/email.py -------------------------------------------------------------------------------- /src/tools/project_management.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/src/tools/project_management.py -------------------------------------------------------------------------------- /src/tools/toolkits.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/src/tools/toolkits.py -------------------------------------------------------------------------------- /tests/data_generation/processed/test_logic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/tests/data_generation/processed/test_logic.py -------------------------------------------------------------------------------- /tests/data_generation/processed/test_processed_calendar_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/tests/data_generation/processed/test_processed_calendar_data.py -------------------------------------------------------------------------------- /tests/data_generation/processed/test_processed_customer_relationship_manager_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/tests/data_generation/processed/test_processed_customer_relationship_manager_data.py -------------------------------------------------------------------------------- /tests/data_generation/processed/test_processed_email_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/tests/data_generation/processed/test_processed_email_data.py -------------------------------------------------------------------------------- /tests/data_generation/processed/test_processed_project_management_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/tests/data_generation/processed/test_processed_project_management_data.py -------------------------------------------------------------------------------- /tests/data_generation/raw/test_raw_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/tests/data_generation/raw/test_raw_data.py -------------------------------------------------------------------------------- /tests/evals/test_evals.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/tests/evals/test_evals.py -------------------------------------------------------------------------------- /tests/tools/test_analytics_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/tests/tools/test_analytics_tools.py -------------------------------------------------------------------------------- /tests/tools/test_calendar_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/tests/tools/test_calendar_tools.py -------------------------------------------------------------------------------- /tests/tools/test_customer_relationship_manager_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/tests/tools/test_customer_relationship_manager_tools.py -------------------------------------------------------------------------------- /tests/tools/test_email_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/tests/tools/test_email_tools.py -------------------------------------------------------------------------------- /tests/tools/test_project_management_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olly-styles/WorkBench/HEAD/tests/tools/test_project_management_tools.py --------------------------------------------------------------------------------