├── downloads └── .gitkeep ├── .vscode ├── settings.json └── launch.json ├── assets ├── icon.png ├── overall_process_crop.png └── webvoyager_overall_res.png ├── .gitignore ├── data ├── tasks_test.jsonl ├── WebVoyagerImpossibleTasks.json └── GAIA_web.jsonl ├── run.sh ├── calculate_current_score.py ├── requirements_old.txt ├── README.md ├── evaluation └── auto_eval_browser_use.py ├── LICENSE ├── run_browser_use.py └── analysis.ipynb /downloads/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.analysis.typeCheckingMode": "basic" 3 | } -------------------------------------------------------------------------------- /assets/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/eval/HEAD/assets/icon.png -------------------------------------------------------------------------------- /assets/overall_process_crop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/eval/HEAD/assets/overall_process_crop.png -------------------------------------------------------------------------------- /assets/webvoyager_overall_res.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browser-use/eval/HEAD/assets/webvoyager_overall_res.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | downloads/*.pdf 3 | results/2024* 4 | test_code.py 5 | __pycache__ 6 | venv/ 7 | .env 8 | 9 | results/ 10 | /results* 11 | gcp-login.json -------------------------------------------------------------------------------- /data/tasks_test.jsonl: -------------------------------------------------------------------------------- 1 | {"web_name": "Cambridge Dictionary", "id": "Cambridge Dictionary--29", "ques": "Go to the Plus section of Cambridge Dictionary, find Image quizzes and do an easy quiz about Animals and tell me your final score.", "web": "https://dictionary.cambridge.org/"} -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | nohup python -u run.py \ 3 | --test_file ./data/tasks_test.jsonl \ 4 | --api_key YOUR_OPENAI_API_KEY \ 5 | --headless \ 6 | --max_iter 15 \ 7 | --max_attached_imgs 3 \ 8 | --temperature 1 \ 9 | --fix_box_color \ 10 | --seed 42 > test_tasks.log & 11 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "Python Debugger: Current File", 6 | "type": "debugpy", 7 | "request": "launch", 8 | "program": "${file}", 9 | "console": "integratedTerminal", 10 | "justMyCode": false, 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /calculate_current_score.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | def get_score(folder): 6 | success_count = 0 7 | total_count = 1 8 | print(f"Processing {folder}") 9 | print(f"Total tasks: {len(os.listdir(folder))}") 10 | for subfolder in os.listdir(folder): 11 | # if not folder, skip 12 | if not os.path.isdir(os.path.join(folder, subfolder)): 13 | continue 14 | for file in os.listdir(os.path.join(folder, subfolder)): 15 | if file.endswith("task_result.json"): 16 | with open(os.path.join(folder, subfolder, file), "r") as f: 17 | data = json.load(f) 18 | if data["success"] == "success": 19 | success_count += 1 20 | total_count += 1 21 | 22 | print( 23 | f"Success rate : {success_count / total_count:.2f}={success_count}/{total_count}" 24 | ) 25 | print() 26 | 27 | 28 | main_folder = "results" 29 | print("Success rate for each folder:") 30 | for folder in os.listdir(main_folder): 31 | if not os.path.isdir(os.path.join(main_folder, folder)): 32 | continue 33 | get_score(os.path.join(main_folder, folder)) 34 | -------------------------------------------------------------------------------- /data/WebVoyagerImpossibleTasks.json: -------------------------------------------------------------------------------- 1 | [ 2 | "Allrecipes--16", 3 | "Allrecipes--19", 4 | "Allrecipes--23", 5 | "Allrecipes--3", 6 | "Allrecipes--30", 7 | "Allrecipes--7", 8 | "Amazon--16", 9 | "Amazon--19", 10 | "Amazon--4", 11 | "Apple--1", 12 | "Apple--14", 13 | "Apple--16", 14 | "Apple--2", 15 | "Apple--20", 16 | "Apple--37", 17 | "Apple--41", 18 | "Apple--42", 19 | "Apple--7", 20 | "Apple--9", 21 | "ArXiv--11", 22 | "BBC News--14", 23 | "BBC News--16", 24 | "BBC News--18", 25 | "BBC News--2", 26 | "BBC News--21", 27 | "BBC News--33", 28 | "BBC News--37", 29 | "Booking--11", 30 | "Booking--13", 31 | "Booking--14", 32 | "Booking--6", 33 | "Coursera--17", 34 | "Coursera--28", 35 | "ESPN--19", 36 | "ESPN--2", 37 | "ESPN--21", 38 | "ESPN--26", 39 | "GitHub--22", 40 | "Google Flights--0", 41 | "Google Flights--20", 42 | "Google Flights--7", 43 | "Google Map--13", 44 | "Google Map--18", 45 | "Google Map--26", 46 | "Google Search--15", 47 | "Google Search--16", 48 | "Google Search--22", 49 | "Huggingface--1", 50 | "Huggingface--10", 51 | "Huggingface--20", 52 | "Huggingface--21", 53 | "Huggingface--22", 54 | "Huggingface--23", 55 | "Huggingface--32", 56 | "Huggingface--6" 57 | ] 58 | -------------------------------------------------------------------------------- /requirements_old.txt: -------------------------------------------------------------------------------- 1 | aiohappyeyeballs==2.4.3 2 | aiohttp==3.11.2 3 | aiosignal==1.3.1 4 | annotated-types==0.7.0 5 | anthropic==0.39.0 6 | anyio==3.7.1 7 | attrs==24.2.0 8 | babel==2.16.0 9 | backoff==2.2.1 10 | beautifulsoup4==4.12.3 11 | browser-use==0.1.6 12 | certifi==2024.8.30 13 | charset-normalizer==3.4.0 14 | courlan==1.3.2 15 | dateparser==1.2.0 16 | defusedxml==0.7.1 17 | distro==1.9.0 18 | fireworks-ai==0.15.8 19 | frozenlist==1.5.0 20 | h11==0.14.0 21 | html2text==2024.2.26 22 | htmldate==1.9.2 23 | httpcore==1.0.7 24 | httpx==0.27.2 25 | httpx-sse==0.4.0 26 | httpx-ws==0.5.1 27 | idna==3.10 28 | jiter==0.7.1 29 | jsonpatch==1.33 30 | jsonpointer==3.0.0 31 | jusText==3.0.1 32 | langchain==0.3.7 33 | langchain-anthropic==0.3.0 34 | langchain-core==0.3.19 35 | langchain-fireworks==0.2.5 36 | langchain-openai==0.2.8 37 | langchain-text-splitters==0.3.2 38 | langsmith==0.1.143 39 | lxml==5.3.0 40 | lxml_html_clean==0.4.1 41 | MainContentExtractor==0.0.4 42 | monotonic==1.6 43 | multidict==6.1.0 44 | numpy==1.26.4 45 | openai==1.54.4 46 | orjson==3.10.11 47 | outcome==1.3.0.post0 48 | packaging==24.2 49 | Pillow==10.1.0 50 | posthog==3.7.2 51 | propcache==0.2.0 52 | pydantic==2.9.2 53 | pydantic_core==2.23.4 54 | PySocks==1.7.1 55 | python-dateutil==2.9.0.post0 56 | python-dotenv==1.0.1 57 | pytz==2024.2 58 | PyYAML==6.0.2 59 | regex==2024.11.6 60 | requests==2.32.3 61 | requests-toolbelt==1.0.0 62 | selenium==4.26.1 63 | Selenium-Screenshot==2.1.0 64 | six==1.16.0 65 | sniffio==1.3.1 66 | sortedcontainers==2.4.0 67 | soupsieve==2.6 68 | SQLAlchemy==2.0.36 69 | tenacity==9.0.0 70 | tiktoken==0.8.0 71 | tld==0.13 72 | tqdm==4.67.0 73 | trafilatura==1.12.2 74 | trio==0.27.0 75 | trio-websocket==0.11.1 76 | typing_extensions==4.12.2 77 | tzlocal==5.2 78 | urllib3==2.2.3 79 | webdriver-manager==4.0.2 80 | websocket-client==1.8.0 81 | wsproto==1.2.0 82 | yarl==1.17.1 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |

WebVoyager evaluation for Browser Use 3 |

4 | 5 |
6 | This repository is a fork of original repo
7 | 8 |
9 | 10 |
11 | 12 | # Evaluation runs 13 | 14 | The file structure is the same as the original repo. 15 | The only difference is that the `run_browser_use.py` is modified to add the browser use evaluation, we also changed the prompts to be suitable for the browser use evaluation (VERY minimal changes - evaluate multiple images, not just one) and switched to langchain. 16 | 17 | We also have a list of impossible tasks that are not possible anymore (completely outdated, can't be fixed with dates). 18 | 19 | We changed some tasks that included dates to be more in the future instead of the past since the data is outdated which would make the task impossible (e.g. "Please find me a hotel on 2023-12-01 on booking.com", which is impossible since you can't search for a hotel in the past). 20 | 21 | We ran the evaluation on 16gb of RAM with 15 concurrent tasks. 22 | 23 | `requirements.txt` is missing `browser-use` on purpose since we install it by building the package locally. 24 | 25 | ## Manual correction of evaluations 26 | 27 | The eval model is not good. That's why we added another success criteria - `unknown` if the eval model is not sure. 28 | 29 | Most of the tasks are indeed correct, but some tasks had wrong assesment, and `unknown` either went into `success` or `failed`. 30 | 31 | We manually reviewed the evaluations for the tasks that are either "unknown" or "failed" and corrected them. This is due to the fact that the default WebVoyager evaluator is not good. 32 | 33 | ## Costs 34 | 35 | The whole cost of running the dataset once is around 250 USD for gpt4o. 36 | 37 | ## Interesting findings 38 | 39 | - WebVoyager is a terrible dataset. 40 | - a lot of tasks are straight up impossible (outdated usually) 41 | - many prompts are VERY ambiguous and can be interpreted in many ways - which confuses the model (both agent and eval model) 42 | - why do we have to rely on third party websites? That is not scalable. 43 | - the dataset does not test for actual understanding of website, mostly of planning and reasoning - NOT what you want from web agent evaluations on COMPLEX sites 44 | 45 | # Todos 46 | 47 | - make manually labeled items more transparent 48 | - add proxies 49 | - test all kinds of models and different setups (Claude, GPT-4o, Llama 3, etc.) 50 | - test different setups (single vs multiple images, single vs multiple tasks, etc.) 51 | -------------------------------------------------------------------------------- /evaluation/auto_eval_browser_use.py: -------------------------------------------------------------------------------- 1 | import time 2 | from typing import TYPE_CHECKING 3 | 4 | from browser_use import AgentHistoryList 5 | from langchain_anthropic import ChatAnthropic 6 | from langchain_core.messages import HumanMessage, SystemMessage 7 | from langchain_google_genai import ChatGoogleGenerativeAI 8 | from langchain_openai import AzureChatOpenAI 9 | 10 | if TYPE_CHECKING: 11 | from run_browser_use import EvalResult 12 | 13 | 14 | SYSTEM_PROMPT = """As an evaluator, you will be presented with three primary components to assist you in your role: 15 | 16 | 1. Web Task Instruction: This is a clear and specific directive provided in natural language, detailing the online activity to be carried out. These requirements may include conducting searches, verifying information, comparing prices, checking availability, or any other action relevant to the specified web service (such as Amazon, Apple, ArXiv, BBC News, Booking etc). 17 | 18 | 2. Result Screenshots: This is a visual representation of the screen showing the result or intermediate state of performing a web task. It serves as visual proof of the actions taken in response to the instruction, and may not represent everything the agent sees. 19 | 20 | 3. Result Response: This is a textual response obtained after the execution of the web task. It serves as textual result in response to the instruction. 21 | 22 | -- You DO NOT NEED to interact with web pages or perform actions such as booking flights or conducting searches on websites. 23 | -- You SHOULD NOT make assumptions based on information not presented in the screenshot when comparing it to the instructions. If you cannot find any information in the screenshot that matches the instruction, you can believe the information in the response. 24 | -- Your primary responsibility is to conduct a thorough assessment of the web task instruction against the outcome depicted in the screenshot and in the response, evaluating whether the actions taken align with the given instructions. 25 | -- NOTE that the instruction may involve more than one task, for example, locating the garage and summarizing the review. Failing to complete either task, such as not providing a summary, should be considered unsuccessful. 26 | -- NOTE that the screenshot is authentic, but the response provided by LLM is generated at the end of web browsing, and there may be discrepancies between the text and the screenshots. 27 | -- Note the difference: 1) Result response may contradict the screenshot, then the content of the screenshot prevails, 2) The content in the Result response is not mentioned on the screenshot, choose to believe the content. 28 | -- If you are not sure whether you should believe the content in the response, you should choose unknown. 29 | 30 | You should elaborate on how you arrived at your final evaluation and then provide a definitive verdict on whether the task has been successfully accomplished, either as 'SUCCESS', 'NOT SUCCESS', or 'UNKNOWN'.""" 31 | 32 | USER_PROMPT = """TASK: 33 | Result Response: 34 | screenshot at the end: """ 35 | 36 | 37 | async def auto_eval_by_gpt4o( 38 | history: AgentHistoryList, 39 | task: str, 40 | openai_client: AzureChatOpenAI | ChatAnthropic | ChatGoogleGenerativeAI, 41 | ) -> tuple["EvalResult", str]: 42 | # print(f"--------------------- {process_dir} ---------------------") 43 | 44 | if not history.is_done(): 45 | return "failed", "" 46 | 47 | answer = history.final_result() 48 | if answer is None: 49 | return "failed", "" 50 | 51 | screenshots = history.screenshots()[-4:] 52 | screenshot_content = [ 53 | { 54 | "type": "image_url", 55 | "image_url": {"url": f"data:image/png;base64,{screenshot}"}, 56 | } 57 | for screenshot in screenshots 58 | ] 59 | 60 | # Prepare GPT-4V messages 61 | user_prompt_tmp = USER_PROMPT.replace("", task) 62 | user_prompt_tmp = user_prompt_tmp.replace("", answer) 63 | user_prompt_tmp = user_prompt_tmp.replace("", str(len(screenshots))) 64 | 65 | messages = [ 66 | SystemMessage(content=SYSTEM_PROMPT), 67 | HumanMessage( 68 | content=[ 69 | {"type": "text", "text": user_prompt_tmp}, 70 | *screenshot_content, 71 | {"type": "text", "text": "Your verdict:\n"}, 72 | ] 73 | ), 74 | ] 75 | 76 | while True: 77 | try: 78 | # print("Calling gpt4v API to get the auto evaluation......") 79 | response = await openai_client.ainvoke(messages) 80 | # print("API call complete...") 81 | break 82 | except Exception as e: 83 | print(e) 84 | if type(e).__name__ == "RateLimitError": 85 | time.sleep(10) 86 | elif type(e).__name__ == "APIError": 87 | time.sleep(15) 88 | elif type(e).__name__ == "InvalidRequestError": 89 | exit(0) 90 | else: 91 | time.sleep(10) 92 | 93 | gpt_4v_res = str(response.content) 94 | 95 | if gpt_4v_res is None: 96 | return "unknown", "" 97 | elif "NOT SUCCESS" in gpt_4v_res: 98 | auto_eval_res = "failed" 99 | elif "SUCCESS" in gpt_4v_res: 100 | auto_eval_res = "success" 101 | elif "UNKNOWN" in gpt_4v_res: 102 | auto_eval_res = "unknown" 103 | else: 104 | auto_eval_res = "failed" 105 | 106 | return auto_eval_res, gpt_4v_res 107 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /run_browser_use.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import asyncio 3 | import json 4 | import logging 5 | import os 6 | import random 7 | import shutil 8 | from asyncio import Semaphore 9 | from dataclasses import dataclass, field 10 | from datetime import datetime 11 | from pathlib import Path 12 | from typing import Generator, List, Literal, Set, TypedDict 13 | 14 | from browser_use import Agent, Browser, BrowserConfig 15 | from browser_use.browser.context import BrowserContextConfig 16 | from dotenv import load_dotenv 17 | from langchain_anthropic import ChatAnthropic 18 | from langchain_google_genai import ChatGoogleGenerativeAI 19 | from langchain_openai import AzureChatOpenAI 20 | from pydantic import BaseModel, Field, SecretStr 21 | 22 | from evaluation.auto_eval_browser_use import auto_eval_by_gpt4o 23 | 24 | load_dotenv() 25 | 26 | 27 | class TaskData(TypedDict): 28 | id: str 29 | web: str 30 | ques: str 31 | 32 | 33 | EvalResult = Literal["success", "failed", "unknown"] 34 | 35 | 36 | @dataclass 37 | class RunStats: 38 | total_tasks: int 39 | current_task: int = 0 40 | successful_tasks: Set[str] = field(default_factory=set) 41 | failed_tasks: Set[str] = field(default_factory=set) 42 | unknown_tasks: Set[str] = field(default_factory=set) 43 | 44 | def update(self, task_id: str, success: "EvalResult") -> None: 45 | if success == "success": 46 | self.successful_tasks.add(task_id) 47 | elif success == "failed": 48 | self.failed_tasks.add(task_id) 49 | else: 50 | self.unknown_tasks.add(task_id) 51 | 52 | def get_success_rate(self) -> str: 53 | if self.current_task == 0: 54 | return "0/0=0.00" 55 | return f"{len(self.successful_tasks)}/{self.current_task}={len(self.successful_tasks) / self.current_task:.2f}" 56 | 57 | def print_periodic_summary(self) -> None: 58 | print("\n=== Task Summary ===") 59 | print( 60 | f"Successful tasks ({len(self.successful_tasks)}): {sorted(list(self.successful_tasks))}" 61 | ) 62 | print( 63 | f"Failed tasks ({len(self.failed_tasks)}): {sorted(list(self.failed_tasks))}" 64 | ) 65 | print(f"Current success rate: {self.get_success_rate()}") 66 | print("==================\n") 67 | 68 | 69 | class TaskResult(BaseModel): 70 | task_id: str 71 | web_name: str 72 | start_time: datetime 73 | end_time: datetime 74 | duration_seconds: float 75 | num_steps: int 76 | success: EvalResult 77 | task_prompt: str 78 | final_answer: str 79 | gpt_4v_res: str 80 | 81 | 82 | class ExperimentResults(BaseModel): 83 | total_tasks: int = 0 84 | total_success: int = 0 85 | total_failed: int = 0 86 | total_unknown: int = 0 87 | all_tasks: List[TaskResult] = Field(default_factory=list) 88 | 89 | 90 | def cleanup_webdriver_cache() -> None: 91 | """Clean up webdriver cache directories.""" 92 | cache_paths = [ 93 | Path.home() / ".wdm", 94 | Path.home() / ".cache" / "selenium", 95 | Path.home() / "Library" / "Caches" / "selenium", 96 | ] 97 | for path in cache_paths: 98 | if path.exists(): 99 | print(f"Removing cache directory: {path}") 100 | shutil.rmtree(path, ignore_errors=True) 101 | 102 | 103 | def create_task_result( 104 | task: TaskData, 105 | start_time: datetime, 106 | eval_result: EvalResult, 107 | num_steps: int, 108 | final_answer: str, 109 | gpt_4v_res: str, 110 | ) -> TaskResult: 111 | """Create task result object.""" 112 | end_time = datetime.now() 113 | return TaskResult( 114 | task_id=task["id"], 115 | web_name=task["web"], 116 | start_time=start_time, 117 | end_time=end_time, 118 | duration_seconds=(end_time - start_time).total_seconds(), 119 | num_steps=num_steps, 120 | success=eval_result, 121 | task_prompt=f"{task['ques']} on {task['web']}", 122 | final_answer=final_answer, 123 | gpt_4v_res=gpt_4v_res, 124 | ) 125 | 126 | 127 | def save_results( 128 | task_result: TaskResult, 129 | task_dir: Path, 130 | ) -> None: 131 | """Save results to files.""" 132 | # Save interaction messages 133 | 134 | with open(task_dir / "task_result.json", "w") as f: 135 | json.dump(task_result.model_dump(), f, indent=2, default=str) 136 | 137 | 138 | def print_task_progress( 139 | task_id: str, steps: int, success: EvalResult, stats: RunStats 140 | ) -> None: 141 | """Print concise task progress.""" 142 | status = "✓" if success == "success" else "✗" if success == "failed" else "?" 143 | print( 144 | f"Task {task_id} [{stats.current_task}/{stats.total_tasks}] " 145 | f"Steps: {steps} Status: {status} Score: {stats.get_success_rate()}" 146 | ) 147 | 148 | 149 | def save_experiment_results(experiment_results: ExperimentResults) -> None: 150 | """Save experiment results to file.""" 151 | with open("results/examples-browser-use/experiment_results.json", "w") as f: 152 | json.dump(experiment_results.model_dump(), f, indent=2, default=str) 153 | 154 | 155 | @dataclass 156 | class LLMModel: 157 | model: AzureChatOpenAI 158 | token_limit: int 159 | 160 | 161 | def get_llm_model_generator( 162 | model_provider: str, 163 | ) -> Generator[AzureChatOpenAI | ChatAnthropic, None, None]: 164 | """Generator that creates fresh model instances each time""" 165 | while True: 166 | # Force reload environment variables 167 | load_dotenv(override=True) 168 | 169 | if model_provider == "anthropic": 170 | # Create fresh instances each time, reading current env vars 171 | yield ChatAnthropic( 172 | model_name="claude-3-5-sonnet-20240620", 173 | timeout=25, 174 | stop=None, 175 | temperature=0.0, 176 | ) 177 | 178 | elif model_provider == "azure": 179 | # Create fresh instances each time, reading current env vars 180 | west_eu = LLMModel( 181 | model=AzureChatOpenAI( 182 | model="gpt-4o", 183 | api_version="2024-10-21", 184 | azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT_WEST_EU", ""), 185 | api_key=SecretStr(os.getenv("AZURE_OPENAI_API_KEY_WEST_EU", "")), 186 | ), 187 | token_limit=900, 188 | ) 189 | east_us = LLMModel( 190 | model=AzureChatOpenAI( 191 | model="gpt-4o", 192 | api_version="2024-10-21", 193 | azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT_EAST_US", ""), 194 | api_key=SecretStr(os.getenv("AZURE_OPENAI_API_KEY_EAST_US", "")), 195 | ), 196 | token_limit=450, 197 | ) 198 | east_us_2 = LLMModel( 199 | model=AzureChatOpenAI( 200 | model="gpt-4o", 201 | api_version="2024-10-21", 202 | azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT_EAST_US_2", ""), 203 | api_key=SecretStr(os.getenv("AZURE_OPENAI_API_KEY_EAST_US_2", "")), 204 | ), 205 | token_limit=450, 206 | ) 207 | west_us = LLMModel( 208 | model=AzureChatOpenAI( 209 | model="gpt-4o", 210 | api_version="2024-10-21", 211 | azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT_WEST_US", ""), 212 | api_key=SecretStr(os.getenv("AZURE_OPENAI_API_KEY_WEST_US", "")), 213 | ), 214 | token_limit=450, 215 | ) 216 | 217 | # Yield fresh instances in the same pattern 218 | yield west_eu.model # First 900 219 | yield west_eu.model # Second 900 220 | yield east_us.model # 450 221 | yield east_us_2.model # 450 222 | yield west_us.model # 450 223 | elif model_provider == "google/gemini-1.5-flash": 224 | llm = ChatGoogleGenerativeAI( 225 | model="gemini-1.5-flash", 226 | ) 227 | yield llm 228 | elif model_provider == "google/gemini-1.5-pro": 229 | llm = ChatGoogleGenerativeAI( 230 | model="gemini-1.5-pro", 231 | ) 232 | yield llm 233 | elif model_provider == "google/gemini-1.5-flash-8b": 234 | llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-8b") 235 | yield llm 236 | else: 237 | raise ValueError(f"Invalid model provider: {model_provider}") 238 | 239 | 240 | async def process_single_task( 241 | task: TaskData, 242 | client: AzureChatOpenAI | ChatAnthropic, 243 | stats: RunStats, 244 | results_dir: Path, 245 | experiment_results: ExperimentResults, 246 | browser: Browser, 247 | ) -> None: 248 | """Process a single task asynchronously.""" 249 | task_str = f"{task['ques']} on {task['web']}" 250 | start_time = datetime.now() 251 | task_dir = results_dir / f"{task['id']}" 252 | task_dir.mkdir(exist_ok=True) 253 | 254 | try: 255 | if not (task_dir / "task_result.json").exists(): 256 | logging.getLogger("browser_use").setLevel(logging.INFO) 257 | 258 | agent = Agent( 259 | task=task_str, 260 | llm=client, 261 | browser=browser, 262 | validate_output=True, 263 | generate_gif=False, 264 | ) 265 | 266 | history = await agent.run(max_steps=30) 267 | history.save_to_file(task_dir / "history.json") 268 | 269 | eval_result, gpt_4v_res = await auto_eval_by_gpt4o( 270 | task=task_str, 271 | openai_client=client, 272 | history=history, 273 | ) 274 | 275 | task_result = create_task_result( 276 | task, 277 | start_time, 278 | eval_result, 279 | len(history.history), 280 | history.final_result() or "", 281 | gpt_4v_res, 282 | ) 283 | save_results(task_result, task_dir) 284 | else: 285 | task_result = TaskResult(**json.load(open(task_dir / "task_result.json"))) 286 | eval_result = task_result.success 287 | 288 | stats.update(task["id"], eval_result) 289 | print_task_progress(task["id"], task_result.num_steps, eval_result, stats) 290 | 291 | # Update experiment results 292 | experiment_results.all_tasks.append(task_result) 293 | experiment_results.total_tasks += 1 294 | experiment_results.total_success += int(eval_result == "success") 295 | experiment_results.total_failed += int(eval_result == "failed") 296 | experiment_results.total_unknown += int(eval_result == "unknown") 297 | # save curent stats to file 298 | print(f"Saving stats to file {stats.current_task} {stats.get_success_rate()}") 299 | # with open(file="results/examples-browser-use/aaa_stats.txt", mode="a") as f: 300 | # # in one line 301 | # f.write(f"{stats.current_task}\n") 302 | # f.write(f"{stats.get_success_rate()}\n") 303 | 304 | except Exception as e: 305 | logging.error(f"Error processing task {task['id']}: {str(e)}") 306 | stats.update(task["id"], "failed") # Mark as failed instead of crashing 307 | return 308 | 309 | finally: 310 | await browser.close() 311 | 312 | 313 | async def main(max_concurrent_tasks: int, model_provider: str) -> None: 314 | try: 315 | # Setup 316 | cleanup_webdriver_cache() 317 | semaphore = Semaphore(max_concurrent_tasks) 318 | 319 | # Load tasks 320 | tasks: List[TaskData] = [] 321 | with open("data/WebVoyager_data.jsonl", "r") as f: 322 | for line in f: 323 | tasks.append(json.loads(line)) 324 | 325 | # remove impossible tasks 326 | with open("data/WebVoyagerImpossibleTasks.json", "r") as f: 327 | impossible_tasks = set(json.load(f)) 328 | tasks = [task for task in tasks if task["id"] not in impossible_tasks] 329 | 330 | # randomize the order of tasks 331 | random.seed(42) 332 | random.shuffle(tasks) 333 | 334 | # Initialize 335 | 336 | experiment_results = ExperimentResults() 337 | stats = RunStats(total_tasks=len(tasks)) 338 | results_dir = Path("results/examples-browser-use") 339 | results_dir.mkdir(parents=True, exist_ok=True) 340 | 341 | # Process tasks concurrently with semaphore 342 | async def process_with_semaphore( 343 | task: TaskData, client: AzureChatOpenAI | ChatAnthropic 344 | ) -> None: 345 | async with semaphore: 346 | print(f"\n=== Now at task {task['id']} ===") 347 | 348 | # Create browser instance inside the semaphore block 349 | browser = Browser( 350 | config=BrowserConfig( 351 | headless=True, 352 | disable_security=True, 353 | new_context_config=BrowserContextConfig( 354 | disable_security=True, 355 | wait_for_network_idle_page_load_time=5, 356 | maximum_wait_page_load_time=20, 357 | # no_viewport=True, 358 | browser_window_size={ 359 | "width": 1280, 360 | "height": 1100, 361 | }, 362 | # trace_path=str(results_dir / f"{task['id']}"), 363 | ), 364 | ) 365 | ) 366 | 367 | await process_single_task( 368 | task, 369 | client, 370 | stats, 371 | results_dir, 372 | experiment_results, 373 | browser, # Pass browser instance 374 | ) 375 | stats.current_task += 1 376 | 377 | # Add this to ensure browser is always closed 378 | try: 379 | await browser.close() 380 | except Exception as e: 381 | logging.error(f"Error closing browser: {e}") 382 | 383 | print(f"Current task: {stats.current_task}") 384 | print(f"Total tasks: {stats.total_tasks}") 385 | print(f"Success rate: {stats.get_success_rate()}") 386 | # if stats.current_task % max_concurrent_tasks == 0: 387 | stats.print_periodic_summary() 388 | save_experiment_results(experiment_results) 389 | 390 | # Create and run all tasks 391 | all_tasks = [] 392 | for i, task in enumerate(tasks): 393 | model = next(get_llm_model_generator(model_provider)) 394 | all_tasks.append(process_with_semaphore(task, model)) 395 | 396 | # Add timeout and better error handling 397 | await asyncio.gather(*all_tasks, return_exceptions=True) 398 | except Exception as e: 399 | logging.error(f"Main loop error: {e}") 400 | finally: 401 | # Cleanup code here 402 | logging.info("Shutting down...") 403 | stats.print_periodic_summary() 404 | 405 | 406 | if __name__ == "__main__": 407 | try: 408 | parser = argparse.ArgumentParser( 409 | description="Run browser tasks with concurrent execution" 410 | ) 411 | parser.add_argument( 412 | "--max-concurrent", 413 | type=int, 414 | default=3, 415 | help="Maximum number of concurrent tasks (default: 3)", 416 | ) 417 | parser.add_argument( 418 | "--model-provider", 419 | type=str, 420 | default="azure", 421 | help="Model provider (default: azure)", 422 | choices=[ 423 | "azure", 424 | "anthropic", 425 | "google/gemini-1.5-flash", 426 | "google/gemini-1.5-flash-8b", 427 | "google/gemini-1.5-pro", 428 | ], 429 | ) 430 | args = parser.parse_args() 431 | 432 | logging.info(f"Running with {args.max_concurrent} concurrent tasks") 433 | 434 | asyncio.run(main(args.max_concurrent, args.model_provider)) 435 | except KeyboardInterrupt: 436 | print("\nReceived keyboard interrupt, shutting down...") 437 | except Exception as e: 438 | print(f"Fatal error: {e}") 439 | logging.exception("Fatal error occurred") 440 | -------------------------------------------------------------------------------- /data/GAIA_web.jsonl: -------------------------------------------------------------------------------- 1 | {"task_id": "e1fc63a2-da7a-432f-be78-7c4a95598703", "Level": 1, "Final answer": "17", "id": "level1-0", "web": "https://www.google.com/", "ques": "If Eliud Kipchoge could maintain his record-making marathon pace indefinitely, how many thousand hours would it take him to run the distance between the Earth and the Moon its closest approach? Please use the minimum perigee value on the Wikipedia page for the Moon when carrying out your calculation. Round your result to the nearest 1000 hours and do not use any comma separators if necessary."} 2 | {"task_id": "8e867cd7-cff9-4e6c-867a-ff5ddc2550be", "Level": 1, "Final answer": "3", "id": "level1-1", "web": "https://www.google.com/", "ques": "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."} 3 | {"task_id": "5d0080cb-90d7-4712-bc33-848150e917d3", "Level": 1, "Final answer": "0.1777", "id": "level1-2", "web": "https://www.google.com/", "ques": "What was the volume in m^3 of the fish bag that was calculated in the University of Leicester paper \"Can Hiccup Supply Enough Fish to Maintain a Dragon\u2019s Diet?\""} 4 | {"task_id": "a1e91b78-d3d8-4675-bb8d-62741b4b68a6", "Level": 1, "Final answer": "3", "id": "level1-3", "web": "https://www.google.com/", "ques": "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"} 5 | {"task_id": "46719c30-f4c3-4cad-be07-d5cb21eee6bb", "Level": 1, "Final answer": "Mapping Human Oriented Information to Software Agents for Online Systems Usage", "id": "level1-4", "web": "https://www.google.com/", "ques": "Of the authors (First M. Last) that worked on the paper \"Pie Menus or Linear Menus, Which Is Better?\" in 2015, what was the title of the first paper authored by the one that had authored prior papers?"} 6 | {"task_id": "4b6bb5f7-f634-410e-815d-e673ab7f8632", "Level": 1, "Final answer": "THE CASTLE", "id": "level1-5", "web": "https://www.google.com/", "ques": "In Series 9, Episode 11 of Doctor Who, the Doctor is trapped inside an ever-shifting maze. What is this location called in the official script for the episode? Give the setting exactly as it appears in the first scene heading."} 7 | {"task_id": "b816bfce-3d80-4913-a07d-69b752ce6377", "Level": 1, "Final answer": "fluffy", "id": "level1-6", "web": "https://www.google.com/", "ques": "In Emily Midkiff's June 2014 article in a journal named for the one of Hreidmar's sons that guarded his house, what word was quoted from two different authors in distaste for the nature of dragon depictions?"} 8 | {"task_id": "72e110e7-464c-453c-a309-90a95aed6538", "Level": 1, "Final answer": "Guatemala", "id": "level1-7", "web": "https://www.google.com/", "ques": "Under DDC 633 on Bielefeld University Library's BASE, as of 2020, from what country was the unknown language article with a flag unique from the others?"} 9 | {"task_id": "b415aba4-4b68-4fc6-9b89-2c812e55a3e1", "Level": 1, "Final answer": "diamond", "id": "level1-8", "web": "https://www.google.com/", "ques": "In Nature journal's Scientific Reports conference proceedings from 2012, in the article that did not mention plasmons or plasmonics, what nano-compound is studied? Don't use the prefix nano in your answer if there is one."} 10 | {"task_id": "935e2cff-ae78-4218-b3f5-115589b19dae", "Level": 1, "Final answer": "research", "id": "level1-9", "web": "https://www.google.com/", "ques": "In the year 2022, and before December, what does \"R\" stand for in the three core policies of the type of content that was violated in the public logs on the Legume Wikipedia page?"} 11 | {"task_id": "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8", "Level": 1, "Final answer": "FunkMonk", "id": "level1-10", "web": "https://www.google.com/", "ques": "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?"} 12 | {"task_id": "5188369a-3bbe-43d8-8b94-11558f909a08", "Level": 1, "Final answer": "Annie Levin", "id": "level1-11", "web": "https://www.google.com/", "ques": "What writer is quoted by Merriam-Webster for the Word of the Day from June 27, 2022?"} 13 | {"task_id": "9d191bce-651d-4746-be2d-7ef8ecadb9c2", "Level": 1, "Final answer": "Extremely", "id": "level1-12", "web": "https://www.google.com/", "ques": "Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.\n\nWhat does Teal'c say in response to the question \"Isn't that hot?\""} 14 | {"task_id": "cabe07ed-9eca-40ea-8ead-410ef5e83f91", "Level": 1, "Final answer": "Louvrier", "id": "level1-13", "web": "https://www.google.com/", "ques": "What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?"} 15 | {"task_id": "d0633230-7067-47a9-9dbf-ee11e0a2cdd6", "Level": 1, "Final answer": "BaseLabelPropagation", "id": "level1-14", "web": "https://www.google.com/", "ques": "In the Scikit-Learn July 2017 changelog, what other predictor base command received a bug fix? Just give the name, not a path."} 16 | {"task_id": "0383a3ee-47a7-41a4-b493-519bdefe0488", "Level": 1, "Final answer": "Rockhopper penguin", "id": "level1-15", "web": "https://www.google.com/", "ques": "On the BBC Earth YouTube video of the Top 5 Silliest Animal Moments, what species of bird is featured?"} 17 | {"task_id": "11af4e1a-5f45-467d-9aeb-46f4bb0bf034", "Level": 1, "Final answer": "6", "id": "level1-16", "web": "https://www.google.com/", "ques": "How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?"} 18 | {"task_id": "7673d772-ef80-4f0f-a602-1bf4485c9b43", "Level": 1, "Final answer": "inference", "id": "level1-17", "web": "https://www.google.com/", "ques": "On Cornell Law School website's legal information institute, under the fifth section of federal rules alphabetically, what word was deleted in the last amendment to the first rule in the article that has \"witnesses\" in the most titles as of 2021?"} 19 | {"task_id": "c365c1c7-a3db-4d5e-a9a1-66f56eae7865", "Level": 1, "Final answer": "Braintree, Honolulu", "id": "level1-18", "web": "https://www.google.com/", "ques": "Of the cities within the United States where U.S. presidents were born, which two are the farthest apart from the westernmost to the easternmost going east, giving the city names only? Give them to me in alphabetical order, in a comma-separated list"} 20 | {"task_id": "7d4a7d1d-cac6-44a8-96e8-ea9584a70825", "Level": 1, "Final answer": "22", "id": "level1-19", "web": "https://www.google.com/", "ques": "According to Girls Who Code, how long did it take in years for the percentage of computer scientists that were women to change by 13% from a starting point of 37%?"} 21 | {"task_id": "dc22a632-937f-4e6a-b72f-ba0ff3f5ff97", "Level": 1, "Final answer": "Five Hundred Things To Eat Before It's Too Late: and the Very Best Places to Eat Them", "id": "level1-20", "web": "https://www.google.com/", "ques": "What was the complete title of the book in which two James Beard Award winners recommended the restaurant where Ali Khan enjoyed a New Mexican staple in his cost-conscious TV show that started in 2015? Write the numbers in plain text if there are some in the title."} 22 | {"task_id": "3f57289b-8c60-48be-bd80-01f8099ca449", "Level": 1, "Final answer": "519", "id": "level1-21", "web": "https://www.google.com/", "ques": "How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?"} 23 | {"task_id": "23dd907f-1261-4488-b21c-e9185af91d5e", "Level": 1, "Final answer": "2", "id": "level1-22", "web": "https://www.google.com/", "ques": "In Audre Lorde\u2019s poem \u201cFather Son and Holy Ghost\u201d, what is the number of the stanza in which some lines are indented?"} 24 | {"task_id": "840bfca7-4f7b-481a-8794-c560c340185d", "Level": 1, "Final answer": "80GSFC21M0002", "id": "level1-23", "web": "https://www.google.com/", "ques": "On June 6, 2023, an article by Carolyn Collins Petersen was published in Universe Today. This article mentions a team that produced a paper about their observations, linked at the bottom of the article. Find this paper. Under what NASA award number was the work performed by R. G. Arendt supported by?"} 25 | {"task_id": "a0068077-79f4-461a-adfe-75c1a4148545", "Level": 1, "Final answer": "90", "id": "level1-24", "web": "https://www.google.com/", "ques": "What was the actual enrollment count of the clinical trial on H. pylori in acne vulgaris patients from Jan-May 2018 as listed on the NIH website?"} 26 | {"task_id": "bda648d7-d618-4883-88f4-3466eabd860e", "Level": 1, "Final answer": "Saint Petersburg", "id": "level1-25", "web": "https://www.google.com/", "ques": "Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited? Just give me the city name without abbreviations."} 27 | {"task_id": "c61d22de-5f6c-4958-a7f6-5e9707bd3466", "Level": 2, "Final answer": "egalitarian", "id": "level2-0", "web": "https://www.google.com/", "ques": "A paper about AI regulation that was originally submitted to arXiv.org in June 2022 shows a figure with three axes, where each axis has a label word at both ends. Which of these words is used to describe a type of society in a Physics and Society article submitted to arXiv.org on August 11, 2016?"} 28 | {"task_id": "17b5a6a3-bc87-42e8-b0fb-6ab0781ef2cc", "Level": 2, "Final answer": "34689", "id": "level2-1", "web": "https://www.google.com/", "ques": "I\u2019m researching species that became invasive after people who kept them as pets released them. There\u2019s a certain species of fish that was popularized as a pet by being the main character of the movie Finding Nemo. According to the USGS, where was this fish found as a nonnative species, before the year 2020? I need the answer formatted as the five-digit zip codes of the places the species was found, separated by commas if there is more than one place."} 29 | {"task_id": "04a04a9b-226c-43fd-b319-d5e89743676f", "Level": 2, "Final answer": "41", "id": "level2-2", "web": "https://www.google.com/", "ques": "If we assume all articles published by Nature in 2020 (articles, only, not book reviews/columns, etc) relied on statistical significance to justify their findings and they on average came to a p-value of 0.04, how many papers would be incorrect as to their claims of statistical significance? Round the value up to the next integer."} 30 | {"task_id": "14569e28-c88c-43e4-8c32-097d35b9a67d", "Level": 2, "Final answer": "backtick", "id": "level2-3", "web": "https://www.google.com/", "ques": "In Unlambda, what exact charcter or text needs to be added to correct the following code to output \"For penguins\"? If what is needed is a character, answer with the name of the character. If there are different names for the character, use the shortest. The text location is not needed. Code:\n\n`r```````````.F.o.r. .p.e.n.g.u.i.n.si"} 31 | {"task_id": "3627a8be-a77f-41bb-b807-7e1bd4c0ebdf", "Level": 2, "Final answer": "142", "id": "level2-4", "web": "https://www.google.com/", "ques": "The object in the British Museum's collection with a museum number of 2012,5015.17 is the shell of a particular mollusk species. According to the abstract of a research article published in Science Advances in 2021, beads made from the shells of this species were found that are at least how many thousands of years old?"} 32 | {"task_id": "7619a514-5fa8-43ef-9143-83b66a43d7a4", "Level": 2, "Final answer": "04/15/18", "id": "level2-5", "web": "https://www.google.com/", "ques": "According to github, when was Regression added to the oldest closed numpy.polynomial issue that has the Regression label in MM/DD/YY?"} 33 | {"task_id": "2a649bb1-795f-4a01-b3be-9a01868dae73", "Level": 2, "Final answer": "3.1.3.1; 1.11.1.7", "id": "level2-6", "web": "https://www.google.com/", "ques": "What are the EC numbers of the two most commonly used chemicals for the virus testing method in the paper about SPFMV and SPCSV in the Pearl Of Africa from 2016? Return the semicolon-separated numbers in the order of the alphabetized chemicals."} 34 | {"task_id": "87c610df-bef7-4932-b950-1d83ef4e282b", "Level": 2, "Final answer": "Morarji Desai", "id": "level2-7", "web": "https://www.google.com/", "ques": "In April of 1977, who was the Prime Minister of the first place mentioned by name in the Book of Esther (in the New International Version)?"} 35 | {"task_id": "624cbf11-6a41-4692-af9c-36b3e5ca3130", "Level": 2, "Final answer": "So we had to let it die.", "id": "level2-8", "web": "https://www.google.com/", "ques": "What's the last line of the rhyme under the flavor name on the headstone visible in the background of the photo of the oldest flavor's headstone in the Ben & Jerry's online flavor graveyard as of the end of 2022?"} 36 | {"task_id": "dd3c7503-f62a-4bd0-9f67-1b63b94194cc", "Level": 2, "Final answer": "6", "id": "level2-9", "web": "https://www.google.com/", "ques": "Use density measures from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023.\n\nI have a gallon of honey and a gallon of mayonnaise at 25C. I remove one cup of honey at a time from the gallon of honey. How many times will I need to remove a cup to have the honey weigh less than the mayonaise? Assume the containers themselves weigh the same."} 37 | {"task_id": "f0f46385-fc03-4599-b5d3-f56496c3e69f", "Level": 2, "Final answer": "Indonesia, Myanmar", "id": "level2-10", "web": "https://www.google.com/", "ques": "In terms of geographical distance between capital cities, which 2 countries are the furthest from each other within the ASEAN bloc according to wikipedia? Answer using a comma separated list, ordering the countries by alphabetical order."} 38 | {"task_id": "e4e91f1c-1dcd-439e-9fdd-cb976f5293fd", "Level": 2, "Final answer": "cloak", "id": "level2-11", "web": "https://www.google.com/", "ques": "I need to fact-check a citation. This is the citation from the bibliography:\n\nGreetham, David. \"Uncoupled: OR, How I Lost My Author(s).\" Textual Cultures: Texts, Contexts, Interpretation, vol. 3 no. 1, 2008, p. 45-46. Project MUSE, doi:10.2979/tex.2008.3.1.44.\n\nAnd this is the in-line citation:\n\nOur relationship with the authors of the works we read can often be \u201cobscured not by a \"cloak of print\" but by the veil of scribal confusion and mis-transmission\u201d (Greetham 45-46).\n\nDoes the quoted text match what is actually in the article? If Yes, answer Yes, otherwise, give me the word in my citation that does not match with the correct one (without any article)."} 39 | {"task_id": "56137764-b4e0-45b8-9c52-1866420c3df5", "Level": 2, "Final answer": "Li Peng", "id": "level2-12", "web": "https://www.google.com/", "ques": "Which contributor to the version of OpenCV where support was added for the Mask-RCNN model has the same name as a former Chinese head of government when the names are transliterated to the Latin alphabet?"} 40 | {"task_id": "8b3379c0-0981-4f5b-8407-6444610cb212", "Level": 2, "Final answer": "1.8", "id": "level2-13", "web": "https://www.google.com/", "ques": "What is the maximum length in meters of #9 in the first National Geographic short on YouTube that was ever released according to the Monterey Bay Aquarium website? Just give the number."} 41 | {"task_id": "0ff53813-3367-4f43-bcbd-3fd725c1bf4b", "Level": 2, "Final answer": "beta geometric", "id": "level2-14", "web": "https://www.google.com/", "ques": "What two-word type of model did Manash Pratim Kashyap's and PS Fader's studies in customer retention studies published during 2018-2019 have in common (no punctuation)?"} 42 | {"task_id": "a7feb290-76bb-4cb7-8800-7edaf7954f2f", "Level": 2, "Final answer": "31", "id": "level2-15", "web": "https://www.google.com/", "ques": "How many High Energy Physics - Lattice articles listed in January 2020 on Arxiv had ps versions available?"} 43 | {"task_id": "b4cc024b-3f5e-480e-b96a-6656493255b5", "Level": 2, "Final answer": "Russian-German Legion", "id": "level2-16", "web": "https://www.google.com/", "ques": "The photograph in the Whitney Museum of American Art's collection with accession number 2022.128 shows a person holding a book. Which military unit did the author of this book join in 1813? Answer without using articles."} 44 | {"task_id": "33d8ea3b-6c6b-4ff1-803d-7e270dea8a57", "Level": 2, "Final answer": "2", "id": "level2-17", "web": "https://www.google.com/", "ques": "What is the minimum number of page links a person must click on to go from the english Wikipedia page on The Lord of the Rings (the book) to the english Wikipedia page on A Song of Ice and Fire (the book series)? In your count, include each link you would click on to get to the page. Use the pages as they appeared at the end of the day on July 3, 2023."} 45 | {"task_id": "e8cb5b03-41e0-4086-99e5-f6806cd97211", "Level": 2, "Final answer": "shrimp", "id": "level2-18", "web": "https://www.google.com/", "ques": "I went to Virtue restaurant & bar in Chicago for my birthday on March 22, 2021 and the main course I had was delicious! Unfortunately, when I went back about a month later on April 21, it was no longer on the dinner menu. Using the Wayback Machine, can you help me figure out which main course was on the dinner menu for Virtue on March 22, 2021 but not April 21, 2021? Answer using the singular form, without articles."} 46 | {"task_id": "f46b4380-207e-4434-820b-f32ce04ae2a4", "Level": 2, "Final answer": "Harbinger, Tidal", "id": "level2-19", "web": "https://www.google.com/", "ques": "It is 1999. Before you party like it is 1999, please assist me in settling a bet.\n\nFiona Apple and Paula Cole released albums prior to 1999. Of these albums, which didn't receive a letter grade from Robert Christgau? Provide your answer as a comma delimited list of album titles, sorted alphabetically."} 47 | {"task_id": "05407167-39ec-4d3a-a234-73a9120c325d", "Level": 2, "Final answer": "Format Document", "id": "level2-20", "web": "https://www.google.com/", "ques": "In the 2018 VSCode blog post on replit.com, what was the command they clicked on in the last video to remove extra lines?"} 48 | {"task_id": "b9763138-c053-4832-9f55-86200cb1f99c", "Level": 2, "Final answer": "3", "id": "level2-21", "web": "https://www.google.com/", "ques": "Compute the check digit the Tropicos ID for the Order Helotiales would have if it were an ISBN-10 number."} 49 | {"task_id": "16d825ff-1623-4176-a5b5-42e0f5c2b0ac", "Level": 2, "Final answer": "6:41 PM", "id": "level2-22", "web": "https://www.google.com/", "ques": "What time was the Tri-Rail train that carried the most passengers on May 27, 2019 scheduled to arrive in Pompano Beach? Express your answer in the 12-hour digital clock format without leading zero if any, and include whether it is AM or PM."} 50 | {"task_id": "544b7f0c-173a-4377-8d56-57b36eb26ddf", "Level": 2, "Final answer": "A Nightmare on Elm Street", "id": "level2-23", "web": "https://www.google.com/", "ques": "In Valentina Re\u2019s contribution to the 2017 book \u201cWorld Building: Transmedia, Fans, Industries\u201d, what horror movie does the author cite as having popularized metalepsis between a dream world and reality? Use the complete name with article if any."} 51 | {"task_id": "6b078778-0b90-464d-83f6-59511c811b01", "Level": 2, "Final answer": "Alfonso Visconti", "id": "level2-24", "web": "https://www.google.com/", "ques": "The Metropolitan Museum of Art has a portrait in its collection with an accession number of 29.100.5. Of the consecrators and co-consecrators of this portrait's subject as a bishop, what is the name of the one who never became pope?"} 52 | {"task_id": "08cae58d-4084-4616-b6dd-dd6534e4825b", "Level": 2, "Final answer": "2018", "id": "level2-25", "web": "https://www.google.com/", "ques": "According to Google Finance, when was the first year the Apple stock went above $50 (without adjusting for stock split)?"} 53 | {"task_id": "2dfc4c37-fec1-4518-84a7-10095d30ad75", "Level": 2, "Final answer": "6", "id": "level2-26", "web": "https://www.google.com/", "ques": "According to Box Office Mojo's 2020 Worldwide Box Office list, how many of the top 10 highest-grossing worldwide movies are also on the top 10 highest-grossing domestic movies? Your answer should be a numerical integer value."} 54 | {"task_id": "9f41b083-683e-4dcf-9185-ccfeaa88fa45", "Level": 2, "Final answer": "0", "id": "level2-27", "web": "https://www.google.com/", "ques": "How many pages if the 2023 IPCC report (85 pages version) mentions nuclear energy?"} 55 | {"task_id": "ecbc4f94-95a3-4cc7-b255-6741a458a625", "Level": 2, "Final answer": "13", "id": "level2-28", "web": "https://www.google.com/", "ques": "How many images are there in the latest 2022 Lego english wikipedia article?"} 56 | {"task_id": "71345b0a-9c7d-4b50-b2bf-937ec5879845", "Level": 2, "Final answer": "Here be dragons", "id": "level2-29", "web": "https://www.google.com/", "ques": "On a leap day before the year 2008, a joke was removed from the Wikipedia page for \u201cDragon\u201d. What was the phrase that was removed? Give the phrase as it appeared on the page, but without punctuation."} 57 | {"task_id": "7b5377b0-3f38-4103-8ad2-90fe89864c04", "Level": 2, "Final answer": "563.9", "id": "level2-30", "web": "https://www.google.com/", "ques": "Find the value of x to the nearest tenth: Lx = (d/dx * (A * x-squared)) + 4-thousand'n'ninety-7 minus C\nWhere L is the last two digits of the year of the Venezuelan Declaration of Independence,\nA is the number of colors in the TikTok logo as of July 2023, excluding black and white,\nand C is the height of the average woman in the Philippines according to a July 2023 Business Insider article, rounded to the nearest whole centimeter"} 58 | {"task_id": "114d5fd0-e2ae-4b6d-a65a-870da2d19c08", "Level": 2, "Final answer": "4", "id": "level2-31", "web": "https://www.google.com/", "ques": "In the endnote found in the second-to-last paragraph of page 11 of the book with the doi 10.2307/j.ctv9b2xdv, what date in November was the Wikipedia article accessed? Just give the day of the month."} 59 | {"task_id": "ad37a656-079a-49f9-a493-7b739c9167d1", "Level": 2, "Final answer": "Bravo", "id": "level2-32", "web": "https://www.google.com/", "ques": "On July 15, 2008, Phys.org published an article about a catastrophe. Find the explosive force of this catastrophe according to Encyclopedia Britannica, then find the name of the US nuclear test that had the same yield. Your answer should only be the last word of the name of the test."} 60 | {"task_id": "f3917a3d-1d17-4ee2-90c5-683b072218fe", "Level": 2, "Final answer": "2732", "id": "level2-33", "web": "https://www.google.com/", "ques": "How many edits were made to the Wikipedia page on Antidisestablishmentarianism from its inception until June of 2023?"} 61 | {"task_id": "48eb8242-1099-4c26-95d4-ef22b002457a", "Level": 2, "Final answer": "6", "id": "level2-34", "web": "https://www.google.com/", "ques": "How many nonindigenous crocodiles were found in Florida from the year 2000 through 2020? You can get the data from the USGS Nonindigenous Aquatic Species database."} 62 | {"task_id": "c8b7e059-c60d-472e-ad64-3b04ae1166dc", "Level": 2, "Final answer": "8", "id": "level2-35", "web": "https://www.google.com/", "ques": "The work referenced in footnote 397 of Federico Lauria's 2014 dissertation is also the source for the titles of two paintings in the Smithsonian American Art Museum's collection, as of August 2023. What is the absolute difference between the chapter numbers of the chapters that the titles of these two paintings quote?"} 63 | {"task_id": "d1af70ea-a9a4-421a-b9cc-94b5e02f1788", "Level": 2, "Final answer": "736455", "id": "level2-36", "web": "https://www.google.com/", "ques": "As of the 2020 census, what was the population difference between the largest county seat and smallest county seat, by land area of the county seat, in Washington state? For population figures, please use the official data from data.census.gov. Please report the integer difference."} 64 | {"task_id": "ded28325-3447-4c56-860f-e497d6fb3577", "Level": 2, "Final answer": "Picnic is in Ploybius Plaza.", "id": "level2-37", "web": "https://www.google.com/", "ques": "This is a secret message my friend gave me. It says where we should meet for our picnic on Friday. The only problem is, it\u2019s encrypted in the Caesar cipher, so I can\u2019t read it. Can you tell me what it says? This is the message:\n\nZsmxsm sc sx Zyvilsec Zvkjk."} 65 | {"task_id": "d700d50d-c707-4dca-90dc-4528cddd0c80", "Level": 2, "Final answer": "Roger Miller", "id": "level2-38", "web": "https://www.google.com/", "ques": "Who composed the song that was performed by a rooster and a hamster in separate animated videos at separate tempos with different lyrics? Answer using the format First name Last name."} 66 | {"task_id": "0a3cd321-3e76-4622-911b-0fda2e5d6b1a", "Level": 2, "Final answer": "Brunei, China, Morocco, Singapore", "id": "level2-39", "web": "https://www.google.com/", "ques": "According to the World Bank, which countries had gross savings of over 35% of GDP for every year in the period 2001-2010? Give your answer as a comma-separated list of countries in alphabetical order. Use the countries most common names in english when answering."} 67 | {"task_id": "f2feb6a4-363c-4c09-a804-0db564eafd68", "Level": 2, "Final answer": "900000", "id": "level2-40", "web": "https://www.google.com/", "ques": "I\u2019m thinking about selling my home, so I want to learn more about how homes in my area sold recently. I live in Pearl City, Hawaii, which is on the island of Oahu. I know two homes near me that sold in 2022 were 2072 Akaikai Loop, and 2017 Komo Mai Drive. Find which of those homes sold for more in 2022, and tell me how much it sold for. Don\u2019t put commas or decimal places in the answer."} 68 | {"task_id": "0b260a57-3f3a-4405-9f29-6d7a1012dbfb", "Level": 2, "Final answer": "0.269", "id": "level2-41", "web": "https://www.google.com/", "ques": "On ScienceDirect, what is the difference to 3 decimal places in the sample standard deviations of the number of Reference Works in each Life Science domain compared to Health Sciences as of 2022?"} 69 | {"task_id": "ed58682d-bc52-4baa-9eb0-4eb81e1edacc", "Level": 2, "Final answer": "stare", "id": "level2-42", "web": "https://www.google.com/", "ques": "What is the last word before the second chorus of the King of Pop's fifth single from his sixth studio album?"} 70 | {"task_id": "023e9d44-96ae-4eed-b912-244ee8c3b994", "Level": 2, "Final answer": "8", "id": "level2-43", "web": "https://www.google.com/", "ques": "It's May 2023, and I'm about to drive across the U.S. from California to Maine. I always recycle my water bottles at the end of a trip, and I drink 5 12-ounce water bottles for every 100 miles I travel, rounded to the nearest 100. Assuming I follow I-40 from Los Angeles to Cincinnati, then take I-90 from Cincinnati to Augusta, how many dollars will I get back according to Wikipedia?"} 71 | {"task_id": "0e9e85b8-52b9-4de4-b402-5f635ab9631f", "Level": 2, "Final answer": "1927", "id": "level2-44", "web": "https://www.google.com/", "ques": "What is the latest chronological year date written in the image on the webpage found when following the first citation reference link on the latest version of Carl Nebel's Wikipedia page as of August 2023?"} 72 | {"task_id": "20194330-9976-4043-8632-f8485c6c71b2", "Level": 2, "Final answer": "4", "id": "level2-45", "web": "https://www.google.com/", "ques": "The YouTube channel Game Grumps began a Let\u2019s Play of the game Sonic the Hedgehog (2006) in the year 2012. Thirty seconds into the first episode, a phrase is shown on the screen in white letters on a red background. How many times does the letter \"E\" appear in this phrase?"} 73 | {"task_id": "65638e28-7f37-4fa7-b7b9-8c19bb609879", "Level": 2, "Final answer": "Kleinpaul", "id": "level2-46", "web": "https://www.google.com/", "ques": "The book with the doi 10.1353/book.24372 concerns a certain neurologist. According to chapter 2 of the book, what author influenced this neurologist\u2019s belief in \u201cendopsychic myths\u201d? Give the last name only."} 74 | {"task_id": "3ff6b7a9-a5bd-4412-ad92-0cd0d45c0fee", "Level": 2, "Final answer": "56000", "id": "level2-47", "web": "https://www.google.com/", "ques": "The longest-lived vertebrate is named after an island. According to Wikipedia as of January 1, 2021, what is the 2020 estimated population of that island, to the nearest thousand?"} 75 | {"task_id": "708b99c5-e4a7-49cb-a5cf-933c8d46470d", "Level": 2, "Final answer": "Citations", "id": "level2-48", "web": "https://www.google.com/", "ques": "On the DeepFruits fruit detection graph on Connected Papers from 2016, what feature caused the largest bubble to be the size it is?"} 76 | {"task_id": "0a65cb96-cb6e-4a6a-8aae-c1084f613456", "Level": 2, "Final answer": "Holabird", "id": "level2-49", "web": "https://www.google.com/", "ques": "During the first week of August 2015, one of the NASA Astronomy Pictures of the Day shows the lights of a city on the horizon. The namesake of this city also has a landmark building in Chicago named after him. What is the name of the architectural firm that designed this landmark building? Give the first name appearing in the name of the firm as of June 2023."} 77 | {"task_id": "65da0822-a48a-4a68-bbad-8ed1b835a834", "Level": 2, "Final answer": "Santa Clara, Boston", "id": "level2-50", "web": "https://www.google.com/", "ques": "All of the individuals who formally held the position of United States secretary of homeland security prior to April 2019, excluding those who held the position in an acting capacity, have a bachelor's degree. Of the universities that these bachelor's degrees were from, which is the westernmost university and which is the easternmost university? Give them to me as a comma-separated list, I only want the name of the cities where the universities are located, with the westernmost city listed first."} 78 | {"task_id": "73c1b9fe-ee1d-4cf4-96ca-35c08f97b054", "Level": 2, "Final answer": "1954", "id": "level2-51", "web": "https://www.google.com/", "ques": "According to the USGS, in what year was the American Alligator first found west of Texas (not including Texas)?"} 79 | {"task_id": "e2d69698-bc99-4e85-9880-67eaccd66e6c", "Level": 2, "Final answer": "Michele Fitzgerald", "id": "level2-52", "web": "https://www.google.com/", "ques": "As of August 2023, who is the only winner of the US version of Survivor to be born in the month of May?"} 80 | {"task_id": "a56f1527-3abf-41d6-91f8-7296d6336c3f", "Level": 2, "Final answer": "185", "id": "level2-53", "web": "https://www.google.com/", "ques": "The cover of the August 2021 issue of Vogue shows a famous landmark in the background behind some trees. How tall is this monument in yards, rounded to the nearest yard? Give the number only."} 81 | {"task_id": "42d4198c-5895-4f0a-b0c0-424a66465d83", "Level": 2, "Final answer": "60", "id": "level2-54", "web": "https://www.google.com/", "ques": "I'm curious about how much information is available for popular video games before their release. Find the Wikipedia page for the 2019 game that won the British Academy Games Awards. How many revisions did that page have before the month listed as the game's release date on that Wikipedia page (as of the most recent entry from 2022)?"} 82 | {"task_id": "a26649c6-1cb2-470a-871e-6910c64c3e53", "Level": 2, "Final answer": "116", "id": "level2-55", "web": "https://www.google.com/", "ques": "What is the absolute difference in tens of thousands between the population of chinstrap penguins on the Wikipedia page for penguin species populations as of the end of 2018 and the population recorded in the Nature.com \"global population assessment of the Chinstrap penguin\" article from 2020, assuming two penguins per breeding pair?"} 83 | {"task_id": "d5141ca5-e7a0-469f-bf3e-e773507c86e2", "Level": 2, "Final answer": "19/02/2009", "id": "level2-56", "web": "https://www.google.com/", "ques": "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect? Answer using the format DD/MM/YYYY."} 84 | {"task_id": "1dcc160f-c187-48c2-b68e-319bd4354f3d", "Level": 2, "Final answer": "3", "id": "level2-57", "web": "https://www.google.com/", "ques": "According to Openreview.net, at the NeurIPS 2022 Conference, how many papers by an author named Yuri were accepted with a \"certain\" recommendation?"} 85 | {"task_id": "e0c10771-d627-4fd7-9694-05348e54ee36", "Level": 2, "Final answer": "234.9", "id": "level2-58", "web": "https://www.google.com/", "ques": "Take the gender split from the 2011 Bulgarian census about those who have completed tertiary education. Subtract the smaller number from the larger number, then return the difference in thousands of women. So if there were 30.1 thousand more men, you'd give \"30.1\""} 86 | {"task_id": "e29834fd-413a-455c-a33e-c3915b07401c", "Level": 2, "Final answer": "21", "id": "level2-59", "web": "https://www.google.com/", "ques": "I'd like to learn more about some popular reality television competition shows. As of the end of the 44th season of the American version of Survivor, how many more unique winners have there been compared to the number of winners of American Idol?"} 87 | {"task_id": "08c0b6e9-1b43-4c2e-ae55-4e3fce2c2715", "Level": 2, "Final answer": "orange, white", "id": "level2-60", "web": "https://www.google.com/", "ques": "In the film Goldfinger, what color was the object that James Bond concealed himself and his companion Pussy Galore at the end of the film? If there are multiple colors, put them in a comma-separated list in alphabetical order."} 88 | {"task_id": "db4fd70a-2d37-40ea-873f-9433dc5e301f", "Level": 2, "Final answer": "10", "id": "level2-61", "web": "https://www.google.com/", "ques": "As of May 2023, how many stops are between South Station and Windsor Gardens on MBTA\u2019s Franklin-Foxboro line (not included)?"} 89 | {"task_id": "853c8244-429e-46ca-89f2-addf40dfb2bd", "Level": 2, "Final answer": "11", "id": "level2-62", "web": "https://www.google.com/", "ques": "In the 2015 Metropolitan Museum of Art exhibition titled after the Chinese zodiac animal of 2015, how many of the \"twelve animals of the Chinese zodiac\" have a hand visible?"} 90 | {"task_id": "7a4a336d-dcfa-45a0-b014-824c7619e8de", "Level": 2, "Final answer": "1:41.614", "id": "level2-63", "web": "https://www.google.com/", "ques": "At the two-minute mark in the YouTube video uploaded by the channel \u201cGameGrumps\u201d on May 14, 2017 as part of their playthrough of the game Mario Kart 8 Deluxe, the shows\u2019 hosts are competing on one of the game\u2019s racetracks. What was the world record time for that track in the game\u2019s 150cc mode as of June 7, 2023? Express your answer in minutes and seconds, rounding the seconds to the nearest hundredth, e.g. 1:01.001."} 91 | -------------------------------------------------------------------------------- /analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 84, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import json\n", 10 | "import pandas as pd\n", 11 | "\n", 12 | "# Read the JSON file\n", 13 | "# with open(\"results/experiment_results.json\", \"r\") as f:\n", 14 | "# data = pd.read_json(f)\n", 15 | "\n", 16 | "import glob\n", 17 | "import os\n", 18 | "\n", 19 | "# Get all task result files\n", 20 | "task_files = glob.glob(\"results/*--*/task_result.json\")\n", 21 | "\n", 22 | "# Read each file into a list of dictionaries\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 85, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "results = []\n", 32 | "for file in task_files:\n", 33 | " with open(file, \"r\") as f:\n", 34 | " results.append(json.load(f))\n", 35 | "\n", 36 | "# Create DataFrame\n", 37 | "data = pd.DataFrame({\"all_tasks\": results})" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 86, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/plain": [ 48 | "590" 49 | ] 50 | }, 51 | "execution_count": 86, 52 | "metadata": {}, 53 | "output_type": "execute_result" 54 | } 55 | ], 56 | "source": [ 57 | "all_tasks = pd.DataFrame(list(data['all_tasks']))\n", 58 | "# all_tasks.head(3)\n", 59 | "len(all_tasks)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 87, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/plain": [ 70 | "586" 71 | ] 72 | }, 73 | "execution_count": 87, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "# exclude impossible tasks\n", 80 | "import json\n", 81 | "\n", 82 | "\n", 83 | "with open(\"impossible-tasks.json\", \"r\") as f:\n", 84 | " impossible_tasks = set(json.load(f))\n", 85 | "\n", 86 | "all_tasks = all_tasks[~all_tasks[\"task_id\"].isin(impossible_tasks)]\n", 87 | "# all_tasks[\"group\"] = all_tasks[\"task_id\"].str.split(\"--\").str[0]\n", 88 | "len(all_tasks)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 88, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "data": { 98 | "text/plain": [ 99 | "group\n", 100 | "Wolfram Alpha 46\n", 101 | "Cambridge Dictionary 43\n", 102 | "ArXiv 42\n", 103 | "ESPN 40\n", 104 | "GitHub 40\n", 105 | "Booking 40\n", 106 | "Google Search 40\n", 107 | "Coursera 40\n", 108 | "Allrecipes 39\n", 109 | "Google Flights 39\n", 110 | "Amazon 38\n", 111 | "Google Map 36\n", 112 | "Huggingface 35\n", 113 | "BBC News 35\n", 114 | "Apple 33\n", 115 | "Name: count, dtype: int64" 116 | ] 117 | }, 118 | "execution_count": 88, 119 | "metadata": {}, 120 | "output_type": "execute_result" 121 | } 122 | ], 123 | "source": [ 124 | "all_tasks[\"group\"] = all_tasks[\"task_id\"].str.split(\"--\").str[0]\n", 125 | "all_tasks[\"group\"].value_counts()" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 89, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/plain": [ 136 | "" 137 | ] 138 | }, 139 | "execution_count": 89, 140 | "metadata": {}, 141 | "output_type": "execute_result" 142 | }, 143 | { 144 | "data": { 145 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAoU0lEQVR4nO3df1TU153/8RfgMIg6UFRAViAmTaPUX4lWmNNuNlUEXU5qKmc3MZ7EZN1k66KbSOta9qhBbapL9hu72SWmZ49R96Q2W/c0ydEYFTXqpuIvEk/90eWox4S0OrCrB1ApwwD3+0cOsxlRYXBw7kyej3M8ZO7nzp33mzswr3xmhokxxhgBAABYJDbcBQAAANyIgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsM6AcBfQF52dnbp48aKGDBmimJiYcJcDAAB6wRijq1evKiMjQ7Gxtz9HEpEB5eLFi8rMzAx3GQAAoA8+//xzjRw58rZzIjKgDBkyRNIXDbpcrjBX03s+n0+7d+9WQUGBHA5HuMvpF9HeY7T3J9FjtKDHyBeN/TU3NyszM9P/OH47ERlQup7WcblcERdQEhMT5XK5oubOdqNo7zHa+5PoMVrQY+SL5v568/IMXiQLAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYJ0B4S7ARvf8+P1+WdcZZ1QxRRpbvkvejp4/ajoYn64tCul6AACEE2dQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDpBBZTy8nLFxMQE/Bs9erT/eGtrq0pKSjR06FANHjxYxcXFqq+vD1ijrq5ORUVFSkxMVGpqqpYsWaL29vbQdAMAAKLCgGCv8M1vflN79uz5vwUG/N8Sixcv1vvvv6+tW7cqKSlJCxcu1OzZs/Wb3/xGktTR0aGioiKlp6fr0KFDunTpkp5++mk5HA799Kc/DUE7AAAgGgQdUAYMGKD09PRu401NTdqwYYO2bNmiqVOnSpI2btyoMWPG6PDhw8rLy9Pu3bt15swZ7dmzR2lpaZo4caJWr16tpUuXqry8XPHx8XfeEQAAiHhBB5SzZ88qIyNDCQkJcrvdWrNmjbKyslRTUyOfz6f8/Hz/3NGjRysrK0vV1dXKy8tTdXW1xo0bp7S0NP+cwsJCLViwQKdPn9aDDz5409v0er3yer3+y83NzZIkn88nn88XbAs9csaZkK8pSc5YE/A1lPrj+9AXXXXYUk+oRXt/Ej1GC3qMfNHYXzC9xBhjev1o+cEHH+jatWt64IEHdOnSJa1cuVJ/+MMfdOrUKW3btk3PPvtsQJCQpClTpui73/2u/vEf/1HPP/+8PvvsM+3atct/vKWlRYMGDdKOHTs0c+bMm95ueXm5Vq5c2W18y5YtSkxM7G35AAAgjFpaWvTkk0+qqalJLpfrtnODOoPy5QAxfvx45ebmKjs7W7/61a80cODAvlXbC2VlZSotLfVfbm5uVmZmpgoKCnpssC/Glu/qeVIfOGONVk/u1PLjsfJ2xoR07VPlhSFdr698Pp+qqqo0ffp0ORyOcJcTctHen0SP0YIeI1809tf1DEhvBP0Uz5clJyfrG9/4hs6dO6fp06erra1NjY2NSk5O9s+pr6/3v2YlPT1dR48eDVij610+N3tdSxen0ymn09lt3OFw9MumeTtCGx66rd8ZE/LbsO3O2197Y4to70+ix2hBj5EvmvoLpo87+jso165d0/nz5zVixAhNmjRJDodDe/fu9R+vra1VXV2d3G63JMntduvkyZNqaGjwz6mqqpLL5VJOTs6dlAIAAKJIUGdQfvSjH+nRRx9Vdna2Ll68qJdeeklxcXGaM2eOkpKSNH/+fJWWliolJUUul0uLFi2S2+1WXl6eJKmgoEA5OTl66qmnVFFRIY/Ho2XLlqmkpOSmZ0gAAMBXU1AB5fe//73mzJmjy5cva/jw4frOd76jw4cPa/jw4ZKkdevWKTY2VsXFxfJ6vSosLNTrr7/uv35cXJy2b9+uBQsWyO12a9CgQZo3b55WrVoV2q4AAEBECyqgvP3227c9npCQoMrKSlVWVt5yTnZ2tnbs2BHMzQIAgK8YPosHAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6dxRQ1q5dq5iYGL344ov+sdbWVpWUlGjo0KEaPHiwiouLVV9fH3C9uro6FRUVKTExUampqVqyZIna29vvpBQAABBF+hxQjh07pp///OcaP358wPjixYu1bds2bd26VQcOHNDFixc1e/Zs//GOjg4VFRWpra1Nhw4d0ubNm7Vp0yatWLGi710AAICo0qeAcu3aNc2dO1f/9m//pq997Wv+8aamJm3YsEGvvvqqpk6dqkmTJmnjxo06dOiQDh8+LEnavXu3zpw5o7feeksTJ07UzJkztXr1alVWVqqtrS00XQEAgIjWp4BSUlKioqIi5efnB4zX1NTI5/MFjI8ePVpZWVmqrq6WJFVXV2vcuHFKS0vzzyksLFRzc7NOnz7dl3IAAECUGRDsFd5++219/PHHOnbsWLdjHo9H8fHxSk5ODhhPS0uTx+Pxz/lyOOk63nXsZrxer7xer/9yc3OzJMnn88nn8wXbQo+ccSbka0qSM9YEfA2l/vg+9EVXHbbUE2rR3p9Ej9GCHiNfNPYXTC9BBZTPP/9cL7zwgqqqqpSQkBB0YX21Zs0arVy5stv47t27lZiYGPLbq5gS8iUDrJ7cGfI1d+zYEfI170RVVVW4S+hX0d6fRI/Rgh4jXzT119LS0uu5QQWUmpoaNTQ06KGHHvKPdXR06ODBg/rXf/1X7dq1S21tbWpsbAw4i1JfX6/09HRJUnp6uo4ePRqwbte7fLrm3KisrEylpaX+y83NzcrMzFRBQYFcLlcwLfTK2PJdIV9T+uLMyerJnVp+PFbezpiQrn2qvDCk6/WVz+dTVVWVpk+fLofDEe5yQi7a+5PoMVrQY+SLxv66ngHpjaACyrRp03Ty5MmAsWeffVajR4/W0qVLlZmZKYfDob1796q4uFiSVFtbq7q6OrndbkmS2+3Wyy+/rIaGBqWmpkr6Ih26XC7l5OTc9HadTqecTme3cYfD0S+b5u0IbXjotn5nTMhvw7Y7b3/tjS2ivT+JHqMFPUa+aOovmD6CCihDhgzR2LFjA8YGDRqkoUOH+sfnz5+v0tJSpaSkyOVyadGiRXK73crLy5MkFRQUKCcnR0899ZQqKirk8Xi0bNkylZSU3DSEAACAr56gXyTbk3Xr1ik2NlbFxcXyer0qLCzU66+/7j8eFxen7du3a8GCBXK73Ro0aJDmzZunVatWhboUAAAQoe44oOzfvz/gckJCgiorK1VZWXnL62RnZ1v3ok4AAGAPPosHAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGCdoALK+vXrNX78eLlcLrlcLrndbn3wwQf+462trSopKdHQoUM1ePBgFRcXq76+PmCNuro6FRUVKTExUampqVqyZIna29tD0w0AAIgKQQWUkSNHau3ataqpqdHx48c1depUzZo1S6dPn5YkLV68WNu2bdPWrVt14MABXbx4UbNnz/Zfv6OjQ0VFRWpra9OhQ4e0efNmbdq0SStWrAhtVwAAIKINCGbyo48+GnD55Zdf1vr163X48GGNHDlSGzZs0JYtWzR16lRJ0saNGzVmzBgdPnxYeXl52r17t86cOaM9e/YoLS1NEydO1OrVq7V06VKVl5crPj4+dJ0BAICI1efXoHR0dOjtt9/W9evX5Xa7VVNTI5/Pp/z8fP+c0aNHKysrS9XV1ZKk6upqjRs3Tmlpaf45hYWFam5u9p+FAQAACOoMiiSdPHlSbrdbra2tGjx4sN555x3l5OToxIkTio+PV3JycsD8tLQ0eTweSZLH4wkIJ13Hu47ditfrldfr9V9ubm6WJPl8Pvl8vmBb6JEzzoR8TUlyxpqAr6HUH9+Hvuiqw5Z6Qi3a+5PoMVrQY+SLxv6C6SXogPLAAw/oxIkTampq0n/+539q3rx5OnDgQLDLBGXNmjVauXJlt/Hdu3crMTEx5LdXMSXkSwZYPbkz5Gvu2LEj5GveiaqqqnCX0K+ivT+JHqMFPUa+aOqvpaWl13ODDijx8fH6+te/LkmaNGmSjh07pn/+53/W448/rra2NjU2NgacRamvr1d6erokKT09XUePHg1Yr+tdPl1zbqasrEylpaX+y83NzcrMzFRBQYFcLlewLfRobPmukK8pfXHmZPXkTi0/HitvZ0xI1z5VXhjS9frK5/OpqqpK06dPl8PhCHc5IRft/Un0GC3oMfJFY39dz4D0RtAB5UadnZ3yer2aNGmSHA6H9u7dq+LiYklSbW2t6urq5Ha7JUlut1svv/yyGhoalJqaKumLZOhyuZSTk3PL23A6nXI6nd3GHQ5Hv2yatyO04aHb+p0xIb8N2+68/bU3toj2/iR6jBb0GPmiqb9g+ggqoJSVlWnmzJnKysrS1atXtWXLFu3fv1+7du1SUlKS5s+fr9LSUqWkpMjlcmnRokVyu93Ky8uTJBUUFCgnJ0dPPfWUKioq5PF4tGzZMpWUlNw0gAAAgK+moAJKQ0ODnn76aV26dElJSUkaP368du3apenTp0uS1q1bp9jYWBUXF8vr9aqwsFCvv/66//pxcXHavn27FixYILfbrUGDBmnevHlatWpVaLsCAAARLaiAsmHDhtseT0hIUGVlpSorK285Jzs727oXdAIAALvwWTwAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsEFVDWrFmjb33rWxoyZIhSU1P12GOPqba2NmBOa2urSkpKNHToUA0ePFjFxcWqr68PmFNXV6eioiIlJiYqNTVVS5YsUXt7+513AwAAokJQAeXAgQMqKSnR4cOHVVVVJZ/Pp4KCAl2/ft0/Z/Hixdq2bZu2bt2qAwcO6OLFi5o9e7b/eEdHh4qKitTW1qZDhw5p8+bN2rRpk1asWBG6rgAAQEQbEMzknTt3BlzetGmTUlNTVVNTo4cfflhNTU3asGGDtmzZoqlTp0qSNm7cqDFjxujw4cPKy8vT7t27debMGe3Zs0dpaWmaOHGiVq9eraVLl6q8vFzx8fGh6w4AAESkoALKjZqamiRJKSkpkqSamhr5fD7l5+f754wePVpZWVmqrq5WXl6eqqurNW7cOKWlpfnnFBYWasGCBTp9+rQefPDBbrfj9Xrl9Xr9l5ubmyVJPp9PPp/vTlq4KWecCfmakuSMNQFfQ6k/vg990VWHLfWEWrT3J9FjtKDHyBeN/QXTS58DSmdnp1588UV9+9vf1tixYyVJHo9H8fHxSk5ODpiblpYmj8fjn/PlcNJ1vOvYzaxZs0YrV67sNr57924lJib2tYVbqpgS8iUDrJ7cGfI1d+zYEfI170RVVVW4S+hX0d6fRI/Rgh4jXzT119LS0uu5fQ4oJSUlOnXqlD766KO+LtFrZWVlKi0t9V9ubm5WZmamCgoK5HK5Qn57Y8t3hXxN6YszJ6snd2r58Vh5O2NCuvap8sKQrtdXPp9PVVVVmj59uhwOR7jLCblo70+ix2hBj5EvGvvregakN/oUUBYuXKjt27fr4MGDGjlypH88PT1dbW1tamxsDDiLUl9fr/T0dP+co0ePBqzX9S6frjk3cjqdcjqd3cYdDke/bJq3I7Thodv6nTEhvw3b7rz9tTe2iPb+JHqMFvQY+aKpv2D6COpdPMYYLVy4UO+884727dunUaNGBRyfNGmSHA6H9u7d6x+rra1VXV2d3G63JMntduvkyZNqaGjwz6mqqpLL5VJOTk4w5QAAgCgV1BmUkpISbdmyRe+9956GDBnif81IUlKSBg4cqKSkJM2fP1+lpaVKSUmRy+XSokWL5Ha7lZeXJ0kqKChQTk6OnnrqKVVUVMjj8WjZsmUqKSm56VkSAADw1RNUQFm/fr0k6ZFHHgkY37hxo5555hlJ0rp16xQbG6vi4mJ5vV4VFhbq9ddf98+Ni4vT9u3btWDBArndbg0aNEjz5s3TqlWr7qwTAAAQNYIKKMb0/PbYhIQEVVZWqrKy8pZzsrOzrXvXCQAAsAefxQMAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsE7QAeXgwYN69NFHlZGRoZiYGL377rsBx40xWrFihUaMGKGBAwcqPz9fZ8+eDZhz5coVzZ07Vy6XS8nJyZo/f76uXbt2R40AAIDoEXRAuX79uiZMmKDKysqbHq+oqNBrr72mN954Q0eOHNGgQYNUWFio1tZW/5y5c+fq9OnTqqqq0vbt23Xw4EE9//zzfe8CAABElQHBXmHmzJmaOXPmTY8ZY/Szn/1My5Yt06xZsyRJ//7v/660tDS9++67euKJJ/S73/1OO3fu1LFjxzR58mRJ0r/8y7/oz//8z/VP//RPysjIuIN2AABANAg6oNzOhQsX5PF4lJ+f7x9LSkpSbm6uqqur9cQTT6i6ulrJycn+cCJJ+fn5io2N1ZEjR/T973+/27per1der9d/ubm5WZLk8/nk8/lC2YIkyRlnQr6mJDljTcDXUOqP70NfdNVhSz2hFu39SfQYLegx8kVjf8H0EtKA4vF4JElpaWkB42lpaf5jHo9HqampgUUMGKCUlBT/nButWbNGK1eu7Da+e/duJSYmhqL0ABVTQr5kgNWTO0O+5o4dO0K+5p2oqqoKdwn9Ktr7k+gxWtBj5Ium/lpaWno9N6QBpb+UlZWptLTUf7m5uVmZmZkqKCiQy+UK+e2NLd8V8jWlL86crJ7cqeXHY+XtjAnp2qfKC0O6Xl/5fD5VVVVp+vTpcjgc4S4n5KK9P4keowU9Rr5o7K/rGZDeCGlASU9PlyTV19drxIgR/vH6+npNnDjRP6ehoSHgeu3t7bpy5Yr/+jdyOp1yOp3dxh0OR79smrcjtOGh2/qdMSG/DdvuvP21N7aI9v4keowW9Bj5oqm/YPoI6d9BGTVqlNLT07V3717/WHNzs44cOSK32y1JcrvdamxsVE1NjX/Ovn371NnZqdzc3FCWAwAAIlTQZ1CuXbumc+fO+S9fuHBBJ06cUEpKirKysvTiiy/qJz/5ie6//36NGjVKy5cvV0ZGhh577DFJ0pgxYzRjxgw999xzeuONN+Tz+bRw4UI98cQTvIMHAABI6kNAOX78uL773e/6L3e9NmTevHnatGmT/v7v/17Xr1/X888/r8bGRn3nO9/Rzp07lZCQ4L/OL37xCy1cuFDTpk1TbGysiouL9dprr4WgHQAAEA2CDiiPPPKIjLn122RjYmK0atUqrVq16pZzUlJStGXLlmBvGgAAfEXwWTwAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYZEO4CEBr3/Pj9cJcgSXLGGVVMkcaW75K3I+a2cz9dW3SXqgIARBrOoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYJa0CprKzUPffco4SEBOXm5uro0aPhLAcAAFgibAHlP/7jP1RaWqqXXnpJH3/8sSZMmKDCwkI1NDSEqyQAAGCJAeG64VdffVXPPfecnn32WUnSG2+8offff19vvvmmfvzjH4erLOC2xpbvUsWUL756O2LCXU6vfLq2KNwlAEDQwhJQ2traVFNTo7KyMv9YbGys8vPzVV1d3W2+1+uV1+v1X25qapIkXblyRT6fL+T1DWi/HvI1JWlAp1FLS6cG+GLV0RkZD27BCqbHy5cv36WqQmeA73rE7WGw32efz6eWlhZdvnxZDoejn6oKL3qMDtHeYzT2d/XqVUmSMabHuWEJKP/7v/+rjo4OpaWlBYynpaXpv//7v7vNX7NmjVauXNltfNSoUf1WY395MtwF3AW97XHY/+vXMvpNpO1hpH6fAUSvq1evKikp6bZzwvYUTzDKyspUWlrqv9zZ2akrV65o6NChiomJjP+LlaTm5mZlZmbq888/l8vlCnc5/SLae4z2/iR6jBb0GPmisT9jjK5evaqMjIwe54YloAwbNkxxcXGqr68PGK+vr1d6enq3+U6nU06nM2AsOTm5P0vsVy6XK2rubLcS7T1Ge38SPUYLeox80dZfT2dOuoTlXTzx8fGaNGmS9u7d6x/r7OzU3r175Xa7w1ESAACwSNie4iktLdW8efM0efJkTZkyRT/72c90/fp1/7t6AADAV1fYAsrjjz+u//mf/9GKFSvk8Xg0ceJE7dy5s9sLZ6OJ0+nUSy+91O3pqmgS7T1Ge38SPUYLeox80d5fT2JMb97rAwAAcBfxWTwAAMA6BBQAAGAdAgoAALAOAQUAAFiHgBJia9as0be+9S0NGTJEqampeuyxx1RbWxsw55FHHlFMTEzAvx/84Adhqjh45eXl3eofPXq0/3hra6tKSko0dOhQDR48WMXFxd3+KJ/t7rnnnm49xsTEqKSkRFJk7uHBgwf16KOPKiMjQzExMXr33XcDjhtjtGLFCo0YMUIDBw5Ufn6+zp49GzDnypUrmjt3rlwul5KTkzV//nxdu3btLnZxa7frz+fzaenSpRo3bpwGDRqkjIwMPf3007p48WLAGjfb97Vr197lTm6tpz185plnutU/Y8aMgDk276HUc483+7mMiYnRK6+84p9j8z725jGiN79D6+rqVFRUpMTERKWmpmrJkiVqb2+/m630OwJKiB04cEAlJSU6fPiwqqqq5PP5VFBQoOvXAz+A8LnnntOlS5f8/yoqKsJUcd9885vfDKj/o48+8h9bvHixtm3bpq1bt+rAgQO6ePGiZs+eHcZqg3fs2LGA/qqqqiRJf/EXf+GfE2l7eP36dU2YMEGVlZU3PV5RUaHXXntNb7zxho4cOaJBgwapsLBQra2t/jlz587V6dOnVVVVpe3bt+vgwYN6/vnn71YLt3W7/lpaWvTxxx9r+fLl+vjjj/XrX/9atbW1+t73vtdt7qpVqwL2ddGiRXej/F7paQ8lacaMGQH1//KXvww4bvMeSj33+OXeLl26pDfffFMxMTEqLi4OmGfrPvbmMaKn36EdHR0qKipSW1ubDh06pM2bN2vTpk1asWJFOFrqPwb9qqGhwUgyBw4c8I/92Z/9mXnhhRfCV9Qdeumll8yECRNueqyxsdE4HA6zdetW/9jvfvc7I8lUV1ffpQpD74UXXjD33Xef6ezsNMZE/h5KMu+8847/cmdnp0lPTzevvPKKf6yxsdE4nU7zy1/+0hhjzJkzZ4wkc+zYMf+cDz74wMTExJg//OEPd6323rixv5s5evSokWQ+++wz/1h2drZZt25d/xYXIjfrcd68eWbWrFm3vE4k7aExvdvHWbNmmalTpwaMRdI+3vgY0ZvfoTt27DCxsbHG4/H456xfv964XC7j9XrvbgP9iDMo/aypqUmSlJKSEjD+i1/8QsOGDdPYsWNVVlamlpaWcJTXZ2fPnlVGRobuvfdezZ07V3V1dZKkmpoa+Xw+5efn++eOHj1aWVlZqq6uDle5d6StrU1vvfWW/uqv/irgwykjfQ+/7MKFC/J4PAH7lpSUpNzcXP++VVdXKzk5WZMnT/bPyc/PV2xsrI4cOXLXa75TTU1NiomJ6fa5XmvXrtXQoUP14IMP6pVXXom40+b79+9XamqqHnjgAS1YsECXL1/2H4u2Payvr9f777+v+fPndzsWKft442NEb36HVldXa9y4cQF/2LSwsFDNzc06ffr0Xay+f0XEpxlHqs7OTr344ov69re/rbFjx/rHn3zySWVnZysjI0O//e1vtXTpUtXW1urXv/51GKvtvdzcXG3atEkPPPCALl26pJUrV+pP//RPderUKXk8HsXHx3f7pZ+WliaPxxOegu/Qu+++q8bGRj3zzDP+sUjfwxt17c2Nf8n5y/vm8XiUmpoacHzAgAFKSUmJuL1tbW3V0qVLNWfOnIAPYfu7v/s7PfTQQ0pJSdGhQ4dUVlamS5cu6dVXXw1jtb03Y8YMzZ49W6NGjdL58+f1D//wD5o5c6aqq6sVFxcXVXsoSZs3b9aQIUO6PYUcKft4s8eI3vwO9Xg8N/1Z7ToWLQgo/aikpESnTp0KeH2GpIDne8eNG6cRI0Zo2rRpOn/+vO677767XWbQZs6c6f/v8ePHKzc3V9nZ2frVr36lgQMHhrGy/rFhwwbNnDkz4OPBI30Pv8p8Pp/+8i//UsYYrV+/PuBYaWmp/7/Hjx+v+Ph4/c3f/I3WrFkTEX9u/IknnvD/97hx4zR+/Hjdd9992r9/v6ZNmxbGyvrHm2++qblz5yohISFgPFL28VaPEfgCT/H0k4ULF2r79u368MMPNXLkyNvOzc3NlSSdO3fubpQWcsnJyfrGN76hc+fOKT09XW1tbWpsbAyYU19fr/T09PAUeAc+++wz7dmzR3/9139923mRvodde3PjOwW+vG/p6elqaGgION7e3q4rV65EzN52hZPPPvtMVVVVPX6EfW5urtrb2/Xpp5/enQJD7N5779WwYcP898to2MMu//Vf/6Xa2toefzYlO/fxVo8Rvfkdmp6eftOf1a5j0YKAEmLGGC1cuFDvvPOO9u3bp1GjRvV4nRMnTkiSRowY0c/V9Y9r167p/PnzGjFihCZNmiSHw6G9e/f6j9fW1qqurk5utzuMVfbNxo0blZqaqqKiotvOi/Q9HDVqlNLT0wP2rbm5WUeOHPHvm9vtVmNjo2pqavxz9u3bp87OTn9As1lXODl79qz27NmjoUOH9nidEydOKDY2ttvTIpHi97//vS5fvuy/X0b6Hn7Zhg0bNGnSJE2YMKHHuTbtY0+PEb35Hep2u3Xy5MmAsNkVuHNycu5OI3dDmF+kG3UWLFhgkpKSzP79+82lS5f8/1paWowxxpw7d86sWrXKHD9+3Fy4cMG899575t577zUPP/xwmCvvvR/+8Idm//795sKFC+Y3v/mNyc/PN8OGDTMNDQ3GGGN+8IMfmKysLLNv3z5z/Phx43a7jdvtDnPVwevo6DBZWVlm6dKlAeORuodXr141n3zyifnkk0+MJPPqq6+aTz75xP8ulrVr15rk5GTz3nvvmd/+9rdm1qxZZtSoUeaPf/yjf40ZM2aYBx980Bw5csR89NFH5v777zdz5swJV0sBbtdfW1ub+d73vmdGjhxpTpw4EfCz2fWuh0OHDpl169aZEydOmPPnz5u33nrLDB8+3Dz99NNh7uz/3K7Hq1evmh/96EemurraXLhwwezZs8c89NBD5v777zetra3+NWzeQ2N6vp8aY0xTU5NJTEw069ev73Z92/exp8cIY3r+Hdre3m7Gjh1rCgoKzIkTJ8zOnTvN8OHDTVlZWTha6jcElBCTdNN/GzduNMYYU1dXZx5++GGTkpJinE6n+frXv26WLFlimpqawlt4EB5//HEzYsQIEx8fb/7kT/7EPP744+bcuXP+43/84x/N3/7t35qvfe1rJjEx0Xz/+983ly5dCmPFfbNr1y4jydTW1gaMR+oefvjhhze9b86bN88Y88VbjZcvX27S0tKM0+k006ZN69b75cuXzZw5c8zgwYONy+Uyzz77rLl69WoYuunudv1duHDhlj+bH374oTHGmJqaGpObm2uSkpJMQkKCGTNmjPnpT38a8OAebrfrsaWlxRQUFJjhw4cbh8NhsrOzzXPPPRfwVlRj7N5DY3q+nxpjzM9//nMzcOBA09jY2O36tu9jT48RxvTud+inn35qZs6caQYOHGiGDRtmfvjDHxqfz3eXu+lfMcYY008nZwAAAPqE16AAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYJ3/D32yHalDIsYUAAAAAElFTkSuQmCC", 146 | "text/plain": [ 147 | "
" 148 | ] 149 | }, 150 | "metadata": {}, 151 | "output_type": "display_data" 152 | } 153 | ], 154 | "source": [ 155 | "# average duration of tasks, average duration per step\n", 156 | "# all_tasks[\"duration_seconds\"].mean(), all_tasks[\"duration_seconds\"].mean() / all_tasks[\"num_steps\"].mean()\n", 157 | "\n", 158 | "# histogram of duration PER STEP\n", 159 | "(all_tasks[\"duration_seconds\"] / all_tasks[\"num_steps\"]).hist()" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "## Impossible tasks\n", 167 | "\n", 168 | "Some tasks are impossible to solve. For Apple doesn't show prices for certain products in the dataset, there are no recipes for chocolate chip cookies etc.\n", 169 | "\n", 170 | "A lot of tasks have dates in the past (kookings, flights), so we just changed the years from 2023 to 2024 or 2024 to 2025 respectively.\n", 171 | "\n", 172 | "Here is a full list of impossible tasks:" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 94, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/plain": [ 183 | "{'Allrecipes--16',\n", 184 | " 'Allrecipes--19',\n", 185 | " 'Allrecipes--23',\n", 186 | " 'Allrecipes--3',\n", 187 | " 'Allrecipes--30',\n", 188 | " 'Allrecipes--7',\n", 189 | " 'Amazon--16',\n", 190 | " 'Amazon--19',\n", 191 | " 'Amazon--4',\n", 192 | " 'Apple--1',\n", 193 | " 'Apple--14',\n", 194 | " 'Apple--16',\n", 195 | " 'Apple--2',\n", 196 | " 'Apple--20',\n", 197 | " 'Apple--37',\n", 198 | " 'Apple--41',\n", 199 | " 'Apple--42',\n", 200 | " 'Apple--7',\n", 201 | " 'Apple--9',\n", 202 | " 'ArXiv--11',\n", 203 | " 'BBC News--14',\n", 204 | " 'BBC News--16',\n", 205 | " 'BBC News--18',\n", 206 | " 'BBC News--2',\n", 207 | " 'BBC News--21',\n", 208 | " 'BBC News--33',\n", 209 | " 'BBC News--37',\n", 210 | " 'Booking--11',\n", 211 | " 'Booking--13',\n", 212 | " 'Booking--14',\n", 213 | " 'Booking--6',\n", 214 | " 'Coursera--17',\n", 215 | " 'Coursera--28',\n", 216 | " 'ESPN--19',\n", 217 | " 'ESPN--2',\n", 218 | " 'ESPN--21',\n", 219 | " 'ESPN--26',\n", 220 | " 'GitHub--22',\n", 221 | " 'Google Flights--0',\n", 222 | " 'Google Flights--20',\n", 223 | " 'Google Flights--7',\n", 224 | " 'Google Map--13',\n", 225 | " 'Google Map--18',\n", 226 | " 'Google Map--26',\n", 227 | " 'Google Search--15',\n", 228 | " 'Google Search--16',\n", 229 | " 'Google Search--22',\n", 230 | " 'Huggingface--1',\n", 231 | " 'Huggingface--10',\n", 232 | " 'Huggingface--20',\n", 233 | " 'Huggingface--21',\n", 234 | " 'Huggingface--22',\n", 235 | " 'Huggingface--23',\n", 236 | " 'Huggingface--32',\n", 237 | " 'Huggingface--6'}" 238 | ] 239 | }, 240 | "execution_count": 94, 241 | "metadata": {}, 242 | "output_type": "execute_result" 243 | } 244 | ], 245 | "source": [ 246 | "impossible_tasks" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "### Why tasks fail?\n", 254 | "\n", 255 | "As you can see, most of the tasks fail because the model just can't figure out what to do. WHY?" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 91, 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "data": { 265 | "image/png": "iVBORw0KGgoAAAANSUhEUgAABKUAAAHqCAYAAADVi/1VAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAAByGUlEQVR4nO3deXxMd/v/8feIZLKQRJDFLUJRu1KKlKJIYqnbki66iVa5q6Elrbbp3dpaDXp3r9JF0ZYulC6qCFq62EttbYpS7S0JpQS5jZGc3x/9Zn6mWUwimclJXs/HYx6PnHM+85nrzHXClWvOnGMxDMMQAAAAAAAA4EZVPB0AAAAAAAAAKh+aUgAAAAAAAHA7mlIAAAAAAABwO5pSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAAAABwO5pSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAAAABwO5pSQCn76quvZLFYtHjxYk+H4pLMzEzdeOONqlmzpiwWi1544QVPh+Q2+/btU2xsrIKCgmSxWPTxxx97OqRK43KPu+7du6t79+5lEltJDBs2TNWqVfN0GABQaVF/mYdZ6y+LxaJJkyY5lufNmyeLxaJDhw6V2msMGzZM9evXL7X5LtekSZNksVj0xx9/eDoUVGA0pWBKef8J+Pr66r///W++7d27d1fLli09EJn5jBs3TitXrlRycrLeeecd9e7du9CxZ86c0cSJE9WyZUsFBASoZs2aatOmjR544AEdOXLEMW758uVO/2mXVwkJCdq1a5emTp2qd955R+3bty/T1zt27JgeeOABNW3aVH5+fgoNDVWHDh30yCOP6MyZM2X62uVNcY67ksgroi71KE+NLQAo76i/Sg/1l3vqr0OHDhVaA3Tq1KnMXres5P0OXupRnhpbwKVU9XQAwOWw2WyaNm2aXn75ZU+HYlpr167VgAED9NBDDxU5zm63q2vXrvrpp5+UkJCgMWPG6MyZM9qzZ48WLlyoQYMGqU6dOpL+KopmzpxZrguj//3vf9qwYYP+/e9/a/To0WX+eidOnFD79u2VlZWlu+++W02bNtXx48e1c+dOzZo1S6NGjapUZ9q4etyV1ODBg9WoUSPH8pkzZzRq1CgNGjRIgwcPdqwPCwsrk9cHgIqM+uvyUX+5p/7Kc+utt6pv375O62rXrl2sOf73v/+palXP/vnctWtXvfPOO07r7rnnHnXo0EEjR450rKtMNSXMj6YUTK1NmzZ64403lJyc7PgPubI4e/asAgICLnueo0ePKjg4+JLjPv74Y23fvl0LFizQbbfd5rTt3LlzOn/+/GXH4k7Hjh2TJJf23VVF5WTOnDk6fPiwvv32W1177bVO27KysuTj41NqcZiBq8ddSbVu3VqtW7d2LP/xxx8aNWqUWrdurTvuuKPMXhcAKgPqL+qvknJ3/ZXn6quvvuz//319fS/r+aXhiiuu0BVXXOG07t5779UVV1xBfQPT4ut7MLXHHntMOTk5mjZtWpHj8k7dnTdvXr5tf/9+eN7Xfn7++WfdcccdCgoKUu3atfXEE0/IMAz99ttvGjBggAIDAxUeHq5nn322wNfMycnRY489pvDwcAUEBOif//ynfvvtt3zjNm3apN69eysoKEj+/v7q1q2bvv32W6cxeTHt3btXt912m2rUqKEuXboUuc+//PKLbrrpJoWEhMjf31+dOnXS559/7tied/qvYRiaOXOm43Tfwhw4cECS1Llz53zbfH19FRgYKOmv78LPnDlTkpxOI86Tm5urF154QS1atJCvr6/CwsL0r3/9S3/++afTnPXr19cNN9ygVatWqU2bNvL19VXz5s21ZMkSp3F2u12TJ09W48aN5evrq5o1a6pLly5KTU0tdF8mTZqkqKgoSdL48ePznea8fft29enTR4GBgapWrZp69uypjRs3Os2R9/6tW7dO9913n0JDQ1W3bt0i3z8vL68CTxUPDAx0KnTq16+vYcOG5RtX0HWUzp07p0mTJunKK6+Ur6+vIiIiNHjwYEe+pL/e8xdffFGtWrWSr6+vateurd69e2vr1q1Oc7377rtq166d/Pz8FBISoiFDhuQ7Zvft26f4+HiFh4fL19dXdevW1ZAhQ3Tq1CnHmNTUVHXp0kXBwcGqVq2amjRposcee8zpfSvouMs7zv+uLK7ZIEm//vqr7rvvPjVp0kR+fn6qWbOmbrrppnyvU5JjTJJ27Nih2rVrq3v37o6vZ27dulVxcXGqVauW/Pz81KBBA919992lul8AUNaovwpH/VW+6q9LOX/+vCZMmKB27dopKChIAQEBuu666/Tll1/mG/v3Y7YwX3zxha677joFBASoevXq6tevn/bs2ZNv3Mcff6yWLVvK19dXLVu21NKlS0u8Hxc7ceKEHnroIbVq1UrVqlVTYGCg+vTpox9++CHf2JdfflktWrSQv7+/atSoofbt22vhwoVFzv/rr7+qUaNGatmypTIzMyW5Vh8CheFMKZhagwYNNHToUL3xxht69NFHS/XTultuuUXNmjXTtGnT9Pnnn+upp55SSEiIXnvtNfXo0UPTp0/XggUL9NBDD+maa65R165dnZ4/depUWSwWPfLIIzp69KheeOEF9erVSzt27JCfn5+kv07d7tOnj9q1a6eJEyeqSpUqmjt3rnr06KGvv/5aHTp0cJrzpptuUuPGjfX000/LMIxCY8/MzNS1116r7Oxs3X///apZs6bmz5+vf/7zn1q8eLEGDRrkOP33zjvvVExMjIYOHVrk+5FXRLz99tt6/PHHCy2g/vWvf+nIkSNKTU3Nd3px3vZ58+bprrvu0v3336+DBw/qlVde0fbt2/Xtt9/K29vbMXbfvn265ZZbdO+99yohIUFz587VTTfdpBUrVigmJkbSXwVOSkqK49TlrKwsbd26Vd9//71jzN8NHjxYwcHBGjdunON07rzTnPfs2aPrrrtOgYGBevjhh+Xt7a3XXntN3bt317p169SxY0enue677z7Vrl1bEyZM0NmzZ4t8/3JycvTOO+8oISGhiHfadTk5Obrhhhu0Zs0aDRkyRA888IBOnz6t1NRU7d69Ww0bNpQkDR8+XPPmzVOfPn10zz336MKFC/r666+1ceNGx3Ucpk6dqieeeEI333yz7rnnHh07dkwvv/yyunbtqu3btys4OFjnz59XXFycbDabxowZo/DwcP33v//VsmXLdPLkSQUFBWnPnj264YYb1Lp1a02ZMkVWq1X79+93FPrFPe7K0pYtW/Tdd99pyJAhqlu3rg4dOqRZs2ape/fu2rt3r/z9/SWV7BjbsmWL4uLi1L59e33yySfy8/PT0aNHFRsbq9q1a+vRRx9VcHCwDh06lK/QB4DyjvqrYNRf5a/+ypOdnZ3vYt1BQUHKysrSm2++qVtvvVUjRozQ6dOnNWfOHMXFxWnz5s1q06bNJee+WF6dFxcXp+nTpys7O1uzZs1Sly5dtH37dkcTbtWqVYqPj1fz5s2VkpKi48eP66677rqsBlueX375RR9//LFuuukmNWjQQJmZmXrttdfUrVs37d271/H7+sYbb+j+++/XjTfeqAceeEDnzp3Tzp07tWnTpnxn5eU5cOCAevTooZCQEKWmpqpWrVou1YdAkQzAhObOnWtIMrZs2WIcOHDAqFq1qnH//fc7tnfr1s1o0aKFY/ngwYOGJGPu3Ln55pJkTJw40bE8ceJEQ5IxcuRIx7oLFy4YdevWNSwWizFt2jTH+j///NPw8/MzEhISHOu+/PJLQ5Lxj3/8w8jKynKs//DDDw1JxosvvmgYhmHk5uYajRs3NuLi4ozc3FzHuOzsbKNBgwZGTExMvphuvfVWl96fsWPHGpKMr7/+2rHu9OnTRoMGDYz69esbOTk5TvufmJh4yTmzs7ONJk2aGJKMqKgoY9iwYcacOXOMzMzMfGMTExONgv55+frrrw1JxoIFC5zWr1ixIt/6qKgoQ5Lx0UcfOdadOnXKiIiIMNq2betYd9VVVxn9+vW7ZPx/l3dMPPPMM07rBw4caPj4+BgHDhxwrDty5IhRvXp1o2vXro51ecdgly5djAsXLlzy9TIyMozatWsbkoymTZsa9957r7Fw4ULj5MmT+cZGRUU5HVN5unXrZnTr1s2x/NZbbxmSjOeeey7f2Lxjau3atYYkp9+Pv485dOiQ4eXlZUydOtVp+65du4yqVas61m/fvt2QZCxatKjQ/Xz++ecNScaxY8cKHWMYBR93ecf53+W91wcPHnSs+/t7cSnHjh3L97uenZ2db9yGDRsMScbbb7/tWOfKMZaQkGAEBAQYhmEY33zzjREYGGj069fPOHfunGPM0qVLHf9uAYAZUX8Vjfrr0txdf+W9XkGPL7/80rhw4YJhs9mcnvPnn38aYWFhxt133+20/u/H7N/rk9OnTxvBwcHGiBEjnJ6XkZFhBAUFOa1v06aNERER4VQHrlq1ypHn4ggICHD6XTh37pzTsZb3PlitVmPKlCmOdQMGDHD6fS1I3u/AsWPHjB9//NGoU6eOcc011xgnTpxwjHGlPgSKwtf3YHpXXHGF7rzzTr3++utKT08vtXnvuecex89eXl5q3769DMPQ8OHDHeuDg4PVpEkT/fLLL/meP3ToUFWvXt2xfOONNyoiIkLLly+X9NdXe/bt26fbbrtNx48f1x9//KE//vhDZ8+eVc+ePbV+/Xrl5uY6zXnvvfe6FPvy5cvVoUMHp1PMq1WrppEjR+rQoUPau3eva2/CRfz8/LRp0yaNHz9e0l+nTw8fPlwREREaM2aMbDbbJedYtGiRgoKCFBMT49jfP/74Q+3atVO1atXynSpdp04dDRo0yLEcGBiooUOHavv27crIyJD0Vw727Nmjffv2FXuf/i4nJ0erVq3SwIEDnb6vHxERodtuu03ffPONsrKynJ4zYsQIeXl5XXLusLAw/fDDD7r33nv1559/avbs2brtttsUGhqqJ598sshPXgvz0UcfqVatWhozZky+bXmfpH700UeyWCyaOHFioWOWLFmi3Nxc3XzzzU55CQ8PV+PGjR15yfuka+XKlcrOzi4wprxrRHzyySf5jt/yJu8Tc+mvryEcP35cjRo1UnBwsL7//nvHtuIcY19++aXi4uLUs2dPLVmyRFar1WkeSVq2bJnsdnvp7QgAeAD1V37UXyVTlvVXnpEjRyo1NdXpcdVVV8nLy8txXc/c3FydOHFCFy5cUPv27Z1qAVekpqbq5MmTuvXWW53eZy8vL3Xs2NHxPqenp2vHjh1KSEhwOosoJiZGzZs3L9ZrFsRqtapKlb/+zM/JydHx48cdl1P4e33z+++/a8uWLZecc/fu3erWrZvq16+v1atXq0aNGo5trtSHQFFoSqFCePzxx3XhwoVLXtugOOrVq+e0HBQUJF9fX9WqVSvf+r9/H1+SGjdu7LRssVjUqFEjx/Vq8v4TT0hIUO3atZ0eb775pmw2W77vYTdo0MCl2H/99Vc1adIk3/pmzZo5tpdEUFCQZsyYoUOHDunQoUOaM2eOmjRpoldeeUVPPvnkJZ+/b98+nTp1SqGhofn2+cyZMzp69KjT+EaNGuU7Tf3KK6+UJMf7OGXKFJ08eVJXXnmlWrVqpfHjx2vnzp0l2r9jx44pOzu70PcuNzc333UpXM2J9FdxNWvWLKWnpystLU0vvfSS49TzOXPmFDveAwcOqEmTJkXeCebAgQOqU6eOQkJCCh2zb98+GYahxo0b58vLjz/+6MhLgwYNlJSUpDfffFO1atVSXFycZs6c6XSc3nLLLercubPuuecehYWFaciQIfrwww/LZYPqf//7nyZMmKDIyEhZrVbVqlVLtWvX1smTJ532ydVj7Ny5c+rXr5/atm2rDz/8MN/F67t166b4+HhNnjxZtWrV0oABAzR37lyX/qAAgPKI+ssZ9Vf5rL+kv46LXr16OT3yGivz589X69atHdfGql27tj7//PNiXw8p79jq0aNHvvd51apVjvc57zj4+7EqqcD3oLhyc3P1/PPPq3Hjxk71zc6dO5326ZFHHlG1atXUoUMHNW7cWImJifmuq5anf//+ql69ulauXOm4jlkeV+pDoChcUwoVQt4dJ15//XU9+uij+bYX9v37nJycQucs6NOXwj6RKclZLnl/pD/zzDOFfl/977dzvfjMDk+LiorS3XffrUGDBumKK67QggUL9NRTTxX5nNzcXIWGhmrBggUFbi/urXmlv65RdODAAX3yySdatWqV3nzzTT3//POaPXu206etZaUkObFYLLryyit15ZVXql+/fmrcuLEWLFjgiLeo47U4nwq6Kjc3VxaLRV988UWB8198HD777LMaNmyY4/2+//77lZKSoo0bN6pu3bry8/PT+vXr9eWXX+rzzz/XihUr9MEHH6hHjx5atWpVkfGX5Pf0cowZM0Zz587V2LFjFR0draCgIFksFg0ZMsSpiebqMWa1WtW3b1998sknWrFihW644YZ8+7d48WJt3LhRn332mVauXKm7775bzz77rDZu3MjtmwGYDvWX+1F//aW0cvLuu+9q2LBhGjhwoMaPH6/Q0FB5eXkpJSXF6aYxrsg7tt555x2Fh4fn217Uh4il6emnn9YTTzyhu+++W08++aRCQkJUpUoVjR071qm+adasmdLS0rRs2TKtWLFCH330kV599VVNmDBBkydPdpozPj5e8+fP14IFC/Svf/0r32teqj4EikJTChXG448/rnfffVfTp0/Pty3vk5CTJ086rS/pJ1au+PvpzIZhaP/+/Y7b1OddhDowMFC9evUq1deOiopSWlpavvU//fSTY3tpqVGjhho2bKjdu3c71hVWhDZs2FCrV69W586dXSom9u/fL8MwnOb7+eefJcnpbi0hISG66667dNddd+nMmTPq2rWrJk2aVOyiqHbt2vL39y/0vatSpYoiIyOLNeelXHHFFapRo4bTVx9q1KiR71iV/jpeLz6tvWHDhtq0aZPsdrvTBUov1rBhQ61cuVInTpwo9Gyphg0byjAMNWjQwPFJaFFatWqlVq1a6fHHH9d3332nzp07a/bs2Y6iuEqVKurZs6d69uyp5557Tk8//bT+/e9/68svvyzyWL/49/TiW0WX1e/p4sWLlZCQ4HQHp3PnzhX43rtyjFksFi1YsEADBgzQTTfdpC+++CLf3RIlqVOnTurUqZOmTp2qhQsX6vbbb9f777/vliIeAEob9df/R/1lnvorz+LFi3XFFVdoyZIlTvtb0GUPLiXv2AoNDS3y2Mo7Dgr66mNB70FxLV68WNdff32+s/BPnjyZ74zDgIAA3XLLLbrlllt0/vx5DR48WFOnTlVycrLTnaGfeeYZVa1aVffdd5+qV69e4IXQL1UfAoXh63uoMBo2bKg77rhDr732muP77nkCAwNVq1YtrV+/3mn9q6++WmbxvP322zp9+rRjefHixUpPT1efPn0kSe3atVPDhg31n//8x3G7+IsdO3asxK/dt29fbd68WRs2bHCsO3v2rF5//XXVr1+/RN9X/+GHH/LdtUT6q7Dcu3ev0+nGAQEBkvIXoTfffLNycnIKPNX8woUL+cYfOXLE6fa4WVlZevvtt9WmTRvHJ1DHjx93ek61atXUqFGjEn0lysvLS7Gxsfrkk08cp6dLf91NZ+HCherSpUu+U5ZdtWnTpgLvDrN582YdP37c6f1r2LChNm7cqPPnzzvWLVu2LN+p6/Hx8frjjz/0yiuv5Js379Pj+Ph4GYaR7xOvi8cMHjxYXl5emjx5cr5PnQ3DcLzHWVlZunDhgtP2Vq1aqUqVKo73+8SJE/leJ++T6EvlJK+Yu/j39OzZs5o/f36RzyspLy+vfPv78ssv5/sEvzjHmI+Pj5YsWaJrrrlG/fv31+bNmx3b/vzzz3yv5+p7AwDlFfXX/0f9Vf7qL1deW3I+627Tpk1OOXRVXFycAgMD9fTTTxd47ci8YysiIkJt2rTR/Pnznb7ilpqaWqLrjv1dQfXNokWL9N///tdp3d9z6OPjo+bNm8swjHzxWywWvf7667rxxhuVkJCgTz/91LHNlfoQKApnSqFC+fe//6133nlHaWlpatGihdO2e+65R9OmTdM999yj9u3ba/369Y5PfcpCSEiIunTporvuukuZmZl64YUX1KhRI40YMULSX2eTvPnmm+rTp49atGihu+66S//4xz/03//+V19++aUCAwP12Weflei1H330Ub333nvq06eP7r//foWEhGj+/Pk6ePCgPvroI8fFD4sjNTVVEydO1D//+U916tRJ1apV0y+//KK33npLNptNkyZNcoxt166dJOn+++9XXFycvLy8NGTIEHXr1k3/+te/lJKSoh07dig2Nlbe3t7at2+fFi1apBdffFE33nijY54rr7xSw4cP15YtWxQWFqa33npLmZmZmjt3rmNM8+bN1b17d7Vr104hISHaunWrFi9erNGjR5fovXvqqaeUmpqqLl266L777lPVqlX12muvyWazacaMGSWaU/rrVO4FCxZo0KBBateunXx8fPTjjz/qrbfekq+vrx577DHH2HvuuUeLFy9W7969dfPNN+vAgQN69913HU2bPEOHDtXbb7+tpKQkbd68Wdddd53Onj2r1atX67777tOAAQN0/fXX684779RLL72kffv2qXfv3srNzdXXX3+t66+/XqNHj1bDhg311FNPKTk5WYcOHdLAgQNVvXp1HTx4UEuXLtXIkSP10EMPae3atRo9erRuuukmXXnllbpw4YLeeecdeXl5KT4+XtJf15hYv369+vXrp6ioKB09elSvvvqq6tat63Th14LExsaqXr16Gj58uMaPHy8vLy+99dZbql27tg4fPlzi974wN9xwg9555x0FBQWpefPm2rBhg1avXq2aNWs6jSvuMebn56dly5apR48e6tOnj9atW6eWLVtq/vz5evXVVzVo0CA1bNhQp0+f1htvvKHAwED17du31PcPANyF+usv1F/lr/66lBtuuEFLlizRoEGD1K9fPx08eFCzZ89W8+bNC2xaFiUwMFCzZs3SnXfeqauvvlpDhgxx1DCff/65Onfu7PggMSUlRf369VOXLl10991368SJE3r55ZfVokWLYr9uQfs0ZcoU3XXXXbr22mu1a9cuLViwwOlse+mvuis8PFydO3dWWFiYfvzxR73yyivq16+f080C8lSpUkXvvvuuBg4cqJtvvlnLly9Xjx49XKoPgSK59V5/QCm5+JbEf5eQkGBIyneL0+zsbGP48OFGUFCQUb16dePmm282jh49Wugtif9+S/uLb/l+sb/f/jjvlsTvvfeekZycbISGhhp+fn5Gv379jF9//TXf87dv324MHjzYqFmzpmG1Wo2oqCjj5ptvNtasWXPJmIpy4MAB48YbbzSCg4MNX19fo0OHDsayZcvyjZOLtyT+5ZdfjAkTJhidOnUyQkNDjapVqxq1a9c2+vXrZ6xdu9Zp7IULF4wxY8YYtWvXNiwWS77bE7/++utGu3btDD8/P6N69epGq1atjIcfftg4cuSIY0xUVJTRr18/Y+XKlUbr1q0Nq9VqNG3aNN/tZp966imjQ4cORnBwsOHn52c0bdrUmDp1qnH+/Pki96ewWxIbhmF8//33RlxcnFGtWjXD39/fuP76643vvvvOaUxRx2BBdu7caYwfP964+uqrjZCQEKNq1apGRESEcdNNNxnff/99vvHPPvus8Y9//MOwWq1G586dja1btxrdunUzunXr5jQuOzvb+Pe//200aNDA8Pb2NsLDw40bb7zR6ZbKFy5cMJ555hmjadOmho+Pj1G7dm2jT58+xrZt25zm+uijj4wuXboYAQEBRkBAgNG0aVMjMTHRSEtLMwzjr2Pg7rvvNho2bGj4+voaISEhxvXXX2+sXr3aMceaNWuMAQMGGHXq1DF8fHyMOnXqGLfeeqvx888/O71WYcfdtm3bjI4dOxo+Pj5GvXr1jOeeey7fLZcNwyjwvSjKsWPH8v2u//nnn8Zdd91l1KpVy6hWrZoRFxdn/PTTT0ZUVJTTrZVdOcYK+vfhjz/+MJo3b26Eh4cb+/btM77//nvj1ltvNerVq2dYrVYjNDTUuOGGG4ytW7e6vB8A4EnUX5dG/VW+6q+iXs8wDCM3N9d4+umnjaioKMNqtRpt27Y1li1bZiQkJBhRUVFOY/9+zBZUnxjGX8diXFycERQUZPj6+hoNGzY0hg0blu//+48++sho1qyZYbVajebNmxtLliwp8HUvJSAgwKluOXfunPHggw8aERERhp+fn9G5c2djw4YN+Wqn1157zejatavjd6Bhw4bG+PHjjVOnTjnGFPQ7kJ2dbXTr1s2oVq2asXHjRpfqQ6AoFsMowRUCAaCM1a9fXy1bttSyZcs8HQoAAEClQP0FwN24phQAAAAAAADcjqYUAAAAAAAA3I6mFAAAAAAAANyOa0oBAAAAAADA7ThTCgAAAAAAAG5HUwoAAAAAAABuV9XTAZQHubm5OnLkiKpXry6LxeLpcAAAQDlmGIZOnz6tOnXqqEqVyvP5HvUSAABwlav1Ek0pSUeOHFFkZKSnwwAAACby22+/qW7dup4Ow22olwAAQHFdql6iKSWpevXqkv56swIDA116jt1u16pVqxQbGytvb++yDA+lgHyZDzkzH3JmPuSsZLKyshQZGemoHyqLvP198803NXDgQI4ZE+J33rzInXmRO/Mid5fH1XqJppTkOAU9MDCwWE0pf39/BQYGcoCaAPkyH3JmPuTMfMjZ5alsX2HL21+OGfPid968yJ15kTvzInel41L1UuW5EAIAAAAAAADKDZpSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAAAABwO5pSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAAAABwO5pSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAAAABwO5pSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAVyLRp02SxWDR27FjHunPnzikxMVE1a9ZUtWrVFB8fr8zMTM8FCQAAIJpSAAAAFcaWLVv02muvqXXr1k7rx40bp88++0yLFi3SunXrdOTIEQ0ePNhDUQIAAPyFphQAAEAFcObMGd1+++164403VKNGDcf6U6dOac6cOXruuefUo0cPtWvXTnPnztV3332njRs3ejBiAABQ2dGUAgAAqAASExPVr18/9erVy2n9tm3bZLfbndY3bdpU9erV04YNG9wdJgAAgENVTwcAAACAy/P+++/r+++/15YtW/Jty8jIkI+Pj4KDg53Wh4WFKSMjo9A5bTabbDabYzkrK8vxs91uv/yg4XZ5eSN/5kPuzIvcmRe5uzyuvm80pdyk/qOfezqEEjs0rZ+nQwAAAIX47bff9MADDyg1NVW+vr6lNm9KSoomT55c4LbU1NRSex24H/kzL3JnXuTOvMhdyWRnZ7s0jqYUAACAiW3btk1Hjx7V1Vdf7ViXk5Oj9evX65VXXtHKlSt1/vx5nTx50ulsqczMTIWHhxc6b3JyspKSkhzLWVlZioyMlCTFxMTI29u79HcGZcputys1NZX8mRC5My9yZ16u5K7lpJVujqp07Z4UV2ZzX3yGdVFoSgEAAJhYz549tWvXLqd1d911l5o2bapHHnlEkZGR8vb21po1axQfHy9JSktL0+HDhxUdHV3ovFarVVartcBt3t7e/HFlYuTPvMideZE78yoqd7Yci5ujKV1leUy6OjdNKQAAABOrXr26WrZs6bQuICBANWvWdKwfPny4kpKSFBISosDAQI0ZM0bR0dHq1KmTJ0IGAACQRFMKAACgwnv++edVpUoVxcfHy2azKS4uTq+++qqnwwIAAJUcTSkAAIAK5quvvnJa9vX11cyZMzVz5kzPBAQAAFCAKp4OAAAAAAAAAJUPTSkAAAAAAAC4HU0pAAAAAAAAuB1NKQAAAAAAALgdTSkAAAAAAAC4HU0pAAAAAAAAuB1NKQAAAAAAALgdTSkAAAAAAAC4HU0pAAAAAAAAuB1NKQAAAAAAALgdTSkAAAAAAAC4nUebUrNmzVLr1q0VGBiowMBARUdH64svvnBs7969uywWi9Pj3nvvdZrj8OHD6tevn/z9/RUaGqrx48frwoUL7t4VAAAAAAAAFENVT7543bp1NW3aNDVu3FiGYWj+/PkaMGCAtm/frhYtWkiSRowYoSlTpjie4+/v7/g5JydH/fr1U3h4uL777julp6dr6NCh8vb21tNPP+32/QEAAAAAAIBrPNqU6t+/v9Py1KlTNWvWLG3cuNHRlPL391d4eHiBz1+1apX27t2r1atXKywsTG3atNGTTz6pRx55RJMmTZKPj0+Z7wMAAAAAAACKz6NNqYvl5ORo0aJFOnv2rKKjox3rFyxYoHfffVfh4eHq37+/nnjiCcfZUhs2bFCrVq0UFhbmGB8XF6dRo0Zpz549atu2bYGvZbPZZLPZHMtZWVmSJLvdLrvd7lK8eeNcHW/1MlwaVx65uo/lWXHzBc8jZ+ZDzsyHnJUM7xcAAEDp8HhTateuXYqOjta5c+dUrVo1LV26VM2bN5ck3XbbbYqKilKdOnW0c+dOPfLII0pLS9OSJUskSRkZGU4NKUmO5YyMjEJfMyUlRZMnT863ftWqVU5fD3RFamqqS+NmdCjWtOXK8uXLPR1CqXE1Xyg/yJn5kDPzIWfFk52d7ekQAAAAKgSPN6WaNGmiHTt26NSpU1q8eLESEhK0bt06NW/eXCNHjnSMa9WqlSIiItSzZ08dOHBADRs2LPFrJicnKykpybGclZWlyMhIxcbGKjAw0KU57Ha7UlNTFRMTI29v70uObzlpZYnj9bTdk+I8HcJlK26+4HnkzHzImfmQs5LJO8MaAAAAl8fjTSkfHx81atRIktSuXTtt2bJFL774ol577bV8Yzt27ChJ2r9/vxo2bKjw8HBt3rzZaUxmZqYkFXodKkmyWq2yWq351nt7exe7KHf1ObYcS7HmLU8q0h8qJckxPIucmQ85Mx9yVjy8VwAAAKWjiqcD+Lvc3Fyn6z1dbMeOHZKkiIgISVJ0dLR27dqlo0ePOsakpqYqMDDQ8RVAAAAAAAAAlD8ePVMqOTlZffr0Ub169XT69GktXLhQX331lVauXKkDBw5o4cKF6tu3r2rWrKmdO3dq3Lhx6tq1q1q3bi1Jio2NVfPmzXXnnXdqxowZysjI0OOPP67ExMQCz4QCAAAAAABA+eDRptTRo0c1dOhQpaenKygoSK1bt9bKlSsVExOj3377TatXr9YLL7ygs2fPKjIyUvHx8Xr88ccdz/fy8tKyZcs0atQoRUdHKyAgQAkJCZoyZYoH9woAAAAAAACX4tGm1Jw5cwrdFhkZqXXr1l1yjqioqAp1dzgAAAAAAIDKoNxdUwoAAAAAAAAVH00pAAAAAAAAuB1NKQAAAAAAALgdTSkAAAAAAAC4HU0pAAAAAAAAuB1NKQAAAAAAALgdTSkAAAAAAAC4HU0pAAAAAAAAuB1NKQAAAAAAALgdTSkAAAAAAAC4HU0pAAAAAAAAuB1NKQAAAAAAALgdTSkAAAAAAAC4HU0pAAAAAAAAuB1NKQAAAAAAALgdTSkAAACTmzVrllq3bq3AwEAFBgYqOjpaX3zxhWN79+7dZbFYnB733nuvByMGAACQqno6AAAAAFyeunXratq0aWrcuLEMw9D8+fM1YMAAbd++XS1atJAkjRgxQlOmTHE8x9/f31PhAgAASKIpBQAAYHr9+/d3Wp46dapmzZqljRs3OppS/v7+Cg8P90R4AAAABeLrewAAABVITk6O3n//fZ09e1bR0dGO9QsWLFCtWrXUsmVLJScnKzs724NRAgAAcKYUAABAhbBr1y5FR0fr3LlzqlatmpYuXarmzZtLkm677TZFRUWpTp062rlzpx555BGlpaVpyZIlhc5ns9lks9kcy1lZWY6f7XZ72e0Iykxe3sif+ZA78yJ35uVK7qxehrvCKRNleVy6OjdNKQAAgAqgSZMm2rFjh06dOqXFixcrISFB69atU/PmzTVy5EjHuFatWikiIkI9e/bUgQMH1LBhwwLnS0lJ0eTJkwvclpqaWib7APcgf+ZF7syL3JlXUbmb0cGNgZSB5cuXl9ncrp6RTVMKAACgAvDx8VGjRo0kSe3atdOWLVv04osv6rXXXss3tmPHjpKk/fv3F9qUSk5OVlJSkmM5KytLkZGRkqSYmBh5e3uX9i6gjNntdqWmppI/EyJ35kXuzMuV3LWctNLNUZWu3ZPiymzui8+wLgpNKQAAgAooNzfX6et3F9uxY4ckKSIiotDnW61WWa3WArd5e3vzx5WJkT/zInfmRe7Mq6jc2XIsbo6mdJXlMenq3DSlAAAATC45OVl9+vRRvXr1dPr0aS1cuFBfffWVVq5cqQMHDmjhwoXq27evatasqZ07d2rcuHHq2rWrWrdu7enQAQBAJUZTCgAAwOSOHj2qoUOHKj09XUFBQWrdurVWrlypmJgY/fbbb1q9erVeeOEFnT17VpGRkYqPj9fjjz/u6bABAEAlR1MKAADA5ObMmVPotsjISK1bt86N0QAAALimiqcDAAAAAAAAQOVDUwoAAAAAAABuR1MKAAAAAAAAbkdTCgAAAAAAAG5HUwoAAAAAAABuR1MKAAAAAAAAbkdTCgAAAAAAAG5HUwoAAAAAAABuR1MKAAAAAAAAbkdTCgAAAAAAAG5HUwoAAAAAAABuR1MKAAAAAAAAbkdTCgAAAAAAAG5HUwoAAAAAAABuR1MKAAAAAAAAbufRptSsWbPUunVrBQYGKjAwUNHR0friiy8c28+dO6fExETVrFlT1apVU3x8vDIzM53mOHz4sPr16yd/f3+FhoZq/PjxunDhgrt3BQAAAAAAAMXg0aZU3bp1NW3aNG3btk1bt25Vjx49NGDAAO3Zs0eSNG7cOH322WdatGiR1q1bpyNHjmjw4MGO5+fk5Khfv346f/68vvvuO82fP1/z5s3ThAkTPLVLAAAAAAAAcEFVT754//79nZanTp2qWbNmaePGjapbt67mzJmjhQsXqkePHpKkuXPnqlmzZtq4caM6deqkVatWae/evVq9erXCwsLUpk0bPfnkk3rkkUc0adIk+fj4eGK3AAAAAAAAcAkebUpdLCcnR4sWLdLZs2cVHR2tbdu2yW63q1evXo4xTZs2Vb169bRhwwZ16tRJGzZsUKtWrRQWFuYYExcXp1GjRmnPnj1q27Ztga9ls9lks9kcy1lZWZIku90uu93uUrx541wdb/UyXBpXHrm6j+VZcfMFzyNn5kPOzIeclQzvFwAAQOnweFNq165dio6O1rlz51StWjUtXbpUzZs3144dO+Tj46Pg4GCn8WFhYcrIyJAkZWRkODWk8rbnbStMSkqKJk+enG/9qlWr5O/vX6z4U1NTXRo3o0Oxpi1Xli9f7ukQSo2r+UL5Qc7Mh5yZDzkrnuzsbE+HAAAAUCF4vCnVpEkT7dixQ6dOndLixYuVkJCgdevWlelrJicnKykpybGclZWlyMhIxcbGKjAw0KU57Ha7UlNTFRMTI29v70uObzlpZYnj9bTdk+I8HcJlK26+4HnkzHzImfmQs5LJO8MaAAAAl8fjTSkfHx81atRIktSuXTtt2bJFL774om655RadP39eJ0+edDpbKjMzU+Hh4ZKk8PBwbd682Wm+vLvz5Y0piNVqldVqzbfe29u72EW5q8+x5ViKNW95UpH+UClJjuFZ5Mx8yJn5kLPi4b0CAAAoHR69+15BcnNzZbPZ1K5dO3l7e2vNmjWObWlpaTp8+LCio6MlSdHR0dq1a5eOHj3qGJOamqrAwEA1b97c7bEDAAAAAADANR49Uyo5OVl9+vRRvXr1dPr0aS1cuFBfffWVVq5cqaCgIA0fPlxJSUkKCQlRYGCgxowZo+joaHXq1EmSFBsbq+bNm+vOO+/UjBkzlJGRoccff1yJiYkFngkFAAAAAACA8sGjTamjR49q6NChSk9PV1BQkFq3bq2VK1cqJiZGkvT888+rSpUqio+Pl81mU1xcnF599VXH8728vLRs2TKNGjVK0dHRCggIUEJCgqZMmeKpXQIAAAAAAIALPNqUmjNnTpHbfX19NXPmTM2cObPQMVFRURXq7nAAAAAAAACVQbm7phQAAAAAAAAqPppSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAAAABwO5pSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAAAABwO5pSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAAAABwO5pSAAAAAAAAcDuaUgAAACY3a9YstW7dWoGBgQoMDFR0dLS++OILx/Zz584pMTFRNWvWVLVq1RQfH6/MzEwPRgwAAEBTCgAAwPTq1q2radOmadu2bdq6dat69OihAQMGaM+ePZKkcePG6bPPPtOiRYu0bt06HTlyRIMHD/Zw1AAAoLKr6ukAAAAAcHn69+/vtDx16lTNmjVLGzduVN26dTVnzhwtXLhQPXr0kCTNnTtXzZo108aNG9WpUydPhAwAAMCZUgAAABVJTk6O3n//fZ09e1bR0dHatm2b7Ha7evXq5RjTtGlT1atXTxs2bPBgpAAAoLLjTCkAAIAKYNeuXYqOjta5c+dUrVo1LV26VM2bN9eOHTvk4+Oj4OBgp/FhYWHKyMgodD6bzSabzeZYzsrKcvxst9tLPX6Uvby8kT/zIXfmRe7My5XcWb0Md4VTJsryuHR1bppSAAAAFUCTJk20Y8cOnTp1SosXL1ZCQoLWrVtX4vlSUlI0efLkArelpqaWeF54HvkzL3JnXuTOvIrK3YwObgykDCxfvrzM5s7OznZpHE0pAACACsDHx0eNGjWSJLVr105btmzRiy++qFtuuUXnz5/XyZMnnc6WyszMVHh4eKHzJScnKykpybGclZWlyMhISVJMTIy8vb3LZkdQZux2u1JTU8mfCZE78yJ35uVK7lpOWunmqErX7klxZTb3xWdYF4WmFAAAQAWUm5srm82mdu3aydvbW2vWrFF8fLwkKS0tTYcPH1Z0dHShz7darbJarQVu8/b25o8rEyN/5kXuzIvcmVdRubPlWNwcTekqy2PS1blpSgEAAJhccnKy+vTpo3r16un06dNauHChvvrqK61cuVJBQUEaPny4kpKSFBISosDAQI0ZM0bR0dHceQ8AAHgUTSkAAACTO3r0qIYOHar09HQFBQWpdevWWrlypWJiYiRJzz//vKpUqaL4+HjZbDbFxcXp1Vdf9XDUAACgsqMpBQAAYHJz5swpcruvr69mzpypmTNnuikiAACAS6vi6QAAAAAAAABQ+dCUAgAAAAAAgNvRlAIAAAAAAIDb0ZQCAAAAAACA29GUAgAAAAAAgNvRlAIAAAAAAIDb0ZQCAAAAAACA29GUAgAAAAAAgNvRlAIAAAAAAIDb0ZQCAAAAAACA29GUAgAAAAAAgNvRlAIAAAAAAIDb0ZQCAAAAAACA29GUAgAAAAAAgNvRlAIAAAAAAIDb0ZQCAAAAAACA29GUAgAAAAAAgNvRlAIAAAAAAIDb0ZQCAAAAAACA23m0KZWSkqJrrrlG1atXV2hoqAYOHKi0tDSnMd27d5fFYnF63HvvvU5jDh8+rH79+snf31+hoaEaP368Lly44M5dAQAAAAAAQDFU9eSLr1u3TomJibrmmmt04cIFPfbYY4qNjdXevXsVEBDgGDdixAhNmTLFsezv7+/4OScnR/369VN4eLi+++47paena+jQofL29tbTTz/t1v0BAAAAAACAazzalFqxYoXT8rx58xQaGqpt27apa9eujvX+/v4KDw8vcI5Vq1Zp7969Wr16tcLCwtSmTRs9+eSTeuSRRzRp0iT5+PiU6T4AAAAAAACg+DzalPq7U6dOSZJCQkKc1i9YsEDvvvuuwsPD1b9/fz3xxBOOs6U2bNigVq1aKSwszDE+Li5Oo0aN0p49e9S2bdt8r2Oz2WSz2RzLWVlZkiS73S673e5SrHnjXB1v9TJcGlceubqP5Vlx8wXPI2fmQ87Mh5yVDO8XAABA6Sg3Tanc3FyNHTtWnTt3VsuWLR3rb7vtNkVFRalOnTrauXOnHnnkEaWlpWnJkiWSpIyMDKeGlCTHckZGRoGvlZKSosmTJ+dbv2rVKqevBroiNTXVpXEzOhRr2nJl+fLlng6h1LiaL5Qf5Mx8yJn5kLPiyc7O9nQIAAAAFUK5aUolJiZq9+7d+uabb5zWjxw50vFzq1atFBERoZ49e+rAgQNq2LBhiV4rOTlZSUlJjuWsrCxFRkYqNjZWgYGBLs1ht9uVmpqqmJgYeXt7X3J8y0krSxRrebB7UpynQ7hsxc0XPI+cmQ85Mx9yVjJ5Z1gDAADg8pSLptTo0aO1bNkyrV+/XnXr1i1ybMeOHSVJ+/fvV8OGDRUeHq7Nmzc7jcnMzJSkQq9DZbVaZbVa86339vYudlHu6nNsOZZizVueVKQ/VEqSY3gWOTMfcmY+5Kx4eK8AAABKRxVPvrhhGBo9erSWLl2qtWvXqkGDBpd8zo4dOyRJERERkqTo6Gjt2rVLR48edYxJTU1VYGCgmjdvXiZxAwAAAAAA4PJ49EypxMRELVy4UJ988omqV6/uuAZUUFCQ/Pz8dODAAS1cuFB9+/ZVzZo1tXPnTo0bN05du3ZV69atJUmxsbFq3ry57rzzTs2YMUMZGRl6/PHHlZiYWODZUAAAAAAAAPA8j54pNWvWLJ06dUrdu3dXRESE4/HBBx9Iknx8fLR69WrFxsaqadOmevDBBxUfH6/PPvvMMYeXl5eWLVsmLy8vRUdH64477tDQoUM1ZcoUT+0WAAAAAAAALsGjZ0oZhlHk9sjISK1bt+6S80RFRVWoO8QBAAAAAABUdB49UwoAAAAAAACVE00pAAAAAAAAuB1NKQAAAAAAALgdTSkAAAAAAAC4HU0pAAAAAAAAuB1NKQAAAAAAALgdTSkAAAAAAAC4HU0pAAAAAAAAuB1NKQAAAJNLSUnRNddco+rVqys0NFQDBw5UWlqa05ju3bvLYrE4Pe69914PRQwAAEBTCgAAwPTWrVunxMREbdy4UampqbLb7YqNjdXZs2edxo0YMULp6emOx4wZMzwUMQAAgFTV0wEAAADg8qxYscJped68eQoNDdW2bdvUtWtXx3p/f3+Fh4e7OzwAAIAC0ZQCAACoYE6dOiVJCgkJcVq/YMECvfvuuwoPD1f//v31xBNPyN/fv8A5bDabbDabYzkrK8vxs91uL4OoUdby8kb+zIfcmRe5My9Xcmf1MtwVTpkoy+PS1blpSgEAAFQgubm5Gjt2rDp37qyWLVs61t92222KiopSnTp1tHPnTj3yyCNKS0vTkiVLCpwnJSVFkydPLnBbampqmcQO9yB/5kXuzIvcmVdRuZvRwY2BlIHly5eX2dzZ2dkujaMpBQAAUIEkJiZq9+7d+uabb5zWjxw50vFzq1atFBERoZ49e+rAgQNq2LBhvnmSk5OVlJTkWM7KylJkZKQkKSYmRt7e3mW0Bygrdrtdqamp5M+EyJ15kTvzciV3LSetdHNUpWv3pLgym/viM6yLQlMKAACgghg9erSWLVum9evXq27dukWO7dixoyRp//79BTalrFarrFZrgc/19vbmjysTI3/mRe7Mi9yZV1G5s+VY3BxN6SrLY9LVuWlKAQAAmJxhGBozZoyWLl2qr776Sg0aNLjkc3bs2CFJioiIKOPoAAAACkZTCgAAwOQSExO1cOFCffLJJ6pevboyMjIkSUFBQfLz89OBAwe0cOFC9e3bVzVr1tTOnTs1btw4de3aVa1bt/Zw9AAAoLKqUpIn/fLLL6UdBwAAQKVTWjXVrFmzdOrUKXXv3l0RERGOxwcffCBJ8vHx0erVqxUbG6umTZvqwQcfVHx8vD777LNSeX0AAICSKNGZUo0aNVK3bt00fPhw3XjjjfL19S3tuAAAACq80qqpDKPoW1JHRkZq3bp1JZobAACgrJToTKnvv/9erVu3VlJSksLDw/Wvf/1LmzdvLu3YAAAAKjRqKgAAUJmVqCnVpk0bvfjiizpy5Ijeeustpaenq0uXLmrZsqWee+45HTt2rLTjBAAAqHCoqQAAQGVWoqZUnqpVq2rw4MFatGiRpk+frv379+uhhx5SZGSkhg4dqvT09NKKEwAAoMKipgIAAJXRZTWltm7dqvvuu08RERF67rnn9NBDD+nAgQNKTU3VkSNHNGDAgNKKEwAAoMKipgIAAJVRiS50/txzz2nu3LlKS0tT37599fbbb6tv376qUuWvHleDBg00b9481a9fvzRjBQAAqFCoqQAAQGVWoqbUrFmzdPfdd2vYsGGKiIgocExoaKjmzJlzWcEBAABUZNRUAACgMitRU2rfvn2XHOPj46OEhISSTA8AAFApUFMBAIDKrETXlJo7d64WLVqUb/2iRYs0f/78yw4KAACgMqCmAgAAlVmJmlIpKSmqVatWvvWhoaF6+umnLzsoAACAyoCaCgAAVGYlakodPnxYDRo0yLc+KipKhw8fvuygAAAAKgNqKgAAUJmVqCkVGhqqnTt35lv/ww8/qGbNmpcdFAAAQGVATQUAACqzEjWlbr31Vt1///368ssvlZOTo5ycHK1du1YPPPCAhgwZUtoxAgAAVEjUVAAAoDIr0d33nnzySR06dEg9e/ZU1ap/TZGbm6uhQ4dy/QMAAAAXUVMBAIDKrERNKR8fH33wwQd68skn9cMPP8jPz0+tWrVSVFRUaccHAABQYVFTAQCAyqxETak8V155pa688srSigUAAKBSoqYCAACVUYmaUjk5OZo3b57WrFmjo0ePKjc312n72rVrSyU4AACAioyaCgAAVGYlako98MADmjdvnvr166eWLVvKYrGUdlwAAAAVHjUVAACozErUlHr//ff14Ycfqm/fvqUdDwAAQKVBTQUAACqzKiV5ko+Pjxo1alTasQAAAFQq1FQAAKAyK1FT6sEHH9SLL74owzBKOx4AAIBKg5oKAABUZiX6+t4333yjL7/8Ul988YVatGghb29vp+1LliwpleAAAAAqMmoqAABQmZWoKRUcHKxBgwaVdiwAAACVCjUVAACozErUlJo7d26pvHhKSoqWLFmin376SX5+frr22ms1ffp0NWnSxDHm3LlzevDBB/X+++/LZrMpLi5Or776qsLCwhxjDh8+rFGjRunLL79UtWrVlJCQoJSUFFWtWqLdAwAAcIvSqqkAAADMqETXlJKkCxcuaPXq1Xrttdd0+vRpSdKRI0d05swZl+dYt26dEhMTtXHjRqWmpsputys2NlZnz551jBk3bpw+++wzLVq0SOvWrdORI0c0ePBgx/acnBz169dP58+f13fffaf58+dr3rx5mjBhQkl3DQAAwG1Ko6YCAAAwoxKdSvTrr7+qd+/eOnz4sGw2m2JiYlS9enVNnz5dNptNs2fPdmmeFStWOC3PmzdPoaGh2rZtm7p27apTp05pzpw5WrhwoXr06CHpr08UmzVrpo0bN6pTp05atWqV9u7dq9WrVyssLExt2rTRk08+qUceeUSTJk2Sj49PSXYRAACgzJVWTQUAAGBGJWpKPfDAA2rfvr1++OEH1axZ07F+0KBBGjFiRImDOXXqlCQpJCREkrRt2zbZ7Xb16tXLMaZp06aqV6+eNmzYoE6dOmnDhg1q1aqV09f54uLiNGrUKO3Zs0dt27bN9zo2m002m82xnJWVJUmy2+2y2+0uxZo3ztXxVi/z3lXH1X0sz4qbL3geOTMfcmY+5KxkSvP9KquaCgAAwAxK1JT6+uuv9d133+U7C6l+/fr673//W6JAcnNzNXbsWHXu3FktW7aUJGVkZMjHx0fBwcFOY8PCwpSRkeEYc3FDKm973raCpKSkaPLkyfnWr1q1Sv7+/sWKOzU11aVxMzoUa9pyZfny5Z4OodS4mi+UH+TMfMiZ+ZCz4snOzi61ucqipgIAADCLEjWlcnNzlZOTk2/977//rurVq5cokMTERO3evVvffPNNiZ5fHMnJyUpKSnIsZ2VlKTIyUrGxsQoMDHRpDrvdrtTUVMXExOS7fXNBWk5aWeJ4PW33pDhPh3DZipsveB45Mx9yZj7krGTyzrAuDWVRUwEAAJhFiZpSsbGxeuGFF/T6669LkiwWi86cOaOJEyeqb9++xZ5v9OjRWrZsmdavX6+6des61oeHh+v8+fM6efKk09lSmZmZCg8Pd4zZvHmz03yZmZmObQWxWq2yWq351nt7exe7KHf1ObYcS7HmLU8q0h8qJckxPIucmQ85Mx9yVjyl+V6Vdk0FAABgJiW6+96zzz6rb7/9Vs2bN9e5c+d02223OU4znz59usvzGIah0aNHa+nSpVq7dq0aNGjgtL1du3by9vbWmjVrHOvS0tJ0+PBhRUdHS5Kio6O1a9cuHT161DEmNTVVgYGBat68eUl2DwAAwC1Kq6YCAAAwoxKdKVW3bl398MMPev/997Vz506dOXNGw4cP1+233y4/Pz+X50lMTNTChQv1ySefqHr16o5rQAUFBcnPz09BQUEaPny4kpKSFBISosDAQI0ZM0bR0dHq1KmTpL8+YWzevLnuvPNOzZgxQxkZGXr88ceVmJhY4NlQAAAA5UVp1VQAAABmVKKmlCRVrVpVd9xxx2W9+KxZsyRJ3bt3d1o/d+5cDRs2TJL0/PPPq0qVKoqPj5fNZlNcXJxeffVVx1gvLy8tW7ZMo0aNUnR0tAICApSQkKApU6ZcVmwAAADuUBo1FQAAgBmVqCn19ttvF7l96NChLs1jGMYlx/j6+mrmzJmaOXNmoWOioqIq1B3iAABA5VBaNRUAAIAZlagp9cADDzgt2+12ZWdny8fHR/7+/hRQAAAALqCmAgAAlVmJLnT+559/Oj3OnDmjtLQ0denSRe+9915pxwgAAFAhUVMBAIDKrERNqYI0btxY06ZNy/eJHwAAAFxHTQUAACqLUmtKSX9dqPPIkSOlOSUAAEClQ00FAAAqgxJdU+rTTz91WjYMQ+np6XrllVfUuXPnUgkMAACgoiutmiolJUVLlizRTz/9JD8/P1177bWaPn26mjRp4hhz7tw5Pfjgg3r//fed7mgcFhZWavsDAABQHCVqSg0cONBp2WKxqHbt2urRo4eeffbZ0ogLAACgwiutmmrdunVKTEzUNddcowsXLuixxx5TbGys9u7dq4CAAEnSuHHj9Pnnn2vRokUKCgrS6NGjNXjwYH377beluUsAAAAuK1FTKjc3t7TjAAAAqHRKq6ZasWKF0/K8efMUGhqqbdu2qWvXrjp16pTmzJmjhQsXqkePHpKkuXPnqlmzZtq4caM6depUKnEAAAAUR4maUgAAACi/Tp06JUkKCQmRJG3btk12u129evVyjGnatKnq1aunDRs2FNiUstlsstlsjuWsrCzHz3a7vaxCRxnKyxv5Mx9yZ17kzrxcyZ3Vy3BXOGWiLI9LV+cuUVMqKSnJ5bHPPfdcSV4CAACgwiuLmio3N1djx45V586d1bJlS0lSRkaGfHx8FBwc7DQ2LCxMGRkZBc6TkpKiyZMnF7gtNTXV5bhR/pA/8yJ35kXuzKuo3M3o4MZAysDy5cvLbO7s7GyXxpWoKbV9+3Zt375ddrvdcQHNn3/+WV5eXrr66qsd4ywWS0mmBwAAqBTKoqZKTEzU7t279c0331xWbMnJyU5Ns6ysLEVGRkqSYmJi5O3tfVnzw/3sdrtSU1PJnwmRO/Mid+blSu5aTlrp5qhK1+5JcWU298VnWBelRE2p/v37q3r16po/f75q1KghSfrzzz9111136brrrtODDz5YkmkBAAAqldKuqUaPHq1ly5Zp/fr1qlu3rmN9eHi4zp8/r5MnTzqdLZWZmanw8PAC57JarbJarQVu8/b25o8rEyN/5kXuzIvcmVdRubPlmPtEnLI8Jl2du0pJJn/22WeVkpLiKJ4kqUaNGnrqqae4+x4AAICLSqumMgxDo0eP1tKlS7V27Vo1aNDAaXu7du3k7e2tNWvWONalpaXp8OHDio6OvvwdAQAAKIESnSmVlZWlY8eO5Vt/7NgxnT59+rKDAgAAqAxKq6ZKTEzUwoUL9cknn6h69eqO60QFBQXJz89PQUFBGj58uJKSkhQSEqLAwECNGTNG0dHR3HkPAAB4TInOlBo0aJDuuusuLVmyRL///rt+//13ffTRRxo+fLgGDx5c2jECAABUSKVVU82aNUunTp1S9+7dFRER4Xh88MEHjjHPP/+8brjhBsXHx6tr164KDw/XkiVLymK3AAAAXFKiM6Vmz56thx56SLfddpvjNn9Vq1bV8OHD9cwzz5RqgAAAABVVadVUhnHpW1L7+vpq5syZmjlzZonjBQAAKE0lakr5+/vr1Vdf1TPPPKMDBw5Ikho2bKiAgIBSDQ4AAKAio6YCAACVWYm+vpcnPT1d6enpaty4sQICAlz6lA4AAADOqKkAAEBlVKKm1PHjx9WzZ09deeWV6tu3r9LT0yVJw4cPL/atiwEAACoraioAAFCZlagpNW7cOHl7e+vw4cPy9/d3rL/lllu0YsWKUgsOAACgIqOmAgAAlVmJrim1atUqrVy5UnXr1nVa37hxY/3666+lEhgAAEBFR00FAAAqsxKdKXX27FmnT/PynDhxQlar9bKDAgAAqAyoqQAAQGVWoqbUddddp7ffftuxbLFYlJubqxkzZuj6668vteAAAAAqMmoqAABQmZXo63szZsxQz549tXXrVp0/f14PP/yw9uzZoxMnTujbb78t7RgBAAAqJGoqAABQmZXoTKmWLVvq559/VpcuXTRgwACdPXtWgwcP1vbt29WwYcPSjhEAAKBCoqYCAACVWbHPlLLb7erdu7dmz56tf//732UREwAAQIVHTQUAACq7Yp8p5e3trZ07d5ZFLAAAAJUGNRUAAKjsSvT1vTvuuENz5swp7VgAAAAqFWoqAABQmZXoQucXLlzQW2+9pdWrV6tdu3YKCAhw2v7cc8+VSnAAAAAVGTUVAACozIrVlPrll19Uv3597d69W1dffbUk6eeff3YaY7FYSi86AACACoiaCgAAoJhNqcaNGys9PV1ffvmlJOmWW27RSy+9pLCwsDIJDgAAoCKipgIAACjmNaUMw3Ba/uKLL3T27NlSDQgAAKCio6YCAAAo4YXO8/y9oAIAAEDxUVMBAIDKqFhNKYvFku/6BlzvAAAAoHioqQAAAIp5TSnDMDRs2DBZrVZJ0rlz53Tvvffmu1PMkiVLSi9CAACACoaaCgAAoJhNqYSEBKflO+64o1SDAQAAqAyoqQAAAIrZlJo7d25ZxQEAAFBpUFMBAABc5oXOAQAAAAAAgJKgKQUAAAAAAAC3oykFAAAAAAAAt6MpBQAAAAAAALejKQUAAAAAAAC382hTav369erfv7/q1Kkji8Wijz/+2Gn7sGHDZLFYnB69e/d2GnPixAndfvvtCgwMVHBwsIYPH64zZ864cS8AAAAAAABQXB5tSp09e1ZXXXWVZs6cWeiY3r17Kz093fF47733nLbffvvt2rNnj1JTU7Vs2TKtX79eI0eOLOvQAQAAAAAAcBmqevLF+/Tpoz59+hQ5xmq1Kjw8vMBtP/74o1asWKEtW7aoffv2kqSXX35Zffv21X/+8x/VqVOn1GMGAAAAAADA5fNoU8oVX331lUJDQ1WjRg316NFDTz31lGrWrClJ2rBhg4KDgx0NKUnq1auXqlSpok2bNmnQoEEFzmmz2WSz2RzLWVlZkiS73S673e5SXHnjXB1v9TJcGlceubqP5Vlx8wXPI2fmQ87Mh5yVDO8XAABA6SjXTanevXtr8ODBatCggQ4cOKDHHntMffr00YYNG+Tl5aWMjAyFhoY6Padq1aoKCQlRRkZGofOmpKRo8uTJ+davWrVK/v7+xYoxNTXVpXEzOhRr2nJl+fLlng6h1LiaL5Qf5Mx8yJn5kLPiyc7O9nQIAAAAFUK5bkoNGTLE8XOrVq3UunVrNWzYUF999ZV69uxZ4nmTk5OVlJTkWM7KylJkZKRiY2MVGBjo0hx2u12pqamKiYmRt7f3Jce3nLSyxPF62u5JcZ4O4bIVN1/wPHJmPuTMfMhZyeSdYQ0AAIDLU66bUn93xRVXqFatWtq/f7969uyp8PBwHT161GnMhQsXdOLEiUKvQyX9dZ0qq9Wab723t3exi3JXn2PLsRRr3vKkIv2hUpIcw7PImfmQM/MhZ8XDewUAAFA6PHr3veL6/fffdfz4cUVEREiSoqOjdfLkSW3bts0xZu3atcrNzVXHjh09FSYAAAAAAAAuwaNnSp05c0b79+93LB88eFA7duxQSEiIQkJCNHnyZMXHxys8PFwHDhzQww8/rEaNGiku7q+vkzVr1ky9e/fWiBEjNHv2bNntdo0ePVpDhgzhznsAAAAAAADlmEfPlNq6davatm2rtm3bSpKSkpLUtm1bTZgwQV5eXtq5c6f++c9/6sorr9Tw4cPVrl07ff31105fvVuwYIGaNm2qnj17qm/fvurSpYtef/11T+0SAAAAAAAAXODRM6W6d+8uwzAK3b5y5aUvDh4SEqKFCxeWZlgAAAAAAAAoY6a6phQAAADyW79+vfr37686derIYrHo448/dto+bNgwWSwWp0fv3r09EywAAMD/oSkFAABgcmfPntVVV12lmTNnFjqmd+/eSk9Pdzzee+89N0YIAACQn0e/vgcAAIDL16dPH/Xp06fIMVarVeHh4W6KCAAA4NJoSgEAAFQCX331lUJDQ1WjRg316NFDTz31lGrWrFnoeJvNJpvN5ljOyspy/Gy328s0VpSNvLyRP/Mhd+ZF7szLldxZvQq/RrYZlOVx6ercNKUAAAAquN69e2vw4MFq0KCBDhw4oMcee0x9+vTRhg0b5OXlVeBzUlJSNHny5AK3paamlmW4KGPkz7zInXmRO/MqKnczOrgxkDKwfPnyMps7OzvbpXE0pQAAACq4IUOGOH5u1aqVWrdurYYNG+qrr75Sz549C3xOcnKykpKSHMtZWVmKjIyUJMXExMjb27tsg0aps9vtSk1NJX8mRO7Mi9yZlyu5azlppZujKl27J8WV2dwXn2FdFJpSAAAAlcwVV1yhWrVqaf/+/YU2paxWq6xWa4HbvL29+ePKxMifeZE78yJ35lVU7mw5FjdHU7rK8ph0dW7uvgcAAFDJ/P777zp+/LgiIiI8HQoAAKjEOFMKAADA5M6cOaP9+/c7lg8ePKgdO3YoJCREISEhmjx5suLj4xUeHq4DBw7o4YcfVqNGjRQXV3an7QMAAFwKTSkAAACT27p1q66//nrHct61oBISEjRr1izt3LlT8+fP18mTJ1WnTh3FxsbqySefLPTreQAAAO5AUwoAAMDkunfvLsMo/LbUK1ea+0KsAACgYuKaUgAAAAAAAHA7mlIAAAAAAABwO5pSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAAAABwO5pSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAAAABwO5pSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAAAABwO5pSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAAAABwO5pSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAAAABwO5pSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAAAABwO5pSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAAAABwO5pSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAAAABwO5pSAAAAAAAAcDuaUgAAAAAAAHA7mlIAAAAAAABwO482pdavX6/+/furTp06slgs+vjjj522G4ahCRMmKCIiQn5+furVq5f27dvnNObEiRO6/fbbFRgYqODgYA0fPlxnzpxx414AAAAAAACguDzalDp79qyuuuoqzZw5s8DtM2bM0EsvvaTZs2dr06ZNCggIUFxcnM6dO+cYc/vtt2vPnj1KTU3VsmXLtH79eo0cOdJduwAAAAAAAIASqOrJF+/Tp4/69OlT4DbDMPTCCy/o8ccf14ABAyRJb7/9tsLCwvTxxx9ryJAh+vHHH7VixQpt2bJF7du3lyS9/PLL6tu3r/7zn/+oTp06btsXAAAAAAAAuK7cXlPq4MGDysjIUK9evRzrgoKC1LFjR23YsEGStGHDBgUHBzsaUpLUq1cvValSRZs2bXJ7zAAAAAAAAHCNR8+UKkpGRoYkKSwszGl9WFiYY1tGRoZCQ0OdtletWlUhISGOMQWx2Wyy2WyO5aysLEmS3W6X3W53Kb68ca6Ot3oZLo0rj1zdx/KsuPmC55Ez8yFn5kPOSob3CwAAoHSU26ZUWUpJSdHkyZPzrV+1apX8/f2LNVdqaqpL42Z0KNa05cry5cs9HUKpcTVfKD/ImfmQM/MhZ8WTnZ3t6RAAAAAqhHLblAoPD5ckZWZmKiIiwrE+MzNTbdq0cYw5evSo0/MuXLigEydOOJ5fkOTkZCUlJTmWs7KyFBkZqdjYWAUGBroUn91uV2pqqmJiYuTt7X3J8S0nrXRp3vJo96Q4T4dw2YqbL3geOTMfcmY+5Kxk8s6wBgAAwOUpt02pBg0aKDw8XGvWrHE0obKysrRp0yaNGjVKkhQdHa2TJ09q27ZtateunSRp7dq1ys3NVceOHQud22q1ymq15lvv7e1d7KLc1efYcizFmrc8qUh/qJQkx/AscmY+5Mx8yFnx8F4BAACUDo9e6PzMmTPasWOHduzYIemvi5vv2LFDhw8flsVi0dixY/XUU0/p008/1a5duzR06FDVqVNHAwcOlCQ1a9ZMvXv31ogRI7R582Z9++23Gj16tIYMGcKd9wAAQKWxfv169e/fX3Xq1JHFYtHHH3/stN0wDE2YMEERERHy8/NTr169tG/fPs8ECwAA8H882pTaunWr2rZtq7Zt20qSkpKS1LZtW02YMEGS9PDDD2vMmDEaOXKkrrnmGp05c0YrVqyQr6+vY44FCxaoadOm6tmzp/r27asuXbro9ddf98j+AAAAeMLZs2d11VVXaebMmQVunzFjhl566SXNnj1bmzZtUkBAgOLi4nTu3Dk3RwoAAPD/efTre927d5dhFH5XOovFoilTpmjKlCmFjgkJCdHChQvLIjwAAABT6NOnj/r06VPgNsMw9MILL+jxxx/XgAEDJElvv/22wsLC9PHHH2vIkCHuDBUAAMCh3F5TCgAAAJfv4MGDysjIUK9evRzrgoKC1LFjR23YsKHQppTNZpPNZnMsX3yBd7vdXnYBo8zk5Y38mQ+5My9yZ16u5M7qVfhJNmZQlselq3PTlAIAAKjAMjIyJElhYWFO68PCwhzbCpKSkqLJkycXuC01NbX0AoTbkT/zInfmRe7Mq6jczejgxkDKwPLly8ts7uzsbJfG0ZQCAABAPsnJyUpKSnIsZ2VlKTIyUpIUExPDXQhNyG63KzU1lfyZELkzL3JnXq7kruWklW6OqnTtnhRXZnNffIZ1UWhKAQAAVGDh4eGSpMzMTEVERDjWZ2Zmqk2bNoU+z2q1ymq1FrjN29ubP65MjPyZF7kzL3JnXkXlzpZjcXM0passj0lX5/bo3fcAAABQtho0aKDw8HCtWbPGsS4rK0ubNm1SdHS0ByMDAACVHWdKAQAAmNyZM2e0f/9+x/LBgwe1Y8cOhYSEqF69eho7dqyeeuopNW7cWA0aNNATTzyhOnXqaODAgZ4LGgAAVHo0pQAAAExu69atuv766x3LedeCSkhI0Lx58/Twww/r7NmzGjlypE6ePKkuXbpoxYoV8vX19VTIAAAANKUAAADMrnv37jKMwm9LbbFYNGXKFE2ZMsWNUQEAABSNa0oBAAAAAADA7WhKAQAAAAAAwO1oSgEAAAAAAMDtaEoBAAAAAADA7WhKAQAAAAAAwO1oSgEAAAAAAMDtaEoBAAAAAADA7WhKAQAAAAAAwO1oSgEAAAAAAMDtaEoBAAAAAADA7WhKAQAAAAAAwO1oSgEAAAAAAMDtaEoBAAAAAADA7WhKAQAAAAAAwO2qejoAlH/1H/3c0yFclkPT+nk6BAAAAAAA8DecKQUAAAAAAAC3oykFAAAAAAAAt6MpBQAAAAAAALejKQUAAAAAAAC3oykFAAAAAAAAt6MpBQAAAAAAALejKQUAAAAAAAC3q+rpAAAAAABJqv/o554O4bIcmtbP0yEAAGAqnCkFAAAAAAAAt6MpBQAAAAAAALejKQUAAAAAAAC3oykFAAAAAAAAt6MpBQAAAAAAALejKQUAAAAAAAC3oykFAAAAAAAAt6MpBQAAAAAAALejKQUAAAAAAAC3oykFAAAAAAAAtyvXTalJkybJYrE4PZo2berYfu7cOSUmJqpmzZqqVq2a4uPjlZmZ6cGIAQAAAAAA4Ipy3ZSSpBYtWig9Pd3x+Oabbxzbxo0bp88++0yLFi3SunXrdOTIEQ0ePNiD0QIAAAAAAMAVVT0dwKVUrVpV4eHh+dafOnVKc+bM0cKFC9WjRw9J0ty5c9WsWTNt3LhRnTp1cneoAAAAAAAAcFG5P1Nq3759qlOnjq644grdfvvtOnz4sCRp27Ztstvt6tWrl2Ns06ZNVa9ePW3YsMFT4QIAAAAAAMAF5fpMqY4dO2revHlq0qSJ0tPTNXnyZF133XXavXu3MjIy5OPjo+DgYKfnhIWFKSMjo8h5bTabbDabYzkrK0uSZLfbZbfbXYotb5yr461ehkvjUPouzqur+YLnkTPzIWfmQ85Kxozv16RJkzR58mSndU2aNNFPP/3koYgAAADKeVOqT58+jp9bt26tjh07KioqSh9++KH8/PxKPG9KSkq+wkySVq1aJX9//2LNlZqa6tK4GR2KNS1K0fLlyx0/u5ovlB/kzHzImfmQs+LJzs72dAgl0qJFC61evdqxXLVquS4DAQBAJWCqaiQ4OFhXXnml9u/fr5iYGJ0/f14nT550OlsqMzOzwGtQXSw5OVlJSUmO5aysLEVGRio2NlaBgYEuxWK325WamqqYmBh5e3tfcnzLSStdmhelb/ekuGLnC55HzsyHnJkPOSuZvDOszaaw63QCAAB4iqmaUmfOnNGBAwd05513ql27dvL29taaNWsUHx8vSUpLS9Phw4cVHR1d5DxWq1VWqzXfem9v72IX5a4+x5ZjKda8KD0X56ckOYZnkTPzIWfmQ86Kx6zvVd51On19fRUdHa2UlBTVq1fP02EBAIBKrFw3pR566CH1799fUVFROnLkiCZOnCgvLy/deuutCgoK0vDhw5WUlKSQkBAFBgZqzJgxio6O5s57AAAAFynqOp3Vq1cv8DmFXYNTKrvrapn9Gpzl/XpjXEfOvMideZE783Ild/y/dflzl+um1O+//65bb71Vx48fV+3atdWlSxdt3LhRtWvXliQ9//zzqlKliuLj42Wz2RQXF6dXX33Vw1EDAACUL0Vdp3P48OEFPqewa3BKZXcdMrNfg/Pi61iWZ1xHzrzInXmRO/MqKnf8v1U4V6/BWa6bUu+//36R2319fTVz5kzNnDnTTREBAACY38XX6SxMYdfglFRm1yEz+zU4d0+K83QIReI6cuZF7syL3JmXK7nj/63CuXoNznLdlAIAAEDpu/g6nYUp7BqcUtldh8zs1+A0yx+cXEfOvMideZE78yoqd/y/dflzVymzCAAAAFAuPPTQQ1q3bp0OHTqk7777ToMGDXJcpxMAAMBTOFMKAACggrvUdToBAAA8gaYUAABABXep63QCAAB4Al/fAwAAAAAAgNvRlAIAAAAAAIDb0ZQCAAAAAACA29GUAgAAAAAAgNvRlAIAAAAAAIDb0ZQCAAAAAACA29GUAgAAAAAAgNvRlAIAAAAAAIDb0ZQCAAAAAACA29GUAgAAAAAAgNvRlAIAAAAAAIDb0ZQCAAAAAACA29GUAgAAAAAAgNtV9XQAAAAAAFCZ1X/08yK3W70MzeggtZy0UrYci5uict2haf08HQIAk+JMKQAAAAAAALgdTSkAAAAAAAC4HU0pAAAAAAAAuB1NKQAAAAAAALgdTSkAAAAAAAC4HXffQ4VX/9HPy/0dS4rC3UwAAAAAABURZ0oBAAAAAADA7ThTCijn6j/6uadDuCyc6QUAAAAAKAhNKQAAAKAUlPcPkoq6nAEfIgEAPIGv7wEAAAAAAMDtaEoBAAAAAADA7WhKAQAAAAAAwO1oSgEAAAAAAMDtaEoBAAAAAADA7WhKAQAAAAAAwO1oSgEAAAAAAMDtaEoBAAAAAADA7ap6OgAAKM/qP/q5p0O4LIem9fN0CAAAAABQIJpSAAAAQCVn9g9hgMrMzL+/fIAKvr4HAAAAAAAAt+NMKQBlqqSf3Fi9DM3oILWctFK2HEspR1V5uPOTs9LOGZ+cAQAAABUbZ0oBAAAAAADA7WhKAQAAAAAAwO0qzNf3Zs6cqWeeeUYZGRm66qqr9PLLL6tDhw6eDgsAAFMy80VTJb7+WRjqJQAAUJ5UiKbUBx98oKSkJM2ePVsdO3bUCy+8oLi4OKWlpSk0NNTT4QEASoCmCFC6qJcAAEB5UyGaUs8995xGjBihu+66S5I0e/Zsff7553rrrbf06KOPejg6AEBlZIamGjcUqFyolwAAQHlj+mtKnT9/Xtu2bVOvXr0c66pUqaJevXppw4YNHowMAACgfKBeAgAA5ZHpz5T6448/lJOTo7CwMKf1YWFh+umnnwp8js1mk81mcyyfOnVKknTixAnZ7XaXXtdutys7O1vHjx+Xt7f3JcdXvXDWpXlRNqrmGsrOzlVVexXl5HI2gBmQM/MhZ+ZTkXN2/PjxMpv79OnTkiTDMMrsNUpbadZLxal/iot6qWxV5N/5iq68564s/801u+L+3VgSZv63szwfO67kzszvvVQ+6iXTN6VKIiUlRZMnT863vkGDBh6IBu5ym6cDQLGRM/MhZ+ZTUXNW69myf43Tp08rKCio7F/IQwqrl+655x4PRIPSUlF/5yuD8pw7d/ybi4qJY8ezykO9ZPqmVK1ateTl5aXMzEyn9ZmZmQoPDy/wOcnJyUpKSnIs5+bm6sSJE6pZs6YsFtc+ecjKylJkZKR+++03BQYGlnwH4Bbky3zImfmQM/MhZyVjGIZOnz6tOnXqeDoUl5VWvfTrr7+qTZs2HDMmxe+8eZE78yJ35kXuLo+r9ZLpm1I+Pj5q166d1qxZo4EDB0r6q2has2aNRo8eXeBzrFarrFar07rg4OASvX5gYCAHqImQL/MhZ+ZDzsyHnBWf2c6QKq16qUqVvy5HyjFjbuTPvMideZE78yJ3JedKvWT6ppQkJSUlKSEhQe3bt1eHDh30wgsv6OzZs467ywAAAFR21EsAAKC8qRBNqVtuuUXHjh3ThAkTlJGRoTZt2mjFihX5LuYJAABQWVEvAQCA8qZCNKUkafTo0YWefl4WrFarJk6cmO+0dpRP5Mt8yJn5kDPzIWeVz+XWSxwz5kb+zIvcmRe5My9y5x4Ww0z3MwYAAAAAAECFUMXTAQAAAAAAAKDyoSkFAAAAAAAAt6MpBQAAAAAAALejKVUCM2fOVP369eXr66uOHTtq8+bNng4J/yclJUXXXHONqlevrtDQUA0cOFBpaWlOY86dO6fExETVrFlT1apVU3x8vDIzMz0UMS42bdo0WSwWjR071rGOfJU///3vf3XHHXeoZs2a8vPzU6tWrbR161bHdsMwNGHCBEVERMjPz0+9evXSvn37PBhx5ZaTk6MnnnhCDRo0kJ+fnxo2bKgnn3xSF19SkpzBVdRA5R+1UMVBXWQu1EfmRa3kWTSliumDDz5QUlKSJk6cqO+//15XXXWV4uLidPToUU+HBknr1q1TYmKiNm7cqNTUVNntdsXGxurs2bOOMePGjdNnn32mRYsWad26dTpy5IgGDx7swaghSVu2bNFrr72m1q1bO60nX+XLn3/+qc6dO8vb21tffPGF9u7dq2effVY1atRwjJkxY4ZeeuklzZ49W5s2bVJAQIDi4uJ07tw5D0ZeeU2fPl2zZs3SK6+8oh9//FHTp0/XjBkz9PLLLzvGkDO4ghrIHKiFKgbqInOhPjI3aiUPM1AsHTp0MBITEx3LOTk5Rp06dYyUlBQPRoXCHD161JBkrFu3zjAMwzh58qTh7e1tLFq0yDHmxx9/NCQZGzZs8FSYld7p06eNxo0bG6mpqUa3bt2MBx54wDAM8lUePfLII0aXLl0K3Z6bm2uEh4cbzzzzjGPdyZMnDavVarz33nvuCBF/069fP+Puu+92Wjd48GDj9ttvNwyDnMF11EDmRC1kPtRF5kN9ZG7USp7FmVLFcP78eW3btk29evVyrKtSpYp69eqlDRs2eDAyFObUqVOSpJCQEEnStm3bZLfbnXLYtGlT1atXjxx6UGJiovr16+eUF4l8lUeffvqp2rdvr5tuukmhoaFq27at3njjDcf2gwcPKiMjwylnQUFB6tixIznzkGuvvVZr1qzRzz//LEn64Ycf9M0336hPnz6SyBlcQw1kXtRC5kNdZD7UR+ZGreRZVT0dgJn88ccfysnJUVhYmNP6sLAw/fTTTx6KCoXJzc3V2LFj1blzZ7Vs2VKSlJGRIR8fHwUHBzuNDQsLU0ZGhgeixPvvv6/vv/9eW7ZsybeNfJU/v/zyi2bNmqWkpCQ99thj2rJli+6//375+PgoISHBkZeC/p0kZ57x6KOPKisrS02bNpWXl5dycnI0depU3X777ZJEzuASaiBzohYyH+oic6I+MjdqJc+iKYUKKzExUbt379Y333zj6VBQiN9++00PPPCAUlNT5evr6+lw4ILc3Fy1b99eTz/9tCSpbdu22r17t2bPnq2EhAQPR4eCfPjhh1qwYIEWLlyoFi1aaMeOHRo7dqzq1KlDzoAKjlrIXKiLzIv6yNyolTyLr+8VQ61ateTl5ZXvDheZmZkKDw/3UFQoyOjRo7Vs2TJ9+eWXqlu3rmN9eHi4zp8/r5MnTzqNJ4eesW3bNh09elRXX321qlatqqpVq2rdunV66aWXVLVqVYWFhZGvciYiIkLNmzd3WtesWTMdPnxYkhx54d/J8mP8+PF69NFHNWTIELVq1Up33nmnxo0bp5SUFEnkDK6hBjIfaiHzoS4yL+ojc6NW8iyaUsXg4+Ojdu3aac2aNY51ubm5WrNmjaKjoz0YGfIYhqHRo0dr6dKlWrt2rRo0aOC0vV27dvL29nbKYVpamg4fPkwOPaBnz57atWuXduzY4Xi0b99et99+u+Nn8lW+dO7cOd+txX/++WdFRUVJkho0aKDw8HCnnGVlZWnTpk3kzEOys7NVpYrzf/deXl7Kzc2VRM7gGmog86AWMi/qIvOiPjI3aiUP8/SV1s3m/fffN6xWqzFv3jxj7969xsiRI43g4GAjIyPD06HBMIxRo0YZQUFBxldffWWkp6c7HtnZ2Y4x9957r1GvXj1j7dq1xtatW43o6GgjOjrag1HjYhffZcYwyFd5s3nzZqNq1arG1KlTjX379hkLFiww/P39jXfffdcxZtq0aUZwcLDxySefGDt37jQGDBhgNGjQwPjf//7nwcgrr4SEBOMf//iHsWzZMuPgwYPGkiVLjFq1ahkPP/ywYww5gyuogcyBWqhioS4yB+ojc6NW8iyaUiXw8ssvG/Xq1TN8fHyMDh06GBs3bvR0SPg/kgp8zJ071zHmf//7n3HfffcZNWrUMPz9/Y1BgwYZ6enpngsaTv5efJGv8uezzz4zWrZsaVitVqNp06bG66+/7rQ9NzfXeOKJJ4ywsDDDarUaPXv2NNLS0jwULbKysowHHnjAqFevnuHr62tcccUVxr///W/DZrM5xpAzuIoaqPyjFqpYqIvMg/rIvKiVPMtiGIbhmXO0AAAAAAAAUFlxTSkAAAAAAAC4HU0pAAAAAAAAuB1NKQAAAAAAALgdTSkAAAAAAAC4HU0pAAAAAAAAuB1NKQAAAAAAALgdTSkAAAAAAAC4HU0pAAAAAAAAuB1NKQCmdOjQIVksFu3YscPToTj89NNP6tSpk3x9fdWmTRtPhwMAAEDNBKBcoykFoESGDRsmi8WiadOmOa3/+OOPZbFYPBSVZ02cOFEBAQFKS0vTmjVrChxz7NgxjRo1SvXq1ZPValV4eLji4uL07bffOsZYLBZ9/PHHbooaAACUJWqm/KiZAOShKQWgxHx9fTV9+nT9+eefng6l1Jw/f77Ezz1w4IC6dOmiqKgo1axZs8Ax8fHx2r59u+bPn6+ff/5Zn376qbp3767jx4+X+HUBAED5Rs3kjJoJQB6aUgBKrFevXgoPD1dKSkqhYyZNmpTvtOwXXnhB9evXdywPGzZMAwcO1NNPP62wsDAFBwdrypQpunDhgsaPH6+QkBDVrVtXc+fOzTf/Tz/9pGuvvVa+vr5q2bKl1q1b57R99+7d6tOnj6pVq6awsDDdeeed+uOPPxzbu3fvrtGjR2vs2LGqVauW4uLiCtyP3NxcTZkyRXXr1pXValWbNm20YsUKx3aLxaJt27ZpypQpslgsmjRpUr45Tp48qa+//lrTp0/X9ddfr6ioKHXo0EHJycn65z//KUmO92XQoEGyWCxO79Mnn3yiq6++Wr6+vrriiis0efJkXbhwwSmGWbNmqU+fPvLz89MVV1yhxYsXO7afP39eo0ePVkREhHx9fRUVFVVk7gAAQOmgZqJmAlAwmlIASszLy0tPP/20Xn75Zf3++++XNdfatWt15MgRrV+/Xs8995wmTpyoG264QTVq1NCmTZt077336l//+le+1xk/frwefPBBbd++XdHR0erfv7/jE7STJ0+qR48eatu2rbZu3aoVK1YoMzNTN998s9Mc8+fPl4+Pj7799lvNnj27wPhefPFFPfvss/rPf/6jnTt3Ki4uTv/85z+1b98+SVJ6erpatGihBx98UOnp6XrooYfyzVGtWjVVq1ZNH3/8sWw2W4Gvs2XLFknS3LlzlZ6e7lj++uuvNXToUD3wwAPau3evXnvtNc2bN09Tp051ev4TTzyh+Ph4/fDDD7r99ts1ZMgQ/fjjj5Kkl156SZ9++qk+/PBDpaWlacGCBU4FHAAAKBvUTNRMAAphAEAJJCQkGAMGDDAMwzA6depk3H333YZhGMbSpUuNi/9pmThxonHVVVc5Pff55583oqKinOaKiooycnJyHOuaNGliXHfddY7lCxcuGAEBAcZ7771nGIZhHDx40JBkTJs2zTHGbrcbdevWNaZPn24YhmE8+eSTRmxsrNNr//bbb4YkIy0tzTAMw+jWrZvRtm3bS+5vnTp1jKlTpzqtu+aaa4z77rvPsXzVVVcZEydOLHKexYsXGzVq1DB8fX2Na6+91khOTjZ++OEHpzGSjKVLlzqt69mzp/H00087rXvnnXeMiIgIp+fde++9TmM6duxojBo1yjAMwxgzZozRo0cPIzc3t8gYAQBA6aFmomYCUDjOlAJw2aZPn6758+c7Pl0qiRYtWqhKlf//T1JYWJhatWrlWPby8lLNmjV19OhRp+dFR0c7fq5atarat2/viOOHH37Ql19+6fi0rVq1amratKmkv65lkKddu3ZFxpaVlaUjR46oc+fOTus7d+5c7H2Oj4/XkSNH9Omnn6p379766quvdPXVV2vevHlFPu+HH37QlClTnPZlxIgRSk9PV3Z2tmPcxe9H3nJejMOGDdOOHTvUpEkT3X///Vq1alWxYgcAAJeHmsl11ExA5VDV0wEAML+uXbsqLi5OycnJGjZsmNO2KlWqyDAMp3V2uz3fHN7e3k7LFoulwHW5ubkux3XmzBn1799f06dPz7ctIiLC8XNAQIDLc5YGX19fxcTEKCYmRk888YTuueceTZw4Md97d7EzZ85o8uTJGjx4cIHzueLqq6/WwYMH9cUXX2j16tW6+eab1atXL6drKAAAgLJDzVQ81ExAxceZUgBKxbRp0/TZZ59pw4YNTutr166tjIwMpyJrx44dpfa6GzdudPx84cIFbdu2Tc2aNZP0V0GxZ88e1a9fX40aNXJ6FKeoCgwMVJ06dZxuQSxJ3377rZo3b37Z+9C8eXOdPXvWsezt7a2cnBynMVdffbXS0tLy7UejRo2cPi29+P3IW857P/L25ZZbbtEbb7yhDz74QB999JFOnDhx2fsAAABcQ81UctRMQMXDmVIASkWrVq10++2366WXXnJa3717dx07dkwzZszQjTfeqBUrVuiLL75QYGBgqbzuzJkz1bhxYzVr1kzPP/+8/vzzT919992SpMTERL3xxhu69dZb9fDDDyskJET79+/X+++/rzfffFNeXl4uv8748eM1ceJENWzYUG3atNHcuXO1Y8cOLViwwOU5jh8/rptuukl33323WrdurerVq2vr1q2aMWOGBgwY4BhXv359rVmzRp07d5bValWNGjU0YcIE3XDDDapXr55uvPFGValSRT/88IN2796tp556yvHcRYsWqX379urSpYsWLFigzZs3a86cOZKk5557ThEREWrbtq2qVKmiRYsWKTw8XMHBwS7vAwAAuDzUTJdGzQRUHpwpBaDUTJkyJd+p4s2aNdOrr76qmTNn6qqrrtLmzZsLvMtKSU2bNk3Tpk3TVVddpW+++UaffvqpatWqJUmOT+pycnIUGxurVq1aaezYsQoODnb6pMwV999/v5KSkvTggw+qVatWWrFihT799FM1btzY5TmqVaumjh076vnnn1fXrl3VsmVLPfHEExoxYoReeeUVx7hnn31WqampioyMVNu2bSVJcXFxWrZsmVatWqVrrrlGnTp10vPPP6+oqCin15g8ebLef/99tW7dWm+//bbee+89xyeT1atX14wZM9S+fXtdc801OnTokJYvX17s9wIAAFweaqaiUTMBlYfF+PsXlwEApmSxWLR06VINHDjQ06EAAACUW9RMQPlBqxcAAAAAAABuR1MKAAAAAAAAbsfX9wAAAAAAAOB2nCkFAAAAAAAAt6MpBQAAAAAAALejKQUAAAAAAAC3oykFAAAAAAAAt6MpBQAAAAAAALejKQUAAAAAAAC3oykFAAAAAAAAt6MpBQAAAAAAALejKQUAAAAAAAC3+39epoXTsRzLaAAAAABJRU5ErkJggg==", 266 | "text/plain": [ 267 | "
" 268 | ] 269 | }, 270 | "metadata": {}, 271 | "output_type": "display_data" 272 | } 273 | ], 274 | "source": [ 275 | "import matplotlib.pyplot as plt\n", 276 | "\n", 277 | "# Create a figure with two subplots side by side\n", 278 | "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))\n", 279 | "\n", 280 | "# Plot histogram for successful tasks\n", 281 | "successful_tasks = all_tasks[all_tasks[\"success\"] == 'success']\n", 282 | "successful_tasks[\"num_steps\"].hist(ax=ax1)\n", 283 | "ax1.set_title(\"Number of Steps for Successful Tasks\")\n", 284 | "ax1.set_xlabel(\"Number of Steps\")\n", 285 | "ax1.set_ylabel(\"Frequency\")\n", 286 | "\n", 287 | "# Plot histogram for failed tasks\n", 288 | "failed_tasks = all_tasks[all_tasks[\"success\"] == 'failed']\n", 289 | "failed_tasks[\"num_steps\"].hist(ax=ax2)\n", 290 | "ax2.set_title(\"Number of Steps for Failed Tasks\") \n", 291 | "ax2.set_xlabel(\"Number of Steps\")\n", 292 | "ax2.set_ylabel(\"Frequency\")\n", 293 | "\n", 294 | "plt.tight_layout()" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 92, 300 | "metadata": {}, 301 | "outputs": [ 302 | { 303 | "data": { 304 | "text/plain": [ 305 | "success\n", 306 | "success 0.890785\n", 307 | "failed 0.093857\n", 308 | "unknown 0.015358\n", 309 | "Name: proportion, dtype: float64" 310 | ] 311 | }, 312 | "execution_count": 92, 313 | "metadata": {}, 314 | "output_type": "execute_result" 315 | } 316 | ], 317 | "source": [ 318 | "all_tasks[\"success\"].value_counts(normalize=True)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 93, 324 | "metadata": {}, 325 | "outputs": [ 326 | { 327 | "data": { 328 | "text/html": [ 329 | "
\n", 330 | "\n", 343 | "\n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | "
successfailedsuccessunknownavg_stepsnum_tasks
group
Huggingface0.001.000.009.735
Google Flights0.050.950.0036.239
Amazon0.080.920.0014.738
GitHub0.080.920.0015.940
Apple0.060.910.0312.533
BBC News0.090.910.0018.235
Cambridge Dictionary0.090.910.0016.743
Allrecipes0.080.900.0318.339
Coursera0.100.900.008.540
Google Search0.100.900.0014.440
Google Map0.110.860.0314.936
ESPN0.100.850.0521.040
ArXiv0.140.830.0217.642
Wolfram Alpha0.130.830.0418.446
Booking0.180.800.0232.740
\n", 485 | "
" 486 | ], 487 | "text/plain": [ 488 | "success failed success unknown avg_steps num_tasks\n", 489 | "group \n", 490 | "Huggingface 0.00 1.00 0.00 9.7 35\n", 491 | "Google Flights 0.05 0.95 0.00 36.2 39\n", 492 | "Amazon 0.08 0.92 0.00 14.7 38\n", 493 | "GitHub 0.08 0.92 0.00 15.9 40\n", 494 | "Apple 0.06 0.91 0.03 12.5 33\n", 495 | "BBC News 0.09 0.91 0.00 18.2 35\n", 496 | "Cambridge Dictionary 0.09 0.91 0.00 16.7 43\n", 497 | "Allrecipes 0.08 0.90 0.03 18.3 39\n", 498 | "Coursera 0.10 0.90 0.00 8.5 40\n", 499 | "Google Search 0.10 0.90 0.00 14.4 40\n", 500 | "Google Map 0.11 0.86 0.03 14.9 36\n", 501 | "ESPN 0.10 0.85 0.05 21.0 40\n", 502 | "ArXiv 0.14 0.83 0.02 17.6 42\n", 503 | "Wolfram Alpha 0.13 0.83 0.04 18.4 46\n", 504 | "Booking 0.18 0.80 0.02 32.7 40" 505 | ] 506 | }, 507 | "execution_count": 93, 508 | "metadata": {}, 509 | "output_type": "execute_result" 510 | } 511 | ], 512 | "source": [ 513 | "# percentage of tasks failed, unknown and success per each group (the thing before the -- in each task_id)\n", 514 | "# Create group column if it doesn't exist\n", 515 | "all_tasks[\"group\"] = all_tasks[\"task_id\"].str.split(\"--\").str[0]\n", 516 | "\n", 517 | "# Calculate percentage of each success status per group\n", 518 | "# Calculate success percentages by group\n", 519 | "success_by_group = all_tasks.groupby(\"group\")[\"success\"].value_counts(normalize=True).unstack().fillna(0)\n", 520 | "success_by_group = success_by_group.round(2)\n", 521 | "\n", 522 | "# Calculate average steps per group\n", 523 | "avg_steps = all_tasks.groupby(\"group\")[\"num_steps\"].mean().round(1)\n", 524 | "success_by_group[\"avg_steps\"] = avg_steps\n", 525 | "\n", 526 | "# Calculate number of tasks per group\n", 527 | "num_tasks = all_tasks.groupby(\"group\").size()\n", 528 | "success_by_group[\"num_tasks\"] = num_tasks\n", 529 | "\n", 530 | "# Sort by success rate\n", 531 | "# success_by_group.sort_values(\"group\", ascending=True)\n", 532 | "success_by_group.sort_values(\"success\", ascending=False)" 533 | ] 534 | } 535 | ], 536 | "metadata": { 537 | "kernelspec": { 538 | "display_name": ".venv", 539 | "language": "python", 540 | "name": "python3" 541 | }, 542 | "language_info": { 543 | "codemirror_mode": { 544 | "name": "ipython", 545 | "version": 3 546 | }, 547 | "file_extension": ".py", 548 | "mimetype": "text/x-python", 549 | "name": "python", 550 | "nbconvert_exporter": "python", 551 | "pygments_lexer": "ipython3", 552 | "version": "3.11.4" 553 | } 554 | }, 555 | "nbformat": 4, 556 | "nbformat_minor": 2 557 | } 558 | --------------------------------------------------------------------------------