├── .flake8 ├── .github └── workflows │ └── pre-commit.yml ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── agent_studio ├── __init__.py ├── agent │ ├── __init__.py │ ├── base_agent.py │ ├── direct_agent.py │ └── human_agent.py ├── apps │ ├── annotate_ground_ui.py │ ├── online_benchmark.py │ ├── trajectory_editor.py │ └── trajectory_recorder.py ├── config │ ├── __init__.py │ ├── api_key_template.json │ └── config.py ├── envs │ ├── __init__.py │ └── desktop_env │ │ ├── __init__.py │ │ ├── evaluators │ │ ├── __init__.py │ │ ├── email │ │ │ ├── __init__.py │ │ │ └── email_evaluator.py │ │ ├── evaluator.py │ │ ├── evaluator_helper.py │ │ ├── gimp │ │ │ ├── __init__.py │ │ │ └── gimp_evaluator.py │ │ ├── google │ │ │ ├── __init__.py │ │ │ ├── calendar_evaluator.py │ │ │ ├── docs_evaluator.py │ │ │ ├── drive_evaluator.py │ │ │ ├── forms_evaluator.py │ │ │ ├── gmail_evaluator.py │ │ │ ├── gservice.py │ │ │ ├── sheets_evaluator.py │ │ │ └── slides_evaluator.py │ │ ├── human_evaluator.py │ │ ├── office │ │ │ ├── __init__.py │ │ │ ├── docs_evaluator.py │ │ │ ├── sheets_evaluator.py │ │ │ ├── slides_evaluator.py │ │ │ └── utils.py │ │ ├── os │ │ │ ├── __init__.py │ │ │ ├── filesystem_evaluator.py │ │ │ ├── process_evaluator.py │ │ │ └── system_evaluator.py │ │ ├── qa_evaluator.py │ │ ├── telegram_evaluator.py │ │ └── vscode │ │ │ ├── __init__.py │ │ │ ├── vscode_connector.py │ │ │ └── vscode_evaluator.py │ │ ├── recorder │ │ ├── __init__.py │ │ ├── base_recorder.py │ │ └── screen_recorder.py │ │ ├── tools │ │ ├── __init__.py │ │ ├── email.py │ │ ├── keyboard.py │ │ └── mouse.py │ │ └── vnc_client.py ├── llm │ ├── __init__.py │ ├── base_model.py │ ├── claude.py │ ├── dummy.py │ ├── gemini.py │ ├── huggingface.py │ ├── openai.py │ ├── remote_model.py │ ├── utils.py │ └── vertexai.py ├── recorder │ ├── README.md │ ├── config.py │ ├── player.py │ ├── recorders │ │ ├── __init__.py │ │ ├── keyboard.py │ │ ├── mouse.py │ │ └── video.py │ └── utils.py └── utils │ ├── __init__.py │ ├── communication.py │ ├── gui.py │ ├── human_utils.py │ ├── json_utils.py │ ├── prompt.py │ ├── runtime.py │ ├── singleton.py │ ├── task_status.py │ └── types.py ├── dockerfiles ├── Dockerfile.ubuntu.amd64 └── Dockerfile.ubuntu22.04.amd64 ├── docs ├── annotate_ground_ui.md └── assets │ ├── agent_space.jpg │ ├── annotate_gui_1.jpg │ ├── annotate_gui_2.jpg │ ├── comparison.png │ ├── onlinebenchmark_gui_1.png │ ├── overview.png │ └── trajectory_editor_gui_1.jpg ├── eval_agent_desiderata ├── README.md ├── common.py ├── datasets │ ├── gui_grounding │ │ └── metadata_1k.jsonl │ └── trajectory_lite │ │ ├── metadata_idm.jsonl │ │ ├── metadata_idmn2n.jsonl │ │ └── metadata_success_detection.jsonl ├── eval_base.py ├── eval_gui_grounding.py ├── eval_idm.py ├── eval_idmn2n.py ├── eval_success_detection.py ├── main.py ├── make_report.py ├── online_benchmark_analysis.py ├── processing │ ├── README.md │ └── process_grounding_data.py ├── re_caption_gui_grounding_data.py └── split_subset.py ├── eval_online_benchmarks ├── README.md ├── data │ └── .gitkeep └── tasks │ ├── compositional │ ├── 0283d41d-9f4c-4b95-aea6-aa1e9918be91.json │ ├── 0321d49b-a7f9-458a-91cf-06bc763d23c1.json │ ├── 07525ee8-24d1-43ec-a64f-1faff0b06f3b.json │ ├── 0889e861-1544-47d0-950b-405973744f98.json │ ├── 093e355b-4fed-4734-89d9-4b419767f6ff.json │ ├── 0b29d03b-00e9-42d9-b0e1-162b839b0520.json │ ├── 17f39e5d-1660-4b6e-a45a-e75b154cc8c6.json │ ├── 1bc75904-895c-47ee-989c-4cfa20dd9d02.json │ ├── 214d9d4b-4264-4614-bcd5-17f9d6f2c8c1.json │ ├── 238a3041-6bbe-460f-8782-38a25a46ec1e.json │ ├── 24857972-b024-454b-b3a1-302cdd701a19.json │ ├── 2668faf4-8dd6-412e-9156-02559612b36c.json │ ├── 26ce1a5d-1d1e-444c-af84-299471c6252e.json │ ├── 29025c50-31b3-4efe-89ff-1e09d3ebfce8.json │ ├── 33d35d9c-cd56-4f26-9b73-459063c4e590.json │ ├── 3605a63c-20d6-4fec-9581-97570362ec6e.json │ ├── 392caf3b-5c18-41ec-bdcd-6687c4cec2f5.json │ ├── 39e4c8f1-969a-43c2-8179-99c31f4af7eb.json │ ├── 3b7e322f-2ff3-4975-91b0-b94970288252.json │ ├── 3bf574de-1315-45d3-8870-9f3a61b4f4de.json │ ├── 3d7cf1db-5533-468a-aafe-e6441f427c4a.json │ ├── 407f880e-c251-4d23-a37f-b81786f17deb.json │ ├── 41127d23-d7e9-4ccc-92d3-4bb23508666e.json │ ├── 4c5225e0-cf75-48ca-84f2-5d1245b8d846.json │ ├── 4f632a32-51e0-42c4-8439-a378929fd90b.json │ ├── 57ed25e0-6e34-4a2a-b9f6-1de2974d5494.json │ ├── 66e17f10-a5fa-41ad-b909-16f9e7702c54.json │ ├── 670af7de-7ca9-463d-9260-12beb1c095d6.json │ ├── 6ab04b64-da85-44fe-9468-efe288094898.json │ ├── 6bda9bed-b891-4ba9-87de-0d3f51f61844.json │ ├── 6d6d5187-550e-4ccc-9fa3-0fd70b10f006.json │ ├── 6da58b9f-9a07-4d23-bdcd-9a5f56bfe246.json │ ├── 73534944-d0d7-4f94-8e6c-08317a33023b.json │ ├── 7481a5c2-11da-497d-b16e-c7cc716314be.json │ ├── 7550b963-1eb3-41af-8a35-ba8a71d8fe51.json │ ├── 7e00720e-2287-4890-b5f3-9583629c057d.json │ ├── 80cb8446-eb3a-4e69-b3f3-0f2aee3be151.json │ ├── 80ebd0f7-7de3-4985-9beb-59785fd7b944.json │ ├── 84723c8b-7f82-4b5c-9627-9a5dda60f24f.json │ ├── 854007a1-97bd-470f-9028-bfdc5dd22f20.json │ ├── 88cd3105-205c-4ac3-ba10-003247fbc149.json │ ├── 95a47bdc-6ac6-4b52-ba3d-bedff89c373c.json │ ├── 9868053a-2343-4857-a62f-7bc80532c33e.json │ ├── 9895f2a0-61b0-4af8-95cb-f8e7a64ce89d.json │ ├── 9adcaecf-98ce-42ce-bd90-eb3a5d9ff97f.json │ ├── a4991e3c-ec1c-407c-862c-9b07dec35de5.json │ ├── a5cc15a9-38d4-4e2f-9fcf-8baf3d1ff9d8.json │ ├── ad2ef7c0-172f-4341-945b-773ff8ce35c5.json │ ├── bc0872e2-ca66-487e-a3fa-3cf20d630c42.json │ ├── bfb90564-daf4-4c1e-90ca-7028b93dd7b2.json │ ├── c1b45900-3488-458d-927e-1789ed150903.json │ ├── cda93fda-27b8-47e2-be69-812c7704af8e.json │ ├── db4492d5-684b-4a1c-a923-36a289a7abe0.json │ ├── ddd1a4f5-4719-409b-9c2f-ec10d10a6589.json │ ├── e21c631d-ee2c-4f20-8d11-dfc92d5a2167.json │ ├── f395ef38-5405-4926-a5b3-7dac253195ed.json │ ├── f5b74786-261a-4286-b1e0-3e951a1a9281.json │ ├── f738a7da-6a5a-4f0b-9868-4d24c396f97a.json │ ├── f792e3ae-3a05-451f-b4c4-b5faa1412cdb.json │ └── fc5bb795-b395-4159-8c2b-b3b155671fe2.json │ ├── single_api │ ├── gmail │ │ ├── 14748db5-6d85-48e8-afe8-92cb87d5aa7c.json │ │ ├── 2af3ec73-7a09-4850-8664-5124e28c00f4.json │ │ ├── 4a72b7ed-2cd8-419d-9ab3-52ea40307e08.json │ │ ├── 4c199a1f-e1c1-435e-bd31-baa009f4edf8.json │ │ ├── 5565ec62-0596-4ee9-abf0-240a8db557d1.json │ │ ├── 64fc9208-ef98-475a-9949-e8e1db458067.json │ │ ├── 729b3221-4d1a-424e-b56b-7c0fe964da30.json │ │ ├── 76406d27-a440-48ce-800b-d8a90cd88033.json │ │ ├── 77e47d98-4f75-46b3-b6d6-90665031a531.json │ │ ├── acaeeae1-cd4e-446d-bc5c-f94879de16c5.json │ │ ├── b87bf58e-326c-4106-b49c-247434785261.json │ │ ├── f428be13-f44a-4524-b2d1-3483c0ea1fe3.json │ │ └── ffcae34c-6301-407a-a87b-008c8efb6377.json │ ├── google_calendar │ │ ├── 2dc4cf42-2def-488c-ade6-b92295bc5f93.json │ │ ├── 45ad2be2-f338-4fed-9b65-349ff8cb2645.json │ │ ├── 529cc575-4c61-4c8a-9ba2-dfec23e5baf9.json │ │ ├── 55f9c08f-ef5e-4f51-b4a4-b7711de6394d.json │ │ ├── 9e3072d3-9c76-4e93-926a-5c599b7689b8.json │ │ ├── a1234567-8b90-4cde-f012-3456789abcde.json │ │ ├── a24ca1c4-7b92-41a1-8ba8-2565678e3be2.json │ │ ├── ab0b454a-9674-478d-9d64-b142a65453cd.json │ │ ├── b509052d-4ff8-47ee-9d90-8f21e7602fe6.json │ │ ├── ccbb33cc-558e-4843-ae5d-09882208667f.json │ │ └── faa27d68-f14c-44d5-a723-5e473f8ee471.json │ ├── google_docs │ │ ├── 2edfcc5d-14dd-475f-920c-ca0077460999.json │ │ ├── 3ca6dd01-aee4-42d1-aed8-13b599223542.json │ │ ├── 57a29306-f576-43ea-af08-9c3ca6da3469.json │ │ ├── acf13dbb-0627-477c-a14a-783ae93e2daa.json │ │ ├── d96709c3-b648-44aa-8f92-7cbaba84bbd0.json │ │ ├── f05a7609-f896-4885-b4e6-7c29bbeab672.json │ │ └── f188a494-3dad-44cf-a43c-5e6bfdbdda0d.json │ └── os │ │ ├── 0d5f7fef-3e79-4f86-87a8-d3435de04d9e.json │ │ ├── 1485713f-f55e-4a3b-95b2-84c5df6d5a31.json │ │ ├── 1e205d38-7d7a-4087-b966-886237ada07b.json │ │ ├── 1e2f5ea6-c448-48e9-a2d1-be6f5639b4b2.json │ │ ├── 314cad7b-f4d1-43c4-a332-605cbc948592.json │ │ ├── 31c509db-8a88-4112-940c-5db78819dfec.json │ │ ├── 41b79d45-e7f8-4203-8d61-34260f93fbff.json │ │ ├── 4a260b67-0673-4b31-a861-c0c6d84127cd.json │ │ ├── 4e95c9af-ed12-473f-901a-3100d61c80d4.json │ │ ├── 51fa0739-1bc2-460a-ac64-b1dda60df47b.json │ │ ├── 7ea3ae8d-b7a4-4f41-8860-9c395d4539fd.json │ │ ├── 814353b2-504e-46b3-8f98-34043a21e406.json │ │ ├── 9d5d3188-7c3c-4cc6-8968-7fcb55daa5bd.json │ │ ├── 9e3072d3-9c77-4e93-926a-5c599b7689b8.json │ │ ├── b9503909-5457-425c-b6c8-b66d78bc8a13.json │ │ ├── c45544f8-947e-409e-82f8-c0459b8486c8.json │ │ ├── cc3553b2-93da-4a3d-a461-459a2cc030cb.json │ │ ├── ed52765c-494c-403a-95f3-dd862241fbed.json │ │ └── f44814b5-b978-41bd-9ffe-a57191588dab.json │ └── single_gui │ ├── docs │ ├── 0810415c-bde4-4443-9047-d5f70165a697.json │ ├── 0a0faba3-5580-44df-965d-f562a99b291c.json │ ├── 0b17a146-2934-46c7-8727-73ff6b6483e8.json │ ├── 0e47de2a-32e0-456c-a366-8c607ef7a9d2.json │ ├── 0e763496-b6bb-4508-a427-fad0b6c3e195.json │ ├── 3ef2b351-8a84-4ff2-8724-d86eae9b842e.json │ ├── 45d61a06-6545-4422-97b7-bc76cfa964c1.json │ ├── 663876c7-3471-43db-ba51-f410b13d9d7d.json │ ├── 66399b0d-8fda-4618-95c4-bfc6191617e9.json │ ├── 6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json │ ├── 6ada715d-3aae-4a32-a6a7-429b2e43fb93.json │ ├── 6f81754e-285d-4ce0-b59e-af7edb02d108.json │ ├── 72b810ef-4156-4d09-8f08-a0cf57e7cefe.json │ ├── 8472fece-c7dd-4241-8d65-9b3cd1a0b568.json │ └── 88fe4b2d-3040-4c70-9a70-546a47764b48.json │ ├── gimp │ ├── 06ca5602-62ca-47f6-ad4f-da151cde54cc.json │ ├── 2a729ded-3296-423d-aec4-7dd55ed5fbb3.json │ ├── 554785e9-4523-4e7a-b8e1-8016f565f56a.json │ ├── 72f83cdc-bf76-4531-9a1b-eb893a13f8aa.json │ ├── 734d6579-c07d-47a8-9ae2-13339795476b.json │ ├── 77b8ab4d-994f-43ac-8930-8ca087d7c4b4.json │ ├── 7a4deb26-d57d-4ea9-9a73-630f66a7b568.json │ ├── d16c99dc-2a1e-46f2-b350-d97c86c85c15.json │ ├── e2dd0213-26db-4349-abe5-d5667bfd725c.json │ ├── f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce.json │ └── f723c744-e62c-4ae6-98d1-750d3cd7d79d.json │ ├── os │ ├── 01091426-c9f0-4f8d-b478-563592555c1b.json │ ├── 0139a355-8ee2-4cfd-8e11-73529f064cd9.json │ ├── 1d6765b3-b744-4aa4-8287-d14e6d3cddac.json │ ├── 2cac2f16-ed18-41d5-aa9e-361e0474429b.json │ ├── 366bac7a-1d46-4676-bf3d-2a5f1c54e2a6.json │ ├── 3d62bbd1-dcfa-4df8-8703-4bfd682d0051.json │ ├── 3ff19bd5-923f-4301-9c51-ae444aa8dcfe.json │ ├── 49e457e8-9ed9-416a-8b79-69be26794ed7.json │ ├── 56b12bf1-e3a3-4f2f-90a0-fe3850b2795f.json │ ├── 581aab2a-57d5-4384-841f-2f7655491859.json │ ├── 6701f987-3609-4c87-ab8a-8546d6bab134.json │ ├── 7cc0d54a-22cd-48c1-88d9-02930c2e3151.json │ ├── 8d931c00-e4e2-4533-8768-10418f64108c.json │ ├── a69dfc50-2478-483a-af4c-a02a1b186c10.json │ ├── c37e0210-3777-4953-a3ef-2d1476995545.json │ ├── c8285e0b-a713-4459-a634-edde3d41f6ed.json │ ├── d8d0768a-9b97-48aa-851c-16b6b952e3d6.json │ ├── dc603381-80f9-4592-9fca-06053bc42191.json │ └── edb19d3f-135f-4f93-b052-3e7789454411.json │ ├── sheets │ ├── 01b269ae-2111-4a07-81fd-3fcd711993b0.json │ ├── 0326d92d-d218-48a8-9ca1-981cd6d064c7.json │ ├── 035f41ba-6653-43ab-aa63-c86d449d62e5.json │ ├── 04d9aeaf-7bed-4024-bedb-e10e6f00eb7f.json │ ├── 0a2e43bf-b26c-4631-a966-af9dfa12c9e5.json │ ├── 0acbd372-ca7a-4507-b949-70673120190f.json │ ├── 0bf05a7d-b28b-44d2-955a-50b41e24012a.json │ ├── 42e0a640-4f19-4b28-973d-729602b5a4a7.json │ ├── 4e6fcf72-daf3-439f-a232-c434ce416af6.json │ ├── 8909d1cb-5877-44c7-a908-9f1875302441.json │ ├── 9ed02102-6b28-4946-8339-c028166e9512.json │ ├── a16d1eb7-941b-4edd-8c08-344213f939ad.json │ ├── a9f325aa-8c05-4e4f-8341-9e4358565f4f.json │ ├── b6e9778c-11b3-455f-b720-655048787484.json │ └── f654bf9a-dea2-472d-a877-edeeb12d7462.json │ ├── slides │ ├── 08aced46-45a2-48d7-993b-ed3fb5b32302.json │ ├── 39be0d19-634d-4475-8768-09c130f5425d.json │ ├── 4ed5abd0-8b5d-47bd-839f-cacfa15ca37a.json │ ├── 7dbc52a6-11e0-4c9a-a2cb-1e36cfda80d8.json │ ├── 986fc832-6af2-417c-8845-9272b3a1528b.json │ ├── 9ec204e4-f0a3-42f8-8458-b772a6797cab.json │ ├── a097acff-6266-4291-9fbd-137af7ecd439.json │ ├── a434992a-89df-4577-925c-0c58b747f0f4.json │ ├── a669ef01-ded5-4099-9ea9-25e99b569840.json │ ├── ac1b39ff-ee4d-4483-abce-c117e98942f0.json │ ├── af23762e-2bfd-4a1d-aada-20fa8de9ce07.json │ ├── af2d657a-e6b3-4c6a-9f67-9e3ed015974c.json │ ├── c82632a4-56b6-4db4-9dd1-3820ee3388e4.json │ ├── e4ef0baf-4b52-4590-a47e-d4d464cca2d7.json │ └── ed43c15f-00cb-4054-9c95-62c880865d68.json │ └── vscode │ ├── 083bee5b-24c1-43f9-9644-d1d04926000b.json │ ├── 08d6db0f-8b06-41ae-bb20-26250e0a760f.json │ ├── 0d38e311-29b5-4925-a480-14a6a82836c8.json │ ├── 1c0ab6a9-2bde-49e7-9b9c-ab579ab3eab7.json │ ├── 1d3861fa-d605-48be-9337-b5188c351663.json │ ├── 3b3098d8-6626-4d8d-a291-2ad19b73d0f2.json │ ├── 3dcc6db1-7ca5-412d-b519-142724d41ef2.json │ ├── 5d020bcd-acb7-46ee-a21b-3a261cede5ce.json │ ├── 6034aa6c-2892-4ebd-bdfb-a7cfdddd0cbf.json │ ├── 760b4347-bf5d-4633-ad6d-8047e9271fac.json │ ├── 88f3f498-7cd0-4f3b-b7ae-40cb8e951aa7.json │ ├── 8a48df09-4141-4cf9-89c6-e193e0f42451.json │ ├── 8bbe5ae1-611f-474e-a334-ad56c875e4bc.json │ ├── 93b4281d-c6e7-4b39-8b3b-0132f1dd8615.json │ ├── a1f7f045-7938-4042-a445-fcd7d84aa2a4.json │ ├── b5b54c18-fddd-42f1-8299-00ab0a1397ae.json │ ├── c4e9503c-822e-4297-a3c5-d4e0e5ad596d.json │ ├── d2c5244a-d32b-4bc5-9cf8-616da006ee7a.json │ ├── d7a47fa4-5e5b-4028-8d4e-a031f96c67b0.json │ └── df81e919-58eb-4dea-87ba-4b6a6321a9c2.json ├── pyproject.toml ├── scripts ├── agent_server.py ├── docker_startup.sh ├── docker_startup_22.04.sh ├── format.sh ├── generate_latex_tables │ ├── idm_all_table.py │ ├── idm_multiple_edit_distance_table.py │ └── success_detection_all_table.py ├── json2jsonl.py ├── json_check.sh ├── model_server.py ├── plot │ ├── benchmark_humans.py │ ├── benchmark_stats.py │ ├── grounding_analysis.py │ ├── grounding_recaption.py │ └── grounding_stats.py ├── push_to_hub.py ├── recorded_trajectory2episode.py └── setup_api_keys.py └── tests ├── conftest.py ├── test_agents └── test_agent.py ├── test_desktop_env ├── test_interpreter.py └── test_recorder.py └── test_evaluators ├── test_filesystem.py ├── test_gcalendar.py ├── test_gdocs.py ├── test_gdrive.py ├── test_gforms.py ├── test_gmail.py ├── test_gsheets.py ├── test_gslides.py ├── test_joint_evaluation.py ├── test_process.py ├── test_qa.py ├── test_telegram.py └── test_vscode.py /.flake8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/.flake8 -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/.github/workflows/pre-commit.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/.gitignore -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/.isort.cfg -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/README.md -------------------------------------------------------------------------------- /agent_studio/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/__init__.py -------------------------------------------------------------------------------- /agent_studio/agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/agent/__init__.py -------------------------------------------------------------------------------- /agent_studio/agent/base_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/agent/base_agent.py -------------------------------------------------------------------------------- /agent_studio/agent/direct_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/agent/direct_agent.py -------------------------------------------------------------------------------- /agent_studio/agent/human_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/agent/human_agent.py -------------------------------------------------------------------------------- /agent_studio/apps/annotate_ground_ui.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/apps/annotate_ground_ui.py -------------------------------------------------------------------------------- /agent_studio/apps/online_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/apps/online_benchmark.py -------------------------------------------------------------------------------- /agent_studio/apps/trajectory_editor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/apps/trajectory_editor.py -------------------------------------------------------------------------------- /agent_studio/apps/trajectory_recorder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/apps/trajectory_recorder.py -------------------------------------------------------------------------------- /agent_studio/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/config/__init__.py -------------------------------------------------------------------------------- /agent_studio/config/api_key_template.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/config/api_key_template.json -------------------------------------------------------------------------------- /agent_studio/config/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/config/config.py -------------------------------------------------------------------------------- /agent_studio/envs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/__init__.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/email/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/email/email_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/email/email_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/evaluator_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/evaluator_helper.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/gimp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/gimp/gimp_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/gimp/gimp_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/google/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/google/calendar_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/google/calendar_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/google/docs_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/google/docs_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/google/drive_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/google/drive_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/google/forms_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/google/forms_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/google/gmail_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/google/gmail_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/google/gservice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/google/gservice.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/google/sheets_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/google/sheets_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/google/slides_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/google/slides_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/human_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/human_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/office/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/office/docs_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/office/docs_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/office/sheets_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/office/sheets_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/office/slides_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/office/slides_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/office/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/office/utils.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/os/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/os/filesystem_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/os/filesystem_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/os/process_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/os/process_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/os/system_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/os/system_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/qa_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/qa_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/telegram_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/telegram_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/vscode/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/vscode/vscode_connector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/vscode/vscode_connector.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/evaluators/vscode/vscode_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/evaluators/vscode/vscode_evaluator.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/recorder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/recorder/base_recorder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/recorder/base_recorder.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/recorder/screen_recorder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/recorder/screen_recorder.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/tools/email.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/tools/email.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/tools/keyboard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/tools/keyboard.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/tools/mouse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/tools/mouse.py -------------------------------------------------------------------------------- /agent_studio/envs/desktop_env/vnc_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/envs/desktop_env/vnc_client.py -------------------------------------------------------------------------------- /agent_studio/llm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/llm/__init__.py -------------------------------------------------------------------------------- /agent_studio/llm/base_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/llm/base_model.py -------------------------------------------------------------------------------- /agent_studio/llm/claude.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/llm/claude.py -------------------------------------------------------------------------------- /agent_studio/llm/dummy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/llm/dummy.py -------------------------------------------------------------------------------- /agent_studio/llm/gemini.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/llm/gemini.py -------------------------------------------------------------------------------- /agent_studio/llm/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/llm/huggingface.py -------------------------------------------------------------------------------- /agent_studio/llm/openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/llm/openai.py -------------------------------------------------------------------------------- /agent_studio/llm/remote_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/llm/remote_model.py -------------------------------------------------------------------------------- /agent_studio/llm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/llm/utils.py -------------------------------------------------------------------------------- /agent_studio/llm/vertexai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/llm/vertexai.py -------------------------------------------------------------------------------- /agent_studio/recorder/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/recorder/README.md -------------------------------------------------------------------------------- /agent_studio/recorder/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/recorder/config.py -------------------------------------------------------------------------------- /agent_studio/recorder/player.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/recorder/player.py -------------------------------------------------------------------------------- /agent_studio/recorder/recorders/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_studio/recorder/recorders/keyboard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/recorder/recorders/keyboard.py -------------------------------------------------------------------------------- /agent_studio/recorder/recorders/mouse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/recorder/recorders/mouse.py -------------------------------------------------------------------------------- /agent_studio/recorder/recorders/video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/recorder/recorders/video.py -------------------------------------------------------------------------------- /agent_studio/recorder/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/recorder/utils.py -------------------------------------------------------------------------------- /agent_studio/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_studio/utils/communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/utils/communication.py -------------------------------------------------------------------------------- /agent_studio/utils/gui.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/utils/gui.py -------------------------------------------------------------------------------- /agent_studio/utils/human_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/utils/human_utils.py -------------------------------------------------------------------------------- /agent_studio/utils/json_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/utils/json_utils.py -------------------------------------------------------------------------------- /agent_studio/utils/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/utils/prompt.py -------------------------------------------------------------------------------- /agent_studio/utils/runtime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/utils/runtime.py -------------------------------------------------------------------------------- /agent_studio/utils/singleton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/utils/singleton.py -------------------------------------------------------------------------------- /agent_studio/utils/task_status.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/utils/task_status.py -------------------------------------------------------------------------------- /agent_studio/utils/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/agent_studio/utils/types.py -------------------------------------------------------------------------------- /dockerfiles/Dockerfile.ubuntu.amd64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/dockerfiles/Dockerfile.ubuntu.amd64 -------------------------------------------------------------------------------- /dockerfiles/Dockerfile.ubuntu22.04.amd64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/dockerfiles/Dockerfile.ubuntu22.04.amd64 -------------------------------------------------------------------------------- /docs/annotate_ground_ui.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/docs/annotate_ground_ui.md -------------------------------------------------------------------------------- /docs/assets/agent_space.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/docs/assets/agent_space.jpg -------------------------------------------------------------------------------- /docs/assets/annotate_gui_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/docs/assets/annotate_gui_1.jpg -------------------------------------------------------------------------------- /docs/assets/annotate_gui_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/docs/assets/annotate_gui_2.jpg -------------------------------------------------------------------------------- /docs/assets/comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/docs/assets/comparison.png -------------------------------------------------------------------------------- /docs/assets/onlinebenchmark_gui_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/docs/assets/onlinebenchmark_gui_1.png -------------------------------------------------------------------------------- /docs/assets/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/docs/assets/overview.png -------------------------------------------------------------------------------- /docs/assets/trajectory_editor_gui_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/docs/assets/trajectory_editor_gui_1.jpg -------------------------------------------------------------------------------- /eval_agent_desiderata/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/README.md -------------------------------------------------------------------------------- /eval_agent_desiderata/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/common.py -------------------------------------------------------------------------------- /eval_agent_desiderata/datasets/gui_grounding/metadata_1k.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/datasets/gui_grounding/metadata_1k.jsonl -------------------------------------------------------------------------------- /eval_agent_desiderata/datasets/trajectory_lite/metadata_idm.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/datasets/trajectory_lite/metadata_idm.jsonl -------------------------------------------------------------------------------- /eval_agent_desiderata/datasets/trajectory_lite/metadata_idmn2n.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/datasets/trajectory_lite/metadata_idmn2n.jsonl -------------------------------------------------------------------------------- /eval_agent_desiderata/datasets/trajectory_lite/metadata_success_detection.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/datasets/trajectory_lite/metadata_success_detection.jsonl -------------------------------------------------------------------------------- /eval_agent_desiderata/eval_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/eval_base.py -------------------------------------------------------------------------------- /eval_agent_desiderata/eval_gui_grounding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/eval_gui_grounding.py -------------------------------------------------------------------------------- /eval_agent_desiderata/eval_idm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/eval_idm.py -------------------------------------------------------------------------------- /eval_agent_desiderata/eval_idmn2n.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/eval_idmn2n.py -------------------------------------------------------------------------------- /eval_agent_desiderata/eval_success_detection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/eval_success_detection.py -------------------------------------------------------------------------------- /eval_agent_desiderata/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/main.py -------------------------------------------------------------------------------- /eval_agent_desiderata/make_report.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/make_report.py -------------------------------------------------------------------------------- /eval_agent_desiderata/online_benchmark_analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/online_benchmark_analysis.py -------------------------------------------------------------------------------- /eval_agent_desiderata/processing/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/processing/README.md -------------------------------------------------------------------------------- /eval_agent_desiderata/processing/process_grounding_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/processing/process_grounding_data.py -------------------------------------------------------------------------------- /eval_agent_desiderata/re_caption_gui_grounding_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/re_caption_gui_grounding_data.py -------------------------------------------------------------------------------- /eval_agent_desiderata/split_subset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_agent_desiderata/split_subset.py -------------------------------------------------------------------------------- /eval_online_benchmarks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/README.md -------------------------------------------------------------------------------- /eval_online_benchmarks/data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/0283d41d-9f4c-4b95-aea6-aa1e9918be91.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/0283d41d-9f4c-4b95-aea6-aa1e9918be91.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/0321d49b-a7f9-458a-91cf-06bc763d23c1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/0321d49b-a7f9-458a-91cf-06bc763d23c1.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/07525ee8-24d1-43ec-a64f-1faff0b06f3b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/07525ee8-24d1-43ec-a64f-1faff0b06f3b.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/0889e861-1544-47d0-950b-405973744f98.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/0889e861-1544-47d0-950b-405973744f98.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/093e355b-4fed-4734-89d9-4b419767f6ff.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/093e355b-4fed-4734-89d9-4b419767f6ff.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/0b29d03b-00e9-42d9-b0e1-162b839b0520.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/0b29d03b-00e9-42d9-b0e1-162b839b0520.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/17f39e5d-1660-4b6e-a45a-e75b154cc8c6.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/17f39e5d-1660-4b6e-a45a-e75b154cc8c6.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/1bc75904-895c-47ee-989c-4cfa20dd9d02.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/1bc75904-895c-47ee-989c-4cfa20dd9d02.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/214d9d4b-4264-4614-bcd5-17f9d6f2c8c1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/214d9d4b-4264-4614-bcd5-17f9d6f2c8c1.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/238a3041-6bbe-460f-8782-38a25a46ec1e.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/238a3041-6bbe-460f-8782-38a25a46ec1e.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/24857972-b024-454b-b3a1-302cdd701a19.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/24857972-b024-454b-b3a1-302cdd701a19.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/2668faf4-8dd6-412e-9156-02559612b36c.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/2668faf4-8dd6-412e-9156-02559612b36c.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/26ce1a5d-1d1e-444c-af84-299471c6252e.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/26ce1a5d-1d1e-444c-af84-299471c6252e.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/29025c50-31b3-4efe-89ff-1e09d3ebfce8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/29025c50-31b3-4efe-89ff-1e09d3ebfce8.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/33d35d9c-cd56-4f26-9b73-459063c4e590.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/33d35d9c-cd56-4f26-9b73-459063c4e590.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/3605a63c-20d6-4fec-9581-97570362ec6e.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/3605a63c-20d6-4fec-9581-97570362ec6e.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/392caf3b-5c18-41ec-bdcd-6687c4cec2f5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/392caf3b-5c18-41ec-bdcd-6687c4cec2f5.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/39e4c8f1-969a-43c2-8179-99c31f4af7eb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/39e4c8f1-969a-43c2-8179-99c31f4af7eb.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/3b7e322f-2ff3-4975-91b0-b94970288252.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/3b7e322f-2ff3-4975-91b0-b94970288252.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/3bf574de-1315-45d3-8870-9f3a61b4f4de.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/3bf574de-1315-45d3-8870-9f3a61b4f4de.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/3d7cf1db-5533-468a-aafe-e6441f427c4a.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/3d7cf1db-5533-468a-aafe-e6441f427c4a.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/407f880e-c251-4d23-a37f-b81786f17deb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/407f880e-c251-4d23-a37f-b81786f17deb.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/41127d23-d7e9-4ccc-92d3-4bb23508666e.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/41127d23-d7e9-4ccc-92d3-4bb23508666e.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/4c5225e0-cf75-48ca-84f2-5d1245b8d846.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/4c5225e0-cf75-48ca-84f2-5d1245b8d846.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/4f632a32-51e0-42c4-8439-a378929fd90b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/4f632a32-51e0-42c4-8439-a378929fd90b.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/57ed25e0-6e34-4a2a-b9f6-1de2974d5494.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/57ed25e0-6e34-4a2a-b9f6-1de2974d5494.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/66e17f10-a5fa-41ad-b909-16f9e7702c54.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/66e17f10-a5fa-41ad-b909-16f9e7702c54.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/670af7de-7ca9-463d-9260-12beb1c095d6.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/670af7de-7ca9-463d-9260-12beb1c095d6.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/6ab04b64-da85-44fe-9468-efe288094898.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/6ab04b64-da85-44fe-9468-efe288094898.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/6bda9bed-b891-4ba9-87de-0d3f51f61844.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/6bda9bed-b891-4ba9-87de-0d3f51f61844.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/6d6d5187-550e-4ccc-9fa3-0fd70b10f006.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/6d6d5187-550e-4ccc-9fa3-0fd70b10f006.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/6da58b9f-9a07-4d23-bdcd-9a5f56bfe246.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/6da58b9f-9a07-4d23-bdcd-9a5f56bfe246.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/73534944-d0d7-4f94-8e6c-08317a33023b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/73534944-d0d7-4f94-8e6c-08317a33023b.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/7481a5c2-11da-497d-b16e-c7cc716314be.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/7481a5c2-11da-497d-b16e-c7cc716314be.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/7550b963-1eb3-41af-8a35-ba8a71d8fe51.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/7550b963-1eb3-41af-8a35-ba8a71d8fe51.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/7e00720e-2287-4890-b5f3-9583629c057d.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/7e00720e-2287-4890-b5f3-9583629c057d.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/80cb8446-eb3a-4e69-b3f3-0f2aee3be151.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/80cb8446-eb3a-4e69-b3f3-0f2aee3be151.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/80ebd0f7-7de3-4985-9beb-59785fd7b944.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/80ebd0f7-7de3-4985-9beb-59785fd7b944.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/84723c8b-7f82-4b5c-9627-9a5dda60f24f.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/84723c8b-7f82-4b5c-9627-9a5dda60f24f.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/854007a1-97bd-470f-9028-bfdc5dd22f20.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/854007a1-97bd-470f-9028-bfdc5dd22f20.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/88cd3105-205c-4ac3-ba10-003247fbc149.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/88cd3105-205c-4ac3-ba10-003247fbc149.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/95a47bdc-6ac6-4b52-ba3d-bedff89c373c.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/95a47bdc-6ac6-4b52-ba3d-bedff89c373c.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/9868053a-2343-4857-a62f-7bc80532c33e.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/9868053a-2343-4857-a62f-7bc80532c33e.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/9895f2a0-61b0-4af8-95cb-f8e7a64ce89d.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/9895f2a0-61b0-4af8-95cb-f8e7a64ce89d.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/9adcaecf-98ce-42ce-bd90-eb3a5d9ff97f.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/9adcaecf-98ce-42ce-bd90-eb3a5d9ff97f.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/a4991e3c-ec1c-407c-862c-9b07dec35de5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/a4991e3c-ec1c-407c-862c-9b07dec35de5.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/a5cc15a9-38d4-4e2f-9fcf-8baf3d1ff9d8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/a5cc15a9-38d4-4e2f-9fcf-8baf3d1ff9d8.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/ad2ef7c0-172f-4341-945b-773ff8ce35c5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/ad2ef7c0-172f-4341-945b-773ff8ce35c5.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/bc0872e2-ca66-487e-a3fa-3cf20d630c42.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/bc0872e2-ca66-487e-a3fa-3cf20d630c42.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/bfb90564-daf4-4c1e-90ca-7028b93dd7b2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/bfb90564-daf4-4c1e-90ca-7028b93dd7b2.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/c1b45900-3488-458d-927e-1789ed150903.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/c1b45900-3488-458d-927e-1789ed150903.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/cda93fda-27b8-47e2-be69-812c7704af8e.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/cda93fda-27b8-47e2-be69-812c7704af8e.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/db4492d5-684b-4a1c-a923-36a289a7abe0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/db4492d5-684b-4a1c-a923-36a289a7abe0.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/ddd1a4f5-4719-409b-9c2f-ec10d10a6589.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/ddd1a4f5-4719-409b-9c2f-ec10d10a6589.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/e21c631d-ee2c-4f20-8d11-dfc92d5a2167.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/e21c631d-ee2c-4f20-8d11-dfc92d5a2167.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/f395ef38-5405-4926-a5b3-7dac253195ed.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/f395ef38-5405-4926-a5b3-7dac253195ed.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/f5b74786-261a-4286-b1e0-3e951a1a9281.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/f5b74786-261a-4286-b1e0-3e951a1a9281.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/f738a7da-6a5a-4f0b-9868-4d24c396f97a.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/f738a7da-6a5a-4f0b-9868-4d24c396f97a.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/f792e3ae-3a05-451f-b4c4-b5faa1412cdb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/f792e3ae-3a05-451f-b4c4-b5faa1412cdb.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/compositional/fc5bb795-b395-4159-8c2b-b3b155671fe2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/compositional/fc5bb795-b395-4159-8c2b-b3b155671fe2.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/gmail/14748db5-6d85-48e8-afe8-92cb87d5aa7c.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/gmail/14748db5-6d85-48e8-afe8-92cb87d5aa7c.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/gmail/2af3ec73-7a09-4850-8664-5124e28c00f4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/gmail/2af3ec73-7a09-4850-8664-5124e28c00f4.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/gmail/4a72b7ed-2cd8-419d-9ab3-52ea40307e08.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/gmail/4a72b7ed-2cd8-419d-9ab3-52ea40307e08.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/gmail/4c199a1f-e1c1-435e-bd31-baa009f4edf8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/gmail/4c199a1f-e1c1-435e-bd31-baa009f4edf8.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/gmail/5565ec62-0596-4ee9-abf0-240a8db557d1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/gmail/5565ec62-0596-4ee9-abf0-240a8db557d1.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/gmail/64fc9208-ef98-475a-9949-e8e1db458067.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/gmail/64fc9208-ef98-475a-9949-e8e1db458067.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/gmail/729b3221-4d1a-424e-b56b-7c0fe964da30.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/gmail/729b3221-4d1a-424e-b56b-7c0fe964da30.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/gmail/76406d27-a440-48ce-800b-d8a90cd88033.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/gmail/76406d27-a440-48ce-800b-d8a90cd88033.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/gmail/77e47d98-4f75-46b3-b6d6-90665031a531.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/gmail/77e47d98-4f75-46b3-b6d6-90665031a531.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/gmail/acaeeae1-cd4e-446d-bc5c-f94879de16c5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/gmail/acaeeae1-cd4e-446d-bc5c-f94879de16c5.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/gmail/b87bf58e-326c-4106-b49c-247434785261.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/gmail/b87bf58e-326c-4106-b49c-247434785261.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/gmail/f428be13-f44a-4524-b2d1-3483c0ea1fe3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/gmail/f428be13-f44a-4524-b2d1-3483c0ea1fe3.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/gmail/ffcae34c-6301-407a-a87b-008c8efb6377.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/gmail/ffcae34c-6301-407a-a87b-008c8efb6377.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_calendar/2dc4cf42-2def-488c-ade6-b92295bc5f93.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_calendar/2dc4cf42-2def-488c-ade6-b92295bc5f93.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_calendar/45ad2be2-f338-4fed-9b65-349ff8cb2645.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_calendar/45ad2be2-f338-4fed-9b65-349ff8cb2645.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_calendar/529cc575-4c61-4c8a-9ba2-dfec23e5baf9.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_calendar/529cc575-4c61-4c8a-9ba2-dfec23e5baf9.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_calendar/55f9c08f-ef5e-4f51-b4a4-b7711de6394d.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_calendar/55f9c08f-ef5e-4f51-b4a4-b7711de6394d.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_calendar/9e3072d3-9c76-4e93-926a-5c599b7689b8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_calendar/9e3072d3-9c76-4e93-926a-5c599b7689b8.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_calendar/a1234567-8b90-4cde-f012-3456789abcde.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_calendar/a1234567-8b90-4cde-f012-3456789abcde.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_calendar/a24ca1c4-7b92-41a1-8ba8-2565678e3be2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_calendar/a24ca1c4-7b92-41a1-8ba8-2565678e3be2.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_calendar/ab0b454a-9674-478d-9d64-b142a65453cd.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_calendar/ab0b454a-9674-478d-9d64-b142a65453cd.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_calendar/b509052d-4ff8-47ee-9d90-8f21e7602fe6.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_calendar/b509052d-4ff8-47ee-9d90-8f21e7602fe6.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_calendar/ccbb33cc-558e-4843-ae5d-09882208667f.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_calendar/ccbb33cc-558e-4843-ae5d-09882208667f.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_calendar/faa27d68-f14c-44d5-a723-5e473f8ee471.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_calendar/faa27d68-f14c-44d5-a723-5e473f8ee471.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_docs/2edfcc5d-14dd-475f-920c-ca0077460999.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_docs/2edfcc5d-14dd-475f-920c-ca0077460999.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_docs/3ca6dd01-aee4-42d1-aed8-13b599223542.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_docs/3ca6dd01-aee4-42d1-aed8-13b599223542.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_docs/57a29306-f576-43ea-af08-9c3ca6da3469.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_docs/57a29306-f576-43ea-af08-9c3ca6da3469.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_docs/acf13dbb-0627-477c-a14a-783ae93e2daa.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_docs/acf13dbb-0627-477c-a14a-783ae93e2daa.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_docs/d96709c3-b648-44aa-8f92-7cbaba84bbd0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_docs/d96709c3-b648-44aa-8f92-7cbaba84bbd0.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_docs/f05a7609-f896-4885-b4e6-7c29bbeab672.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_docs/f05a7609-f896-4885-b4e6-7c29bbeab672.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/google_docs/f188a494-3dad-44cf-a43c-5e6bfdbdda0d.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/google_docs/f188a494-3dad-44cf-a43c-5e6bfdbdda0d.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/0d5f7fef-3e79-4f86-87a8-d3435de04d9e.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/0d5f7fef-3e79-4f86-87a8-d3435de04d9e.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/1485713f-f55e-4a3b-95b2-84c5df6d5a31.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/1485713f-f55e-4a3b-95b2-84c5df6d5a31.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/1e205d38-7d7a-4087-b966-886237ada07b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/1e205d38-7d7a-4087-b966-886237ada07b.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/1e2f5ea6-c448-48e9-a2d1-be6f5639b4b2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/1e2f5ea6-c448-48e9-a2d1-be6f5639b4b2.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/314cad7b-f4d1-43c4-a332-605cbc948592.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/314cad7b-f4d1-43c4-a332-605cbc948592.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/31c509db-8a88-4112-940c-5db78819dfec.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/31c509db-8a88-4112-940c-5db78819dfec.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/41b79d45-e7f8-4203-8d61-34260f93fbff.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/41b79d45-e7f8-4203-8d61-34260f93fbff.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/4a260b67-0673-4b31-a861-c0c6d84127cd.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/4a260b67-0673-4b31-a861-c0c6d84127cd.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/4e95c9af-ed12-473f-901a-3100d61c80d4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/4e95c9af-ed12-473f-901a-3100d61c80d4.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/51fa0739-1bc2-460a-ac64-b1dda60df47b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/51fa0739-1bc2-460a-ac64-b1dda60df47b.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/7ea3ae8d-b7a4-4f41-8860-9c395d4539fd.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/7ea3ae8d-b7a4-4f41-8860-9c395d4539fd.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/814353b2-504e-46b3-8f98-34043a21e406.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/814353b2-504e-46b3-8f98-34043a21e406.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/9d5d3188-7c3c-4cc6-8968-7fcb55daa5bd.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/9d5d3188-7c3c-4cc6-8968-7fcb55daa5bd.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/9e3072d3-9c77-4e93-926a-5c599b7689b8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/9e3072d3-9c77-4e93-926a-5c599b7689b8.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/b9503909-5457-425c-b6c8-b66d78bc8a13.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/b9503909-5457-425c-b6c8-b66d78bc8a13.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/c45544f8-947e-409e-82f8-c0459b8486c8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/c45544f8-947e-409e-82f8-c0459b8486c8.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/cc3553b2-93da-4a3d-a461-459a2cc030cb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/cc3553b2-93da-4a3d-a461-459a2cc030cb.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/ed52765c-494c-403a-95f3-dd862241fbed.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/ed52765c-494c-403a-95f3-dd862241fbed.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_api/os/f44814b5-b978-41bd-9ffe-a57191588dab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_api/os/f44814b5-b978-41bd-9ffe-a57191588dab.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/docs/0810415c-bde4-4443-9047-d5f70165a697.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/docs/0810415c-bde4-4443-9047-d5f70165a697.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/docs/0a0faba3-5580-44df-965d-f562a99b291c.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/docs/0a0faba3-5580-44df-965d-f562a99b291c.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/docs/0b17a146-2934-46c7-8727-73ff6b6483e8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/docs/0b17a146-2934-46c7-8727-73ff6b6483e8.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/docs/0e47de2a-32e0-456c-a366-8c607ef7a9d2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/docs/0e47de2a-32e0-456c-a366-8c607ef7a9d2.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/docs/0e763496-b6bb-4508-a427-fad0b6c3e195.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/docs/0e763496-b6bb-4508-a427-fad0b6c3e195.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/docs/3ef2b351-8a84-4ff2-8724-d86eae9b842e.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/docs/3ef2b351-8a84-4ff2-8724-d86eae9b842e.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/docs/45d61a06-6545-4422-97b7-bc76cfa964c1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/docs/45d61a06-6545-4422-97b7-bc76cfa964c1.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/docs/663876c7-3471-43db-ba51-f410b13d9d7d.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/docs/663876c7-3471-43db-ba51-f410b13d9d7d.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/docs/66399b0d-8fda-4618-95c4-bfc6191617e9.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/docs/66399b0d-8fda-4618-95c4-bfc6191617e9.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/docs/6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/docs/6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/docs/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/docs/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/docs/6f81754e-285d-4ce0-b59e-af7edb02d108.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/docs/6f81754e-285d-4ce0-b59e-af7edb02d108.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/docs/72b810ef-4156-4d09-8f08-a0cf57e7cefe.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/docs/72b810ef-4156-4d09-8f08-a0cf57e7cefe.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/docs/8472fece-c7dd-4241-8d65-9b3cd1a0b568.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/docs/8472fece-c7dd-4241-8d65-9b3cd1a0b568.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/docs/88fe4b2d-3040-4c70-9a70-546a47764b48.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/docs/88fe4b2d-3040-4c70-9a70-546a47764b48.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/gimp/06ca5602-62ca-47f6-ad4f-da151cde54cc.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/gimp/06ca5602-62ca-47f6-ad4f-da151cde54cc.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/gimp/2a729ded-3296-423d-aec4-7dd55ed5fbb3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/gimp/2a729ded-3296-423d-aec4-7dd55ed5fbb3.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/gimp/554785e9-4523-4e7a-b8e1-8016f565f56a.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/gimp/554785e9-4523-4e7a-b8e1-8016f565f56a.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/gimp/72f83cdc-bf76-4531-9a1b-eb893a13f8aa.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/gimp/72f83cdc-bf76-4531-9a1b-eb893a13f8aa.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/gimp/734d6579-c07d-47a8-9ae2-13339795476b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/gimp/734d6579-c07d-47a8-9ae2-13339795476b.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/gimp/77b8ab4d-994f-43ac-8930-8ca087d7c4b4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/gimp/77b8ab4d-994f-43ac-8930-8ca087d7c4b4.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/gimp/7a4deb26-d57d-4ea9-9a73-630f66a7b568.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/gimp/7a4deb26-d57d-4ea9-9a73-630f66a7b568.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/gimp/e2dd0213-26db-4349-abe5-d5667bfd725c.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/gimp/e2dd0213-26db-4349-abe5-d5667bfd725c.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/gimp/f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/gimp/f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/gimp/f723c744-e62c-4ae6-98d1-750d3cd7d79d.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/gimp/f723c744-e62c-4ae6-98d1-750d3cd7d79d.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/01091426-c9f0-4f8d-b478-563592555c1b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/01091426-c9f0-4f8d-b478-563592555c1b.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/0139a355-8ee2-4cfd-8e11-73529f064cd9.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/0139a355-8ee2-4cfd-8e11-73529f064cd9.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/1d6765b3-b744-4aa4-8287-d14e6d3cddac.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/1d6765b3-b744-4aa4-8287-d14e6d3cddac.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/2cac2f16-ed18-41d5-aa9e-361e0474429b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/2cac2f16-ed18-41d5-aa9e-361e0474429b.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/366bac7a-1d46-4676-bf3d-2a5f1c54e2a6.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/366bac7a-1d46-4676-bf3d-2a5f1c54e2a6.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/3d62bbd1-dcfa-4df8-8703-4bfd682d0051.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/3d62bbd1-dcfa-4df8-8703-4bfd682d0051.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/3ff19bd5-923f-4301-9c51-ae444aa8dcfe.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/3ff19bd5-923f-4301-9c51-ae444aa8dcfe.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/49e457e8-9ed9-416a-8b79-69be26794ed7.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/49e457e8-9ed9-416a-8b79-69be26794ed7.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/56b12bf1-e3a3-4f2f-90a0-fe3850b2795f.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/56b12bf1-e3a3-4f2f-90a0-fe3850b2795f.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/581aab2a-57d5-4384-841f-2f7655491859.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/581aab2a-57d5-4384-841f-2f7655491859.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/6701f987-3609-4c87-ab8a-8546d6bab134.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/6701f987-3609-4c87-ab8a-8546d6bab134.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/7cc0d54a-22cd-48c1-88d9-02930c2e3151.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/7cc0d54a-22cd-48c1-88d9-02930c2e3151.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/8d931c00-e4e2-4533-8768-10418f64108c.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/8d931c00-e4e2-4533-8768-10418f64108c.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/a69dfc50-2478-483a-af4c-a02a1b186c10.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/a69dfc50-2478-483a-af4c-a02a1b186c10.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/c37e0210-3777-4953-a3ef-2d1476995545.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/c37e0210-3777-4953-a3ef-2d1476995545.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/c8285e0b-a713-4459-a634-edde3d41f6ed.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/c8285e0b-a713-4459-a634-edde3d41f6ed.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/d8d0768a-9b97-48aa-851c-16b6b952e3d6.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/d8d0768a-9b97-48aa-851c-16b6b952e3d6.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/dc603381-80f9-4592-9fca-06053bc42191.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/dc603381-80f9-4592-9fca-06053bc42191.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/os/edb19d3f-135f-4f93-b052-3e7789454411.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/os/edb19d3f-135f-4f93-b052-3e7789454411.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/sheets/01b269ae-2111-4a07-81fd-3fcd711993b0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/sheets/01b269ae-2111-4a07-81fd-3fcd711993b0.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/sheets/0326d92d-d218-48a8-9ca1-981cd6d064c7.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/sheets/0326d92d-d218-48a8-9ca1-981cd6d064c7.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/sheets/035f41ba-6653-43ab-aa63-c86d449d62e5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/sheets/035f41ba-6653-43ab-aa63-c86d449d62e5.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/sheets/04d9aeaf-7bed-4024-bedb-e10e6f00eb7f.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/sheets/04d9aeaf-7bed-4024-bedb-e10e6f00eb7f.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/sheets/0a2e43bf-b26c-4631-a966-af9dfa12c9e5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/sheets/0a2e43bf-b26c-4631-a966-af9dfa12c9e5.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/sheets/0acbd372-ca7a-4507-b949-70673120190f.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/sheets/0acbd372-ca7a-4507-b949-70673120190f.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/sheets/0bf05a7d-b28b-44d2-955a-50b41e24012a.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/sheets/0bf05a7d-b28b-44d2-955a-50b41e24012a.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/sheets/42e0a640-4f19-4b28-973d-729602b5a4a7.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/sheets/42e0a640-4f19-4b28-973d-729602b5a4a7.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/sheets/4e6fcf72-daf3-439f-a232-c434ce416af6.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/sheets/4e6fcf72-daf3-439f-a232-c434ce416af6.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/sheets/8909d1cb-5877-44c7-a908-9f1875302441.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/sheets/8909d1cb-5877-44c7-a908-9f1875302441.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/sheets/9ed02102-6b28-4946-8339-c028166e9512.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/sheets/9ed02102-6b28-4946-8339-c028166e9512.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/sheets/a16d1eb7-941b-4edd-8c08-344213f939ad.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/sheets/a16d1eb7-941b-4edd-8c08-344213f939ad.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/sheets/a9f325aa-8c05-4e4f-8341-9e4358565f4f.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/sheets/a9f325aa-8c05-4e4f-8341-9e4358565f4f.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/sheets/b6e9778c-11b3-455f-b720-655048787484.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/sheets/b6e9778c-11b3-455f-b720-655048787484.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/sheets/f654bf9a-dea2-472d-a877-edeeb12d7462.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/sheets/f654bf9a-dea2-472d-a877-edeeb12d7462.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/slides/08aced46-45a2-48d7-993b-ed3fb5b32302.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/slides/08aced46-45a2-48d7-993b-ed3fb5b32302.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/slides/39be0d19-634d-4475-8768-09c130f5425d.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/slides/39be0d19-634d-4475-8768-09c130f5425d.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/slides/4ed5abd0-8b5d-47bd-839f-cacfa15ca37a.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/slides/4ed5abd0-8b5d-47bd-839f-cacfa15ca37a.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/slides/7dbc52a6-11e0-4c9a-a2cb-1e36cfda80d8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/slides/7dbc52a6-11e0-4c9a-a2cb-1e36cfda80d8.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/slides/986fc832-6af2-417c-8845-9272b3a1528b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/slides/986fc832-6af2-417c-8845-9272b3a1528b.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/slides/9ec204e4-f0a3-42f8-8458-b772a6797cab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/slides/9ec204e4-f0a3-42f8-8458-b772a6797cab.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/slides/a097acff-6266-4291-9fbd-137af7ecd439.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/slides/a097acff-6266-4291-9fbd-137af7ecd439.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/slides/a434992a-89df-4577-925c-0c58b747f0f4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/slides/a434992a-89df-4577-925c-0c58b747f0f4.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/slides/a669ef01-ded5-4099-9ea9-25e99b569840.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/slides/a669ef01-ded5-4099-9ea9-25e99b569840.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/slides/ac1b39ff-ee4d-4483-abce-c117e98942f0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/slides/ac1b39ff-ee4d-4483-abce-c117e98942f0.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/slides/af23762e-2bfd-4a1d-aada-20fa8de9ce07.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/slides/af23762e-2bfd-4a1d-aada-20fa8de9ce07.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/slides/af2d657a-e6b3-4c6a-9f67-9e3ed015974c.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/slides/af2d657a-e6b3-4c6a-9f67-9e3ed015974c.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/slides/c82632a4-56b6-4db4-9dd1-3820ee3388e4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/slides/c82632a4-56b6-4db4-9dd1-3820ee3388e4.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/slides/e4ef0baf-4b52-4590-a47e-d4d464cca2d7.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/slides/e4ef0baf-4b52-4590-a47e-d4d464cca2d7.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/slides/ed43c15f-00cb-4054-9c95-62c880865d68.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/slides/ed43c15f-00cb-4054-9c95-62c880865d68.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/083bee5b-24c1-43f9-9644-d1d04926000b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/083bee5b-24c1-43f9-9644-d1d04926000b.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/08d6db0f-8b06-41ae-bb20-26250e0a760f.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/08d6db0f-8b06-41ae-bb20-26250e0a760f.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/0d38e311-29b5-4925-a480-14a6a82836c8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/0d38e311-29b5-4925-a480-14a6a82836c8.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/1c0ab6a9-2bde-49e7-9b9c-ab579ab3eab7.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/1c0ab6a9-2bde-49e7-9b9c-ab579ab3eab7.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/1d3861fa-d605-48be-9337-b5188c351663.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/1d3861fa-d605-48be-9337-b5188c351663.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/3b3098d8-6626-4d8d-a291-2ad19b73d0f2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/3b3098d8-6626-4d8d-a291-2ad19b73d0f2.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/3dcc6db1-7ca5-412d-b519-142724d41ef2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/3dcc6db1-7ca5-412d-b519-142724d41ef2.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/5d020bcd-acb7-46ee-a21b-3a261cede5ce.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/5d020bcd-acb7-46ee-a21b-3a261cede5ce.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/6034aa6c-2892-4ebd-bdfb-a7cfdddd0cbf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/6034aa6c-2892-4ebd-bdfb-a7cfdddd0cbf.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/760b4347-bf5d-4633-ad6d-8047e9271fac.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/760b4347-bf5d-4633-ad6d-8047e9271fac.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/88f3f498-7cd0-4f3b-b7ae-40cb8e951aa7.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/88f3f498-7cd0-4f3b-b7ae-40cb8e951aa7.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/8a48df09-4141-4cf9-89c6-e193e0f42451.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/8a48df09-4141-4cf9-89c6-e193e0f42451.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/8bbe5ae1-611f-474e-a334-ad56c875e4bc.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/8bbe5ae1-611f-474e-a334-ad56c875e4bc.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/93b4281d-c6e7-4b39-8b3b-0132f1dd8615.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/93b4281d-c6e7-4b39-8b3b-0132f1dd8615.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/a1f7f045-7938-4042-a445-fcd7d84aa2a4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/a1f7f045-7938-4042-a445-fcd7d84aa2a4.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/b5b54c18-fddd-42f1-8299-00ab0a1397ae.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/b5b54c18-fddd-42f1-8299-00ab0a1397ae.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/c4e9503c-822e-4297-a3c5-d4e0e5ad596d.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/c4e9503c-822e-4297-a3c5-d4e0e5ad596d.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/d2c5244a-d32b-4bc5-9cf8-616da006ee7a.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/d2c5244a-d32b-4bc5-9cf8-616da006ee7a.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/d7a47fa4-5e5b-4028-8d4e-a031f96c67b0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/d7a47fa4-5e5b-4028-8d4e-a031f96c67b0.json -------------------------------------------------------------------------------- /eval_online_benchmarks/tasks/single_gui/vscode/df81e919-58eb-4dea-87ba-4b6a6321a9c2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/eval_online_benchmarks/tasks/single_gui/vscode/df81e919-58eb-4dea-87ba-4b6a6321a9c2.json -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/pyproject.toml -------------------------------------------------------------------------------- /scripts/agent_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/agent_server.py -------------------------------------------------------------------------------- /scripts/docker_startup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/docker_startup.sh -------------------------------------------------------------------------------- /scripts/docker_startup_22.04.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/docker_startup_22.04.sh -------------------------------------------------------------------------------- /scripts/format.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/format.sh -------------------------------------------------------------------------------- /scripts/generate_latex_tables/idm_all_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/generate_latex_tables/idm_all_table.py -------------------------------------------------------------------------------- /scripts/generate_latex_tables/idm_multiple_edit_distance_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/generate_latex_tables/idm_multiple_edit_distance_table.py -------------------------------------------------------------------------------- /scripts/generate_latex_tables/success_detection_all_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/generate_latex_tables/success_detection_all_table.py -------------------------------------------------------------------------------- /scripts/json2jsonl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/json2jsonl.py -------------------------------------------------------------------------------- /scripts/json_check.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/json_check.sh -------------------------------------------------------------------------------- /scripts/model_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/model_server.py -------------------------------------------------------------------------------- /scripts/plot/benchmark_humans.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/plot/benchmark_humans.py -------------------------------------------------------------------------------- /scripts/plot/benchmark_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/plot/benchmark_stats.py -------------------------------------------------------------------------------- /scripts/plot/grounding_analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/plot/grounding_analysis.py -------------------------------------------------------------------------------- /scripts/plot/grounding_recaption.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/plot/grounding_recaption.py -------------------------------------------------------------------------------- /scripts/plot/grounding_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/plot/grounding_stats.py -------------------------------------------------------------------------------- /scripts/push_to_hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/push_to_hub.py -------------------------------------------------------------------------------- /scripts/recorded_trajectory2episode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/recorded_trajectory2episode.py -------------------------------------------------------------------------------- /scripts/setup_api_keys.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/scripts/setup_api_keys.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/test_agents/test_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/tests/test_agents/test_agent.py -------------------------------------------------------------------------------- /tests/test_desktop_env/test_interpreter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/tests/test_desktop_env/test_interpreter.py -------------------------------------------------------------------------------- /tests/test_desktop_env/test_recorder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/tests/test_desktop_env/test_recorder.py -------------------------------------------------------------------------------- /tests/test_evaluators/test_filesystem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/tests/test_evaluators/test_filesystem.py -------------------------------------------------------------------------------- /tests/test_evaluators/test_gcalendar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/tests/test_evaluators/test_gcalendar.py -------------------------------------------------------------------------------- /tests/test_evaluators/test_gdocs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/tests/test_evaluators/test_gdocs.py -------------------------------------------------------------------------------- /tests/test_evaluators/test_gdrive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/tests/test_evaluators/test_gdrive.py -------------------------------------------------------------------------------- /tests/test_evaluators/test_gforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/tests/test_evaluators/test_gforms.py -------------------------------------------------------------------------------- /tests/test_evaluators/test_gmail.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/tests/test_evaluators/test_gmail.py -------------------------------------------------------------------------------- /tests/test_evaluators/test_gsheets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/tests/test_evaluators/test_gsheets.py -------------------------------------------------------------------------------- /tests/test_evaluators/test_gslides.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/tests/test_evaluators/test_gslides.py -------------------------------------------------------------------------------- /tests/test_evaluators/test_joint_evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/tests/test_evaluators/test_joint_evaluation.py -------------------------------------------------------------------------------- /tests/test_evaluators/test_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/tests/test_evaluators/test_process.py -------------------------------------------------------------------------------- /tests/test_evaluators/test_qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/tests/test_evaluators/test_qa.py -------------------------------------------------------------------------------- /tests/test_evaluators/test_telegram.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/tests/test_evaluators/test_telegram.py -------------------------------------------------------------------------------- /tests/test_evaluators/test_vscode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ltzheng/agent-studio/HEAD/tests/test_evaluators/test_vscode.py --------------------------------------------------------------------------------