├── .env.example ├── .eslintrc.js ├── .github └── workflows │ └── release.yml ├── .gitignore ├── .prettierrc ├── .readme ├── cover.png ├── gui-demo.gif └── how-it-works.png ├── .vscode └── settings.json ├── LICENCE ├── Makefile ├── NOTICE ├── README.md ├── examples ├── .gitignore ├── basic │ ├── .env.example │ ├── index.ts │ ├── package-lock.json │ ├── package.json │ └── tsconfig.json ├── google │ ├── index.ts │ ├── package-lock.json │ ├── package.json │ └── tsconfig.json ├── ollama │ ├── .env.example │ ├── index.ts │ ├── package-lock.json │ └── package.json └── web-voyager-questions.json ├── package-lock.json ├── package.json ├── release.config.cjs ├── rollup.config.js ├── src ├── core │ ├── agents │ │ ├── agent-base.ts │ │ ├── feedback-agent │ │ │ ├── feedback-agent.prompt.ts │ │ │ ├── feedback-agent.ts │ │ │ └── feedback-agent.types.ts │ │ ├── openator │ │ │ ├── openator.config.ts │ │ │ ├── openator.prompt.ts │ │ │ ├── openator.ts │ │ │ └── openator.types.ts │ │ └── summarize-agent │ │ │ └── summarize-agent.ts │ ├── entities │ │ ├── openator-result.ts │ │ ├── run.ts │ │ ├── task.ts │ │ ├── variable-string.ts │ │ └── variable.ts │ ├── interfaces │ │ ├── agent-reporter.interface.ts │ │ ├── browser-websocket-server.interface.ts │ │ ├── browser.interface.ts │ │ ├── event-bus.interface.ts │ │ ├── file-system.interface.ts │ │ ├── llm.interface.ts │ │ ├── reporter.interface.ts │ │ └── screenshotter.interface.ts │ ├── services │ │ ├── realtime-reporter.ts │ │ └── task-manager-service.ts │ ├── shared │ │ └── utils.ts │ └── types.ts ├── index.ts ├── infra │ └── services │ │ ├── chromium-browser.ts │ │ ├── console-reporter.ts │ │ ├── dom-service.ts │ │ ├── in-memory-file-system.ts │ │ ├── local-file-system.ts │ │ └── playwright-screenshotter.ts ├── init-openator.ts └── models │ ├── chat-google.ts │ ├── chat-ollama.ts │ └── chat-openai.ts ├── tsconfig.build.json └── tsconfig.json /.env.example: -------------------------------------------------------------------------------- 
1 | OPENAI_API_KEY= 2 | HYPERBROWSER_API_KEY= 3 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | parser: '@typescript-eslint/parser', 3 | parserOptions: { 4 | project: 'tsconfig.json', 5 | tsconfigRootDir: __dirname, 6 | sourceType: 'module', 7 | }, 8 | plugins: ['@typescript-eslint/eslint-plugin'], 9 | extends: [ 10 | 'plugin:@typescript-eslint/recommended', 11 | 'plugin:prettier/recommended', 12 | ], 13 | root: true, 14 | env: { 15 | node: true, 16 | jest: true, 17 | }, 18 | ignorePatterns: ['.eslintrc.js'], 19 | rules: { 20 | '@typescript-eslint/interface-name-prefix': 'off', 21 | '@typescript-eslint/explicit-function-return-type': 'off', 22 | '@typescript-eslint/explicit-module-boundary-types': 'off', 23 | '@typescript-eslint/no-explicit-any': 'off', 24 | }, 25 | }; 26 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - next 8 | 9 | permissions: 10 | contents: write 11 | issues: write 12 | pull-requests: write 13 | 14 | jobs: 15 | release: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: actions/setup-node@v4 20 | - run: | 21 | npm i 22 | npm run build 23 | - name: Semantic Release 24 | env: 25 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 26 | NPM_TOKEN: ${{ secrets.NPM_TOKEN }} 27 | run: | 28 | npx semantic-release 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # compiled output 2 | /dist 3 | /node_modules 4 | /build 5 | 6 | # Logs 7 | logs 8 | *.log 9 | npm-debug.log* 10 | pnpm-debug.log* 11 | yarn-debug.log* 12 | 
yarn-error.log* 13 | lerna-debug.log* 14 | 15 | .pakcage-lock.json 16 | 17 | # OS 18 | .DS_Store 19 | 20 | # Tests 21 | /coverage 22 | /.nyc_output 23 | 24 | # IDEs and editors 25 | /.idea 26 | .project 27 | .classpath 28 | .c9/ 29 | *.launch 30 | .settings/ 31 | *.sublime-workspace 32 | 33 | # IDE - VSCode 34 | .vscode/* 35 | !.vscode/settings.json 36 | !.vscode/tasks.json 37 | !.vscode/launch.json 38 | !.vscode/extensions.json 39 | 40 | # dotenv environment variable files 41 | .env 42 | .env.development.local 43 | .env.test.local 44 | .env.production.local 45 | .env.local 46 | 47 | # temp directory 48 | .temp 49 | .tmp 50 | 51 | # Runtime data 52 | pids 53 | *.pid 54 | *.seed 55 | *.pid.lock 56 | 57 | # Diagnostic reports (https://nodejs.org/api/report.html) 58 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 59 | 60 | .rollup.cache 61 | 62 | *.tgz -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": true, 3 | "trailingComma": "all" 4 | } -------------------------------------------------------------------------------- /.readme/cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agentlabs-dev/openator/19f9b157e628f4b8f380ab75ed04ef6182c9d805/.readme/cover.png -------------------------------------------------------------------------------- /.readme/gui-demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agentlabs-dev/openator/19f9b157e628f4b8f380ab75ed04ef6182c9d805/.readme/gui-demo.gif -------------------------------------------------------------------------------- /.readme/how-it-works.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/agentlabs-dev/openator/19f9b157e628f4b8f380ab75ed04ef6182c9d805/.readme/how-it-works.png -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "editor.formatOnSave": true, 3 | "editor.defaultFormatter": "esbenp.prettier-vscode", 4 | "[handlebars]": { 5 | "editor.formatOnSave": false 6 | }, 7 | "cSpell.words": ["hookform", "nextui", "Signup"] 8 | } 9 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2025 AgentLabs, Inc. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: build up down restart logs 2 | 3 | build: 4 | docker-compose build 5 | 6 | watch: 7 | docker-compose up 8 | 9 | up: 10 | docker-compose up 11 | 12 | upd: 13 | docker-compose up -d 14 | 15 | dev-up: 16 | docker-compose -f docker-compose.dev.yml up --build --force-recreate 17 | 18 | dev-upd: 19 | docker-compose -f docker-compose.dev.yml up --build --force-recreate -d 20 | 21 | down: 22 | docker-compose down 23 | 24 | re: build watch 25 | 26 | logs: 27 | docker-compose logs -f 28 | 29 | logs-playwright: 30 | docker-compose logs -f playwright 31 | 32 | logs-backend: 33 | docker-compose logs -f backend 34 | 35 | logs-frontend: 36 | docker-compose logs -f frontend 37 | 38 | prune-all: 39 | docker system prune -a --volumes -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2025 AgentLabs, Inc 2 | 3 | Source code in this repository 
is licensed under the Apache License 4 | Version 2.0. Please see LICENCE for more information. 5 | 6 | Every file is under copyright (c) 2023 AgentLabs, Inc unless otherwise 7 | specified. 8 | 9 | * For a copy of the Apache License Version 2.0, please see LICENCE 10 | as included in this repository's top-level directory. 11 | 12 | * All third-party components incorporated into this software are licensed 13 | under the original license provided by the owner of the applicable component. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | Openator 3 |

4 | 5 |

6 | . 7 |

8 | 9 |

10 | Openator is a state-of-the-art browser agent tool that is capable of planning and executing actions formulated in natural language. 11 |

12 | 13 |

14 | This project is under active development and any help or support is welcome. 15 |

16 | 17 |

18 | 19 | License version 20 | 21 | 22 | Docker Image CI 23 | 24 |

25 | 26 |

27 | . 28 |

29 | 30 |

31 | 🌟 Give us some love by starring this repository! 🌟 32 |

33 | 34 |

35 | . 36 |

37 | 38 | ## Quick Start 39 | 40 | Install the package using npm or yarn. 41 | 42 | ```bash 43 | npm i openator 44 | ``` 45 | 46 | Spin up your first agent with a task. 47 | 48 | ```typescript 49 | import { initOpenator, ChatOpenAI } from 'openator'; 50 | 51 | const main = async () => { 52 | const llm = new ChatOpenAI({ 53 | apiKey: process.env.OPENAI_API_KEY!, 54 | }); 55 | 56 | const openator = initOpenator({ 57 | llm, 58 | headless: false, 59 | }); 60 | 61 | await openator.start( 62 | 'https://amazon.com', 63 | 'Find a black wireless keyboard and return the price.', 64 | ); 65 | }; 66 | 67 | main(); 68 | ``` 69 | 70 | ## Add Secrets and Variables 71 | 72 | Optionally, you can add variables and secrets to your agent. These variables will be interpolated during runtime by the agent. 73 | 74 | This is especially helpful if you want to pass more context to the agent, such as a username and a password. 75 | 76 | ```typescript 77 | import { initOpenator, Variable, ChatOpenAI } from 'openator'; 78 | 79 | const llm = new ChatOpenAI({ 80 | apiKey: process.env.OPENAI_API_KEY!, 81 | }); 82 | 83 | const openator = initOpenator({ 84 | headless: false, 85 | llm, 86 | variables: [ 87 | new Variable({ 88 | name: 'username', 89 | value: 'my username', 90 | isSecret: false, 91 | }), 92 | new Variable({ 93 | name: 'password', 94 | value: process.env.PASSWORD, 95 | isSecret: true, 96 | }), 97 | ], 98 | }); 99 | 100 | await openator.start( 101 | 'https://my-website.com', 102 | 'Authenticate with the username {{username}} and password {{password}} and then find the latest news on the website.', 103 | ); 104 | ``` 105 | 106 | ## Available LLM Providers 107 | 108 | Optionally you can configure the LLM to use different models or configurations. 
109 | 110 | We support the following models: 111 | 112 | | Platform | Supported models | Advised model | 113 | | ------------------ | --------------------------------------------------------- | ------------- | 114 | | OpenAI | gpt-4o, gpt-4o-mini, gpt-4-turbo | gpt-4o | 115 | | Ollama | qwen2.5, llama3.2 | - | 116 | | GoogleGenerativeAI | gemini-2.0-flash, gemini-2.0-flash-lite, gemini-1.5-flash | - | 117 | 118 | > Note that we benchmarked the performance of Openator on OpenAI gpt-4o and we recommend using it. While you can try other models, we haven't battle-tested them yet. 119 | 120 | ### OpenAIChat 121 | 122 | Here's the configuration type for the ChatOpenAI provider. 123 | 124 | ```typescript 125 | import { ChatOpenAI } from 'openator'; 126 | 127 | const llm = new ChatOpenAI({ 128 | apiKey: process.env.OPENAI_API_KEY!, 129 | model: 'gpt-4o', 130 | temperature: 0, // optional 131 | maxRetries: 3, // optional 132 | maxConcurrency: 1, // optional 133 | }); 134 | ``` 135 | 136 | ### OllamaChat 137 | 138 | ```typescript 139 | import { ChatOllama } from 'openator'; 140 | 141 | const llm = new ChatOllama({ 142 | model: 'qwen2.5', 143 | temperature: 0, // optional 144 | maxRetries: 3, // optional 145 | maxConcurrency: 1, // optional 146 | baseUrl: 'http://localhost:11434', // optional 147 | }); 148 | ``` 149 | 150 | ### GoogleGenerativeAI 151 | 152 | ```typescript 153 | import { ChatGoogleGenAI } from 'openator'; 154 | 155 | const llm = new ChatGoogleGenAI({ 156 | model: 'gemini-2.0-flash', 157 | apiKey: process.env.GOOGLE_GENERATIVE_AI_API_KEY!, 158 | temperature: 0, // optional 159 | maxRetries: 3, // optional 160 | maxConcurrency: 1, // optional 161 | }); 162 | ``` 163 | 164 | ## Demo 165 | 166 | Here is what you can build with Openator. You can find more examples and source code in our main repository. The frontend is not included but can be found in our open-source repository. 
167 | 168 | Example task: 169 | 170 | ```typescript 171 | await openator.start( 172 | 'https://amazon.com', 173 | 'Purchase a black wireless keyboard', 174 | ); 175 | ``` 176 | 177 | agentlabs.dev 178 | 179 | ## How it works 180 | 181 |

182 | agentlabs.dev 183 |

184 | 185 |

186 | . 187 |

188 | 189 |

190 | 🌟 Give us some love by starring this repository! 🌟 191 |

192 | 193 |

194 | . 195 |

196 | -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules -------------------------------------------------------------------------------- /examples/basic/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=your-key -------------------------------------------------------------------------------- /examples/basic/index.ts: -------------------------------------------------------------------------------- 1 | import { ChatOpenAI, initOpenator, Variable } from 'openator'; 2 | 3 | import 'dotenv/config'; 4 | 5 | const main = async () => { 6 | try { 7 | if (!process.env.OPENAI_API_KEY) { 8 | throw new Error('OPENAI_API_KEY is not set'); 9 | } 10 | 11 | const llm = new ChatOpenAI({ 12 | apiKey: process.env.OPENAI_API_KEY, 13 | model: 'gpt-4o', 14 | temperature: 0, 15 | maxRetries: 10, 16 | maxConcurrency: 1, 17 | }); 18 | 19 | const openator = initOpenator({ 20 | llm, 21 | headless: false, 22 | variables: [ 23 | new Variable({ 24 | name: 'password', 25 | value: process.env.PASSWORD!, 26 | isSecret: true, 27 | }), 28 | ], 29 | }); 30 | 31 | const result = await openator.start( 32 | 'https://amazon.com', 33 | 'Find a black wirelesskeyboard and return the price.', 34 | ); 35 | } catch (error) { 36 | console.error(error); 37 | } 38 | }; 39 | 40 | main(); 41 | -------------------------------------------------------------------------------- /examples/basic/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "basic", 3 | "version": "1.0.0", 4 | "description": "", 5 | "license": "ISC", 6 | "author": "", 7 | "type": "commonjs", 8 | "main": "index.js", 9 | "scripts": { 10 | "test": "echo \"Error: no test specified\" && exit 1", 11 | "start": "ts-node index.ts" 12 | }, 13 | "dependencies": { 14 | "dotenv": "^16.4.7", 
15 | "openator": "^1.2.0", 16 | "typescript": "^5.7.3" 17 | }, 18 | "devDependencies": { 19 | "prettier": "3.5.2" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /examples/basic/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | /* Visit https://aka.ms/tsconfig to read more about this file */ 4 | 5 | /* Projects */ 6 | // "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */ 7 | // "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */ 8 | // "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */ 9 | // "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */ 10 | // "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */ 11 | // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */ 12 | 13 | /* Language and Environment */ 14 | "target": "es2016" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */, 15 | // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ 16 | // "jsx": "preserve", /* Specify what JSX code is generated. */ 17 | // "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */ 18 | // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */ 19 | // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. 
*/ 20 | // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */ 21 | // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */ 22 | // "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */ 23 | // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */ 24 | // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */ 25 | // "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */ 26 | 27 | /* Modules */ 28 | "module": "commonjs" /* Specify what module code is generated. */, 29 | // "rootDir": "./", /* Specify the root folder within your source files. */ 30 | // "moduleResolution": "node10", /* Specify how TypeScript looks up a file from a given module specifier. */ 31 | // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */ 32 | // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */ 33 | // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */ 34 | // "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */ 35 | // "types": [], /* Specify type package names to be included without being referenced in a source file. */ 36 | // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ 37 | // "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */ 38 | // "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. 
*/ 39 | // "rewriteRelativeImportExtensions": true, /* Rewrite '.ts', '.tsx', '.mts', and '.cts' file extensions in relative import paths to their JavaScript equivalent in output files. */ 40 | // "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */ 41 | // "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */ 42 | // "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */ 43 | // "noUncheckedSideEffectImports": true, /* Check side effect imports. */ 44 | // "resolveJsonModule": true, /* Enable importing .json files. */ 45 | // "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */ 46 | // "noResolve": true, /* Disallow 'import's, 'require's or ''s from expanding the number of files TypeScript should add to a project. */ 47 | 48 | /* JavaScript Support */ 49 | // "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */ 50 | // "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */ 51 | // "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */ 52 | 53 | /* Emit */ 54 | // "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */ 55 | // "declarationMap": true, /* Create sourcemaps for d.ts files. */ 56 | // "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */ 57 | // "sourceMap": true, /* Create source map files for emitted JavaScript files. */ 58 | // "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */ 59 | // "noEmit": true, /* Disable emitting files from a compilation. 
*/ 60 | // "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */ 61 | // "outDir": "./", /* Specify an output folder for all emitted files. */ 62 | // "removeComments": true, /* Disable emitting comments. */ 63 | // "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */ 64 | // "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */ 65 | // "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */ 66 | // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */ 67 | // "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */ 68 | // "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */ 69 | // "newLine": "crlf", /* Set the newline character for emitting files. */ 70 | // "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */ 71 | // "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */ 72 | // "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */ 73 | // "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */ 74 | // "declarationDir": "./", /* Specify the output directory for generated declaration files. */ 75 | 76 | /* Interop Constraints */ 77 | // "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */ 78 | // "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. 
*/ 79 | // "isolatedDeclarations": true, /* Require sufficient annotation on exports so other tools can trivially generate declaration files. */ 80 | // "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */ 81 | "esModuleInterop": true /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */, 82 | // "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */ 83 | "forceConsistentCasingInFileNames": true /* Ensure that casing is correct in imports. */, 84 | 85 | /* Type Checking */ 86 | "strict": true /* Enable all strict type-checking options. */, 87 | // "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */ 88 | // "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */ 89 | // "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */ 90 | // "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */ 91 | // "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */ 92 | // "strictBuiltinIteratorReturn": true, /* Built-in iterators are instantiated with a 'TReturn' type of 'undefined' instead of 'any'. */ 93 | // "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */ 94 | // "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */ 95 | // "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */ 96 | // "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. 
*/ 97 | // "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */ 98 | // "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */ 99 | // "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */ 100 | // "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */ 101 | // "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */ 102 | // "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */ 103 | // "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */ 104 | // "allowUnusedLabels": true, /* Disable error reporting for unused labels. */ 105 | // "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */ 106 | 107 | /* Completeness */ 108 | // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */ 109 | "skipLibCheck": true /* Skip type checking all .d.ts files. 
*/ 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /examples/google/index.ts: -------------------------------------------------------------------------------- 1 | import { ChatGoogleGenAI, initOpenator } from 'openator'; 2 | 3 | import 'dotenv/config'; 4 | 5 | const main = async () => { 6 | const llm = new ChatGoogleGenAI({ 7 | apiKey: process.env.GEMINI_API_KEY!, 8 | model: 'gemini-1.5-flash', 9 | }); 10 | 11 | const openator = initOpenator({ 12 | llm, 13 | headless: false, 14 | }); 15 | 16 | const result = await openator.start( 17 | 'https://amazon.com', 18 | 'Find a black wireless keyboard and return the price.', 19 | ); 20 | }; 21 | 22 | main(); 23 | -------------------------------------------------------------------------------- /examples/google/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "google", 3 | "version": "1.0.0", 4 | "description": "", 5 | "license": "ISC", 6 | "author": "", 7 | "type": "commonjs", 8 | "main": "index.js", 9 | "scripts": { 10 | "test": "echo \"Error: no test specified\" && exit 1" 11 | }, 12 | "dependencies": { 13 | "dotenv": "^16.4.7", 14 | "openator": "^1.3.0" 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /examples/google/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | /* Visit https://aka.ms/tsconfig to read more about this file */ 4 | 5 | /* Projects */ 6 | // "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */ 7 | // "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */ 8 | // "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. 
*/ 9 | // "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */ 10 | // "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */ 11 | // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */ 12 | 13 | /* Language and Environment */ 14 | "target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */ 15 | // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ 16 | // "jsx": "preserve", /* Specify what JSX code is generated. */ 17 | // "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */ 18 | // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */ 19 | // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */ 20 | // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */ 21 | // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */ 22 | // "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */ 23 | // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */ 24 | // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */ 25 | // "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */ 26 | 27 | /* Modules */ 28 | "module": "commonjs", /* Specify what module code is generated. 
*/ 29 | // "rootDir": "./", /* Specify the root folder within your source files. */ 30 | // "moduleResolution": "node10", /* Specify how TypeScript looks up a file from a given module specifier. */ 31 | // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */ 32 | // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */ 33 | // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */ 34 | // "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */ 35 | // "types": [], /* Specify type package names to be included without being referenced in a source file. */ 36 | // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ 37 | // "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */ 38 | // "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */ 39 | // "rewriteRelativeImportExtensions": true, /* Rewrite '.ts', '.tsx', '.mts', and '.cts' file extensions in relative import paths to their JavaScript equivalent in output files. */ 40 | // "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */ 41 | // "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */ 42 | // "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */ 43 | // "noUncheckedSideEffectImports": true, /* Check side effect imports. */ 44 | // "resolveJsonModule": true, /* Enable importing .json files. */ 45 | // "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. 
*/ 46 | // "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */ 47 | 48 | /* JavaScript Support */ 49 | // "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */ 50 | // "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */ 51 | // "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */ 52 | 53 | /* Emit */ 54 | // "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */ 55 | // "declarationMap": true, /* Create sourcemaps for d.ts files. */ 56 | // "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */ 57 | // "sourceMap": true, /* Create source map files for emitted JavaScript files. */ 58 | // "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */ 59 | // "noEmit": true, /* Disable emitting files from a compilation. */ 60 | // "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */ 61 | // "outDir": "./", /* Specify an output folder for all emitted files. */ 62 | // "removeComments": true, /* Disable emitting comments. */ 63 | // "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */ 64 | // "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */ 65 | // "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */ 66 | // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. 
*/ 67 | // "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */ 68 | // "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */ 69 | // "newLine": "crlf", /* Set the newline character for emitting files. */ 70 | // "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */ 71 | // "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */ 72 | // "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */ 73 | // "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */ 74 | // "declarationDir": "./", /* Specify the output directory for generated declaration files. */ 75 | 76 | /* Interop Constraints */ 77 | // "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */ 78 | // "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */ 79 | // "isolatedDeclarations": true, /* Require sufficient annotation on exports so other tools can trivially generate declaration files. */ 80 | // "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */ 81 | "esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */ 82 | // "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */ 83 | "forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */ 84 | 85 | /* Type Checking */ 86 | "strict": true, /* Enable all strict type-checking options. 
*/ 87 | // "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */ 88 | // "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */ 89 | // "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */ 90 | // "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */ 91 | // "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */ 92 | // "strictBuiltinIteratorReturn": true, /* Built-in iterators are instantiated with a 'TReturn' type of 'undefined' instead of 'any'. */ 93 | // "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */ 94 | // "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */ 95 | // "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */ 96 | // "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */ 97 | // "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */ 98 | // "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */ 99 | // "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */ 100 | // "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */ 101 | // "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */ 102 | // "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */ 103 | // "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. 
*/ 104 | // "allowUnusedLabels": true, /* Disable error reporting for unused labels. */ 105 | // "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */ 106 | 107 | /* Completeness */ 108 | // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */ 109 | "skipLibCheck": true /* Skip type checking all .d.ts files. */ 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /examples/ollama/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=your-key -------------------------------------------------------------------------------- /examples/ollama/index.ts: -------------------------------------------------------------------------------- 1 | import { ChatOllama, initOpenator } from 'openator'; 2 | 3 | const main = async () => { 4 | try { 5 | const llm = new ChatOllama({ 6 | model: 'qwen2.5', 7 | temperature: 0, 8 | maxRetries: 10, 9 | maxConcurrency: 1, 10 | baseUrl: 'http://127.0.0.1:11434', 11 | }); 12 | 13 | const openator = initOpenator({ 14 | llm, 15 | headless: false, 16 | }); 17 | 18 | const result = await openator.start( 19 | 'https://amazon.com', 20 | 'Find a black wireless keyboard and return the price.', 21 | ); 22 | } catch (error) { 23 | console.error(error); 24 | } 25 | }; 26 | 27 | main(); 28 | -------------------------------------------------------------------------------- /examples/ollama/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "openator": "file:../../openator-v1.0.3.tgz" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openator", 3 | "version": "v1.2.0", 4 | "description": "SOTA AI-browsing agent that is capable of planning and 
executing actions formulated in natural language.", 5 | "author": "Kevin Piacentini ", 6 | "private": false, 7 | "license": "Apache-2.0", 8 | "type": "module", 9 | "main": "dist/index.js", 10 | "types": "dist/index.d.ts", 11 | "homepage": "https://github.com/agentlabs-dev/openator", 12 | "keywords": [ 13 | "openator", 14 | "ai agent", 15 | "ai", 16 | "browser use", 17 | "agentlabs", 18 | "playwright" 19 | ], 20 | "repository": { 21 | "type": "git", 22 | "url": "https://github.com/agentlabs-dev/openator.git" 23 | }, 24 | "publishConfig": { 25 | "access": "public" 26 | }, 27 | "resolutions": { 28 | "rollup": "npm:@rollup/wasm-node@*" 29 | }, 30 | "files": [ 31 | "dist", 32 | "README.md" 33 | ], 34 | "scripts": { 35 | "format": "prettier --write \"src/**/*.ts\" \"test/**/*.ts\"", 36 | "build": "npx rollup -c", 37 | "buildpack": "npm run build && npm pack", 38 | "lint": "eslint \"{src,apps,libs,test}/**/*.ts\" --fix" 39 | }, 40 | "optionalDependencies": { 41 | "@rollup/rollup-linux-x64-gnu": "*" 42 | }, 43 | "dependencies": { 44 | "@hyperbrowser/sdk": "^0.30.0", 45 | "@langchain/google-genai": "^0.1.9", 46 | "@langchain/ollama": "^0.2.0", 47 | "@langchain/openai": "^0.4.4", 48 | "@mozilla/readability": "^0.5.0", 49 | "class-validator": "^0.14.1", 50 | "dom-to-semantic-markdown": "^1.3.0", 51 | "dotenv": "^16.4.7", 52 | "fs-extra": "^11.3.0", 53 | "jsdom": "^26.0.0", 54 | "jsondom": "^0.4.1", 55 | "langchain": "^0.3.19", 56 | "openator": "^1.1.0", 57 | "playwright": "^1.50.1", 58 | "reflect-metadata": "^0.2.2", 59 | "rxjs": "^7.8.1", 60 | "socket.io": "^4.8.1", 61 | "zod": "^3.24.1", 62 | "zod-to-json-schema": "^3.24.1" 63 | }, 64 | "devDependencies": { 65 | "@rollup/plugin-json": "^6.1.0", 66 | "@rollup/plugin-typescript": "^12.1.2", 67 | "@semantic-release/github": "^11.0.1", 68 | "@semantic-release/npm": "^12.0.1", 69 | "@types/express": "^5.0.0", 70 | "@types/jest": "^29.5.14", 71 | "@types/jsdom": "^21.1.7", 72 | "@types/node": "^22.13.1", 73 | "@types/supertest": 
"^6.0.2", 74 | "@typescript-eslint/eslint-plugin": "^8.24.0", 75 | "@typescript-eslint/parser": "^8.24.0", 76 | "eslint": "^9.20.0", 77 | "eslint-config-prettier": "^10.0.1", 78 | "eslint-plugin-prettier": "^5.2.3", 79 | "jest": "^29.7.0", 80 | "prettier": "^3.5.0", 81 | "rollup": "^4.34.8", 82 | "rollup-plugin-dts": "^6.1.1", 83 | "semantic-release": "^24.2.3", 84 | "semantic-release-gitmoji": "^1.6.8", 85 | "source-map-support": "^0.5.21", 86 | "supertest": "^7.0.0", 87 | "ts-jest": "^29.2.5", 88 | "ts-loader": "^9.5.2", 89 | "ts-node": "^10.9.2", 90 | "tsconfig-paths": "^4.2.0", 91 | "typescript": "^5.7.3" 92 | }, 93 | "jest": { 94 | "moduleFileExtensions": [ 95 | "js", 96 | "json", 97 | "ts" 98 | ], 99 | "rootDir": "src", 100 | "testRegex": ".*\\.spec\\.ts$", 101 | "transform": { 102 | "^.+\\.(t|j)s$": "ts-jest" 103 | }, 104 | "collectCoverageFrom": [ 105 | "**/*.(t|j)s" 106 | ], 107 | "coverageDirectory": "../coverage", 108 | "testEnvironment": "node" 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /release.config.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: ['@semantic-release/github', '@semantic-release/npm'], 3 | branches: ['main', { name: 'next', prerelease: true }], 4 | }; 5 | -------------------------------------------------------------------------------- /rollup.config.js: -------------------------------------------------------------------------------- 1 | // Contents of the file /rollup.config.js 2 | import typescript from '@rollup/plugin-typescript'; 3 | import json from '@rollup/plugin-json'; 4 | 5 | const config = [ 6 | { 7 | input: 'src/index.ts', 8 | output: { 9 | file: 'dist/index.js', 10 | format: 'es', 11 | sourcemap: true, 12 | }, 13 | external: [ 14 | 'events', 15 | 'ora-classic', 16 | 'crypto', 17 | 'jsdom', 18 | 'playwright', 19 | 'dotenv/config', 20 | 'dom-to-semantic-markdown', 21 | 
'@langchain/core/output_parsers', 22 | '@langchain/openai', 23 | '@langchain/core/messages', 24 | 'zod', 25 | 'zod-to-json-schema', 26 | ], 27 | plugins: [ 28 | typescript({ 29 | exclude: ['examples/**'], 30 | }), 31 | json(), 32 | ], 33 | }, 34 | ]; 35 | export default config; 36 | -------------------------------------------------------------------------------- /src/core/agents/agent-base.ts: -------------------------------------------------------------------------------- 1 | import { LLM } from '../interfaces/llm.interface'; 2 | import { HumanMessage, SystemMessage } from '@langchain/core/messages'; 3 | import { JsonOutputParser } from '@langchain/core/output_parsers'; 4 | import { z } from 'zod'; 5 | 6 | /** 7 | * This is an experimental implementation of an agent. 8 | * It is not used in the current version of the application. 9 | */ 10 | 11 | export type AgentTool = { 12 | name: string; 13 | description: string; 14 | usage: string; 15 | }; 16 | 17 | export type AgentBaseConfig = { 18 | role: string; 19 | goal: string; 20 | backstory: string; 21 | tools: AgentTool[]; 22 | llm: LLM; 23 | strictJsonOutput: boolean; 24 | responseSchema: z.ZodSchema; 25 | }; 26 | /** Agent that sends a system prompt plus a task prompt to the configured LLM and parses the reply as JSON. */ 27 | export class Agent { 28 | public readonly backstory: string; 29 | public readonly goal: string; 30 | public readonly tools: AgentTool[]; 31 | public readonly role: string; 32 | public readonly strictJsonOutput: boolean; 33 | public readonly responseSchema: z.ZodSchema; 34 | 35 | constructor(private readonly config: AgentBaseConfig) { 36 | this.backstory = config.backstory; 37 | this.goal = config.goal; 38 | this.tools = config.tools; 39 | this.role = config.role; 40 | this.strictJsonOutput = config.strictJsonOutput; 41 | this.responseSchema = config.responseSchema; 42 | } 43 | /** Builds the system prompt; tools are rendered one per line because interpolating the array directly yields "[object Object]". */ 44 | private getSystemPrompt() { 45 | return ` 46 | You are a ${this.role} 47 | 48 | ${this.backstory} 49 | 50 | ${this.goal} 51 | 52 | ${this.tools.map((tool) => `- ${tool.name}: ${tool.description} (usage: ${tool.usage})`).join('\n')} 53 | 54 | ${this.strictJsonOutput ? 
'IMPORTANT: your output must always be a valid JSON object.' : ''} 55 | `; 56 | } 57 | 58 | private getUserTaskPrompt(task: AgentTask) { 59 | return task.getTaskPrompt(); 60 | } 61 | 62 | private getSystemMessage() { 63 | return new SystemMessage({ 64 | content: this.getSystemPrompt(), 65 | }); 66 | } 67 | 68 | private getHumanMessage(task: AgentTask) { 69 | return new HumanMessage({ 70 | content: [ 71 | { 72 | role: 'user', 73 | type: 'text', 74 | text: this.getUserTaskPrompt(task), 75 | }, 76 | ], 77 | }); 78 | } 79 | /** Invokes the LLM with the system and task messages and returns the JSON-parsed response. */ 80 | async perform(task: AgentTask) { 81 | const messages = [this.getSystemMessage(), this.getHumanMessage(task)]; 82 | 83 | type ResponseType = z.infer<AgentBaseConfig['responseSchema']>; 84 | 85 | const parser = new JsonOutputParser<ResponseType>(); 86 | 87 | const response = await this.config.llm.invokeAndParse(messages, parser); 88 | 89 | return response; 90 | } 91 | } 92 | 93 | export type AgentTaskConfig = { 94 | description: string; 95 | goal: string; 96 | expectedOutput: string; 97 | validOutputExamples: string; 98 | invalidOutputExamples: string; 99 | }; 100 | 101 | export class AgentTask { 102 | private input: string; 103 | private images: string[] | undefined; 104 | private memory: string | undefined; 105 | 106 | constructor(private readonly config: AgentTaskConfig) {} 107 | 108 | prepare(params: { images?: string[]; memory?: string; input: string }) { 109 | this.input = params.input; 110 | this.images = params.images; 111 | this.memory = params.memory; 112 | } 113 | 114 | getTaskPrompt() { 115 | return ` 116 | # Task description: 117 | ${this.config.description} 118 | 119 | # Task goal: 120 | ${this.config.goal} 121 | 122 | # Expected output: 123 | ${this.config.expectedOutput} 124 | 125 | # Example valid outputs: 126 | ${this.config.validOutputExamples}; 127 | 128 | # Example invalid outputs: 129 | ${this.config.invalidOutputExamples} 130 | 131 | # Images: 132 | ${this.images} at the end 133 | 134 | # Memory: 135 | ${this.memory} 136 | 137 | # User input: 138 | ${this.input} 139 | `; 
140 | } 141 | 142 | getTaskMessages() { 143 | const images = 144 | this.images?.map((image) => { 145 | return { 146 | type: 'image_url', 147 | image_url: { 148 | url: image, 149 | }, 150 | }; 151 | }) ?? []; 152 | 153 | return [ 154 | new HumanMessage({ 155 | content: [ 156 | { 157 | type: 'text', 158 | text: this.getTaskPrompt(), 159 | }, 160 | ...images, 161 | ], 162 | }), 163 | ]; 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /src/core/agents/feedback-agent/feedback-agent.prompt.ts: -------------------------------------------------------------------------------- 1 | import { Task } from '@/core/entities/task'; 2 | import { HumanMessage, SystemMessage } from '@langchain/core/messages'; 3 | 4 | export class EvaluationAgentSystemPrompt { 5 | constructor() {} 6 | 7 | getSystemPrompt() { 8 | return ` 9 | As an evaluator, you will be presented with five primary components to assist you in your role: 10 | 11 | 1. Web Task Instruction: This is a clear and specific directive provided in natural language, detailing the online activity to be carried out. These requirements may include conducting searches, verifying information, comparing prices, checking availability, or any other action relevant to the specified web service (such as Amazon, Apple, ArXiv, BBC News, Booking etc). 12 | 13 | 2. Result Screenshots: This is a visual representation of the screen showing the result or intermediate state of performing a web task. It serves as visual proof of the actions taken in response to the instruction, and may not represent everything the agent sees. 14 | 15 | 3. Result Response: This is a textual response obtained after the execution of the web task. It serves as textual result in response to the instruction. 16 | 17 | 4. Task History Summary: This is a summary of the task history of the agent, it serves as context proof of the actions performed by the agent. 18 | 19 | 5. 
Previous Task Result: This is the result of the previous task, it serves as context proof of the decision making of the agent. 20 | 21 | 22 | -- You DO NOT NEED to interact with web pages or perform actions such as booking flights or conducting searches on websites. 23 | -- You SHOULD NOT make assumptions based on information not presented in the screenshot when comparing it to the instructions. If you cannot find any information in the screenshot that matches the instruction, you can believe the information in the response. 24 | -- Your primary responsibility is to conduct a thorough assessment of the web task instruction against the outcome depicted in the screenshot and in the response, evaluating whether the actions taken align with the given instructions. 25 | -- NOTE that the instruction may involve more than one task, for example, locating the garage and summarizing the review. Failing to complete either task, such as not providing a summary, should be considered unsuccessful. 26 | -- NOTE that the screenshot is authentic, but the response provided by LLM is generated at the end of web browsing, and there may be discrepancies between the text and the screenshots. 27 | -- Note the difference: 1) Result response may contradict the screenshot, then the content of the screenshot prevails, 2) The content in the Result response is not mentioned on the screenshot, choose to believe the content. 28 | -- If you are not sure whether you should believe the content in the response, you should choose unknown. 29 | 30 | You should elaborate on how you arrived at your final evaluation and then provide a definitive verdict on whether the task has been successfully accomplished, either as 'success', 'failed', or 'unknown'. 31 | 32 | If your verdict is 'failed', you must provide a 'hint' and a 'memoryLearning' to the user to improve the result. 33 | This hint could be ideas of actions to perform to find the information you need. 
34 | The memoryLearning is a string that will explain to the agent what it should not do or what it should do differently later if it is in the same situation. 35 | 36 | IMPORTANT RULE: you must answer in JSON format including the result and explanation fields. 37 | 38 | Example 1: 39 | { "result": "success", "explanation": "From the two latest screenshots, we can see that the agent has successfully found the recipe and provided a summary of the reviews.", "hint": null } 40 | 41 | Example 2: 42 | { "result": "failed", "memoryLearning": "'Easy Vegetarian Spinach Lasagna was not a good choice'", "explanation": "The task was to find a vegetarian lasagna recipe with more than 100 reviews and a rating of at least 4.5 stars. The 'Easy Vegetarian Spinach Lasagna' has a rating of 4.6 stars but only 92 reviews, which does not meet the requirement of more than 100 reviews", "hint": "Go back and search for a recipe with more than 100 reviews." } 43 | 44 | Example 3: 45 | { "result": "failed", "memoryLearning": "'Cheese Burger should not be selected again'", "explanation": "The task was to find a vegetarian lasagna recipe with more than 100 reviews and a rating of at least 4.5 stars. The 'Cheese Burger' is not a vegetarian recipe", "hint": "Go back and search for a vegetarian recipe." } 46 | 47 | Example 4: 48 | { "result": "unknown", "memoryLearning": null, "explanation": "The agent did not provide a summary of the reviews." 
} 49 | `; 50 | } 51 | 52 | getSystemMessage() { 53 | return new SystemMessage({ 54 | content: this.getSystemPrompt(), 55 | }); 56 | } 57 | } 58 | 59 | export class EvaluationAgentUserPrompt { 60 | constructor() {} 61 | 62 | getUserPrompt({ 63 | pageUrl, 64 | task, 65 | answer, 66 | screenshotCount, 67 | taskHistorySummary, 68 | previousTaskResult, 69 | }: { 70 | pageUrl: string; 71 | task: string; 72 | answer: string; 73 | screenshotCount: number; 74 | taskHistorySummary: string; 75 | previousTaskResult: string; 76 | }) { 77 | return ` 78 | CURRENT PAGE URL: ${pageUrl} 79 | TASK: ${task} 80 | RESULT RESPONSE: ${answer} 81 | ${screenshotCount} screenshot at the end: 82 | TASK HISTORY SUMMARY: ${taskHistorySummary} 83 | PREVIOUS TASK RESULT: ${previousTaskResult} 84 | `; 85 | } 86 | 87 | getUserMessage({ 88 | pageUrl, 89 | screenshotUrls, 90 | task, 91 | answer, 92 | taskHistorySummary, 93 | previousTaskResult, 94 | }: { 95 | pageUrl: string; 96 | screenshotUrls: string[]; 97 | task: string; 98 | answer: string; 99 | taskHistorySummary: string; 100 | previousTaskResult: string; 101 | }) { 102 | if (!screenshotUrls.length) { 103 | throw new Error('No screenshot URLs provided to the evaluation agent'); 104 | } 105 | 106 | const last3Screenshots = screenshotUrls.slice(-3); 107 | 108 | const screenshotPrompts = last3Screenshots.map((url) => { 109 | return { 110 | type: 'image_url', 111 | image_url: { 112 | url, 113 | detail: 'high', 114 | }, 115 | }; 116 | }); 117 | 118 | return new HumanMessage({ 119 | content: [ 120 | { 121 | type: 'text', 122 | text: this.getUserPrompt({ 123 | pageUrl, 124 | task, 125 | answer, 126 | screenshotCount: last3Screenshots.length, 127 | taskHistorySummary, 128 | previousTaskResult, 129 | }), 130 | }, 131 | ...screenshotPrompts, 132 | ], 133 | }); 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /src/core/agents/feedback-agent/feedback-agent.ts: 
/**
 * Asks the LLM to judge the answer produced by the main agent.
 *
 * The evaluation request is made of the evaluation system prompt plus a user
 * message built from the task, the answer, the task history and screenshots.
 */
export class FeedbackAgent {
  constructor(private readonly llmService: LLM) {}

  /**
   * Sends one evaluation request and returns the parsed JSON verdict.
   *
   * @param screenshotUrls - Screenshots attached to the request; the user
   *   prompt builder rejects an empty list.
   * @returns The verdict parsed from the LLM's JSON answer. The value is typed
   *   as `EvaluationResponse` but is not validated against the schema here.
   */
  async evaluate({
    pageUrl,
    screenshotUrls,
    task,
    answer,
    taskHistorySummary,
    previousTaskResult,
  }: {
    pageUrl: string;
    screenshotUrls: string[];
    task: string;
    answer: string;
    previousTaskResult: string;
    taskHistorySummary: string;
  }): Promise<EvaluationResponse> {
    const systemMessage = new EvaluationAgentSystemPrompt().getSystemMessage();
    const humanMessage = new EvaluationAgentUserPrompt().getUserMessage({
      pageUrl,
      screenshotUrls,
      task,
      answer,
      taskHistorySummary,
      previousTaskResult,
    });

    // NOTE(review): the type arguments were missing from the visible source;
    // they are restored on the assumption that `invokeAndParse` is generic
    // over the parsed type — confirm against the LLM interface.
    const parser = new JsonOutputParser<EvaluationResponse>();

    const response = await this.llmService.invokeAndParse<EvaluationResponse>(
      [systemMessage, humanMessage],
      parser,
    );

    console.log('FeedbackAgent response', JSON.stringify(response, null, 2));

    return response;
  }
}
/**
 * Shape of the JSON verdict expected from the evaluation agent.
 *
 * NOTE(review): the evaluation prompt's own examples return `null` for `hint`
 * and `memoryLearning`, while this schema declares plain strings. Consumers
 * should treat both fields as possibly null at runtime.
 */
export const EvaluationResponseSchema = z.object({
  result: z
    .union([z.literal('success'), z.literal('failed'), z.literal('unknown')])
    .describe('The result of the evaluation.'),
  explanation: z
    .string()
    .describe('The explanation and criteria of your result.'),
  hint: z.string().describe('A hint to the user to improve the result.'),
  memoryLearning: z
    .string()
    .describe('A memoryLearning to the user to improve the result.'),
});

/** Static type inferred from {@link EvaluationResponseSchema}. */
export type EvaluationResponse = z.infer<typeof EvaluationResponseSchema>;
/**
 * Builds the system prompt of the manager (browser automation) agent.
 *
 * NOTE(review): the visible source had lost every `<...>` fragment inside the
 * prompt text (element markup examples and the parameter placeholders of the
 * function list). They are reconstructed below from the surrounding wording
 * and from the action parameters used elsewhere in the prompt; confirm the
 * exact markup against the DOM serializer output.
 */
export class ManagerAgentPrompt {
  /**
   * @param maxActionPerStep - Maximum number of actions the model may plan per
   *   task; interpolated at the end of the rules.
   */
  constructor(private readonly maxActionPerStep: number) {}

  /** Returns the numbered rule list (response format, actions, scrolling…). */
  importantRules() {
    return `
    1. RESPONSE FORMAT: You must ALWAYS respond with valid JSON.

    ${ManagerResponseExamples}

    2. ACTIONS: You can specify multiple actions in the list to be executed in sequence. But always specify only one action name per item.

    Common action sequences:

    // Form filling
    actions: [
        { "name": "fillInput", "params": { "index": 1, "text": "{{user_email}}" } },
        { "name": "fillInput", "params": { "index": 2, "text": "{{user_password}}" } },
        { "name": "fillInput", "params": { "index": 5, "text": "13 street name, 123456 city, country" } },
        { "name": "fillInput", "params": { "index": 6, "text": "1234567890" } },
        { "name": "scrollDown", "params": {} },
    ]

    // Flow that does not work
    actions: [
        { "name": "clickElement", "params": { "index": 2 } },
        { "name": "clickElement", "params": { "index": 2 } },
        { "name": "clickElement", "params": { "index": 2 } },
        { "name": "clickElement", "params": { "index": 2 } },
    ]


    "index" corresponds to the index of the element you see on the screenshot.
    Never use other indexes than the ones provided in the element list.

    Example with wrong index:
    actions: [
        { "name": "fillInput", "params": { "index": "allow all", "text": "username" } },
        { "name": "fillInput", "params": { "index": "accept", "text": "password" } },
    ]

    - NEVER plan to trigger a success or failure action among other actions, you should always trigger a success or failure action alone.
    - NEVER plan to do something after a scroll action since the page will change.
    - NEVER plan to scroll down or up if there is a cookie popup or any consent popup on screen. First accept or close the popup.
    - When the page is truncated, scroll down to view more elements especially if you are filling a form.
    - Trigger result means you have completed the task and we can ask the evaluator to evaluate the test result.
    - Sometimes, the user will provide variables surrounded by double brackets {{}}. You should keep them exactly as they are, we will replace them with the actual value later.

    Wrong example (trigger success among other actions):

    actions: [
        { "name": "fillInput", "params": { "index": 1, "text": "{{user_email}}" } },
        { "name": "fillInput", "params": { "index": 2, "text": "{{user_password}}" } },
        { "name": "clickElement", "params": { "index": 2 } },
        { "name": "triggerResult", "params": { "data": "" } },
    ]

    Correct example (trigger success alone):

    actions: [
        { "name": "fillInput", "params": { "index": 1, "text": "{{user_email}}" } },
        { "name": "fillInput", "params": { "index": 2, "text": "{{user_password}}" } },
        { "name": "clickElement", "params": { "index": 2 } },
    ]

    [... later if you believe the task is completed ...]

    actions: [
        { "name": "triggerResult", "params": { "data": "The recipe named 'Vegetarian Four Cheese Lasagna' has 4.6-star, 181 reviews, Servings 8, matches your request. It is available at https://www.allrecipes.com/recipe/123456/vegetarian-four-cheese-lasagna/" } },
    ]

    3. ELEMENT INTERACTION:
    - Only use indexes that exist in the provided element list.
    - Each element has a unique index number (e.g., "[33]__<button>").
    - Elements with empty index "[]" are non-interactive (for context only).
    - DO NOT try to fill an input field that you already filled with a value.

    4. **NAVIGATION & ERROR HANDLING:**
    - **Track failed actions** and **do not repeat the same mistake**.
    - **Never enter a loop** where the same action fails repeatedly.
    - Example of a failure loop history (❌ Incorrect - must be avoided):

        -------
        "Scroll up to find the star rating and verify if zucchini is included in the ingredients.",
        "Scroll up to find the star rating and verify if zucchini is included in the ingredients.",
        "Scroll down to find the star rating and ingredients list.",
        "Scroll down to find the star rating and ingredients list.",
        "Search for another vegetarian lasagna recipe with zucchini and at least a four-star rating."
        -------

    - If an evaluator **rejects your result**, you **must adjust your approach** instead of retrying blindly.
    - **Before retrying, ask yourself:**
        - Did I already try this exact action?
        - Is there an alternative approach I can take?
        - Can I gather more information before acting?

    5. SCROLLING BEHAVIOR:
    - **Never plan to scroll if there is a popup (cookies, modals, alerts, etc.).**
    - **After scrolling, always verify progress** before scrolling again.
    - **Avoid infinite scrolling loops.**

    6. TASK COMPLETION:
    - When you evaluate the task, you should always ask yourself if the Success condition given by the user is met. If it is, use the triggerResult action as the last action.
    - If you are running out of steps (current step), think about speeding it up, and ALWAYS use the triggerResult action as the last action.

    7. VISUAL CONTEXT:
    - When an image is provided, use it to understand the page layout.
    - Bounding boxes with labels correspond to element indexes.
    - Each bounding box and its label have the same color.
    - Most often the label is inside the bounding box, on the top right.
    - Visual context helps verify element locations and relationships.
    - Sometimes labels overlap, so use the context to verify the correct element.
    - Sometimes it's easier to extract the information from the content of the page than from the visual context (especially when you are dealing with a list of products). To do this, use the extractContent action.

    8. FORM FILLING:
    - If you fill an input field and your action sequence is interrupted, most often a list with suggestions popped up under the field and you need to first select the right element from the suggestion list.
    - Sometimes when filling a date field, a calendar popup is displayed which can make the action sequence interrupted so you need to first select the right date from the calendar.
    - If you fill an input field and you see it's still empty, you need to fill it again.

    9. ACTION SEQUENCING:
    - Actions are executed in the order they appear in the list.
    - Each action should logically follow from the previous one.
    - Only provide the action sequence until you think the page will change.
    - Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page like saving, extracting, checkboxes...
    - only use multiple actions if it makes sense.
    - After you have scrolled down or up, you should always ask yourself if you achieved your goal. If not, you should scroll down or up again.
    - When you will define the next goal, make sure to be as specific as possible to avoid misleading the agent. Ask yourself the following questions:
        - Does my goal go against the constraints of my end goal?
        - Does my goal do something that has been done multiple times (loop)?
        - Does my goal go against my memory learning?
        - Did I already try this action or got this information in my memory learning?
        -> Readjust your goal if needed.


    10. RESULT:
    - You should always provide a result in the triggerResult action.
    - The result should be a string that describes the result of the task and matches the user's goal or question.
    - DO NOT hallucinate the result.
    - Your result should ALWAYS be based on what you see, or what you extract from the content of the page and not on what you think you know.
    - When you will trigger the result, pay attention to the feedback you will receive. This feedback will contain the reason why the task failed and the hint to fix it. It is paramount to you to follow the hint.

    Use a maximum of ${this.maxActionPerStep} actions per task.
    `;
  }

  /** Describes the structure of the human message the model will receive. */
  inputFormat() {
    return `
    INPUT STRUCTURE:
    1. MEMORY LEARNINGS: A list of memory learning you should know about your previous actions. This will prevent you from doing the same mistakes over and over again. This can be used also to store information you have extracted in the past.
    2. CURRENT URL: The webpage you're currently on.
    3. EXTRACTED DOM ELEMENTS: List in the format:
      [index]__<element_type attributes>element_text</element_type>
      - index: Numeric identifier for interaction (if empty, the element is non-interactive).
      - element_type: HTML element type (button, input, select, etc.).
      - element_text: Visible text or element description.
      - attributes: HTML attributes of the element used for context.

    4. TASK: The task asked by the user.
      - Use it to define the actions you have to perform.
      - No failure is tolerated and success is rewarded.
      - You must be sure of the data you provide. Make sure to provide the exact data. Open products and navigate until you find the information you need.
      - It's better to navigate a bit more than to provide wrong information.

    Notes:
    - Only elements with numeric indexes are interactive.
    - Elements with empty index [] provide context but cannot be interacted with.

    Interactive examples:
    [14]__<button id="submit"></button>
    [15]__<input type="text" name="search" />

    Non-interactive examples:
    []__<div>Non interactive div</div>
    []__<span>Non interactive span</span>
    []__Non interactive text
    `;
  }

  /**
   * Assembles the full system prompt: role, current date, input format,
   * rules and the list of callable functions.
   */
  getSystemPrompt() {
    return `
    You are a precise Browser Automation Agent that interacts with websites through structured commands. Your role is to:

    1. Analyze the provided webpage elements and structure.
    2. Plan a sequence of actions to achieve the task provided by the user.
    3. Respond with valid JSON containing your action sequence.
    4. When you consider the scenario is complete and we can evaluate the test result, use the triggerResult to pass some data to the evaluator.

    Current date and time: ${new Date().toISOString()}

    ${this.inputFormat()}

    ${this.importantRules()}

    Functions:
    - clickElement: { index: <number> }
    - fillInput: { index: <number>, text: <string> }
    - scrollDown: { goal: <string> }
    - scrollUp: { goal: <string> }
    - goToUrl: { url: <string> }
    - triggerResult: { data: <string> }
    - goBack: {}
    - extractContent: {}

    Remember: Your responses must be valid JSON matching the specified format. Each action in the sequence must be valid.
    `;
  }

  /** Wraps {@link getSystemPrompt} in a `SystemMessage`. */
  getSystemMessage() {
    return new SystemMessage({
      content: this.getSystemPrompt(),
    });
  }
}

/**
 * Builds the per-step human message of the manager agent: two screenshots
 * (pristine, then highlighted) followed by the textual page state.
 */
export class ManagerAgentHumanPrompt {
  constructor() {}

  /**
   * @param pristineScreenshotUrl - Screenshot without the element highlights;
   *   sent first.
   * @param screenshotUrl - Screenshot sent second (the highlighted one,
   *   judging by the note on `pristineScreenshotUrl` in the original).
   * @param pixelAbove - Value printed in the "PIXEL ABOVE" hint.
   * @param pixelBelow - Value printed in the "PIXEL BELOW" hint.
   */
  getHumanMessage({
    memoryLearnings,
    serializedTasks,
    stringifiedDomState,
    screenshotUrl,
    /** This is the screenshot without the highlight */
    pristineScreenshotUrl,
    pageUrl,
    pixelAbove,
    pixelBelow,
  }: {
    memoryLearnings: string;
    serializedTasks: string;
    stringifiedDomState: string;
    screenshotUrl: string;
    pristineScreenshotUrl: string;
    pageUrl: string;
    pixelAbove: number;
    pixelBelow: number;
  }) {
    return new HumanMessage({
      content: [
        {
          type: 'image_url',
          image_url: {
            url: pristineScreenshotUrl,
            detail: 'high',
          },
        },
        {
          type: 'image_url',
          image_url: {
            url: screenshotUrl,
            detail: 'high',
          },
        },
        {
          type: 'text',
          text: `
          MEMORY LEARNINGS: ${memoryLearnings}

          CURRENT URL: ${pageUrl}

          ... ${pixelAbove} PIXEL ABOVE - SCROLL UP TO SEE MORE ELEMENTS

          EXTRACTED DOM ELEMENTS: ${stringifiedDomState} that you can match with the screenshot.

          ... ${pixelBelow} PIXEL BELOW - SCROLL DOWN TO SEE MORE ELEMENTS

          USER TASK AND TASK HISTORY: ${serializedTasks}
          `,
        },
      ],
    });
  }
}
/**
 * Dependencies and tuning knobs needed to build an {@link Openator}.
 */
export type OpenatorConfig = {
  /** Falls back to `DEFAULT_AGENT_MAX_ACTIONS_PER_TASK` when omitted. */
  maxActionsPerTask?: number;
  /** Falls back to `DEFAULT_AGENT_MAX_RETRIES` when omitted. */
  maxRetries?: number;
  variables: Variable[];

  taskManager: TaskManagerService;
  domService: DomService;
  feedbackAgent: FeedbackAgent;
  browserService: Browser;
  llmService: LLM;
  reporter: AgentReporter;
  eventBus?: EventBusInterface;
  /** TODO: replace this experimental agent */
  summarizer: SummarizeAgent;
  summarizeTask: AgentTask;
};

/**
 * Manager agent: repeatedly asks the LLM for the next task (a goal plus a
 * list of browser actions), executes the actions, and stops once a result has
 * been accepted by the feedback agent or a retry budget is exhausted.
 */
export class Openator {
  /** Pause inserted after every executed action, in milliseconds. */
  private msDelayBetweenActions = 1000;
  /** DOM hash captured when the current task was planned. */
  private lastDomStateHash: string | null = null;
  private isSuccess = false;
  private isFailure = false;
  private reason = '';
  private result = '';
  /** Consecutive failures (action errors or unusable LLM answers). */
  private retries = 0;
  private stepCount = 0;
  /** Number of results rejected by the feedback agent so far. */
  private feedbackRetries = 0;
  private readonly variables: Variable[];
  private currentRun: Run | null = null;
  // Typed like the config entry it is assigned from; the visible source had
  // lost the type arguments of the original annotation.
  private summarizer: SummarizeAgent;
  private summarizeTask: AgentTask;

  private readonly maxActionsPerTask: number;
  private readonly maxRetries: number;

  private readonly taskManager: TaskManagerService;
  private readonly domService: DomService;
  private readonly browserService: Browser;
  private readonly llmService: LLM;
  private readonly reporter: AgentReporter;
  private readonly eventBus: EventBusInterface | undefined;
  private readonly feedbackAgent: FeedbackAgent;
  /** Notes fed back to the LLM on every step (feedback lessons, extracts). */
  public readonly memoryLearnings: string[] = [];

  constructor(config: OpenatorConfig) {
    this.taskManager = config.taskManager;
    this.domService = config.domService;
    this.browserService = config.browserService;
    this.llmService = config.llmService;
    this.reporter = config.reporter;
    this.variables = config.variables;
    this.feedbackAgent = config.feedbackAgent;

    this.maxActionsPerTask =
      config.maxActionsPerTask ?? DEFAULT_AGENT_MAX_ACTIONS_PER_TASK;
    this.maxRetries = config.maxRetries ?? DEFAULT_AGENT_MAX_RETRIES;
    this.eventBus = config.eventBus;

    this.summarizer = config.summarizer;
    this.summarizeTask = config.summarizeTask;
  }

  /** Marks the run as successful and publishes the update. */
  private onSuccess(result: string) {
    this.reporter.success(`Manager agent completed successfully: ${result}`);
    this.isSuccess = true;
    this.result = result;
    this.currentRun?.setSuccess(result);
    this.emitRunUpdate();
  }

  /** Marks the run as failed and publishes the update. */
  private onFailure(reason: string) {
    this.reporter.failure(`Manager agent failed: ${reason}`);
    this.isFailure = true;
    this.reason = reason;
    this.currentRun?.setFailure(reason);
    this.emitRunUpdate();
  }

  private async beforeAction(action: TaskAction) {
    this.reporter.loading(`Performing action ${action.data.name}...`);
  }

  private async afterAction(action: TaskAction) {
    this.reporter.success(`Performing action ${action.data.name}...`);
  }

  private incrementFeedbackRetries() {
    this.feedbackRetries += 1;
  }

  private incrementRetries() {
    this.retries += 1;
  }

  private resetRetries() {
    this.retries = 0;
  }

  private incrementStepCount() {
    this.stepCount += 1;
  }

  /** True once the run ended, successfully or not. */
  get isCompleted() {
    return this.isSuccess || this.isFailure;
  }

  /**
   * Launches the browser on `startUrl`, registers `initialPrompt` as the end
   * goal and runs the agent loop until completion.
   *
   * Both strings go through `VariableString` with the configured variables;
   * the URL is launched with `dangerousValue()` while the goal is stored with
   * `publicValue()`.
   *
   * @param jobId - Optional identifier forwarded to `Run.InitRunning`.
   */
  async start(
    startUrl: string,
    initialPrompt: string,
    jobId?: string,
  ): Promise<OpenatorResult> {
    const vStartUrl = new VariableString(startUrl, this.variables);

    await this.browserService.launch(vStartUrl.dangerousValue());

    const vInitialPrompt = new VariableString(initialPrompt, this.variables);

    this.taskManager.setEndGoal(vInitialPrompt.publicValue());

    return this.run(jobId);
  }

  /** Publishes the current run on the event bus, when both exist. */
  private async emitRunUpdate() {
    if (this.currentRun) {
      this.eventBus?.emit('run:update', this.currentRun);
    }
  }

  /**
   * Main loop: plan a task, execute it, repeat until success, failure or the
   * retry budget is exhausted.
   *
   * Written as a plain async method (no `new Promise(async …)` wrapper) so
   * that an exception thrown anywhere in the loop rejects the returned
   * promise instead of leaving it pending forever.
   */
  private async run(jobId?: string): Promise<OpenatorResult> {
    this.reporter.loading('Starting manager agent');

    const run = Run.InitRunning(this.taskManager.getEndGoal(), jobId);
    this.currentRun = run;

    this.emitRunUpdate();

    while (!this.isCompleted) {
      if (this.retries >= this.maxRetries) {
        this.onFailure('Max retries reached');

        return {
          status: 'failed',
          result: this.result,
          stepCount: this.stepCount,
          reason:
            'Max number of retries reached. The agent was not able to complete the test.',
        };
      }

      this.incrementStepCount();

      this.reporter.loading('Defining next task...');

      const task = await this.defineNextTask();

      run.addTask(task);
      run.executeAction();

      this.emitRunUpdate();

      this.reporter.loading(`Executing task: ${task.goal}`);

      await this.executeTask(task);
    }

    // A failure ends the run immediately with the recorded reason.
    if (this.isFailure) {
      run.setFailure(this.reason);
      this.emitRunUpdate();

      return {
        status: 'failed',
        reason: this.reason,
        result: this.result,
        stepCount: this.stepCount,
      };
    }

    await this.domService.resetHighlightElements();

    this.emitRunUpdate();

    return {
      status: this.isSuccess ? 'success' : 'failed',
      reason: this.reason,
      result: this.result,
      stepCount: this.stepCount,
    };
  }

  /**
   * Compares the DOM hash captured at planning time with a fresh one.
   * TODO: fix this
   */
  private async didDomStateChange() {
    const { domStateHash: currentDomStateHash } =
      await this.domService.getInteractiveElements(false);

    return this.lastDomStateHash !== currentDomStateHash;
  }

  /**
   * Ensures `triggerResult` is never executed together with other actions:
   * when the plan holds two or more actions, every `triggerResult` is
   * dropped so the result is only evaluated once it is planned alone.
   */
  private ensureNoTriggerSuccessOrFailureAmongOtherActions(
    actions: ManagerAgentAction[],
  ) {
    if (actions.length < 2) {
      return actions;
    }

    return actions.filter((action) => action.name !== 'triggerResult');
  }

  /**
   * Asks the LLM for the next task based on the current page state.
   *
   * When the LLM call or the parsing of its answer fails, the failure counts
   * as a retry and an action-less "Keep trying" task is returned. Without
   * the retry increment a persistently failing LLM would keep the run loop
   * spinning forever, since an empty task never changes any counter.
   */
  private async defineNextTask(): Promise<Task> {
    this.currentRun?.think();
    this.emitRunUpdate();

    const parser = new JsonOutputParser<ManagerResponse>();

    const systemMessage = new ManagerAgentPrompt(
      this.maxActionsPerTask,
    ).getSystemMessage();

    const {
      screenshot,
      pristineScreenshot,
      stringifiedDomState,
      domStateHash,
      pixelAbove,
      pixelBelow,
    } = await this.domService.getInteractiveElements();

    this.lastDomStateHash = domStateHash;

    const humanMessage = new ManagerAgentHumanPrompt().getHumanMessage({
      memoryLearnings: this.memoryLearnings.join(' ; '),
      serializedTasks: this.taskManager.getSerializedTasks(),
      pristineScreenshotUrl: pristineScreenshot,
      screenshotUrl: screenshot,
      stringifiedDomState,
      pageUrl: this.browserService.getPageUrl(),
      pixelAbove,
      pixelBelow,
    });

    const messages = [systemMessage, humanMessage];

    try {
      const parsedResponse =
        await this.llmService.invokeAndParse<ManagerResponse>(messages, parser);

      const safeActions = this.ensureNoTriggerSuccessOrFailureAmongOtherActions(
        parsedResponse.actions,
      );

      const task = Task.InitPending(
        parsedResponse.currentState.nextGoal,
        safeActions,
      );

      this.taskManager.add(task);

      return task;
    } catch (error: unknown) {
      console.error('Error parsing agent response:', error);
      this.incrementRetries();
      return Task.InitPending('Keep trying', []);
    }
  }

  /** Extracts a printable message from whatever was thrown. */
  private describeError(error: unknown): string {
    const message = (error as { message?: unknown } | null | undefined)
      ?.message;

    return message == null ? 'Unknown error' : String(message);
  }

  /**
   * Executes the actions of a task in order.
   *
   * Execution stops at the first action that throws: the remaining actions
   * were planned on the assumption that the previous ones succeeded, and
   * running them could mark the task as completed again and reset the retry
   * counter. It also stops when the DOM changed between two actions.
   */
  private async executeTask(task: Task) {
    task.start();

    await this.domService.resetHighlightElements();

    for (const [i, action] of task.actions.entries()) {
      try {
        action.start();
        this.emitRunUpdate();

        if (i > 0 && (await this.didDomStateChange())) {
          action.cancel('Dom state changed, need to reevaluate.');
          task.cancel('Dom state changed, need to reevaluate.');
          this.taskManager.update(task);
          this.reporter.info('Dom state changed, need to reevaluate.');
          return;
        }

        await this.executeAction(action);
        this.emitRunUpdate();

        await new Promise((resolve) =>
          setTimeout(resolve, this.msDelayBetweenActions),
        );

        task.complete();
        this.emitRunUpdate();
        this.resetRetries();
        this.taskManager.update(task);
      } catch (error: unknown) {
        const failure = `Task failed with error: ${this.describeError(error)}`;

        action.fail(failure);
        task.fail(failure);

        this.taskManager.update(task);
        this.incrementRetries();
        this.emitRunUpdate();

        // Do not run the rest of the plan nor report success.
        return;
      }
    }

    this.reporter.success(task.goal);
  }

  /**
   * Resolves the screen coordinates of an indexed element.
   *
   * @throws Error when the DOM service has no coordinates for `index`.
   */
  private requireCoordinates(index: number): Coordinates {
    const coordinates = this.domService.getIndexSelector(index);

    if (!coordinates) {
      throw new Error('Index or coordinates not found');
    }

    return coordinates;
  }

  /**
   * Performs a single action and updates its status.
   *
   * Errors are thrown to the caller (`executeTask`), which marks the action
   * and the task as failed.
   */
  private async executeAction(action: TaskAction) {
    await this.beforeAction(action);

    this.emitRunUpdate();

    switch (action.data.name) {
      case 'clickElement': {
        const coordinates = this.requireCoordinates(action.data.params.index);

        await this.domService.resetHighlightElements();
        await this.domService.highlightElementPointer(coordinates);
        await this.browserService.mouseClick(coordinates.x, coordinates.y);
        await this.domService.resetHighlightElements();

        action.complete();
        break;
      }

      case 'fillInput': {
        const coordinates = this.requireCoordinates(action.data.params.index);

        await this.domService.highlightElementPointer(coordinates);

        const variableString = new VariableString(
          action.data.params.text,
          this.variables,
        );

        await this.browserService.fillInput(variableString, coordinates);
        await this.domService.resetHighlightElements();

        action.complete();
        break;
      }

      case 'scrollDown': {
        await this.browserService.scrollDown();
        await this.domService.resetHighlightElements();
        await this.domService.highlightElementWheel('down');

        action.complete();
        break;
      }

      case 'scrollUp': {
        await this.browserService.scrollUp();
        await this.domService.resetHighlightElements();
        await this.domService.highlightElementWheel('up');

        action.complete();
        break;
      }

      case 'takeScreenshot': {
        await this.domService.resetHighlightElements();
        await this.domService.highlightForSoM();

        action.complete();
        break;
      }

      case 'goToUrl': {
        await this.browserService.goToUrl(action.data.params.url);

        action.complete();
        break;
      }

      case 'goBack': {
        await this.browserService.goBack();

        action.complete();
        break;
      }

      case 'extractContent': {
        const content = await this.browserService.extractContent();

        this.summarizeTask.prepare({
          images: [],
          memory: '',
          input: `Our goal is to ${this.taskManager.getEndGoal()} Here is the content extracted from the page: ${content}.`,
        });

        const summarized = await this.summarizer.perform(this.summarizeTask);

        this.memoryLearnings.push(
          `Key takeaways from content on page ${this.browserService.getPageUrl()}: ${summarized.takeaways}`,
        );
        action.complete(summarized.takeaways);
        break;
      }

      case 'triggerResult': {
        const { pristineScreenshot } = await this.domService.getDomState();
        const answer = action.data.params.data;
        const { result, explanation, hint, memoryLearning } =
          await this.feedbackAgent.evaluate({
            pageUrl: this.browserService.getPageUrl(),
            screenshotUrls: [pristineScreenshot],
            task: this.taskManager.getEndGoal(),
            answer,
            taskHistorySummary: this.taskManager.getSerializedTasks(),
            previousTaskResult: JSON.stringify(
              this.taskManager.getLatestTaskPerformed()?.objectForLLM(),
            ),
          });

        // An "unknown" verdict is accepted like a success.
        if (result === 'success' || result === 'unknown') {
          action.complete(explanation);
          this.onSuccess(answer);
          break;
        }

        // Same comparison as the retry check of the run loop: the run fails
        // on the rejection that follows `maxRetries` earlier rejections.
        if (this.feedbackRetries >= this.maxRetries) {
          this.onFailure('Max feedback retries reached');
        }

        action.fail(JSON.stringify({ result, explanation, hint }));

        // The evaluator may answer with a null lesson; keep memory clean.
        if (memoryLearning) {
          this.memoryLearnings.push(memoryLearning);
        }

        this.incrementFeedbackRetries();
        break;
      }

      default: {
        // The LLM answer is not validated, so an unexpected name can reach
        // this point; fail loudly instead of reporting a silent success.
        const unsupported: { name?: unknown } = action.data;
        throw new Error(`Unsupported action: ${String(unsupported.name)}`);
      }
    }

    await this.afterAction(action);
    this.emitRunUpdate();
  }
}
/**
 * Builds the `description` field shared by most actions.
 *
 * @param example - Sample description quoted in the field's hint.
 */
const actionDescription = (example: string) =>
  z
    .string()
    .describe(
      `A short description of the action you want to perform. E.g '${example}'`,
    );

/**
 * Every action the manager agent may plan, discriminated by `name`.
 */
export const ManagerAgentActionSchema = z
  .union([
    z.object({
      name: z.literal('extractContent'),
      params: z.null(),
      description: actionDescription('Extract the content of the current page'),
    }),
    z.object({
      name: z.literal('clickElement'),
      params: z.object({
        index: z.number(),
      }),
      description: actionDescription('Click the login button'),
    }),
    z.object({
      name: z.literal('fillInput'),
      params: z.object({
        index: z.number(),
        text: z.string(),
      }),
      description: actionDescription('Fill the email input'),
    }),
    z.object({
      name: z.literal('goBack'),
      description: actionDescription('Go back to the previous page'),
      params: z.null(),
    }),
    z.object({
      name: z.literal('scrollDown'),
      description: actionDescription('Scroll down to find the login form'),
      params: z.null(),
    }),
    z.object({
      name: z.literal('scrollUp'),
      description: actionDescription('Scroll up to find the login form'),
      params: z.null(),
    }),
    z.object({
      name: z.literal('goToUrl'),
      params: z.object({
        url: z.string(),
      }),
      description: actionDescription('Go to the login page'),
    }),
    z.object({
      name: z.literal('takeScreenshot'),
      description: actionDescription('Take a screenshot of the current page'),
      params: z.null(),
    }),
    z
      .object({
        name: z.literal('triggerResult'),
        params: z.object({
          data: z
            .string()
            .describe(
              'The data to be passed to the evaluator. Most likely the response to the user task.',
            ),
        }),
        description: z.null(),
      })
      .describe(
        'Trigger result means you have completed the task and we can ask the evaluator to evaluate the test result.',
      ),
  ])
  .describe(
    'The action to be executed. e.g. { name: "clickElement", params: { index: 2 }, description: "Click the login button" }',
  );

/** Static type inferred from {@link ManagerAgentActionSchema}. */
export type ManagerAgentAction = z.infer<typeof ManagerAgentActionSchema>;

/**
 * Full answer expected from the manager agent: its reasoning state plus the
 * list of actions to execute.
 */
export const ManagerAgentResponseSchema = z.object({
  currentState: z.object({
    evaluationPreviousGoal: z.string(),
    memory: z.string(),
    nextGoal: z.string(),
  }),
  actions: z.array(ManagerAgentActionSchema),
});

/** Static type inferred from {@link ManagerAgentResponseSchema}. */
export type ManagerResponse = z.infer<typeof ManagerAgentResponseSchema>;
/**
 * Example manager responses, kept as a plain string (presumably interpolated
 * into the agent prompt — confirm against the prompt builder).
 *
 * Each example is strict JSON: no trailing commas, and every action carries
 * the `name`, `params` and `description` keys, with `params: null` for
 * parameterless actions such as `scrollDown`, matching the action schema.
 */
export const ManagerResponseExamples = `

Example Response 1:
{
  "currentState": {
    "evaluationPreviousGoal": "Cookies have been accepted. We can now proceed to login.",
    "memory": "Cookies accepted, ready to login. End goal is to login to my account.",
    "nextGoal": "Display the login form by clicking the login button"
  },
  "actions": [{"name": "clickElement", "params": {"index": 3}, "description": "Click the login button"}]
}

Example Response 2:
{
  "currentState": {
    "evaluationPreviousGoal": "An element seems to prevent us from logging in. We need close the cookies popup.",
    "memory": "Our end goal is to login to my account. We need to close the cookies popup and then we can proceed to login.",
    "nextGoal": "Close cookies popup and then login."
  },
  "actions": [{"name": "clickElement", "params": {"index": 5}, "description": "Close the cookies popup"}]
}

Example Response 3:
{
  "currentState": {
    "evaluationPreviousGoal": "We need to scroll down to find the login form.",
    "memory": "We need to scroll down to find the login form. End goal is to login to my account.",
    "nextGoal": "Scroll down to find the login form."
  },
  "actions": [{"name": "scrollDown", "params": null, "description": "Scroll down to find the login form"}]
}
`;
/** Lifecycle states a run can be in. */
export type RunStatus = 'pending' | 'running' | 'completed' | 'failed';

/** What the agent is currently doing inside a run. */
export type RunBrainState = 'thinking' | 'executingAction';

/**
 * Mutable record of one agent run: its scenario, the tasks attached to it,
 * its status, and the final result or failure reason.
 */
export class Run {
  public readonly id: string;
  private _status: RunStatus;
  private _tasks: Task[];
  private _retries: number;
  private _brainState: RunBrainState;
  private _resultReason: string;
  private _result: string;

  /**
   * @param scenario Description of what the run should achieve.
   * @param jobId Optional identifier; a random UUID is generated when it is
   *   missing or empty.
   */
  constructor(
    public readonly scenario: string,
    jobId?: string,
  ) {
    this.id = jobId || crypto.randomUUID();
    this._status = 'running';
    this._tasks = [];
    // Must start at 0: incrementing an uninitialised field yields NaN.
    this._retries = 0;
    this._brainState = 'thinking';
    this._resultReason = '';
    this._result = '';
  }

  get status(): RunStatus {
    return this._status;
  }

  get tasks() {
    return this._tasks;
  }

  /** Number of times `retry()` has been called on this run. */
  get retries(): number {
    return this._retries;
  }

  get brainState() {
    return this._brainState;
  }

  get resultReason() {
    return this._resultReason;
  }

  get result() {
    return this._result;
  }

  /** Creates a run; the constructor already puts it in the `running` status. */
  static InitRunning(scenario: string, jobId?: string) {
    return new Run(scenario, jobId);
  }

  think() {
    this._brainState = 'thinking';
  }

  executeAction() {
    this._brainState = 'executingAction';
  }

  /** Records one more retry. */
  retry() {
    this._retries += 1;
  }

  run() {
    this._status = 'running';
  }

  addTask(task: Task) {
    this._tasks.push(task);
  }

  /** Replaces the stored task with the same id; other tasks are untouched. */
  updateTask(task: Task) {
    this._tasks = this._tasks.map((t) => (t.id === task.id ? task : t));
  }

  /** Marks the run as completed and stores the final answer. */
  setSuccess(answer: string) {
    this._status = 'completed';
    this._result = answer;
  }

  /** Marks the run as failed and stores the reason. */
  setFailure(reason: string) {
    this._status = 'failed';
    this._resultReason = reason;
  }
}
/**
 * A goal together with the ordered actions planned to reach it, plus the
 * task's own status and optional cancellation/failure reason.
 */
export class Task {
  constructor(
    public readonly id: string,
    public readonly goal: string,
    readonly actions: TaskAction[],
    private _status: TaskStatus,
    private _reason: string | undefined = undefined,
  ) {}

  /**
   * Builds a pending task with a fresh UUID, wrapping each raw action in a
   * `TaskAction`.
   *
   * A missing `actions` value is treated as an empty list. The fallback has to
   * be applied before `.map`, otherwise it can never take effect.
   */
  static InitPending(goal: string, actions: ManagerAgentAction[]) {
    const taskActions = (actions ?? []).map((action) => new TaskAction(action));

    return new Task(crypto.randomUUID(), goal, taskActions, 'pending');
  }

  get status() {
    return this._status;
  }

  get reason() {
    return this._reason;
  }

  /** Actions whose status is still `pending`, in their original order. */
  get pendingActions() {
    return this.actions.filter((action) => action.status === 'pending');
  }

  /** First pending action, or `null` when none is left. */
  get nextPendingAction(): TaskAction | null {
    return this.pendingActions[0] ?? null;
  }

  /**
   * Completes the action with the given id and, once no pending action
   * remains, completes the task as well.
   *
   * @throws Error when no action has this id.
   */
  completeAction(id: string) {
    const action = this.actions.find((candidate) => candidate.id === id);

    if (!action) {
      throw new Error('Action not found');
    }

    // Complete exactly once; a second call would only repeat the same writes.
    action.complete();

    if (!this.pendingActions.length) {
      this.complete();
    }
  }

  /**
   * Cancels the action with the given id and cancels the whole task with the
   * same reason.
   *
   * @throws Error when no action has this id.
   */
  cancelAction(id: string, reason: string) {
    const action = this.actions.find((candidate) => candidate.id === id);

    if (!action) {
      throw new Error('Action not found');
    }

    action.cancel(reason);
    this.cancel(reason);
  }

  start() {
    this._status = 'running';
  }

  complete() {
    this._status = 'completed';
  }

  cancel(reason: string) {
    this._status = 'cancelled';
    this._reason = reason;
  }

  fail(reason: string) {
    this._status = 'failed';
    this._reason = reason;
  }

  /** Reduced view of the task: the goal plus each action's `objectForLLM()`. */
  public objectForLLM() {
    return {
      goal: this.goal,
      actionsTaken: this.actions.map((action) => action.objectForLLM()),
    };
  }

  /** JSON string of `asObject()`. */
  public serialize(): string {
    return JSON.stringify(this.asObject());
  }

  /** Plain-object snapshot of the task and its actions. */
  public asObject() {
    return {
      id: this.id,
      goal: this.goal,
      actions: this.actions.map((action) => action.asObject()),
      status: this.status,
      reason: this.reason,
    };
  }
}
/** Constructor input for {@link Variable}. */
export type VariableParams = {
  name: string;
  value: string;
  isSecret: boolean;
};

/**
 * A named value that can be flagged as secret. Secret values are exposed only
 * through `dangerousValue()`; `publicValue()` substitutes a `{{name}}`
 * placeholder for them.
 */
export class Variable {
  readonly name: string;
  readonly isSecret: boolean;
  private readonly _value: string;

  constructor(params: VariableParams) {
    const { name, value, isSecret } = params;

    this.name = name;
    this.isSecret = isSecret;
    this._value = value;
  }

  /** Returns the raw value, or the `{{name}}` placeholder when secret. */
  publicValue(): string {
    if (this.isSecret) {
      return `{{${this.name}}}`;
    }

    return this._value;
  }

  /** Returns the raw value regardless of the secret flag. */
  dangerousValue(): string {
    return this._value;
  }
}
/**
 * Typed publish/subscribe contract over the events declared in `AppEvents`.
 *
 * `E` is declared per method and constrained to the keys of `AppEvents`, so
 * the payload type `AppEvents[E]` follows the chosen event name.
 */
export interface EventBusInterface {
  /** Publishes `data` under `event`. */
  emit<E extends keyof AppEvents>(event: E, data: AppEvents[E]): void;

  /** Registers `callback` to receive the payloads published under `event`. */
  on<E extends keyof AppEvents>(
    event: E,
    callback: (data: AppEvents[E]) => void,
  ): void;
}
/**
 * In-memory store for the tasks of a run and its end goal, with a helper that
 * serialises both to a JSON string.
 */
export class TaskManagerService {
  private tasks: Task[] = [];
  private endGoal: string | null = null;

  setEndGoal(endGoal: string) {
    this.endGoal = endGoal;
  }

  /**
   * Returns the end goal.
   * NOTE(review): asserted non-null; this returns `null` at runtime if called
   * before `setEndGoal` — confirm callers always set it first.
   */
  getEndGoal() {
    return this.endGoal!;
  }

  add(task: Task) {
    this.tasks.push(task);
  }

  /** Replaces the stored task with the same id; other tasks are untouched. */
  update(task: Task) {
    this.tasks = this.tasks.map((t) => (t.id === task.id ? task : t));
  }

  /**
   * Returns the most recently added task that is not `running`, or `null`
   * when there is none.
   */
  getLatestTaskPerformed() {
    const performed = this.tasks.filter((t) => t.status !== 'running');

    // Index with the filtered length: the unfiltered length overshoots
    // whenever at least one task is still running.
    return performed[performed.length - 1] ?? null;
  }

  /** Goals of all stored tasks, in insertion order. */
  getTaskHistorySummary() {
    return this.tasks.map((t) => t.goal);
  }

  /**
   * Serialises the end goal, the goal history and the latest performed task's
   * `objectForLLM()` view as pretty-printed JSON.
   */
  getSerializedTasks() {
    const serialized = JSON.stringify(
      {
        endGoal: this.endGoal,
        taskHistorySummary: this.getTaskHistorySummary(),
        previousTaskResult: this.getLatestTaskPerformed()?.objectForLLM(),
      },
      null,
      2,
    );

    // NOTE(review): looks like leftover debug output — confirm it is wanted.
    console.log('serialized', serialized);

    return serialized;
  }
}
-------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Export the main factory function 3 | */ 4 | export { initOpenator, InitOpenatorConfig } from './init-openator'; 5 | 6 | /** 7 | * Export interfaces and types 8 | */ 9 | export { LLM } from './core/interfaces/llm.interface'; 10 | export { Reporter } from './core/interfaces/reporter.interface'; 11 | export { 12 | OpenatorResult, 13 | OpenatorResultStatus, 14 | OpenatorResultStatuses, 15 | } from './core/entities/openator-result'; 16 | 17 | export { 18 | ManagerAgentAction, 19 | ManagerAgentResponseSchema, 20 | ManagerResponse, 21 | ManagerResponseExamples, 22 | } from './core/agents/openator/openator.types'; 23 | 24 | /** 25 | * Export entities and classes 26 | */ 27 | export { Variable } from './core/entities/variable'; 28 | export { Openator, OpenatorConfig } from './core/agents/openator/openator'; 29 | export { Task } from './core/entities/task'; 30 | export { Run } from './core/entities/run'; 31 | 32 | /** 33 | * Export Chat Models 34 | */ 35 | export { ChatOpenAI, ChatOpenAIConfig } from './models/chat-openai'; 36 | export { ChatOllama, ChatOllamaConfig } from './models/chat-ollama'; 37 | export { ChatGoogleGenAI, ChatGoogleGenAIConfig } from './models/chat-google'; 38 | -------------------------------------------------------------------------------- /src/infra/services/chromium-browser.ts: -------------------------------------------------------------------------------- 1 | import { VariableString } from '@/core/entities/variable-string'; 2 | import { Browser } from '@/core/interfaces/browser.interface'; 3 | import { convertHtmlToMarkdown } from 'dom-to-semantic-markdown'; 4 | import { JSDOM } from 'jsdom'; 5 | import { 6 | Browser as PBrowser, 7 | BrowserContext, 8 | Page, 9 | chromium, 10 | } from 'playwright'; 11 | 12 | export type Coordinates = { 13 | x: number; 14 | y: 
/**
 * `Browser` implementation backed by Playwright's Chromium driver.
 *
 * One instance owns one browser, one context and one page, all created by
 * `launch()` and released by `close()`.
 */
export class ChromiumBrowser implements Browser {
  private browser: PBrowser | null = null;
  private page: Page | null = null;
  private context: BrowserContext | null = null;

  /** Minimum wait, in milliseconds, before a page is considered stable. */
  private minimumPageLoadTime: number = 400;

  constructor(
    private options?: {
      headless: boolean;
      wsEndpoint?: string;
    },
  ) {}

  /**
   * Connects to or starts a browser, opens a 1440x900 context with one page,
   * and navigates that page to `url`.
   */
  async launch(url: string) {
    const wsEndpoint =
      this.options?.wsEndpoint ?? process.env.PLAYWRIGHT_WS_ENDPOINT ?? null;

    let browser: PBrowser;

    /**
     * If the wsEndpoint is provided, we connect to an already running browser
     * at that endpoint (over CDP) instead of launching one locally.
     * This is used in the docker-compose file where the playwright-server is running in a dedicated container.
     */
    if (wsEndpoint) {
      browser = await chromium.connectOverCDP(wsEndpoint);
    } else {
      browser = await chromium.launch({
        headless: this.options?.headless ?? false,
      });
    }

    // Keep the handle so close() can release the browser, not just the context.
    this.browser = browser;

    this.context = await browser.newContext({
      screen: {
        width: 1440,
        height: 900,
      },
      viewport: {
        width: 1440,
        height: 900,
      },
    });
    this.page = await this.context.newPage();
    await this.page.goto(url);
  }

  private async waitForDomContentLoaded() {
    await this.getPage().waitForLoadState('domcontentloaded');
  }

  private async waitMinimumPageLoadTime() {
    return new Promise<void>((resolve) =>
      setTimeout(resolve, this.minimumPageLoadTime),
    );
  }

  /** Waits for both `domcontentloaded` and the minimum load time. */
  private async waitForStability() {
    return Promise.all([
      this.waitForDomContentLoaded(),
      this.waitMinimumPageLoadTime(),
    ]);
  }

  /** Returns the page once `waitForStability()` has resolved. */
  async getStablePage(): Promise<Page> {
    await this.waitForStability();
    return this.getPage();
  }

  /**
   * Closes the context and then the browser, awaiting both so callers observe
   * completion and any error. Safe to call repeatedly or before `launch()`.
   */
  async close(): Promise<void> {
    const context = this.context;
    const browser = this.browser;

    // Drop the handles first so a second close() is a no-op.
    this.context = null;
    this.browser = null;

    try {
      if (context) {
        await context.close();
      }
    } finally {
      // Still release the browser if closing the context failed.
      if (browser) {
        await browser.close();
      }
    }
  }

  /**
   * Returns the current page.
   *
   * @throws Error when `launch()` has not created a page yet.
   */
  getPage(): Page {
    if (!this.page) {
      throw new Error('The page is not initialized or has been detroyed.');
    }

    return this.page;
  }

  getPageUrl() {
    return this.getPage().url();
  }

  /** Clicks at viewport coordinates and waits for `domcontentloaded`. */
  async mouseClick(x: number, y: number) {
    await Promise.all([
      this.getPage().mouse.click(x, y),
      this.getPage().waitForLoadState('domcontentloaded'),
    ]);
  }

  /** Returns `window.scrollY` of the current page. */
  async getPixelAbove() {
    return this.getPage().evaluate(() => {
      return window.scrollY;
    });
  }

  /** Returns `window.scrollY + window.innerHeight` of the current page. */
  async getPixelBelow() {
    return this.getPage().evaluate(() => {
      return window.scrollY + window.innerHeight;
    });
  }

  /**
   * Clicks at `coordinates`, clears the focused field with select-all plus
   * Backspace, then types the text's real (non-masked) value.
   */
  async fillInput(text: VariableString, coordinates: Coordinates) {
    await this.getPage().mouse.click(coordinates.x, coordinates.y);
    await this.getPage().keyboard.press('ControlOrMeta+A');
    await this.getPage().keyboard.press('Backspace');
    await this.getPage().keyboard.type(text.dangerousValue(), { delay: 100 });
  }

  /** Scrolls down by 500 wheel units, then waits 300 ms. */
  async scrollDown() {
    await this.getPage().mouse.wheel(0, 500);
    await this.getPage().waitForTimeout(300);
  }

  /** Scrolls up by 500 wheel units, then waits 300 ms. */
  async scrollUp() {
    await this.getPage().mouse.wheel(0, -500);
    await this.getPage().waitForTimeout(300);
  }

  async goToUrl(url: string) {
    await this.getPage().goto(url);
  }

  async goBack() {
    await this.getPage().goBack();
  }

  /** Converts the current page's HTML to markdown, main content only. */
  async extractContent() {
    const html = await this.getPage().content();
    const dom = new JSDOM(html);
    const markdown = convertHtmlToMarkdown(html, {
      overrideDOMParser: new dom.window.DOMParser(),
      extractMainContent: true,
    });
    return markdown;
  }
}
/**
 * Type guard: true for any non-empty node that is not tagged as a text node
 * (`type === 'TEXT_NODE'`). `null` is never an element node.
 */
const isElementNode = (node: DomNode | null): node is ElementNode => {
  const taggedAsText =
    !!node && 'type' in node && node.type === 'TEXT_NODE';

  return !!node && !taggedAsText;
};
/**
 * Type guard: true only for nodes that carry a `type` key whose value is
 * `'TEXT_NODE'`.
 */
export const isTextNode = (node: DomNode): node is TextNode => {
  if (!('type' in node)) {
    return false;
  }

  return node.type === 'TEXT_NODE';
};
97 | */ 98 | private stringifyDomStateForHash(nodeState: DomNode | null) { 99 | const items: string[] = []; 100 | 101 | const format = (node: DomNode | null) => { 102 | if (!isElementNode(node)) { 103 | return; 104 | } 105 | 106 | if (node.highlightIndex) { 107 | // [2]__
108 | const str = `[${node.isInteractive ? node.highlightIndex : ''}]__<${node.tagName}>`; 109 | 110 | items.push(str); 111 | } 112 | 113 | for (const child of node.children) { 114 | if (child) { 115 | format(child); 116 | } 117 | } 118 | }; 119 | 120 | format(nodeState); 121 | 122 | return items.join('\n'); 123 | } 124 | 125 | private hashDomState(domState: DomNode | null) { 126 | if (!domState) { 127 | return ''; 128 | } 129 | 130 | const domStateString = this.stringifyDomStateForHash(domState); 131 | 132 | return crypto.createHash('sha256').update(domStateString).digest('hex'); 133 | } 134 | 135 | getIndexSelector(index: number): Coordinates | null { 136 | const domNode = this.domContext?.selectorMap[index]; 137 | 138 | if (!domNode) { 139 | return null; 140 | } 141 | 142 | if (isTextNode(domNode)) { 143 | return null; 144 | } 145 | 146 | return domNode.coordinates; 147 | } 148 | 149 | async getDomState( 150 | withHighlight: boolean = true, 151 | ): Promise { 152 | await this.resetHighlightElements(); 153 | const pristineScreenshot = await this.screenshotService.takeScreenshot( 154 | await this.browserService.getStablePage(), 155 | ); 156 | 157 | this.eventBus.emit('pristine-screenshot:taken', pristineScreenshot); 158 | 159 | const state = await this.highlightForSoM(withHighlight); 160 | 161 | const screenshot = await this.screenshotService.takeScreenshot( 162 | await this.browserService.getStablePage(), 163 | ); 164 | 165 | const pixelAbove = await this.browserService.getPixelAbove(); 166 | const pixelBelow = await this.browserService.getPixelBelow(); 167 | 168 | return { 169 | screenshot, 170 | pristineScreenshot, 171 | domState: state, 172 | pixelAbove, 173 | pixelBelow, 174 | }; 175 | } 176 | 177 | async getInteractiveElements(withHighlight: boolean = true) { 178 | const { screenshot, pristineScreenshot, domState, pixelAbove, pixelBelow } = 179 | await this.getDomState(withHighlight); 180 | const selectorMap = this.createSelectorMap(domState); 181 | const 
stringifiedDomState = this.stringifyDomState(domState); 182 | const domStateHash = this.hashDomState(domState); 183 | 184 | this.domContext.selectorMap = selectorMap; 185 | 186 | return { 187 | screenshot, 188 | pristineScreenshot, 189 | domState, 190 | selectorMap, 191 | stringifiedDomState, 192 | domStateHash, 193 | pixelAbove, 194 | pixelBelow, 195 | }; 196 | } 197 | 198 | createSelectorMap(nodeState: DomNode | null) { 199 | const selectorMap: Record = {}; 200 | 201 | const mapNode = (node: DomNode | null) => { 202 | if (isElementNode(node)) { 203 | selectorMap[node.highlightIndex] = node; 204 | 205 | for (const child of node.children) { 206 | mapNode(child); 207 | } 208 | } 209 | }; 210 | 211 | mapNode(nodeState); 212 | return selectorMap; 213 | } 214 | 215 | stringifyDomState(nodeState: DomNode | null) { 216 | const items: string[] = []; 217 | 218 | const format = (node: DomNode | null) => { 219 | if (!isElementNode(node)) { 220 | return; 221 | } 222 | 223 | const attributes = Object.entries(node.attributes) 224 | .filter(([key]) => IMPORTANT_ATTRIBUTES.includes(key)) 225 | .map(([key, value]) => `${key}="${value}"`) 226 | .join(' '); 227 | 228 | if (node.highlightIndex) { 229 | // [2]__
Hello
230 | const str = `[${node.isInteractive ? node.highlightIndex : ''}]__<${node.tagName} ${attributes}>${node.text}`; 231 | 232 | items.push(str); 233 | } 234 | 235 | for (const child of node.children) { 236 | if (child) { 237 | format(child); 238 | } 239 | } 240 | }; 241 | 242 | format(nodeState); 243 | 244 | return items.join('\n'); 245 | } 246 | 247 | async resetHighlightElements() { 248 | const page = await this.browserService.getStablePage(); 249 | await page.evaluate(() => { 250 | try { 251 | // Remove the highlight container and all its contents 252 | const container = document.getElementById( 253 | 'playwright-highlight-container', 254 | ); 255 | if (container) { 256 | container.remove(); 257 | } 258 | 259 | // Remove highlight attributes from elements 260 | const highlightedElements = document.querySelectorAll( 261 | '[magic-inspector-highlight-id^="playwright-highlight-"]', 262 | ); 263 | highlightedElements.forEach((el) => { 264 | el.removeAttribute('magic-inspector-highlight-id'); 265 | }); 266 | } catch (e) { 267 | console.error('Failed to remove highlights:', e); 268 | } 269 | }); 270 | } 271 | 272 | async highlightElementWheel(direction: 'down' | 'up') { 273 | const page = await this.browserService.getStablePage(); 274 | await page.evaluate((direction: 'down' | 'up') => { 275 | console.log('highlightElementWheel', direction); 276 | }, direction); 277 | } 278 | 279 | async highlightElementPointer(coordinates: Coordinates) { 280 | const page = await this.browserService.getStablePage(); 281 | await page.evaluate((coordinates: Coordinates) => { 282 | try { 283 | // Create or get highlight container 284 | let container = document.getElementById( 285 | 'playwright-pointer-highlight-container', 286 | ); 287 | if (!container) { 288 | container = document.createElement('div'); 289 | container.id = 'playwright-pointer-highlight-container'; 290 | container.style.position = 'fixed'; 291 | container.style.pointerEvents = 'none'; 292 | container.style.top = '0'; 
293 | container.style.left = '0'; 294 | container.style.width = '100%'; 295 | container.style.height = '100%'; 296 | container.style.zIndex = '2147483647'; // Maximum z-index value 297 | document.body.appendChild(container); 298 | } 299 | 300 | // Create the red circle 301 | const circle = document.createElement('div'); 302 | circle.style.position = 'absolute'; 303 | circle.style.width = '20px'; 304 | circle.style.height = '20px'; 305 | circle.style.borderRadius = '50%'; 306 | circle.style.backgroundColor = 'red'; 307 | circle.style.left = `${coordinates.x - 10}px`; // Center the circle 308 | circle.style.top = `${coordinates.y - 10}px`; // Center the circle 309 | circle.style.pointerEvents = 'none'; // Ensure it doesn't interfere with clicking 310 | 311 | container.appendChild(circle); 312 | 313 | setTimeout(() => { 314 | circle.remove(); 315 | container.remove(); 316 | }, 2000); 317 | } catch (e) { 318 | console.error('Failed to draw highlight circle:', e); 319 | } 320 | }, coordinates); 321 | } 322 | 323 | async waitForStability(page: Page) { 324 | await page.waitForTimeout(1500); 325 | } 326 | 327 | async highlightForSoM( 328 | withHighlight: boolean = true, 329 | ): Promise { 330 | try { 331 | const page: Page = await this.browserService.getStablePage(); 332 | 333 | if (page.isClosed()) { 334 | return null; 335 | } 336 | 337 | await this.waitForStability(page); 338 | 339 | const domState: DomNode | null = await page.evaluate((withHighlight) => { 340 | const doHighlightElements = true; 341 | const focusHighlightIndex = -1; 342 | const viewportExpansion: 0 | -1 = 0; 343 | 344 | let highlightIndex = 0; 345 | 346 | function highlightElement( 347 | element: Element, 348 | index: number, 349 | parentIframe: HTMLIFrameElement | null = null, 350 | ) { 351 | if (!withHighlight) { 352 | return; 353 | } 354 | 355 | let container = document.getElementById( 356 | 'playwright-highlight-container', 357 | ); 358 | if (!container) { 359 | container = 
document.createElement('div'); 360 | container.id = 'playwright-highlight-container'; 361 | container.style.position = 'absolute'; 362 | container.style.pointerEvents = 'none'; 363 | container.style.top = '0'; 364 | container.style.left = '0'; 365 | container.style.width = '100%'; 366 | container.style.height = '100%'; 367 | container.style.zIndex = '2147483647'; 368 | document.body.appendChild(container); 369 | } 370 | 371 | const colors = [ 372 | '#FF0000', 373 | '#00FF00', 374 | '#0000FF', 375 | '#FFA500', 376 | '#800080', 377 | '#008080', 378 | '#FF69B4', 379 | '#4B0082', 380 | '#FF4500', 381 | '#2E8B57', 382 | '#DC143C', 383 | '#4682B4', 384 | ]; 385 | const colorIndex = index % colors.length; 386 | const baseColor = colors[colorIndex]; 387 | // 10% opacity version of the color 388 | const backgroundColor = `${baseColor}1A`; 389 | 390 | // Create highlight overlay 391 | const overlay = document.createElement('div'); 392 | overlay.style.position = 'absolute'; 393 | overlay.style.border = `2px solid ${baseColor}`; 394 | overlay.style.backgroundColor = backgroundColor; 395 | overlay.style.pointerEvents = 'none'; 396 | overlay.style.boxSizing = 'border-box'; 397 | 398 | // Position overlay based on element, including scroll position 399 | const rect = element.getBoundingClientRect(); 400 | let top = rect.top + window.scrollY; 401 | let left = rect.left + window.scrollX; 402 | 403 | // Adjust position if element is inside an iframe 404 | if (parentIframe) { 405 | const iframeRect = parentIframe.getBoundingClientRect(); 406 | top += iframeRect.top; 407 | left += iframeRect.left; 408 | } 409 | 410 | overlay.style.top = `${top}px`; 411 | overlay.style.left = `${left}px`; 412 | overlay.style.width = `${rect.width}px`; 413 | overlay.style.height = `${rect.height}px`; 414 | 415 | // Create label 416 | const label = document.createElement('div'); 417 | label.className = 'playwright-highlight-label'; 418 | label.style.position = 'absolute'; 419 | label.style.background = 
`${baseColor}`; 420 | label.style.color = 'white'; 421 | label.style.padding = '1px 4px'; 422 | label.style.borderRadius = '4px'; 423 | label.style.fontSize = `${Math.min(12, Math.max(8, rect.height / 2))}px`; // Responsive font size 424 | label.textContent = `[${index}]`; 425 | 426 | // Calculate label position 427 | const labelWidth = 20; // Approximate width 428 | const labelHeight = 16; // Approximate height 429 | 430 | // Default position (top-right corner inside the box) 431 | let labelTop = top + 2; 432 | let labelLeft = left + rect.width - labelWidth - 2; 433 | 434 | // Adjust if box is too small 435 | if (rect.width < labelWidth + 4 || rect.height < labelHeight + 4) { 436 | // Position outside the box if it's too small 437 | labelTop = top - labelHeight - 2; 438 | labelLeft = left + rect.width - labelWidth; 439 | } 440 | 441 | label.style.top = `${labelTop}px`; 442 | label.style.left = `${labelLeft}px`; 443 | 444 | // Add to container 445 | container.appendChild(overlay); 446 | container.appendChild(label); 447 | 448 | // Store reference for cleanup 449 | element.setAttribute( 450 | 'magic-inspector-highlight-id', 451 | `playwright-highlight-${index}`, 452 | ); 453 | 454 | return index + 1; 455 | } 456 | 457 | function getXPathTree(element: ParentNode, stopAtBoundary = true) { 458 | const segments = []; 459 | let currentElement = element; 460 | 461 | while ( 462 | currentElement && 463 | currentElement.nodeType === Node.ELEMENT_NODE 464 | ) { 465 | // Stop if we hit a shadow root or iframe 466 | if ( 467 | stopAtBoundary && 468 | (currentElement.parentNode instanceof ShadowRoot || 469 | currentElement.parentNode instanceof HTMLIFrameElement) 470 | ) { 471 | break; 472 | } 473 | 474 | let index = 0; 475 | let sibling = currentElement.previousSibling; 476 | while (sibling) { 477 | if ( 478 | sibling.nodeType === Node.ELEMENT_NODE && 479 | sibling.nodeName === currentElement.nodeName 480 | ) { 481 | index++; 482 | } 483 | sibling = sibling.previousSibling; 
484 | } 485 | 486 | const tagName = currentElement.nodeName.toLowerCase(); 487 | const xpathIndex = index > 0 ? `[${index + 1}]` : ''; 488 | segments.unshift(`${tagName}${xpathIndex}`); 489 | 490 | // @ts-ignore // TODO: fix this type issue 491 | currentElement = currentElement.parentNode; 492 | } 493 | 494 | return segments.join('/'); 495 | } 496 | 497 | function isElementAccepted(element: Element) { 498 | const leafElementDenyList = new Set([ 499 | 'svg', 500 | 'script', 501 | 'style', 502 | 'link', 503 | 'meta', 504 | ]); 505 | return !leafElementDenyList.has(element.tagName.toLowerCase()); 506 | } 507 | 508 | function isInteractiveElement(element: HTMLElement) { 509 | const interactiveElements = new Set([ 510 | 'a', 511 | 'button', 512 | 'details', 513 | 'embed', 514 | 'input', 515 | 'label', 516 | 'menu', 517 | 'menuitem', 518 | 'object', 519 | 'select', 520 | 'textarea', 521 | 'summary', 522 | ]); 523 | 524 | const interactiveRoles = new Set([ 525 | 'button', 526 | 'menu', 527 | 'menuitem', 528 | 'link', 529 | 'checkbox', 530 | 'radio', 531 | 'slider', 532 | 'tab', 533 | 'tabpanel', 534 | 'textbox', 535 | 'combobox', 536 | 'grid', 537 | 'listbox', 538 | 'option', 539 | 'progressbar', 540 | 'scrollbar', 541 | 'searchbox', 542 | 'switch', 543 | 'tree', 544 | 'treeitem', 545 | 'spinbutton', 546 | 'tooltip', 547 | 'a-button-inner', 548 | 'a-dropdown-button', 549 | 'click', 550 | 'menuitemcheckbox', 551 | 'menuitemradio', 552 | 'a-button-text', 553 | 'button-text', 554 | 'button-icon', 555 | 'button-icon-only', 556 | 'button-text-icon-only', 557 | 'dropdown', 558 | 'combobox', 559 | ]); 560 | 561 | const tagName = element.tagName.toLowerCase(); 562 | const role = element.getAttribute('role') ?? ''; 563 | const ariaRole = element.getAttribute('aria-role') ?? ''; 564 | const tabIndex = element.getAttribute('tabindex') ?? 
''; 565 | 566 | const hasAddressInputClass = element.classList.contains( 567 | 'address-input__container__input', 568 | ); 569 | 570 | // Basic role/attribute checks 571 | const hasInteractiveRole = 572 | hasAddressInputClass || 573 | interactiveElements.has(tagName) || 574 | interactiveRoles.has(role) || 575 | interactiveRoles.has(ariaRole) || 576 | (tabIndex !== null && tabIndex !== '-1') || 577 | element.getAttribute('data-action') === 'a-dropdown-select' || 578 | element.getAttribute('data-action') === 'a-dropdown-button'; 579 | 580 | if (hasInteractiveRole) return true; 581 | 582 | const hasClickHandler = 583 | element.onclick !== null || 584 | element.getAttribute('onclick') !== null || 585 | element.hasAttribute('ng-click') || 586 | element.hasAttribute('@click') || 587 | element.hasAttribute('v-on:click'); 588 | 589 | function getEventListeners(el: Element) { 590 | try { 591 | return window.getEventListeners?.(el) || {}; 592 | } catch (e) { 593 | const listeners = {}; 594 | 595 | const eventTypes = [ 596 | 'click', 597 | 'mousedown', 598 | 'mouseup', 599 | 'touchstart', 600 | 'touchend', 601 | 'keydown', 602 | 'keyup', 603 | 'focus', 604 | 'blur', 605 | ]; 606 | 607 | for (const type of eventTypes) { 608 | // @ts-ignore // TODO: fix this 609 | const handler = el[`on${type}`]; 610 | if (handler) { 611 | // @ts-ignore // TODO: fix this 612 | listeners[type] = [ 613 | { 614 | listener: handler, 615 | useCapture: false, 616 | }, 617 | ]; 618 | } 619 | } 620 | 621 | return listeners; 622 | } 623 | } 624 | 625 | // Check for click-related events on the element itself 626 | const listeners = getEventListeners(element); 627 | const hasClickListeners = 628 | listeners && 629 | (listeners.click?.length > 0 || 630 | listeners.mousedown?.length > 0 || 631 | listeners.mouseup?.length > 0 || 632 | listeners.touchstart?.length > 0 || 633 | listeners.touchend?.length > 0); 634 | 635 | // Check for ARIA properties that suggest interactivity 636 | const hasAriaProps = 637 | 
element.hasAttribute('aria-expanded') || 638 | element.hasAttribute('aria-pressed') || 639 | element.hasAttribute('aria-selected') || 640 | element.hasAttribute('aria-checked'); 641 | 642 | // Check if element is draggable 643 | const isDraggable = 644 | element.draggable || element.getAttribute('draggable') === 'true'; 645 | 646 | return ( 647 | hasAriaProps || hasClickHandler || hasClickListeners || isDraggable 648 | ); 649 | } 650 | 651 | function isElementVisible(element: HTMLElement) { 652 | const style = window.getComputedStyle(element); 653 | return ( 654 | element.offsetWidth > 0 && 655 | element.offsetHeight > 0 && 656 | style.visibility !== 'hidden' && 657 | style.display !== 'none' 658 | ); 659 | } 660 | 661 | function isTopElement(element: Element) { 662 | const rect = element.getBoundingClientRect(); 663 | 664 | // Ignore elements outside viewport 665 | if (rect.bottom < 0 || rect.top > window.innerHeight) return false; 666 | 667 | const centerX = rect.left + rect.width / 2; 668 | const centerY = rect.top + rect.height / 2; 669 | 670 | const topEl = document.elementFromPoint(centerX, centerY); 671 | return topEl === element || element.contains(topEl); 672 | } 673 | 674 | function isTopElementOld(element: Element) { 675 | // Find the correct document context and root element 676 | let doc = element.ownerDocument; 677 | 678 | // If we're in an iframe, elements are considered top by default 679 | if (doc !== window.document) { 680 | return true; 681 | } 682 | 683 | // For shadow DOM, we need to check within its own root context 684 | const shadowRoot = element.getRootNode(); 685 | if (shadowRoot instanceof ShadowRoot) { 686 | const rect = element.getBoundingClientRect(); 687 | const point = { 688 | x: rect.left + rect.width / 2, 689 | y: rect.top + rect.height / 2, 690 | }; 691 | 692 | try { 693 | // Use shadow root's elementFromPoint to check within shadow DOM context 694 | const topEl = shadowRoot.elementFromPoint(point.x, point.y); 695 | 696 | if 
(!topEl) return false; 697 | 698 | // Check if the element or any of its parents match our target element 699 | let current = topEl; 700 | // @ts-ignore // TODO: fix this 701 | while (current && current !== shadowRoot) { 702 | if (current === element) return true; 703 | current = current.parentElement as Element; 704 | } 705 | return false; 706 | } catch (e) { 707 | return true; // If we can't determine, consider it visible 708 | } 709 | } 710 | 711 | const rect = element.getBoundingClientRect(); 712 | 713 | // If viewportExpansion is -1, check if element is the top one at its position 714 | if (viewportExpansion === -1) { 715 | return true; // Consider all elements as top elements when expansion is -1 716 | } 717 | 718 | // Calculate expanded viewport boundaries including scroll position 719 | const scrollX = window.scrollX; 720 | const scrollY = window.scrollY; 721 | const viewportTop = -viewportExpansion + scrollY; 722 | const viewportLeft = -viewportExpansion + scrollX; 723 | const viewportBottom = 724 | window.innerHeight + viewportExpansion + scrollY; 725 | const viewportRight = window.innerWidth + viewportExpansion + scrollX; 726 | 727 | // Get absolute element position 728 | const absTop = rect.top + scrollY; 729 | const absLeft = rect.left + scrollX; 730 | const absBottom = rect.bottom + scrollY; 731 | const absRight = rect.right + scrollX; 732 | 733 | // Skip if element is completely outside expanded viewport 734 | if ( 735 | absBottom < viewportTop || 736 | absTop > viewportBottom || 737 | absRight < viewportLeft || 738 | absLeft > viewportRight 739 | ) { 740 | return false; 741 | } 742 | 743 | // For elements within expanded viewport, check if they're the top element 744 | try { 745 | const centerX = rect.left + rect.width / 2; 746 | const centerY = rect.top + rect.height / 2; 747 | 748 | // Only clamp the point if it's outside the actual document 749 | const point = { 750 | x: centerX, 751 | y: centerY, 752 | }; 753 | 754 | if ( 755 | point.x < 0 || 
756 | point.x >= window.innerWidth || 757 | point.y < 0 || 758 | point.y >= window.innerHeight 759 | ) { 760 | return true; // Consider elements with center outside viewport as visible 761 | } 762 | 763 | const topEl = document.elementFromPoint(point.x, point.y); 764 | if (!topEl) return false; 765 | 766 | let current = topEl; 767 | while (current && current !== document.documentElement) { 768 | if (current === element) return true; 769 | // @ts-ignore // TODO: fix this 770 | current = current.parentElement; 771 | } 772 | return false; 773 | } catch (e) { 774 | return true; 775 | } 776 | } 777 | 778 | function isTextNodeVisible(textNode: Node) { 779 | const range = document.createRange(); 780 | range.selectNodeContents(textNode); 781 | const rect = range.getBoundingClientRect(); 782 | 783 | return ( 784 | rect.width !== 0 && 785 | rect.height !== 0 && 786 | rect.top >= 0 && 787 | rect.top <= window.innerHeight && 788 | textNode.parentElement?.checkVisibility({ 789 | checkOpacity: true, 790 | checkVisibilityCSS: true, 791 | }) 792 | ); 793 | } 794 | 795 | function getCoordinates(element: Element) { 796 | const rect = element.getBoundingClientRect(); 797 | 798 | if (!rect) return null; 799 | 800 | const centerX = rect.left + rect.width / 2; 801 | const centerY = rect.top + rect.height / 2; 802 | 803 | if (isNaN(centerX) || isNaN(centerY)) return null; 804 | 805 | if (centerX <= 0 || centerY <= 0) return null; 806 | 807 | return { 808 | x: centerX, 809 | y: centerY, 810 | }; 811 | } 812 | 813 | function buildDomTree( 814 | node: Element, 815 | parentIframe: HTMLIFrameElement | null = null, 816 | ): DomNode | null { 817 | if (!node) return null; 818 | 819 | // Special case for text nodes 820 | if (node.nodeType === Node.TEXT_NODE) { 821 | const textContent = node.textContent?.trim() ?? 
''; 822 | 823 | if (textContent && isTextNodeVisible(node)) { 824 | return { 825 | type: 'TEXT_NODE', 826 | text: textContent, 827 | isVisible: true, 828 | }; 829 | } 830 | return null; 831 | } 832 | 833 | if (node.nodeType === Node.ELEMENT_NODE && !isElementAccepted(node)) { 834 | return null; 835 | } 836 | 837 | const nodeData: Partial = { 838 | tagName: node.tagName ? node.tagName.toLowerCase() : null, 839 | attributes: {}, 840 | xpath: 841 | node.nodeType === Node.ELEMENT_NODE 842 | ? getXPathTree(node, true) 843 | : null, 844 | children: [], 845 | }; 846 | 847 | if (node.nodeType === Node.ELEMENT_NODE && node.attributes) { 848 | const attributeNames = node.getAttributeNames?.() || []; 849 | if (!nodeData.attributes) { 850 | nodeData.attributes = {}; 851 | } 852 | for (const name of attributeNames) { 853 | nodeData.attributes[name] = node.getAttribute(name) ?? ''; 854 | } 855 | } 856 | 857 | if (node.nodeType === Node.ELEMENT_NODE) { 858 | const isInteractive = isInteractiveElement(node as HTMLElement); 859 | const isVisible = isElementVisible(node as HTMLElement); 860 | const coordinates = getCoordinates(node); 861 | const isTop = isTopElement(node); 862 | 863 | nodeData.isInteractive = isInteractive; 864 | nodeData.isVisible = isVisible; 865 | nodeData.isTopElement = isTop; 866 | nodeData.text = ''; 867 | nodeData.coordinates = coordinates; 868 | 869 | if (isInteractive && isVisible && isTop) { 870 | nodeData.highlightIndex = highlightIndex++; 871 | if (doHighlightElements) { 872 | if (focusHighlightIndex >= 0) { 873 | if (focusHighlightIndex === nodeData.highlightIndex) { 874 | highlightElement( 875 | node, 876 | nodeData.highlightIndex, 877 | parentIframe, 878 | ); 879 | } 880 | } else { 881 | highlightElement(node, nodeData.highlightIndex, parentIframe); 882 | } 883 | } 884 | } 885 | } 886 | 887 | // Only add shadowRoot field if it exists 888 | if (node.shadowRoot) { 889 | nodeData.shadowRoot = true; 890 | } 891 | 892 | // Handle shadow DOM 893 | if 
(node.shadowRoot) { 894 | const shadowChildren = Array.from(node.shadowRoot.childNodes).map( 895 | (child) => buildDomTree(child as Element, parentIframe), 896 | ); 897 | nodeData.children?.push(...shadowChildren); 898 | } 899 | 900 | // Handle iframes 901 | if (node.tagName === 'IFRAME') { 902 | try { 903 | const iframeDoc = 904 | (node as HTMLIFrameElement).contentDocument || 905 | (node as HTMLIFrameElement).contentWindow?.document; 906 | 907 | if (iframeDoc) { 908 | const iframeChildren = Array.from( 909 | iframeDoc.body.childNodes, 910 | ).map((child) => 911 | buildDomTree(child as Element, node as HTMLIFrameElement), 912 | ); 913 | nodeData.children?.push(...iframeChildren); 914 | } 915 | } catch (e) { 916 | console.warn('Unable to access iframe:', node); 917 | } 918 | } else { 919 | const children = Array.from(node.childNodes).map((child) => 920 | buildDomTree(child as Element, parentIframe), 921 | ); 922 | nodeData.children?.push(...children); 923 | } 924 | 925 | return nodeData as DomNode; 926 | } 927 | 928 | const domTree = buildDomTree(document.body); 929 | 930 | return domTree; 931 | }, withHighlight); 932 | 933 | return domState; 934 | } catch (error: unknown) { 935 | console.log('error', error); 936 | return null; 937 | } 938 | } 939 | } 940 | -------------------------------------------------------------------------------- /src/infra/services/in-memory-file-system.ts: -------------------------------------------------------------------------------- 1 | import { FileSystem } from "@/core/interfaces/file-system.interface"; 2 | 3 | export class InMemoryFileSystem implements FileSystem { 4 | constructor() {} 5 | 6 | saveFile(path: string, data: Buffer): Promise { 7 | return this.saveScreenshot(path, data); 8 | } 9 | 10 | saveScreenshot(filename: string, data: Buffer): Promise { 11 | return new Promise((resolve, reject) => { 12 | try { 13 | const base64Data = data.toString("base64"); 14 | const url = `data:image/png;base64,${base64Data}`; 15 | resolve(url); 
/**
 * File system implementation that writes files under `/tmp/screenshots` on the
 * local disk and returns the path that was written.
 */
export class LocalFileSystem implements FileSystem {
  constructor() {}

  /**
   * Decodes a base64 string into bytes, first stripping a leading
   * `data:image/png;base64,` prefix when present.
   *
   * @param encodedScreenshot A PNG data URL or a bare base64 string.
   * @returns The decoded bytes.
   */
  bufferFromStringUrl(encodedScreenshot: string): Buffer {
    const base64Data = encodedScreenshot.replace(
      /^data:image\/png;base64,/,
      '',
    );
    return Buffer.from(base64Data, 'base64');
  }

  /**
   * Same as {@link saveScreenshot}: `path` is used as the file name under
   * `/tmp/screenshots`.
   */
  saveFile(path: string, data: Buffer): Promise<string> {
    return this.saveScreenshot(path, data);
  }

  /**
   * Writes `data` to `/tmp/screenshots/<filename>`, creating the target directory
   * (and any intermediate directories in `filename`) when it does not exist yet.
   *
   * @param filename File name, relative to `/tmp/screenshots`.
   * @param data Bytes to write.
   * @returns The path of the written file. The promise rejects with the underlying
   *   file-system error when the directory or the file cannot be written.
   */
  async saveScreenshot(filename: string, data: Buffer): Promise<string> {
    // Loaded inside the method, as in the original, so the module has no
    // top-level dependency on Node's file-system API.
    const fs = require('fs');
    const path = require('path');

    const filePath = path.join('/tmp/screenshots', filename);

    // Without this the write fails with ENOENT until someone creates the directory by hand.
    await fs.promises.mkdir(path.dirname(filePath), { recursive: true });
    await fs.promises.writeFile(filePath, data);

    return filePath;
  }
}
.split("/") 18 | .filter((segment) => segment) 19 | .join("_"); 20 | 21 | const key = `${hostname}_${segments}_${crypto.randomUUID()}`; 22 | 23 | const signedUrl = await this.fileSystem.saveScreenshot(key, screenshot); 24 | 25 | return signedUrl; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/init-openator.ts: -------------------------------------------------------------------------------- 1 | import { FeedbackAgent } from './core/agents/feedback-agent/feedback-agent'; 2 | import { Openator } from './core/agents/openator/openator'; 3 | import { 4 | initSummarizer, 5 | initSummarizeTask, 6 | } from './core/agents/summarize-agent/summarize-agent'; 7 | import { EventBus } from './core/services/realtime-reporter'; 8 | import { TaskManagerService } from './core/services/task-manager-service'; 9 | import { LLM, Variable } from './core/types'; 10 | import { ChromiumBrowser } from './infra/services/chromium-browser'; 11 | import { ConsoleReporter } from './infra/services/console-reporter'; 12 | import { DomService } from './infra/services/dom-service'; 13 | import { InMemoryFileSystem } from './infra/services/in-memory-file-system'; 14 | import { PlaywrightScreenshoter } from './infra/services/playwright-screenshotter'; 15 | 16 | export type InitOpenatorConfig = { 17 | /** 18 | * The LLM to use. 19 | * @default OpenAI4o 20 | */ 21 | llm: LLM; 22 | /** 23 | * Whether to run the browser in headless mode. 24 | * @default false 25 | */ 26 | headless: boolean; 27 | /** 28 | * Variables can be used to pass sensitive information to the Openator. 29 | * Every variable will be interpolated during the runtime from `{{variable_name}}` to the actual value. 30 | * Secret variables will be masked in the console output and never sent to the LLM. 31 | * Normal variables will be sent to the LLM and will be visible in the console output. 
/**
 * Wires up an {@link Openator} with the default infrastructure: an in-memory
 * file system for screenshots, a Chromium browser, a console reporter, and the
 * feedback and summarize agents driven by the configured LLM.
 *
 * @param config LLM, headless flag and optional variables.
 * @returns A ready-to-use Openator instance.
 */
export const initOpenator = (config: InitOpenatorConfig): Openator => {
  const { llm, headless, variables } = config;

  // Browser-side services.
  const screenshotService = new PlaywrightScreenshoter(
    new InMemoryFileSystem(),
  );
  const browser = new ChromiumBrowser({ headless });
  const domService = new DomService(screenshotService, browser, new EventBus());

  // Agent-side services.
  const feedbackAgent = new FeedbackAgent(llm);
  const taskManager = new TaskManagerService();
  const summarizer = initSummarizer(llm);
  const summarizeTask = initSummarizeTask();
  const reporter = new ConsoleReporter('Openator');

  return new Openator({
    variables: variables ?? [],
    taskManager,
    domService,
    browserService: browser,
    llmService: llm,
    feedbackAgent,
    reporter,
    summarizer,
    summarizeTask,
  });
};
/**
 * {@link LLM} implementation backed by the LangChain Google Generative AI chat model.
 * Options missing from the config fall back to `DEFAULT_CONFIG`.
 */
export class ChatGoogleGenAI implements LLM {
  private readonly model: ChatModel;

  /**
   * @param config API key plus optional model, temperature, retry and concurrency settings.
   */
  constructor(config: ChatGoogleGenAIConfig) {
    this.model = new ChatModel({
      model: config.model ?? DEFAULT_CONFIG.model,
      temperature: config.temperature ?? DEFAULT_CONFIG.temperature,
      maxRetries: config.maxRetries ?? DEFAULT_CONFIG.maxRetries,
      maxConcurrency: config.maxConcurrency ?? DEFAULT_CONFIG.maxConcurrency,
      apiKey: config.apiKey,
    });
  }

  /**
   * Sends the messages to the model and runs the response through `parser`.
   *
   * @param messages Conversation to send.
   * @param parser JSON parser applied to the model's response.
   * @returns The parsed response.
   */
  // The `Record<string, any>` bound mirrors the constraint on JsonOutputParser's type parameter.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  async invokeAndParse<T extends Record<string, any>>(
    messages: BaseMessage[],
    parser: JsonOutputParser<T>,
  ): Promise<T> {
    const response = await this.model.invoke(messages);

    return parser.invoke(response);
  }
}
/**
 * {@link LLM} implementation backed by the LangChain Ollama chat model, configured
 * with `format: 'json'`. Options missing from the config fall back to `DEFAULT_CONFIG`.
 */
export class ChatOllama implements LLM {
  private readonly model: ChatModel;

  /**
   * @param config Model name plus optional base URL, temperature, retry and concurrency settings.
   */
  constructor(config: ChatOllamaConfig) {
    this.model = new ChatModel({
      model: config.model ?? DEFAULT_CONFIG.model,
      temperature: config.temperature ?? DEFAULT_CONFIG.temperature,
      maxRetries: config.maxRetries ?? DEFAULT_CONFIG.maxRetries,
      maxConcurrency: config.maxConcurrency ?? DEFAULT_CONFIG.maxConcurrency,
      baseUrl: config.baseUrl ?? DEFAULT_CONFIG.baseUrl,
      format: 'json',
    });
  }

  /**
   * Sends the messages to the model and runs the response through `parser`.
   *
   * @param messages Conversation to send.
   * @param parser JSON parser applied to the model's response.
   * @returns The parsed response.
   */
  // The `Record<string, any>` bound mirrors the constraint on JsonOutputParser's type parameter.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  async invokeAndParse<T extends Record<string, any>>(
    messages: BaseMessage[],
    parser: JsonOutputParser<T>,
  ): Promise<T> {
    const response = await this.model.invoke(messages);

    return parser.invoke(response);
  }
}
/**
 * {@link LLM} implementation backed by the LangChain OpenAI chat model. Every call
 * requests a JSON-object response. Options missing from the config fall back to
 * `DEFAULT_CONFIG`.
 */
export class ChatOpenAI implements LLM {
  private readonly model: LChatOpenAI;

  /**
   * @param config API key plus optional model, temperature, retry and concurrency settings.
   */
  constructor(config: ChatOpenAIConfig) {
    this.model = new LChatOpenAI({
      model: config.model ?? DEFAULT_CONFIG.model,
      temperature: config.temperature ?? DEFAULT_CONFIG.temperature,
      openAIApiKey: config.apiKey,
      maxRetries: config.maxRetries ?? DEFAULT_CONFIG.maxRetries,
      maxConcurrency: config.maxConcurrency ?? DEFAULT_CONFIG.maxConcurrency,
    });
  }

  /**
   * Sends the messages to the model with `response_format: json_object` and runs
   * the response through `parser`.
   *
   * @param messages Conversation to send.
   * @param parser JSON parser applied to the model's response.
   * @returns The parsed response.
   */
  // The `Record<string, any>` bound mirrors the constraint on JsonOutputParser's type parameter.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  async invokeAndParse<T extends Record<string, any>>(
    messages: BaseMessage[],
    parser: JsonOutputParser<T>,
  ): Promise<T> {
    const response = await this.model.invoke(messages, {
      response_format: { type: 'json_object' },
    });

    return parser.invoke(response);
  }
}