├── .env.example
├── .eslintrc.js
├── .github
    └── workflows
    │   └── release.yml
├── .gitignore
├── .prettierrc
├── .readme
    ├── cover.png
    ├── gui-demo.gif
    └── how-it-works.png
├── .vscode
    └── settings.json
├── LICENCE
├── Makefile
├── NOTICE
├── README.md
├── examples
    ├── .gitignore
    ├── basic
    │   ├── .env.example
    │   ├── index.ts
    │   ├── package-lock.json
    │   ├── package.json
    │   └── tsconfig.json
    ├── google
    │   ├── index.ts
    │   ├── package-lock.json
    │   ├── package.json
    │   └── tsconfig.json
    ├── ollama
    │   ├── .env.example
    │   ├── index.ts
    │   ├── package-lock.json
    │   └── package.json
    └── web-voyager-questions.json
├── package-lock.json
├── package.json
├── release.config.cjs
├── rollup.config.js
├── src
    ├── core
    │   ├── agents
    │   │   ├── agent-base.ts
    │   │   ├── feedback-agent
    │   │   │   ├── feedback-agent.prompt.ts
    │   │   │   ├── feedback-agent.ts
    │   │   │   └── feedback-agent.types.ts
    │   │   ├── openator
    │   │   │   ├── openator.config.ts
    │   │   │   ├── openator.prompt.ts
    │   │   │   ├── openator.ts
    │   │   │   └── openator.types.ts
    │   │   └── summarize-agent
    │   │   │   └── summarize-agent.ts
    │   ├── entities
    │   │   ├── openator-result.ts
    │   │   ├── run.ts
    │   │   ├── task.ts
    │   │   ├── variable-string.ts
    │   │   └── variable.ts
    │   ├── interfaces
    │   │   ├── agent-reporter.interface.ts
    │   │   ├── browser-websocket-server.interface.ts
    │   │   ├── browser.interface.ts
    │   │   ├── event-bus.interface.ts
    │   │   ├── file-system.interface.ts
    │   │   ├── llm.interface.ts
    │   │   ├── reporter.interface.ts
    │   │   └── screenshotter.interface.ts
    │   ├── services
    │   │   ├── realtime-reporter.ts
    │   │   └── task-manager-service.ts
    │   ├── shared
    │   │   └── utils.ts
    │   └── types.ts
    ├── index.ts
    ├── infra
    │   └── services
    │   │   ├── chromium-browser.ts
    │   │   ├── console-reporter.ts
    │   │   ├── dom-service.ts
    │   │   ├── in-memory-file-system.ts
    │   │   ├── local-file-system.ts
    │   │   └── playwright-screenshotter.ts
    ├── init-openator.ts
    └── models
    │   ├── chat-google.ts
    │   ├── chat-ollama.ts
    │   └── chat-openai.ts
├── tsconfig.build.json
└── tsconfig.json


/.env.example:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=<your-openai-api-key>
2 | HYPERBROWSER_API_KEY=<your-hyperbrowser-api-key>
3 | 


--------------------------------------------------------------------------------
/.eslintrc.js:
--------------------------------------------------------------------------------
 1 | module.exports = {
 2 |   parser: '@typescript-eslint/parser', // parse TypeScript sources
 3 |   parserOptions: {
 4 |     project: 'tsconfig.json', // tsconfig handed to the parser for type information
 5 |     tsconfigRootDir: __dirname, // resolve `project` relative to this file's directory
 6 |     sourceType: 'module',
 7 |   },
 8 |   plugins: ['@typescript-eslint/eslint-plugin'],
 9 |   extends: [
10 |     'plugin:@typescript-eslint/recommended',
11 |     'plugin:prettier/recommended', // Prettier preset; formatting options live in .prettierrc
12 |   ],
13 |   root: true, // stop ESLint from searching parent directories for more config
14 |   env: {
15 |     node: true,
16 |     jest: true,
17 |   },
18 |   ignorePatterns: ['.eslintrc.js'], // do not lint this config file itself
19 |   rules: { // rules switched off on top of the presets listed in `extends`
20 |     '@typescript-eslint/interface-name-prefix': 'off',
21 |     '@typescript-eslint/explicit-function-return-type': 'off',
22 |     '@typescript-eslint/explicit-module-boundary-types': 'off',
23 |     '@typescript-eslint/no-explicit-any': 'off', // `any` is allowed in this codebase
24 |   },
25 | };
26 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: Release
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: # the workflow runs on pushes to these branches
 6 |       - main
 7 |       - next
 8 | 
 9 | permissions: # scopes granted to the workflow's GITHUB_TOKEN
10 |   contents: write
11 |   issues: write
12 |   pull-requests: write
13 | 
14 | jobs:
15 |   release:
16 |     runs-on: ubuntu-latest
17 |     steps:
18 |       - uses: actions/checkout@v4
19 |       - uses: actions/setup-node@v4 # no node-version is pinned here
20 |       - run: |
21 |           npm i
22 |           npm run build
23 |       - name: Semantic Release # runs semantic-release with both tokens in its environment
24 |         env:
25 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
26 |           NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
27 |         run: |
28 |           npx semantic-release
29 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # compiled output
 2 | /dist
 3 | /node_modules
 4 | /build
 5 | 
 6 | # Logs
 7 | logs
 8 | *.log
 9 | npm-debug.log*
10 | pnpm-debug.log*
11 | yarn-debug.log*
12 | yarn-error.log*
13 | lerna-debug.log*
14 | 
15 | .pakcage-lock.json
16 | 
17 | # OS
18 | .DS_Store
19 | 
20 | # Tests
21 | /coverage
22 | /.nyc_output
23 | 
24 | # IDEs and editors
25 | /.idea
26 | .project
27 | .classpath
28 | .c9/
29 | *.launch
30 | .settings/
31 | *.sublime-workspace
32 | 
33 | # IDE - VSCode
34 | .vscode/*
35 | !.vscode/settings.json
36 | !.vscode/tasks.json
37 | !.vscode/launch.json
38 | !.vscode/extensions.json
39 | 
40 | # dotenv environment variable files
41 | .env
42 | .env.development.local
43 | .env.test.local
44 | .env.production.local
45 | .env.local
46 | 
47 | # temp directory
48 | .temp
49 | .tmp
50 | 
51 | # Runtime data
52 | pids
53 | *.pid
54 | *.seed
55 | *.pid.lock
56 | 
57 | # Diagnostic reports (https://nodejs.org/api/report.html)
58 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
59 | 
60 | .rollup.cache
61 | 
62 | *.tgz


--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 |   "singleQuote": true,
3 |   "trailingComma": "all"
4 | }


--------------------------------------------------------------------------------
/.readme/cover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/agentlabs-dev/openator/19f9b157e628f4b8f380ab75ed04ef6182c9d805/.readme/cover.png


--------------------------------------------------------------------------------
/.readme/gui-demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/agentlabs-dev/openator/19f9b157e628f4b8f380ab75ed04ef6182c9d805/.readme/gui-demo.gif


--------------------------------------------------------------------------------
/.readme/how-it-works.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/agentlabs-dev/openator/19f9b157e628f4b8f380ab75ed04ef6182c9d805/.readme/how-it-works.png


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |   "editor.formatOnSave": true,
3 |   "editor.defaultFormatter": "esbenp.prettier-vscode",
4 |   "[handlebars]": {
5 |     "editor.formatOnSave": false
6 |   },
7 |   "cSpell.words": ["hookform", "nextui", "Signup"]
8 | }
9 | 


--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2025 AgentLabs, Inc.
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: build watch up upd dev-up dev-upd down re logs logs-playwright logs-backend logs-frontend prune-all # every target in this file is a command, not a file
 2 | 
 3 | build:
 4 | 	docker-compose build
 5 | 
 6 | watch:
 7 | 	docker-compose up
 8 | 
 9 | up:
10 | 	docker-compose up
11 | 
12 | upd:
13 | 	docker-compose up -d
14 | 
15 | dev-up:
16 | 	docker-compose -f docker-compose.dev.yml up --build --force-recreate
17 | 
18 | dev-upd:
19 | 	docker-compose -f docker-compose.dev.yml up --build --force-recreate -d
20 | 
21 | down:
22 | 	docker-compose down
23 | 
24 | re: build watch
25 | 
26 | logs:
27 | 	docker-compose logs -f
28 | 
29 | logs-playwright:
30 | 	docker-compose logs -f playwright
31 | 
32 | logs-backend:
33 | 	docker-compose logs -f backend
34 | 
35 | logs-frontend:
36 | 	docker-compose logs -f frontend
37 | 
38 | prune-all:
39 | 	docker system prune -a --volumes


--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2025 AgentLabs, Inc
 2 | 
 3 | Source code in this repository is licensed under the Apache License
 4 | Version 2.0. Please see LICENCE for more information.
 5 | 
 6 | Every file is under copyright (c) 2023 AgentLabs, Inc unless otherwise
 7 | specified.
 8 | 
 9 | * For a copy of the Apache License Version 2.0, please see LICENCE
10 |   as included in this repository's top-level directory.
11 | 
12 | * All third party components incorporated into this software are licensed
13 |   under the original license provided by the owner of the applicable component.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <p align="center">
  2 |   <a href="https://github.com/agentlabs-dev/openator"><img src="https://raw.githubusercontent.com/agentlabs-dev/openator/refs/heads/main/.readme/cover.png" alt="Openator"></a>
  3 | </p>
  4 | 
  5 | <p align="center">
  6 |     <em>.</em>
  7 | </p>
  8 | 
  9 | <p align=center>
 10 | Openator is a state-of-the-art browser agent tool that is capable of planning and executing actions formulated in natural language.
 11 | </p>
 12 | 
 13 | <p align="center">
 14 | This project is under active development and any help or support is welcome.
 15 | </p>
 16 | 
 17 | <p align="center">
 18 | <a href="" target="_blank">
 19 |     <img src="https://img.shields.io/badge/License-Apache 2.0-blue.svg" alt="License version">
 20 | </a>
 21 | <a href="" target="_blank">
 22 |     <img src="https://img.shields.io/badge/Status-Under Active Development-green.svg" alt="Project status">
 23 | </a>
 24 | </p>
 25 | 
 26 | <p align="center">
 27 | .
 28 | </p>
 29 | 
 30 | <h3 align="center">
 31 | 🌟 Give us some love by starring this repository! 🌟  
 32 | </h3>
 33 | 
 34 | <p align="center">
 35 | .
 36 | </p>
 37 | 
 38 | ## Quick Start
 39 | 
 40 | Install the package using npm or yarn.
 41 | 
 42 | ```bash
 43 | npm i openator
 44 | ```
 45 | 
 46 | Spin up your first agent with a task.
 47 | 
 48 | ```typescript
 49 | import { initOpenator, ChatOpenAI } from 'openator';
 50 | 
 51 | const main = async () => {
 52 |   const llm = new ChatOpenAI({
 53 |     apiKey: process.env.OPENAI_API_KEY!,
 54 |   });
 55 | 
 56 |   const openator = initOpenator({
 57 |     llm,
 58 |     headless: false,
 59 |   });
 60 | 
 61 |   await openator.start(
 62 |     'https://amazon.com',
 63 |     'Find a black wireless keyboard and return the price.',
 64 |   );
 65 | };
 66 | 
 67 | main();
 68 | ```
 69 | 
 70 | ## Add Secrets and Variables
 71 | 
 72 | Optionally, you can add variables and secrets to your agent. These variables will be interpolated during runtime by the agent.
 73 | 
 74 | This is especially helpful if you want to pass more context to the agent, such as a username and a password.
 75 | 
 76 | ```typescript
 77 | import { initOpenator, Variable, ChatOpenAI } from 'openator';
 78 | 
 79 | const llm = new ChatOpenAI({
 80 |   apiKey: process.env.OPENAI_API_KEY!,
 81 | });
 82 | 
 83 | const openator = initOpenator({
 84 |   headless: false,
 85 |   llm,
 86 |   variables: [
 87 |     new Variable({
 88 |       name: 'username',
 89 |       value: 'my username',
 90 |       isSecret: false,
 91 |     }),
 92 |     new Variable({
 93 |       name: 'password',
 94 |       value: process.env.PASSWORD,
 95 |       isSecret: true,
 96 |     }),
 97 |   ],
 98 | });
 99 | 
100 | await openator.start(
101 |   'https://my-website.com',
102 |   'Authenticate with the username {{username}} and password {{password}} and then find the latest news on the website.',
103 | );
104 | ```
105 | 
106 | ## Available LLM Providers
107 | 
108 | Optionally, you can configure the LLM to use a different model or different settings.
109 | 
110 | We support the following models:
111 | 
112 | | Platform           | Supported models                                          | Advised model |
113 | | ------------------ | --------------------------------------------------------- | ------------- |
114 | | OpenAI             | gpt-4o, gpt-4o-mini, gpt-4-turbo                          | gpt-4o        |
115 | | Ollama             | qwen2.5, llama3.2                                         | -             |
116 | | GoogleGenerativeAI | gemini-2.0-flash, gemini-2.0-flash-lite, gemini-1.5-flash | -             |
117 | 
118 | > Note that we benchmarked the performance of Openator on OpenAI gpt-4o and we recommend using it. While you can try other models, we haven't battle-tested them yet.
119 | 
120 | ### OpenAIChat
121 | 
122 | Here's the configuration type for the ChatOpenAI provider.
123 | 
124 | ```typescript
125 | import { ChatOpenAI } from 'openator';
126 | 
127 | const llm = new ChatOpenAI({
128 |   apiKey: process.env.OPENAI_API_KEY!,
129 |   model: 'gpt-4o',
130 |   temperature: 0, // optional
131 |   maxRetries: 3, // optional
132 |   maxConcurrency: 1, // optional
133 | });
134 | ```
135 | 
136 | ### OllamaChat
137 | 
138 | ```typescript
139 | import { ChatOllama } from 'openator';
140 | 
141 | const llm = new ChatOllama({
142 |   model: 'qwen2.5',
143 |   temperature: 0, // optional
144 |   maxRetries: 3, // optional
145 |   maxConcurrency: 1, // optional
146 |   baseUrl: 'http://localhost:11434', // optional
147 | });
148 | ```
149 | 
150 | ### GoogleGenerativeAI
151 | 
152 | ```typescript
153 | import { ChatGoogleGenAI } from 'openator';
154 | 
155 | const llm = new ChatGoogleGenAI({
156 |   model: 'gemini-2.0-flash',
157 |   apiKey: process.env.GOOGLE_GENERATIVE_AI_API_KEY!,
158 |   temperature: 0, // optional
159 |   maxRetries: 3, // optional
160 |   maxConcurrency: 1, // optional
161 | });
162 | ```
163 | 
164 | ## Demo
165 | 
166 | Here is what you can build with Openator. You can find more examples and source code in our main repository. The frontend is not included but can be found in our open-source repository.
167 | 
168 | Example task:
169 | 
170 | ```typescript
171 | await openator.start(
172 |   'https://amazon.com',
173 |   'Purchase a black wireless keyboard',
174 | );
175 | ```
176 | 
177 | <a href="#"><img src="https://github.com/agentlabs-dev/openator/blob/main/.readme/gui-demo.gif?raw=true" alt="agentlabs.dev"></a>
178 | 
179 | ## How it works
180 | 
181 | <p align="center">
182 |   <a href="#"><img src="https://raw.githubusercontent.com/agentlabs-dev/openator/refs/heads/main/.readme/how-it-works.png" alt="agentlabs.dev"></a>
183 | </p>
184 | 
185 | <p align="center">
186 | .
187 | </p>
188 | 
189 | <h3 align="center">
190 | 🌟 Give us some love by starring this repository! 🌟  
191 | </h3>
192 | 
193 | <p align="center">
194 | .
195 | </p>
196 | 


--------------------------------------------------------------------------------
/examples/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules


--------------------------------------------------------------------------------
/examples/basic/.env.example:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=your-key


--------------------------------------------------------------------------------
/examples/basic/index.ts:
--------------------------------------------------------------------------------
 1 | import { ChatOpenAI, initOpenator, Variable } from 'openator';
 2 | 
 3 | import 'dotenv/config';
 4 | 
 5 | /**
 6 |  * Example entry point: builds an OpenAI-backed Openator agent, runs one
 7 |  * task against amazon.com and prints the result.
 8 |  *
 9 |  * Any error (including a missing OPENAI_API_KEY) is caught and logged
10 |  * rather than rethrown.
11 |  */
12 | const main = async () => {
13 |   try {
14 |     // Stop early with an explicit message when the API key is missing.
15 |     if (!process.env.OPENAI_API_KEY) {
16 |       throw new Error('OPENAI_API_KEY is not set');
17 |     }
18 | 
19 |     const llm = new ChatOpenAI({
20 |       apiKey: process.env.OPENAI_API_KEY,
21 |       model: 'gpt-4o',
22 |       temperature: 0,
23 |       maxRetries: 10,
24 |       maxConcurrency: 1,
25 |     });
26 | 
27 |     const openator = initOpenator({
28 |       llm,
29 |       headless: false,
30 |       variables: [
31 |         // NOTE(review): PASSWORD is not validated like OPENAI_API_KEY and the
32 |         // task below never references {{password}} — confirm it is needed.
33 |         new Variable({
34 |           name: 'password',
35 |           value: process.env.PASSWORD!,
36 |           isSecret: true,
37 |         }),
38 |       ],
39 |     });
40 | 
41 |     const result = await openator.start(
42 |       'https://amazon.com',
43 |       'Find a black wireless keyboard and return the price.',
44 |     );
45 | 
46 |     // Print the agent's answer so the run produces visible output.
47 |     console.log(result);
48 |   } catch (error) {
49 |     console.error(error);
50 |   }
51 | };
52 | 
53 | main();
54 | 


--------------------------------------------------------------------------------
/examples/basic/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "basic",
 3 |   "version": "1.0.0",
 4 |   "description": "",
 5 |   "license": "ISC",
 6 |   "author": "",
 7 |   "type": "commonjs",
 8 |   "main": "index.js",
 9 |   "scripts": {
10 |     "test": "echo \"Error: no test specified\" && exit 1",
11 |     "start": "ts-node index.ts"
12 |   },
13 |   "dependencies": {
14 |     "dotenv": "^16.4.7",
15 |     "openator": "^1.2.0",
16 |     "typescript": "^5.7.3"
17 |   },
18 |   "devDependencies": {
19 |     "prettier": "3.5.2"
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/examples/basic/tsconfig.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "compilerOptions": {
  3 |     /* Visit https://aka.ms/tsconfig to read more about this file */
  4 | 
  5 |     /* Projects */
  6 |     // "incremental": true,                              /* Save .tsbuildinfo files to allow for incremental compilation of projects. */
  7 |     // "composite": true,                                /* Enable constraints that allow a TypeScript project to be used with project references. */
  8 |     // "tsBuildInfoFile": "./.tsbuildinfo",              /* Specify the path to .tsbuildinfo incremental compilation file. */
  9 |     // "disableSourceOfProjectReferenceRedirect": true,  /* Disable preferring source files instead of declaration files when referencing composite projects. */
 10 |     // "disableSolutionSearching": true,                 /* Opt a project out of multi-project reference checking when editing. */
 11 |     // "disableReferencedProjectLoad": true,             /* Reduce the number of projects loaded automatically by TypeScript. */
 12 | 
 13 |     /* Language and Environment */
 14 |     "target": "es2016" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */,
 15 |     // "lib": [],                                        /* Specify a set of bundled library declaration files that describe the target runtime environment. */
 16 |     // "jsx": "preserve",                                /* Specify what JSX code is generated. */
 17 |     // "experimentalDecorators": true,                   /* Enable experimental support for legacy experimental decorators. */
 18 |     // "emitDecoratorMetadata": true,                    /* Emit design-type metadata for decorated declarations in source files. */
 19 |     // "jsxFactory": "",                                 /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
 20 |     // "jsxFragmentFactory": "",                         /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
 21 |     // "jsxImportSource": "",                            /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
 22 |     // "reactNamespace": "",                             /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
 23 |     // "noLib": true,                                    /* Disable including any library files, including the default lib.d.ts. */
 24 |     // "useDefineForClassFields": true,                  /* Emit ECMAScript-standard-compliant class fields. */
 25 |     // "moduleDetection": "auto",                        /* Control what method is used to detect module-format JS files. */
 26 | 
 27 |     /* Modules */
 28 |     "module": "commonjs" /* Specify what module code is generated. */,
 29 |     // "rootDir": "./",                                  /* Specify the root folder within your source files. */
 30 |     // "moduleResolution": "node10",                     /* Specify how TypeScript looks up a file from a given module specifier. */
 31 |     // "baseUrl": "./",                                  /* Specify the base directory to resolve non-relative module names. */
 32 |     // "paths": {},                                      /* Specify a set of entries that re-map imports to additional lookup locations. */
 33 |     // "rootDirs": [],                                   /* Allow multiple folders to be treated as one when resolving modules. */
 34 |     // "typeRoots": [],                                  /* Specify multiple folders that act like './node_modules/@types'. */
 35 |     // "types": [],                                      /* Specify type package names to be included without being referenced in a source file. */
 36 |     // "allowUmdGlobalAccess": true,                     /* Allow accessing UMD globals from modules. */
 37 |     // "moduleSuffixes": [],                             /* List of file name suffixes to search when resolving a module. */
 38 |     // "allowImportingTsExtensions": true,               /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */
 39 |     // "rewriteRelativeImportExtensions": true,          /* Rewrite '.ts', '.tsx', '.mts', and '.cts' file extensions in relative import paths to their JavaScript equivalent in output files. */
 40 |     // "resolvePackageJsonExports": true,                /* Use the package.json 'exports' field when resolving package imports. */
 41 |     // "resolvePackageJsonImports": true,                /* Use the package.json 'imports' field when resolving imports. */
 42 |     // "customConditions": [],                           /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */
 43 |     // "noUncheckedSideEffectImports": true,             /* Check side effect imports. */
 44 |     // "resolveJsonModule": true,                        /* Enable importing .json files. */
 45 |     // "allowArbitraryExtensions": true,                 /* Enable importing files with any extension, provided a declaration file is present. */
 46 |     // "noResolve": true,                                /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
 47 | 
 48 |     /* JavaScript Support */
 49 |     // "allowJs": true,                                  /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
 50 |     // "checkJs": true,                                  /* Enable error reporting in type-checked JavaScript files. */
 51 |     // "maxNodeModuleJsDepth": 1,                        /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
 52 | 
 53 |     /* Emit */
 54 |     // "declaration": true,                              /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
 55 |     // "declarationMap": true,                           /* Create sourcemaps for d.ts files. */
 56 |     // "emitDeclarationOnly": true,                      /* Only output d.ts files and not JavaScript files. */
 57 |     // "sourceMap": true,                                /* Create source map files for emitted JavaScript files. */
 58 |     // "inlineSourceMap": true,                          /* Include sourcemap files inside the emitted JavaScript. */
 59 |     // "noEmit": true,                                   /* Disable emitting files from a compilation. */
 60 |     // "outFile": "./",                                  /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
 61 |     // "outDir": "./",                                   /* Specify an output folder for all emitted files. */
 62 |     // "removeComments": true,                           /* Disable emitting comments. */
 63 |     // "importHelpers": true,                            /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
 64 |     // "downlevelIteration": true,                       /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
 65 |     // "sourceRoot": "",                                 /* Specify the root path for debuggers to find the reference source code. */
 66 |     // "mapRoot": "",                                    /* Specify the location where debugger should locate map files instead of generated locations. */
 67 |     // "inlineSources": true,                            /* Include source code in the sourcemaps inside the emitted JavaScript. */
 68 |     // "emitBOM": true,                                  /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
 69 |     // "newLine": "crlf",                                /* Set the newline character for emitting files. */
 70 |     // "stripInternal": true,                            /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
 71 |     // "noEmitHelpers": true,                            /* Disable generating custom helper functions like '__extends' in compiled output. */
 72 |     // "noEmitOnError": true,                            /* Disable emitting files if any type checking errors are reported. */
 73 |     // "preserveConstEnums": true,                       /* Disable erasing 'const enum' declarations in generated code. */
 74 |     // "declarationDir": "./",                           /* Specify the output directory for generated declaration files. */
 75 | 
 76 |     /* Interop Constraints */
 77 |     // "isolatedModules": true,                          /* Ensure that each file can be safely transpiled without relying on other imports. */
 78 |     // "verbatimModuleSyntax": true,                     /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */
 79 |     // "isolatedDeclarations": true,                     /* Require sufficient annotation on exports so other tools can trivially generate declaration files. */
 80 |     // "allowSyntheticDefaultImports": true,             /* Allow 'import x from y' when a module doesn't have a default export. */
 81 |     "esModuleInterop": true /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */,
 82 |     // "preserveSymlinks": true,                         /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
 83 |     "forceConsistentCasingInFileNames": true /* Ensure that casing is correct in imports. */,
 84 | 
 85 |     /* Type Checking */
 86 |     "strict": true /* Enable all strict type-checking options. */,
 87 |     // "noImplicitAny": true,                            /* Enable error reporting for expressions and declarations with an implied 'any' type. */
 88 |     // "strictNullChecks": true,                         /* When type checking, take into account 'null' and 'undefined'. */
 89 |     // "strictFunctionTypes": true,                      /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
 90 |     // "strictBindCallApply": true,                      /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
 91 |     // "strictPropertyInitialization": true,             /* Check for class properties that are declared but not set in the constructor. */
 92 |     // "strictBuiltinIteratorReturn": true,              /* Built-in iterators are instantiated with a 'TReturn' type of 'undefined' instead of 'any'. */
 93 |     // "noImplicitThis": true,                           /* Enable error reporting when 'this' is given the type 'any'. */
 94 |     // "useUnknownInCatchVariables": true,               /* Default catch clause variables as 'unknown' instead of 'any'. */
 95 |     // "alwaysStrict": true,                             /* Ensure 'use strict' is always emitted. */
 96 |     // "noUnusedLocals": true,                           /* Enable error reporting when local variables aren't read. */
 97 |     // "noUnusedParameters": true,                       /* Raise an error when a function parameter isn't read. */
 98 |     // "exactOptionalPropertyTypes": true,               /* Interpret optional property types as written, rather than adding 'undefined'. */
 99 |     // "noImplicitReturns": true,                        /* Enable error reporting for codepaths that do not explicitly return in a function. */
100 |     // "noFallthroughCasesInSwitch": true,               /* Enable error reporting for fallthrough cases in switch statements. */
101 |     // "noUncheckedIndexedAccess": true,                 /* Add 'undefined' to a type when accessed using an index. */
102 |     // "noImplicitOverride": true,                       /* Ensure overriding members in derived classes are marked with an override modifier. */
103 |     // "noPropertyAccessFromIndexSignature": true,       /* Enforces using indexed accessors for keys declared using an indexed type. */
104 |     // "allowUnusedLabels": true,                        /* Disable error reporting for unused labels. */
105 |     // "allowUnreachableCode": true,                     /* Disable error reporting for unreachable code. */
106 | 
107 |     /* Completeness */
108 |     // "skipDefaultLibCheck": true,                      /* Skip type checking .d.ts files that are included with TypeScript. */
109 |     "skipLibCheck": true /* Skip type checking all .d.ts files. */
110 |   }
111 | }
112 | 


--------------------------------------------------------------------------------
/examples/google/index.ts:
--------------------------------------------------------------------------------
 1 | import { ChatGoogleGenAI, initOpenator } from 'openator';
 2 | 
 3 | import 'dotenv/config';
 4 | 
 5 | /**
 6 |  * Runs the Google Gemini example: asks Openator to browse amazon.com and
 7 |  * report the price of a black wireless keyboard.
 8 |  *
 9 |  * Reads the API key from the GEMINI_API_KEY environment variable.
10 |  *
11 |  * @throws Error when GEMINI_API_KEY is missing or empty.
12 |  */
13 | const main = async () => {
14 |   // Fail fast with a clear message instead of handing `undefined` to the client.
15 |   const apiKey = process.env.GEMINI_API_KEY;
16 |   if (!apiKey) {
17 |     throw new Error('GEMINI_API_KEY is not set.');
18 |   }
19 | 
20 |   const llm = new ChatGoogleGenAI({
21 |     apiKey,
22 |     model: 'gemini-1.5-flash',
23 |   });
24 | 
25 |   const openator = initOpenator({
26 |     llm,
27 |     headless: false,
28 |   });
29 | 
30 |   const result = await openator.start(
31 |     'https://amazon.com',
32 |     'Find a black wirelesskeyboard and return the price.',
33 |   );
34 | 
35 |   // Print the run's outcome so the example produces visible output.
36 |   console.log(result);
37 | };
38 | 
39 | // Log failures and exit non-zero rather than leaving an unhandled rejection.
40 | main().catch((error: unknown) => {
41 |   console.error(error);
42 |   process.exitCode = 1;
43 | });
44 | 


--------------------------------------------------------------------------------
/examples/google/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "google",
 3 |   "version": "1.0.0",
 4 |   "description": "",
 5 |   "license": "ISC",
 6 |   "author": "",
 7 |   "type": "commonjs",
 8 |   "main": "index.js",
 9 |   "scripts": {
10 |     "test": "echo \"Error: no test specified\" && exit 1"
11 |   },
12 |   "dependencies": {
13 |     "dotenv": "^16.4.7",
14 |     "openator": "^1.3.0"
15 |   }
16 | }
17 | 


--------------------------------------------------------------------------------
/examples/google/tsconfig.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "compilerOptions": {
  3 |     /* Visit https://aka.ms/tsconfig to read more about this file */
  4 | 
  5 |     /* Projects */
  6 |     // "incremental": true,                              /* Save .tsbuildinfo files to allow for incremental compilation of projects. */
  7 |     // "composite": true,                                /* Enable constraints that allow a TypeScript project to be used with project references. */
  8 |     // "tsBuildInfoFile": "./.tsbuildinfo",              /* Specify the path to .tsbuildinfo incremental compilation file. */
  9 |     // "disableSourceOfProjectReferenceRedirect": true,  /* Disable preferring source files instead of declaration files when referencing composite projects. */
 10 |     // "disableSolutionSearching": true,                 /* Opt a project out of multi-project reference checking when editing. */
 11 |     // "disableReferencedProjectLoad": true,             /* Reduce the number of projects loaded automatically by TypeScript. */
 12 | 
 13 |     /* Language and Environment */
 14 |     "target": "es2016",                                  /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
 15 |     // "lib": [],                                        /* Specify a set of bundled library declaration files that describe the target runtime environment. */
 16 |     // "jsx": "preserve",                                /* Specify what JSX code is generated. */
 17 |     // "experimentalDecorators": true,                   /* Enable experimental support for legacy experimental decorators. */
 18 |     // "emitDecoratorMetadata": true,                    /* Emit design-type metadata for decorated declarations in source files. */
 19 |     // "jsxFactory": "",                                 /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
 20 |     // "jsxFragmentFactory": "",                         /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
 21 |     // "jsxImportSource": "",                            /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
 22 |     // "reactNamespace": "",                             /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
 23 |     // "noLib": true,                                    /* Disable including any library files, including the default lib.d.ts. */
 24 |     // "useDefineForClassFields": true,                  /* Emit ECMAScript-standard-compliant class fields. */
 25 |     // "moduleDetection": "auto",                        /* Control what method is used to detect module-format JS files. */
 26 | 
 27 |     /* Modules */
 28 |     "module": "commonjs",                                /* Specify what module code is generated. */
 29 |     // "rootDir": "./",                                  /* Specify the root folder within your source files. */
 30 |     // "moduleResolution": "node10",                     /* Specify how TypeScript looks up a file from a given module specifier. */
 31 |     // "baseUrl": "./",                                  /* Specify the base directory to resolve non-relative module names. */
 32 |     // "paths": {},                                      /* Specify a set of entries that re-map imports to additional lookup locations. */
 33 |     // "rootDirs": [],                                   /* Allow multiple folders to be treated as one when resolving modules. */
 34 |     // "typeRoots": [],                                  /* Specify multiple folders that act like './node_modules/@types'. */
 35 |     // "types": [],                                      /* Specify type package names to be included without being referenced in a source file. */
 36 |     // "allowUmdGlobalAccess": true,                     /* Allow accessing UMD globals from modules. */
 37 |     // "moduleSuffixes": [],                             /* List of file name suffixes to search when resolving a module. */
 38 |     // "allowImportingTsExtensions": true,               /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */
 39 |     // "rewriteRelativeImportExtensions": true,          /* Rewrite '.ts', '.tsx', '.mts', and '.cts' file extensions in relative import paths to their JavaScript equivalent in output files. */
 40 |     // "resolvePackageJsonExports": true,                /* Use the package.json 'exports' field when resolving package imports. */
 41 |     // "resolvePackageJsonImports": true,                /* Use the package.json 'imports' field when resolving imports. */
 42 |     // "customConditions": [],                           /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */
 43 |     // "noUncheckedSideEffectImports": true,             /* Check side effect imports. */
 44 |     // "resolveJsonModule": true,                        /* Enable importing .json files. */
 45 |     // "allowArbitraryExtensions": true,                 /* Enable importing files with any extension, provided a declaration file is present. */
 46 |     // "noResolve": true,                                /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
 47 | 
 48 |     /* JavaScript Support */
 49 |     // "allowJs": true,                                  /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
 50 |     // "checkJs": true,                                  /* Enable error reporting in type-checked JavaScript files. */
 51 |     // "maxNodeModuleJsDepth": 1,                        /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
 52 | 
 53 |     /* Emit */
 54 |     // "declaration": true,                              /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
 55 |     // "declarationMap": true,                           /* Create sourcemaps for d.ts files. */
 56 |     // "emitDeclarationOnly": true,                      /* Only output d.ts files and not JavaScript files. */
 57 |     // "sourceMap": true,                                /* Create source map files for emitted JavaScript files. */
 58 |     // "inlineSourceMap": true,                          /* Include sourcemap files inside the emitted JavaScript. */
 59 |     // "noEmit": true,                                   /* Disable emitting files from a compilation. */
 60 |     // "outFile": "./",                                  /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
 61 |     // "outDir": "./",                                   /* Specify an output folder for all emitted files. */
 62 |     // "removeComments": true,                           /* Disable emitting comments. */
 63 |     // "importHelpers": true,                            /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
 64 |     // "downlevelIteration": true,                       /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
 65 |     // "sourceRoot": "",                                 /* Specify the root path for debuggers to find the reference source code. */
 66 |     // "mapRoot": "",                                    /* Specify the location where debugger should locate map files instead of generated locations. */
 67 |     // "inlineSources": true,                            /* Include source code in the sourcemaps inside the emitted JavaScript. */
 68 |     // "emitBOM": true,                                  /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
 69 |     // "newLine": "crlf",                                /* Set the newline character for emitting files. */
 70 |     // "stripInternal": true,                            /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
 71 |     // "noEmitHelpers": true,                            /* Disable generating custom helper functions like '__extends' in compiled output. */
 72 |     // "noEmitOnError": true,                            /* Disable emitting files if any type checking errors are reported. */
 73 |     // "preserveConstEnums": true,                       /* Disable erasing 'const enum' declarations in generated code. */
 74 |     // "declarationDir": "./",                           /* Specify the output directory for generated declaration files. */
 75 | 
 76 |     /* Interop Constraints */
 77 |     // "isolatedModules": true,                          /* Ensure that each file can be safely transpiled without relying on other imports. */
 78 |     // "verbatimModuleSyntax": true,                     /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */
 79 |     // "isolatedDeclarations": true,                     /* Require sufficient annotation on exports so other tools can trivially generate declaration files. */
 80 |     // "allowSyntheticDefaultImports": true,             /* Allow 'import x from y' when a module doesn't have a default export. */
 81 |     "esModuleInterop": true,                             /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
 82 |     // "preserveSymlinks": true,                         /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
 83 |     "forceConsistentCasingInFileNames": true,            /* Ensure that casing is correct in imports. */
 84 | 
 85 |     /* Type Checking */
 86 |     "strict": true,                                      /* Enable all strict type-checking options. */
 87 |     // "noImplicitAny": true,                            /* Enable error reporting for expressions and declarations with an implied 'any' type. */
 88 |     // "strictNullChecks": true,                         /* When type checking, take into account 'null' and 'undefined'. */
 89 |     // "strictFunctionTypes": true,                      /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
 90 |     // "strictBindCallApply": true,                      /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
 91 |     // "strictPropertyInitialization": true,             /* Check for class properties that are declared but not set in the constructor. */
 92 |     // "strictBuiltinIteratorReturn": true,              /* Built-in iterators are instantiated with a 'TReturn' type of 'undefined' instead of 'any'. */
 93 |     // "noImplicitThis": true,                           /* Enable error reporting when 'this' is given the type 'any'. */
 94 |     // "useUnknownInCatchVariables": true,               /* Default catch clause variables as 'unknown' instead of 'any'. */
 95 |     // "alwaysStrict": true,                             /* Ensure 'use strict' is always emitted. */
 96 |     // "noUnusedLocals": true,                           /* Enable error reporting when local variables aren't read. */
 97 |     // "noUnusedParameters": true,                       /* Raise an error when a function parameter isn't read. */
 98 |     // "exactOptionalPropertyTypes": true,               /* Interpret optional property types as written, rather than adding 'undefined'. */
 99 |     // "noImplicitReturns": true,                        /* Enable error reporting for codepaths that do not explicitly return in a function. */
100 |     // "noFallthroughCasesInSwitch": true,               /* Enable error reporting for fallthrough cases in switch statements. */
101 |     // "noUncheckedIndexedAccess": true,                 /* Add 'undefined' to a type when accessed using an index. */
102 |     // "noImplicitOverride": true,                       /* Ensure overriding members in derived classes are marked with an override modifier. */
103 |     // "noPropertyAccessFromIndexSignature": true,       /* Enforces using indexed accessors for keys declared using an indexed type. */
104 |     // "allowUnusedLabels": true,                        /* Disable error reporting for unused labels. */
105 |     // "allowUnreachableCode": true,                     /* Disable error reporting for unreachable code. */
106 | 
107 |     /* Completeness */
108 |     // "skipDefaultLibCheck": true,                      /* Skip type checking .d.ts files that are included with TypeScript. */
109 |     "skipLibCheck": true                                 /* Skip type checking all .d.ts files. */
110 |   }
111 | }
112 | 


--------------------------------------------------------------------------------
/examples/ollama/.env.example:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=your-key


--------------------------------------------------------------------------------
/examples/ollama/index.ts:
--------------------------------------------------------------------------------
 1 | import { ChatOllama, initOpenator } from 'openator';
 2 | 
 3 | /**
 4 |  * Runs the Ollama example: drives Openator with a model served by Ollama to
 5 |  * browse amazon.com and look up a keyboard price.
 6 |  *
 7 |  * Any error raised while setting up or running the agent is logged to
 8 |  * stderr instead of being rethrown.
 9 |  */
10 | const main = async () => {
11 |   try {
12 |     const llm = new ChatOllama({
13 |       model: 'qwen2.5',
14 |       temperature: 0,
15 |       maxRetries: 10,
16 |       maxConcurrency: 1,
17 |       // Ollama server on this machine (11434 is Ollama's default port).
18 |       baseUrl: 'http://127.0.0.1:11434',
19 |     });
20 | 
21 |     const openator = initOpenator({
22 |       llm,
23 |       headless: false,
24 |     });
25 | 
26 |     const result = await openator.start(
27 |       'https://amazon.com',
28 |       'Find a black wirelesskeyboard and return the price.',
29 |     );
30 | 
31 |     // Print the run's outcome so the example produces visible output.
32 |     console.log(result);
33 |   } catch (error) {
34 |     console.error(error);
35 |   }
36 | };
37 | 
38 | main();
39 | 


--------------------------------------------------------------------------------
/examples/ollama/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "dependencies": {
3 |     "openator": "file:../../openator-v1.0.3.tgz"
4 |   }
5 | }
6 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "name": "openator",
  3 |   "version": "v1.2.0",
  4 |   "description": "SOTA AI-browsing agent that is capable of planning and executing actions formulated in natural language.",
  5 |   "author": "Kevin Piacentini <kevin@agentlabs.dev>",
  6 |   "private": false,
  7 |   "license": "Apache-2.0",
  8 |   "type": "module",
  9 |   "main": "dist/index.js",
 10 |   "types": "dist/index.d.ts",
 11 |   "homepage": "https://github.com/agentlabs-dev/openator",
 12 |   "keywords": [
 13 |     "openator",
 14 |     "ai agent",
 15 |     "ai",
 16 |     "browser use",
 17 |     "agentlabs",
 18 |     "playwright"
 19 |   ],
 20 |   "repository": {
 21 |     "type": "git",
 22 |     "url": "https://github.com/agentlabs-dev/openator.git"
 23 |   },
 24 |   "publishConfig": {
 25 |     "access": "public"
 26 |   },
 27 |   "resolutions": {
 28 |     "rollup": "npm:@rollup/wasm-node@*"
 29 |   },
 30 |   "files": [
 31 |     "dist",
 32 |     "README.md"
 33 |   ],
 34 |   "scripts": {
 35 |     "format": "prettier --write \"src/**/*.ts\" \"test/**/*.ts\"",
 36 |     "build": "npx rollup -c",
 37 |     "buildpack": "npm run build && npm pack",
 38 |     "lint": "eslint \"{src,apps,libs,test}/**/*.ts\" --fix"
 39 |   },
 40 |   "optionalDependencies": {
 41 |     "@rollup/rollup-linux-x64-gnu": "*"
 42 |   },
 43 |   "dependencies": {
 44 |     "@hyperbrowser/sdk": "^0.30.0",
 45 |     "@langchain/google-genai": "^0.1.9",
 46 |     "@langchain/ollama": "^0.2.0",
 47 |     "@langchain/openai": "^0.4.4",
 48 |     "@mozilla/readability": "^0.5.0",
 49 |     "class-validator": "^0.14.1",
 50 |     "dom-to-semantic-markdown": "^1.3.0",
 51 |     "dotenv": "^16.4.7",
 52 |     "fs-extra": "^11.3.0",
 53 |     "jsdom": "^26.0.0",
 54 |     "jsondom": "^0.4.1",
 55 |     "langchain": "^0.3.19",
 56 |     "openator": "^1.1.0",
 57 |     "playwright": "^1.50.1",
 58 |     "reflect-metadata": "^0.2.2",
 59 |     "rxjs": "^7.8.1",
 60 |     "socket.io": "^4.8.1",
 61 |     "zod": "^3.24.1",
 62 |     "zod-to-json-schema": "^3.24.1"
 63 |   },
 64 |   "devDependencies": {
 65 |     "@rollup/plugin-json": "^6.1.0",
 66 |     "@rollup/plugin-typescript": "^12.1.2",
 67 |     "@semantic-release/github": "^11.0.1",
 68 |     "@semantic-release/npm": "^12.0.1",
 69 |     "@types/express": "^5.0.0",
 70 |     "@types/jest": "^29.5.14",
 71 |     "@types/jsdom": "^21.1.7",
 72 |     "@types/node": "^22.13.1",
 73 |     "@types/supertest": "^6.0.2",
 74 |     "@typescript-eslint/eslint-plugin": "^8.24.0",
 75 |     "@typescript-eslint/parser": "^8.24.0",
 76 |     "eslint": "^9.20.0",
 77 |     "eslint-config-prettier": "^10.0.1",
 78 |     "eslint-plugin-prettier": "^5.2.3",
 79 |     "jest": "^29.7.0",
 80 |     "prettier": "^3.5.0",
 81 |     "rollup": "^4.34.8",
 82 |     "rollup-plugin-dts": "^6.1.1",
 83 |     "semantic-release": "^24.2.3",
 84 |     "semantic-release-gitmoji": "^1.6.8",
 85 |     "source-map-support": "^0.5.21",
 86 |     "supertest": "^7.0.0",
 87 |     "ts-jest": "^29.2.5",
 88 |     "ts-loader": "^9.5.2",
 89 |     "ts-node": "^10.9.2",
 90 |     "tsconfig-paths": "^4.2.0",
 91 |     "typescript": "^5.7.3"
 92 |   },
 93 |   "jest": {
 94 |     "moduleFileExtensions": [
 95 |       "js",
 96 |       "json",
 97 |       "ts"
 98 |     ],
 99 |     "rootDir": "src",
100 |     "testRegex": ".*\\.spec\\.ts$",
101 |     "transform": {
102 |       "^.+\\.(t|j)s$": "ts-jest"
103 |     },
104 |     "collectCoverageFrom": [
105 |       "**/*.(t|j)s"
106 |     ],
107 |     "coverageDirectory": "../coverage",
108 |     "testEnvironment": "node"
109 |   }
110 | }
111 | 


--------------------------------------------------------------------------------
/release.config.cjs:
--------------------------------------------------------------------------------
1 | module.exports = { // semantic-release configuration (CommonJS export)
2 |   plugins: ['@semantic-release/github', '@semantic-release/npm'], // plugins loaded for a release run
3 |   branches: ['main', { name: 'next', prerelease: true }], // `main` is a regular release branch; `next` is flagged as a prerelease branch
4 | };
5 | 


--------------------------------------------------------------------------------
/rollup.config.js:
--------------------------------------------------------------------------------
 1 | // Contents of the file /rollup.config.js
 2 | import typescript from '@rollup/plugin-typescript';
 3 | import json from '@rollup/plugin-json';
 4 | 
 5 | const config = [
 6 |   {
 7 |     input: 'src/index.ts',
 8 |     output: {
 9 |       file: 'dist/index.js',
10 |       format: 'es',
11 |       sourcemap: true,
12 |     },
13 |     external: [
14 |       'events',
15 |       'ora-classic',
16 |       'crypto',
17 |       'jsdom',
18 |       'playwright',
19 |       'dotenv/config',
20 |       'dom-to-semantic-markdown',
21 |       '@langchain/core/output_parsers',
22 |       '@langchain/openai',
23 |       '@langchain/core/messages',
24 |       'zod',
25 |       'zod-to-json-schema',
26 |     ],
27 |     plugins: [
28 |       typescript({
29 |         exclude: ['examples/**'],
30 |       }),
31 |       json(),
32 |     ],
33 |   },
34 | ];
35 | export default config;
36 | 


--------------------------------------------------------------------------------
/src/core/agents/agent-base.ts:
--------------------------------------------------------------------------------
  1 | import { LLM } from '../interfaces/llm.interface';
  2 | import { HumanMessage, SystemMessage } from '@langchain/core/messages';
  3 | import { JsonOutputParser } from '@langchain/core/output_parsers';
  4 | import { z } from 'zod';
  5 | 
  6 | /**
  7 |  * This is an experimental implementation of an agent.
  8 |  * It is not used in the current version of the application.
  9 |  */
 10 | 
 11 | export type AgentTool = {
 12 |   name: string;
 13 |   description: string;
 14 |   usage: string;
 15 | };
 16 | 
 17 | export type AgentBaseConfig = {
 18 |   role: string;
 19 |   goal: string;
 20 |   backstory: string;
 21 |   tools: AgentTool[];
 22 |   llm: LLM;
 23 |   strictJsonOutput: boolean;
 24 |   responseSchema: z.ZodSchema;
 25 | };
 26 | 
 27 | export class Agent<ResponseType> {
 28 |   public readonly backstory: string;
 29 |   public readonly goal: string;
 30 |   public readonly tools: AgentTool[];
 31 |   public readonly role: string;
 32 |   public readonly strictJsonOutput: boolean;
 33 |   public readonly responseSchema: z.ZodSchema;
 34 | 
 35 |   constructor(private readonly config: AgentBaseConfig) {
 36 |     this.backstory = config.backstory;
 37 |     this.goal = config.goal;
 38 |     this.tools = config.tools;
 39 |     this.role = config.role;
 40 |     this.strictJsonOutput = config.strictJsonOutput;
 41 |     this.responseSchema = config.responseSchema;
 42 |   }
 43 | 
 44 |   private getSystemPrompt() {
 45 |     return `
 46 |       You are a ${this.role}
 47 | 
 48 |       ${this.backstory}
 49 | 
 50 |       ${this.goal}
 51 | 
 52 |       ${this.tools}
 53 | 
 54 |       ${this.strictJsonOutput ? 'IMPORTANT: your output must always be a valid JSON object.' : ''}
 55 |     `;
 56 |   }
 57 | 
 58 |   private getUserTaskPrompt(task: AgentTask) {
 59 |     return task.getTaskPrompt();
 60 |   }
 61 | 
 62 |   private getSystemMessage() {
 63 |     return new SystemMessage({
 64 |       content: this.getSystemPrompt(),
 65 |     });
 66 |   }
 67 | 
 68 |   private getHumanMessage(task: AgentTask) {
 69 |     return new HumanMessage({
 70 |       content: [
 71 |         {
 72 |           role: 'user',
 73 |           type: 'text',
 74 |           text: this.getUserTaskPrompt(task),
 75 |         },
 76 |       ],
 77 |     });
 78 |   }
 79 | 
 80 |   async perform(task: AgentTask) {
 81 |     const messages = [this.getSystemMessage(), this.getHumanMessage(task)];
 82 | 
 83 |     type ResponseType = z.infer<typeof this.responseSchema>;
 84 | 
 85 |     const parser = new JsonOutputParser<ResponseType>();
 86 | 
 87 |     const response = await this.config.llm.invokeAndParse(messages, parser);
 88 | 
 89 |     return response;
 90 |   }
 91 | }
 92 | 
 93 | export type AgentTaskConfig = {
 94 |   description: string;
 95 |   goal: string;
 96 |   expectedOutput: string;
 97 |   validOutputExamples: string;
 98 |   invalidOutputExamples: string;
 99 | };
100 | 
101 | export class AgentTask {
102 |   private input: string;
103 |   private images: string[] | undefined;
104 |   private memory: string | undefined;
105 | 
106 |   constructor(private readonly config: AgentTaskConfig) {}
107 | 
108 |   prepare(params: { images?: string[]; memory?: string; input: string }) {
109 |     this.input = params.input;
110 |     this.images = params.images;
111 |     this.memory = params.memory;
112 |   }
113 | 
114 |   getTaskPrompt() {
115 |     return `    
116 |       # Task description:
117 |       ${this.config.description}
118 | 
119 |       # Task goal:
120 |       ${this.config.goal}
121 | 
122 |       # Expected output:
123 |       ${this.config.expectedOutput}
124 | 
125 |       # Example valid outputs:
126 |       ${this.config.validOutputExamples};
127 | 
128 |       # Example invalid outputs:
129 |       ${this.config.invalidOutputExamples}
130 | 
131 |       # Images:
132 |       ${this.images} at the end
133 | 
134 |       # Memory:
135 |       ${this.memory}
136 | 
137 |       # User input:
138 |       ${this.input}
139 |     `;
140 |   }
141 | 
142 |   getTaskMessages() {
143 |     const images =
144 |       this.images?.map((image) => {
145 |         return {
146 |           type: 'image_url',
147 |           image_url: {
148 |             url: image,
149 |           },
150 |         };
151 |       }) ?? [];
152 | 
153 |     return [
154 |       new HumanMessage({
155 |         content: [
156 |           {
157 |             type: 'text',
158 |             text: this.getTaskPrompt(),
159 |           },
160 |           ...images,
161 |         ],
162 |       }),
163 |     ];
164 |   }
165 | }
166 | 


--------------------------------------------------------------------------------
/src/core/agents/feedback-agent/feedback-agent.prompt.ts:
--------------------------------------------------------------------------------
  1 | import { Task } from '@/core/entities/task';
  2 | import { HumanMessage, SystemMessage } from '@langchain/core/messages';
  3 | 
  4 | export class EvaluationAgentSystemPrompt {
  5 |   constructor() {}
  6 | 
  7 |   getSystemPrompt() {
  8 |     return `
  9 |      As an evaluator, you will be presented with three primary components to assist you in your role:
 10 | 
 11 |     1. Web Task Instruction: This is a clear and specific directive provided in natural language, detailing the online activity to be carried out. These requirements may include conducting searches, verifying information, comparing prices, checking availability, or any other action relevant to the specified web service (such as Amazon, Apple, ArXiv, BBC News, Booking etc).
 12 | 
 13 |     2. Result Screenshots: This is a visual representation of the screen showing the result or intermediate state of performing a web task. It serves as visual proof of the actions taken in response to the instruction, and may not represent everything the agent sees.
 14 | 
 15 |     3. Result Response: This is a textual response obtained after the execution of the web task. It serves as textual result in response to the instruction.
 16 |     
 17 |     4. Task History Summary: This is a summary of the task history of the agent, it serves as context proof of the actions performed by the agent.
 18 | 
 19 |     5. Previous Task Result: This is the result of the previous task, it serves as context proof of the decision making of the agent.
 20 | 
 21 |     
 22 |     -- You DO NOT NEED to interact with web pages or perform actions such as booking flights or conducting searches on websites.
 23 |     -- You SHOULD NOT make assumptions based on information not presented in the screenshot when comparing it to the instructions. If you cannot find any information in the screenshot that matches the instruction, you can believe the information in the response.
 24 |     -- Your primary responsibility is to conduct a thorough assessment of the web task instruction against the outcome depicted in the screenshot and in the response, evaluating whether the actions taken align with the given instructions.
 25 |     -- NOTE that the instruction may involve more than one task, for example, locating the garage and summarizing the review. Failing to complete either task, such as not providing a summary, should be considered unsuccessful.
 26 |     -- NOTE that the screenshot is authentic, but the response provided by LLM is generated at the end of web browsing, and there may be discrepancies between the text and the screenshots.
 27 |     -- Note the difference: 1) Result response may contradict the screenshot, then the content of the screenshot prevails, 2) The content in the Result response is not mentioned on the screenshot, choose to believe the content.
 28 |     -- If you are not sure whether you should believe the content in the response, you should choose unknown.
 29 | 
 30 |     You should elaborate on how you arrived at your final evaluation and then provide a definitive verdict on whether the task has been successfully accomplished, either as 'success', 'failed', or 'unknown'.
 31 | 
 32 |     If your verdict is 'failed', you must provide a 'hint' and a 'memoryLearning' to the user to improve the result. 
 33 |     This hint could be ideas of actions to perform to find the information you need.
 34 |     The memoryLearning is a string that will explain the agent what it should not do or what it should do differently later if he is in the same situation.
 35 | 
 36 |     IMPORTANT RULE: you must answer in JSON format including the result and explanation fields.
 37 | 
 38 |     Example 1:
 39 |     { "result": "success", "explanation": "From the two latest screenshots, we can see that the agent has successfully found the recipe and provided a summary of the reviews.", "hint": null }
 40 | 
 41 |     Example 2:
 42 |     { "result": "failed", "memoryLearning": "'Easy Vegetarian Spinach Lasagna was not a good choice'", "explanation": "The task was to find a vegetarian lasagna recipe with more than 100 reviews and a rating of at least 4.5 stars. The 'Easy Vegetarian Spinach Lasagna' has a rating of 4.6 stars but only 92 reviews, which does not meet the requirement of more than 100 reviews", "hint": "Go back and search for a recipe with more than 100 reviews." }
 43 | 
 44 |     Example 3:
 45 |     { "result": "failed", "memoryLearning": "'Cheese Burger should not be selected again'", "explanation": "The task was to find a vegetarian lasagna recipe with more than 100 reviews and a rating of at least 4.5 stars. The 'Cheese Burger' is not a vegetarian recipe", "hint": "Go back and search for a vegetarian recipe." }
 46 | 
 47 |     Example 3:
 48 |     { "result": "unknown", "memoryLearning": null, "explanation": "The agent did not provide a summary of the reviews." }
 49 |     `;
 50 |   }
 51 | 
 52 |   getSystemMessage() {
 53 |     return new SystemMessage({
 54 |       content: this.getSystemPrompt(),
 55 |     });
 56 |   }
 57 | }
 58 | 
 59 | export class EvaluationAgentUserPrompt {
 60 |   constructor() {}
 61 | 
 62 |   getUserPrompt({
 63 |     pageUrl,
 64 |     task,
 65 |     answer,
 66 |     screenshotCount,
 67 |     taskHistorySummary,
 68 |     previousTaskResult,
 69 |   }: {
 70 |     pageUrl: string;
 71 |     task: string;
 72 |     answer: string;
 73 |     screenshotCount: number;
 74 |     taskHistorySummary: string;
 75 |     previousTaskResult: string;
 76 |   }) {
 77 |     return `
 78 |     CURRENT PAGE URL: ${pageUrl}
 79 |     TASK: ${task}  
 80 |     RESULT RESPONSE: ${answer}
 81 |     ${screenshotCount} screenshot at the end:
 82 |     TASK HISTORY SUMMARY: ${taskHistorySummary}
 83 |     PREVIOUS TASK RESULT: ${previousTaskResult}
 84 |     `;
 85 |   }
 86 | 
 87 |   getUserMessage({
 88 |     pageUrl,
 89 |     screenshotUrls,
 90 |     task,
 91 |     answer,
 92 |     taskHistorySummary,
 93 |     previousTaskResult,
 94 |   }: {
 95 |     pageUrl: string;
 96 |     screenshotUrls: string[];
 97 |     task: string;
 98 |     answer: string;
 99 |     taskHistorySummary: string;
100 |     previousTaskResult: string;
101 |   }) {
102 |     if (!screenshotUrls.length) {
103 |       throw new Error('No screenshot URLs provided to the evaluation agent');
104 |     }
105 | 
106 |     const last3Screenshots = screenshotUrls.slice(-3);
107 | 
108 |     const screenshotPrompts = last3Screenshots.map((url) => {
109 |       return {
110 |         type: 'image_url',
111 |         image_url: {
112 |           url,
113 |           detail: 'high',
114 |         },
115 |       };
116 |     });
117 | 
118 |     return new HumanMessage({
119 |       content: [
120 |         {
121 |           type: 'text',
122 |           text: this.getUserPrompt({
123 |             pageUrl,
124 |             task,
125 |             answer,
126 |             screenshotCount: last3Screenshots.length,
127 |             taskHistorySummary,
128 |             previousTaskResult,
129 |           }),
130 |         },
131 |         ...screenshotPrompts,
132 |       ],
133 |     });
134 |   }
135 | }
136 | 


--------------------------------------------------------------------------------
/src/core/agents/feedback-agent/feedback-agent.ts:
--------------------------------------------------------------------------------
 1 | import { LLM } from '@/core/interfaces/llm.interface';
 2 | import { EvaluationResponse } from './feedback-agent.types';
 3 | import { JsonOutputParser } from '@langchain/core/output_parsers';
 4 | import {
 5 |   EvaluationAgentSystemPrompt,
 6 |   EvaluationAgentUserPrompt,
 7 | } from './feedback-agent.prompt';
 8 | 
 9 | export class FeedbackAgent {
10 |   constructor(private readonly llmService: LLM) {}
11 | 
12 |   async evaluate({
13 |     pageUrl,
14 |     screenshotUrls,
15 |     task,
16 |     answer,
17 |     taskHistorySummary,
18 |     previousTaskResult,
19 |   }: {
20 |     pageUrl: string;
21 |     screenshotUrls: string[];
22 |     task: string;
23 |     answer: string;
24 |     previousTaskResult: string;
25 |     taskHistorySummary: string;
26 |   }) {
27 |     const systemMessage = new EvaluationAgentSystemPrompt().getSystemMessage();
28 |     const humanMessage = new EvaluationAgentUserPrompt().getUserMessage({
29 |       pageUrl,
30 |       screenshotUrls,
31 |       task,
32 |       answer,
33 |       taskHistorySummary,
34 |       previousTaskResult,
35 |     });
36 | 
37 |     const parser = new JsonOutputParser<EvaluationResponse>();
38 | 
39 |     const response = await this.llmService.invokeAndParse(
40 |       [systemMessage, humanMessage],
41 |       parser,
42 |     );
43 | 
44 |     console.log('FeedbackAgent response', JSON.stringify(response, null, 2));
45 | 
46 |     return response;
47 |   }
48 | }
49 | 


--------------------------------------------------------------------------------
/src/core/agents/feedback-agent/feedback-agent.types.ts:
--------------------------------------------------------------------------------
 1 | import { z } from 'zod';
 2 | 
 3 | export const EvaluationResponseSchema = z.object({
 4 |   result: z
 5 |     .union([z.literal('success'), z.literal('failed'), z.literal('unknown')])
 6 |     .describe('The result of the evaluation.'),
 7 |   explanation: z
 8 |     .string()
 9 |     .describe('The explanation and criteria of your result.'),
10 |   hint: z.string().describe('A hint to the user to improve the result.'),
11 |   memoryLearning: z
12 |     .string()
13 |     .describe('A memoryLearning to the user to improve the result.'),
14 | });
15 | 
16 | export type EvaluationResponse = z.infer<typeof EvaluationResponseSchema>;
17 | 


--------------------------------------------------------------------------------
/src/core/agents/openator/openator.config.ts:
--------------------------------------------------------------------------------
/**
 * Default retry budget for the agent.
 * NOTE(review): presumably the fallback for `OpenatorConfig.maxRetries` — confirm in openator.ts.
 */
export const DEFAULT_AGENT_MAX_RETRIES = 3;

/** Fallback used when `OpenatorConfig.maxActionsPerTask` is not provided. */
export const DEFAULT_AGENT_MAX_ACTIONS_PER_TASK = 4;
3 | 


--------------------------------------------------------------------------------
/src/core/agents/openator/openator.prompt.ts:
--------------------------------------------------------------------------------
  1 | import { ManagerResponseExamples } from '@/core/agents/openator/openator.types';
  2 | import { HumanMessage, SystemMessage } from '@langchain/core/messages';
  3 | 
  4 | export class ManagerAgentPrompt {
  5 |   constructor(private readonly maxActionPerStep: number) {}
  6 | 
  7 |   importantRules() {
  8 |     return `
  9 |   1. RESPONSE FORMAT: You must ALWAYS respond with valid JSON.
 10 |     
 11 |   ${ManagerResponseExamples}
 12 |   
 13 |   2. ACTIONS: You can specify multiple actions in the list to be executed in sequence. But always specify only one action name per item.
 14 |   
 15 |      Common action sequences:
 16 |   
 17 |      // Form filling
 18 |      actions: [
 19 |         { "name": "fillInput", "params": { "index": 1, "text": "{{user_email}}" } },
 20 |         { "name": "fillInput", "params": { "index": 2, "text": "{{user_password}}" } },
 21 |         { "name": "fillInput", "params": { "index": 5, "text": "13 street name, 123456 city, country" } },
 22 |         { "name": "fillInput", "params": { "index": 6, "text": "1234567890" } },
 23 |         { "name": "scrollDown", "params": {} },
 24 |       ]
 25 |   
 26 |      // Flow that does not work
 27 |      actions: [
 28 |         { "name": "clickElement", "params": { "index": 2 } },
 29 |         { "name": "clickElement", "params": { "index": 2 } },
 30 |         { "name": "clickElement", "params": { "index": 2 } },
 31 |         { "name": "clickElement", "params": { "index": 2 } },
 32 |       ]
 33 |   
 34 |   
 35 |       "index" corresponds to the index of the element you see on the screenshot.
 36 |       Never use other indexes than the ones provided in the element list.
 37 | 
 38 |       Example with wrong index:
 39 |       actions: [
 40 |         { "name": "fillInput", "params": { "index": "allow all", "text": "username" } },
 41 |         { "name": "fillInput", "params": { "index": "accept", "text": "password" } },        
 42 |       ]
 43 |   
 44 |       - NEVER plan to trigger a success or failure action among other actions, you should always trigger a success or failure action alone.
 45 |       - NEVER plan to do something after a scroll action since the page will change.
 46 |       - NEVER plan to scroll down or up if there is a cookie popup or any constent popup on screen. First accept or close the popup.
 47 |       - When the page is truncated, scroll down to view more elements especially if you are filling a form.
 48 |       - Trigger result means you have completed the task and we can ask the evaluator to evaluate the test result.
 49 |       - Sometimes, the user will provide variables surrounded by double brackets {{}}. You should keep them exactly as they are, we will replace them with the actual value later.
 50 | 
 51 |       Wrong example (trigger success among other actions):
 52 | 
 53 |       actions: [
 54 |         { "name": "fillInput", "params": { "index": 1, "text": "{{user_email}}" } },
 55 |         { "name": "fillInput", "params": { "index": 2, "text": "{{user_password}}" } },
 56 |         { "name": "clickElement", "params": { "index": 2 } },
 57 |         { "name": "triggerResult", "params": { "data": "" } },
 58 |       ]
 59 | 
 60 |       Correct example (trigger success alone):
 61 | 
 62 |       actions: [
 63 |         { "name": "fillInput", "params": { "index": 1, "text": "{{user_email}}" } },
 64 |         { "name": "fillInput", "params": { "index": 2, "text": "{{user_password}}" } },
 65 |         { "name": "clickElement", "params": { "index": 2 } },
 66 |       ]
 67 | 
 68 |       [... later if you believe the task is completed ...]
 69 | 
 70 |       actions: [
 71 |         { "name": "triggerResult", "params": { "data": "The recipe named 'Vegetarian Four Cheese Lasagna' has 4.6-star, 181 reviews, Servings 8, matches your request. It is available at https://www.allrecipes.com/recipe/123456/vegetarian-four-cheese-lasagna/" } },
 72 |       ]
 73 | 
 74 |   3. ELEMENT INTERACTION:
 75 |      - Only use indexes that exist in the provided element list.
 76 |      - Each element has a unique index number (e.g., "[33]__<button></button>").
 77 |      - Elements with empty index "[]" are non-interactive (for context only).
 78 |      - DO NOT try to fill an input field you already filled it with a value.   
 79 |   
 80 |   4. **NAVIGATION & ERROR HANDLING:**
 81 |    - **Track failed actions** and **do not repeat the same mistake**.
 82 |    - **Never enter a loop** where the same action fails repeatedly.
 83 |    - Example of a failure loop history (❌ Incorrect - must be avoided):
 84 | 
 85 |     -------
 86 |     "Scroll up to find the star rating and verify if zucchini is included in the ingredients.",
 87 |     "Scroll up to find the star rating and verify if zucchini is included in the ingredients.",
 88 |     "Scroll down to find the star rating and ingredients list.",
 89 |     "Scroll down to find the star rating and ingredients list.",
 90 |     "Search for another vegetarian lasagna recipe with zucchini and at least a four-star rating."
 91 |     -------
 92 |     
 93 |    - If an evaluator **rejects your result**, you **must adjust your approach** instead of retrying blindly.
 94 |    - **Before retrying, ask yourself:**
 95 |      - Did I already try this exact action?
 96 |      - Is there an alternative approach I can take?
 97 |      - Can I gather more information before acting?
 98 | 
 99 |   5. SCROLLING BEHAVIOR:
100 |      - **Never plan to scroll if there is a popup (cookies, modals, alerts, etc.).**
101 |      - **After scrolling, always verify progress** before scrolling again.
102 |      - **Avoid infinite scrolling loops.**
103 |      
104 |   6. TASK COMPLETION:
105 |      - When you evaluate the task, you shouls always ask yourself if the Success condition given by the user is met. If it is, use the triggerResult action as the last action.
106 |      - If you are running out of steps (current step), think about speeding it up, and ALWAYS use the triggerResult action as the last action.
107 |   
108 |   7. VISUAL CONTEXT:
109 |      - When an image is provided, use it to understand the page layout.
110 |      - Bounding boxes with labels correspond to element indexes.
111 |      - Each bounding box and its label have the same color.
112 |      - Most often the label is inside the bounding box, on the top right.
113 |      - Visual context helps verify element locations and relationships.
114 |      - Sometimes labels overlap, so use the context to verify the correct element.
115 |      - Sometimes it's easier to extract the information from the content of the page than from the visual context (especially when you are dealing with a list of products). To do this, use the extractContent action.
116 |   
117 |   8. FORM FILLING:
118 |      - If you fill an input field and your action sequence is interrupted, most often a list with suggestions popped up under the field and you need to first select the right element from the suggestion list.
119 |      - Sometimes when filling a date field, a calendar poup is displayed which can make the action sequence interrupted so you need to first select the right date from the calendar.
120 |      - If you fill an input field and you see it's still empty, you need to fill it again.
121 |   
122 |   9. ACTION SEQUENCING:
123 |      - Actions are executed in the order they appear in the list.
124 |      - Each action should logically follow from the previous one.
125 |      - Only provide the action sequence until you think the page will change.
126 |      - Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page like saving, extracting, checkboxes...
127 |      - only use multiple actions if it makes sense.
128 |      - After you have scrolled down or up, you should always ask yourself if you achieved your goal. If not, you should scroll down or up again.
129 |      - When you will define the next goal, make sure to be as specific as possible to avoid misleading the agent. Ask yourself the following questions:
130 |        - Does my goal go against the constraints of my end goal?
131 |        - Does my goal do something that has been done multiple times (loop)?
132 |        - Does my goal go against my memory learning?
133 |        - Did I already try this action or got this information in my memory learning?
134 |        -> Reajust your goal if needed.
135 | 
136 | 
137 |     10. RESULT:
138 |       - You should always provide a result in the triggerResult action.
139 |       - The result should be a string that describes the result of the task and matches the user's goal or question.
140 |       - DO NOT hallucinate the result.
141 |       - Your result should ALWAYS be based on what you see, or what you extract from the content of the page and not on what you think you know.
142 |       - When you will trigger the result, pay attention to the feedback you will receive. This feedback will contain the reason why the task failed and the hint to fix it. It is paramount to you to follow the hint.
143 |   
144 |       Use a maximum of ${this.maxActionPerStep} actions per task.
145 |   `;
146 |   }
147 | 
148 |   inputFormat() {
149 |     return `
150 |       INPUT STRUCTURE:
151 |       1. MEMORY LEARNINGS: A list of memory learning you should know about your previous actions. This will prevent you from doing the same mistakes over and over again. This can be used also to store information you have extracted in the past.
152 |       2. CURRENT URL: The webpage you're currently on.
153 |       3. EXTRACTED DOM ELEMENTS: List in the format:
154 |         [index]__<element_type attributes=value>element_text</element_type>
155 |         - index: Numeric identifier for interaction (if empty, the element is non-interactive).
156 |         - element_type: HTML element type (button, input, select, etc.).
157 |         - element_text: Visible text or element description.
158 |         - attributes: HTML attributes of the element used for context.
159 |         
160 |       4. TASK: The task asked by the user. 
161 |         - Use it to define the actions you have to perform.
162 |         - No failure is tolerated and success is rewarded.
163 |         - You must be sure of the data you provide. Make sure to provide the exact data. Open products and navigate until you find the information you need.
164 |         - It's better to navigate a bit more than to provide wrong information.
165 |   
166 |       Notes:
167 |       - Only elements with numeric indexes are interactive.
168 |       - Elements with empty index [] provide context but cannot be interacted with.
169 |   
170 |       Interactive examples:
171 |       [14]__<button id="submit-btn">Submit Form</button>
172 |       [15]__<input type="text" placeholder="Enter your name" for="name">
173 |   
174 |       Non-interactive examples:
175 |       []__<div>Non interactive div</div>
176 |       []__<span>Non interactive span</span>
177 |       []__Non interactive text
178 |       `;
179 |   }
180 | 
181 |   getSystemPrompt() {
182 |     return `
183 |       You are a precise Browser Automation Agent that interacts with websites through structured commands. Your role is to:
184 |   
185 |       1. Analyze the provided webpage elements and structure.
186 |       2. Plan a sequence of actions to achieve the task provided by the user.
187 |       3. Respond with valid JSON containing your action sequence.
188 |       4. When you consider the scenario is complete and we can evaluate the test result, use the triggerSuccess to pass some data to the evaluator.
189 |   
190 |       Current date and time: ${new Date().toISOString()}
191 |   
192 |       ${this.inputFormat()}
193 |   
194 |       ${this.importantRules()}
195 |   
196 |       Functions:
197 |       - clickElement: { index: <element_index> }
198 |       - fillInput: { index: <element_index>, text: <text> }
199 |       - scrollDown: { goal: <goal> }
200 |       - scrollUp: { goal: <goal> }
201 |       - goToUrl: { url: <url> }    
202 |       - triggerResult: { data: <data> }
203 |       - goBack: {}
204 |       - extractContent: {}
205 |   
206 |       Remember: Your responses must be valid JSON matching the specified format. Each action in the sequence must be valid."""
207 |   `;
208 |   }
209 | 
210 |   getSystemMessage() {
211 |     return new SystemMessage({
212 |       content: this.getSystemPrompt(),
213 |     });
214 |   }
215 | }
216 | 
/**
 * Builds the per-step human message for the browser-automation agent: two
 * screenshots followed by a text block describing the current page state.
 */
export class ManagerAgentHumanPrompt {
  /**
   * @param state - Snapshot of the run for this step. `pristineScreenshotUrl`
   * is the screenshot without the highlight; it is sent before `screenshotUrl`.
   */
  getHumanMessage(state: {
    memoryLearnings: string;
    serializedTasks: string;
    stringifiedDomState: string;
    screenshotUrl: string;
    pristineScreenshotUrl: string;
    pageUrl: string;
    pixelAbove: number;
    pixelBelow: number;
  }) {
    const {
      memoryLearnings,
      serializedTasks,
      stringifiedDomState,
      screenshotUrl,
      pristineScreenshotUrl,
      pageUrl,
      pixelAbove,
      pixelBelow,
    } = state;

    // Template lines keep their original indentation: it is part of the prompt text.
    const pageStateText = `
          MEMORY LEARNINGS: ${memoryLearnings}

          CURRENT URL: ${pageUrl}

          ... ${pixelAbove} PIXEL ABOVE - SCROLL UP TO SEE MORE ELEMENTS

          EXTRACTED DOM ELEMENTS: ${stringifiedDomState} that you can match with the screenshot.

          ... ${pixelBelow} PIXEL BELOW - SCROLL DOWN TO SEE MORE ELEMENTS

          USER TASK AND TASK HISTORY: ${serializedTasks}
          `;

    return new HumanMessage({
      content: [
        {
          type: 'image_url',
          image_url: { url: pristineScreenshotUrl, detail: 'high' },
        },
        {
          type: 'image_url',
          image_url: { url: screenshotUrl, detail: 'high' },
        },
        { type: 'text', text: pageStateText },
      ],
    });
  }
}
276 | 


--------------------------------------------------------------------------------
/src/core/agents/openator/openator.ts:
--------------------------------------------------------------------------------
  1 | import { TaskManagerService } from '@/core/services/task-manager-service';
  2 | import { JsonOutputParser } from '@langchain/core/output_parsers';
  3 | import { ManagerAgentPrompt, ManagerAgentHumanPrompt } from './openator.prompt';
  4 | import { DomService } from '@/infra/services/dom-service';
  5 | import {
  6 |   DEFAULT_AGENT_MAX_ACTIONS_PER_TASK,
  7 |   DEFAULT_AGENT_MAX_RETRIES,
  8 | } from './openator.config';
  9 | import { ManagerAgentAction, ManagerResponse } from './openator.types';
 10 | import { Browser, Coordinates } from '@/core/interfaces/browser.interface';
 11 | import { Task, TaskAction } from '@/core/entities/task';
 12 | import { LLM } from '@/core/interfaces/llm.interface';
 13 | import { OpenatorResult } from '@/core/entities/openator-result';
 14 | import { AgentReporter } from '@/core/interfaces/agent-reporter.interface';
 15 | import { Variable } from '@/core/entities/variable';
 16 | import { VariableString } from '@/core/entities/variable-string';
 17 | import { Run } from '@/core/entities/run';
 18 | import { EventBusInterface } from '@/core/interfaces/event-bus.interface';
 19 | import { FeedbackAgent } from '../feedback-agent/feedback-agent';
 20 | import { Agent, AgentTask } from '../agent-base';
 21 | import { SummarizeAgent } from '../summarize-agent/summarize-agent';
 22 | 
 23 | export type OpenatorConfig = { // Dependencies and limits handed to the Openator constructor.
 24 |   maxActionsPerTask?: number; // Defaults to DEFAULT_AGENT_MAX_ACTIONS_PER_TASK when omitted.
 25 |   maxRetries?: number; // Defaults to DEFAULT_AGENT_MAX_RETRIES when omitted.
 26 |   variables: Variable[]; // Resolved into {{name}} placeholders of the start URL, the prompt and fillInput text.
 27 | 
 28 |   taskManager: TaskManagerService;
 29 |   domService: DomService;
 30 |   feedbackAgent: FeedbackAgent; // Evaluates the answer carried by a triggerResult action.
 31 |   browserService: Browser;
 32 |   llmService: LLM;
 33 |   reporter: AgentReporter;
 34 |   eventBus?: EventBusInterface; // Optional; receives 'run:update' events when provided.
 35 |   /** TODO: replace this experimental agent */
 36 |   summarizer: SummarizeAgent; // Summarizes extracted page content for the extractContent action.
 37 |   summarizeTask: AgentTask; // Prepared with the extracted content, then passed to summarizer.perform.
 38 | };
 39 | 
 40 | /**
 41 |  * Manager agent that drives a browser towards an end goal: it asks the LLM for
 42 |  * the next task, executes that task's actions, and repeats until the feedback
 43 |  * agent accepts a result or the retry budget is exhausted.
 44 |  */
 45 | export class Openator {
 46 |   /** Pause after each executed action, in milliseconds. */
 47 |   private msDelayBetweenActions: number = 1000;
 48 |   /** DOM hash captured when the current task was planned. */
 49 |   private lastDomStateHash: string | null = null;
 50 |   private isSuccess: boolean = false;
 51 |   private isFailure: boolean = false;
 52 |   private reason: string = '';
 53 |   private result: string = '';
 54 |   /** Failures counted since the last successfully executed action. */
 55 |   private retries: number = 0;
 56 |   private stepCount: number = 0;
 57 |   /** Number of answers rejected by the feedback agent. */
 58 |   private feedbackRetries: number = 0;
 59 |   private readonly variables: Variable[];
 60 |   private currentRun: Run | null = null;
 61 |   private summarizer: Agent<unknown>;
 62 |   private summarizeTask: AgentTask;
 63 | 
 64 |   private readonly maxActionsPerTask: number;
 65 |   private readonly maxRetries: number;
 66 | 
 67 |   private readonly taskManager: TaskManagerService;
 68 |   private readonly domService: DomService;
 69 |   private readonly browserService: Browser;
 70 |   private readonly llmService: LLM;
 71 |   private readonly reporter: AgentReporter;
 72 |   private readonly eventBus: EventBusInterface | undefined;
 73 |   private readonly feedbackAgent: FeedbackAgent;
 74 |   /** Notes collected while running; joined into every planning prompt. */
 75 |   public readonly memoryLearnings: string[] = [];
 76 | 
 77 |   constructor(config: OpenatorConfig) {
 78 |     this.taskManager = config.taskManager;
 79 |     this.domService = config.domService;
 80 |     this.browserService = config.browserService;
 81 |     this.llmService = config.llmService;
 82 |     this.reporter = config.reporter;
 83 |     this.variables = config.variables;
 84 |     this.feedbackAgent = config.feedbackAgent;
 85 |     this.eventBus = config.eventBus;
 86 | 
 87 |     this.maxActionsPerTask =
 88 |       config.maxActionsPerTask ?? DEFAULT_AGENT_MAX_ACTIONS_PER_TASK;
 89 |     this.maxRetries = config.maxRetries ?? DEFAULT_AGENT_MAX_RETRIES;
 90 | 
 91 |     this.summarizer = config.summarizer;
 92 |     this.summarizeTask = config.summarizeTask;
 93 |   }
 94 | 
 95 |   /** Stores the accepted answer and flags the run as successful. */
 96 |   private onSuccess(result: string) {
 97 |     this.reporter.success(`Manager agent completed successfully: ${result}`);
 98 |     this.isSuccess = true;
 99 |     this.result = result;
100 |     this.currentRun?.setSuccess(result);
101 |     this.emitRunUpdate();
102 |   }
103 | 
104 |   /** Stores the failure reason and flags the run as failed. */
105 |   private onFailure(reason: string) {
106 |     this.reporter.failure(`Manager agent failed: ${reason}`);
107 |     this.isFailure = true;
108 |     this.reason = reason;
109 |     this.currentRun?.setFailure(reason);
110 |     this.emitRunUpdate();
111 |   }
112 | 
113 |   private async beforeAction(action: TaskAction) {
114 |     this.reporter.loading(`Performing action ${action.data.name}...`);
115 |   }
116 | 
117 |   private async afterAction(action: TaskAction) {
118 |     this.reporter.success(`Performing action ${action.data.name}...`);
119 |   }
120 | 
121 |   // The counter helpers are synchronous: they are always called without
122 |   // `await`, so async versions would only leave floating promises behind.
123 |   private incrementFeedbackRetries() {
124 |     this.feedbackRetries += 1;
125 |   }
126 | 
127 |   private incrementRetries() {
128 |     this.retries += 1;
129 |   }
130 | 
131 |   private resetRetries() {
132 |     this.retries = 0;
133 |   }
134 | 
135 |   private incrementStepCount() {
136 |     this.stepCount += 1;
137 |   }
138 | 
139 |   /** True once the run has either succeeded or failed. */
140 |   get isCompleted() {
141 |     return this.isSuccess || this.isFailure;
142 |   }
143 | 
144 |   /**
145 |    * Opens the browser on `startUrl`, registers the end goal and runs the agent.
146 |    *
147 |    * @param startUrl URL to launch; `{{name}}` placeholders get their real values.
148 |    * @param initialPrompt End goal; placeholders get their public values only.
149 |    * @param jobId Optional id forwarded to the created run.
150 |    * @returns The final status, reason, result and step count of the run.
151 |    */
152 |   async start(
153 |     startUrl: string,
154 |     initialPrompt: string,
155 |     jobId?: string,
156 |   ): Promise<OpenatorResult> {
157 |     const vStartUrl = new VariableString(startUrl, this.variables);
158 | 
159 |     await this.browserService.launch(vStartUrl.dangerousValue());
160 | 
161 |     const vInitialPrompt = new VariableString(initialPrompt, this.variables);
162 | 
163 |     this.taskManager.setEndGoal(vInitialPrompt.publicValue());
164 | 
165 |     return this.run(jobId);
166 |   }
167 | 
168 |   /** Publishes the current run on the event bus when both are available. */
169 |   private emitRunUpdate() {
170 |     if (this.currentRun) {
171 |       this.eventBus?.emit('run:update', this.currentRun);
172 |     }
173 |   }
174 | 
175 |   /** Builds the result object returned to the caller of `start`. */
176 |   private buildResult(status: OpenatorResult['status']): OpenatorResult {
177 |     return {
178 |       status,
179 |       reason: this.reason,
180 |       result: this.result,
181 |       stepCount: this.stepCount,
182 |     };
183 |   }
184 | 
185 |   /**
186 |    * Plan/execute loop. Written as a plain async method rather than
187 |    * `new Promise(async ...)`: with an async executor, an exception thrown
188 |    * inside the loop is never forwarded and the returned promise never settles.
189 |    * Unexpected errors inside the loop are turned into a failed result instead.
190 |    */
191 |   private async run(jobId?: string): Promise<OpenatorResult> {
192 |     this.reporter.loading('Starting manager agent');
193 | 
194 |     const run = Run.InitRunning(this.taskManager.getEndGoal(), jobId);
195 |     this.currentRun = run;
196 |     this.emitRunUpdate();
197 | 
198 |     try {
199 |       while (!this.isCompleted) {
200 |         if (this.retries >= this.maxRetries) {
201 |           this.onFailure('Max retries reached');
202 | 
203 |           return {
204 |             status: 'failed',
205 |             result: this.result,
206 |             stepCount: this.stepCount,
207 |             reason:
208 |               'Max number of retries reached. The agent was not able to complete the test.',
209 |           };
210 |         }
211 | 
212 |         this.incrementStepCount();
213 | 
214 |         this.reporter.loading('Defining next task...');
215 | 
216 |         const task = await this.defineNextTask();
217 | 
218 |         run.addTask(task);
219 |         run.executeAction();
220 | 
221 |         this.emitRunUpdate();
222 | 
223 |         this.reporter.loading(`Executing task: ${task.goal}`);
224 | 
225 |         await this.executeTask(task);
226 |       }
227 |     } catch (error: unknown) {
228 |       this.onFailure(error instanceof Error ? error.message : 'Unknown error');
229 |     }
230 | 
231 |     // onFailure already updated the run and emitted it; skip the DOM cleanup.
232 |     if (this.isFailure) {
233 |       return this.buildResult('failed');
234 |     }
235 | 
236 |     await this.domService.resetHighlightElements();
237 | 
238 |     this.emitRunUpdate();
239 | 
240 |     return this.buildResult(this.isSuccess ? 'success' : 'failed');
241 |   }
242 | 
243 |   /**
244 |    * Checks if the DOM state has changed.
245 |    * TODO: fix this
246 |    */
247 |   private async didDomStateChange() {
248 |     const { domStateHash: currentDomStateHash } =
249 |       await this.domService.getInteractiveElements(false);
250 | 
251 |     return this.lastDomStateHash !== currentDomStateHash;
252 |   }
253 | 
254 |   /**
255 |    * Ensures that the triggerResult action is never executed among other actions:
256 |    * when several actions are planned it is dropped, so that the result is only
257 |    * evaluated once the other actions ran and the page was looked at again.
258 |    */
259 |   private ensureNoTriggerSuccessOrFailureAmongOtherActions(
260 |     actions: ManagerAgentAction[],
261 |   ) {
262 |     if (actions.length < 2) {
263 |       return actions;
264 |     }
265 | 
266 |     return actions.filter((action) => action.name !== 'triggerResult');
267 |   }
268 | 
269 |   /**
270 |    * Asks the LLM for the next task based on the current page.
271 |    * When the LLM call or the parsing fails, a retry is counted and an empty
272 |    * "Keep trying" task is returned, so repeated failures end the run through
273 |    * the max-retries check instead of looping forever.
274 |    */
275 |   private async defineNextTask(): Promise<Task> {
276 |     this.currentRun?.think();
277 |     this.emitRunUpdate();
278 | 
279 |     const parser = new JsonOutputParser<ManagerResponse>();
280 | 
281 |     const systemMessage = new ManagerAgentPrompt(
282 |       this.maxActionsPerTask,
283 |     ).getSystemMessage();
284 | 
285 |     const {
286 |       screenshot,
287 |       pristineScreenshot,
288 |       stringifiedDomState,
289 |       domStateHash,
290 |       pixelAbove,
291 |       pixelBelow,
292 |     } = await this.domService.getInteractiveElements();
293 | 
294 |     this.lastDomStateHash = domStateHash;
295 | 
296 |     const humanMessage = new ManagerAgentHumanPrompt().getHumanMessage({
297 |       memoryLearnings: this.memoryLearnings.join(' ; '),
298 |       serializedTasks: this.taskManager.getSerializedTasks(),
299 |       pristineScreenshotUrl: pristineScreenshot,
300 |       screenshotUrl: screenshot,
301 |       stringifiedDomState,
302 |       pageUrl: this.browserService.getPageUrl(),
303 |       pixelAbove,
304 |       pixelBelow,
305 |     });
306 | 
307 |     const messages = [systemMessage, humanMessage];
308 | 
309 |     try {
310 |       const parsedResponse = await this.llmService.invokeAndParse(
311 |         messages,
312 |         parser,
313 |       );
314 | 
315 |       const safeActions = this.ensureNoTriggerSuccessOrFailureAmongOtherActions(
316 |         parsedResponse.actions,
317 |       );
318 | 
319 |       const task = Task.InitPending(
320 |         parsedResponse.currentState.nextGoal,
321 |         safeActions,
322 |       );
323 | 
324 |       this.taskManager.add(task);
325 | 
326 |       return task;
327 |     } catch (error) {
328 |       console.error('Error parsing agent response:', error);
329 |       this.incrementRetries();
330 |       return Task.InitPending('Keep trying', []);
331 |     }
332 |   }
333 | 
334 |   /**
335 |    * Executes the task's actions in order. Stops early when the DOM changed
336 |    * between two actions (the task is cancelled) or when an action throws (the
337 |    * task is failed and a retry is counted), so later actions never run against
338 |    * a page they were not planned for and a failed task is not reported as done.
339 |    */
340 |   private async executeTask(task: Task) {
341 |     task.start();
342 | 
343 |     await this.domService.resetHighlightElements();
344 | 
345 |     for (const [i, action] of task.actions.entries()) {
346 |       try {
347 |         action.start();
348 |         this.emitRunUpdate();
349 | 
350 |         if (i > 0 && (await this.didDomStateChange())) {
351 |           action.cancel('Dom state changed, need to reevaluate.');
352 |           task.cancel('Dom state changed, need to reevaluate.');
353 |           this.taskManager.update(task);
354 |           this.reporter.info('Dom state changed, need to reevaluate.');
355 |           return;
356 |         }
357 | 
358 |         await this.executeAction(action);
359 |         this.emitRunUpdate();
360 | 
361 |         await new Promise((resolve) =>
362 |           setTimeout(resolve, this.msDelayBetweenActions),
363 |         );
364 | 
365 |         task.complete();
366 |         this.emitRunUpdate();
367 |         this.resetRetries();
368 |         this.taskManager.update(task);
369 |       } catch (error: unknown) {
370 |         const message = `Task failed with error: ${
371 |           error instanceof Error ? error.message : 'Unknown error'
372 |         }`;
373 | 
374 |         action.fail(message);
375 |         task.fail(message);
376 | 
377 |         this.taskManager.update(task);
378 |         this.incrementRetries();
379 |         this.emitRunUpdate();
380 |         return;
381 |       }
382 |     }
383 | 
384 |     this.reporter.success(task.goal);
385 |   }
386 | 
387 |   /**
388 |    * Performs a single action in the browser and records its outcome on the
389 |    * action itself (completed or failed).
390 |    * @throws Error when the element targeted by a click or fill cannot be found.
391 |    */
392 |   private async executeAction(action: TaskAction) {
393 |     let coordinates: Coordinates | null = null;
394 | 
395 |     await this.beforeAction(action);
396 | 
397 |     this.emitRunUpdate();
398 | 
399 |     switch (action.data.name) {
400 |       case 'clickElement':
401 |         coordinates = this.domService.getIndexSelector(
402 |           action.data.params.index,
403 |         );
404 | 
405 |         if (!coordinates) {
406 |           throw new Error('Index or coordinates not found');
407 |         }
408 | 
409 |         await this.domService.resetHighlightElements();
410 | 
411 |         await this.domService.highlightElementPointer(coordinates);
412 | 
413 |         await this.browserService.mouseClick(coordinates.x, coordinates.y);
414 | 
415 |         await this.domService.resetHighlightElements();
416 | 
417 |         action.complete();
418 | 
419 |         break;
420 | 
421 |       // Braces give a case its own scope for its `const` declarations.
422 |       case 'fillInput': {
423 |         coordinates = this.domService.getIndexSelector(
424 |           action.data.params.index,
425 |         );
426 | 
427 |         if (!coordinates) {
428 |           action.fail('Index or coordinates not found');
429 |           throw new Error('Index or coordinates not found');
430 |         }
431 | 
432 |         await this.domService.highlightElementPointer(coordinates);
433 |         const variableString = new VariableString(
434 |           action.data.params.text,
435 |           this.variables,
436 |         );
437 | 
438 |         await this.browserService.fillInput(variableString, coordinates);
439 |         await this.domService.resetHighlightElements();
440 | 
441 |         action.complete();
442 | 
443 |         break;
444 |       }
445 | 
446 |       case 'scrollDown':
447 |         await this.browserService.scrollDown();
448 |         await this.domService.resetHighlightElements();
449 |         await this.domService.highlightElementWheel('down');
450 | 
451 |         action.complete();
452 | 
453 |         break;
454 | 
455 |       case 'scrollUp':
456 |         await this.browserService.scrollUp();
457 | 
458 |         await this.domService.resetHighlightElements();
459 |         await this.domService.highlightElementWheel('up');
460 | 
461 |         action.complete();
462 | 
463 |         break;
464 | 
465 |       case 'takeScreenshot':
466 |         await this.domService.resetHighlightElements();
467 |         await this.domService.highlightForSoM();
468 | 
469 |         action.complete();
470 | 
471 |         break;
472 | 
473 |       case 'goToUrl':
474 |         await this.browserService.goToUrl(action.data.params.url);
475 | 
476 |         action.complete();
477 |         break;
478 | 
479 |       case 'goBack':
480 |         await this.browserService.goBack();
481 | 
482 |         action.complete();
483 |         break;
484 | 
485 |       case 'extractContent': {
486 |         const content = await this.browserService.extractContent();
487 | 
488 |         this.summarizeTask.prepare({
489 |           images: [],
490 |           memory: '',
491 |           input: `Our goal is to ${this.taskManager.getEndGoal()} Here is the content extracted from the page: ${content}.`,
492 |         });
493 | 
494 |         const summarized = await this.summarizer.perform(this.summarizeTask);
495 | 
496 |         this.memoryLearnings.push(
497 |           `Key takeways from content on page ${this.browserService.getPageUrl()}: ${summarized.takeaways}`,
498 |         );
499 |         action.complete(summarized.takeaways);
500 | 
501 |         break;
502 |       }
503 | 
504 |       case 'triggerResult': {
505 |         const { pristineScreenshot } = await this.domService.getDomState();
506 |         const answer = action.data.params.data;
507 |         const { result, explanation, hint, memoryLearning } =
508 |           await this.feedbackAgent.evaluate({
509 |             pageUrl: this.browserService.getPageUrl(),
510 |             screenshotUrls: [pristineScreenshot],
511 |             task: this.taskManager.getEndGoal(),
512 |             answer,
513 |             taskHistorySummary: this.taskManager.getSerializedTasks(),
514 |             previousTaskResult: JSON.stringify(
515 |               this.taskManager.getLatestTaskPerformed()?.objectForLLM(),
516 |             ),
517 |           });
518 | 
519 |         if (result === 'success' || result === 'unknown') {
520 |           action.complete(explanation);
521 |           this.onSuccess(answer);
522 |         } else {
523 |           if (this.feedbackRetries > this.maxRetries) {
524 |             this.onFailure('Max feedback retries reached');
525 |           }
526 | 
527 |           action.fail(JSON.stringify({ result, explanation, hint }));
528 | 
529 |           // Skip empty learnings: they would only add noise to the prompt.
530 |           if (memoryLearning) {
531 |             this.memoryLearnings.push(memoryLearning);
532 |           }
533 | 
534 |           this.incrementFeedbackRetries();
535 |         }
536 | 
537 |         break;
538 |       }
539 |     }
540 | 
541 |     await this.afterAction(action);
542 |     this.emitRunUpdate();
543 |   }
544 | }
495 | 


--------------------------------------------------------------------------------
/src/core/agents/openator/openator.types.ts:
--------------------------------------------------------------------------------
  1 | import { z } from 'zod';
  2 | import { zodToJsonSchema } from 'zod-to-json-schema';
  3 | 
  4 | export const ManagerAgentActionSchema = z // Every action the manager agent may plan, told apart by the literal `name`.
  5 |   .union([
  6 |     z.object({
  7 |       name: z.literal('extractContent'),
  8 |       params: z.null(), // extractContent takes no parameters
  9 |       description: z
 10 |         .string()
 11 |         .describe(
 12 |           "A short description of the action you want to perform. E.g 'Extract the content of the current page'",
 13 |         ),
 14 |     }),
 15 |     z.object({
 16 |       name: z.literal('clickElement'),
 17 |       params: z.object({
 18 |         index: z.number(), // index of the target element, resolved through getIndexSelector when executed
 19 |       }),
 20 |       description: z
 21 |         .string()
 22 |         .describe(
 23 |           "A short description of the action you want to perform. E.g 'Click the login button'",
 24 |         ),
 25 |     }),
 26 |     z.object({
 27 |       name: z.literal('fillInput'),
 28 |       params: z.object({
 29 |         index: z.number(),
 30 |         text: z.string(), // wrapped in a VariableString before being typed, so {{name}} placeholders are resolved
 31 |       }),
 32 |       description: z
 33 |         .string()
 34 |         .describe(
 35 |           "A short description of the action you want to perform. E.g 'Fill the email input'",
 36 |         ),
 37 |     }),
 38 |     z.object({
 39 |       name: z.literal('goBack'),
 40 |       description: z
 41 |         .string()
 42 |         .describe(
 43 |           "A short description of the action you want to perform. E.g 'Go back to the previous page'",
 44 |         ),
 45 |       params: z.null(),
 46 |     }),
 47 |     z.object({
 48 |       name: z.literal('scrollDown'),
 49 |       description: z
 50 |         .string()
 51 |         .describe(
 52 |           "A short description of the action you want to perform. E.g 'Scroll down to find the login form'",
 53 |         ),
 54 |       params: z.null(),
 55 |     }),
 56 |     z.object({
 57 |       name: z.literal('scrollUp'),
 58 |       description: z
 59 |         .string()
 60 |         .describe(
 61 |           "A short description of the action you want to perform. E.g 'Scroll up to find the login form'",
 62 |         ),
 63 |       params: z.null(),
 64 |     }),
 65 |     z.object({
 66 |       name: z.literal('goToUrl'),
 67 |       params: z.object({
 68 |         url: z.string(),
 69 |       }),
 70 |       description: z
 71 |         .string()
 72 |         .describe(
 73 |           "A short description of the action you want to perform. E.g 'Go to the login page'",
 74 |         ),
 75 |     }),
 76 |     z.object({
 77 |       name: z.literal('takeScreenshot'),
 78 |       description: z
 79 |         .string()
 80 |         .describe(
 81 |           "A short description of the action you want to perform. E.g 'Take a screenshot of the current page'",
 82 |         ),
 83 |       params: z.null(),
 84 |     }),
 85 |     z
 86 |       .object({
 87 |         name: z.literal('triggerResult'),
 88 |         params: z.object({
 89 |           data: z
 90 |             .string()
 91 |             .describe(
 92 |               'The data to be passed to the evaluator. Most likely the response to the user task.',
 93 |             ),
 94 |         }),
 95 |         description: z.null(), // the only action whose description must be null
 96 |       })
 97 |       .describe(
 98 |         'Trigger result means you have completed the task and we can ask the evaluator to evaluate the test result.',
 99 |       ),
100 |   ])
101 |   .describe(
102 |     'The action to be executed. e.g. { name: "clickElement", params: { index: 2 }, description: "Click the login button" }',
103 |   );
104 | 
105 | export type ManagerAgentAction = z.infer<typeof ManagerAgentActionSchema>; // TypeScript view of a single action
106 | 
107 | export const ManagerAgentResponseSchema = z.object({ // Shape of the LLM answer parsed on every planning call
108 |   currentState: z.object({
109 |     evaluationPreviousGoal: z.string(),
110 |     memory: z.string(),
111 |     nextGoal: z.string(), // becomes the goal of the next Task
112 |   }),
113 |   actions: z.array(ManagerAgentActionSchema),
114 | });
115 | 
116 | export type ManagerResponse = z.infer<typeof ManagerAgentResponseSchema>; // TypeScript view of a whole response
117 | 
118 | /**
119 |  * Example responses in the shape of ManagerAgentResponseSchema (presumably
120 |  * embedded in the agent prompt; confirm against the prompt file). Every
121 |  * example is strict JSON: no trailing commas, and each action carries the
122 |  * `params` key the action schema requires (null when there are no parameters).
123 |  */
124 | export const ManagerResponseExamples = `
125 | 
126 | Example Response 1:
127 | {
128 |   "currentState": {
129 |     "evaluationPreviousGoal": "Cookies have been accepted. We can now proceed to login.",
130 |     "memory": "Cookies accepted, ready to login. End goal is to login to my account.",
131 |     "nextGoal": "Display the login form by clicking the login button"
132 |   },
133 |   "actions": [{"name": "clickElement", "params": {"index": 3}, "description": "Click the login button"}]
134 | }
135 | 
136 | Example Response 2:
137 | {
138 |   "currentState": {
139 |     "evaluationPreviousGoal": "An element seems to prevent us from logging in. We need close the cookies popup.",
140 |     "memory": "Our end goal is to login to my account. We need to close the cookies popup and then we can proceed to login.",
141 |     "nextGoal": "Close cookies popup and then login."
142 |   },
143 |   "actions": [{"name": "clickElement", "params": {"index": 5}, "description": "Close the cookies popup"}]
144 | }
145 | 
146 | Example Response 3:
147 | {
148 |   "currentState": {
149 |     "evaluationPreviousGoal": "We need to scroll down to find the login form.",
150 |     "memory": "We need to scroll down to find the login form. End goal is to login to my account.",
151 |     "nextGoal": "Scroll down to find the login form"
152 |   },
153 |    "actions": [{"name": "scrollDown", "params": null, "description": "Scroll down to find the login form"}]
154 | }
155 | `;
150 | 


--------------------------------------------------------------------------------
/src/core/agents/summarize-agent/summarize-agent.ts:
--------------------------------------------------------------------------------
 1 | import { Agent, AgentTask } from '../agent-base';
 2 | import { z } from 'zod';
 3 | import { LLM } from '@/core/interfaces/llm.interface';
 4 | 
 5 | const responseSchema = z.object({ // Structured output expected from the summarizer agent
 6 |   takeaways: z.string(), // a single string, not a list
 7 | });
 8 | 
 9 | export type SummarizeAgent = Agent<z.infer<typeof responseSchema>>; // Agent typed with the shape inferred from responseSchema
10 | 
11 | /**
12 |  * Creates the "Summarizer" agent on top of the given LLM. The agent is
13 |  * configured for strict JSON output validated against `responseSchema`.
14 |  */
15 | export const initSummarizer = (llm: LLM) => {
16 |   const backstory = `You are a meticulous analyst with a keen eye for detail.
17 | 
18 |   You are able to summarize the content provided by the user with key takeaways.
19 | 
20 |   Depending on the context, you will deduce that details are not relevant to the users.
21 | 
22 |   These details will depends on the context of the user.
23 | 
24 |   For example, in a recipe website, the list of ingredients, calories, etc. are relevant to the user. Where as in a news website, the list of ingredients, calories, etc. are not relevant to the user.
25 |     `;
26 | 
27 |   return new Agent({
28 |     role: 'Summarizer',
29 |     goal: 'Summarize the content provided by the user with key takeaways',
30 |     backstory,
31 |     tools: [],
32 |     strictJsonOutput: true,
33 |     llm,
34 |     responseSchema,
35 |   });
36 | };
30 | 
31 | /**
32 |  * Creates the task handed to the summarizer agent. The expected output and
33 |  * the examples describe `takeaways` as one string, matching `responseSchema`,
34 |  * and the valid example is well-formed JSON.
35 |  */
36 | export const initSummarizeTask = () =>
37 |   new AgentTask({
38 |     description:
39 |       'Summarize the key takeaways from the content provided by the user. Try to use the least amount of words possible without losing the context. Be as specific as possible.',
40 |     goal: 'The goal is to summarize the content provided by the user with key takeaways.',
41 |     expectedOutput:
42 |       'A JSON object with the following properties: { takeaways: string }',
43 |     validOutputExamples: `{ "takeaways": "The recipie named Vegetarian Lasagna is available at https://www.example.com/recipes/vegetarian-lasagna, has a rating of 4.4, contains zuchini, tomatoes, cucumber, and under 500 calories per serving." }`,
44 |     invalidOutputExamples: `{ "takeaways": "The recipie named Vegetarian Lasagna is good" }`,
45 |   });
41 | 


--------------------------------------------------------------------------------
/src/core/entities/openator-result.ts:
--------------------------------------------------------------------------------
 1 | export const OpenatorResultStatuses = ['success', 'failed'] as const; // Every final status a run can report
 2 | 
 3 | export type OpenatorResultStatus = (typeof OpenatorResultStatuses)[number]; // 'success' | 'failed'
 4 | 
 5 | export type OpenatorResult = { // Value resolved by Openator.start
 6 |   status: OpenatorResultStatus;
 7 |   reason: string; // Failure explanation; empty string when the run succeeded
 8 |   result: string; // Answer accepted by the feedback agent; empty string otherwise
 9 |   stepCount: number; // Number of plan/execute iterations performed
10 | };
11 | 


--------------------------------------------------------------------------------
/src/core/entities/run.ts:
--------------------------------------------------------------------------------
 1 | import { Task } from './task';
 2 | 
 3 | /** Lifecycle status of a run. */
 4 | export type RunStatus = 'pending' | 'running' | 'completed' | 'failed';
 5 | 
 6 | /** Whether the agent is currently planning or acting. */
 7 | export type RunBrainState = 'thinking' | 'executingAction';
 8 | 
 9 | /**
10 |  * State of one agent run: status, tasks added so far, and the final result
11 |  * or failure reason.
12 |  */
13 | export class Run {
14 |   public readonly id: string;
15 |   private _status: RunStatus;
16 |   private _tasks: Task[];
17 |   // Must start at 0: an uninitialised counter turns `retry()` into NaN.
18 |   private _retries = 0;
19 |   private _brainState: RunBrainState;
20 |   private _resultReason: string;
21 |   private _result: string;
22 | 
23 |   /** Uses `jobId` as the run id when given, otherwise a random UUID. */
24 |   constructor(
25 |     public readonly scenario: string,
26 |     jobId?: string,
27 |   ) {
28 |     this.id = jobId || crypto.randomUUID();
29 |     this._status = 'running';
30 |     this._tasks = [];
31 |     this._brainState = 'thinking';
32 |     this._resultReason = '';
33 |     this._result = '';
34 |   }
35 | 
36 |   get status() {
37 |     return this._status;
38 |   }
39 | 
40 |   get tasks() {
41 |     return this._tasks;
42 |   }
43 | 
44 |   get brainState() {
45 |     return this._brainState;
46 |   }
47 | 
48 |   get resultReason() {
49 |     return this._resultReason;
50 |   }
51 | 
52 |   get result() {
53 |     return this._result;
54 |   }
55 | 
56 |   /** Number of times `retry()` was called. */
57 |   get retries() {
58 |     return this._retries;
59 |   }
60 | 
61 |   static InitRunning(scenario: string, jobId?: string) {
62 |     return new Run(scenario, jobId);
63 |   }
64 | 
65 |   think() {
66 |     this._brainState = 'thinking';
67 |   }
68 | 
69 |   executeAction() {
70 |     this._brainState = 'executingAction';
71 |   }
72 | 
73 |   retry() {
74 |     this._retries += 1;
75 |   }
76 | 
77 |   run() {
78 |     this._status = 'running';
79 |   }
80 | 
81 |   addTask(task: Task) {
82 |     this._tasks.push(task);
83 |   }
84 | 
85 |   /** Replaces the stored task that has the same id as `task`. */
86 |   updateTask(task: Task) {
87 |     this._tasks = this._tasks.map((t) => (t.id === task.id ? task : t));
88 |   }
89 | 
90 |   setSuccess(answer: string) {
91 |     this._status = 'completed';
92 |     this._result = answer;
93 |   }
94 | 
95 |   setFailure(reason: string) {
96 |     this._status = 'failed';
97 |     this._resultReason = reason;
98 |   }
99 | }
91 | 


--------------------------------------------------------------------------------
/src/core/entities/task.ts:
--------------------------------------------------------------------------------
  1 | import * as crypto from 'crypto';
  2 | import { ManagerAgentAction } from '@/core/agents/openator/openator.types';
  3 | 
  4 | export type TaskStatus = // Lifecycle states of a Task
  5 |   | 'pending'
  6 |   | 'completed'
  7 |   | 'failed'
  8 |   | 'cancelled'
  9 |   | 'running';
 10 | 
 11 | export type TaskActionStatus = // Lifecycle states of a single TaskAction
 12 |   | 'pending'
 13 |   | 'completed'
 14 |   | 'cancelled'
 15 |   | 'failed'
 16 |   | 'running';
 17 | 
 18 | /**
 19 |  * One planned action together with its execution state (status, reason and
 20 |  * result).
 21 |  */
 22 | export class TaskAction {
 23 |   public readonly id: string;
 24 |   private _reason: string | undefined;
 25 |   private _status: TaskActionStatus;
 26 | 
 27 |   /**
 28 |    * Outcome text of the action; empty until the action completes or fails.
 29 |    */
 30 |   public result: string;
 31 | 
 32 |   constructor(public readonly data: ManagerAgentAction) {
 33 |     this.id = crypto.randomUUID();
 34 |     this._status = 'pending';
 35 |     this.result = '';
 36 |   }
 37 | 
 38 |   get status() {
 39 |     return this._status;
 40 |   }
 41 | 
 42 |   /** Stores a terminal status along with the reason that led to it. */
 43 |   private settle(status: TaskActionStatus, reason: string) {
 44 |     this._status = status;
 45 |     this._reason = reason;
 46 |   }
 47 | 
 48 |   start() {
 49 |     this._status = 'running';
 50 |   }
 51 | 
 52 |   /** Marks the action completed; the result defaults to 'ok'. */
 53 |   complete(result?: string) {
 54 |     this._status = 'completed';
 55 |     this.result = result ?? 'ok';
 56 |   }
 57 | 
 58 |   cancel(reason: string) {
 59 |     this.settle('cancelled', reason);
 60 |   }
 61 | 
 62 |   /** Marks the action failed and mirrors the reason into the result. */
 63 |   fail(reason: string) {
 64 |     this.settle('failed', reason);
 65 |     this.result = `Action failed with reason: ${reason}`;
 66 |   }
 67 | 
 68 |   /** Full snapshot of the action, including its id and raw parameters. */
 69 |   public asObject() {
 70 |     const { name, params, description } = this.data;
 71 | 
 72 |     return {
 73 |       id: this.id,
 74 |       status: this.status,
 75 |       reason: this._reason,
 76 |       name,
 77 |       params,
 78 |       description,
 79 |     };
 80 |   }
 81 | 
 82 |   /** Reduced snapshot: description, status, result and reason only. */
 83 |   public objectForLLM() {
 84 |     const { description } = this.data;
 85 | 
 86 |     return {
 87 |       description,
 88 |       status: this.status,
 89 |       result: this.result,
 90 |       reason: this._reason,
 91 |     };
 92 |   }
 93 | }
 79 | 
 80 | /**
 81 |  * A goal together with the ordered actions meant to reach it, plus the
 82 |  * task's own status and reason.
 83 |  */
 84 | export class Task {
 85 |   constructor(
 86 |     public readonly id: string,
 87 |     public readonly goal: string,
 88 |     readonly actions: TaskAction[],
 89 |     private _status: TaskStatus,
 90 |     private _reason: string | undefined = undefined,
 91 |   ) {}
 92 | 
 93 |   /** Builds a pending task with a fresh id, wrapping every raw action. */
 94 |   static InitPending(goal: string, actions: ManagerAgentAction[]) {
 95 |     const wrapped = actions.map((raw) => new TaskAction(raw));
 96 | 
 97 |     return new Task(crypto.randomUUID(), goal, wrapped, 'pending');
 98 |   }
 99 | 
100 |   get status() {
101 |     return this._status;
102 |   }
103 | 
104 |   get reason() {
105 |     return this._reason;
106 |   }
107 | 
108 |   /** Actions whose status is still 'pending'. */
109 |   get pendingActions() {
110 |     return this.actions.filter((candidate) => candidate.status === 'pending');
111 |   }
112 | 
113 |   get nextPendingAction(): TaskAction | null {
114 |     const [first] = this.pendingActions;
115 | 
116 |     return first ?? null;
117 |   }
118 | 
119 |   /** Looks an action up by id. @throws Error when no action has that id. */
120 |   private requireAction(id: string): TaskAction {
121 |     const found = this.actions.find((candidate) => candidate.id === id);
122 | 
123 |     if (!found) {
124 |       throw new Error('Action not found');
125 |     }
126 | 
127 |     return found;
128 |   }
129 | 
130 |   /**
131 |    * Completes the action with the given id; once no pending action is left,
132 |    * the task itself is completed too.
133 |    */
134 |   completeAction(id: string) {
135 |     const action = this.requireAction(id);
136 | 
137 |     action.complete();
138 | 
139 |     if (this.pendingActions.length === 0) {
140 |       action.complete();
141 |       this.complete();
142 |     }
143 |   }
144 | 
145 |   /** Cancels the action with the given id and the task with it. */
146 |   cancelAction(id: string, reason: string) {
147 |     this.requireAction(id).cancel(reason);
148 |     this.cancel(reason);
149 |   }
150 | 
151 |   start() {
152 |     this._status = 'running';
153 |   }
154 | 
155 |   complete() {
156 |     this._status = 'completed';
157 |   }
158 | 
159 |   cancel(reason: string) {
160 |     this._status = 'cancelled';
161 |     this._reason = reason;
162 |   }
163 | 
164 |   fail(reason: string) {
165 |     this._status = 'failed';
166 |     this._reason = reason;
167 |   }
168 | 
169 |   /** Reduced view: the goal plus each action's `objectForLLM()`. */
170 |   public objectForLLM() {
171 |     const actionsTaken = this.actions.map((entry) => entry.objectForLLM());
172 | 
173 |     return { goal: this.goal, actionsTaken };
174 |   }
175 | 
176 |   /** JSON string of `asObject()`. */
177 |   public serialize(): string {
178 |     return JSON.stringify(this.asObject());
179 |   }
180 | 
181 |   /** Full snapshot: id, goal, actions, status and reason. */
182 |   public asObject() {
183 |     return {
184 |       id: this.id,
185 |       goal: this.goal,
186 |       actions: this.actions.map((entry) => entry.asObject()),
187 |       status: this.status,
188 |       reason: this.reason,
189 |     };
190 |   }
191 | }
185 | 


--------------------------------------------------------------------------------
/src/core/entities/variable-string.ts:
--------------------------------------------------------------------------------
 1 | import { Variable } from './variable';
 2 | 
 3 | /**
 4 |  * A string that may contain `{{name}}` placeholders, resolved against a list
 5 |  * of variables either with their public or with their real values.
 6 |  */
 7 | export class VariableString {
 8 |   constructor(
 9 |     private readonly _value: string,
10 |     private readonly variables: Variable[],
11 |   ) {}
12 | 
13 |   /** Resolves placeholders with each variable's `publicValue()`. */
14 |   publicValue(): string {
15 |     return this.interpolate((variable) => variable.publicValue());
16 |   }
17 | 
18 |   /** Resolves placeholders with each variable's `dangerousValue()`. */
19 |   dangerousValue(): string {
20 |     return this.interpolate((variable) => variable.dangerousValue());
21 |   }
22 | 
23 |   /**
24 |    * Replaces every `{{name}}` using `read` on the variable with that name;
25 |    * placeholders without a matching variable are written back unchanged.
26 |    */
27 |   private interpolate(read: (variable: Variable) => string): string {
28 |     return this._value.replace(/{{(.*?)}}/g, (_, varName) => {
29 |       const known = this.variables.find((v) => v.name === varName);
30 | 
31 |       if (known) {
32 |         return read(known);
33 |       }
34 | 
35 |       return `{{${varName}}}`;
36 |     });
37 |   }
38 | }
39 | 


--------------------------------------------------------------------------------
/src/core/entities/variable.ts:
--------------------------------------------------------------------------------
 1 | export type VariableParams = {
 2 |   name: string;
 3 |   value: string;
 4 |   isSecret: boolean;
 5 | };
 6 | 
 7 | export class Variable {
 8 |   readonly name: string;
 9 |   readonly isSecret: boolean;
10 |   private readonly _value: string;
11 | 
12 |   constructor(params: VariableParams) {
13 |     this.name = params.name;
14 |     this.isSecret = params.isSecret;
15 | 
16 |     this._value = params.value;
17 |   }
18 | 
19 |   publicValue(): string {
20 |     return this.isSecret ? `{{${this.name}}}` : this._value;
21 |   }
22 | 
23 |   dangerousValue(): string {
24 |     return this._value;
25 |   }
26 | }
27 | 


--------------------------------------------------------------------------------
/src/core/interfaces/agent-reporter.interface.ts:
--------------------------------------------------------------------------------
/** Message sink with one method per message category. */
export interface AgentReporter {
  /** Reports a success message. */
  success(message: string): void;
  /** Reports a failure message. */
  failure(message: string): void;
  /** Reports a loading / in-progress message. */
  loading(message: string): void;
  /** Reports an informational message. */
  info(message: string): void;
}
7 | 


--------------------------------------------------------------------------------
/src/core/interfaces/browser-websocket-server.interface.ts:
--------------------------------------------------------------------------------
 1 | export interface BrowserWebSocketServer {
 2 |   startSession(): Promise<WSSSession>;
 3 |   stopSession(sessionId: string): Promise<void>;
 4 | }
 5 | 
 6 | export interface WSSSession {
 7 |   id: string;
 8 |   liveUrl: string;
 9 |   wsEndpoint: string;
10 | }
11 | 


--------------------------------------------------------------------------------
/src/core/interfaces/browser.interface.ts:
--------------------------------------------------------------------------------
 1 | import { Page } from 'playwright';
 2 | import { VariableString } from '../entities/variable-string';
 3 | 
 4 | export type Coordinates = {
 5 |   x: number;
 6 |   y: number;
 7 | };
 8 | 
 9 | export interface Browser {
10 |   close(): Promise<void>;
11 |   launch(url: string): Promise<void>;
12 |   getStablePage(): Promise<Page>;
13 |   getPage(): Page;
14 |   getPageUrl(): string;
15 |   getPixelAbove(): Promise<number>;
16 |   getPixelBelow(): Promise<number>;
17 |   mouseClick(x: number, y: number): Promise<void>;
18 |   fillInput(text: VariableString, coordinates: Coordinates): Promise<void>;
19 |   scrollDown(): Promise<void>;
20 |   scrollUp(): Promise<void>;
21 |   goToUrl(url: string): Promise<void>;
22 |   goBack(): Promise<void>;
23 |   extractContent(): Promise<string>;
24 | }
25 | 


--------------------------------------------------------------------------------
/src/core/interfaces/event-bus.interface.ts:
--------------------------------------------------------------------------------
 1 | import { Run } from '../entities/run';
 2 | import { Task, TaskAction } from '../entities/task';
 3 | 
 4 | export type AppEvents = {
 5 |   'run:update': Run;
 6 |   'task:update': Task;
 7 |   'action:update': TaskAction;
 8 |   'pristine-screenshot:taken': string;
 9 | };
10 | 
/** Typed publish/subscribe contract keyed by the names in `AppEvents`. */
export interface EventBusInterface {
  /** Emits `event` with the payload type declared for it in `AppEvents`. */
  emit<E extends keyof AppEvents>(event: E, data: AppEvents[E]): void;

  /** Registers `callback` for `event`; it receives that event's payload. */
  on<E extends keyof AppEvents>(
    event: E,
    callback: (data: AppEvents[E]) => void,
  ): void;
}
18 | 


--------------------------------------------------------------------------------
/src/core/interfaces/file-system.interface.ts:
--------------------------------------------------------------------------------
/** Contract for persisting binary data; both methods resolve with a string. */
export interface FileSystem {
  /** Saves `data` under `path`. */
  saveFile(path: string, data: Buffer): Promise<string>;
  /** Saves screenshot `data` under `filename`. */
  saveScreenshot(filename: string, data: Buffer): Promise<string>;
}
5 | 


--------------------------------------------------------------------------------
/src/core/interfaces/llm.interface.ts:
--------------------------------------------------------------------------------
 1 | import { BaseMessage } from '@langchain/core/messages';
 2 | import { JsonOutputParser } from '@langchain/core/output_parsers';
 3 | 
 4 | export interface LLM {
 5 |   invokeAndParse<T extends Record<string, any>>(
 6 |     messages: BaseMessage[],
 7 |     parser: JsonOutputParser<T>,
 8 |   ): Promise<T>;
 9 | }
10 | 


--------------------------------------------------------------------------------
/src/core/interfaces/reporter.interface.ts:
--------------------------------------------------------------------------------
1 | import { Task } from "../entities/task";
2 | 
/** Contract for reporting progress and messages. */
export interface Reporter {
  /**
   * Reports progress.
   *
   * @param thinking - Boolean progress flag (previously misspelled `thinkin`).
   * @param task - Optional task the progress relates to.
   */
  reportProgress(thinking: boolean, task?: Task): void;
  /** Reports an informational message. */
  info(message: string): void;
  /** Reports a success message. */
  success(message: string): void;
  /** Reports an error message. */
  error(message: string): void;
}
9 | 


--------------------------------------------------------------------------------
/src/core/interfaces/screenshotter.interface.ts:
--------------------------------------------------------------------------------
1 | import { Page } from "playwright";
2 | 
/** Contract for capturing a screenshot of a Playwright page. */
export interface Screenshotter {
  /** Captures `page` and resolves with a string result. */
  takeScreenshot(page: Page): Promise<string>;
}
6 | 


--------------------------------------------------------------------------------
/src/core/services/realtime-reporter.ts:
--------------------------------------------------------------------------------
 1 | import { EventEmitter } from 'events';
 2 | import {
 3 |   AppEvents,
 4 |   EventBusInterface,
 5 | } from '../interfaces/event-bus.interface';
 6 | 
 7 | export class EventBus extends EventEmitter implements EventBusInterface {
 8 |   emit<E extends keyof AppEvents>(event: E, data: AppEvents[E]): boolean {
 9 |     return super.emit(event, data);
10 |   }
11 | 
12 |   on<E extends keyof AppEvents>(
13 |     event: E,
14 |     callback: (data: AppEvents[E]) => void,
15 |   ): this {
16 |     return super.on(event, callback);
17 |   }
18 | }
19 | 


--------------------------------------------------------------------------------
/src/core/services/task-manager-service.ts:
--------------------------------------------------------------------------------
 1 | import { Task } from '@/core/entities/task';
 2 | 
 3 | export class TaskManagerService {
 4 |   private tasks: Task[] = [];
 5 |   private endGoal: string | null = null;
 6 | 
 7 |   constructor() {}
 8 | 
 9 |   setEndGoal(endGoal: string) {
10 |     this.endGoal = endGoal;
11 |   }
12 | 
13 |   getEndGoal() {
14 |     return this.endGoal!;
15 |   }
16 | 
17 |   add(task: Task) {
18 |     this.tasks.push(task);
19 |   }
20 | 
21 |   update(task: Task) {
22 |     this.tasks = this.tasks.map((t) => (t.id === task.id ? task : t));
23 |   }
24 | 
25 |   getLatestTaskPerformed() {
26 |     return (
27 |       this.tasks.filter((t) => t.status !== 'running')[this.tasks.length - 1] ??
28 |       null
29 |     );
30 |   }
31 | 
32 |   getTaskHistorySummary() {
33 |     return this.tasks.map((t) => t.goal);
34 |   }
35 | 
36 |   getSerializedTasks() {
37 |     const serialized = JSON.stringify(
38 |       {
39 |         endGoal: this.endGoal,
40 |         taskHistorySummary: this.getTaskHistorySummary(),
41 |         previousTaskResult: this.getLatestTaskPerformed()?.objectForLLM(),
42 |       },
43 |       null,
44 |       2,
45 |     );
46 | 
47 |     console.log('serialized', serialized);
48 | 
49 |     return serialized;
50 |   }
51 | }
52 | 


--------------------------------------------------------------------------------
/src/core/shared/utils.ts:
--------------------------------------------------------------------------------
/**
 * Splits `array` into consecutive chunks of equal size (the last one may be
 * shorter), sized so that at most `numberOfChunk` chunks are produced.
 *
 * The chunk size is `ceil(array.length / numberOfChunk)`, clamped to at
 * least 1. A `numberOfChunk` that is not a positive number (0, negative,
 * NaN) yields a single chunk holding the whole array.
 *
 * @param array - Items to split; it is not mutated.
 * @param numberOfChunk - Upper bound on the number of chunks.
 * @returns The chunks in order; an empty array when `array` is empty.
 */
export function splitArray<T>(array: T[], numberOfChunk: number): T[][] {
  if (array.length === 0) {
    return [];
  }

  // A zero or negative step would never advance the loop below, so the
  // chunk size is forced to be a positive integer.
  const chunkSize =
    numberOfChunk > 0
      ? Math.max(1, Math.ceil(array.length / numberOfChunk))
      : array.length;

  const result: T[][] = [];
  for (let i = 0; i < array.length; i += chunkSize) {
    result.push(array.slice(i, i + chunkSize));
  }
  return result;
}
9 | 


--------------------------------------------------------------------------------
/src/core/types.ts:
--------------------------------------------------------------------------------
 1 | export * from '@/core/agents/openator/openator.types';
 2 | export * from '@/core/agents/feedback-agent/feedback-agent.types';
 3 | export * from '@/core/entities/run';
 4 | export * from '@/core/entities/openator-result';
 5 | export * from '@/core/entities/task';
 6 | export * from '@/core/entities/variable-string';
 7 | export { Variable } from '@/core/entities/variable';
 8 | export * from '@/core/interfaces/agent-reporter.interface';
 9 | export * from '@/core/interfaces/browser.interface';
10 | export * from '@/core/interfaces/event-bus.interface';
11 | export * from '@/core/interfaces/llm.interface';
12 | export * from '@/core/interfaces/reporter.interface';
13 | export * from '@/core/interfaces/file-system.interface';
14 | export * from '@/core/interfaces/screenshotter.interface';
15 | 


--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Export the main factory function
 3 |  */
 4 | export { initOpenator, InitOpenatorConfig } from './init-openator';
 5 | 
 6 | /**
 7 |  * Export interfaces and types
 8 |  */
 9 | export { LLM } from './core/interfaces/llm.interface';
10 | export { Reporter } from './core/interfaces/reporter.interface';
11 | export {
12 |   OpenatorResult,
13 |   OpenatorResultStatus,
14 |   OpenatorResultStatuses,
15 | } from './core/entities/openator-result';
16 | 
17 | export {
18 |   ManagerAgentAction,
19 |   ManagerAgentResponseSchema,
20 |   ManagerResponse,
21 |   ManagerResponseExamples,
22 | } from './core/agents/openator/openator.types';
23 | 
24 | /**
25 |  * Export entities and classes
26 |  */
27 | export { Variable } from './core/entities/variable';
28 | export { Openator, OpenatorConfig } from './core/agents/openator/openator';
29 | export { Task } from './core/entities/task';
30 | export { Run } from './core/entities/run';
31 | 
32 | /**
33 |  * Export Chat Models
34 |  */
35 | export { ChatOpenAI, ChatOpenAIConfig } from './models/chat-openai';
36 | export { ChatOllama, ChatOllamaConfig } from './models/chat-ollama';
37 | export { ChatGoogleGenAI, ChatGoogleGenAIConfig } from './models/chat-google';
38 | 


--------------------------------------------------------------------------------
/src/infra/services/chromium-browser.ts:
--------------------------------------------------------------------------------
  1 | import { VariableString } from '@/core/entities/variable-string';
  2 | import { Browser } from '@/core/interfaces/browser.interface';
  3 | import { convertHtmlToMarkdown } from 'dom-to-semantic-markdown';
  4 | import { JSDOM } from 'jsdom';
  5 | import {
  6 |   Browser as PBrowser,
  7 |   BrowserContext,
  8 |   Page,
  9 |   chromium,
 10 | } from 'playwright';
 11 | 
 12 | export type Coordinates = {
 13 |   x: number;
 14 |   y: number;
 15 | };
 16 | 
 17 | export class ChromiumBrowser implements Browser {
 18 |   private page: Page | null = null;
 19 |   private context: BrowserContext | null = null;
 20 | 
 21 |   private minimumPageLoadTime: number = 400;
 22 | 
 23 |   constructor(
 24 |     private options?: {
 25 |       headless: boolean;
 26 |       wsEndpoint?: string;
 27 |     },
 28 |   ) {}
 29 | 
 30 |   async launch(url: string) {
 31 |     const wsEndpoint =
 32 |       this.options?.wsEndpoint ?? process.env.PLAYWRIGHT_WS_ENDPOINT ?? null;
 33 | 
 34 |     let browser: PBrowser;
 35 | 
 36 |     /**
 37 |      * If the wsEndpoint is provided, we connect to the browser using the Playwright
 38 |      * WebSocket endpoint.
 39 |      * This is used in the docker-compose file where the playwright-server is running in a dedicated container.
 40 |      */
 41 |     if (wsEndpoint) {
 42 |       browser = await chromium.connectOverCDP(wsEndpoint);
 43 |     } else {
 44 |       browser = await chromium.launch({
 45 |         headless: this.options?.headless ?? false,
 46 |       });
 47 |     }
 48 | 
 49 |     this.context = await browser.newContext({
 50 |       screen: {
 51 |         width: 1440,
 52 |         height: 900,
 53 |       },
 54 |       viewport: {
 55 |         width: 1440,
 56 |         height: 900,
 57 |       },
 58 |     });
 59 |     this.page = await this.context.newPage();
 60 |     await this.page.goto(url);
 61 |   }
 62 | 
 63 |   private async waitForDomContentLoaded() {
 64 |     await this.getPage().waitForLoadState('domcontentloaded');
 65 |   }
 66 | 
 67 |   private async waitMinimumPageLoadTime() {
 68 |     return new Promise((resolve) =>
 69 |       setTimeout(resolve, this.minimumPageLoadTime),
 70 |     );
 71 |   }
 72 | 
 73 |   private async waitForStability() {
 74 |     return Promise.all([
 75 |       this.waitForDomContentLoaded(),
 76 |       this.waitMinimumPageLoadTime(),
 77 |     ]);
 78 |   }
 79 | 
 80 |   async getStablePage(): Promise<Page> {
 81 |     await this.waitForStability();
 82 |     return this.getPage();
 83 |   }
 84 | 
 85 |   async close() {
 86 |     if (this.context) {
 87 |       this.context.close();
 88 |     }
 89 |   }
 90 | 
 91 |   getPage(): Page {
 92 |     if (!this.page) {
 93 |       throw new Error('The page is not initialized or has been detroyed.');
 94 |     }
 95 |     return this.page;
 96 |   }
 97 | 
 98 |   getPageUrl() {
 99 |     return this.getPage().url();
100 |   }
101 | 
102 |   async mouseClick(x: number, y: number) {
103 |     await Promise.all([
104 |       this.getPage().mouse.click(x, y),
105 |       this.getPage().waitForLoadState('domcontentloaded'),
106 |     ]);
107 |   }
108 | 
109 |   async getPixelAbove() {
110 |     return this.getPage().evaluate(() => {
111 |       return window.scrollY;
112 |     });
113 |   }
114 | 
115 |   async getPixelBelow() {
116 |     return this.getPage().evaluate(() => {
117 |       return window.scrollY + window.innerHeight;
118 |     });
119 |   }
120 | 
121 |   async fillInput(text: VariableString, coordinates: Coordinates) {
122 |     await this.getPage().mouse.click(coordinates.x, coordinates.y);
123 |     await this.getPage().keyboard.press('ControlOrMeta+A');
124 |     await this.getPage().keyboard.press('Backspace');
125 |     await this.getPage().keyboard.type(text.dangerousValue(), { delay: 100 });
126 |   }
127 | 
128 |   async scrollDown() {
129 |     await this.getPage().mouse.wheel(0, 500);
130 |     await this.getPage().waitForTimeout(300);
131 |   }
132 | 
133 |   async scrollUp() {
134 |     await this.getPage().mouse.wheel(0, -500);
135 |     await this.getPage().waitForTimeout(300);
136 |   }
137 | 
138 |   async goToUrl(url: string) {
139 |     await this.getPage().goto(url);
140 |   }
141 | 
142 |   async goBack() {
143 |     await this.getPage().goBack();
144 |   }
145 | 
146 |   async extractContent() {
147 |     const html = await this.getPage().content();
148 |     const dom = new JSDOM(html);
149 |     const markdown = convertHtmlToMarkdown(html, {
150 |       overrideDOMParser: new dom.window.DOMParser(),
151 |       extractMainContent: true,
152 |     });
153 |     return markdown;
154 |   }
155 | }
156 | 


--------------------------------------------------------------------------------
/src/infra/services/console-reporter.ts:
--------------------------------------------------------------------------------
 1 | import { AgentReporter } from '@/core/interfaces/agent-reporter.interface';
 2 | 
 3 | export class ConsoleReporter implements AgentReporter {
 4 |   constructor(private readonly name: string) {}
 5 | 
 6 |   getSpinner() {}
 7 | 
 8 |   success(message: string): void {
 9 |     console.log(`[${this.name}] ✅ ${message}`);
10 |   }
11 | 
12 |   failure(message: string): void {
13 |     console.log(`[${this.name}] ❌ ${message}`);
14 |   }
15 | 
16 |   loading(message: string): void {
17 |     console.log(`[${this.name}] 💡 ${message}`);
18 |   }
19 | 
20 |   info(message: string): void {
21 |     console.log(`[${this.name}] 💡 ${message}`);
22 |   }
23 | }
24 | 


--------------------------------------------------------------------------------
/src/infra/services/dom-service.ts:
--------------------------------------------------------------------------------
  1 | import { Page } from 'playwright';
  2 | import { Browser } from '@/core/interfaces/browser.interface';
  3 | import { Screenshotter } from '@/core/interfaces/screenshotter.interface';
  4 | import * as crypto from 'crypto';
  5 | import { EventBus } from '@/core/services/realtime-reporter';
  6 | 
  7 | declare global {
  8 |   interface Window {
  9 |     getEventListeners?: any;
 10 |   }
 11 | }
 12 | 
 13 | export type Coordinates = {
 14 |   x: number;
 15 |   y: number;
 16 | };
 17 | 
 18 | export type TextNode = {
 19 |   type: 'TEXT_NODE';
 20 |   text: string;
 21 |   isVisible: boolean;
 22 | };
 23 | 
 24 | const isElementNode = (node: DomNode | null): node is ElementNode => {
 25 |   if (!node) return false;
 26 | 
 27 |   return !('type' in node) || node.type !== 'TEXT_NODE';
 28 | };
 29 | 
 30 | export type ElementNode = {
 31 |   tagName: string | null;
 32 |   attributes: Record<string, string>;
 33 |   text: string;
 34 |   index: number;
 35 |   xpath: string | null;
 36 |   coordinates: Coordinates | null;
 37 |   isVisible: boolean;
 38 |   isInteractive: boolean;
 39 |   isTopElement: boolean;
 40 |   highlightIndex: number;
 41 |   children: (DomNode | null)[];
 42 |   iframeContext: string;
 43 |   shadowRoot: boolean;
 44 | };
 45 | 
 46 | const IMPORTANT_ATTRIBUTES = [
 47 |   'id',
 48 |   'name',
 49 |   'value',
 50 |   'placeholder',
 51 |   'aria-label',
 52 |   'role',
 53 |   'for',
 54 |   'href',
 55 |   'alt',
 56 |   'title',
 57 |   'data-testid',
 58 |   'data-test',
 59 |   'data-test-id',
 60 |   'data-test-name',
 61 |   'data-test-value',
 62 | ];
 63 | 
 64 | export type DomNode = TextNode | ElementNode;
 65 | 
 66 | export const isTextNode = (node: DomNode): node is TextNode => {
 67 |   return 'type' in node && node.type === 'TEXT_NODE';
 68 | };
 69 | 
 70 | export interface SerializedDomState {
 71 |   screenshot: string;
 72 |   pristineScreenshot: string;
 73 |   domState: DomNode | null;
 74 |   pixelAbove: number;
 75 |   pixelBelow: number;
 76 | }
 77 | 
 78 | export class DomService {
 79 |   private domContext: {
 80 |     selectorMap: Record<number, DomNode>;
 81 |   } = {
 82 |     selectorMap: {},
 83 |   };
 84 | 
 85 |   constructor(
 86 |     private readonly screenshotService: Screenshotter,
 87 |     private readonly browserService: Browser,
 88 |     private readonly eventBus: EventBus,
 89 |   ) {}
 90 | 
 91 |   /**
 92 |    * For this version of the dom state string with only keep the index and tag name
 93 |    * because it is frequent that an attribute or the content of the node changes
 94 |    * and we don't want to re-run the action for such a small change.
 95 |    *
 96 |    * Ouput format: [2]__<div></div>
 97 |    */
 98 |   private stringifyDomStateForHash(nodeState: DomNode | null) {
 99 |     const items: string[] = [];
100 | 
101 |     const format = (node: DomNode | null) => {
102 |       if (!isElementNode(node)) {
103 |         return;
104 |       }
105 | 
106 |       if (node.highlightIndex) {
107 |         // [2]__<div></div>
108 |         const str = `[${node.isInteractive ? node.highlightIndex : ''}]__<${node.tagName}>`;
109 | 
110 |         items.push(str);
111 |       }
112 | 
113 |       for (const child of node.children) {
114 |         if (child) {
115 |           format(child);
116 |         }
117 |       }
118 |     };
119 | 
120 |     format(nodeState);
121 | 
122 |     return items.join('\n');
123 |   }
124 | 
125 |   private hashDomState(domState: DomNode | null) {
126 |     if (!domState) {
127 |       return '';
128 |     }
129 | 
130 |     const domStateString = this.stringifyDomStateForHash(domState);
131 | 
132 |     return crypto.createHash('sha256').update(domStateString).digest('hex');
133 |   }
134 | 
135 |   getIndexSelector(index: number): Coordinates | null {
136 |     const domNode = this.domContext?.selectorMap[index];
137 | 
138 |     if (!domNode) {
139 |       return null;
140 |     }
141 | 
142 |     if (isTextNode(domNode)) {
143 |       return null;
144 |     }
145 | 
146 |     return domNode.coordinates;
147 |   }
148 | 
149 |   async getDomState(
150 |     withHighlight: boolean = true,
151 |   ): Promise<SerializedDomState> {
152 |     await this.resetHighlightElements();
153 |     const pristineScreenshot = await this.screenshotService.takeScreenshot(
154 |       await this.browserService.getStablePage(),
155 |     );
156 | 
157 |     this.eventBus.emit('pristine-screenshot:taken', pristineScreenshot);
158 | 
159 |     const state = await this.highlightForSoM(withHighlight);
160 | 
161 |     const screenshot = await this.screenshotService.takeScreenshot(
162 |       await this.browserService.getStablePage(),
163 |     );
164 | 
165 |     const pixelAbove = await this.browserService.getPixelAbove();
166 |     const pixelBelow = await this.browserService.getPixelBelow();
167 | 
168 |     return {
169 |       screenshot,
170 |       pristineScreenshot,
171 |       domState: state,
172 |       pixelAbove,
173 |       pixelBelow,
174 |     };
175 |   }
176 | 
177 |   async getInteractiveElements(withHighlight: boolean = true) {
178 |     const { screenshot, pristineScreenshot, domState, pixelAbove, pixelBelow } =
179 |       await this.getDomState(withHighlight);
180 |     const selectorMap = this.createSelectorMap(domState);
181 |     const stringifiedDomState = this.stringifyDomState(domState);
182 |     const domStateHash = this.hashDomState(domState);
183 | 
184 |     this.domContext.selectorMap = selectorMap;
185 | 
186 |     return {
187 |       screenshot,
188 |       pristineScreenshot,
189 |       domState,
190 |       selectorMap,
191 |       stringifiedDomState,
192 |       domStateHash,
193 |       pixelAbove,
194 |       pixelBelow,
195 |     };
196 |   }
197 | 
198 |   createSelectorMap(nodeState: DomNode | null) {
199 |     const selectorMap: Record<number, DomNode> = {};
200 | 
201 |     const mapNode = (node: DomNode | null) => {
202 |       if (isElementNode(node)) {
203 |         selectorMap[node.highlightIndex] = node;
204 | 
205 |         for (const child of node.children) {
206 |           mapNode(child);
207 |         }
208 |       }
209 |     };
210 | 
211 |     mapNode(nodeState);
212 |     return selectorMap;
213 |   }
214 | 
215 |   stringifyDomState(nodeState: DomNode | null) {
216 |     const items: string[] = [];
217 | 
218 |     const format = (node: DomNode | null) => {
219 |       if (!isElementNode(node)) {
220 |         return;
221 |       }
222 | 
223 |       const attributes = Object.entries(node.attributes)
224 |         .filter(([key]) => IMPORTANT_ATTRIBUTES.includes(key))
225 |         .map(([key, value]) => `${key}="${value}"`)
226 |         .join(' ');
227 | 
228 |       if (node.highlightIndex) {
229 |         // [2]__<div optional-attributes>Hello</div>
230 |         const str = `[${node.isInteractive ? node.highlightIndex : ''}]__<${node.tagName} ${attributes}>${node.text}</${node.tagName}>`;
231 | 
232 |         items.push(str);
233 |       }
234 | 
235 |       for (const child of node.children) {
236 |         if (child) {
237 |           format(child);
238 |         }
239 |       }
240 |     };
241 | 
242 |     format(nodeState);
243 | 
244 |     return items.join('\n');
245 |   }
246 | 
247 |   async resetHighlightElements() {
248 |     const page = await this.browserService.getStablePage();
249 |     await page.evaluate(() => {
250 |       try {
251 |         // Remove the highlight container and all its contents
252 |         const container = document.getElementById(
253 |           'playwright-highlight-container',
254 |         );
255 |         if (container) {
256 |           container.remove();
257 |         }
258 | 
259 |         // Remove highlight attributes from elements
260 |         const highlightedElements = document.querySelectorAll(
261 |           '[magic-inspector-highlight-id^="playwright-highlight-"]',
262 |         );
263 |         highlightedElements.forEach((el) => {
264 |           el.removeAttribute('magic-inspector-highlight-id');
265 |         });
266 |       } catch (e) {
267 |         console.error('Failed to remove highlights:', e);
268 |       }
269 |     });
270 |   }
271 | 
272 |   async highlightElementWheel(direction: 'down' | 'up') {
273 |     const page = await this.browserService.getStablePage();
274 |     await page.evaluate((direction: 'down' | 'up') => {
275 |       console.log('highlightElementWheel', direction);
276 |     }, direction);
277 |   }
278 | 
279 |   async highlightElementPointer(coordinates: Coordinates) {
280 |     const page = await this.browserService.getStablePage();
281 |     await page.evaluate((coordinates: Coordinates) => {
282 |       try {
283 |         // Create or get highlight container
284 |         let container = document.getElementById(
285 |           'playwright-pointer-highlight-container',
286 |         );
287 |         if (!container) {
288 |           container = document.createElement('div');
289 |           container.id = 'playwright-pointer-highlight-container';
290 |           container.style.position = 'fixed';
291 |           container.style.pointerEvents = 'none';
292 |           container.style.top = '0';
293 |           container.style.left = '0';
294 |           container.style.width = '100%';
295 |           container.style.height = '100%';
296 |           container.style.zIndex = '2147483647'; // Maximum z-index value
297 |           document.body.appendChild(container);
298 |         }
299 | 
300 |         // Create the red circle
301 |         const circle = document.createElement('div');
302 |         circle.style.position = 'absolute';
303 |         circle.style.width = '20px';
304 |         circle.style.height = '20px';
305 |         circle.style.borderRadius = '50%';
306 |         circle.style.backgroundColor = 'red';
307 |         circle.style.left = `${coordinates.x - 10}px`; // Center the circle
308 |         circle.style.top = `${coordinates.y - 10}px`; // Center the circle
309 |         circle.style.pointerEvents = 'none'; // Ensure it doesn't interfere with clicking
310 | 
311 |         container.appendChild(circle);
312 | 
313 |         setTimeout(() => {
314 |           circle.remove();
315 |           container.remove();
316 |         }, 2000);
317 |       } catch (e) {
318 |         console.error('Failed to draw highlight circle:', e);
319 |       }
320 |     }, coordinates);
321 |   }
322 | 
323 |   async waitForStability(page: Page) {
324 |     await page.waitForTimeout(1500);
325 |   }
326 | 
327 |   async highlightForSoM(
328 |     withHighlight: boolean = true,
329 |   ): Promise<DomNode | null> {
330 |     try {
331 |       const page: Page = await this.browserService.getStablePage();
332 | 
333 |       if (page.isClosed()) {
334 |         return null;
335 |       }
336 | 
337 |       await this.waitForStability(page);
338 | 
339 |       const domState: DomNode | null = await page.evaluate((withHighlight) => {
340 |         const doHighlightElements = true;
341 |         const focusHighlightIndex = -1;
342 |         const viewportExpansion: 0 | -1 = 0;
343 | 
344 |         let highlightIndex = 0;
345 | 
346 |         function highlightElement(
347 |           element: Element,
348 |           index: number,
349 |           parentIframe: HTMLIFrameElement | null = null,
350 |         ) {
351 |           if (!withHighlight) {
352 |             return;
353 |           }
354 | 
355 |           let container = document.getElementById(
356 |             'playwright-highlight-container',
357 |           );
358 |           if (!container) {
359 |             container = document.createElement('div');
360 |             container.id = 'playwright-highlight-container';
361 |             container.style.position = 'absolute';
362 |             container.style.pointerEvents = 'none';
363 |             container.style.top = '0';
364 |             container.style.left = '0';
365 |             container.style.width = '100%';
366 |             container.style.height = '100%';
367 |             container.style.zIndex = '2147483647';
368 |             document.body.appendChild(container);
369 |           }
370 | 
371 |           const colors = [
372 |             '#FF0000',
373 |             '#00FF00',
374 |             '#0000FF',
375 |             '#FFA500',
376 |             '#800080',
377 |             '#008080',
378 |             '#FF69B4',
379 |             '#4B0082',
380 |             '#FF4500',
381 |             '#2E8B57',
382 |             '#DC143C',
383 |             '#4682B4',
384 |           ];
385 |           const colorIndex = index % colors.length;
386 |           const baseColor = colors[colorIndex];
387 |           // 10% opacity version of the color
388 |           const backgroundColor = `${baseColor}1A`;
389 | 
390 |           // Create highlight overlay
391 |           const overlay = document.createElement('div');
392 |           overlay.style.position = 'absolute';
393 |           overlay.style.border = `2px solid ${baseColor}`;
394 |           overlay.style.backgroundColor = backgroundColor;
395 |           overlay.style.pointerEvents = 'none';
396 |           overlay.style.boxSizing = 'border-box';
397 | 
398 |           // Position overlay based on element, including scroll position
399 |           const rect = element.getBoundingClientRect();
400 |           let top = rect.top + window.scrollY;
401 |           let left = rect.left + window.scrollX;
402 | 
403 |           // Adjust position if element is inside an iframe
404 |           if (parentIframe) {
405 |             const iframeRect = parentIframe.getBoundingClientRect();
406 |             top += iframeRect.top;
407 |             left += iframeRect.left;
408 |           }
409 | 
410 |           overlay.style.top = `${top}px`;
411 |           overlay.style.left = `${left}px`;
412 |           overlay.style.width = `${rect.width}px`;
413 |           overlay.style.height = `${rect.height}px`;
414 | 
415 |           // Create label
416 |           const label = document.createElement('div');
417 |           label.className = 'playwright-highlight-label';
418 |           label.style.position = 'absolute';
419 |           label.style.background = `${baseColor}`;
420 |           label.style.color = 'white';
421 |           label.style.padding = '1px 4px';
422 |           label.style.borderRadius = '4px';
423 |           label.style.fontSize = `${Math.min(12, Math.max(8, rect.height / 2))}px`; // Responsive font size
424 |           label.textContent = `[${index}]`;
425 | 
426 |           // Calculate label position
427 |           const labelWidth = 20; // Approximate width
428 |           const labelHeight = 16; // Approximate height
429 | 
430 |           // Default position (top-right corner inside the box)
431 |           let labelTop = top + 2;
432 |           let labelLeft = left + rect.width - labelWidth - 2;
433 | 
434 |           // Adjust if box is too small
435 |           if (rect.width < labelWidth + 4 || rect.height < labelHeight + 4) {
436 |             // Position outside the box if it's too small
437 |             labelTop = top - labelHeight - 2;
438 |             labelLeft = left + rect.width - labelWidth;
439 |           }
440 | 
441 |           label.style.top = `${labelTop}px`;
442 |           label.style.left = `${labelLeft}px`;
443 | 
444 |           // Add to container
445 |           container.appendChild(overlay);
446 |           container.appendChild(label);
447 | 
448 |           // Store reference for cleanup
449 |           element.setAttribute(
450 |             'magic-inspector-highlight-id',
451 |             `playwright-highlight-${index}`,
452 |           );
453 | 
454 |           return index + 1;
455 |         }
456 | 
457 |         function getXPathTree(element: ParentNode, stopAtBoundary = true) {
458 |           const segments = [];
459 |           let currentElement = element;
460 | 
461 |           while (
462 |             currentElement &&
463 |             currentElement.nodeType === Node.ELEMENT_NODE
464 |           ) {
465 |             // Stop if we hit a shadow root or iframe
466 |             if (
467 |               stopAtBoundary &&
468 |               (currentElement.parentNode instanceof ShadowRoot ||
469 |                 currentElement.parentNode instanceof HTMLIFrameElement)
470 |             ) {
471 |               break;
472 |             }
473 | 
474 |             let index = 0;
475 |             let sibling = currentElement.previousSibling;
476 |             while (sibling) {
477 |               if (
478 |                 sibling.nodeType === Node.ELEMENT_NODE &&
479 |                 sibling.nodeName === currentElement.nodeName
480 |               ) {
481 |                 index++;
482 |               }
483 |               sibling = sibling.previousSibling;
484 |             }
485 | 
486 |             const tagName = currentElement.nodeName.toLowerCase();
487 |             const xpathIndex = index > 0 ? `[${index + 1}]` : '';
488 |             segments.unshift(`${tagName}${xpathIndex}`);
489 | 
490 |             // @ts-ignore // TODO: fix this type issue
491 |             currentElement = currentElement.parentNode;
492 |           }
493 | 
494 |           return segments.join('/');
495 |         }
496 | 
497 |         function isElementAccepted(element: Element) {
498 |           const leafElementDenyList = new Set([
499 |             'svg',
500 |             'script',
501 |             'style',
502 |             'link',
503 |             'meta',
504 |           ]);
505 |           return !leafElementDenyList.has(element.tagName.toLowerCase());
506 |         }
507 | 
508 |         function isInteractiveElement(element: HTMLElement) {
509 |           const interactiveElements = new Set([
510 |             'a',
511 |             'button',
512 |             'details',
513 |             'embed',
514 |             'input',
515 |             'label',
516 |             'menu',
517 |             'menuitem',
518 |             'object',
519 |             'select',
520 |             'textarea',
521 |             'summary',
522 |           ]);
523 | 
524 |           const interactiveRoles = new Set([
525 |             'button',
526 |             'menu',
527 |             'menuitem',
528 |             'link',
529 |             'checkbox',
530 |             'radio',
531 |             'slider',
532 |             'tab',
533 |             'tabpanel',
534 |             'textbox',
535 |             'combobox',
536 |             'grid',
537 |             'listbox',
538 |             'option',
539 |             'progressbar',
540 |             'scrollbar',
541 |             'searchbox',
542 |             'switch',
543 |             'tree',
544 |             'treeitem',
545 |             'spinbutton',
546 |             'tooltip',
547 |             'a-button-inner',
548 |             'a-dropdown-button',
549 |             'click',
550 |             'menuitemcheckbox',
551 |             'menuitemradio',
552 |             'a-button-text',
553 |             'button-text',
554 |             'button-icon',
555 |             'button-icon-only',
556 |             'button-text-icon-only',
557 |             'dropdown',
558 |             'combobox',
559 |           ]);
560 | 
561 |           const tagName = element.tagName.toLowerCase();
562 |           const role = element.getAttribute('role') ?? '';
563 |           const ariaRole = element.getAttribute('aria-role') ?? '';
564 |           const tabIndex = element.getAttribute('tabindex') ?? '';
565 | 
566 |           const hasAddressInputClass = element.classList.contains(
567 |             'address-input__container__input',
568 |           );
569 | 
570 |           // Basic role/attribute checks
571 |           const hasInteractiveRole =
572 |             hasAddressInputClass ||
573 |             interactiveElements.has(tagName) ||
574 |             interactiveRoles.has(role) ||
575 |             interactiveRoles.has(ariaRole) ||
576 |             (tabIndex !== null && tabIndex !== '-1') ||
577 |             element.getAttribute('data-action') === 'a-dropdown-select' ||
578 |             element.getAttribute('data-action') === 'a-dropdown-button';
579 | 
580 |           if (hasInteractiveRole) return true;
581 | 
582 |           const hasClickHandler =
583 |             element.onclick !== null ||
584 |             element.getAttribute('onclick') !== null ||
585 |             element.hasAttribute('ng-click') ||
586 |             element.hasAttribute('@click') ||
587 |             element.hasAttribute('v-on:click');
588 | 
589 |           function getEventListeners(el: Element) {
590 |             try {
591 |               return window.getEventListeners?.(el) || {};
592 |             } catch (e) {
593 |               const listeners = {};
594 | 
595 |               const eventTypes = [
596 |                 'click',
597 |                 'mousedown',
598 |                 'mouseup',
599 |                 'touchstart',
600 |                 'touchend',
601 |                 'keydown',
602 |                 'keyup',
603 |                 'focus',
604 |                 'blur',
605 |               ];
606 | 
607 |               for (const type of eventTypes) {
608 |                 // @ts-ignore // TODO: fix this
609 |                 const handler = el[`on${type}`];
610 |                 if (handler) {
611 |                   // @ts-ignore // TODO: fix this
612 |                   listeners[type] = [
613 |                     {
614 |                       listener: handler,
615 |                       useCapture: false,
616 |                     },
617 |                   ];
618 |                 }
619 |               }
620 | 
621 |               return listeners;
622 |             }
623 |           }
624 | 
625 |           // Check for click-related events on the element itself
626 |           const listeners = getEventListeners(element);
627 |           const hasClickListeners =
628 |             listeners &&
629 |             (listeners.click?.length > 0 ||
630 |               listeners.mousedown?.length > 0 ||
631 |               listeners.mouseup?.length > 0 ||
632 |               listeners.touchstart?.length > 0 ||
633 |               listeners.touchend?.length > 0);
634 | 
635 |           // Check for ARIA properties that suggest interactivity
636 |           const hasAriaProps =
637 |             element.hasAttribute('aria-expanded') ||
638 |             element.hasAttribute('aria-pressed') ||
639 |             element.hasAttribute('aria-selected') ||
640 |             element.hasAttribute('aria-checked');
641 | 
642 |           // Check if element is draggable
643 |           const isDraggable =
644 |             element.draggable || element.getAttribute('draggable') === 'true';
645 | 
646 |           return (
647 |             hasAriaProps || hasClickHandler || hasClickListeners || isDraggable
648 |           );
649 |         }
650 | 
651 |         function isElementVisible(element: HTMLElement) {
652 |           const style = window.getComputedStyle(element);
653 |           return (
654 |             element.offsetWidth > 0 &&
655 |             element.offsetHeight > 0 &&
656 |             style.visibility !== 'hidden' &&
657 |             style.display !== 'none'
658 |           );
659 |         }
660 | 
661 |         function isTopElement(element: Element) {
662 |           const rect = element.getBoundingClientRect();
663 | 
664 |           // Ignore elements outside viewport
665 |           if (rect.bottom < 0 || rect.top > window.innerHeight) return false;
666 | 
667 |           const centerX = rect.left + rect.width / 2;
668 |           const centerY = rect.top + rect.height / 2;
669 | 
670 |           const topEl = document.elementFromPoint(centerX, centerY);
671 |           return topEl === element || element.contains(topEl);
672 |         }
673 | 
674 |         function isTopElementOld(element: Element) {
675 |           // Find the correct document context and root element
676 |           let doc = element.ownerDocument;
677 | 
678 |           // If we're in an iframe, elements are considered top by default
679 |           if (doc !== window.document) {
680 |             return true;
681 |           }
682 | 
683 |           // For shadow DOM, we need to check within its own root context
684 |           const shadowRoot = element.getRootNode();
685 |           if (shadowRoot instanceof ShadowRoot) {
686 |             const rect = element.getBoundingClientRect();
687 |             const point = {
688 |               x: rect.left + rect.width / 2,
689 |               y: rect.top + rect.height / 2,
690 |             };
691 | 
692 |             try {
693 |               // Use shadow root's elementFromPoint to check within shadow DOM context
694 |               const topEl = shadowRoot.elementFromPoint(point.x, point.y);
695 | 
696 |               if (!topEl) return false;
697 | 
698 |               // Check if the element or any of its parents match our target element
699 |               let current = topEl;
700 |               // @ts-ignore // TODO: fix this
701 |               while (current && current !== shadowRoot) {
702 |                 if (current === element) return true;
703 |                 current = current.parentElement as Element;
704 |               }
705 |               return false;
706 |             } catch (e) {
707 |               return true; // If we can't determine, consider it visible
708 |             }
709 |           }
710 | 
711 |           const rect = element.getBoundingClientRect();
712 | 
713 |           // If viewportExpansion is -1, check if element is the top one at its position
714 |           if (viewportExpansion === -1) {
715 |             return true; // Consider all elements as top elements when expansion is -1
716 |           }
717 | 
718 |           // Calculate expanded viewport boundaries including scroll position
719 |           const scrollX = window.scrollX;
720 |           const scrollY = window.scrollY;
721 |           const viewportTop = -viewportExpansion + scrollY;
722 |           const viewportLeft = -viewportExpansion + scrollX;
723 |           const viewportBottom =
724 |             window.innerHeight + viewportExpansion + scrollY;
725 |           const viewportRight = window.innerWidth + viewportExpansion + scrollX;
726 | 
727 |           // Get absolute element position
728 |           const absTop = rect.top + scrollY;
729 |           const absLeft = rect.left + scrollX;
730 |           const absBottom = rect.bottom + scrollY;
731 |           const absRight = rect.right + scrollX;
732 | 
733 |           // Skip if element is completely outside expanded viewport
734 |           if (
735 |             absBottom < viewportTop ||
736 |             absTop > viewportBottom ||
737 |             absRight < viewportLeft ||
738 |             absLeft > viewportRight
739 |           ) {
740 |             return false;
741 |           }
742 | 
743 |           // For elements within expanded viewport, check if they're the top element
744 |           try {
745 |             const centerX = rect.left + rect.width / 2;
746 |             const centerY = rect.top + rect.height / 2;
747 | 
748 |             // Only clamp the point if it's outside the actual document
749 |             const point = {
750 |               x: centerX,
751 |               y: centerY,
752 |             };
753 | 
754 |             if (
755 |               point.x < 0 ||
756 |               point.x >= window.innerWidth ||
757 |               point.y < 0 ||
758 |               point.y >= window.innerHeight
759 |             ) {
760 |               return true; // Consider elements with center outside viewport as visible
761 |             }
762 | 
763 |             const topEl = document.elementFromPoint(point.x, point.y);
764 |             if (!topEl) return false;
765 | 
766 |             let current = topEl;
767 |             while (current && current !== document.documentElement) {
768 |               if (current === element) return true;
769 |               // @ts-ignore // TODO: fix this
770 |               current = current.parentElement;
771 |             }
772 |             return false;
773 |           } catch (e) {
774 |             return true;
775 |           }
776 |         }
777 | 
778 |         function isTextNodeVisible(textNode: Node) {
779 |           const range = document.createRange();
780 |           range.selectNodeContents(textNode);
781 |           const rect = range.getBoundingClientRect();
782 | 
783 |           return (
784 |             rect.width !== 0 &&
785 |             rect.height !== 0 &&
786 |             rect.top >= 0 &&
787 |             rect.top <= window.innerHeight &&
788 |             textNode.parentElement?.checkVisibility({
789 |               checkOpacity: true,
790 |               checkVisibilityCSS: true,
791 |             })
792 |           );
793 |         }
794 | 
795 |         function getCoordinates(element: Element) {
796 |           const rect = element.getBoundingClientRect();
797 | 
798 |           if (!rect) return null;
799 | 
800 |           const centerX = rect.left + rect.width / 2;
801 |           const centerY = rect.top + rect.height / 2;
802 | 
803 |           if (isNaN(centerX) || isNaN(centerY)) return null;
804 | 
805 |           if (centerX <= 0 || centerY <= 0) return null;
806 | 
807 |           return {
808 |             x: centerX,
809 |             y: centerY,
810 |           };
811 |         }
812 | 
813 |         function buildDomTree(
814 |           node: Element,
815 |           parentIframe: HTMLIFrameElement | null = null,
816 |         ): DomNode | null {
817 |           if (!node) return null;
818 | 
819 |           // Special case for text nodes
820 |           if (node.nodeType === Node.TEXT_NODE) {
821 |             const textContent = node.textContent?.trim() ?? '';
822 | 
823 |             if (textContent && isTextNodeVisible(node)) {
824 |               return {
825 |                 type: 'TEXT_NODE',
826 |                 text: textContent,
827 |                 isVisible: true,
828 |               };
829 |             }
830 |             return null;
831 |           }
832 | 
833 |           if (node.nodeType === Node.ELEMENT_NODE && !isElementAccepted(node)) {
834 |             return null;
835 |           }
836 | 
837 |           const nodeData: Partial<ElementNode> = {
838 |             tagName: node.tagName ? node.tagName.toLowerCase() : null,
839 |             attributes: {},
840 |             xpath:
841 |               node.nodeType === Node.ELEMENT_NODE
842 |                 ? getXPathTree(node, true)
843 |                 : null,
844 |             children: [],
845 |           };
846 | 
847 |           if (node.nodeType === Node.ELEMENT_NODE && node.attributes) {
848 |             const attributeNames = node.getAttributeNames?.() || [];
849 |             if (!nodeData.attributes) {
850 |               nodeData.attributes = {};
851 |             }
852 |             for (const name of attributeNames) {
853 |               nodeData.attributes[name] = node.getAttribute(name) ?? '';
854 |             }
855 |           }
856 | 
857 |           if (node.nodeType === Node.ELEMENT_NODE) {
858 |             const isInteractive = isInteractiveElement(node as HTMLElement);
859 |             const isVisible = isElementVisible(node as HTMLElement);
860 |             const coordinates = getCoordinates(node);
861 |             const isTop = isTopElement(node);
862 | 
863 |             nodeData.isInteractive = isInteractive;
864 |             nodeData.isVisible = isVisible;
865 |             nodeData.isTopElement = isTop;
866 |             nodeData.text = '';
867 |             nodeData.coordinates = coordinates;
868 | 
869 |             if (isInteractive && isVisible && isTop) {
870 |               nodeData.highlightIndex = highlightIndex++;
871 |               if (doHighlightElements) {
872 |                 if (focusHighlightIndex >= 0) {
873 |                   if (focusHighlightIndex === nodeData.highlightIndex) {
874 |                     highlightElement(
875 |                       node,
876 |                       nodeData.highlightIndex,
877 |                       parentIframe,
878 |                     );
879 |                   }
880 |                 } else {
881 |                   highlightElement(node, nodeData.highlightIndex, parentIframe);
882 |                 }
883 |               }
884 |             }
885 |           }
886 | 
887 |           // Only add shadowRoot field if it exists
888 |           if (node.shadowRoot) {
889 |             nodeData.shadowRoot = true;
890 |           }
891 | 
892 |           // Handle shadow DOM
893 |           if (node.shadowRoot) {
894 |             const shadowChildren = Array.from(node.shadowRoot.childNodes).map(
895 |               (child) => buildDomTree(child as Element, parentIframe),
896 |             );
897 |             nodeData.children?.push(...shadowChildren);
898 |           }
899 | 
900 |           // Handle iframes
901 |           if (node.tagName === 'IFRAME') {
902 |             try {
903 |               const iframeDoc =
904 |                 (node as HTMLIFrameElement).contentDocument ||
905 |                 (node as HTMLIFrameElement).contentWindow?.document;
906 | 
907 |               if (iframeDoc) {
908 |                 const iframeChildren = Array.from(
909 |                   iframeDoc.body.childNodes,
910 |                 ).map((child) =>
911 |                   buildDomTree(child as Element, node as HTMLIFrameElement),
912 |                 );
913 |                 nodeData.children?.push(...iframeChildren);
914 |               }
915 |             } catch (e) {
916 |               console.warn('Unable to access iframe:', node);
917 |             }
918 |           } else {
919 |             const children = Array.from(node.childNodes).map((child) =>
920 |               buildDomTree(child as Element, parentIframe),
921 |             );
922 |             nodeData.children?.push(...children);
923 |           }
924 | 
925 |           return nodeData as DomNode;
926 |         }
927 | 
928 |         const domTree = buildDomTree(document.body);
929 | 
930 |         return domTree;
931 |       }, withHighlight);
932 | 
933 |       return domState;
934 |     } catch (error: unknown) {
935 |       console.log('error', error);
936 |       return null;
937 |     }
938 |   }
939 | }
940 | 


--------------------------------------------------------------------------------
/src/infra/services/in-memory-file-system.ts:
--------------------------------------------------------------------------------
 1 | import { FileSystem } from "@/core/interfaces/file-system.interface";
 2 | 
 3 | export class InMemoryFileSystem implements FileSystem {
 4 |   constructor() {}
 5 | 
 6 |   saveFile(path: string, data: Buffer): Promise<string> {
 7 |     return this.saveScreenshot(path, data);
 8 |   }
 9 | 
10 |   saveScreenshot(filename: string, data: Buffer): Promise<string> {
11 |     return new Promise((resolve, reject) => {
12 |       try {
13 |         const base64Data = data.toString("base64");
14 |         const url = `data:image/png;base64,${base64Data}`;
15 |         resolve(url);
16 |       } catch (error) {
17 |         reject(error);
18 |       }
19 |     });
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/src/infra/services/local-file-system.ts:
--------------------------------------------------------------------------------
import { mkdir, writeFile } from 'fs/promises';
import { dirname, join } from 'path';

import { FileSystem } from '@/core/interfaces/file-system.interface';
 2 | 
 3 | export class LocalFileSystem implements FileSystem {
 4 |   constructor() {}
 5 | 
 6 |   bufferFromStringUrl(encodedScreenshot: string): Buffer {
 7 |     const base64Data = encodedScreenshot.replace(
 8 |       /^data:image\/png;base64,/,
 9 |       '',
10 |     );
11 |     return Buffer.from(base64Data, 'base64');
12 |   }
13 | 
14 |   saveFile(path: string, data: Buffer): Promise<string> {
15 |     return this.saveScreenshot(path, data);
16 |   }
17 | 
18 |   saveScreenshot(filename: string, data: Buffer): Promise<string> {
19 |     return new Promise((resolve, reject) => {
20 |       const fs = require('fs');
21 |       const path = require('path');
22 | 
23 |       const filePath = path.join('/tmp/screenshots', filename);
24 | 
25 |       fs.writeFile(filePath, data, (err: any) => {
26 |         if (err) {
27 |           reject(err);
28 |         } else {
29 |           resolve(filePath);
30 |         }
31 |       });
32 |     });
33 |   }
34 | }
35 | 


--------------------------------------------------------------------------------
/src/infra/services/playwright-screenshotter.ts:
--------------------------------------------------------------------------------
 1 | import { FileSystem } from "@/core/interfaces/file-system.interface";
 2 | import { Screenshotter } from "@/core/interfaces/screenshotter.interface";
 3 | import { Page } from "playwright";
 4 | 
 5 | export class PlaywrightScreenshoter implements Screenshotter {
 6 |   constructor(private readonly fileSystem: FileSystem) {}
 7 | 
 8 |   async takeScreenshot(page: Page) {
 9 |     const screenshot = await page.screenshot({
10 |       type: "png",
11 |       fullPage: false,
12 |     });
13 | 
14 |     const url = new URL(page.url());
15 |     const hostname = url.hostname.replace(/[:/]/g, "_");
16 |     const segments = url.pathname
17 |       .split("/")
18 |       .filter((segment) => segment)
19 |       .join("_");
20 | 
21 |     const key = `${hostname}_${segments}_${crypto.randomUUID()}`;
22 | 
23 |     const signedUrl = await this.fileSystem.saveScreenshot(key, screenshot);
24 | 
25 |     return signedUrl;
26 |   }
27 | }
28 | 


--------------------------------------------------------------------------------
/src/init-openator.ts:
--------------------------------------------------------------------------------
 1 | import { FeedbackAgent } from './core/agents/feedback-agent/feedback-agent';
 2 | import { Openator } from './core/agents/openator/openator';
 3 | import {
 4 |   initSummarizer,
 5 |   initSummarizeTask,
 6 | } from './core/agents/summarize-agent/summarize-agent';
 7 | import { EventBus } from './core/services/realtime-reporter';
 8 | import { TaskManagerService } from './core/services/task-manager-service';
 9 | import { LLM, Variable } from './core/types';
10 | import { ChromiumBrowser } from './infra/services/chromium-browser';
11 | import { ConsoleReporter } from './infra/services/console-reporter';
12 | import { DomService } from './infra/services/dom-service';
13 | import { InMemoryFileSystem } from './infra/services/in-memory-file-system';
14 | import { PlaywrightScreenshoter } from './infra/services/playwright-screenshotter';
15 | 
/** Options accepted by `initOpenator`. */
export type InitOpenatorConfig = {
  /**
   * The LLM to use. Required: the value is passed on as-is, no fallback
   * model is created when it is missing.
   */
  llm: LLM;

  /**
   * Whether to run the browser in headless mode. Required: the value is
   * forwarded to the browser unchanged.
   */
  headless: boolean;

  /**
   * Variables can be used to pass sensitive information to the Openator.
   * Every variable is interpolated at runtime, replacing `{{variable_name}}`
   * with the actual value.
   * Secret variables are masked in the console output and never sent to the LLM.
   * Normal variables are sent to the LLM and are visible in the console output.
   *
   * @default []
   * @example ```
   * [ new Variable({ name: 'password', value: process.env.PASSWORD, isSecret: true }) ]
   * ```
   */
  variables?: Variable[];
};
40 | 
/**
 * Wires up an Openator instance with its default collaborators: an
 * in-memory file system behind the Playwright screenshotter, a Chromium
 * browser, an event bus, the DOM service, the feedback agent, the task
 * manager, the summarizer helpers and a console reporter named 'Openator'.
 *
 * @param config LLM, headless flag and optional variables (defaults to `[]`).
 * @returns The assembled Openator.
 */
export const initOpenator = (config: InitOpenatorConfig): Openator => {
  const screenshotter = new PlaywrightScreenshoter(new InMemoryFileSystem());
  const chromium = new ChromiumBrowser({ headless: config.headless });
  const model = config.llm;

  const dom = new DomService(screenshotter, chromium, new EventBus());
  const feedback = new FeedbackAgent(model);
  const tasks = new TaskManagerService();
  const summarizer = initSummarizer(model);
  const summarizeTask = initSummarizeTask();

  return new Openator({
    variables: config.variables ?? [],
    taskManager: tasks,
    domService: dom,
    browserService: chromium,
    llmService: model,
    feedbackAgent: feedback,
    reporter: new ConsoleReporter('Openator'),
    summarizer,
    summarizeTask,
  });
};
72 | 


--------------------------------------------------------------------------------
/src/models/chat-google.ts:
--------------------------------------------------------------------------------
 1 | import { ChatGoogleGenerativeAI as ChatModel } from '@langchain/google-genai';
 2 | import { BaseMessage } from '@langchain/core/messages';
 3 | import { JsonOutputParser } from '@langchain/core/output_parsers';
 4 | import { LLM } from '@/core/types';
 5 | 
 6 | export type ChatGoogleGenAIConfig = {
 7 |   /**
 8 |    * The model to use.
 9 |    * @default gemini-2.0-flash
10 |    */
11 |   model?: 'gemini-2.0-flash' | 'gemini-2.0-flash-lite' | 'gemini-1.5-flash';
12 |   /**
13 |    * The API key to use.
14 |    */
15 |   apiKey: string;
16 |   /**
17 |    * @default 0
18 |    */
19 |   temperature?: number;
20 |   /**
21 |    * The maximum number of retries.
22 |    * This is usefull when you have a low quota such as Tier 1 or 2.
23 |    * @default 6
24 |    */
25 |   maxRetries?: number;
26 |   /**
27 |    * The maximum number of concurrent requests.
28 |    * Set it to a low value if you have a low quota such as Tier 1 or 2.
29 |    * @default 2
30 |    */
31 |   maxConcurrency?: number;
32 | };
33 | 
/** Fallback values for every optional ChatGoogleGenAIConfig field. */
const DEFAULT_CONFIG = {
  model: 'gemini-2.0-flash',
  temperature: 0,
  maxRetries: 6,
  maxConcurrency: 2,
} as const;

/**
 * LLM implementation backed by the LangChain Google Generative AI chat model.
 */
export class ChatGoogleGenAI implements LLM {
  private model: ChatModel;

  /**
   * @param config API key plus optional overrides; any option left
   *   null/undefined falls back to DEFAULT_CONFIG.
   */
  constructor(config: ChatGoogleGenAIConfig) {
    const model = config.model ?? DEFAULT_CONFIG.model;
    const temperature = config.temperature ?? DEFAULT_CONFIG.temperature;
    const maxRetries = config.maxRetries ?? DEFAULT_CONFIG.maxRetries;
    const maxConcurrency =
      config.maxConcurrency ?? DEFAULT_CONFIG.maxConcurrency;

    this.model = new ChatModel({
      model,
      temperature,
      maxRetries,
      maxConcurrency,
      apiKey: config.apiKey,
    });
  }

  /**
   * Sends the messages to the model and runs the reply through `parser`.
   *
   * @param messages Conversation to send.
   * @param parser Parser invoked with the raw model response.
   * @returns The parsed result.
   */
  async invokeAndParse<T extends Record<string, any>>(
    messages: BaseMessage[],
    parser: JsonOutputParser<T>,
  ): Promise<T> {
    return parser.invoke(await this.model.invoke(messages));
  }
}
63 | 


--------------------------------------------------------------------------------
/src/models/chat-ollama.ts:
--------------------------------------------------------------------------------
 1 | import { ChatOllama as ChatModel } from '@langchain/ollama';
 2 | import { BaseMessage } from '@langchain/core/messages';
 3 | import { JsonOutputParser } from '@langchain/core/output_parsers';
 4 | import { LLM } from '@/core/types';
 5 | 
 6 | export type ChatOllamaConfig = {
 7 |   /**
 8 |    * The model to use.
 9 |    */
10 |   model: 'qwen2.5' | 'llama3.2';
11 |   /**
12 |    * The base URL of the Ollama server.
13 |    * @default http://localhost:11434
14 |    */
15 |   baseUrl?: string;
16 |   /**
17 |    * The temperature to use. We recommend setting this to 0 for consistency.
18 |    * @default 0
19 |    */
20 |   temperature?: number;
21 |   /**
22 |    * The maximum number of retries.
23 |    * This is usefull when you have a low quota such as Tier 1 or 2.
24 |    * @default 6
25 |    */
26 |   maxRetries?: number;
27 |   /**
28 |    * The maximum number of concurrent requests.
29 |    * Set it to a low value if you have a low quota such as Tier 1 or 2.
30 |    * @default 2
31 |    */
32 |   maxConcurrency?: number;
33 | };
34 | 
/** Fallback values applied when a ChatOllamaConfig field is null/undefined. */
const DEFAULT_CONFIG = {
  model: 'qwen2.5',
  baseUrl: 'http://localhost:11434',
  temperature: 0,
  maxRetries: 6,
  maxConcurrency: 2,
} as const;

/**
 * LLM implementation backed by the LangChain Ollama chat model, configured
 * with `format: 'json'`.
 */
export class ChatOllama implements LLM {
  private model: ChatModel;

  /**
   * @param config Model name plus optional overrides; any option left
   *   null/undefined falls back to DEFAULT_CONFIG.
   */
  constructor(config: ChatOllamaConfig) {
    const model = config.model ?? DEFAULT_CONFIG.model;
    const temperature = config.temperature ?? DEFAULT_CONFIG.temperature;
    const maxRetries = config.maxRetries ?? DEFAULT_CONFIG.maxRetries;
    const maxConcurrency =
      config.maxConcurrency ?? DEFAULT_CONFIG.maxConcurrency;
    const baseUrl = config.baseUrl ?? DEFAULT_CONFIG.baseUrl;

    this.model = new ChatModel({
      model,
      temperature,
      maxRetries,
      maxConcurrency,
      baseUrl,
      format: 'json',
    });
  }

  /**
   * Sends the messages to the model and runs the reply through `parser`.
   *
   * @param messages Conversation to send.
   * @param parser Parser invoked with the raw model response.
   * @returns The parsed result.
   */
  async invokeAndParse<T extends Record<string, any>>(
    messages: BaseMessage[],
    parser: JsonOutputParser<T>,
  ): Promise<T> {
    return parser.invoke(await this.model.invoke(messages));
  }
}
66 | 


--------------------------------------------------------------------------------
/src/models/chat-openai.ts:
--------------------------------------------------------------------------------
 1 | import { ChatOpenAI as LChatOpenAI } from '@langchain/openai';
 2 | import { BaseMessage } from '@langchain/core/messages';
 3 | import { JsonOutputParser } from '@langchain/core/output_parsers';
 4 | import { LLM } from '@/core/types';
 5 | 
 6 | export type ChatOpenAIConfig = {
 7 |   /**
 8 |    * The model to use.
 9 |    * @default gpt-4o
10 |    */
11 |   model?: 'gpt-4o' | 'gpt-4o-mini' | 'gpt-4-turbo';
12 |   /**
13 |    * The temperature to use. We recommend setting this to 0 for consistency.
14 |    * @default 0
15 |    */
16 |   temperature?: number;
17 |   /**
18 |    * The maximum number of retries.
19 |    * This is usefull when you have a low quota such as Tier 1 or 2.
20 |    * @default 6
21 |    */
22 |   maxRetries?: number;
23 |   /**
24 |    * The maximum number of concurrent requests.
25 |    * Set it to a low value if you have a low quota such as Tier 1 or 2.
26 |    * @default 2
27 |    */
28 |   maxConcurrency?: number;
29 |   /**
30 |    * The OpenAI API key to use
31 |    */
32 |   apiKey: string;
33 | };
34 | 
35 | const DEFAULT_CONFIG = { // fallbacks the ChatOpenAI constructor applies to nullish options
36 |   model: 'gpt-4o',
37 |   temperature: 0,
38 |   maxRetries: 6,
39 |   maxConcurrency: 2,
40 | } as const;
41 | 
42 | /**
43 |  * OpenAI-backed implementation of the `LLM` contract.
44 |  *
45 |  * Wraps the LangChain `ChatOpenAI` client (imported as `LChatOpenAI`) and asks
46 |  * for a `json_object` response format on every call.
47 |  */
48 | export class ChatOpenAI implements LLM {
49 |   private model: LChatOpenAI;
50 | 
51 |   /**
52 |    * Builds the underlying client from `config`.
53 |    *
54 |    * Any of `model`, `temperature`, `maxRetries` or `maxConcurrency` left
55 |    * nullish falls back to the matching `DEFAULT_CONFIG` entry; `apiKey` is
56 |    * passed through as given.
57 |    *
58 |    * @param config - Caller-supplied OpenAI settings.
59 |    */
60 |   constructor(config: ChatOpenAIConfig) {
61 |     const model = config.model ?? DEFAULT_CONFIG.model;
62 |     const temperature = config.temperature ?? DEFAULT_CONFIG.temperature;
63 |     const openAIApiKey = config.apiKey;
64 |     const maxRetries = config.maxRetries ?? DEFAULT_CONFIG.maxRetries;
65 |     const maxConcurrency = config.maxConcurrency ?? DEFAULT_CONFIG.maxConcurrency;
66 | 
67 |     this.model = new LChatOpenAI({
68 |       model,
69 |       temperature,
70 |       openAIApiKey,
71 |       maxRetries,
72 |       maxConcurrency,
73 |     });
74 |   }
75 | 
76 |   /**
77 |    * Sends `messages` to the model and runs the reply through `parser`.
78 |    *
79 |    * @param messages - Conversation to submit.
80 |    * @param parser - Parser that turns the model reply into `T`.
81 |    * @returns The value produced by `parser.invoke` for the reply.
82 |    */
83 |   async invokeAndParse<T extends Record<string, any>>(
84 |     messages: BaseMessage[],
85 |     parser: JsonOutputParser<T>,
86 |   ): Promise<T> {
87 |     const jsonMode = { response_format: { type: 'json_object' } } as const;
88 |     const reply = await this.model.invoke(messages, jsonMode);
89 | 
90 |     return parser.invoke(reply);
91 |   }
92 | }
93 | 


--------------------------------------------------------------------------------
/tsconfig.build.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "./tsconfig.json",
3 |   "exclude": ["node_modules", "test", "dist", "**/*spec.ts"]
4 | }
5 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "module": "ESNext",
 4 |     "moduleResolution": "Node",
 5 |     "declaration": true,
 6 |     "removeComments": true,
 7 |     "noImplicitAny": true,
 8 |     "strictNullChecks": true,
 9 |     "strictFunctionTypes": true,
10 |     "strictBindCallApply": true,
11 |     "strictPropertyInitialization": false,
12 |     "noImplicitThis": true,
13 |     "emitDecoratorMetadata": true,
14 |     "experimentalDecorators": true,
15 |     "allowSyntheticDefaultImports": true,
16 |     "emitDeclarationOnly": true,
17 |     "target": "ES2021",
18 |     "sourceMap": true,
19 |     "outDir": "./dist",
20 |     "baseUrl": "./",
21 |     "paths": {
22 |       "@/*": ["src/*"]
23 |     },
24 |     "incremental": true,
25 |     "skipLibCheck": true,
26 |     "forceConsistentCasingInFileNames": false,
27 |     "noFallthroughCasesInSwitch": false
28 |   }
29 | }
30 | 


--------------------------------------------------------------------------------