58 |
59 |
60 |
61 | Camano, Washington On the beach Jun 1 – 6 $910 per night
62 |
63 |
64 |
72 |
73 |
74 | `;
75 |
76 | // test('templatize', () => {
77 | // const out = templatize(sample);
78 | // console.log('in length', sample.length);
79 | // console.log('out length', out.length);
80 | // console.log(out);
81 | // expect(templatize(sample)).toBe(``);
82 | // });
83 |
// Smoke test: runs `templatize` over the sample markup and prints the result
// for manual inspection. It makes no assertion about the output.
test("templatize", () => {
  console.log(templatize(sample));
});
91 |
--------------------------------------------------------------------------------
/src/state/store.ts:
--------------------------------------------------------------------------------
1 | import { merge } from "lodash";
2 | import { create, StateCreator } from "zustand";
3 | import { immer } from "zustand/middleware/immer";
4 | import { createJSONStorage, devtools, persist } from "zustand/middleware";
5 | import { createCurrentTaskSlice, CurrentTaskSlice } from "./currentTask";
6 | import { createUiSlice, UiSlice } from "./ui";
7 | import { createSettingsSlice, SettingsSlice } from "./settings";
8 | import { findBestMatchingModel } from "../helpers/aiSdkUtils";
9 |
/** Root shape of the application store: one namespaced object per slice. */
export type StoreType = {
  currentTask: CurrentTaskSlice;
  ui: UiSlice;
  settings: SettingsSlice;
};

/**
 * Creator signature used by each slice.
 *
 * The creator sees the whole `StoreType`, declares the immer mutator, and
 * produces a slice of type `T`.
 */
export type MyStateCreator<T> = StateCreator<
  StoreType,
  [["zustand/immer", never]],
  [],
  T
>;
22 |
/**
 * The application store hook.
 *
 * The three slices are combined under `currentTask`, `ui` and `settings`, and
 * wrapped with the devtools, immer and persist middlewares. Only the fields
 * listed in `partialize` are written to `localStorage` (under the key
 * "app-state").
 */
export const useAppState = create<StoreType>()(
  persist(
    immer(
      devtools((...a) => ({
        currentTask: createCurrentTaskSlice(...a),
        ui: createUiSlice(...a),
        settings: createSettingsSlice(...a),
      })),
    ),
    {
      name: "app-state",
      storage: createJSONStorage(() => localStorage),
      partialize: (state) => ({
        // Stuff we want to persist
        ui: {
          instructions: state.ui.instructions,
        },
        settings: {
          openAIKey: state.settings.openAIKey,
          anthropicKey: state.settings.anthropicKey,
          geminiKey: state.settings.geminiKey,
          openAIBaseUrl: state.settings.openAIBaseUrl,
          anthropicBaseUrl: state.settings.anthropicBaseUrl,
          agentMode: state.settings.agentMode,
          selectedModel: state.settings.selectedModel,
          voiceMode: state.settings.voiceMode,
          customKnowledgeBase: state.settings.customKnowledgeBase,
        },
      }),
      merge: (persistedState, currentState) => {
        // lodash `merge` mutates its first argument. Merge into a fresh object
        // so the live store state is never modified in place.
        const result = merge({}, currentState, persistedState);
        // Re-resolve the model after merging: the stored choice is passed
        // through findBestMatchingModel together with the merged agent mode
        // and API keys.
        result.settings.selectedModel = findBestMatchingModel(
          result.settings.selectedModel,
          result.settings.agentMode,
          result.settings.openAIKey,
          result.settings.anthropicKey,
          result.settings.geminiKey,
        );
        return result;
      },
    },
  ),
);

// @ts-expect-error used for debugging
window.getState = useAppState.getState;
69 |
--------------------------------------------------------------------------------
/src/shared/storages/base.ts:
--------------------------------------------------------------------------------
/** The `chrome.storage` areas a storage can be backed by. */
export enum StorageType {
  Local = "local",
  Sync = "sync",
  Managed = "managed",
  Session = "session",
}

/** Either a new value, or a (possibly async) function deriving it from the previous one. */
type ValueOrUpdate<D> = D | ((prev: D) => Promise<D> | D);

/** Handle returned by `createStorage`. */
export type BaseStorage<D> = {
  /** Reads the value from `chrome.storage`, resolving to the fallback when the key is absent. */
  get: () => Promise<D>;
  /** Stores a new value (or the result of an updater) and notifies subscribers. */
  set: (value: ValueOrUpdate<D>) => Promise<void>;
  /** Returns the in-memory cache; `null` until the initial load has finished. */
  getSnapshot: () => D | null;
  /** Registers a change listener and returns a function that removes it. */
  subscribe: (listener: () => void) => () => void;
};

/**
 * Creates a cached, subscribable wrapper around one key in `chrome.storage`.
 *
 * @param key - Storage key to read and write.
 * @param fallback - Value used when the key has no stored value.
 * @param config - Optional storage area; defaults to `StorageType.Local`.
 * @returns A `BaseStorage` handle for the key.
 */
export function createStorage<D>(
  key: string,
  fallback: D,
  config?: { storageType?: StorageType },
): BaseStorage<D> {
  let cache: D | null = null;
  let listeners: Array<() => void> = [];
  const storageType = config?.storageType ?? StorageType.Local;

  const _getDataFromStorage = async (): Promise<D> => {
    if (chrome.storage[storageType] === undefined) {
      throw new Error(
        `Check your storage permission into manifest.json: ${storageType} is not defined`,
      );
    }
    const value = await chrome.storage[storageType].get([key]);
    return value[key] ?? fallback;
  };

  const _emitChange = () => {
    listeners.forEach((listener) => listener());
  };

  const set = async (valueOrUpdate: ValueOrUpdate<D>) => {
    if (typeof valueOrUpdate === "function") {
      // Always await the updater. `await` passes plain values through
      // unchanged, and a function never has an own "then" property, so the
      // previous `hasOwnProperty("then")` probe never detected async updaters
      // and a Promise ended up in the cache and in storage.
      // eslint-disable-next-line @typescript-eslint/ban-ts-comment
      // @ts-ignore -- D is unconstrained, so TS cannot prove the narrowed value is callable with `cache`
      cache = await valueOrUpdate(cache);
    } else {
      cache = valueOrUpdate;
    }
    await chrome.storage[storageType].set({ [key]: cache });
    _emitChange();
  };

  const subscribe = (listener: () => void) => {
    listeners = [...listeners, listener];
    return () => {
      listeners = listeners.filter((l) => l !== listener);
    };
  };

  const getSnapshot = () => {
    return cache;
  };

  // Warm the cache as soon as the storage is created.
  _getDataFromStorage().then((data) => {
    cache = data;
    _emitChange();
  });

  return {
    get: _getDataFromStorage,
    set,
    getSnapshot,
    subscribe,
  };
}
82 |
--------------------------------------------------------------------------------
/src/pages/content/domOperations.ts:
--------------------------------------------------------------------------------
1 | // The content script runs inside each page this extension is enabled on
2 | // Do NOT import from here from outside of content script (other than types).
3 |
4 | import getAnnotatedDOM, { getUniqueElementSelectorId } from "./getAnnotatedDOM";
5 | import { copyToClipboard } from "./copyToClipboard";
6 | import attachFile from "./attachFile";
7 | import { drawLabels, removeLabels } from "./drawLabels";
8 | import ripple from "./ripple";
9 | import { getDataFromRenderedMarkdown } from "./reverseMarkdown";
10 | import getViewportPercentage from "./getViewportPercentage";
11 | import { injectMicrophonePermissionIframe } from "./permission";
12 |
/**
 * Shows a ripple at the centre of the first element matching `selector`, then
 * clicks that element.
 *
 * @param selector - CSS selector identifying the element to click.
 * @throws Error when no element matches. The previous code dereferenced the
 *   missing element before checking it and failed with an opaque TypeError.
 */
function clickWithSelector(selector: string) {
  const element = document.querySelector<HTMLElement>(selector);
  if (!element) {
    throw new Error(
      `clickWithSelector: no element matches selector "${selector}"`,
    );
  }
  // get center coordinates of the element
  const { x, y } = element.getBoundingClientRect();
  const centerX = x + element.offsetWidth / 2;
  const centerY = y + element.offsetHeight / 2;
  ripple(centerX, centerY);
  element.click();
}
24 |
/** Functions the content script exposes to the RPC listener, keyed by method name. */
export const rpcMethods = {
  clickWithSelector,
  getAnnotatedDOM,
  getUniqueElementSelectorId,
  ripple,
  copyToClipboard,
  attachFile,
  drawLabels,
  removeLabels,
  getDataFromRenderedMarkdown,
  getViewportPercentage,
  injectMicrophonePermissionIframe,
} as const;

export type RPCMethods = typeof rpcMethods;
type MethodName = keyof RPCMethods;

/**
 * Message accepted by the RPC listener: a method name paired with the
 * argument tuple of that specific method.
 */
export type RPCMessage = {
  [K in MethodName]: {
    method: K;
    payload: Parameters<RPCMethods[K]>;
  };
}[MethodName];
48 |
/**
 * Registers the `chrome.runtime.onMessage` listener that dispatches incoming
 * RPC messages to `rpcMethods`. This function should run in the content script.
 *
 * The listener returns `true` for a known method, which keeps the response
 * channel open for handlers that resolve asynchronously. It returns
 * `undefined` for an unknown method.
 */
export const initializeRPC = () => {
  chrome.runtime.onMessage.addListener(
    (message: RPCMessage, sender, sendResponse): true | undefined => {
      const { method, payload } = message;
      console.log("RPC listener", method);
      // Own-property check: `in` would also accept inherited names such as
      // "toString" and invoke them as if they were RPC methods.
      if (!Object.prototype.hasOwnProperty.call(rpcMethods, method)) {
        return undefined;
      }
      // @ts-expect-error - we know this is valid (see pageRPC)
      const resp = rpcMethods[method as keyof RPCMethods](...payload);
      if (resp instanceof Promise) {
        resp.then(
          (resolvedResp) => {
            sendResponse(resolvedResp);
          },
          (err) => {
            // No response is sent on failure (same as before); the rejection
            // is now logged instead of surfacing as an unhandled rejection.
            console.error("RPC method failed", method, err);
          },
        );
      } else {
        sendResponse(resp);
      }
      return true;
    },
  );
};
70 |
--------------------------------------------------------------------------------
/src/helpers/utils.ts:
--------------------------------------------------------------------------------
/**
 * Resolves after roughly `ms` milliseconds.
 *
 * @param ms - Delay passed to `setTimeout`.
 */
export async function sleep(ms: number) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
4 |
/**
 * Type guard that keeps truthy values, intended for `Array.prototype.filter`.
 *
 * Note that every falsy value is rejected, including `0`, `""` and `false`,
 * not only `null` and `undefined`.
 */
export function truthyFilter<T>(value: T | null | undefined): value is T {
  return Boolean(value);
}
8 |
/**
 * Polls `predicate` until it resolves to a truthy value or the check budget
 * is exhausted.
 *
 * Checks run strictly one after another: the next check starts `interval` ms
 * after the previous one has settled, so a slow predicate is never invoked
 * concurrently with itself. A predicate that throws or rejects is logged and
 * counted as a failed check, so it cannot bypass `_maxChecks`.
 *
 * @param predicate - Async condition to poll.
 * @param interval - Delay in milliseconds before each check.
 * @param _maxChecks - Maximum number of failed checks; `0` means wait forever.
 * @param rejectOnTimeout - When true (default) the promise rejects once the
 *   budget is exhausted; when false it resolves instead.
 * @throws Error "Timed out waiting for condition" on timeout when
 *   `rejectOnTimeout` is true.
 */
export async function waitFor(
  predicate: () => Promise<boolean>,
  interval: number,
  _maxChecks: number,
  rejectOnTimeout = true,
): Promise<void> {
  // special case for 0 maxChecks (wait forever)
  const maxChecks = _maxChecks === 0 ? Infinity : _maxChecks;
  let checkCount = 0;

  for (;;) {
    await new Promise<void>((resolve) => setTimeout(resolve, interval));

    let satisfied = false;
    try {
      satisfied = Boolean(await predicate());
    } catch (err) {
      console.warn("waitFor: predicate threw; counting it as a failed check", err);
    }
    if (satisfied) {
      return;
    }

    checkCount++;
    if (checkCount >= maxChecks) {
      if (rejectOnTimeout) {
        throw new Error("Timed out waiting for condition");
      }
      return;
    }
  }
}
37 |
/**
 * Waits until `getSize` reports the same non-zero value on enough consecutive
 * polls, which is taken to mean the page has finished rendering.
 *
 * A poll counts as stable when the previous size was non-zero and equals the
 * current size; any other poll resets the streak. Three stable polls in a row
 * end the wait.
 *
 * @param getSize - Async probe returning the current size.
 * @param interval - Polling interval in milliseconds.
 * @param timeout - Overall budget in milliseconds, converted to a number of
 *   checks as `timeout / interval`.
 * @param rejectOnTimeout - When false (default) a timeout is treated as
 *   "stable enough" and the promise resolves.
 */
export async function waitTillStable(
  getSize: () => Promise<number>,
  interval: number,
  timeout: number,
  rejectOnTimeout = false, // default to assuming stable after timeout
): Promise<void> {
  let lastSize = 0;
  let countStableSizeIterations = 0;
  const minStableSizeIterations = 3;

  return waitFor(
    async () => {
      const currentSize = await getSize();

      console.log("last: ", lastSize, " <> curr: ", currentSize);

      if (lastSize !== 0 && currentSize === lastSize) {
        countStableSizeIterations++;
      } else {
        countStableSizeIterations = 0; // reset the counter
      }

      if (countStableSizeIterations >= minStableSizeIterations) {
        console.log("Size stable! Assume fully rendered..");
        return true;
      }

      lastSize = currentSize;
      return false;
    },
    interval,
    timeout / interval,
    rejectOnTimeout,
  );
}
73 |
/**
 * Returns the own enumerable keys of `obj`, typed as keys of `O`.
 *
 * @param obj - Typically a string enum object.
 */
export function enumKeys<O extends object, K extends keyof O = keyof O>(
  obj: O,
): K[] {
  return Object.keys(obj) as K[];
}

/**
 * Returns the values of `obj` in the same order as `enumKeys(obj)`.
 *
 * @param obj - Typically a string enum object.
 */
export function enumValues<O extends object>(obj: O): O[keyof O][] {
  return enumKeys(obj).map((key) => obj[key]);
}
83 |
--------------------------------------------------------------------------------
/src/helpers/errorChecker.ts:
--------------------------------------------------------------------------------
1 | import Anthropic from "@anthropic-ai/sdk";
2 | import OpenAI from "openai";
3 | import { debugMode } from "../constants";
4 |
5 | // returns true if the error is recoverable by retrying the query
/**
 * Reports an error raised while querying a model provider and decides whether
 * the query should be retried.
 *
 * @param err - The error to classify.
 * @param notifyError - Optional callback that receives the user-facing message.
 * @returns `true` when the caller may retry: provider connection errors and
 *   any error that is not an OpenAI/Anthropic API error. `false` for server
 *   errors, authentication/permission errors and all other API errors.
 */
export default function errorChecker(
  err: Error,
  notifyError?: (errMsg: string) => void,
): boolean {
  // Console output only in debug mode; the callback is always notified.
  const report = (message: string, cause: Error): void => {
    if (debugMode) {
      console.error(message, cause);
    }
    notifyError?.(message);
  };

  if (err instanceof OpenAI.APIError) {
    if (err instanceof OpenAI.InternalServerError) {
      report(
        "There is a problem with the OpenAI API server. Please check its status page https://status.openai.com/ and try again later.",
        err,
      );
      return false;
    }
    const openAIKeyRejected =
      err instanceof OpenAI.AuthenticationError ||
      err instanceof OpenAI.PermissionDeniedError;
    if (openAIKeyRejected) {
      report("The OpenAI API key you provided might not be valid", err);
      return false;
    }
    if (err instanceof OpenAI.APIConnectionError) {
      report(
        "There is a problem with the network connection to the OpenAI API. Please check your network connection and try again later.",
        err,
      );
      return true;
    }
    // other API errors are not recoverable (and are not reported)
    return false;
  }

  if (err instanceof Anthropic.APIError) {
    if (err instanceof Anthropic.InternalServerError) {
      report(
        "There is a problem with the Anthropic API server. Please check its status page https://status.anthropic.com/ and try again later.",
        err,
      );
      return false;
    }
    const anthropicKeyRejected =
      err instanceof Anthropic.AuthenticationError ||
      err instanceof Anthropic.PermissionDeniedError;
    if (anthropicKeyRejected) {
      report("The Anthropic API key you provided might not be valid", err);
      return false;
    }
    if (err instanceof Anthropic.APIConnectionError) {
      report(
        "There is a problem with the network connection to the Anthropic API. Please check your network connection and try again later.",
        err,
      );
      return true;
    }
    // other API errors are not recoverable (and are not reported)
    return false;
  }

  report("Error: " + err.message, err);
  // retry everything else (e.g. network errors, syntax error, timeout)
  return true;
}
71 |
--------------------------------------------------------------------------------
/src/helpers/chromeDebugger.ts:
--------------------------------------------------------------------------------
1 | // Not sure why but this won't work properly if running inside a devtools panel
2 | // a lot of tabs are shown as attached to debugger when they are not
3 | // export async function isDebuggerAttached(tabId: number) {
4 | // const targets = await chrome.debugger.getTargets();
5 | // console.log(targets);
6 | // return targets.some((target) => target.tabId === tabId && target.attached);
7 | // }
8 |
// Ids of the tabs this module believes the debugger is attached to.
const attachedTabs = new Set<number>();
let detachListenerSetUp = false;

/**
 * Installs, at most once, the listeners that keep `attachedTabs` in sync:
 * a tab is forgotten when it is closed or when the debugger detaches from it.
 */
function setUpDetachListener() {
  // only set up the listener once
  if (detachListenerSetUp) return;
  detachListenerSetUp = true;
  chrome.tabs.onRemoved.addListener((tabId) => {
    // Set.delete is a no-op for unknown ids, so no `has` check is needed.
    attachedTabs.delete(tabId);
  });
  chrome.debugger.onDetach.addListener((source) => {
    if (source.tabId) {
      attachedTabs.delete(source.tabId);
    }
  });
}
28 |
/**
 * Attaches the Chrome debugger (protocol version "1.3") to a tab and enables
 * the DOM and Runtime domains. Does nothing when the tab is already recorded
 * in `attachedTabs`.
 *
 * @param tabId - Id of the tab to attach to.
 * @throws Error when attaching fails or when enabling a domain fails.
 *   Previously a failing `sendCommand` left the returned promise pending
 *   forever.
 */
export async function attachDebugger(tabId: number) {
  setUpDetachListener();
  console.log("start attachDebugger");
  if (attachedTabs.has(tabId)) {
    console.log("already attached to debugger", tabId);
    return;
  }
  return new Promise<void>((resolve, reject) => {
    chrome.debugger.attach({ tabId }, "1.3", async () => {
      // lastError is only valid inside this callback, so read it once here.
      const lastError = chrome.runtime.lastError;
      if (lastError) {
        console.error("Failed to attach debugger:", lastError.message);
        reject(new Error(`Failed to attach debugger: ${lastError.message}`));
        return;
      }
      try {
        console.log("attached to debugger");
        await chrome.debugger.sendCommand({ tabId }, "DOM.enable");
        console.log("DOM enabled");
        await chrome.debugger.sendCommand({ tabId }, "Runtime.enable");
        console.log("Runtime enabled");
        attachedTabs.add(tabId);
        resolve();
      } catch (err) {
        // Propagate the failure to the caller instead of hanging.
        reject(err instanceof Error ? err : new Error(String(err)));
      }
    });
  });
}
62 |
/**
 * Forgets the tab and detaches the Chrome debugger from it.
 *
 * The detach call is awaited, so the returned promise settles only after
 * Chrome has processed it. A detach failure is logged rather than thrown,
 * because callers were never exposed to it before.
 *
 * @param tabId - Id of the tab to detach from.
 */
export async function detachDebugger(tabId: number) {
  attachedTabs.delete(tabId);
  try {
    await chrome.debugger.detach({ tabId: tabId });
  } catch (err) {
    console.warn("Failed to detach debugger from tab", tabId, err);
  }
}
67 |
/** Detaches the debugger from every tracked tab, one tab at a time. */
export async function detachAllDebuggers() {
  // Iterates the live set; each detachDebugger call removes the id it is given.
  for (const trackedTabId of attachedTabs) {
    await detachDebugger(trackedTabId);
  }
}
73 |
--------------------------------------------------------------------------------
/src/assets/img/logo.svg:
--------------------------------------------------------------------------------
1 |
8 |
--------------------------------------------------------------------------------
/src/helpers/simplifyDom.ts:
--------------------------------------------------------------------------------
1 | import { callRPC } from "./rpc/pageRPC";
2 | import { truthyFilter } from "./utils";
3 |
/**
 * Fetches the annotated DOM from the page over RPC, parses it, and returns the
 * simplified tree.
 *
 * @returns The simplified root element, or `null` when the RPC result is
 *   empty or not a string.
 */
export async function getSimplifiedDom() {
  const annotatedHtml = await callRPC("getAnnotatedDOM", [], 3);
  if (typeof annotatedHtml !== "string" || !annotatedHtml) {
    return null;
  }

  const parsed = new DOMParser().parseFromString(annotatedHtml, "text/html");
  const collectedInteractive: HTMLElement[] = [];

  return generateSimplifiedDom(
    parsed.documentElement,
    collectedInteractive,
  ) as HTMLElement;
}
21 |
/**
 * Recursively builds a reduced copy of an annotated DOM subtree.
 *
 * - A text node with non-whitespace content becomes a new text node with a
 *   trailing space.
 * - Anything that is not an HTML/SVG element, or whose `data-visible`
 *   attribute is not "true", is dropped.
 * - An element is kept when it is interactive (`data-interactive="true"` or
 *   has a `role`) or labelled (`aria-label` or `name`). An element that is not
 *   kept disappears when it has no children and is replaced by its child when
 *   it has exactly one.
 *
 * @param element - Node to simplify.
 * @param interactiveElements - Receives every interactive source element, in
 *   traversal order.
 * @returns The simplified node, or `null` when nothing is worth keeping.
 */
export function generateSimplifiedDom(
  element: ChildNode,
  interactiveElements: HTMLElement[],
): ChildNode | null {
  if (element.nodeType === Node.TEXT_NODE && element.textContent?.trim()) {
    return document.createTextNode(element.textContent + " ");
  }

  const isElement =
    element instanceof HTMLElement || element instanceof SVGElement;
  if (!isElement) {
    return null;
  }

  if (element.getAttribute("data-visible") !== "true") {
    return null;
  }

  // Don't bother with text that is the direct child of the body
  const skipTextChildren = element.tagName === "BODY";
  const keptChildren: ChildNode[] = [];
  for (const child of Array.from(element.childNodes)) {
    const simplified = generateSimplifiedDom(child, interactiveElements);
    if (!simplified) continue;
    if (skipTextChildren && simplified.nodeType === Node.TEXT_NODE) continue;
    keptChildren.push(simplified);
  }

  const interactive =
    element.getAttribute("data-interactive") === "true" ||
    element.hasAttribute("role");
  const labelled =
    element.hasAttribute("aria-label") || element.hasAttribute("name");

  if (!interactive && !labelled) {
    if (keptChildren.length === 0) return null;
    if (keptChildren.length === 1) return keptChildren[0];
  }

  const container = document.createElement(element.tagName);

  const allowedAttributes = [
    "aria-label",
    "data-name",
    "name",
    "type",
    "placeholder",
    "value",
    "role",
    "title",
  ];
  for (const attributeName of allowedAttributes) {
    const attributeValue = element.getAttribute(attributeName);
    if (attributeValue !== null) {
      container.setAttribute(attributeName, attributeValue);
    }
  }

  if (interactive) {
    interactiveElements.push(element as HTMLElement);
    container.setAttribute("id", element.getAttribute("data-id") as string);
  }

  for (const child of keptChildren) {
    container.appendChild(child);
  }

  return container;
}
83 |
--------------------------------------------------------------------------------
/src/common/App.tsx:
--------------------------------------------------------------------------------
1 | import {
2 | Link,
3 | Box,
4 | ChakraProvider,
5 | Heading,
6 | HStack,
7 | IconButton,
8 | Icon,
9 | } from "@chakra-ui/react";
10 | import { SettingsIcon } from "@chakra-ui/icons";
11 | import { FaDiscord, FaGithub } from "react-icons/fa6";
12 | import { useState } from "react";
13 | import { useAppState } from "../state/store";
14 | import SetAPIKey from "./settings/SetAPIKey";
15 | import TaskUI from "./TaskUI";
16 | import Settings from "./Settings";
17 |
18 | const App = () => {
19 | const hasAPIKey = useAppState(
20 | (state) => state.settings.anthropicKey || state.settings.openAIKey,
21 | );
22 | const [inSettingsView, setInSettingsView] = useState(false);
23 |
24 | return (
25 |
26 |
27 |
28 |
29 | Fuji 🗻
30 |
31 | {hasAPIKey && (
32 | }
34 | onClick={() => setInSettingsView(true)}
35 | aria-label="open settings"
36 | />
37 | )}
38 |
39 | {hasAPIKey ? (
40 | inSettingsView ? (
41 |
42 | ) : (
43 |
44 | )
45 | ) : (
46 |
47 | )}
48 |
49 |
60 |
70 |
74 | About this project
75 |
76 |
77 | Leave Feedback
78 |
79 |
80 | GitHub
81 |
82 |
83 | Join Our Discord
84 |
85 |
86 |
87 |
88 | );
89 | };
90 |
91 | export default App;
92 |
--------------------------------------------------------------------------------
/src/pages/content/getAnnotatedDOM.ts:
--------------------------------------------------------------------------------
1 | import { TAXY_ELEMENT_SELECTOR } from "../../constants";
2 |
/**
 * Heuristic for "the user can interact with this element": a natively
 * interactive tag, an inline mouse/keyboard handler attribute, or a pointer
 * cursor in the computed style.
 */
function isInteractive(
  element: HTMLElement,
  style: CSSStyleDeclaration,
): boolean {
  switch (element.tagName) {
    case "A":
    case "INPUT":
    case "BUTTON":
    case "SELECT":
    case "TEXTAREA":
      return true;
  }

  const handlerAttributes = [
    "onclick",
    "onmousedown",
    "onmouseup",
    "onkeydown",
    "onkeyup",
  ];
  if (handlerAttributes.some((name) => element.hasAttribute(name))) {
    return true;
  }

  return style.cursor === "pointer";
}
21 |
/**
 * Heuristic for "this element is rendered": the computed opacity is neither
 * empty nor "0", the element is not `display: none` or `visibility: hidden`,
 * and it is not marked `aria-hidden="true"`.
 */
function isVisible(element: HTMLElement, style: CSSStyleDeclaration): boolean {
  if (style.opacity === "" || style.opacity === "0") {
    return false;
  }
  if (style.display === "none" || style.visibility === "hidden") {
    return false;
  }
  return element.getAttribute("aria-hidden") !== "true";
}
31 |
// Elements recorded by the most recent getAnnotatedDOM() call; an element's
// index in this array is the `data-id` written on its clone.
let currentElements: HTMLElement[] = [];

/**
 * Deep-clones `node`, annotating every cloned element with `data-id`,
 * `data-interactive` and `data-visible`, and appending each source element to
 * `pageElements`.
 *
 * @param node - Root of the subtree to clone.
 * @param pageElements - Accumulator of source elements; an element's
 *   `data-id` is its index in this array.
 * @returns The same `pageElements` array and the annotated clone.
 */
function traverseDOM(node: Node, pageElements: HTMLElement[]) {
  const clonedNode = node.cloneNode(false) as Node;

  if (node.nodeType === Node.ELEMENT_NODE) {
    const source = node as HTMLElement;
    const copy = clonedNode as HTMLElement;
    const computed = window.getComputedStyle(source);

    // push() returns the new length, so the element's index is one less.
    const index = pageElements.push(source) - 1;
    copy.setAttribute("data-id", index.toString());
    copy.setAttribute(
      "data-interactive",
      isInteractive(source, computed).toString(),
    );
    copy.setAttribute("data-visible", isVisible(source, computed).toString());
  }

  for (const child of Array.from(node.childNodes)) {
    clonedNode.appendChild(traverseDOM(child, pageElements).clonedDOM);
  }

  return {
    pageElements,
    clonedDOM: clonedNode,
  };
}
65 |
/**
 * Resets the recorded element list, walks the whole document, and returns the
 * annotated clone serialised as HTML.
 *
 * Each element in the returned markup carries `data-id` (its index in the
 * recorded element list), `data-interactive` and `data-visible`.
 */
export default function getAnnotatedDOM() {
  currentElements = [];
  const { clonedDOM } = traverseDOM(document.documentElement, currentElements);
  return (clonedDOM as HTMLElement).outerHTML;
}
75 |
/**
 * Returns the unique selector id of a recorded element, assigning a random one
 * on first use. Repeated calls for the same element return the id already
 * stored in its `TAXY_ELEMENT_SELECTOR` attribute.
 *
 * @param id - The element's `data-id` from the last getAnnotatedDOM() call.
 * @throws Error when no element was recorded under `id`. The previous code
 *   failed with an opaque TypeError in that case.
 */
export function getUniqueElementSelectorId(id: number): string {
  const element = currentElements[id];
  if (!element) {
    throw new Error(
      `getUniqueElementSelectorId: no element recorded for id ${id}`,
    );
  }
  // element may already have a unique id
  let uniqueId = element.getAttribute(TAXY_ELEMENT_SELECTOR);
  if (uniqueId) return uniqueId;
  uniqueId = Math.random().toString(36).substring(2, 10);
  element.setAttribute(TAXY_ELEMENT_SELECTOR, uniqueId);
  return uniqueId;
}
86 |
--------------------------------------------------------------------------------
/CONTRIBUTING_KNOWLEDGE.md:
--------------------------------------------------------------------------------
1 | # Contributing to Prior Knowledge Augmentation
2 |
3 | Fuji-Web's Prior Knowledge Augmentation system is designed to enhance the tool's web navigation and task execution capabilities by leveraging a shared knowledge base. Contributions to this system help make Fuji-Web smarter and more capable.
4 |
5 | ## What Kind of Knowledge Are We Looking For?
6 |
7 | We seek knowledge that:
8 | - Enhances the understanding of specific web pages or actions, making task execution more reliable.
9 | - Includes insights into website layouts, common patterns, and user interfaces that are not immediately obvious.
10 | - Provides rules or annotations that help the AI better interpret the purpose of elements on a page.
11 |
12 | For example, if a website has two buttons with the same name but different functionalities, it's crucial to describe in notes how to distinguish between them.
13 |
14 | ## How to Add and Test New Knowledge
15 |
16 | We offer two convenient ways to add and test new knowledge in real-time:
17 | - Via Form: Within the Fuji-Web UI settings, navigate to the "Custom Knowledge Base" and select "Add Host Knowledge with Form" to input new knowledge using a user-friendly form.
18 | - Via JSON: If you prefer to work directly with JSON, choose "Add Host Knowledge with JSON" to enter your custom knowledge.
19 |
20 | You can test the new knowledge by running several tasks on the relevant web pages to ensure Fuji-Web behaves as expected.
21 |
22 | Once you've tested various knowledge inputs and are satisfied with the new knowledge's performance, you can then copy that knowledge into the db.json file.
23 |
24 | 1. Locate the `db.json` file in the `src/helpers/knowledge` directory of the Fuji-Web repository.
25 | 2. Add your knowledge in the JSON format, following the existing structure. `annotationRules` is optional.
26 | ```json
27 | {
28 | "example.com": {
29 | "rules": [
30 | {
31 | "regexes": ["regular expression to match pathname (not host name)"],
32 | "knowledge": {
33 | "notes": ["Your insights or notes about this page or action"],
34 | "annotationRules": [
35 | {
36 | "selector": "CSS selector",
37 | "allowInvisible": true,
38 | "allowCovered": true,
39 | "allowAriaHidden": true
40 | }
41 | ]
42 | }
43 | }
44 | ]
45 | }
46 | }
47 | ```
48 | 3. Please ensure your contributions are clear and concise, with `regexes` and `selector` accurately defined.
49 |
50 | ## Submitting Your Contribution
51 |
52 | Please check out the [Contribution Guide](CONTRIBUTING.md). Share your testing process and results in your pull request to help reviewers understand the impact of your contribution. Specifically, describe how the new knowledge helps Fuji-Web achieve something it previously could not perform correctly.
53 |
--------------------------------------------------------------------------------
/TROUBLESHOOTING.md:
--------------------------------------------------------------------------------
1 | # Troubleshooting Guide for Fuji-Web
2 |
3 | This guide aims to help you diagnose and resolve common problems you might encounter. If you're still facing difficulties after following these steps, please reach out to us through our [GitHub Issues](https://github.com/normal-computing/fuji-web/issues).
4 |
5 |
6 | ## Common Issues and Solutions
7 |
8 | ### Extension Not Loading
9 |
10 | **Symptom**: The Fuji-Web extension doesn't appear in your browser or won't load.
11 |
12 | **Solutions**:
13 | 1. Ensure your browser is compatible with Fuji-Web. Currently, Fuji-Web supports Chrome.
14 | 2. Verify that `Developer mode` is enabled in `chrome://extensions/`
15 | 3. (Only when you are building it from source) Make sure you've loaded the extension from the `dist` folder.
16 | 4. Restart your browser as this can resolve many loading issues.
17 |
18 | ### API Key Problems
19 |
20 | **Symptom**: Issues related to the OpenAI API key, such as authentication errors or features not working because of the key. For example, you may see the error "404 The model `gpt-4-vision-preview` does not exist or you do not have access to it."
21 |
22 | **Solutions**:
23 | 1. Make sure you entered a valid OpenAI API key. Note that keys can expire, so verify if yours is still active.
24 | 2. Ensure that your OpenAI API key has the necessary permissions. Visit https://platform.openai.com/playground/chat to check your permissions. Lack of credits can also restrict access to certain models.
25 |
26 | ### Dom Actions Problems
27 |
28 | **Symptom**: Fuji-Web did not perform DOM actions properly.
29 |
30 | **Solutions**:
31 | 1. Currently, Fuji-Web does not support running in the background. If you open a new tab or navigate away from the website that Fuji-Web is working on, some actions may fail. Please stay on the website where you execute Fuji-Web.
32 |
33 | ### Custom Knowledge Base Problems
34 |
35 | **Symptom**: After adding custom knowledge, Fuji-Web crashed or did not perform according to the new knowledge about the active tab.
36 |
37 | **Solutions**:
38 | 1. Currently, Fuji-Web only supports basic entry validation for custom knowledge. Make sure you entered the correct host name and correct regular expressions if using a custom URL Matching Pattern.
39 |
40 | ### Voice Mode Problems
41 |
42 | **Symptom**: Fuji-Web did not capture speech.
43 |
44 | **Solutions**:
45 | 1. Check if Fuji-Web has microphone access in the browser. When you turn on the voice mode in settings, the microphone access dialog should pop up in the browser; please select "allow".
46 | 2. If the dialog didn't pop up, right-click the Fuji-Web icon in the extensions group and select "View Web Permissions". Then select "Allow" for Microphone.
47 |
48 |
49 | ## Reporting New Issues
50 |
51 | If you encounter a problem not covered in this guide, please help us by reporting it. Provide as much detail as possible, including steps to reproduce the issue, browser version, and any error messages you receive. Your reports are invaluable in helping us improve Fuji-Web.
--------------------------------------------------------------------------------
/vite.config.ts:
--------------------------------------------------------------------------------
1 | import { defineConfig } from "vite";
2 | import react from "@vitejs/plugin-react";
3 | import path, { resolve } from "path";
4 | import makeManifest from "./utils/plugins/make-manifest";
5 | import customDynamicImport from "./utils/plugins/custom-dynamic-import";
6 | import addHmr from "./utils/plugins/add-hmr";
7 | import inlineVitePreloadScript from "./utils/plugins/inline-vite-preload-script";
8 |
// Project directories, all resolved from the folder containing this config.
const rootDir = resolve(__dirname);
const srcDir = resolve(rootDir, "src");
const pagesDir = resolve(srcDir, "pages");
const assetsDir = resolve(srcDir, "assets");
const outDir = resolve(rootDir, "dist");
const publicDir = resolve(rootDir, "public");

// Development mode is signalled through the __DEV__ environment variable.
const isDev = process.env.__DEV__ === "true";
const isProduction = !isDev;

// ENABLE HMR IN BACKGROUND SCRIPT
const enableHmrInBackgroundScript = true;

/**
 * Vite configuration for the extension: path aliases, the manifest/HMR
 * plugins, and one rollup entry per extension page or script.
 */
export default defineConfig({
  resolve: {
    alias: {
      "@root": rootDir,
      "@src": srcDir,
      "@assets": assetsDir,
      "@pages": pagesDir,
    },
  },
  plugins: [
    makeManifest({}),
    react(),
    customDynamicImport(),
    addHmr({ background: enableHmrInBackgroundScript, view: true }),
    inlineVitePreloadScript(),
  ],
  publicDir,
  build: {
    outDir,
    /** Can slow down build speed. */
    // sourcemap: isDev,
    minify: isProduction,
    modulePreload: false,
    reportCompressedSize: isProduction,
    // Keep the previous output around in development builds.
    emptyOutDir: isProduction,
    rollupOptions: {
      input: {
        devtools: resolve(pagesDir, "devtools", "index.html"),
        panel: resolve(pagesDir, "panel", "index.html"),
        background: resolve(pagesDir, "background", "index.ts"),
        content: resolve(pagesDir, "content", "index.ts"),
        contentStyleGlobal: resolve(pagesDir, "content", "style.global.scss"),
        contentStyle: resolve(pagesDir, "content", "style.scss"),
        contentInjected: resolve(pagesDir, "content/mainWorld", "index.ts"),
        permission: resolve(pagesDir, "permission", "index.html"),
        popup: resolve(pagesDir, "popup", "index.html"),
        newtab: resolve(pagesDir, "newtab", "index.html"),
        options: resolve(pagesDir, "options", "index.html"),
        sidepanel: resolve(pagesDir, "sidepanel", "index.html"),
      },
      output: {
        entryFileNames: "src/pages/[name]/index.js",
        // Chunk names carry a content hash only outside development.
        chunkFileNames: isDev
          ? "assets/js/[name].js"
          : "assets/js/[name].[hash].js",
        // Fonts go to assets/fonts; every other asset is grouped by extension.
        assetFileNames: (assetInfo) => {
          const parsed = path.parse(assetInfo.name ?? "");
          if (isFont(parsed.ext)) {
            return `assets/fonts/${parsed.name}${parsed.ext}`;
          }
          return `assets/[ext]/[name].[ext]`;
        },
      },
    },
  },
});
78 |
/**
 * Tells whether a file extension (including the leading dot) is one of the
 * font formats: .woff, .woff2, .eot, .ttf or .otf. The match is case-sensitive.
 */
function isFont(ext: string): boolean {
  const fontExtension = /^\.(woff2?|eot|ttf|otf)$/;
  return fontExtension.test(ext);
}
82 |
--------------------------------------------------------------------------------
/src/common/CustomKnowledgeBase/index.tsx:
--------------------------------------------------------------------------------
1 | import React, { useState } from "react";
2 | import { Button, Text, VStack, Box } from "@chakra-ui/react";
3 | import { useAppState } from "@root/src/state/store";
4 | import NewKnowledgeForm from "./NewKnowledgeForm";
5 | import { type EditingData } from "../../helpers/knowledge";
6 | import DefaultKnowledge from "./DefaultKnowledge";
7 | import HostKnowledge from "./HostKnowledge";
8 | // import NewKnowledgeJson from "./NewKnowledgeJson";
9 | import { findActiveTab } from "../../helpers/browserUtils";
10 |
11 | const CustomKnowledgeBase = () => {
12 | const [isFormOpen, setIsFormOpen] = useState(false);
13 | const [editKnowledge, setEditKnowledge] = useState(
14 | undefined,
15 | );
16 | const customKnowledgeBase = useAppState(
17 | (state) => state.settings.customKnowledgeBase,
18 | );
19 | // const {
20 | // isOpen: isJsonInputOpen,
21 | // onOpen: openJsonInput,
22 | // onClose: closeJsonInput,
23 | // } = useDisclosure();
24 | const [defaultHost, setDefaultHost] = useState("");
25 | const [currentURL, setCurrentUrl] = useState("");
26 |
// Opens the knowledge form. When the active tab has a URL, that URL is stored
// and a default host is derived from it first: empty for URLs starting with
// "chrome", otherwise the hostname without a leading "www.".
const openForm = async () => {
  const activeTab = await findActiveTab();
  const activeUrl = activeTab?.url;
  if (activeUrl) {
    setCurrentUrl(activeUrl);
    const suggestedHost = activeUrl.startsWith("chrome")
      ? ""
      : new URL(activeUrl).hostname.replace(/^www\./, "");
    setDefaultHost(suggestedHost);
  }
  setIsFormOpen(true);
};

// Closes the form and clears any entry that was being edited.
const closeForm = () => {
  setEditKnowledge(undefined);
  setIsFormOpen(false);
};

// Opens the form for an existing host. Each stored rule is tagged with
// regexType "custom" before being handed to the form as the entry to edit.
const openEditForm = (host: string) => {
  const storedRules = customKnowledgeBase[host].rules;

  if (storedRules) {
    setEditKnowledge({
      host,
      rules: storedRules.map((rule) => ({ ...rule, regexType: "custom" })),
    });
  }

  openForm();
};
64 |
65 | return (
66 |
67 |
68 | {Object.keys(customKnowledgeBase).length > 0 ? (
69 | Object.keys(customKnowledgeBase).map((host) => (
70 |
71 |
76 |
77 | ))
78 | ) : (
79 | No instructions found
80 | )}
81 |
82 | {/* */}
83 |
91 | {/* */}
92 |
93 | );
94 | };
95 |
96 | export default CustomKnowledgeBase;
97 |
--------------------------------------------------------------------------------
/src/helpers/disableExtensions.ts:
--------------------------------------------------------------------------------
// These are extensions that are known to interfere with the operation of Taxy.
// We'll turn them off temporarily while Taxy is performing actions.
const incompatibleExtensions = [
  // Dashlane
  "fdjamakpfbbddfjaooikfcpapjohcfmg",
  // LastPass
  "hdokiejnpimakedhajhdlcegeplioahd",
];

// Per-extension number of running sessions that currently need the extension
// to stay disabled. An entry exists only while at least one session holds it.
const disableCounts: Record<string, number> = {};

// Lists the installed extensions whose id appears in `incompatibleExtensions`,
// whether or not they are currently enabled. Rejects with
// chrome.runtime.lastError when the lookup fails.
const listIncompatibleExtensions = () =>
  new Promise<chrome.management.ExtensionInfo[]>((resolve, reject) => {
    chrome.management.getAll((extensions) => {
      // lastError is only meaningful inside the callback, so capture it here.
      const error = chrome.runtime.lastError;
      if (error) {
        console.error("Failed to get extensions:", error.message);
        reject(error);
        return;
      }
      resolve(
        extensions.filter(
          (extension) =>
            extension.type === "extension" &&
            incompatibleExtensions.includes(extension.id),
        ),
      );
    });
  });

// Promise wrapper around chrome.management.setEnabled. Logs the failure and
// rejects with chrome.runtime.lastError when the state change fails.
const setExtensionEnabled = (id: string, enabled: boolean) =>
  new Promise<void>((resolve, reject) => {
    chrome.management.setEnabled(id, enabled, () => {
      const error = chrome.runtime.lastError;
      if (error) {
        console.error(
          `Failed to ${enabled ? "enable" : "disable"} extension ${id}:`,
          error.message,
        );
        reject(error);
        return;
      }
      resolve();
    });
  });

// Drops one session's hold on an extension and returns how many holds remain.
const releaseHold = (id: string): number => {
  const remaining = (disableCounts[id] ?? 0) - 1;
  if (remaining > 0) {
    disableCounts[id] = remaining;
    return remaining;
  }
  delete disableCounts[id];
  return 0;
};

/**
 * Disables every installed incompatible extension on behalf of one session.
 *
 * Each call registers one hold per affected extension. An extension that is
 * already disabled is only counted when an earlier session of ours disabled
 * it; one that was already off before we touched it is left alone so that
 * `reenableExtensions` never turns it on.
 *
 * A failure to disable an extension is logged and does not reject.
 * The promise rejects only when the extension list cannot be read.
 */
export const disableIncompatibleExtensions = async () => {
  const extensions = await listIncompatibleExtensions();

  for (const extension of extensions) {
    const holds = disableCounts[extension.id] ?? 0;

    if (!extension.enabled) {
      // Already off: join the existing hold if we were the ones who disabled it.
      if (holds > 0) {
        disableCounts[extension.id] = holds + 1;
      }
      continue;
    }

    // Register the hold before awaiting so that a session starting in the
    // meantime sees it even though the extension is already reported as off.
    disableCounts[extension.id] = holds + 1;
    try {
      await setExtensionEnabled(extension.id, false);
    } catch {
      // Best effort (already logged): do not count a disable that did not happen.
      releaseHold(extension.id);
    }
  }
};

/**
 * Releases one session's hold on every incompatible extension and re-enables
 * those that no other session still needs disabled.
 *
 * Extensions we never disabled are ignored. If re-enabling fails, the hold is
 * kept so a later call can retry, the remaining extensions are still
 * processed, and the promise rejects with the first failure.
 */
export const reenableExtensions = async () => {
  const extensions = await listIncompatibleExtensions();
  let failed = false;
  let firstError: unknown;

  for (const extension of extensions) {
    if ((disableCounts[extension.id] ?? 0) === 0) {
      continue;
    }

    // If we have multiple sessions running and have disabled the extension
    // multiple times, we only want to re-enable it once all sessions have
    // finished.
    const remaining = releaseHold(extension.id);
    if (remaining > 0 || extension.enabled) {
      continue;
    }

    try {
      await setExtensionEnabled(extension.id, true);
    } catch (error) {
      // Restore the hold so the extension is not forgotten while still disabled.
      disableCounts[extension.id] = (disableCounts[extension.id] ?? 0) + 1;
      if (!failed) {
        failed = true;
        firstError = error;
      }
    }
  }

  if (failed) {
    throw firstError;
  }
};
96 |
--------------------------------------------------------------------------------
/src/helpers/knowledge/index.ts:
--------------------------------------------------------------------------------
1 | import _db from "./db.json" assert { type: "json" };
2 | import _redirects from "./redirects.json" assert { type: "json" };
3 |
4 | type Redirects = {
5 | [host: string]: string;
6 | };
7 | export type AnnotationRule = {
8 | selector: string;
9 | useStaticName?: string;
10 | useAttributeAsName?: string;
11 | allowInvisible?: boolean;
12 | allowAriaHidden?: boolean;
13 | allowCovered?: boolean;
14 | };
15 |
16 | export type Knowledge = {
17 | notes?: string[];
18 | annotationRules?: AnnotationRule[];
19 | };
20 |
21 | export type Rule = {
22 | regexes: string[];
23 | knowledge: Knowledge;
24 | };
25 |
26 | export type Data = {
27 | [host: string]: {
28 | rules?: Rule[];
29 | };
30 | };
31 |
32 | // rule type used only in editing mode
33 | export type EditingRule = Rule & {
34 | regexType: string;
35 | };
36 |
37 | // data type used only in editing mode
38 | export type EditingData = {
39 | host: string;
40 | rules: EditingRule[];
41 | };
42 |
43 | export type LocationInfo = {
44 | host: string;
45 | pathname: string;
46 | };
47 |
/**
 * Collects the knowledge that applies to a page.
 *
 * Every normalized variant of the host (with/without "www.", plus redirect
 * targets) is looked up. For each variant the bundled database entry is used
 * when present, otherwise the entry from `customKnowledgeBase`. The rules of
 * each entry found are merged against `location.pathname`.
 *
 * @param location Host and pathname of the page.
 * @param customKnowledgeBase Optional user-defined knowledge keyed by host.
 * @returns The merged notes and annotation rules (both empty when nothing matches).
 */
export function fetchKnowledge(
  location: LocationInfo,
  customKnowledgeBase?: Data,
): Knowledge {
  // TODO: fetch from a server
  const defaults = _db as Data;
  const { host, pathname } = location;

  return getNormalizedHosts(host, _redirects as Redirects).reduce<Knowledge>(
    (merged, searchHost) => {
      const hostKnowledge =
        defaults[searchHost] || customKnowledgeBase?.[searchHost];
      return hostKnowledge
        ? mergeKnowledge(merged, hostKnowledge, pathname)
        : merged;
    },
    { notes: [], annotationRules: [] },
  );
}
72 |
/**
 * Expands a host into the distinct host names to look up.
 *
 * The result lists, in this order and without duplicates: the host with a
 * "www." prefix, the host without it, and the redirect target of each of
 * those two (when `redirects` has an entry for it).
 */
function getNormalizedHosts(host: string, redirects: Redirects): string[] {
  const hasWww = host.startsWith("www.");
  const prefixed = hasWww ? host : `www.${host}`;
  const bare = hasWww ? host.slice(4) : host;

  const candidates = [
    prefixed,
    bare,
    redirects[prefixed] || prefixed,
    redirects[bare] || bare,
  ];
  // Keep only the first occurrence of each host, preserving order.
  return candidates.filter(
    (candidate, index) => candidates.indexOf(candidate) === index,
  );
}
87 |
// Tests a rule's regex (case-insensitively) against the pathname. A pattern
// that is not a valid regular expression is reported and treated as a
// non-match, so one bad rule cannot abort the whole lookup.
function regexMatchesPathname(regex: string, pathname: string): boolean {
  try {
    return new RegExp(regex, "i").test(pathname);
  } catch (error) {
    console.warn(`Ignoring invalid knowledge regex "${regex}":`, error);
    return false;
  }
}

/**
 * Merges into `result` the knowledge of every rule in `dataSource` that
 * applies to `pathname`.
 *
 * A rule applies when at least one of its regexes matches; it is merged once
 * even if several of its regexes match. Annotation rules with an empty
 * selector are dropped. Missing `notes` / `annotationRules` arrays on
 * `result` are treated as empty.
 *
 * @param result Accumulator; it is updated in place and also returned.
 * @param dataSource Host entry whose optional `rules` are evaluated.
 * @param pathname Pathname the rule regexes are tested against.
 * @returns The same `result` object.
 */
function mergeKnowledge(
  result: Knowledge,
  dataSource: { rules?: Rule[] },
  pathname: string,
): Knowledge {
  for (const rule of dataSource.rules ?? []) {
    const applies = rule.regexes.some((regex) =>
      regexMatchesPathname(regex, pathname),
    );
    if (!applies) {
      continue;
    }

    // merge all matching rules
    result.notes = (result.notes ?? []).concat(rule.knowledge.notes ?? []);

    // filter out invalid annotation rules
    const validAnnotationRules = (rule.knowledge.annotationRules ?? []).filter(
      (annotationRule) => annotationRule.selector !== "",
    );
    result.annotationRules = (result.annotationRules ?? []).concat(
      validAnnotationRules,
    );
  }
  return result;
}
115 |
/** Returns the bundled knowledge database (the contents of db.json), keyed by host. */
export function fetchAllDefaultKnowledge(): Data {
  const defaults = _db as Data;
  return defaults;
}
119 |
--------------------------------------------------------------------------------
/src/helpers/vision-agent/determineNavigateAction.ts:
--------------------------------------------------------------------------------
1 | import { parseResponse } from "./parseResponse";
2 | import { QueryResult } from "./determineNextAction";
3 | import { useAppState } from "../../state/store";
4 | import errorChecker from "../errorChecker";
5 | import { fetchResponseFromModel } from "../aiSdkUtils";
6 |
7 | import { schemaToDescription, navigateSchema } from "./tools";
8 |
9 | const navigateSchemaDescription = schemaToDescription(navigateSchema);
10 |
// Builds the system prompt for the first, navigation-only step.
// The prompt describes the "navigate" tool and shows an example JSON response.
// With `voiceMode` on, the example gains a "speak" field and the closing
// instruction also requires it. The example is kept valid JSON in both modes:
// "speak" is inserted after "thought" with exactly one comma on each side.
const systemMessage = (voiceMode: boolean) => `
You are a browser automation assistant.

You can use the following tool:

${navigateSchemaDescription}

You will have access to more tools as you progress through the task.

You will be given a task to perform.
This is an example of expected response from you:

{
  "thought": "To find latest news on AI, I am navigating to Google.",${
    voiceMode
      ? `
  "speak": "To find the latest news on AI, I am navigating to Google.",`
      : ""
  }
  "action": {
    "name": "navigate",
    "args": {
      "url": "https://www.google.com/"
    }
  }
}

Your response must always be in JSON format and must include string "thought"${
  voiceMode ? ', string "speak",' : ""
} and object "action", which contains the string "name" of tool of choice, and necessary arguments ("args") if required by the tool.
`;
42 |
/**
 * Asks the selected model for the first action of a task, using the
 * navigation-only system prompt.
 *
 * The query is attempted up to `maxAttempts` times. A failed attempt is
 * retried when `errorChecker` reports the error as recoverable (or when the
 * thrown value is not an `Error`, which is only logged).
 *
 * @param taskInstructions The task as entered by the user.
 * @param maxAttempts Maximum number of model queries before giving up.
 * @param notifyError Optional callback that receives user-facing error text.
 * @returns Token usage, the prompt sent, the raw model response and the parsed action.
 * @throws The original error when `errorChecker` reports it as unrecoverable,
 *   or an `Error` once all attempts have been used up.
 */
export async function determineNavigateAction(
  taskInstructions: string,
  maxAttempts = 3,
  notifyError?: (error: string) => void,
): Promise<QueryResult> {
  const { selectedModel: model, voiceMode } = useAppState.getState().settings;
  const prompt = formatPrompt(taskInstructions);

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      const completion = await fetchResponseFromModel(model, {
        systemMessage: systemMessage(voiceMode),
        prompt,
        jsonMode: true,
      });

      const rawResponse = completion.rawResponse;
      let action = null;
      try {
        action = parseResponse(rawResponse);
      } catch (e) {
        console.error(e);
        // TODO: try use LLM to fix format when response is not valid
        // Rethrown as an Error so the outer handler decides whether to retry.
        throw new Error(`Incorrectly formatted response: ${e}`);
      }

      return {
        usage: completion.usage,
        prompt,
        rawResponse,
        action,
      };
    } catch (error: unknown) {
      if (error instanceof Error) {
        const recoverable = errorChecker(error, notifyError);
        if (!recoverable) {
          throw error;
        }
      } else {
        console.error("Unexpected determineNavigateAction error:");
        console.error(error);
      }
    }
  }

  const errMsg = `Failed to complete query after ${maxAttempts} attempts. Please try again later.`;
  if (notifyError) {
    notifyError(errMsg);
  }
  throw new Error(errMsg);
}
95 |
/**
 * Builds the user prompt: the task instructions followed by the current local
 * time (formatted with `Date.prototype.toLocaleString`).
 */
export function formatPrompt(taskInstructions: string) {
  const currentTime = new Date().toLocaleString();
  return `The user requests the following task:

${taskInstructions}

Current time: ${currentTime}
`;
}
104 |
--------------------------------------------------------------------------------
/src/helpers/voiceControl.ts:
--------------------------------------------------------------------------------
1 | import { useAppState } from "../state/store";
2 | import OpenAI from "openai";
3 |
4 | type SetTranscriptionFunction = (transcript: string, isFinal: boolean) => void;
5 |
/**
 * Wraps the browser speech APIs used by the app: continuous speech
 * recognition that streams the transcript into the UI instructions, and
 * text-to-speech through either the browser synthesizer or OpenAI's TTS.
 */
class VoiceControlManager {
  // null when the browser exposes no SpeechRecognition implementation.
  private recognition: SpeechRecognition | null;
  // Finalized transcript pieces of the current listening session.
  private cumulativeTranscript = "";
  // Receiver of transcript updates; set only while listening.
  private setTranscription: SetTranscriptionFunction | null = null;

  constructor() {
    const SpeechRecognition =
      window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!SpeechRecognition) {
      console.error("Browser does not support Speech Recognition.");
      this.recognition = null;
      return;
    }

    const recognition = new SpeechRecognition();
    recognition.continuous = true;
    recognition.interimResults = true;
    recognition.lang = "en-US";

    recognition.onresult = (event) => {
      // Final results are appended to the stored transcript; interim results
      // are only shown alongside it and replaced by the next event.
      let interimTranscript = "";
      for (let i = event.resultIndex; i < event.results.length; ++i) {
        const transcript = event.results[i][0].transcript;
        if (event.results[i].isFinal) {
          this.cumulativeTranscript += transcript.trim() + " ";
        } else {
          interimTranscript += transcript;
        }
      }
      if (this.setTranscription) {
        this.setTranscription(
          this.cumulativeTranscript + interimTranscript,
          false,
        );
      }
    };

    recognition.onerror = (event) => {
      console.error("Speech recognition error:", event.error);
    };

    this.recognition = recognition;
  }

  /**
   * Starts a new listening session: clears the stored transcript and routes
   * transcript updates to the UI's `setInstructions` action.
   */
  public startListening = async (): Promise<void> => {
    if (!this.recognition) {
      console.error("Speech Recognition is not initialized.");
      return;
    }

    this.cumulativeTranscript = "";
    this.setTranscription = useAppState.getState().ui.actions.setInstructions;
    this.recognition.start();
  };

  /**
   * Stops recognition and, when anything was recognized, delivers the stored
   * transcript one last time flagged as final.
   */
  public stopListening = (): void => {
    if (this.recognition) {
      this.recognition.stop();
    }
    if (this.setTranscription && this.cumulativeTranscript !== "") {
      this.setTranscription(this.cumulativeTranscript, true);
    }
    this.setTranscription = null;
  };

  /** Speaks `text` with the browser's built-in synthesizer at double rate. */
  public basicSpeak = (text: string): void => {
    const utterance = new SpeechSynthesisUtterance(text);
    utterance.rate = 2;
    speechSynthesis.speak(utterance);
  };

  /**
   * Speaks `text` using OpenAI's "tts-1" model with the stored OpenAI key.
   *
   * The returned promise settles once playback has started (or failed to).
   * Failures while generating or starting playback are logged and reported
   * through `onError`; the promise itself does not reject.
   *
   * @param text Text to synthesize.
   * @param onError Receives the error message when speech cannot be produced.
   */
  public speak = async (text: string, onError: (error: string) => void) => {
    const key = useAppState.getState().settings.openAIKey ?? undefined;
    const openai = new OpenAI({
      apiKey: key,
      dangerouslyAllowBrowser: true,
    });

    // The object URL pins the audio blob in memory until it is revoked.
    let audioUrl: string | undefined;
    const releaseAudio = () => {
      if (audioUrl) {
        URL.revokeObjectURL(audioUrl);
        audioUrl = undefined;
      }
    };

    try {
      const mp3Response = await openai.audio.speech.create({
        model: "tts-1",
        voice: "nova",
        input: text,
        speed: 1,
      });
      const arrayBuffer = await mp3Response.arrayBuffer();
      const blob = new Blob([arrayBuffer], { type: "audio/mp3" });
      audioUrl = URL.createObjectURL(blob);
      const audio = new Audio(audioUrl);
      audio.addEventListener("ended", releaseAudio, { once: true });
      audio.addEventListener("error", releaseAudio, { once: true });
      // play() returns a promise; awaiting it routes playback rejections
      // into the catch below instead of leaving them unhandled.
      await audio.play();
    } catch (error: unknown) {
      releaseAudio();
      console.error("Error generating or playing speech:", error);
      onError(error instanceof Error ? error.message : String(error));
    }
  };
}
100 |
101 | export const voiceControl = new VoiceControlManager();
102 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to Fuji-Web
2 |
3 | Thank you for your interest in contributing to Fuji-Web! Fuji-Web is a tool that simplifies web interactions through the innovative use of multi-modal Large Language Models, offering users a more intuitive and efficient online experience. We welcome contributions from the community to help make Fuji-Web even better.
4 |
5 | ## How to Contribute
6 |
7 | There are many ways to contribute to Fuji-Web, from writing code to improving documentation, reporting bugs, and suggesting enhancements. Here's how you can get started:
8 |
9 | ### Spread the Word
10 | If you love Fuji-Web, you can make a big difference by telling others about it. Write a blog post, talk about it on social media, or share your experience with friends and colleagues. Every bit helps in growing our community and bringing new contributors on board.
11 |
12 | ### Reporting Bugs
13 |
14 | Before reporting a new bug, please ensure that the issue has not already been reported. You can do this by searching through the existing issues in our GitHub repository.
15 |
16 | If you encounter a bug while using Fuji-Web and it has not been reported yet, please report it by creating a new issue. Be sure to include:
17 |
18 | - A clear and descriptive title
19 | - A detailed description of the bug, including steps to reproduce it
20 | - Any relevant screenshots or error messages
21 | - Your Fuji-Web version and browser details
22 |
23 | ### Suggesting Enhancements
24 |
25 | We're always looking for ways to improve Fuji-Web. If you have an idea for a new feature or an enhancement to an existing one, please submit it as an issue, using a clear and concise title and description. Explain why this enhancement would be useful, and if possible, include examples of how it could be implemented.
26 |
27 | ### Contributing Code
28 |
29 | Confirm alignment on the proposed work. For small fixes or minor enhancements, make sure there is an open and accepted issue. For larger contributions, a design or plan should have been reviewed and agreed upon by the maintainers.
30 |
31 | Before submitting your first code contribution, please make sure to:
32 |
33 | 1. Clone the repository.
34 | 2. Follow the setup instructions in the README.md to get your development environment running.
35 | 3. Make your changes in a new git branch and test your changes locally.
36 | 4. Commit your changes using a clear and descriptive commit message.
37 | 5. Push your branch to GitHub and open a pull request against the `main` branch. In your pull request, include any relevant issue numbers and a description of the changes you've made.
38 |
39 | ### Pull Request Guidelines
40 |
41 | - Ensure that your code follows the project's coding conventions and is properly documented.
42 | - Include screenshots or animated GIFs in your pull request whenever possible, especially for UI-related changes.
43 | - Follow the [Pull Request Template](https://github.com/normal-computing/fuji-web/PULL_REQUEST_TEMPLATE.md) provided in the repository for the description of your pull request.
44 |
45 | ### Code Review Process
46 |
47 | After you submit a pull request, the project maintainers will review your proposed changes. This process helps to ensure the quality and consistency of the Fuji-Web codebase. The review may require some back-and-forth communication, so please be patient. We appreciate your contributions and will do our best to provide feedback and guidance as quickly as possible.
48 |
49 | ## Community and Conduct
50 |
51 | We are committed to providing a welcoming and inspiring community for all. We encourage all contributors to foster an open and welcoming environment, and to be respectful of differing viewpoints and experiences.
52 |
53 | ## Acknowledgements
54 |
55 | Your contributions help make Fuji-Web a better tool for everyone. We look forward to your ideas, feedback, and contributions. Thank you for being part of the Fuji-Web community! Happy contributing!
56 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "fuji-web",
3 | "version": "2.2.0",
4 | "description": "A tool that redefines web interaction, making complex online tasks as simple as uttering a single command.",
5 | "repository": {
6 | "type": "git",
7 | "url": "https://github.com/normal-computing/fuji-web"
8 | },
9 | "scripts": {
10 | "build": "tsc --noEmit && vite build",
11 | "build:lib": "rollup --config rollup.lib.config.js && cp package.lib.json dist-lib/package.json",
12 | "build:firefox": "tsc --noEmit && cross-env __FIREFOX__=true vite build",
13 | "build:watch": "cross-env __DEV__=true vite build -w --mode development",
14 | "build:firefox:watch": "cross-env __DEV__=true __FIREFOX__=true vite build -w --mode development",
15 | "build:hmr": "rollup --config utils/reload/rollup.config.mjs",
16 | "wss": "node utils/reload/initReloadServer.js",
17 | "dev": "pnpm build:hmr && (run-p wss build:watch)",
18 | "dev:firefox": "pnpm build:hmr && (run-p wss build:firefox:watch)",
19 | "test": "exit 0",
20 | "commitlint": "commitlint --edit",
21 | "lint": "eslint src --ext .ts",
22 | "lint:fix": "pnpm lint --fix",
23 | "prettier": "prettier . --write",
24 | "prepare": "husky install"
25 | },
26 | "type": "module",
27 | "dependencies": {
28 | "@anthropic-ai/sdk": "^0.19.1",
29 | "@chakra-ui/icons": "^2.1.1",
30 | "@chakra-ui/react": "^2.8.2",
31 | "@emotion/react": "^11.11.4",
32 | "@emotion/styled": "^11.11.5",
33 | "@google/generative-ai": "^0.19.0",
34 | "accname": "^1.1.0",
35 | "construct-style-sheets-polyfill": "3.1.0",
36 | "formik": "^2.4.5",
37 | "immer": "^10.0.3",
38 | "lodash": "^4.17.21",
39 | "openai": "^4.60.0",
40 | "react": "18.2.0",
41 | "react-dom": "18.2.0",
42 | "react-icons": "^5.3.0",
43 | "react-syntax-highlighter": "^15.5.0",
44 | "react-textarea-autosize": "^8.4.1",
45 | "react-use": "^17.4.0",
46 | "tailwindcss": "^3.4.4",
47 | "webextension-polyfill": "0.10.0",
48 | "zod": "^3.23.8",
49 | "zod-validation-error": "^3.3.1",
50 | "zustand": "^4.5.2"
51 | },
52 | "devDependencies": {
53 | "@commitlint/cli": "19.5.0",
54 | "@commitlint/config-conventional": "18.1.0",
55 | "@jest/globals": "^29.7.0",
56 | "@rollup/plugin-typescript": "11.1.6",
57 | "@testing-library/react": "14.0.0",
58 | "@types/chrome": "0.0.251",
59 | "@types/dom-speech-recognition": "^0.0.4",
60 | "@types/jest": "29.5.7",
61 | "@types/lodash": "^4.17.7",
62 | "@types/node": "20.11.24",
63 | "@types/react": "18.2.37",
64 | "@types/react-dom": "18.2.22",
65 | "@types/ws": "8.5.10",
66 | "@typescript-eslint/eslint-plugin": "6.10.0",
67 | "@typescript-eslint/parser": "6.9.1",
68 | "@vitejs/plugin-react": "4.2.1",
69 | "autoprefixer": "^10.4.16",
70 | "chokidar": "3.6.0",
71 | "cross-env": "7.0.3",
72 | "eslint": "8.57.0",
73 | "eslint-config-airbnb-typescript": "17.1.0",
74 | "eslint-config-prettier": "9.1.0",
75 | "eslint-plugin-import": "2.29.1",
76 | "eslint-plugin-jsx-a11y": "6.8.0",
77 | "eslint-plugin-prettier": "5.1.3",
78 | "eslint-plugin-react": "7.35.0",
79 | "eslint-plugin-react-hooks": "4.6.0",
80 | "fs-extra": "11.1.1",
81 | "husky": "9.0.11",
82 | "jest": "29.7.0",
83 | "jest-environment-jsdom": "29.7.0",
84 | "lint-staged": "15.2.7",
85 | "npm-run-all": "4.1.5",
86 | "postcss": "^8.4.38",
87 | "prettier": "3.2.5",
88 | "rollup": "4.17.2",
89 | "rollup-plugin-dts": "^6.1.1",
90 | "rollup-plugin-esbuild": "^6.1.1",
91 | "sass": "1.72.0",
92 | "ts-jest": "29.2.5",
93 | "tslib": "2.6.2",
94 | "typescript": "5.5.3",
95 | "vite": "5.2.14",
96 | "ws": "8.18.0"
97 | },
98 | "lint-staged": {
99 | "*.{js,jsx,ts,tsx}": [
100 | "prettier --write",
101 | "eslint --fix"
102 | ]
103 | },
104 | "packageManager": "pnpm@9.13.2"
105 | }
106 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Fuji-Web: AI based Full Browser Automation 🗻
4 |
5 | Fuji-Web is an intelligent AI partner that understands the user’s intent, navigates websites autonomously, and executes tasks on the user’s behalf while explaining each action step.
6 |
7 | ### Demo
8 | https://github.com/normal-computing/fuji-web/assets/1001890/88a2fa12-31d9-4856-be67-27dcf9f1e634
9 |
10 | ## How does it work?
11 |
12 | **Please read [our blog post](https://blog.normalcomputing.ai/posts/2024-05-22-introducing-fuji-web/fuji-web.html) for a demo video, benchmarks and deep-dive technical overview!**
13 |
14 | ## Installing and Running
15 |
16 | ### Download and Install the extension in your browser
17 |
18 | 1. Go to the [releases page](https://github.com/normal-computing/fuji-web/releases), find the latest version of the extension and download "fuji-extension.zip".
19 | 2. Unzip the file.
20 | 3. Load your extension on Chrome by doing the following:
21 | 1. Navigate to `chrome://extensions/`
22 | 2. Toggle `Developer mode`
23 | 3. Click on `Load unpacked extension`
24 | 4. Select the unzipped folder
25 |
26 | ### Use the extension
27 |
28 | *Please note that you may need to refresh the page for the extension to work.*
29 |
30 | 1. Find the Fuji icon in the top right corner of your browser and click on it to open the sidepanel.
31 | 2. Create or access an existing [OpenAI API Key](https://platform.openai.com/account/api-keys) or [Anthropic API key](https://console.anthropic.com/settings/keys) and paste it in the provided box. This key will be stored in your browser, and will not be uploaded to a third party.
32 | 3. Finally, navigate to the webpage you want Fuji-Web to work on and type in the task you want it to perform.
33 |
34 | _Note: all prompts (text and image) are sent directly to the API of your selection. Fuji-Web does not attempt to collect any information from you._
35 |
36 | ### Build the extension
37 |
38 | If you want to build the extension from source, follow these instructions:
39 |
40 | 1. Ensure you have [Node.js](https://nodejs.org/). The development was done on Node v20 but it should work with some lower versions.
41 | 2. Clone this repository
42 | 3. Install `pnpm` globally: `npm install -g pnpm`
43 | 4. Run `pnpm install`
44 | 5. Run `pnpm dev` to start the development server, or `pnpm build` to build the extension.
45 |
46 | When loading the extension, you will need to load the `dist` folder created by the build process.
47 |
48 | ## Roadmap
49 |
50 | - Expose API for easy integration with browser automation frameworks (e.g. Puppeteer, Playwright, Selenium)
51 | - Add support for more complex & cross-tab workflows
52 | - Add support for more browsing behaviors (select from dropdown, extract content from entire page etc.)
53 | - Add support for saving workflows
54 | - Add support for sharing workflows & instructions with others
55 | - Create a Wikipedia-like knowledge base where users can work together to create knowledge that improves Fuji-Web's performance
56 |
57 | ## Troubleshooting
58 |
59 | Check out our [Troubleshooting Guide](TROUBLESHOOTING.md) for help with common problems.
60 |
61 | ## Contributing
62 |
63 | Interested in contributing to Fuji-Web? We'd love your help! Check out our [Contribution Guide](CONTRIBUTING.md) for guidelines on how to contribute, report bugs, suggest enhancements, and more.
64 |
65 | We also have set up a dedicated channel for Fuji-Web feedback on Discord at https://discord.gg/yfMjZ8udb5.
66 |
67 | ## Credits
68 |
69 | - Fuji-Web's image annotation method was inspired by Microsoft's [UFO paper](https://arxiv.org/abs/2402.07939).
70 | - Fuji as a tool that lives in the browser sidepanel was inspired by [TaxyAI's browser extension](https://github.com/TaxyAI/browser-extension). We also used some of its UI code.
71 | - The Chrome extension set-up leveraged an awesome boilerplate project [Jonghakseo/chrome-extension-boilerplate-react-vite](https://github.com/Jonghakseo/chrome-extension-boilerplate-react-vite).
72 | - The Fuji logo is from [Toss Face](https://emojipedia.org/toss-face) Emoji design set.
73 |
--------------------------------------------------------------------------------
/src/helpers/vision-agent/tools.ts:
--------------------------------------------------------------------------------
1 | import { z } from "zod";
2 |
3 | export const clickSchema = z.object({
4 | name: z.literal("click"),
5 | description: z
6 | .literal("Click on an element with the uid on the annotation.")
7 | .optional(),
8 | args: z.object({
9 | uid: z.string(),
10 | }),
11 | });
12 |
13 | export const setValueSchema = z.object({
14 | name: z.literal("setValue"),
15 | description: z
16 | .literal(
17 | "Focus on and set the value of an input element with the uid on the annotation.",
18 | )
19 | .optional(),
20 | args: z.object({
21 | uid: z.string(),
22 | value: z.string(),
23 | }),
24 | });
25 |
26 | export const setValueAndEnterSchema = z.object({
27 | name: z.literal("setValueAndEnter"),
28 | description: z
29 | .literal(
30 | 'Like "setValue", except then it presses ENTER. Use this tool can submit the form when there\'s no "submit" button.',
31 | )
32 | .optional(),
33 | args: z.object({
34 | uid: z.string(),
35 | value: z.string(),
36 | }),
37 | });
38 |
39 | export const navigateSchema = z.object({
40 | name: z.literal("navigate"),
41 | description: z
42 | .literal(
43 | "Navigate to a new page. The value should be a URL. Use this tool only when the current task requires navigating to a new page.",
44 | )
45 | .optional(),
46 | args: z.object({
47 | url: z.string(),
48 | }),
49 | });
50 |
51 | export const scrollSchema = z.object({
52 | name: z.literal("scroll"),
53 | description: z
54 | .literal(
55 | 'Scroll the page to see the other parts. Use "up" or "down" to scroll 2/3 of height of the window. Use "top" or "bottom" to quickly scroll to the top or bottom of the page.',
56 | )
57 | .optional(),
58 | args: z.object({
59 | value: z.string(),
60 | }),
61 | });
62 |
63 | export const waitSchema = z.object({
64 | name: z.literal("wait"),
65 | description: z
66 | .literal(
67 | "Wait for 3 seconds before the next action. Useful when the page is loading.",
68 | )
69 | .optional(),
70 | args: z.object({}).optional(),
71 | });
72 |
73 | export const finishSchema = z.object({
74 | name: z.literal("finish"),
75 | description: z.literal("Indicate the task is finished").optional(),
76 | args: z.object({}).optional(),
77 | });
78 |
79 | export const failSchema = z.object({
80 | name: z.literal("fail"),
81 | description: z
82 | .literal("Indicate that you are unable to complete the task")
83 | .optional(),
84 | args: z.object({}).optional(),
85 | });
86 |
// Union of every tool schema, discriminated by the literal "name" field.
export const toolSchemaUnion = z.discriminatedUnion("name", [
  clickSchema,
  setValueSchema,
  setValueAndEnterSchema,
  navigateSchema,
  scrollSchema,
  waitSchema,
  finishSchema,
  failSchema,
]);
// The individual schemas, in the order listed above.
const allTools = toolSchemaUnion.options;
// Any one of the tool schemas.
type ToolSchema = (typeof allTools)[number];

// A validated tool invocation: the value type described by the union.
// `z.infer` needs the schema type as its type argument.
export type ToolOperation = z.infer<typeof toolSchemaUnion>;
101 |
/**
 * Renders a tool schema as plain text for use in a prompt.
 *
 * The text holds the tool's literal name and description, followed by either
 * an "Arguments:" list (one line per key, typed "string" for `z.string()`
 * and "unknown" otherwise) or "No arguments.".
 */
export function schemaToDescription(schema: ToolSchema): string {
  const { name, description, args } = schema.shape;
  const header = `Name: ${name._def.value}\nDescription: ${
    description.unwrap()._def.value
  }\n`;

  // If the tool has arguments, list them. If entire args is ZodOptional, there are no arguments.
  if (!(args instanceof z.ZodObject) || Object.keys(args.shape).length === 0) {
    return header + "No arguments.\n";
  }

  const argumentLines = Object.entries(args.shape).map(([key, value]) => {
    const argType = value instanceof z.ZodString ? "string" : "unknown";
    return ` - ${key} (${argType})\n`;
  });
  return header + "Arguments:\n" + argumentLines.join("");
}
123 |
// Renders every tool schema and joins the descriptions with a newline between them.
function getAllToolsDescriptions(): string {
  const sections = allTools.map((tool) => schemaToDescription(tool));
  return sections.join("\n");
}
// Computed once at module load.
export const allToolsDescriptions = getAllToolsDescriptions();
128 |
--------------------------------------------------------------------------------
/src/helpers/knowledge/db.json:
--------------------------------------------------------------------------------
1 | {
2 | "x.com": {
3 | "rules": [
4 | {
5 | "regexes": [".*"],
6 | "knowledge": {
7 | "notes": [
8 | "The website X (formerly Twitter) is a social media platform. Many people still call it Twitter and use the term \"tweet\" to refer to a post.",
9 | "Do not confuse \"post\" with \"message\". A post is a public message that can be seen by anyone, while a message is a private message that can only be seen by the recipient."
10 | ]
11 | }
12 | },
13 | {
14 | "regexes": ["^/compose/post/?$"],
15 | "knowledge": {
16 | "notes": [
17 | "The \"Add post\" button is used to compose a thread. Do not confuse with the \"Post\" button that sends the composed tweet."
18 | ]
19 | }
20 | }
21 | ]
22 | },
23 | "calendar.google.com": {
24 | "rules": [
25 | {
26 | "regexes": [".*"],
27 | "knowledge": {
28 | "notes": [
29 | "The best way to create a new event on Google Calendar is to click on the \"Create\" button, then click on the \"Event\" option, then fill in the details in the form, and click on the \"Save\" button."
30 | ],
31 | "annotationRules": [
32 | {
33 | "selector": "[data-key]",
34 | "useAttributeAsName": "data-key",
35 | "allowInvisible": false,
36 | "allowCovered": true,
37 | "allowAriaHidden": false
38 | },
39 | {
40 | "selector": "[data-hovercard-id]",
41 | "useAttributeAsName": "data-hovercard-id",
42 | "allowInvisible": false,
43 | "allowCovered": false,
44 | "allowAriaHidden": false
45 | }
46 | ]
47 | }
48 | }
49 | ]
50 | },
51 | "airbnb.com": {
52 | "rules": [
53 | {
54 | "regexes": [".*"],
55 | "knowledge": {
56 | "annotationRules": [
57 | {
58 | "selector": "[data-testid=\"listing-card-title\"]",
59 | "allowInvisible": false,
60 | "allowCovered": true,
61 | "allowAriaHidden": true
62 | }
63 | ]
64 | }
65 | }
66 | ]
67 | },
68 | "amazon.com": {
69 | "rules": [
70 | {
71 | "regexes": [".*"],
72 | "knowledge": {
73 | "notes": [
74 | "Be careful not to confuse the \"Add to Cart\" with the \"Buy Now\". The \"Add to Cart\" button adds the item to the cart, while the \"Buy Now\" button takes you to the checkout page.",
75 | "Do not confuse \"Buy Now\" and \"Buy New\" on product page.",
76 | "You should always verify if the product is in stock before buying or adding to the cart. If the product is not in stock, you should notify the user about it."
77 | ]
78 | }
79 | },
80 | {
81 | "regexes": ["^/s$"],
82 | "knowledge": {
83 | "notes": [
84 | "There is no \"Add to Cart\" button on the search results page. You need to click on the product name to go to the product details page first."
85 | ]
86 | }
87 | }
88 | ]
89 | },
90 | "github.com": {
91 | "rules": [
92 | {
93 | "regexes": [
94 | ".*"
95 | ],
96 | "knowledge": {
97 | "notes": [
98 | "You can open the account menu by clicking the user's avatar on the top right. You can find and manage current user's profile, repositories, projects, organizations, etc. in the menu.",
99 | "To invite a member to an organization or a team, you need to first click \"Invite member\" or \"Invite someone\". When you see the dialog, type in the input to search by username or email, then click the button that appears under the input that says \"[name] invite to [org]\". Please note that this does not actually send the invite: it only adds the user to the selection. You must then click the green \"Invite\" button to send the invitation."
100 | ]
101 | }
102 | }
103 | ]
104 | }
105 | }
106 |
--------------------------------------------------------------------------------
/src/common/CustomKnowledgeBase/NewKnowledgeJson.tsx:
--------------------------------------------------------------------------------
1 | import {
2 | Button,
3 | Modal,
4 | ModalBody,
5 | ModalCloseButton,
6 | ModalContent,
7 | ModalFooter,
8 | ModalHeader,
9 | ModalOverlay,
10 | Textarea,
11 | useToast,
12 | } from "@chakra-ui/react";
13 | import { useAppState } from "@root/src/state/store";
14 | import { useState } from "react";
15 | import DuplicateKnowledgeAlert from "./DuplicateKnowledgeAlert";
16 | import { type Data } from "@root/src/helpers/knowledge";
17 |
18 | type NewKnowledgeJsonProps = {
19 | isOpen: boolean;
20 | onClose: () => void;
21 | };
22 |
23 | const NewKnowledgeJson = ({ isOpen, onClose }: NewKnowledgeJsonProps) => {
24 | const [jsonInput, setJsonInput] = useState("");
25 | const [showDuplicateAlert, setShowDuplicateAlert] = useState(false);
26 | const [newCustomKnowledge, setNewCustomKnowledge] = useState(
27 | null,
28 | );
29 | const [duplicatedHosts, setduplicatedHosts] = useState(null);
30 | const toast = useToast();
31 | const updateSettings = useAppState((state) => state.settings.actions.update);
32 | const customKnowledgeBase = useAppState(
33 | (state) => state.settings.customKnowledgeBase,
34 | );
35 |
36 | function saveKnowledges() {
37 | const newKnowledge = { ...customKnowledgeBase, ...newCustomKnowledge };
38 | updateSettings({ customKnowledgeBase: newKnowledge });
39 | }
40 |
/**
 * Parses the JSON typed into the textarea and stores it as custom knowledge.
 *
 * Each top-level key is a host name (a leading "www." is dropped) whose value
 * must contain a `rules` array. Hosts that already exist in the custom
 * knowledge base trigger the duplicate alert instead of being saved right
 * away. Any parsing or validation failure is reported through a toast.
 */
const validateJSON = () => {
  try {
    const parsedJson = JSON.parse(jsonInput);
    if (
      parsedJson === null ||
      typeof parsedJson !== "object" ||
      Array.isArray(parsedJson)
    ) {
      throw new Error("Expected a JSON object keyed by host name");
    }

    const dupHosts: Data = {};
    const hostsKnowledges: Data = {};
    for (const host of Object.keys(parsedJson)) {
      const hostKnowledge = parsedJson[host];
      // Basic validation for the structure
      if (!hostKnowledge || !Array.isArray(hostKnowledge.rules)) {
        throw new Error(`Invalid structure for host: ${host}`);
      }
      // Further validation can be added here, e.g., checking if regex is valid, checking each rule's structure

      const hostName = host.startsWith("www.") ? host.slice(4) : host;
      hostsKnowledges[hostName] = hostKnowledge;
      if (hostName in customKnowledgeBase) {
        dupHosts[hostName] = hostKnowledge;
      }
    }

    // Kept in state so the duplicate-alert confirmation can save it later.
    setNewCustomKnowledge(hostsKnowledges);
    if (Object.keys(dupHosts).length > 0) {
      setduplicatedHosts(dupHosts);
      setShowDuplicateAlert(true);
    } else {
      // Save from the local value: the state set above is not visible until
      // the next render, so reading it here would persist nothing.
      updateSettings({
        customKnowledgeBase: { ...customKnowledgeBase, ...hostsKnowledges },
      });
      setJsonInput("");
      onClose();
    }
  } catch (error) {
    console.error("Failed to save JSON", error);
    toast({
      title: "Error",
      description: `Failed to save JSON: ${error}`,
      status: "error",
      duration: 5000,
      isClosable: true,
    });
  }
};
80 |
/**
 * Lists the host names that collide with existing custom knowledge.
 *
 * @returns The names separated by ", ", or an empty string when there are
 * no duplicates.
 */
const duplicatedHostsNames = (): string => {
  if (!duplicatedHosts) {
    return "";
  }
  // Separate the names so several hosts do not run together into one word.
  return Object.keys(duplicatedHosts).join(", ");
};
90 |
91 | function handleAlertOnSave(): void {
92 | saveKnowledges();
93 | setduplicatedHosts(null);
94 | setShowDuplicateAlert(false);
95 | setJsonInput("");
96 | onClose();
97 | }
98 |
99 | return (
100 |
101 |
102 |
103 |
104 | setShowDuplicateAlert(false)}
109 | />
110 | New Host Knowledge
111 |
112 |
120 |
121 |
124 |
125 |
126 |
127 |
128 | );
129 | };
130 |
131 | export default NewKnowledgeJson;
132 |
--------------------------------------------------------------------------------
/src/common/CustomKnowledgeBase/HostKnowledge.tsx:
--------------------------------------------------------------------------------
1 | import { DeleteIcon, EditIcon } from "@chakra-ui/icons";
2 | import {
3 | Heading,
4 | Accordion,
5 | AccordionItem,
6 | AccordionButton,
7 | AccordionPanel,
8 | AccordionIcon,
9 | IconButton,
10 | Tooltip,
11 | Flex,
12 | Box,
13 | } from "@chakra-ui/react";
14 | import { fetchAllDefaultKnowledge } from "../../helpers/knowledge";
15 | import { useAppState } from "@root/src/state/store";
16 | import Notes from "./Notes";
17 |
18 | type HostKnowledgeProps = {
19 | host: string;
20 | isDefaultKnowledge: boolean;
21 | onEdit?: (host: string) => void;
22 | };
23 |
24 | const HostKnowledge = ({
25 | host,
26 | isDefaultKnowledge,
27 | onEdit,
28 | }: HostKnowledgeProps) => {
29 | const updateSettings = useAppState((state) => state.settings.actions.update);
30 | const customKnowledgeBase = useAppState(
31 | (state) => state.settings.customKnowledgeBase,
32 | );
33 | const knowledgeBase = isDefaultKnowledge
34 | ? fetchAllDefaultKnowledge()
35 | : customKnowledgeBase;
36 |
37 | if (knowledgeBase[host] === undefined) {
38 | return null;
39 | }
40 | const rules = knowledgeBase[host].rules;
41 | if (rules === undefined) {
42 | return null;
43 | }
44 |
/** Removes this host's entry from the custom knowledge base. */
const handleRemove = () => {
  // Always start from the custom knowledge base (not `knowledgeBase`, which
  // may be the default set) so defaults can never be copied into user settings.
  const newKnowledge = { ...customKnowledgeBase };
  delete newKnowledge[host];
  updateSettings({ customKnowledgeBase: newKnowledge });
};
50 |
51 | // temporarily disable copy feature
52 | /*
53 | const getJsonString = (): string => {
54 | return JSON.stringify(knowledgeBase[host], null, 2);
55 | };
56 |
57 | const handleCopy = async () => {
58 | try {
59 | await navigator.clipboard.writeText(getJsonString());
60 | toast({
61 | title: "Copied",
62 | description: "Knowledge has been copied to clipboard.",
63 | status: "success",
64 | duration: 2000,
65 | isClosable: true,
66 | });
67 | } catch (err) {
68 | toast({
69 | title: "Error",
70 | description: "Failed to copy knowledge to clipboard.",
71 | status: "error",
72 | duration: 2000,
73 | isClosable: true,
74 | });
75 | }
76 | };
77 | */
78 |
79 | return (
80 | <>
81 |
82 |
89 | {!isDefaultKnowledge && (
90 |
94 |
95 | }
98 | size="sm"
99 | variant="ghost"
100 | onClick={() => {
101 | if (onEdit) onEdit(host);
102 | }}
103 | />
104 |
105 |
106 | }
109 | size="sm"
110 | variant="ghost"
111 | onClick={handleRemove}
112 | />
113 |
114 |
115 | )}
116 | {host}
117 |
118 |
119 |
120 | {rules.map((rule, ruleIndex) => {
121 | // Skip rules without notes
122 | if (
123 | rule.knowledge === undefined ||
124 | rule.knowledge.notes === undefined ||
125 | rule.knowledge.notes.length === 0
126 | ) {
127 | return null;
128 | }
129 | return (
130 |
131 |
132 |
133 |
134 | Instructions Set {ruleIndex + 1}
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 | );
144 | })}
145 |
146 | >
147 | );
148 | };
149 |
150 | export default HostKnowledge;
151 |
--------------------------------------------------------------------------------
/src/shared/images/mergeScreenshots.ts:
--------------------------------------------------------------------------------
1 | // TODO: make it configurable?
2 | const DEFAULT_FONT_SIZE = 40;
3 | const DEFAULT_FONT_STYLE = `${DEFAULT_FONT_SIZE}px serif`;
4 |
5 | export type ImageSourceAttrs = {
6 | src: string;
7 | caption?: string;
8 | opacity?: number | undefined;
9 | };
10 |
11 | type ExtendedImageData = ImageSourceAttrs & {
12 | img: HTMLImageElement;
13 | };
14 |
15 | export type MergeImageOptionsInput = {
16 | format?: string;
17 | quality?: number;
18 | maxFileSizeMB?: number;
19 | padding?: number;
20 | };
21 |
22 | type MergeImageOptions = MergeImageOptionsInput & {
23 | format: string;
24 | padding: number;
25 | };
26 |
27 | export type GetCanvasSize = (
28 | images: ExtendedImageData[],
29 | options: MergeImageOptions,
30 | ) => {
31 | width: number;
32 | height: number;
33 | };
34 |
/**
 * Computes the canvas size for images laid out side by side.
 *
 * Width is the sum of every image width plus twice the padding per image.
 * Height is the tallest image plus twice the padding, plus one font-size of
 * extra room.
 */
const getHorizontalLayoutCanvasSize: GetCanvasSize = (images, options) => {
  const framePadding = options.padding * 2;
  const totalWidth = images.reduce(
    (sum, { img }) => sum + (img.width + framePadding),
    0,
  );
  const tallest = images.reduce(
    (max, { img }) => Math.max(max, img.height + framePadding),
    0,
  );
  return {
    width: totalWidth,
    height: tallest + DEFAULT_FONT_SIZE,
  };
};
48 |
/**
 * Encodes the canvas as a WebP data URL, lowering the quality step by step
 * until the estimated payload fits within `maxFileSizeMB`.
 *
 * The size estimate is `dataURL.length * 0.75` (base64 carries roughly three
 * bytes per four characters). If even quality 0 does not fit, the quality-0
 * encoding is returned.
 *
 * @param canvas - Canvas to encode.
 * @param maxFileSizeMB - Size limit in megabytes.
 * @param maxQuality - Starting quality, clamped to [0, 1].
 * @param qualityStep - Amount subtracted per attempt; a non-positive value
 * falls back to 0.05 so the loop always terminates.
 * @returns The WebP data URL.
 */
function getWebPDataURL(
  canvas: HTMLCanvasElement,
  maxFileSizeMB: number = 5,
  maxQuality = 1,
  qualityStep = 0.05,
) {
  const maxFileSizeBytes = maxFileSizeMB * 1024 * 1024;
  const step = qualityStep > 0 ? qualityStep : 0.05;
  let quality = Math.min(1, Math.max(0, maxQuality));
  let dataURL = canvas.toDataURL("image/webp", quality);

  // Check the size of the data URL
  while (dataURL.length * 0.75 > maxFileSizeBytes && quality > 0) {
    // Clamp at 0: toDataURL ignores a quality outside [0, 1] and uses its
    // default, which would make the final attempt larger rather than smaller.
    quality = Math.max(0, quality - step);
    dataURL = canvas.toDataURL("image/webp", quality);
  }

  return dataURL;
}
68 |
69 | // Defaults
70 | const defaultOptions: MergeImageOptions = {
71 | format: "image/webp",
72 | quality: 1,
73 | maxFileSizeMB: 5,
74 | padding: 40,
75 | };
76 |
/**
 * Draws the given images side by side on one canvas, each with a border and
 * an optional caption underneath, and returns the result as a data URL.
 *
 * @param sources - Images to merge, in left-to-right order.
 * @param optionsInput - Overrides for the default format, quality, size
 * limit and padding.
 * @returns A data URL in `options.format`; WebP output is additionally
 * reduced in quality to respect `maxFileSizeMB`.
 * @throws Error if a 2D canvas context is unavailable or an image fails to load.
 */
const mergeImages = async (
  sources: ImageSourceAttrs[] = [],
  optionsInput: MergeImageOptionsInput = {},
) => {
  const options: MergeImageOptions = {
    ...defaultOptions,
    ...optionsInput,
  };

  const canvas = window.document.createElement("canvas");

  // Get the context before starting any image load, so a failure here does
  // not leave pending image promises that nobody handles.
  const ctx = canvas.getContext("2d");
  if (!ctx) {
    throw new Error("Could not get canvas context");
  }

  // Load sources
  const images = await Promise.all(
    sources.map(
      (source) =>
        new Promise<ExtendedImageData>((resolve, reject) => {
          const img = new Image();
          img.onerror = () => reject(new Error("Couldn't load image"));
          // Resolve with the source attributes plus the loaded element
          img.onload = () => resolve({ ...source, img });
          img.src = source.src;
        }),
    ),
  );

  // Set canvas dimensions
  const canvasSize = getHorizontalLayoutCanvasSize(images, options);
  canvas.width = canvasSize.width;
  canvas.height = canvasSize.height;
  // fill canvas with gray background
  ctx.fillStyle = "#f0f0f0";
  ctx.fillRect(0, 0, canvas.width, canvas.height);

  // Draw images and captions to canvas (horizontally)
  let x = options.padding;
  const y = options.padding;
  ctx.textAlign = "center";
  ctx.font = DEFAULT_FONT_STYLE;
  ctx.fillStyle = "black";
  ctx.strokeStyle = "black";
  for (const image of images) {
    // The opacity also applies to this image's border and caption
    ctx.globalAlpha = image.opacity ?? 1;
    ctx.drawImage(image.img, x, y);
    // border around image
    ctx.strokeRect(x, y, image.img.width, image.img.height);
    if (image.caption != null) {
      ctx.fillText(
        image.caption,
        x + image.img.width / 2,
        y + image.img.height + DEFAULT_FONT_SIZE,
      );
    }
    // Increment x to where the next image should be drawn
    x += image.img.width + options.padding;
  }

  if (options.format === "image/webp") {
    return getWebPDataURL(canvas, options.maxFileSizeMB, options.quality);
  }

  return canvas.toDataURL(options.format, options.quality);
};
151 |
152 | export default mergeImages;
153 |
--------------------------------------------------------------------------------
/src/helpers/dom-agent/determineNextAction.ts:
--------------------------------------------------------------------------------
1 | import { useAppState } from "../../state/store";
2 | import { availableActions } from "./availableActions";
3 | import { ParsedResponseSuccess, parseResponse } from "./parseResponse";
4 | import { QueryResult } from "../vision-agent/determineNextAction";
5 | import errorChecker from "../errorChecker";
6 | import { fetchResponseFromModel } from "../aiSdkUtils";
7 |
/** One step of a query result: a thought paired with the operation to run. */
type Action = NonNullable<QueryResult>["action"];
9 |
// Numbered, human-readable list of the available actions, one per line, in
// the form `N. name(arg: type, ...): description`.
const formattedActions = availableActions
  .map(({ name, args, description }, index) => {
    const signature = args
      .map(({ name: argName, type }) => `${argName}: ${type}`)
      .join(", ");
    return `${index + 1}. ${name}(${signature}): ${description}`;
  })
  .join("\n");
18 |
// System prompt for the DOM agent: lists the available tools, explains the
// inputs the model receives, and shows the expected JSON response format.
const systemMessage = `
You are a browser automation assistant.

You can use the following tools:

${formattedActions}

You will be given a task to perform and the current state of the DOM.
You will also be given previous actions that you have taken. You may retry a failed action up to one time.

There are three examples of actions:

Example 1:
{
thought: "I am clicking the add to cart button",
action: "click(223)"
}

Example 2:
{
thought: "I am typing 'fish food' into the search bar",
action: "setValue(123, 'fish food')"
}

Example 3:
{
thought: "I continue to scroll down to find the section",
action: "scroll('down')"
}

Your response must always be in JSON format and must include "thought" and "action".
When finish, use "finish()" in "action" and include a brief summary of the task in "thought".
`;
52 |
/**
 * Asks the selected model for the next action, retrying on failure.
 *
 * Each attempt sends the system message and the formatted prompt in JSON
 * mode, then parses the reply. A reply that cannot be parsed is logged and
 * the attempt is repeated. A request error is passed to `errorChecker`; if
 * it is not recoverable, the error is rethrown immediately.
 *
 * @param taskInstructions - The task the user asked for.
 * @param previousActions - Actions already taken during this task.
 * @param simplifiedDOM - Page contents included in the prompt.
 * @param maxAttempts - Maximum number of model queries.
 * @param notifyError - Optional callback that receives error messages.
 * @returns Usage, prompt, raw response and the parsed action.
 * @throws Error when an unrecoverable error occurs or all attempts fail.
 */
export async function determineNextAction(
  taskInstructions: string,
  previousActions: Action[],
  simplifiedDOM: string,
  maxAttempts = 3,
  notifyError?: (error: string) => void,
): Promise<QueryResult> {
  const model = useAppState.getState().settings.selectedModel;
  const prompt = formatPrompt(taskInstructions, previousActions, simplifiedDOM);

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      const completion = await fetchResponseFromModel(model, {
        systemMessage,
        prompt,
        jsonMode: true,
      });

      const rawResponse = completion.rawResponse;

      try {
        const parsed = parseResponse(rawResponse);
        if ("error" in parsed) {
          throw new Error(parsed.error);
        }
        return {
          usage: completion.usage,
          prompt,
          rawResponse,
          // TODO: refactor dom agent so we don't need this
          action: visionActionAdapter(parsed),
        };
      } catch (e) {
        // A malformed reply is not fatal; fall through to the next attempt.
        console.error("Failed to parse response", e);
      }
    } catch (error: unknown) {
      if (error instanceof Error) {
        const recoverable = errorChecker(error, notifyError);
        if (!recoverable) {
          throw error;
        }
      } else {
        console.error("Unexpected determineNextAction error:");
        console.error(error);
      }
    }
  }
  const errMsg = `Failed to complete query after ${maxAttempts} attempts. Please try again later.`;
  if (notifyError) {
    notifyError(errMsg);
  }
  throw new Error(errMsg);
}
107 |
/**
 * Builds the user prompt sent to the model.
 *
 * The prompt contains the task, the previously taken actions (when there are
 * any), the current local time and, when provided, the page contents.
 *
 * @param taskInstructions - The task the user asked for.
 * @param previousActions - Actions already taken; each is rendered as its
 * thought plus the JSON of its operation.
 * @param pageContents - Optional page contents appended at the end.
 * @returns The assembled prompt text.
 */
export function formatPrompt(
  taskInstructions: string,
  previousActions: Action[],
  pageContents?: string,
) {
  const history =
    previousActions.length === 0
      ? ""
      : `You have already taken the following actions: \n${previousActions
          .map(
            ({ thought, operation }) =>
              `Thought: ${thought}\nAction:${JSON.stringify(operation)}`,
          )
          .join("\n\n")}\n\n`;

  // Joined with "\n"; the empty entries produce the blank separator lines.
  const base = [
    "The user requests the following task:",
    "",
    taskInstructions,
    "",
    history,
    "",
    `Current time: ${new Date().toLocaleString()}`,
    "",
  ].join("\n");

  return pageContents
    ? `${base}\nCurrent page contents:\n${pageContents}`
    : base;
}
142 |
// Converts a parsed DOM-agent response into the action shape used by the
// vision agent: the args gain a `uid` field, which mirrors `elementId` when
// present and is "" otherwise.
// TODO: refactor dom agent so we don't need this
function visionActionAdapter(action: ParsedResponseSuccess): Action {
  const { thought, parsedAction } = action;
  const withBlankUid = { ...parsedAction.args, uid: "" };
  const args =
    "elementId" in withBlankUid
      ? { ...withBlankUid, uid: withBlankUid.elementId }
      : withBlankUid;
  const operation = { name: parsedAction.name, args } as Action["operation"];
  return { thought, operation };
}
158 |
--------------------------------------------------------------------------------
/src/common/TaskUI.tsx:
--------------------------------------------------------------------------------
1 | import React, { useCallback } from "react";
2 | import {
3 | Button,
4 | Box,
5 | HStack,
6 | Spacer,
7 | useToast,
8 | Alert,
9 | AlertIcon,
10 | AlertDescription,
11 | } from "@chakra-ui/react";
12 | import { debugMode } from "../constants";
13 | import { useAppState } from "../state/store";
14 | import RunTaskButton from "./RunTaskButton";
15 | import VoiceButton from "./VoiceButton";
16 | import TaskHistory from "./TaskHistory";
17 | import TaskStatus from "./TaskStatus";
18 | import RecommendedTasks from "./RecommendedTasks";
19 | import AutosizeTextarea from "./AutosizeTextarea";
20 |
/**
 * Injects the content script into the active tab of the current window,
 * running it in the page's MAIN world. Does nothing when no active tab (or
 * tab id) is found.
 */
const injectContentScript = async () => {
  const tabs = await chrome.tabs.query({ currentWindow: true, active: true });
  const activeTab = tabs[0];
  if (!activeTab || !activeTab.id) {
    return;
  }

  await chrome.scripting.executeScript({
    target: { tabId: activeTab.id },
    files: ["src/pages/contentInjected/index.js"],
    world: "MAIN",
  });
};
33 |
34 | function ActionExecutor() {
35 | const state = useAppState((state) => ({
36 | attachDebugger: state.currentTask.actions.attachDebugger,
37 | detachDegugger: state.currentTask.actions.detachDebugger,
38 | performActionString: state.currentTask.actions.performActionString,
39 | prepareLabels: state.currentTask.actions.prepareLabels,
40 | showImagePrompt: state.currentTask.actions.showImagePrompt,
41 | }));
42 | return (
43 |
44 |
53 |
54 |
55 |
56 |
63 |
64 |
65 | );
66 | }
67 |
68 | const TaskUI = () => {
69 | const state = useAppState((state) => ({
70 | taskHistory: state.currentTask.history,
71 | taskStatus: state.currentTask.status,
72 | runTask: state.currentTask.actions.runTask,
73 | instructions: state.ui.instructions,
74 | setInstructions: state.ui.actions.setInstructions,
75 | voiceMode: state.settings.voiceMode,
76 | isListening: state.currentTask.isListening,
77 | }));
78 | const taskInProgress = state.taskStatus === "running";
79 |
80 | const toast = useToast();
81 |
82 | const toastError = useCallback(
83 | (message: string) => {
84 | toast({
85 | title: "Error",
86 | description: message,
87 | status: "error",
88 | duration: 5000,
89 | isClosable: true,
90 | });
91 | },
92 | [toast],
93 | );
94 |
95 | const runTask = useCallback(() => {
96 | state.instructions && state.runTask(toastError);
97 | }, [state, toastError]);
98 |
99 | const runTaskWithNewInstructions = (newInstructions: string = "") => {
100 | if (!newInstructions) {
101 | return;
102 | }
103 | state.setInstructions(newInstructions);
104 | state.runTask(toastError);
105 | };
106 |
107 | const onKeyDown = (e: React.KeyboardEvent) => {
108 | if (e.key === "Enter" && e.shiftKey) {
109 | e.preventDefault();
110 | runTask();
111 | }
112 | };
113 |
114 | return (
115 | <>
116 | state.setInstructions(e.target.value)}
123 | mb={2}
124 | onKeyDown={onKeyDown}
125 | />
126 |
127 |
128 | {state.voiceMode && (
129 |
133 | )}
134 |
135 |
136 | {state.voiceMode && (
137 |
138 |
139 |
140 | In Voice Mode, you can press Space to start speaking and Space again
141 | to stop. Fuji will run the task when you stop speaking. To turn off
142 | Voice Mode, click the Setting icon in the top right corner.
143 |
144 |
145 | )}
146 | {!state.voiceMode && !state.instructions && (
147 |
148 | )}
149 | {debugMode && }
150 |
151 |
152 | >
153 | );
154 | };
155 |
156 | export default TaskUI;
157 |
--------------------------------------------------------------------------------
/src/helpers/dom-agent/parseResponse.ts:
--------------------------------------------------------------------------------
1 | import { ActionPayload, availableActions } from "./availableActions";
2 |
3 | export type ParsedResponseSuccess = {
4 | thought: string;
5 | action: string;
6 | parsedAction: ActionPayload;
7 | };
8 |
9 | export type ParsedResponse =
10 | | ParsedResponseSuccess
11 | | {
12 | error: string;
13 | };
14 |
/**
 * Extracts candidate JSON snippets from triple-backtick code fences.
 *
 * Sometimes the AI replies with JSON wrapped in a markdown code block. A
 * fenced block is kept when it is tagged `json`, or when its trimmed content
 * starts with "{".
 *
 * @param input - Raw model output.
 * @returns The contents of the matching fenced blocks, in order of appearance.
 */
export function extractJsonFromMarkdown(input: string): string[] {
  const fencedBlock = /```(json)?\s*([\s\S]*?)\s*```/g;
  return Array.from(input.matchAll(fencedBlock))
    .filter(([, tag, body]) => tag === "json" || body.startsWith("{"))
    .map(([, , body]) => body);
}
32 |
/**
 * Parses a call expression such as `setValue(123, 'fish food')`.
 *
 * Arguments are recognised as either quoted strings (single or double
 * quotes, with backslash escapes) or runs of digits. Anything else between
 * the parentheses is ignored.
 *
 * @param callString - Text containing `name(args...)`.
 * @returns The function name and its arguments; quoted arguments become
 * strings with escapes resolved, digit runs become numbers.
 * @throws Error when the input does not look like a function call.
 */
function parseFunctionCall(callString: string) {
  // First, match the function name and the arguments part
  const functionPattern = /(\w+)\(([\s\S]*)\)/;
  const matches = callString.match(functionPattern);

  if (!matches) {
    console.error("Input does not match a function call pattern.", callString);
    throw new Error("Input does not match a function call pattern.");
  }

  const [, name, argsPart] = matches;

  // Then, match the arguments inside the args part
  // This pattern looks for either strings (handling escaped quotes) or numbers as arguments
  const argsPattern = /(["'])(?:(?=(\\?))\2[\s\S])*?\1|\d+/g;
  const argsMatches = argsPart.match(argsPattern) ?? [];

  const args = argsMatches.map((arg: string): string | number => {
    const isQuoted =
      (arg.startsWith(`"`) && arg.endsWith(`"`)) ||
      (arg.startsWith(`'`) && arg.endsWith(`'`));
    if (isQuoted) {
      // Strip the surrounding quotes and unescape characters
      return arg
        .slice(1, -1)
        .replace(/\\'/g, `'`)
        .replace(/\\"/g, `"`)
        .replace(/\\\\/g, `\\`);
    }
    // Digit run: Number() accepts leading zeros ("007" -> 7), which
    // JSON.parse would reject with a SyntaxError.
    return Number(arg);
  });

  return { name, args };
}
71 |
/**
 * Parses the model's reply into a thought and a validated action.
 *
 * The reply is read as JSON, either directly or from the first fenced code
 * block. It must be an object with a `thought` and a string `action` holding
 * a function call (e.g. `click(223)`). The call must name one of the
 * available actions and pass the expected number of arguments, which are
 * mapped to that action's argument names by position.
 *
 * @param text - Raw model output.
 * @returns The parsed result, or `{ error }` describing why it is invalid.
 * @throws Error when the reply holds no valid JSON or the action string is
 * not a function call.
 */
export function parseResponse(text: string): ParsedResponse {
  let action;
  try {
    action = JSON.parse(text);
  } catch (_e) {
    try {
      action = JSON.parse(extractJsonFromMarkdown(text)[0]);
    } catch (_e) {
      throw new Error("Response does not contain valid JSON.");
    }
  }

  // Valid JSON may still be null or a primitive; reading properties from
  // it would crash with a TypeError.
  if (action === null || typeof action !== "object") {
    return {
      error: "Invalid response: Expected a JSON object in the model response.",
    };
  }

  if (!action.thought) {
    return {
      error: "Invalid response: Thought not found in the model response.",
    };
  }

  if (!action.action) {
    return {
      error: "Invalid response: Action not found in the model response.",
    };
  }

  if (typeof action.action !== "string") {
    return {
      error: "Invalid response: Action must be a string in the model response.",
    };
  }

  const thought = action.thought;
  const actionString = action.action;

  const { name: actionName, args: argsArray } = parseFunctionCall(actionString);
  console.log(actionName, argsArray);

  const availableAction = availableActions.find(
    (candidate) => candidate.name === actionName,
  );

  if (!availableAction) {
    return {
      error: `Invalid action: "${actionName}" is not a valid action.`,
    };
  }

  if (argsArray.length !== availableAction.args.length) {
    return {
      error: `Invalid number of arguments: Expected ${availableAction.args.length} for action "${actionName}", but got ${argsArray.length}.`,
    };
  }

  // Map positional arguments to the action's declared argument names.
  // TODO: type-parsing is currently disabled because all our args are strings;
  // re-enable per-type validation if a non-string argument type is added.
  const parsedArgs: Record<string, string | number> = {};
  argsArray.forEach((arg, i) => {
    parsedArgs[availableAction.args[i].name] = arg;
  });

  const parsedAction = {
    name: availableAction.name,
    args: parsedArgs,
  } as ActionPayload;

  return {
    thought,
    action: actionString,
    parsedAction,
  };
}
157 |
--------------------------------------------------------------------------------
/src/common/TaskHistory.tsx:
--------------------------------------------------------------------------------
1 | import { useState } from "react";
2 | import {
3 | Alert,
4 | AlertIcon,
5 | AlertDescription,
6 | VStack,
7 | HStack,
8 | Box,
9 | Accordion,
10 | AccordionItem,
11 | Heading,
12 | AccordionButton,
13 | AccordionPanel,
14 | AccordionIcon,
15 | Icon,
16 | Spacer,
17 | ColorProps,
18 | BackgroundProps,
19 | } from "@chakra-ui/react";
20 | import { TaskHistoryEntry } from "../state/currentTask";
21 | import { BsSortNumericDown, BsSortNumericUp } from "react-icons/bs";
22 | import { useAppState } from "../state/store";
23 | import CopyButton from "./CopyButton";
24 | import Notes from "./CustomKnowledgeBase/Notes";
25 |
26 | function MatchedNotes() {
27 | const knowledge = useAppState((state) => state.currentTask.knowledgeInUse);
28 | const notes = knowledge?.notes;
29 | if (!notes || notes.length === 0) {
30 | return null;
31 | }
32 |
33 | return (
34 |
35 |
36 |
37 |
38 | 0.
39 |
40 |
41 | Found {notes.length} instructions.
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 | You can customize instructions in the settings menu.
55 |
56 |
57 |
58 |
59 |
60 | );
61 | }
62 |
63 | type TaskHistoryItemProps = {
64 | index: number;
65 | entry: TaskHistoryEntry;
66 | };
67 |
68 | const CollapsibleComponent = (props: {
69 | title: string;
70 | subtitle?: string;
71 | text: string;
72 | }) => (
73 |
74 |
75 |
76 |
77 | {props.title}
78 |
79 | {props.subtitle && (
80 |
81 | {props.subtitle}
82 |
83 | )}
84 |
85 |
86 |
87 |
88 |
89 | {props.text.split("\n").map((line, index) => (
90 |
91 | {line}
92 |
93 |
94 | ))}
95 |
96 |
97 | );
98 |
99 | const TaskHistoryItem = ({ index, entry }: TaskHistoryItemProps) => {
100 | const itemTitle = entry.action.thought;
101 |
102 | const colors: {
103 | text: ColorProps["textColor"];
104 | bg: BackgroundProps["bgColor"];
105 | } = {
106 | text: undefined,
107 | bg: undefined,
108 | };
109 | if (entry.action.operation.name === "fail") {
110 | colors.text = "red.800";
111 | colors.bg = "red.100";
112 | } else if (entry.action.operation.name === "finish") {
113 | colors.text = "green.800";
114 | colors.bg = "green.100";
115 | }
116 |
117 | return (
118 |
119 |
120 |
121 |
122 | {index + 1}.
123 |
124 |
125 | {itemTitle}
126 |
127 |
128 |
129 |
130 |
131 |
132 | {entry.usage != null && (
133 | <>
134 |
139 |
144 |
148 | >
149 | )}
150 |
151 |
152 |
153 | );
154 | };
155 |
156 | export default function TaskHistory() {
157 | const { taskHistory, taskStatus } = useAppState((state) => ({
158 | taskStatus: state.currentTask.status,
159 | taskHistory: state.currentTask.history,
160 | }));
161 | const [sortNumericDown, setSortNumericDown] = useState(false);
162 | const toggleSort = () => {
163 | setSortNumericDown(!sortNumericDown);
164 | };
165 |
166 | if (taskHistory.length === 0 && taskStatus !== "running") return null;
167 | const historyItems = taskHistory.map((entry, index) => (
168 |
169 | ));
170 | historyItems.unshift();
171 | if (!sortNumericDown) {
172 | historyItems.reverse();
173 | }
174 |
175 | return (
176 |
177 |
178 |
179 | Action History
180 |
181 |
182 |
189 |
190 |
191 |
192 | {historyItems}
193 |
194 |
195 | );
196 | }
197 |
--------------------------------------------------------------------------------
/src/helpers/rpc/performAction.ts:
--------------------------------------------------------------------------------
1 | import { DomActions } from "./domActions";
2 | import {
3 | WEB_WAND_LABEL_ATTRIBUTE_NAME,
4 | VISIBLE_TEXT_ATTRIBUTE_NAME,
5 | } from "../../constants";
6 | import { sleep } from "../utils";
7 | import { type ToolOperation } from "../vision-agent/tools";
8 |
/** Builds the attribute selector that matches an element by its label. */
function getSelector(label: string): string {
  return "[" + WEB_WAND_LABEL_ATTRIBUTE_NAME + '="' + label + '"]';
}
12 |
/** Builds the attribute selector that matches an element by its visible text. */
function getFallbackSelector(selectorName: string): string {
  return "[" + VISIBLE_TEXT_ATTRIBUTE_NAME + '="' + selectorName + '"]';
}
16 |
/**
 * Clicks the element matched by a CSS selector.
 *
 * @param domActions - DOM action executor.
 * @param selector - CSS selector of the target element.
 * @returns The result reported by `domActions.clickWithSelector`.
 */
export async function clickWithSelector(
  domActions: DomActions,
  selector: string,
): Promise<boolean> {
  console.log("clickWithSelector", selector);
  return await domActions.clickWithSelector({
    selector,
  });
}
26 |
/**
 * Clicks the element identified by a numeric element id.
 *
 * @param domActions - DOM action executor.
 * @param elementId - Element id as a decimal string.
 * @returns The result reported by `domActions.clickWithElementId`.
 */
export async function clickWithElementId(
  domActions: DomActions,
  elementId: string,
): Promise<boolean> {
  console.log("clickWithElementId", elementId);
  return await domActions.clickWithElementId({
    // Explicit radix so the id is always read as decimal.
    elementId: parseInt(elementId, 10),
  });
}
36 |
/**
 * Clicks an element identified by a label, trying three selectors in order:
 * the label as an element id (`#label`), the label attribute selector, and
 * finally the visible-text attribute selector.
 *
 * @param domActions - DOM action executor.
 * @param label - Label of the target element.
 * @returns `true` as soon as one attempt succeeds, otherwise the result of
 * the last attempt.
 */
export async function clickWithLabel(
  domActions: DomActions,
  label: string,
): Promise<boolean> {
  console.log("clickWithLabel", label);
  let success = false;
  try {
    success = await domActions.clickWithSelector({
      selector: `#${label}`,
    });
  } catch {
    // `#${label}` might not be a valid selector; fall through to the next one
  }
  if (success) return true;
  success = await domActions.clickWithSelector({
    selector: getSelector(label),
  });
  if (success) return true;
  return await domActions.clickWithSelector({
    selector: getFallbackSelector(label),
  });
}
59 |
/**
 * Sets a value on the element matched by a CSS selector.
 *
 * @param domActions - Object that performs the update.
 * @param selector - Selector passed through unchanged.
 * @param value - Value passed through unchanged.
 * @returns Whatever `domActions.setValueWithSelector` resolves to.
 */
export async function setValueWithSelector(
  domActions: DomActions,
  selector: string,
  value: string,
): Promise<boolean> {
  console.log("setValueWithSelector", selector);
  const updated = await domActions.setValueWithSelector({ selector, value });
  return updated;
}
71 |
/**
 * Sets a value on the element identified by a numeric id supplied as a
 * string.
 *
 * @param domActions - Object that performs the update.
 * @param elementId - Decimal element id in string form.
 * @param value - Value passed through unchanged.
 * @returns Whatever `domActions.setValueWithElementId` resolves to.
 */
export async function setValueWithElementId(
  domActions: DomActions,
  elementId: string,
  value: string,
): Promise<boolean> {
  console.log("setValueWithElementId", elementId);
  // Explicit radix: ids are decimal, so never let a "0x" prefix switch to hex.
  const numericId = Number.parseInt(elementId, 10);
  const updated = await domActions.setValueWithElementId({
    elementId: numericId,
    value,
  });
  return updated;
}
83 |
/**
 * Sets a value on an element identified by a label, trying three selectors
 * in order and stopping at the first one that reports success:
 *
 * 1. `#label` (the label used as an element id),
 * 2. the selector from `getSelector(label)`,
 * 3. the selector from `getFallbackSelector(label)`.
 *
 * @param domActions - Object that performs the update.
 * @param label - Label used to build each candidate selector.
 * @param value - Value passed through unchanged to every attempt.
 * @returns `true` if an earlier attempt succeeded, otherwise the result of
 *   the final fallback attempt.
 */
export async function setValueWithLabel(
  domActions: DomActions,
  label: string,
  value: string,
): Promise<boolean> {
  console.log("setValueWithLabel", label);
  try {
    if (await domActions.setValueWithSelector({ selector: `#${label}`, value })) {
      return true;
    }
  } catch {
    // `#${label}` might not be a valid selector; ignore the failure and
    // fall through to the attribute-based selectors. Only this first attempt
    // is guarded; errors from the later attempts propagate to the caller.
  }
  if (
    await domActions.setValueWithSelector({
      selector: getSelector(label),
      value,
    })
  ) {
    return true;
  }
  return await domActions.setValueWithSelector({
    selector: getFallbackSelector(label),
    value,
  });
}
110 |
/**
 * Runs the scroll operation named by `value`.
 *
 * Recognised values are "up", "down", "top" and "bottom"; each awaits the
 * matching `domActions` scroll method. Any other value is reported with
 * `console.error` and nothing is scrolled.
 *
 * @param domActions - Object that performs the scrolling.
 * @param value - Name of the scroll operation.
 */
export async function scroll(domActions: DomActions, value: string) {
  // A Map (not a plain object) so inherited keys such as "constructor" can
  // never be mistaken for a scroll direction.
  const operations = new Map<string, () => Promise<unknown>>([
    ["up", () => domActions.scrollUp()],
    ["down", () => domActions.scrollDown()],
    ["top", () => domActions.scrollToTop()],
    ["bottom", () => domActions.scrollToBottom()],
  ]);

  const run = operations.get(value);
  if (run === undefined) {
    console.error("Invalid scroll value", value);
    return;
  }
  await run();
}
129 |
/**
 * Creates an executor for `ToolOperation`s that is parameterised by how an
 * element is located for clicking and for setting a value.
 *
 * The returned function builds a `DomActions` for the given tab and
 * dispatches on `action.name`:
 * - "scroll": delegates to `scroll` with `action.args.value`.
 * - "wait": sleeps for 3000 ms.
 * - "finish" / "fail": only log.
 * - "navigate": opens `action.args.url` via `window.open(..., "_blank")`.
 * - "click": calls `click` with `action.args.uid`.
 * - "setValue": calls `setValue` with the uid and the value (or "").
 * - "setValueAndEnter": as "setValue" but with "\n" appended to the value.
 * - anything else: logs an error.
 *
 * A falsy result from `click` / `setValue` is logged, not thrown.
 *
 * @param click - Strategy used to click the element with the given uid.
 * @param setValue - Strategy used to set a value on the element with the
 *   given uid.
 * @returns An async function that performs one operation in one tab.
 */
function createOperateTool(
  click: (domActions: DomActions, label: string) => Promise<boolean>,
  setValue: (
    domActions: DomActions,
    label: string,
    value: string,
  ) => Promise<boolean>,
): (tabId: number, action: ToolOperation) => Promise<void> {
  // Shared reporting for the three element-targeting actions.
  const reportIfMissing = (found: boolean, uid: string): void => {
    if (!found) {
      console.error("Unable to find element with uid: ", uid);
    }
  };

  return async (tabId: number, action: ToolOperation): Promise<void> => {
    const domActions = new DomActions(tabId);
    console.log("operateTool", action);
    switch (action.name) {
      case "scroll":
        await scroll(domActions, action.args.value);
        break;
      case "wait":
        await sleep(3000);
        break;
      case "finish":
        console.log("Action finished successfully.");
        break;
      case "fail":
        console.warn("Action failed.");
        break;
      case "navigate":
        console.log("Navigate to new page", action.args.url);
        window.open(action.args.url, "_blank");
        break;
      case "click":
        reportIfMissing(
          await click(domActions, action.args.uid),
          action.args.uid,
        );
        break;
      case "setValue":
        reportIfMissing(
          await setValue(domActions, action.args.uid, action.args.value || ""),
          action.args.uid,
        );
        break;
      case "setValueAndEnter":
        // The trailing newline is how "enter" is conveyed to `setValue`.
        reportIfMissing(
          await setValue(
            domActions,
            action.args.uid,
            (action.args.value || "") + "\n",
          ),
          action.args.uid,
        );
        break;
      default:
        console.error("Invalid action name", action);
    }
  };
}
192 |
/** Operation executor that targets elements by label. */
export const operateTool = createOperateTool(clickWithLabel, setValueWithLabel);

/**
 * Operation executor that targets elements by numeric element id instead of
 * by label. The DOM agent currently uses this one.
 */
export const operateToolWithSimpliedDom = createOperateTool(
  clickWithElementId,
  setValueWithElementId,
);

/** Operation executor that targets elements by a raw CSS selector. */
export const operateToolWithSelector = createOperateTool(
  clickWithSelector,
  setValueWithSelector,
);
205 |
--------------------------------------------------------------------------------
/src/common/settings/SetAPIKey.tsx:
--------------------------------------------------------------------------------
1 | import {
2 | AbsoluteCenter,
3 | Box,
4 | Button,
5 | Divider,
6 | Input,
7 | VStack,
8 | Text,
9 | Link,
10 | HStack,
11 | FormControl,
12 | FormLabel,
13 | } from "@chakra-ui/react";
14 | import React from "react";
15 | import { useAppState } from "../../state/store";
16 |
/** Props accepted by the `SetAPIKey` settings form. */
type SetAPIKeyProps = {
  /** Initial value for the OpenAI API key field. */
  initialOpenAIKey?: string;
  /** Initial value for the Anthropic API key field. */
  initialAnthropicKey?: string;
  /** Initial value for the Gemini API key field. */
  initialGeminiKey?: string;
  /**
   * Selects the first-run variant of the form.
   * NOTE(review): appears to hide the optional base-URL inputs when true;
   * confirm against the component's markup.
   */
  asInitializerView?: boolean;
  /** Called, if provided, after the settings have been saved. */
  onClose?: () => void;
};
24 |
25 | const SetAPIKey = ({
26 | asInitializerView = false,
27 | initialOpenAIKey = "",
28 | initialAnthropicKey = "",
29 | initialGeminiKey = "",
30 | onClose,
31 | }: SetAPIKeyProps) => {
32 | const { updateSettings, initialOpenAIBaseUrl, initialAnthropicBaseUrl } =
33 | useAppState((state) => ({
34 | initialOpenAIBaseUrl: state.settings.openAIBaseUrl,
35 | initialAnthropicBaseUrl: state.settings.anthropicBaseUrl,
36 | updateSettings: state.settings.actions.update,
37 | }));
38 |
39 | const [openAIKey, setOpenAIKey] = React.useState(initialOpenAIKey || "");
40 | const [anthropicKey, setAnthropicKey] = React.useState(
41 | initialAnthropicKey || "",
42 | );
43 | const [geminiKey, setGeminiKey] = React.useState(initialGeminiKey || "");
44 | const [openAIBaseUrl, setOpenAIBaseUrl] = React.useState(
45 | initialOpenAIBaseUrl || "",
46 | );
47 | const [anthropicBaseUrl, setAnthropicBaseUrl] = React.useState(
48 | initialAnthropicBaseUrl || "",
49 | );
50 |
51 | const [showPassword, setShowPassword] = React.useState(false);
52 |
53 | const onSave = () => {
54 | updateSettings({
55 | openAIKey,
56 | openAIBaseUrl,
57 | anthropicKey,
58 | anthropicBaseUrl,
59 | geminiKey,
60 | });
61 | onClose && onClose();
62 | };
63 |
64 | return (
65 |
66 |
67 | You’ll need an OpenAI or Anthropic API Key to run the Fuji in
68 | developer mode. If you don’t already have one available, you can
69 | create one in your{" "}
70 |
75 | OpenAI account
76 | {" "}
77 | or your{" "}
78 |
83 | Anthropic account
84 |
85 | .
86 |
87 |
88 | Fuji stores your API keys locally on your device, and they are only used
89 | to communicate with the OpenAI API and/or the Anthropic API.
90 |
91 |
92 |
93 |
94 | OpenAI
95 |
96 |
97 |
98 | OpenAI API Key
99 |
100 | setOpenAIKey(event.target.value)}
104 | type={showPassword ? "text" : "password"}
105 | />
106 | {asInitializerView && (
107 |
113 | )}
114 |
115 |
116 | {!asInitializerView && (
117 |
118 | Base Url (optional)
119 | setOpenAIBaseUrl(event.target.value)}
123 | type="text"
124 | />
125 |
126 | )}
127 |
128 |
129 |
130 |
131 | Anthropic
132 |
133 |
134 |
135 | Anthropic API Key
136 |
137 | setAnthropicKey(event.target.value)}
141 | type={showPassword ? "text" : "password"}
142 | />
143 | {asInitializerView && (
144 |
150 | )}
151 |
152 |
153 | {!asInitializerView && (
154 |
155 | Base Url (optional)
156 | setAnthropicBaseUrl(event.target.value)}
160 | type="text"
161 | />
162 |
163 | )}
164 |
165 |
166 |
167 |
168 | Gemini (Google)
169 |
170 |
171 |
172 | Gemini API Key
173 |
174 | setGeminiKey(event.target.value)}
178 | type={showPassword ? "text" : "password"}
179 | />
180 | {asInitializerView && (
181 |
187 | )}
188 |
189 |
190 |
191 |
199 |
200 | );
201 | };
202 |
203 | export default SetAPIKey;
204 |
--------------------------------------------------------------------------------