├── icon.png
├── tsconfig.json
├── eslint.config.mjs
├── tsconfig.build.json
├── src
    ├── tools
    │   ├── index.ts
    │   └── computer.ts
    ├── utils
    │   └── response.ts
    ├── index.ts
    ├── xdotoolStringToKeys.test.ts
    ├── main.ts
    ├── xdotoolStringToKeys.ts
    └── e2e.test.ts
├── LICENSE
├── manifest.json
├── .github
    └── workflows
    │   ├── auto-dependabot.yaml
    │   └── ci.yaml
├── server.json
├── package.json
├── .gitignore
└── README.md


/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/domdomegg/computer-use-mcp/HEAD/icon.png


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": ["@tsconfig/node-lts/tsconfig.json", "tsconfig-domdomegg/tsconfig.json"],
3 |   "include": [
4 |     "src/**/*"
5 |   ],
6 | }
7 | 


--------------------------------------------------------------------------------
/eslint.config.mjs:
--------------------------------------------------------------------------------
1 | import domdomegg from 'eslint-config-domdomegg';
2 | 
3 | /** @type {import('@typescript-eslint/utils').TSESLint.FlatConfig.ConfigFile} */
4 | export default [
5 | 	...domdomegg,
6 | ];
7 | 


--------------------------------------------------------------------------------
/tsconfig.build.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "extends": "./tsconfig.json",
 3 |   "compilerOptions": {
 4 |     "outDir": "dist",
 5 |     "declaration": true,
 6 |   },
 7 |   "exclude": [
 8 |     "**/*.test.ts",
 9 |   ],
10 | }
11 | 


--------------------------------------------------------------------------------
/src/tools/index.ts:
--------------------------------------------------------------------------------
1 | import type {McpServer} from '@modelcontextprotocol/sdk/server/mcp.js';
2 | import {registerComputer} from './computer.js';
3 | 
4 | export function registerAll(server: McpServer): void {
5 | 	registerComputer(server);
6 | }
7 | 


--------------------------------------------------------------------------------
/src/utils/response.ts:
--------------------------------------------------------------------------------
1 | import type {CallToolResult} from '@modelcontextprotocol/sdk/types.js';
2 | 
3 | export function jsonResult<T extends Record<string, unknown>>(data: T): CallToolResult & {structuredContent: T} {
4 | 	return {
5 | 		content: [{type: 'text', text: JSON.stringify(data, null, 2)}],
6 | 		structuredContent: data,
7 | 	};
8 | }
9 | 


--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
 1 | // Library exports for programmatic usage
 2 | import {readFileSync} from 'node:fs';
 3 | import {McpServer} from '@modelcontextprotocol/sdk/server/mcp.js';
 4 | import {registerAll} from './tools/index.js';
 5 | 
 6 | /** Version reported when package.json cannot be read or lacks a string `version`. */
 7 | const FALLBACK_VERSION = '1.0.0';
 8 | 
 9 | /**
10 |  * Read this package's version from the package.json one directory above this module.
11 |  *
12 |  * Used so the version advertised to MCP clients follows the published package
13 |  * version rather than a hard-coded string that goes stale on every release.
14 |  */
15 | function readPackageVersion(): string {
16 | 	try {
17 | 		const manifest: unknown = JSON.parse(readFileSync(new URL('../package.json', import.meta.url), 'utf8'));
18 | 		if (
19 | 			typeof manifest === 'object'
20 | 			&& manifest !== null
21 | 			&& 'version' in manifest
22 | 			&& typeof manifest.version === 'string'
23 | 		) {
24 | 			return manifest.version;
25 | 		}
26 | 	} catch {
27 | 		// Unreadable or malformed package.json - use the fallback below
28 | 	}
29 | 
30 | 	return FALLBACK_VERSION;
31 | }
32 | 
33 | /** Create an MCP server named `computer-use-mcp` with all tools registered (not yet connected to a transport). */
34 | export function createServer(): McpServer {
35 | 	const server = new McpServer({
36 | 		name: 'computer-use-mcp',
37 | 		version: readPackageVersion(),
38 | 	});
39 | 
40 | 	registerAll(server);
41 | 
42 | 	return server;
43 | }
44 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) Adam Jones (domdomegg)
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/manifest.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "manifest_version": "0.3",
 3 |   "name": "computer-use-mcp",
 4 |   "display_name": "Computer Use",
 5 |   "version": "{{VERSION}}",
 6 |   "description": "💻 MCP server for Claude to control your computer. Implements computer use capabilities similar to Anthropic's official computer use guide.",
 7 |   "icon": "icon.png",
 8 |   "author": {
 9 |     "name": "Adam Jones (domdomegg)",
10 |     "url": "https://github.com/domdomegg"
11 |   },
12 |   "repository": {
13 |     "type": "git",
14 |     "url": "https://github.com/domdomegg/computer-use-mcp"
15 |   },
16 |   "server": {
17 |     "type": "node",
18 |     "entry_point": "dist/main.js",
19 |     "mcp_config": {
20 |       "command": "node",
21 |       "args": ["${__dirname}/dist/main.js"]
22 |     }
23 |   },
24 |   "tools": [
25 |     {
26 |       "name": "computer",
27 |       "description": "Use a computer to complete tasks. This tool can take screenshots, click, type, scroll, and more."
28 |     }
29 |   ],
30 |   "compatibility": {
31 |     "claude_desktop": ">=0.10.0",
32 |     "platforms": ["darwin", "win32", "linux"],
33 |     "runtimes": {
34 |       "node": ">=16.0.0"
35 |     }
36 |   }
37 | }


--------------------------------------------------------------------------------
/.github/workflows/auto-dependabot.yaml:
--------------------------------------------------------------------------------
 1 | # This file is centrally managed
 2 | # https://github.com/domdomegg/domdomegg/blob/master/file-sync/auto-dependabot.yaml
 3 | 
 4 | name: Dependabot automation
 5 | 
 6 | on:
 7 |   pull_request:
 8 |     types:
 9 |       - opened
10 |       - reopened
11 |       - synchronize
12 |       - edited
13 |       - ready_for_review
14 |       - unlabeled
15 | 
16 | permissions:
17 |   pull-requests: write
18 |   contents: write
19 | 
20 | jobs:
21 |   dependabot_automation:
22 |     runs-on: ubuntu-latest
23 |     timeout-minutes: 10
24 |     if: ${{ github.actor == 'dependabot[bot]' && github.event.pull_request.head.repo.full_name == github.repository}}
25 |     steps:
26 |       - name: Approve
27 |         run: gh pr review --approve "$PR_URL"
28 |         env:
29 |           PR_URL: ${{github.event.pull_request.html_url}}
30 |           GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
31 |       - name: Enable auto-merge
32 |         if: ${{ !contains(github.event.pull_request.labels.*.name, 'do not merge') }}
33 |         run: gh pr merge --auto --squash "$PR_URL"
34 |         env:
35 |           PR_URL: ${{github.event.pull_request.html_url}}
36 |           GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
37 | 


--------------------------------------------------------------------------------
/server.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://static.modelcontextprotocol.io/schemas/2025-10-17/server.schema.json",
 3 |   "name": "io.github.domdomegg/computer-use-mcp",
 4 |   "title": "Computer Use",
 5 |   "description": "Control your computer with screenshots, mouse, and keyboard automation.",
 6 |   "websiteUrl": "https://github.com/domdomegg/computer-use-mcp#readme",
 7 |   "icons": [{
 8 |     "mimeType": "image/png",
 9 |     "src": "https://raw.githubusercontent.com/domdomegg/computer-use-mcp/refs/heads/master/icon.png"
10 |   }],
11 |   "repository": {
12 |     "url": "https://github.com/domdomegg/computer-use-mcp.git",
13 |     "source": "github"
14 |   },
15 |   "version": "{{VERSION}}",
16 |   "packages": [
17 |     {
18 |       "registryType": "npm",
19 |       "identifier": "computer-use-mcp",
20 |       "version": "{{VERSION}}",
21 |       "runtimeHint": "npx",
22 |       "environmentVariables": [],
23 |       "transport": {
24 |         "type": "stdio"
25 |       }
26 |     },
27 |     {
28 |       "registryType": "mcpb",
29 |       "identifier": "https://github.com/domdomegg/computer-use-mcp/releases/download/v{{VERSION}}/computer-use-mcp.mcpb",
30 |       "version": "{{VERSION}}",
31 |       "fileSha256": "{{MCPB_FILE_SHA256}}",
32 |       "transport": {
33 |         "type": "stdio"
34 |       }
35 |     }
36 |   ]
37 | }
38 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "computer-use-mcp",
 3 |   "version": "1.5.0",
 4 |   "description": "💻 MCP server for Claude to control your computer",
 5 |   "license": "MIT",
 6 |   "author": "Adam Jones (domdomegg)",
 7 |   "repository": {
 8 |     "type": "git",
 9 |     "url": "https://github.com/domdomegg/computer-use-mcp.git"
10 |   },
11 |   "type": "module",
12 |   "main": "dist/main.js",
13 |   "types": "dist/index.d.ts",
14 |   "bin": "dist/main.js",
15 |   "files": [
16 |     "dist"
17 |   ],
18 |   "mcpName": "io.github.domdomegg/computer-use-mcp",
19 |   "scripts": {
20 |     "start": "npm run build && node dist/main.js",
21 |     "start:http": "npm run build && MCP_TRANSPORT=http node dist/main.js",
22 |     "test": "vitest run",
23 |     "test:watch": "vitest --watch",
24 |     "test:e2e": "vitest run --config vitest.e2e.config.ts",
25 |     "lint": "eslint",
26 |     "clean": "rm -rf dist",
27 |     "build": "tsc --project tsconfig.build.json",
28 |     "build:mcpb": "./build-mcpb.sh",
29 |     "prepublishOnly": "npm run clean && npm run build"
30 |   },
31 |   "dependencies": {
32 |     "@modelcontextprotocol/sdk": "^1.24.3",
33 |     "@nut-tree-fork/nut-js": "^4.2.6",
34 |     "express": "^5.1.0",
35 |     "sharp": "^0.34.5",
36 |     "zod": "^4.1.13"
37 |   },
38 |   "devDependencies": {
39 |     "@tsconfig/node-lts": "^24.0.0",
40 |     "@types/express": "^5.0.6",
41 |     "@types/node": "^24.10.1",
42 |     "eslint": "^9.39.1",
43 |     "eslint-config-domdomegg": "^2.0.9",
44 |     "tsconfig-domdomegg": "^1.0.0",
45 |     "typescript": "^5.9.3",
46 |     "vitest": "^4.0.15"
47 |   }
48 | }
49 | 


--------------------------------------------------------------------------------
/src/xdotoolStringToKeys.test.ts:
--------------------------------------------------------------------------------
 1 | import {describe, it, expect} from 'vitest';
 2 | import {Key} from '@nut-tree-fork/nut-js';
 3 | import {toKeys, InvalidKeyError} from './xdotoolStringToKeys.js';
 4 | 
 5 | describe('toKeys', () => {
 6 | 	it('should convert single keys', () => {
 7 | 		expect(toKeys('a')).toEqual([Key.A]);
 8 | 		expect(toKeys('Return')).toEqual([Key.Return]);
 9 | 		expect(toKeys('space')).toEqual([Key.Space]);
10 | 	});
11 | 
12 | 	it('should convert key combinations', () => {
13 | 		expect(toKeys('Control_L+a')).toEqual([Key.LeftControl, Key.A]);
14 | 		expect(toKeys('Shift_L+Return')).toEqual([Key.LeftShift, Key.Return]);
15 | 		expect(toKeys('Alt_L+Tab')).toEqual([Key.LeftAlt, Key.Tab]);
16 | 		expect(toKeys('Control_L+Alt_L+Delete')).toEqual([Key.LeftControl, Key.LeftAlt, Key.Delete]);
17 | 	});
18 | 
19 | 	it('should handle function keys', () => {
20 | 		expect(toKeys('F1')).toEqual([Key.F1]);
21 | 		expect(toKeys('F12')).toEqual([Key.F12]);
22 | 		expect(toKeys('Control_L+F5')).toEqual([Key.LeftControl, Key.F5]);
23 | 	});
24 | 
25 | 	it('should handle navigation keys', () => {
26 | 		expect(toKeys('Home')).toEqual([Key.Home]);
27 | 		expect(toKeys('Left')).toEqual([Key.Left]);
28 | 		expect(toKeys('Page_Up')).toEqual([Key.PageUp]);
29 | 		expect(toKeys('Prior')).toEqual([Key.PageUp]); // Prior is an alias for Page_Up
30 | 	});
31 | 
32 | 	it('should handle keypad keys', () => {
33 | 		expect(toKeys('KP_0')).toEqual([Key.NumPad0]);
34 | 		expect(toKeys('KP_Add')).toEqual([Key.Add]);
35 | 		expect(toKeys('Num_Lock')).toEqual([Key.NumLock]);
36 | 	});
37 | 
38 | 	it('should handle case insensitivity', () => {
39 | 		expect(toKeys('RETURN')).toEqual([Key.Return]);
40 | 		expect(toKeys('Return')).toEqual([Key.Return]);
41 | 		expect(toKeys('return')).toEqual([Key.Return]);
42 | 		expect(toKeys('CONTROL_L+A')).toEqual([Key.LeftControl, Key.A]);
43 | 	});
44 | 
45 | 	it('should handle whitespace', () => {
46 | 		expect(toKeys('Control_L + a')).toEqual([Key.LeftControl, Key.A]);
47 | 		expect(toKeys(' Return ')).toEqual([Key.Return]);
48 | 		expect(toKeys('Control_L + Alt_L + Delete')).toEqual([Key.LeftControl, Key.LeftAlt, Key.Delete]);
49 | 	});
50 | 
51 | 	it('should throw InvalidKeyError for invalid keys', () => {
52 | 		expect(() => toKeys('')).toThrow(InvalidKeyError);
53 | 		expect(() => toKeys('invalid')).toThrow(InvalidKeyError);
54 | 		expect(() => toKeys('Control_L+invalid')).toThrow(InvalidKeyError);
55 | 		expect(() => toKeys('kp_enter')).toThrow(InvalidKeyError);
56 | 	});
57 | });
58 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Logs
  2 | logs
  3 | *.log
  4 | npm-debug.log*
  5 | yarn-debug.log*
  6 | yarn-error.log*
  7 | lerna-debug.log*
  8 | .pnpm-debug.log*
  9 | 
 10 | # Diagnostic reports (https://nodejs.org/api/report.html)
 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
 12 | 
 13 | # Runtime data
 14 | pids
 15 | *.pid
 16 | *.seed
 17 | *.pid.lock
 18 | 
 19 | # Directory for instrumented libs generated by jscoverage/JSCover
 20 | lib-cov
 21 | 
 22 | # Coverage directory used by tools like istanbul
 23 | coverage
 24 | *.lcov
 25 | 
 26 | # nyc test coverage
 27 | .nyc_output
 28 | 
 29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
 30 | .grunt
 31 | 
 32 | # Bower dependency directory (https://bower.io/)
 33 | bower_components
 34 | 
 35 | # node-waf configuration
 36 | .lock-wscript
 37 | 
 38 | # Compiled binary addons (https://nodejs.org/api/addons.html)
 39 | build/Release
 40 | 
 41 | # Dependency directories
 42 | node_modules/
 43 | jspm_packages/
 44 | 
 45 | # Snowpack dependency directory (https://snowpack.dev/)
 46 | web_modules/
 47 | 
 48 | # TypeScript cache
 49 | *.tsbuildinfo
 50 | 
 51 | # Optional npm cache directory
 52 | .npm
 53 | 
 54 | # Optional eslint cache
 55 | .eslintcache
 56 | 
 57 | # Microbundle cache
 58 | .rpt2_cache/
 59 | .rts2_cache_cjs/
 60 | .rts2_cache_es/
 61 | .rts2_cache_umd/
 62 | 
 63 | # Optional REPL history
 64 | .node_repl_history
 65 | 
 66 | # Yarn Integrity file
 67 | .yarn-integrity
 68 | 
 69 | # dotenv environment variables file
 70 | .env
 71 | .env.test
 72 | .env.production
 73 | 
 74 | # parcel-bundler cache (https://parceljs.org/)
 75 | .cache
 76 | .parcel-cache
 77 | 
 78 | # Next.js build output
 79 | .next
 80 | out
 81 | 
 82 | # Nuxt.js build / generate output
 83 | .nuxt
 84 | dist
 85 | 
 86 | # Gatsby files
 87 | .cache/
 88 | # Comment in the public line in if your project uses Gatsby and not Next.js
 89 | # https://nextjs.org/blog/next-9-1#public-directory-support
 90 | # public
 91 | 
 92 | # vuepress build output
 93 | .vuepress/dist
 94 | 
 95 | # Serverless directories
 96 | .serverless/
 97 | 
 98 | # FuseBox cache
 99 | .fusebox/
100 | 
101 | # DynamoDB Local files
102 | .dynamodb/
103 | 
104 | # TernJS port file
105 | .tern-port
106 | 
107 | # Stores VSCode versions used for testing VSCode extensions
108 | .vscode-test
109 | 
110 | # yarn v2
111 | .yarn/cache
112 | .yarn/unplugged
113 | .yarn/build-state.yml
114 | .yarn/install-state.gz
115 | .pnp.*
116 | 
117 | # built artifacts
118 | build/
119 | dist/
120 | computer-use-mcp.dxt
121 | computer-use-mcp.mcpb
122 | 


--------------------------------------------------------------------------------
/src/main.ts:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | import {execSync} from 'node:child_process';
 3 | import {dirname} from 'node:path';
 4 | import {fileURLToPath} from 'node:url';
 5 | 
 6 | // Clear macOS quarantine attributes from native binaries before importing them
 7 | // This is needed for MCPB packages downloaded from the internet
 8 | if (process.platform === 'darwin') {
 9 | 	try {
10 | 		const projectRoot = dirname(dirname(fileURLToPath(import.meta.url)));
11 | 		execSync(`xattr -cr "${projectRoot}/node_modules"`, {stdio: 'ignore'});
12 | 	} catch {
13 | 		// Ignore errors - xattr may not exist or may fail on some files
14 | 	}
15 | }
16 | 
17 | import {StdioServerTransport} from '@modelcontextprotocol/sdk/server/stdio.js';
18 | import {StreamableHTTPServerTransport} from '@modelcontextprotocol/sdk/server/streamableHttp.js';
19 | import express from 'express';
20 | import {createServer} from './index.js';
21 | 
22 | function setupSignalHandlers(cleanup: () => Promise<void>): void {
23 | 	process.on('SIGINT', async () => {
24 | 		await cleanup();
25 | 		process.exit(0);
26 | 	});
27 | 	process.on('SIGTERM', async () => {
28 | 		await cleanup();
29 | 		process.exit(0);
30 | 	});
31 | }
32 | 
33 | (async () => {
34 | 	const transport = process.env.MCP_TRANSPORT || 'stdio';
35 | 
36 | 	if (transport === 'stdio') {
37 | 		const server = createServer();
38 | 		setupSignalHandlers(async () => server.close());
39 | 
40 | 		const stdioTransport = new StdioServerTransport();
41 | 		await server.connect(stdioTransport);
42 | 		console.error('Computer Use MCP server running on stdio');
43 | 	} else if (transport === 'http') {
44 | 		const app = express();
45 | 		app.use(express.json());
46 | 
47 | 		const httpTransport = new StreamableHTTPServerTransport({
48 | 			sessionIdGenerator: undefined,
49 | 			enableJsonResponse: true,
50 | 		});
51 | 
52 | 		app.post('/mcp', async (req, res) => {
53 | 			await httpTransport.handleRequest(req, res, req.body);
54 | 		});
55 | 
56 | 		const server = createServer();
57 | 		await server.connect(httpTransport);
58 | 
59 | 		const port = parseInt(process.env.PORT || '3000', 10);
60 | 		const httpServer = app.listen(port, () => {
61 | 			console.error(`Computer Use MCP server running on http://localhost:${port}/mcp`);
62 | 			console.error('WARNING: HTTP transport has no authentication. Only use behind a reverse proxy or in a secured setup.');
63 | 		});
64 | 
65 | 		setupSignalHandlers(async () => {
66 | 			await server.close();
67 | 			httpServer.close();
68 | 		});
69 | 	} else {
70 | 		console.error(`Unknown transport: ${transport}. Use MCP_TRANSPORT=stdio or MCP_TRANSPORT=http`);
71 | 		process.exit(1);
72 | 	}
73 | })();
74 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
  1 | name: CI/CD
  2 | 
  3 | on:
  4 |   push:
  5 |     branches: [master]
  6 |     tags:
  7 |       - 'v*'
  8 |   pull_request:
  9 | 
 10 | jobs:
 11 |   ci:
 12 |     runs-on: ubuntu-latest
 13 |     timeout-minutes: 10
 14 |     strategy:
 15 |       matrix:
 16 |         node-version: [lts/*, current]
 17 |     env:
 18 |       CI: true
 19 |     steps:
 20 |       - name: Checkout ${{ github.sha }}
 21 |         uses: actions/checkout@v4
 22 |       - name: Use Node.js ${{ matrix.node-version }}
 23 |         uses: actions/setup-node@v4
 24 |         with:
 25 |           node-version: ${{ matrix.node-version }}
 26 |           registry-url: https://registry.npmjs.org/
 27 |       - name: Install dependencies
 28 |         run: npm ci
 29 |       - name: Lint
 30 |         run: npm run lint --if-present
 31 |       - name: Build
 32 |         run: npm run build --if-present
 33 |       - name: Test
 34 |         run: npm run test --if-present
 35 |       - name: Prepare MCPB artifact
 36 |         if: matrix.node-version == 'lts/*'
 37 |         run: |
 38 |           npm run build:mcpb
 39 |           mkdir -p .github/tmp
 40 |           unzip computer-use-mcp.mcpb -d .github/tmp
 41 |       - name: Upload MCPB artifact
 42 |         if: matrix.node-version == 'lts/*'
 43 |         uses: actions/upload-artifact@v4
 44 |         with:
 45 |           name: computer-use-mcp-mcpb
 46 |           path: .github/tmp/*
 47 | 
 48 |   deploy:
 49 |     if: startsWith(github.ref, 'refs/tags/v') && github.event_name == 'push'
 50 |     needs: ci
 51 |     runs-on: ubuntu-latest
 52 |     timeout-minutes: 15
 53 |     permissions:
 54 |       contents: write
 55 |       id-token: write
 56 |     env:
 57 |       CI: true
 58 |     steps:
 59 |       - name: Checkout ${{ github.sha }}
 60 |         uses: actions/checkout@v4
 61 |       - name: Use Node.js with the npmjs.org registry
 62 |         uses: actions/setup-node@v4
 63 |         with:
 64 |           node-version: lts/*
 65 |           registry-url: https://registry.npmjs.org/
 66 |       - name: Install dependencies
 67 |         run: npm ci
 68 |       - name: Build
 69 |         run: npm run build --if-present
 70 |       - name: Build MCPB
 71 |         run: npm run build:mcpb
 72 |       - name: Update server.json version and SHA256
 73 |         run: |
 74 |           VERSION=$(node -p "require('./package.json').version")
 75 |           MCPB_FILE_SHA256=$(sha256sum computer-use-mcp.mcpb | cut -d' ' -f1)
 76 |           sed "s/{{VERSION}}/$VERSION/g; s/{{MCPB_FILE_SHA256}}/$MCPB_FILE_SHA256/g" server.json > server.json.tmp
 77 |           mv server.json.tmp server.json
 78 |       - uses: google-github-actions/auth@v2
 79 |         with:
 80 |           workload_identity_provider: 'projects/457105351064/locations/global/workloadIdentityPools/github-secrets-pool/providers/github-secrets-github'
 81 |       - uses: google-github-actions/setup-gcloud@v2
 82 |       - name: Get NPM token
 83 |         id: npm-token
 84 |         run: |
 85 |           token=$(gcloud secrets versions access latest --secret=npm-token --project=gcp-github-secrets)
 86 |           echo "::add-mask::$token"
 87 |           echo "token=$token" >> "$GITHUB_OUTPUT"
 88 |       - name: Publish to NPM
 89 |         run: npm publish
 90 |         env:
 91 |           NODE_AUTH_TOKEN: ${{ steps.npm-token.outputs.token }}
 92 |       - name: Create GitHub Release
 93 |         uses: softprops/action-gh-release@v2
 94 |         with:
 95 |           files: computer-use-mcp.mcpb
 96 |           generate_release_notes: true
 97 |         env:
 98 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 99 |       - name: Install MCP Publisher
100 |         run: |
101 |           curl -L "https://github.com/modelcontextprotocol/registry/releases/latest/download/mcp-publisher_$(uname -s | tr '[:upper:]' '[:lower:]')_$(uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/').tar.gz" | tar xz
102 |       - name: Login to MCP Registry
103 |         run: ./mcp-publisher login github-oidc
104 |       - name: Publish to MCP Registry
105 |         run: ./mcp-publisher publish
106 | 


--------------------------------------------------------------------------------
/src/xdotoolStringToKeys.ts:
--------------------------------------------------------------------------------
  1 | import {Key} from '@nut-tree-fork/nut-js';
  2 | 
  3 | const keyMap: Record<string, Key> = {
  4 | 	// Function keys
  5 | 	f1: Key.F1,
  6 | 	f2: Key.F2,
  7 | 	f3: Key.F3,
  8 | 	f4: Key.F4,
  9 | 	f5: Key.F5,
 10 | 	f6: Key.F6,
 11 | 	f7: Key.F7,
 12 | 	f8: Key.F8,
 13 | 	f9: Key.F9,
 14 | 	f10: Key.F10,
 15 | 	f11: Key.F11,
 16 | 	f12: Key.F12,
 17 | 	f13: Key.F13,
 18 | 	f14: Key.F14,
 19 | 	f15: Key.F15,
 20 | 	f16: Key.F16,
 21 | 	f17: Key.F17,
 22 | 	f18: Key.F18,
 23 | 	f19: Key.F19,
 24 | 	f20: Key.F20,
 25 | 	f21: Key.F21,
 26 | 	f22: Key.F22,
 27 | 	f23: Key.F23,
 28 | 	f24: Key.F24,
 29 | 
 30 | 	// Navigation
 31 | 	home: Key.Home,
 32 | 	left: Key.Left,
 33 | 	up: Key.Up,
 34 | 	right: Key.Right,
 35 | 	down: Key.Down,
 36 | 	page_up: Key.PageUp,
 37 | 	pageup: Key.PageUp,
 38 | 	prior: Key.PageUp,
 39 | 	page_down: Key.PageDown,
 40 | 	pagedown: Key.PageDown,
 41 | 	next: Key.PageDown,
 42 | 	end: Key.End,
 43 | 
 44 | 	// Editing
 45 | 	return: Key.Return,
 46 | 	enter: Key.Return,
 47 | 	tab: Key.Tab,
 48 | 	space: Key.Space,
 49 | 	backspace: Key.Backspace,
 50 | 	delete: Key.Delete,
 51 | 	del: Key.Delete,
 52 | 	escape: Key.Escape,
 53 | 	esc: Key.Escape,
 54 | 	insert: Key.Insert,
 55 | 	ins: Key.Insert,
 56 | 
 57 | 	// Modifiers
 58 | 	shift_l: Key.LeftShift,
 59 | 	shift_r: Key.RightShift,
 60 | 	l_shift: Key.LeftShift,
 61 | 	r_shift: Key.RightShift,
 62 | 	shift: Key.LeftShift,
 63 | 
 64 | 	control_l: Key.LeftControl,
 65 | 	control_r: Key.RightControl,
 66 | 	l_control: Key.LeftControl,
 67 | 	r_control: Key.RightControl,
 68 | 	control: Key.LeftControl,
 69 | 	ctrl_l: Key.LeftControl,
 70 | 	ctrl_r: Key.RightControl,
 71 | 	l_ctrl: Key.LeftControl,
 72 | 	r_ctrl: Key.RightControl,
 73 | 	ctrl: Key.LeftControl,
 74 | 
 75 | 	alt_l: Key.LeftAlt,
 76 | 	alt_r: Key.RightAlt,
 77 | 	l_alt: Key.LeftAlt,
 78 | 	r_alt: Key.RightAlt,
 79 | 	alt: Key.LeftAlt,
 80 | 
 81 | 	super_l: Key.LeftSuper,
 82 | 	super_r: Key.RightSuper,
 83 | 	l_super: Key.LeftSuper,
 84 | 	r_super: Key.RightSuper,
 85 | 	super: Key.LeftSuper,
 86 | 	win_l: Key.LeftSuper,
 87 | 	win_r: Key.RightSuper,
 88 | 	l_win: Key.LeftSuper,
 89 | 	r_win: Key.RightSuper,
 90 | 	win: Key.LeftSuper,
 91 | 	meta_l: Key.LeftSuper,
 92 | 	meta_r: Key.RightSuper,
 93 | 	l_meta: Key.LeftSuper,
 94 | 	r_meta: Key.RightSuper,
 95 | 	meta: Key.LeftSuper,
 96 | 	command: Key.LeftSuper,
 97 | 	command_l: Key.LeftSuper,
 98 | 	l_command: Key.LeftSuper,
 99 | 	command_r: Key.RightSuper,
100 | 	r_command: Key.RightSuper,
101 | 	cmd: Key.LeftSuper,
102 | 	cmd_l: Key.LeftSuper,
103 | 	l_cmd: Key.LeftSuper,
104 | 	cmd_r: Key.RightSuper,
105 | 	r_cmd: Key.RightSuper,
106 | 
107 | 	caps_lock: Key.CapsLock,
108 | 	capslock: Key.CapsLock,
109 | 	caps: Key.CapsLock,
110 | 
111 | 	// Keypad
112 | 	kp_0: Key.NumPad0,
113 | 	kp_1: Key.NumPad1,
114 | 	kp_2: Key.NumPad2,
115 | 	kp_3: Key.NumPad3,
116 | 	kp_4: Key.NumPad4,
117 | 	kp_5: Key.NumPad5,
118 | 	kp_6: Key.NumPad6,
119 | 	kp_7: Key.NumPad7,
120 | 	kp_8: Key.NumPad8,
121 | 	kp_9: Key.NumPad9,
122 | 	kp_divide: Key.Divide,
123 | 	kp_multiply: Key.Multiply,
124 | 	kp_subtract: Key.Subtract,
125 | 	kp_add: Key.Add,
126 | 	kp_decimal: Key.Decimal,
127 | 	kp_equal: Key.NumPadEqual,
128 | 	num_lock: Key.NumLock,
129 | 	numlock: Key.NumLock,
130 | 
131 | 	// Letters
132 | 	a: Key.A,
133 | 	b: Key.B,
134 | 	c: Key.C,
135 | 	d: Key.D,
136 | 	e: Key.E,
137 | 	f: Key.F,
138 | 	g: Key.G,
139 | 	h: Key.H,
140 | 	i: Key.I,
141 | 	j: Key.J,
142 | 	k: Key.K,
143 | 	l: Key.L,
144 | 	m: Key.M,
145 | 	n: Key.N,
146 | 	o: Key.O,
147 | 	p: Key.P,
148 | 	q: Key.Q,
149 | 	r: Key.R,
150 | 	s: Key.S,
151 | 	t: Key.T,
152 | 	u: Key.U,
153 | 	v: Key.V,
154 | 	w: Key.W,
155 | 	x: Key.X,
156 | 	y: Key.Y,
157 | 	z: Key.Z,
158 | 
159 | 	// Numbers
160 | 	0: Key.Num0,
161 | 	1: Key.Num1,
162 | 	2: Key.Num2,
163 | 	3: Key.Num3,
164 | 	4: Key.Num4,
165 | 	5: Key.Num5,
166 | 	6: Key.Num6,
167 | 	7: Key.Num7,
168 | 	8: Key.Num8,
169 | 	9: Key.Num9,
170 | 
171 | 	// Punctuation
172 | 	minus: Key.Minus,
173 | 	equal: Key.Equal,
174 | 	bracketleft: Key.LeftBracket,
175 | 	bracketright: Key.RightBracket,
176 | 	bracket_l: Key.LeftBracket,
177 | 	bracket_r: Key.RightBracket,
178 | 	l_bracket: Key.LeftBracket,
179 | 	r_bracket: Key.RightBracket,
180 | 	backslash: Key.Backslash,
181 | 	semicolon: Key.Semicolon,
182 | 	semi: Key.Semicolon,
183 | 	quote: Key.Quote,
184 | 	grave: Key.Grave,
185 | 	comma: Key.Comma,
186 | 	period: Key.Period,
187 | 	slash: Key.Slash,
188 | 
189 | 	// Media keys
190 | 	audio_mute: Key.AudioMute,
191 | 	mute: Key.AudioMute,
192 | 	audio_vol_down: Key.AudioVolDown,
193 | 	voldown: Key.AudioVolDown,
194 | 	vol_down: Key.AudioVolDown,
195 | 	audio_vol_up: Key.AudioVolUp,
196 | 	volup: Key.AudioVolUp,
197 | 	vol_up: Key.AudioVolUp,
198 | 	audio_play: Key.AudioPlay,
199 | 	play: Key.AudioPlay,
200 | 	audio_stop: Key.AudioStop,
201 | 	stop: Key.AudioStop,
202 | 	audio_pause: Key.AudioPause,
203 | 	pause: Key.AudioPause,
204 | 	audio_prev: Key.AudioPrev,
205 | 	audio_next: Key.AudioNext,
206 | };
207 | 
208 | /** Thrown by `toKeys` when the input is empty or contains a key name that is not in `keyMap`. */
209 | export class InvalidKeyError extends Error {
210 | 	constructor(key: string) {
211 | 		super(`Invalid key: ${key}`);
212 | 		this.name = 'InvalidKeyError';
213 | 	}
214 | }
215 | 
216 | /**
217 |  * Convert an xdotool-style key string (e.g. `Control_L+a`) into nut-js keys.
218 |  *
219 |  * Key names are separated by `+`, surrounding whitespace is ignored and
220 |  * matching is case-insensitive.
221 |  *
222 |  * @param xdotoolString - One key name, or several joined with `+`.
223 |  * @returns The mapped keys, in the order they appear in the input.
224 |  * @throws InvalidKeyError if the string is empty or any name is not in `keyMap`.
225 |  */
226 | export const toKeys = (xdotoolString: string): Key[] => {
227 | 	if (!xdotoolString) {
228 | 		throw new InvalidKeyError('Empty string');
229 | 	}
230 | 
231 | 	return xdotoolString.split('+').map((keyStr) => {
232 | 		const key = keyStr.trim().toLowerCase();
233 | 		// Own-property check: a plain index lookup would also find inherited members
234 | 		// such as `constructor` or `__proto__` and return them as if they were keys
235 | 		const mappedKey = Object.prototype.hasOwnProperty.call(keyMap, key) ? keyMap[key] : undefined;
236 | 
237 | 		if (mappedKey === undefined) {
238 | 			throw new InvalidKeyError(key);
239 | 		}
240 | 
241 | 		return mappedKey;
242 | 	});
243 | };
244 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # computer-use-mcp
  2 | 
  3 | 💻 A Model Context Protocol (MCP) server for Claude to control your computer. This is very similar to [computer use](https://docs.anthropic.com/en/docs/build-with-claude/computer-use), but easy to set up and use locally.
  4 | 
  5 | Here's Claude Haiku 4.5 changing my desktop background (4x speed):
  6 | 
  7 | https://github.com/user-attachments/assets/cd0bc190-52c4-49db-b3bc-4b8a74544789
  8 | 
  9 | > [!WARNING]
 10 | > At time of writing, models make frequent mistakes and are vulnerable to prompt injections. As this MCP server gives the model complete control of your computer, this could do a lot of damage. You should therefore treat this like giving a hyperactive toddler access to your computer - you probably want to supervise it closely, and consider only doing this in a sandboxed user account.
 11 | 
 12 | ## Installation
 13 | 
 14 | <details>
 15 | <summary><strong>Claude Code</strong></summary>
 16 | 
 17 | Run:
 18 | 
 19 | ```bash
 20 | claude mcp add --scope user --transport stdio computer-use -- npx -y computer-use-mcp
 21 | ```
 22 | 
 23 | This installs the server at user scope (available in all projects). To install locally (current directory only), omit `--scope user`.
 24 | 
 25 | </details>
 26 | 
 27 | <details>
 28 | <summary><strong>Claude Desktop</strong></summary>
 29 | 
 30 | #### (Recommended) Via manual .mcpb installation
 31 | 
 32 | 1. Find the latest MCPB build in [the GitHub Actions history](https://github.com/domdomegg/computer-use-mcp/actions/workflows/ci.yaml?query=branch%3Amaster) (the top one)
 33 | 2. In the 'Artifacts' section, download the `computer-use-mcp-mcpb` file
 34 | 3. Rename the `.zip` file to `.mcpb`
 35 | 4. Double-click the `.mcpb` file to open with Claude Desktop
 36 | 5. Click "Install"
 37 | 
 38 | #### (Advanced) Alternative: Via JSON configuration
 39 | 
 40 | 1. Install [Node.js](https://nodejs.org/en/download)
 41 | 2. Open Claude Desktop and go to Settings → Developer
 42 | 3. Click "Edit Config" to open your `claude_desktop_config.json` file
 43 | 4. Add the following configuration to the "mcpServers" section:
 44 | 
 45 | ```json
 46 | {
 47 |   "mcpServers": {
 48 |     "computer-use": {
 49 |       "command": "npx",
 50 |       "args": [
 51 |         "-y",
 52 |         "computer-use-mcp"
 53 |       ]
 54 |     }
 55 |   }
 56 | }
 57 | ```
 58 | 
 59 | 5. Save the file and restart Claude Desktop
 60 | 
 61 | </details>
 62 | 
 63 | <details>
 64 | <summary><strong>Cursor</strong></summary>
 65 | 
 66 | #### (Recommended) Via one-click install
 67 | 
 68 | 1. Click [![Install MCP Server](https://cursor.com/deeplink/mcp-install-dark.svg)](https://cursor.com/install-mcp?name=computer-use&config=JTdCJTIyY29tbWFuZCUyMiUzQSUyMm5weCUyMC15JTIwY29tcHV0ZXItdXNlLW1jcCUyMiU3RA%3D%3D)
 69 | 
 70 | #### (Advanced) Alternative: Via JSON configuration
 71 | 
 72 | Create either a global (`~/.cursor/mcp.json`) or project-specific (`.cursor/mcp.json`) configuration file:
 73 | 
 74 | ```json
 75 | {
 76 |   "mcpServers": {
 77 |     "computer-use": {
 78 |       "command": "npx",
 79 |       "args": ["-y", "computer-use-mcp"]
 80 |     }
 81 |   }
 82 | }
 83 | ```
 84 | 
 85 | </details>
 86 | 
 87 | <details>
 88 | <summary><strong>Cline</strong></summary>
 89 | 
 90 | #### (Recommended) Via marketplace
 91 | 
 92 | 1. Click the "MCP Servers" icon in the Cline extension
 93 | 2. Search for "Computer Use" and click "Install"
 94 | 3. Follow the prompts to install the server
 95 | 
 96 | #### (Advanced) Alternative: Via JSON configuration
 97 | 
 98 | 1. Click the "MCP Servers" icon in the Cline extension
 99 | 2. Click on the "Installed" tab, then the "Configure MCP Servers" button at the bottom
100 | 3. Add the following configuration to the "mcpServers" section:
101 | 
102 | ```json
103 | {
104 |   "mcpServers": {
105 |     "computer-use": {
106 |       "type": "stdio",
107 |       "command": "npx",
108 |       "args": ["-y", "computer-use-mcp"]
109 |     }
110 |   }
111 | }
112 | ```
113 | 
114 | </details>
115 | 
116 | ## Tips
117 | 
118 | This should just work out of the box.
119 | 
120 | However, to get best results:
121 | - Use a model good at computer use - I recommend [the latest Claude models](https://platform.claude.com/docs/en/about-claude/models/overview).
122 | - Use a small, common resolution - 720p works particularly well. On macOS, you can use [displayoverride-mac](https://github.com/domdomegg/displayoverride-mac) to do this. If you can't use a different resolution, try zooming in to active windows.
123 | - Install and enable the [Rango browser extension](https://chromewebstore.google.com/detail/rango/lnemjdnjjofijemhdogofbpcedhgcpmb). This enables keyboard navigation for websites, which is far more reliable than Claude trying to click coordinates. You can bump up the font size setting in Rango to make the hints more visible.
124 | 
125 | ## How it works
126 | 
127 | We implement a computer use tool that is nearly identical to the one in [Anthropic's official computer use guide](https://docs.anthropic.com/en/docs/build-with-claude/computer-use), with some more nudging to prefer keyboard shortcuts.
128 | 
129 | This talks to your computer using [nut.js](https://github.com/nut-tree/nut.js).
130 | 
131 | ## Contributing
132 | 
133 | Pull requests are welcomed on GitHub! To get started:
134 | 
135 | 1. Install Git and Node.js
136 | 2. Clone the repository
137 | 3. Install dependencies with `npm install`
138 | 4. Run `npm run test` to run tests
139 | 5. Build with `npm run build`
140 | 
141 | ## Releases
142 | 
143 | Versions follow the [semantic versioning spec](https://semver.org/).
144 | 
145 | To release:
146 | 
147 | 1. Use `npm version <major | minor | patch>` to bump the version
148 | 2. Run `git push --follow-tags` to push with tags
149 | 3. Wait for GitHub Actions to publish to the NPM registry.
150 | 


--------------------------------------------------------------------------------
/src/e2e.test.ts:
--------------------------------------------------------------------------------
  1 | import {
  2 | 	describe, test, expect, beforeEach, afterEach,
  3 | } from 'vitest';
  4 | import type {
  5 | 	JSONRPCMessage,
  6 | 	JSONRPCRequest,
  7 | 	JSONRPCResponse,
  8 | 	ListToolsResult,
  9 | } from '@modelcontextprotocol/sdk/types.js';
 10 | import {InMemoryTransport} from '@modelcontextprotocol/sdk/inMemory.js';
 11 | import {execSync, spawn} from 'node:child_process';
 12 | import {existsSync} from 'node:fs';
 13 | import * as fs from 'node:fs';
 14 | import * as path from 'node:path';
 15 | import {createServer} from './index.js';
 16 | /** Transport-agnostic client handle the tests run against; each deployment method below builds one. */
 17 | type MCPClient = {
 18 | 	sendRequest: <T>(message: JSONRPCRequest) => Promise<T>; // Resolves with the response's `result`
 19 | 	close: () => Promise<void>; // Shuts down the server (or kills the spawned process)
 20 | };
 21 | 
 22 | /**
 23 |  * Creates an MCP client that exchanges newline-delimited JSON-RPC messages with a spawned process over stdin/stdout.
 24 |  * Requests reject after 10s without a response; `cleanup` (if given) runs after the process is killed in `close()`.
 25 |  */
 26 | function createProcessBasedClient(
 27 | 	serverProcess: ReturnType<typeof spawn>,
 28 | 	cleanup?: () => void,
 29 | ): MCPClient {
 30 | 	let requestId = 1;
 31 | 
 32 | 	const pendingRequests = new Map<string, {resolve: (value: any) => void; reject: (error: any) => void}>();
 33 | 
 34 | 	// Handle server responses; a partial trailing line is kept in `buffered` until the rest of it arrives
 35 | 	let buffered = '';
 36 | 	serverProcess.stdout?.on('data', (data) => {
 37 | 		const lines: string[] = (buffered + data.toString()).split('\n');
 38 | 		buffered = lines.pop() ?? '';
 39 | 
 40 | 		for (const line of lines) {
 41 | 			try {
 42 | 				const response = JSON.parse(line);
 43 | 				if (response.id && pendingRequests.has(response.id)) {
 44 | 					const {resolve, reject} = pendingRequests.get(response.id)!;
 45 | 					pendingRequests.delete(response.id);
 46 | 					if ('result' in response) {
 47 | 						resolve(response.result);
 48 | 					} else if ('error' in response) {
 49 | 						reject(new Error(response.error.message || 'Unknown error'));
 50 | 					}
 51 | 				}
 52 | 			} catch {
 53 | 				// Ignore blank and non-JSON lines
 54 | 			}
 55 | 		}
 56 | 	});
 57 | 
 58 | 	const sendRequest = async <T>(message: JSONRPCRequest): Promise<T> => {
 59 | 		// eslint-disable-next-line no-plusplus
 60 | 		const id = (requestId++).toString();
 61 | 		let timer: ReturnType<typeof setTimeout> | undefined;
 62 | 
 63 | 		try {
 64 | 			return await new Promise<T>((resolve, reject) => {
 65 | 				pendingRequests.set(id, {resolve, reject});
 66 | 				// Timeout (rejecting a promise that has already settled is a no-op)
 67 | 				timer = setTimeout(() => {
 68 | 					reject(new Error('Request timeout'));
 69 | 				}, 10_000);
 70 | 				// A synchronous throw in here rejects the promise, so the `finally` below still runs
 71 | 				serverProcess.stdin?.write(`${JSON.stringify({...message, id})}\n`);
 72 | 			});
 73 | 		} finally {
 74 | 			// Settled either way: drop the timer and the map entry so neither outlives the request
 75 | 			clearTimeout(timer);
 76 | 			pendingRequests.delete(id);
 77 | 		}
 78 | 	};
 79 | 
 80 | 	return {
 81 | 		sendRequest,
 82 | 		async close() {
 83 | 			try {
 84 | 				serverProcess.kill();
 85 | 			} catch {
 86 | 				// Process might already be dead
 87 | 			}
 88 | 
 89 | 			// Run any additional cleanup
 90 | 			if (cleanup) {
 91 | 				cleanup();
 92 | 			}
 93 | 		},
 94 | 	};
 95 | }
 96 | 
 97 | /**
 98 |  * Runs the same tests against every deployment method listed below; an entry whose `condition` is falsy is registered with `describe.skip`
 99 |  */
100 | describe.each([
101 | 	{
102 | 		name: 'InMemory Transport',
103 | 		condition: true, // Always runs
104 | 		async createClient(): Promise<MCPClient> {
105 | 			const server = createServer();
106 | 			const [serverTransport, clientTransport] = InMemoryTransport.createLinkedPair();
107 | 			await server.connect(serverTransport);
108 | 			// Each call below installs fresh onmessage/onerror handlers on the shared transport, so requests must be issued one at a time
109 | 			const sendRequest = async <T>(message: JSONRPCRequest): Promise<T> => {
110 | 				return new Promise((resolve, reject) => {
111 | 					clientTransport.onmessage = (response: JSONRPCMessage) => {
112 | 						const typedResponse = response as JSONRPCResponse;
113 | 						if ('result' in typedResponse) {
114 | 							resolve(typedResponse.result as T);
115 | 							return;
116 | 						}
117 | 
118 | 						reject(new Error('No result in response'));
119 | 					};
120 | 
121 | 					clientTransport.onerror = (err: Error) => {
122 | 						reject(err);
123 | 					};
124 | 
125 | 					clientTransport.send(message).catch((err: unknown) => {
126 | 						reject(err instanceof Error ? err : new Error(String(err)));
127 | 					});
128 | 				});
129 | 			};
130 | 
131 | 			return {
132 | 				sendRequest,
133 | 				close: async () => server.close(),
134 | 			};
135 | 		},
136 | 	},
137 | 	{
138 | 		name: 'DXT Package',
139 | 		condition: process.env.RUN_DXT_TEST, // Opt-in: any non-empty value enables this entry
140 | 		async createClient(): Promise<MCPClient> {
141 | 			// Build the DXT package only when no previously built archive is present
142 | 			if (!existsSync('computer-use-mcp.dxt')) {
143 | 				execSync('./build-dxt.sh', {stdio: 'inherit'});
144 | 			}
145 | 
146 | 			// Extract the DXT package (a zip archive) into a freshly emptied test directory
147 | 			const testDir = 'test-dxt-client';
148 | 			execSync(`rm -rf ${testDir}`);
149 | 			execSync(`mkdir -p ${testDir} && unzip -q computer-use-mcp.dxt -d ${testDir}`);
150 | 
151 | 			// Start the MCP server from the extracted DXT package
152 | 			const serverProcess = spawn('node', [path.join(testDir, 'dist/index.js')], {
153 | 				stdio: ['pipe', 'pipe', 'pipe'],
154 | 				env: {...process.env},
155 | 			});
156 | 
157 | 			return createProcessBasedClient(
158 | 				serverProcess,
159 | 				() => {
160 | 					// Clean up the test directory once the client is closed
161 | 					if (fs.existsSync(testDir)) {
162 | 						execSync(`rm -rf ${testDir}`);
163 | 					}
164 | 				},
165 | 			);
166 | 		},
167 | 	},
168 | ])('MCP Server Tests - $name', ({name, condition, createClient}) => {
169 | 	(condition ? describe : describe.skip)(`${name} Integration`, () => {
170 | 		let client: MCPClient;
171 | 
172 | 		beforeEach(async () => {
173 | 			client = await createClient();
174 | 		}, 60_000); // Generous timeout: the DXT client may have to build the package first
175 | 
176 | 		afterEach(async () => {
177 | 			if (client) {
178 | 				await client.close();
179 | 			}
180 | 		});
181 | 
182 | 		test('should list available tools', async () => {
183 | 			const result = await client.sendRequest<ListToolsResult>({
184 | 				jsonrpc: '2.0',
185 | 				id: '1',
186 | 				method: 'tools/list',
187 | 				params: {},
188 | 			});
189 | 
190 | 			expect(result.tools.map((t) => t.name)).toEqual([
191 | 				'computer',
192 | 			]);
193 | 			expect(result.tools[0]).toMatchObject({
194 | 				name: 'computer',
195 | 				description: expect.any(String),
196 | 				inputSchema: expect.objectContaining({
197 | 					type: 'object',
198 | 				}),
199 | 			});
200 | 		}, 30_000);
201 | 	});
202 | });
203 | 


--------------------------------------------------------------------------------
/src/tools/computer.ts:
--------------------------------------------------------------------------------
  1 | import type {McpServer} from '@modelcontextprotocol/sdk/server/mcp.js';
  2 | import {z} from 'zod';
  3 | import {
  4 | 	mouse,
  5 | 	keyboard,
  6 | 	Point,
  7 | 	screen,
  8 | 	Button,
  9 | 	imageToJimp,
 10 | } from '@nut-tree-fork/nut-js';
 11 | import {setTimeout} from 'node:timers/promises';
 12 | import sharp from 'sharp';
 13 | import {toKeys} from '../xdotoolStringToKeys.js';
 14 | import {jsonResult} from '../utils/response.js';
 15 | 
 16 | // Configure nut-js (module-level, so this takes effect as soon as the file is imported)
 17 | mouse.config.autoDelayMs = 100;
 18 | mouse.config.mouseSpeed = 1000;
 19 | keyboard.config.autoDelayMs = 10;
 20 | 
 21 | // The Claude API automatically downsamples images larger than ~1.15MP or 1568px on the long edge.
 22 | // We already downsampled screenshots to fit these limits and reported the original screen
 23 | // dimensions via display_width_px/display_height_px, but Claude wasn't correctly using those
 24 | // reported dimensions - it was using coordinates from the downsampled image space directly.
 25 | // As a workaround, we now report the actual image dimensions and scale Claude's coordinates
 26 | // back up to logical screen coordinates.
 27 | // See: https://docs.anthropic.com/en/docs/build-with-claude/vision#evaluate-image-size
 28 | const maxLongEdge = 1568; // Longest allowed image edge, in pixels
 29 | const maxPixels = 1.15 * 1024 * 1024; // 1.15 megapixels, counted as 1.15 * 2^20 (about 1,205,862 pixels)
 30 | 
 31 | /**
 32 |  * Work out how much a width x height image has to shrink to satisfy both API limits.
 33 |  * Yields 1 when the image already fits, otherwise the smaller (stricter) of the two ratios.
 34 |  */
 35 | function getSizeToApiScale(width: number, height: number): number {
 36 | 	const longest = Math.max(width, height);
 37 | 	const area = width * height;
 38 | 
 39 | 	// A limit only contributes a shrink ratio when the image actually exceeds it
 40 | 	const byEdge = longest > maxLongEdge ? maxLongEdge / longest : 1;
 41 | 	const byArea = area > maxPixels ? Math.sqrt(maxPixels / area) : 1;
 42 | 	return byArea < byEdge ? byArea : byEdge;
 43 | }
 44 | 
 45 | /**
 46 |  * Factor that converts a coordinate in the downsampled API image into a logical screen coordinate.
 47 |  * It is the reciprocal of the shrink that getSizeToApiScale() yields for the current logical screen size.
 48 |  */
 49 | async function getApiToLogicalScale(): Promise<number> {
 50 | 	// Width is queried before height, one after the other
 51 | 	const shrink = getSizeToApiScale(await screen.width(), await screen.height());
 52 | 
 53 | 	return 1 / shrink;
 54 | }
 55 | 
 56 | // The closed set of action names accepted by the tool's `action` input
 57 | const ActionEnum = z.enum([
 58 | 	'key',
 59 | 	'type',
 60 | 	'mouse_move',
 61 | 	'left_click',
 62 | 	'left_click_drag',
 63 | 	'right_click',
 64 | 	'middle_click',
 65 | 	'double_click',
 66 | 	'scroll',
 67 | 	'get_screenshot',
 68 | 	'get_cursor_position',
 69 | ]);
 70 | // Per-action help text, attached to the `action` field of the tool's input schema
 71 | const actionDescription = `The action to perform. The available actions are:
 72 | * key: Press a key or key-combination on the keyboard.
 73 | * type: Type a string of text on the keyboard.
 74 | * get_cursor_position: Get the current (x, y) pixel coordinate of the cursor on the screen.
 75 | * mouse_move: Move the cursor to a specified (x, y) pixel coordinate on the screen.
 76 | * left_click: Click the left mouse button. If coordinate is provided, moves to that position first.
 77 | * left_click_drag: Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.
 78 | * right_click: Click the right mouse button. If coordinate is provided, moves to that position first.
 79 | * middle_click: Click the middle mouse button. If coordinate is provided, moves to that position first.
 80 | * double_click: Double-click the left mouse button. If coordinate is provided, moves to that position first.
 81 | * scroll: Scroll the screen in a specified direction. Requires coordinate (moves there first) and text parameter with direction: "up", "down", "left", or "right". Optionally append ":N" to scroll N pixels (default 300), e.g. "down:500".
 82 | * get_screenshot: Take a screenshot of the screen.`;
 83 | // Top-level description registered for the tool, including guidance on reading the screenshot crosshair
 84 | const toolDescription = `Use a mouse and keyboard to interact with a computer, and take screenshots.
 85 | * This is an interface to a desktop GUI. You do not have access to a terminal or applications menu. You must click on desktop icons to start applications.
 86 | * Always prefer using keyboard shortcuts rather than clicking, where possible.
 87 | * If you see boxes with two letters in them, typing these letters will click that element. Use this instead of other shortcuts or clicking, where possible.
 88 | * Some applications may take time to start or process actions, so you may need to wait and take successive screenshots to see the results of your actions. E.g. if you click on Firefox and a window doesn't open, try taking another screenshot.
 89 | * Whenever you intend to move the cursor to click on an element like an icon, you should consult a screenshot to determine the coordinates of the element before moving the cursor.
 90 | * If you tried clicking on a program or link but it failed to load, even after waiting, try adjusting your cursor position so that the tip of the cursor visually falls on the element that you want to click.
 91 | * Make sure to click any buttons, links, icons, etc with the cursor tip in the center of the element. Don't click boxes on their edges unless asked.
 92 | 
 93 | Using the crosshair:
 94 | * Screenshots show a red crosshair at the current cursor position.
 95 | * After clicking, check where the crosshair appears vs your target. If it missed, adjust coordinates proportionally to the distance - start with large adjustments and refine. Avoid small incremental changes when the crosshair is far from the target (distances are often further than you expect).
 96 | * Consider display dimensions when estimating positions. E.g. if it's 90% to the bottom of the screen, the coordinates should reflect this.`;
 97 | /** Registers the `computer` tool on `server`. Incoming coordinates are in screenshot (API image) space and are scaled to logical screen space before use. */
 98 | export function registerComputer(server: McpServer): void {
 99 | 	server.registerTool(
100 | 		'computer',
101 | 		{
102 | 			title: 'Computer Control',
103 | 			description: toolDescription,
104 | 			inputSchema: {
105 | 				action: ActionEnum.describe(actionDescription),
106 | 				coordinate: z.tuple([z.number(), z.number()]).optional().describe('(x, y): The x (pixels from the left edge) and y (pixels from the top edge) coordinates'),
107 | 				text: z.string().optional().describe('Text to type or key command to execute'),
108 | 			},
109 | 			// Note: No outputSchema because this tool returns varying content types including images
110 | 			annotations: {
111 | 				readOnlyHint: false,
112 | 			},
113 | 		},
114 | 		async ({action, coordinate, text}) => {
115 | 			// Scale coordinates from API image space to logical screen space
116 | 			let scaledCoordinate = coordinate;
117 | 			if (coordinate) {
118 | 				const scale = await getApiToLogicalScale();
119 | 				scaledCoordinate = [
120 | 					Math.round(coordinate[0] * scale),
121 | 					Math.round(coordinate[1] * scale),
122 | 				];
123 | 
124 | 				// Validate coordinates are within display bounds
125 | 				const [x, y] = scaledCoordinate;
126 | 				const [width, height] = [await screen.width(), await screen.height()];
127 | 				if (x < 0 || x >= width || y < 0 || y >= height) {
128 | 					throw new Error(`Coordinates (${x}, ${y}) are outside display bounds of ${width}x${height}`);
129 | 				}
130 | 			}
131 | 
132 | 			// Implement system actions using nut-js
133 | 			switch (action) {
134 | 				case 'key': {
135 | 					if (!text) {
136 | 						throw new Error('Text required for key');
137 | 					}
138 | 
139 | 					const keys = toKeys(text);
140 | 					await keyboard.pressKey(...keys);
141 | 					await keyboard.releaseKey(...keys);
142 | 
143 | 					return jsonResult({ok: true});
144 | 				}
145 | 
146 | 				case 'type': {
147 | 					if (!text) {
148 | 						throw new Error('Text required for type');
149 | 					}
150 | 
151 | 					await keyboard.type(text);
152 | 					return jsonResult({ok: true});
153 | 				}
154 | 
155 | 				case 'get_cursor_position': {
156 | 					const pos = await mouse.getPosition();
157 | 					const scale = await getApiToLogicalScale();
158 | 					// Return coordinates in API image space (scaled down from logical)
159 | 					// so Claude can correlate with what it sees in screenshots
160 | 					return jsonResult({
161 | 						x: Math.round(pos.x / scale),
162 | 						y: Math.round(pos.y / scale),
163 | 					});
164 | 				}
165 | 
166 | 				case 'mouse_move': {
167 | 					if (!scaledCoordinate) {
168 | 						throw new Error('Coordinate required for mouse_move');
169 | 					}
170 | 
171 | 					await mouse.setPosition(new Point(scaledCoordinate[0], scaledCoordinate[1]));
172 | 					return jsonResult({ok: true});
173 | 				}
174 | 
175 | 				case 'left_click': {
176 | 					if (scaledCoordinate) {
177 | 						await mouse.setPosition(new Point(scaledCoordinate[0], scaledCoordinate[1]));
178 | 					}
179 | 
180 | 					await mouse.leftClick();
181 | 					return jsonResult({ok: true});
182 | 				}
183 | 
184 | 				case 'left_click_drag': {
185 | 					if (!scaledCoordinate) {
186 | 						throw new Error('Coordinate required for left_click_drag');
187 | 					}
188 | 					// The drag starts wherever the cursor currently is and ends at the given coordinate
189 | 					await mouse.pressButton(Button.LEFT);
190 | 					await mouse.setPosition(new Point(scaledCoordinate[0], scaledCoordinate[1]));
191 | 					await mouse.releaseButton(Button.LEFT);
192 | 					return jsonResult({ok: true});
193 | 				}
194 | 
195 | 				case 'right_click': {
196 | 					if (scaledCoordinate) {
197 | 						await mouse.setPosition(new Point(scaledCoordinate[0], scaledCoordinate[1]));
198 | 					}
199 | 
200 | 					await mouse.rightClick();
201 | 					return jsonResult({ok: true});
202 | 				}
203 | 
204 | 				case 'middle_click': {
205 | 					if (scaledCoordinate) {
206 | 						await mouse.setPosition(new Point(scaledCoordinate[0], scaledCoordinate[1]));
207 | 					}
208 | 
209 | 					await mouse.click(Button.MIDDLE);
210 | 					return jsonResult({ok: true});
211 | 				}
212 | 
213 | 				case 'double_click': {
214 | 					if (scaledCoordinate) {
215 | 						await mouse.setPosition(new Point(scaledCoordinate[0], scaledCoordinate[1]));
216 | 					}
217 | 
218 | 					await mouse.doubleClick(Button.LEFT);
219 | 					return jsonResult({ok: true});
220 | 				}
221 | 
222 | 				case 'scroll': {
223 | 					if (!scaledCoordinate) {
224 | 						throw new Error('Coordinate required for scroll');
225 | 					}
226 | 
227 | 					if (!text) {
228 | 						throw new Error('Text required for scroll (direction like "up", "down:5")');
229 | 					}
230 | 
231 | 					// Parse direction and optional amount from text (e.g. "down" or "down:5")
232 | 					const parts = text.split(':');
233 | 					const direction = parts[0];
234 | 					const amountStr = parts[1];
235 | 					const amount = amountStr ? parseInt(amountStr, 10) : 300;
236 | 
237 | 					if (!direction) {
238 | 						throw new Error('Scroll direction required');
239 | 					}
240 | 
241 | 					if (amountStr !== undefined && (isNaN(amount) || amount <= 0)) {
242 | 						throw new Error(`Invalid scroll amount: ${amountStr}`);
243 | 					}
244 | 
245 | 					// Move to position first
246 | 					await mouse.setPosition(new Point(scaledCoordinate[0], scaledCoordinate[1]));
247 | 
248 | 					// Scroll in the specified direction
249 | 					switch (direction.toLowerCase()) {
250 | 						case 'up':
251 | 							await mouse.scrollUp(amount);
252 | 							break;
253 | 						case 'down':
254 | 							await mouse.scrollDown(amount);
255 | 							break;
256 | 						case 'left':
257 | 							await mouse.scrollLeft(amount);
258 | 							break;
259 | 						case 'right':
260 | 							await mouse.scrollRight(amount);
261 | 							break;
262 | 						default:
263 | 							throw new Error(`Invalid scroll direction: ${direction}. Use "up", "down", "left", or "right"`);
264 | 					}
265 | 
266 | 					return jsonResult({ok: true});
267 | 				}
268 | 
269 | 				case 'get_screenshot': {
270 | 					// Wait a bit to let things load before showing it to Claude
271 | 					await setTimeout(1000);
272 | 
273 | 					// Get cursor position in logical coordinates
274 | 					const cursorPos = await mouse.getPosition();
275 | 
276 | 					// Capture the entire screen (on HiDPI/Retina displays the grab can be larger than the logical size)
277 | 					const image = imageToJimp(await screen.grab());
278 | 
279 | 					// Size the image from the LOGICAL screen size - the basis getApiToLogicalScale() uses - so image coordinates map back exactly
280 | 					const [logicalWidth, logicalHeight] = [await screen.width(), await screen.height()];
281 | 					const apiScaleFactor = getSizeToApiScale(logicalWidth, logicalHeight);
282 | 					const targetWidth = Math.floor(logicalWidth * apiScaleFactor);
283 | 					const targetHeight = Math.floor(logicalHeight * apiScaleFactor);
284 | 					if (image.getWidth() !== targetWidth || image.getHeight() !== targetHeight) {
285 | 						image.resize(targetWidth, targetHeight);
286 | 					}
287 | 
288 | 					// Calculate cursor position in API image coordinates
289 | 					// cursor is in logical coords, need to convert to API image coords
290 | 					const scale = await getApiToLogicalScale();
291 | 					const cursorInImageX = Math.floor(cursorPos.x / scale);
292 | 					const cursorInImageY = Math.floor(cursorPos.y / scale);
293 | 
294 | 					// Draw a crosshair at cursor position (red color)
295 | 					const crosshairSize = 20;
296 | 					const crosshairColor = 0xFF0000FF; // Red with full opacity (RGBA)
297 | 					const imageWidth = image.getWidth();
298 | 					const imageHeight = image.getHeight();
299 | 
300 | 					// Draw horizontal line
301 | 					for (let x = Math.max(0, cursorInImageX - crosshairSize); x <= Math.min(imageWidth - 1, cursorInImageX + crosshairSize); x++) {
302 | 						if (cursorInImageY >= 0 && cursorInImageY < imageHeight) {
303 | 							image.setPixelColor(crosshairColor, x, cursorInImageY);
304 | 							// Make it thicker
305 | 							if (cursorInImageY > 0) {
306 | 								image.setPixelColor(crosshairColor, x, cursorInImageY - 1);
307 | 							}
308 | 
309 | 							if (cursorInImageY < imageHeight - 1) {
310 | 								image.setPixelColor(crosshairColor, x, cursorInImageY + 1);
311 | 							}
312 | 						}
313 | 					}
314 | 
315 | 					// Draw vertical line
316 | 					for (let y = Math.max(0, cursorInImageY - crosshairSize); y <= Math.min(imageHeight - 1, cursorInImageY + crosshairSize); y++) {
317 | 						if (cursorInImageX >= 0 && cursorInImageX < imageWidth) {
318 | 							image.setPixelColor(crosshairColor, cursorInImageX, y);
319 | 							// Make it thicker
320 | 							if (cursorInImageX > 0) {
321 | 								image.setPixelColor(crosshairColor, cursorInImageX - 1, y);
322 | 							}
323 | 
324 | 							if (cursorInImageX < imageWidth - 1) {
325 | 								image.setPixelColor(crosshairColor, cursorInImageX + 1, y);
326 | 							}
327 | 						}
328 | 					}
329 | 
330 | 					// Get PNG buffer from Jimp
331 | 					const pngBuffer = await image.getBufferAsync('image/png');
332 | 
333 | 					// Compress PNG using sharp, to fit size limits
334 | 					const optimizedBuffer = await sharp(pngBuffer)
335 | 						.png({quality: 80, compressionLevel: 9})
336 | 						.toBuffer();
337 | 
338 | 					// Convert optimized buffer to base64
339 | 					const base64Data = optimizedBuffer.toString('base64');
340 | 
341 | 					return {
342 | 						content: [
343 | 							{
344 | 								type: 'text',
345 | 								text: JSON.stringify({
346 | 									// Report the image dimensions - Claude should use coordinates within this space
347 | 									// These may differ from the actual display due to scaling for API limits
348 | 									image_width: imageWidth,
349 | 									image_height: imageHeight,
350 | 								}),
351 | 							},
352 | 							{
353 | 								type: 'image',
354 | 								data: base64Data,
355 | 								mimeType: 'image/png',
356 | 							},
357 | 						],
358 | 					};
359 | 				}
360 | 			}
361 | 		},
362 | 	);
363 | }
364 | 


--------------------------------------------------------------------------------