├── icon.png ├── tsconfig.json ├── eslint.config.mjs ├── tsconfig.build.json ├── src ├── tools │ ├── index.ts │ └── computer.ts ├── utils │ └── response.ts ├── index.ts ├── xdotoolStringToKeys.test.ts ├── main.ts ├── xdotoolStringToKeys.ts └── e2e.test.ts ├── LICENSE ├── manifest.json ├── .github └── workflows │ ├── auto-dependabot.yaml │ └── ci.yaml ├── server.json ├── package.json ├── .gitignore └── README.md /icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/domdomegg/computer-use-mcp/HEAD/icon.png -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": ["@tsconfig/node-lts/tsconfig.json", "tsconfig-domdomegg/tsconfig.json"], 3 | "include": [ 4 | "src/**/*" 5 | ], 6 | } 7 | -------------------------------------------------------------------------------- /eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import domdomegg from 'eslint-config-domdomegg'; 2 | 3 | /** @type {import('@typescript-eslint/utils').TSESLint.FlatConfig.ConfigFile} */ 4 | export default [ 5 | ...domdomegg, 6 | ]; 7 | -------------------------------------------------------------------------------- /tsconfig.build.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.json", 3 | "compilerOptions": { 4 | "outDir": "dist", 5 | "declaration": true, 6 | }, 7 | "exclude": [ 8 | "**/*.test.ts", 9 | ], 10 | } 11 | -------------------------------------------------------------------------------- /src/tools/index.ts: --------------------------------------------------------------------------------
import type {McpServer} from '@modelcontextprotocol/sdk/server/mcp.js';
import {registerComputer} from './computer.js';

/** Registers every tool this package provides on the given MCP server. */
export function registerAll(server: McpServer): void {
  registerComputer(server);
}
-------------------------------------------------------------------------------- /src/utils/response.ts: --------------------------------------------------------------------------------
import type {CallToolResult} from '@modelcontextprotocol/sdk/types.js';

/**
 * Wraps `data` as a tool result: the object pretty-printed as JSON (2-space
 * indent) in a single text content item, plus the same object as
 * `structuredContent`.
 *
 * NOTE(review): the type-parameter list was garbled in this copy
 * (`jsonResult>(data: T)`); `T extends Record<string, unknown>` is the presumed
 * original constraint - confirm against the upstream file.
 *
 * @param data - Object to return to the caller.
 * @returns The tool result carrying `data` in both textual and structured form.
 */
export function jsonResult<T extends Record<string, unknown>>(data: T): CallToolResult & {structuredContent: T} {
  return {
    content: [{type: 'text', text: JSON.stringify(data, null, 2)}],
    structuredContent: data,
  };
}
-------------------------------------------------------------------------------- /src/index.ts: --------------------------------------------------------------------------------
// Library exports for programmatic usage
import {McpServer} from '@modelcontextprotocol/sdk/server/mcp.js';
import {registerAll} from './tools/index.js';

/**
 * Creates an MCP server named `computer-use-mcp` with all tools registered.
 * The server is returned unconnected; no transport is attached here.
 */
export function createServer(): McpServer {
  const server = new McpServer({
    name: 'computer-use-mcp',
    // NOTE(review): hard-coded; package.json in this repo declares 1.5.0 - confirm which is intended.
    version: '1.0.0',
  });

  registerAll(server);

  return server;
}
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Adam Jones (domdomegg) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": "0.3", 3 | "name": "computer-use-mcp", 4 | "display_name": "Computer Use", 5 | "version": "{{VERSION}}", 6 | "description": "💻 MCP server for Claude to control your computer. Implements computer use capabilities similar to Anthropic's official computer use guide.", 7 | "icon": "icon.png", 8 | "author": { 9 | "name": "Adam Jones (domdomegg)", 10 | "url": "https://github.com/domdomegg" 11 | }, 12 | "repository": { 13 | "type": "git", 14 | "url": "https://github.com/domdomegg/computer-use-mcp" 15 | }, 16 | "server": { 17 | "type": "node", 18 | "entry_point": "dist/main.js", 19 | "mcp_config": { 20 | "command": "node", 21 | "args": ["${__dirname}/dist/main.js"] 22 | } 23 | }, 24 | "tools": [ 25 | { 26 | "name": "computer", 27 | "description": "Use a computer to complete tasks. This tool can take screenshots, click, type, scroll, and more." 
28 | } 29 | ], 30 | "compatibility": { 31 | "claude_desktop": ">=0.10.0", 32 | "platforms": ["darwin", "win32", "linux"], 33 | "runtimes": { 34 | "node": ">=16.0.0" 35 | } 36 | } 37 | } -------------------------------------------------------------------------------- /.github/workflows/auto-dependabot.yaml: -------------------------------------------------------------------------------- 1 | # This file is centrally managed 2 | # https://github.com/domdomegg/domdomegg/blob/master/file-sync/auto-dependabot.yaml 3 | 4 | name: Dependabot automation 5 | 6 | on: 7 | pull_request: 8 | types: 9 | - opened 10 | - reopened 11 | - synchronize 12 | - edited 13 | - ready_for_review 14 | - unlabeled 15 | 16 | permissions: 17 | pull-requests: write 18 | contents: write 19 | 20 | jobs: 21 | dependabot_automation: 22 | runs-on: ubuntu-latest 23 | timeout-minutes: 10 24 | if: ${{ github.actor == 'dependabot[bot]' && github.event.pull_request.head.repo.full_name == github.repository}} 25 | steps: 26 | - name: Approve 27 | run: gh pr review --approve "$PR_URL" 28 | env: 29 | PR_URL: ${{github.event.pull_request.html_url}} 30 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 31 | - name: Enable auto-merge 32 | if: ${{ !contains(github.event.pull_request.labels.*.name, 'do not merge') }} 33 | run: gh pr merge --auto --squash "$PR_URL" 34 | env: 35 | PR_URL: ${{github.event.pull_request.html_url}} 36 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 37 | -------------------------------------------------------------------------------- /server.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://static.modelcontextprotocol.io/schemas/2025-10-17/server.schema.json", 3 | "name": "io.github.domdomegg/computer-use-mcp", 4 | "title": "Computer Use", 5 | "description": "Control your computer with screenshots, mouse, and keyboard automation.", 6 | "websiteUrl": "https://github.com/domdomegg/computer-use-mcp#readme", 7 | "icons": [{ 8 | "mimeType": 
"image/png", 9 | "src": "https://raw.githubusercontent.com/domdomegg/computer-use-mcp/refs/heads/master/icon.png" 10 | }], 11 | "repository": { 12 | "url": "https://github.com/domdomegg/computer-use-mcp.git", 13 | "source": "github" 14 | }, 15 | "version": "{{VERSION}}", 16 | "packages": [ 17 | { 18 | "registryType": "npm", 19 | "identifier": "computer-use-mcp", 20 | "version": "{{VERSION}}", 21 | "runtimeHint": "npx", 22 | "environmentVariables": [], 23 | "transport": { 24 | "type": "stdio" 25 | } 26 | }, 27 | { 28 | "registryType": "mcpb", 29 | "identifier": "https://github.com/domdomegg/computer-use-mcp/releases/download/v{{VERSION}}/computer-use-mcp.mcpb", 30 | "version": "{{VERSION}}", 31 | "fileSha256": "{{MCPB_FILE_SHA256}}", 32 | "transport": { 33 | "type": "stdio" 34 | } 35 | } 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "computer-use-mcp", 3 | "version": "1.5.0", 4 | "description": "💻 MCP server for Claude to control your computer", 5 | "license": "MIT", 6 | "author": "Adam Jones (domdomegg)", 7 | "repository": { 8 | "type": "git", 9 | "url": "https://github.com/domdomegg/computer-use-mcp.git" 10 | }, 11 | "type": "module", 12 | "main": "dist/main.js", 13 | "types": "dist/index.d.ts", 14 | "bin": "dist/main.js", 15 | "files": [ 16 | "dist" 17 | ], 18 | "mcpName": "io.github.domdomegg/computer-use-mcp", 19 | "scripts": { 20 | "start": "npm run build && node dist/main.js", 21 | "start:http": "npm run build && MCP_TRANSPORT=http node dist/main.js", 22 | "test": "vitest run", 23 | "test:watch": "vitest --watch", 24 | "test:e2e": "vitest run --config vitest.e2e.config.ts", 25 | "lint": "eslint", 26 | "clean": "rm -rf dist", 27 | "build": "tsc --project tsconfig.build.json", 28 | "build:mcpb": "./build-mcpb.sh", 29 | "prepublishOnly": "npm run clean && npm run build" 30 | }, 31 | 
"dependencies": { 32 | "@modelcontextprotocol/sdk": "^1.24.3", 33 | "@nut-tree-fork/nut-js": "^4.2.6", 34 | "express": "^5.1.0", 35 | "sharp": "^0.34.5", 36 | "zod": "^4.1.13" 37 | }, 38 | "devDependencies": { 39 | "@tsconfig/node-lts": "^24.0.0", 40 | "@types/express": "^5.0.6", 41 | "@types/node": "^24.10.1", 42 | "eslint": "^9.39.1", 43 | "eslint-config-domdomegg": "^2.0.9", 44 | "tsconfig-domdomegg": "^1.0.0", 45 | "typescript": "^5.9.3", 46 | "vitest": "^4.0.15" 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/xdotoolStringToKeys.test.ts: -------------------------------------------------------------------------------- 1 | import {describe, it, expect} from 'vitest'; 2 | import {Key} from '@nut-tree-fork/nut-js'; 3 | import {toKeys, InvalidKeyError} from './xdotoolStringToKeys.js'; 4 | 5 | describe('toKeys', () => { 6 | it('should convert single keys', () => { 7 | expect(toKeys('a')).toEqual([Key.A]); 8 | expect(toKeys('Return')).toEqual([Key.Return]); 9 | expect(toKeys('space')).toEqual([Key.Space]); 10 | }); 11 | 12 | it('should convert key combinations', () => { 13 | expect(toKeys('Control_L+a')).toEqual([Key.LeftControl, Key.A]); 14 | expect(toKeys('Shift_L+Return')).toEqual([Key.LeftShift, Key.Return]); 15 | expect(toKeys('Alt_L+Tab')).toEqual([Key.LeftAlt, Key.Tab]); 16 | expect(toKeys('Control_L+Alt_L+Delete')).toEqual([Key.LeftControl, Key.LeftAlt, Key.Delete]); 17 | }); 18 | 19 | it('should handle function keys', () => { 20 | expect(toKeys('F1')).toEqual([Key.F1]); 21 | expect(toKeys('F12')).toEqual([Key.F12]); 22 | expect(toKeys('Control_L+F5')).toEqual([Key.LeftControl, Key.F5]); 23 | }); 24 | 25 | it('should handle navigation keys', () => { 26 | expect(toKeys('Home')).toEqual([Key.Home]); 27 | expect(toKeys('Left')).toEqual([Key.Left]); 28 | expect(toKeys('Page_Up')).toEqual([Key.PageUp]); 29 | expect(toKeys('Prior')).toEqual([Key.PageUp]); // Prior is an alias for Page_Up 30 | }); 31 | 32 | 
it('should handle keypad keys', () => { 33 | expect(toKeys('KP_0')).toEqual([Key.NumPad0]); 34 | expect(toKeys('KP_Add')).toEqual([Key.Add]); 35 | expect(toKeys('Num_Lock')).toEqual([Key.NumLock]); 36 | }); 37 | 38 | it('should handle case insensitivity', () => { 39 | expect(toKeys('RETURN')).toEqual([Key.Return]); 40 | expect(toKeys('Return')).toEqual([Key.Return]); 41 | expect(toKeys('return')).toEqual([Key.Return]); 42 | expect(toKeys('CONTROL_L+A')).toEqual([Key.LeftControl, Key.A]); 43 | }); 44 | 45 | it('should handle whitespace', () => { 46 | expect(toKeys('Control_L + a')).toEqual([Key.LeftControl, Key.A]); 47 | expect(toKeys(' Return ')).toEqual([Key.Return]); 48 | expect(toKeys('Control_L + Alt_L + Delete')).toEqual([Key.LeftControl, Key.LeftAlt, Key.Delete]); 49 | }); 50 | 51 | it('should throw InvalidKeyError for invalid keys', () => { 52 | expect(() => toKeys('')).toThrow(InvalidKeyError); 53 | expect(() => toKeys('invalid')).toThrow(InvalidKeyError); 54 | expect(() => toKeys('Control_L+invalid')).toThrow(InvalidKeyError); 55 | expect(() => toKeys('kp_enter')).toThrow(InvalidKeyError); 56 | }); 57 | }); 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | .pnpm-debug.log* 9 | 10 | # Diagnostic reports (https://nodejs.org/api/report.html) 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 12 | 13 | # Runtime data 14 | pids 15 | *.pid 16 | *.seed 17 | *.pid.lock 18 | 19 | # Directory for instrumented libs generated by jscoverage/JSCover 20 | lib-cov 21 | 22 | # Coverage directory used by tools like istanbul 23 | coverage 24 | *.lcov 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 30 | .grunt 31 | 32 | # Bower dependency directory 
(https://bower.io/) 33 | bower_components 34 | 35 | # node-waf configuration 36 | .lock-wscript 37 | 38 | # Compiled binary addons (https://nodejs.org/api/addons.html) 39 | build/Release 40 | 41 | # Dependency directories 42 | node_modules/ 43 | jspm_packages/ 44 | 45 | # Snowpack dependency directory (https://snowpack.dev/) 46 | web_modules/ 47 | 48 | # TypeScript cache 49 | *.tsbuildinfo 50 | 51 | # Optional npm cache directory 52 | .npm 53 | 54 | # Optional eslint cache 55 | .eslintcache 56 | 57 | # Microbundle cache 58 | .rpt2_cache/ 59 | .rts2_cache_cjs/ 60 | .rts2_cache_es/ 61 | .rts2_cache_umd/ 62 | 63 | # Optional REPL history 64 | .node_repl_history 65 | 66 | # Yarn Integrity file 67 | .yarn-integrity 68 | 69 | # dotenv environment variables file 70 | .env 71 | .env.test 72 | .env.production 73 | 74 | # parcel-bundler cache (https://parceljs.org/) 75 | .cache 76 | .parcel-cache 77 | 78 | # Next.js build output 79 | .next 80 | out 81 | 82 | # Nuxt.js build / generate output 83 | .nuxt 84 | dist 85 | 86 | # Gatsby files 87 | .cache/ 88 | # Comment in the public line in if your project uses Gatsby and not Next.js 89 | # https://nextjs.org/blog/next-9-1#public-directory-support 90 | # public 91 | 92 | # vuepress build output 93 | .vuepress/dist 94 | 95 | # Serverless directories 96 | .serverless/ 97 | 98 | # FuseBox cache 99 | .fusebox/ 100 | 101 | # DynamoDB Local files 102 | .dynamodb/ 103 | 104 | # TernJS port file 105 | .tern-port 106 | 107 | # Stores VSCode versions used for testing VSCode extensions 108 | .vscode-test 109 | 110 | # yarn v2 111 | .yarn/cache 112 | .yarn/unplugged 113 | .yarn/build-state.yml 114 | .yarn/install-state.gz 115 | .pnp.* 116 | 117 | # built artifacts 118 | build/ 119 | dist/ 120 | computer-use-mcp.dxt 121 | -------------------------------------------------------------------------------- /src/main.ts: --------------------------------------------------------------------------------
#!/usr/bin/env node
import {execFileSync} from 'node:child_process';
import {dirname, join} from 'node:path';
import {fileURLToPath} from 'node:url';

// Clear macOS quarantine attributes from native binaries before importing them
// This is needed for MCPB packages downloaded from the internet
//
// Static `import` declarations are hoisted and evaluated before this module's
// body runs, so every module that may pull in native code is loaded with a
// dynamic `import()` inside main() instead - otherwise this block runs too late.
if (process.platform === 'darwin') {
  try {
    const projectRoot = dirname(dirname(fileURLToPath(import.meta.url)));
    // No shell is involved: the path is passed as a single argv entry, so an
    // install path containing quotes, `$` or backticks cannot break the command.
    execFileSync('xattr', ['-cr', join(projectRoot, 'node_modules')], {stdio: 'ignore'});
  } catch {
    // Ignore errors - xattr may not exist or may fail on some files
  }
}

/**
 * Runs `cleanup` and then exits when the process receives SIGINT or SIGTERM.
 *
 * Exits with code 0 after a successful cleanup and 1 if cleanup throws, so a
 * failing cleanup can no longer leave the process running after a signal.
 */
function setupSignalHandlers(cleanup: () => Promise<void>): void {
  const shutdown = async (): Promise<void> => {
    let exitCode = 0;
    try {
      await cleanup();
    } catch (err: unknown) {
      console.error('Error during shutdown:', err);
      exitCode = 1;
    }

    process.exit(exitCode);
  };

  for (const signal of ['SIGINT', 'SIGTERM'] as const) {
    process.on(signal, () => {
      void shutdown();
    });
  }
}

/**
 * Parses the PORT environment value, defaulting to 3000 when unset or empty.
 *
 * @returns The port, or `undefined` when the value is not an integer in 0-65535.
 */
function parsePort(raw: string | undefined): number | undefined {
  const port = Number.parseInt(raw || '3000', 10);
  return Number.isInteger(port) && port >= 0 && port <= 65_535 ? port : undefined;
}

/**
 * Starts the MCP server on the transport selected by MCP_TRANSPORT
 * (`stdio`, the default, or `http`). Any other value prints an error and
 * exits with code 1.
 */
async function main(): Promise<void> {
  const transport = process.env.MCP_TRANSPORT || 'stdio';

  if (transport !== 'stdio' && transport !== 'http') {
    console.error(`Unknown transport: ${transport}. Use MCP_TRANSPORT=stdio or MCP_TRANSPORT=http`);
    process.exit(1);
  }

  // Loaded only now, after the quarantine attributes have been cleared above.
  const {createServer} = await import('./index.js');

  if (transport === 'stdio') {
    const {StdioServerTransport} = await import('@modelcontextprotocol/sdk/server/stdio.js');

    const server = createServer();
    setupSignalHandlers(async () => server.close());

    await server.connect(new StdioServerTransport());
    // stderr, because stdout carries the protocol stream on this transport.
    console.error('Computer Use MCP server running on stdio');
    return;
  }

  // Fail fast with a clear message rather than handing NaN to listen().
  const port = parsePort(process.env.PORT);
  if (port === undefined) {
    console.error(`Invalid PORT: ${process.env.PORT}. Expected an integer between 0 and 65535`);
    process.exit(1);
  }

  const [{StreamableHTTPServerTransport}, {default: express}] = await Promise.all([
    import('@modelcontextprotocol/sdk/server/streamableHttp.js'),
    import('express'),
  ]);

  const app = express();
  app.use(express.json());

  const httpTransport = new StreamableHTTPServerTransport({
    sessionIdGenerator: undefined,
    enableJsonResponse: true,
  });

  app.post('/mcp', async (req, res) => {
    await httpTransport.handleRequest(req, res, req.body);
  });

  const server = createServer();
  await server.connect(httpTransport);

  const httpServer = app.listen(port, () => {
    console.error(`Computer Use MCP server running on http://localhost:${port}/mcp`);
    console.error('WARNING: HTTP transport has no authentication. Only use behind a reverse proxy or in a secured setup.');
  });

  setupSignalHandlers(async () => {
    await server.close();
    // Not awaited: close() only completes once open connections end, and the
    // process exits right after cleanup anyway.
    httpServer.close();
  });
}

main().catch((err: unknown) => {
  console.error('Fatal error:', err);
  process.exit(1);
});
-------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI/CD 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | tags: 7 | - 'v*' 8 | pull_request: 9 | 10 | jobs: 11 | ci: 12 | runs-on: ubuntu-latest 13 | timeout-minutes: 10 14 | strategy: 15 | matrix: 16 | node-version: [lts/*, current] 17 | env: 18 | CI: true 19 | steps: 20 | - name: Checkout ${{ github.sha }} 21 | uses: actions/checkout@v4 22 | - name: Use Node.js ${{ matrix.node-version }} 23 | uses: actions/setup-node@v4 24 | with: 25 | node-version: ${{ matrix.node-version }} 26 | registry-url: https://registry.npmjs.org/ 27 | - name: Install dependencies 28 | run: npm ci 29 | - name: Lint 30 | run: npm run lint --if-present 31 | - name: Build 32 | run: npm run build --if-present 33 | - name: Test 34 | run: npm run test --if-present 35 | - name: Prepare MCPB artifact 36 | if: matrix.node-version == 'lts/*' 37 | run: | 38 | npm run build:mcpb 39 | mkdir -p .github/tmp 40 | unzip computer-use-mcp.mcpb -d .github/tmp 41 | - name: Upload MCPB artifact 42 | if: matrix.node-version == 'lts/*' 43 | uses: actions/upload-artifact@v4 44 | with: 45 | name: computer-use-mcp-mcpb 46 | path: .github/tmp/* 47 | 48 | deploy: 49 | if: startsWith(github.ref, 'refs/tags/v') && github.event_name == 'push' 50 | needs: ci 51 | runs-on: ubuntu-latest 52 | 
timeout-minutes: 15 53 | permissions: 54 | contents: write 55 | id-token: write 56 | env: 57 | CI: true 58 | steps: 59 | - name: Checkout ${{ github.sha }} 60 | uses: actions/checkout@v4 61 | - name: Use Node.js with the npmjs.org registry 62 | uses: actions/setup-node@v4 63 | with: 64 | node-version: lts/* 65 | registry-url: https://registry.npmjs.org/ 66 | - name: Install dependencies 67 | run: npm ci 68 | - name: Build 69 | run: npm run build --if-present 70 | - name: Build MCPB 71 | run: npm run build:mcpb 72 | - name: Update server.json version and SHA256 73 | run: | 74 | VERSION=$(node -p "require('./package.json').version") 75 | MCPB_FILE_SHA256=$(sha256sum computer-use-mcp.mcpb | cut -d' ' -f1) 76 | sed "s/{{VERSION}}/$VERSION/g; s/{{MCPB_FILE_SHA256}}/$MCPB_FILE_SHA256/g" server.json > server.json.tmp 77 | mv server.json.tmp server.json 78 | - uses: google-github-actions/auth@v2 79 | with: 80 | workload_identity_provider: 'projects/457105351064/locations/global/workloadIdentityPools/github-secrets-pool/providers/github-secrets-github' 81 | - uses: google-github-actions/setup-gcloud@v2 82 | - name: Get NPM token 83 | id: npm-token 84 | run: | 85 | token=$(gcloud secrets versions access latest --secret=npm-token --project=gcp-github-secrets) 86 | echo "::add-mask::$token" 87 | echo "token=$token" >> "$GITHUB_OUTPUT" 88 | - name: Publish to NPM 89 | run: npm publish 90 | env: 91 | NODE_AUTH_TOKEN: ${{ steps.npm-token.outputs.token }} 92 | - name: Create GitHub Release 93 | uses: softprops/action-gh-release@v2 94 | with: 95 | files: computer-use-mcp.mcpb 96 | generate_release_notes: true 97 | env: 98 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 99 | - name: Install MCP Publisher 100 | run: | 101 | curl -L "https://github.com/modelcontextprotocol/registry/releases/latest/download/mcp-publisher_$(uname -s | tr '[:upper:]' '[:lower:]')_$(uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/').tar.gz" | tar xz 102 | - name: Login to MCP Registry 103 | run: 
./mcp-publisher login github-oidc 104 | - name: Publish to MCP Registry 105 | run: ./mcp-publisher publish 106 | -------------------------------------------------------------------------------- /src/xdotoolStringToKeys.ts: --------------------------------------------------------------------------------
import {Key} from '@nut-tree-fork/nut-js';

/**
 * Lookup from lower-cased xdotool-style key names (and common aliases) to
 * nut.js `Key` values. Names are matched after trimming and lower-casing, so
 * every entry here must be lower case.
 */
const keyMap: Record<string, Key> = {
  // Function keys
  f1: Key.F1,
  f2: Key.F2,
  f3: Key.F3,
  f4: Key.F4,
  f5: Key.F5,
  f6: Key.F6,
  f7: Key.F7,
  f8: Key.F8,
  f9: Key.F9,
  f10: Key.F10,
  f11: Key.F11,
  f12: Key.F12,
  f13: Key.F13,
  f14: Key.F14,
  f15: Key.F15,
  f16: Key.F16,
  f17: Key.F17,
  f18: Key.F18,
  f19: Key.F19,
  f20: Key.F20,
  f21: Key.F21,
  f22: Key.F22,
  f23: Key.F23,
  f24: Key.F24,

  // Navigation
  home: Key.Home,
  left: Key.Left,
  up: Key.Up,
  right: Key.Right,
  down: Key.Down,
  page_up: Key.PageUp,
  pageup: Key.PageUp,
  prior: Key.PageUp,
  page_down: Key.PageDown,
  pagedown: Key.PageDown,
  next: Key.PageDown,
  end: Key.End,

  // Editing
  return: Key.Return,
  enter: Key.Return,
  tab: Key.Tab,
  space: Key.Space,
  backspace: Key.Backspace,
  delete: Key.Delete,
  del: Key.Delete,
  escape: Key.Escape,
  esc: Key.Escape,
  insert: Key.Insert,
  ins: Key.Insert,

  // Modifiers
  shift_l: Key.LeftShift,
  shift_r: Key.RightShift,
  l_shift: Key.LeftShift,
  r_shift: Key.RightShift,
  shift: Key.LeftShift,

  control_l: Key.LeftControl,
  control_r: Key.RightControl,
  l_control: Key.LeftControl,
  r_control: Key.RightControl,
  control: Key.LeftControl,
  ctrl_l: Key.LeftControl,
  ctrl_r: Key.RightControl,
  l_ctrl: Key.LeftControl,
  r_ctrl: Key.RightControl,
  ctrl: Key.LeftControl,

  alt_l: Key.LeftAlt,
  alt_r: Key.RightAlt,
  l_alt: Key.LeftAlt,
  r_alt: Key.RightAlt,
  alt: Key.LeftAlt,

  super_l: Key.LeftSuper,
  super_r: Key.RightSuper,
  l_super: Key.LeftSuper,
  r_super: Key.RightSuper,
  super: Key.LeftSuper,
  win_l: Key.LeftSuper,
  win_r: Key.RightSuper,
  l_win: Key.LeftSuper,
  r_win: Key.RightSuper,
  win: Key.LeftSuper,
  meta_l: Key.LeftSuper,
  meta_r: Key.RightSuper,
  l_meta: Key.LeftSuper,
  r_meta: Key.RightSuper,
  meta: Key.LeftSuper,
  command: Key.LeftSuper,
  command_l: Key.LeftSuper,
  l_command: Key.LeftSuper,
  command_r: Key.RightSuper,
  r_command: Key.RightSuper,
  cmd: Key.LeftSuper,
  cmd_l: Key.LeftSuper,
  l_cmd: Key.LeftSuper,
  cmd_r: Key.RightSuper,
  r_cmd: Key.RightSuper,

  caps_lock: Key.CapsLock,
  capslock: Key.CapsLock,
  caps: Key.CapsLock,

  // Keypad
  kp_0: Key.NumPad0,
  kp_1: Key.NumPad1,
  kp_2: Key.NumPad2,
  kp_3: Key.NumPad3,
  kp_4: Key.NumPad4,
  kp_5: Key.NumPad5,
  kp_6: Key.NumPad6,
  kp_7: Key.NumPad7,
  kp_8: Key.NumPad8,
  kp_9: Key.NumPad9,
  kp_divide: Key.Divide,
  kp_multiply: Key.Multiply,
  kp_subtract: Key.Subtract,
  kp_add: Key.Add,
  kp_decimal: Key.Decimal,
  kp_equal: Key.NumPadEqual,
  num_lock: Key.NumLock,
  numlock: Key.NumLock,

  // Letters
  a: Key.A,
  b: Key.B,
  c: Key.C,
  d: Key.D,
  e: Key.E,
  f: Key.F,
  g: Key.G,
  h: Key.H,
  i: Key.I,
  j: Key.J,
  k: Key.K,
  l: Key.L,
  m: Key.M,
  n: Key.N,
  o: Key.O,
  p: Key.P,
  q: Key.Q,
  r: Key.R,
  s: Key.S,
  t: Key.T,
  u: Key.U,
  v: Key.V,
  w: Key.W,
  x: Key.X,
  y: Key.Y,
  z: Key.Z,

  // Numbers
  0: Key.Num0,
  1: Key.Num1,
  2: Key.Num2,
  3: Key.Num3,
  4: Key.Num4,
  5: Key.Num5,
  6: Key.Num6,
  7: Key.Num7,
  8: Key.Num8,
  9: Key.Num9,

  // Punctuation
  minus: Key.Minus,
  equal: Key.Equal,
  bracketleft: Key.LeftBracket,
  bracketright: Key.RightBracket,
  bracket_l: Key.LeftBracket,
  bracket_r: Key.RightBracket,
  l_bracket: Key.LeftBracket,
  r_bracket: Key.RightBracket,
  backslash: Key.Backslash,
  semicolon: Key.Semicolon,
  semi: Key.Semicolon,
  quote: Key.Quote,
  grave: Key.Grave,
  comma: Key.Comma,
  period: Key.Period,
  slash: Key.Slash,

  // Media keys
  audio_mute: Key.AudioMute,
  mute: Key.AudioMute,
  audio_vol_down: Key.AudioVolDown,
  voldown: Key.AudioVolDown,
  vol_down: Key.AudioVolDown,
  audio_vol_up: Key.AudioVolUp,
  volup: Key.AudioVolUp,
  vol_up: Key.AudioVolUp,
  audio_play: Key.AudioPlay,
  play: Key.AudioPlay,
  audio_stop: Key.AudioStop,
  stop: Key.AudioStop,
  audio_pause: Key.AudioPause,
  pause: Key.AudioPause,
  audio_prev: Key.AudioPrev,
  audio_next: Key.AudioNext,
};

/** Thrown by `toKeys` when a key name is empty or not present in `keyMap`. */
export class InvalidKeyError extends Error {
  constructor(key: string) {
    super(`Invalid key: ${key}`);
    this.name = 'InvalidKeyError';
  }
}

/**
 * Converts an xdotool-style key string such as `Control_L+a` into nut.js keys.
 *
 * The string is split on `+`; each part is trimmed and lower-cased before being
 * looked up, so matching ignores case and surrounding whitespace.
 *
 * @param xdotoolString - One key name, or several joined with `+`.
 * @returns The mapped keys, in the order they appear in the input.
 * @throws {InvalidKeyError} If the input is empty or any part is not a known key name.
 */
export const toKeys = (xdotoolString: string): Key[] => {
  if (!xdotoolString) {
    throw new InvalidKeyError('Empty string');
  }

  return xdotoolString.split('+').map((keyStr) => {
    const key = keyStr.trim().toLowerCase();
    // Own-property check: a plain `keyMap[key]` read also sees inherited
    // members, so names like `constructor` or `__proto__` would be returned as
    // if they were keys instead of being rejected.
    const mappedKey = Object.prototype.hasOwnProperty.call(keyMap, key) ? keyMap[key] : undefined;

    if (mappedKey === undefined) {
      throw new InvalidKeyError(key);
    }

    return mappedKey;
  });
};
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # computer-use-mcp 2 | 3 | 💻 An model context protocol server for Claude to control your computer. 
This is very similar to [computer use](https://docs.anthropic.com/en/docs/build-with-claude/computer-use), but easy to set up and use locally. 4 | 5 | Here's Claude Haiku 4.5 changing my desktop background (4x speed): 6 | 7 | https://github.com/user-attachments/assets/cd0bc190-52c4-49db-b3bc-4b8a74544789 8 | 9 | > [!WARNING] 10 | > At time of writing, models make frequent mistakes and are vulnerable to prompt injections. As this MCP server gives the model complete control of your computer, this could do a lot of damage. You should therefore treat this like giving a hyperactive toddler access to your computer - you probably want to supervise it closely, and consider only doing this in a sandboxed user account. 11 | 12 | ## Installation 13 | 14 |
15 | Claude Code 16 | 17 | Run: 18 | 19 | ```bash 20 | claude mcp add --scope user --transport stdio computer-use -- npx -y computer-use-mcp 21 | ``` 22 | 23 | This installs the server at user scope (available in all projects). To install locally (current directory only), omit `--scope user`. 24 | 25 |
26 | 27 |
28 | Claude Desktop 29 | 30 | #### (Recommended) Via manual .mcpb installation 31 | 32 | 1. Find the latest build in [the GitHub Actions history](https://github.com/domdomegg/computer-use-mcp/actions/workflows/ci.yaml?query=branch%3Amaster) (the top one) 33 | 2. In the 'Artifacts' section, download the `computer-use-mcp-mcpb` file 34 | 3. Rename the `.zip` file to `.mcpb` 35 | 4. Double-click the `.mcpb` file to open with Claude Desktop 36 | 5. Click "Install" 37 | 38 | #### (Advanced) Alternative: Via JSON configuration 39 | 40 | 1. Install [Node.js](https://nodejs.org/en/download) 41 | 2. Open Claude Desktop and go to Settings → Developer 42 | 3. Click "Edit Config" to open your `claude_desktop_config.json` file 43 | 4. Add the following configuration to the "mcpServers" section: 44 | 45 | ```json 46 | { 47 | "mcpServers": { 48 | "computer-use": { 49 | "command": "npx", 50 | "args": [ 51 | "-y", 52 | "computer-use-mcp" 53 | ] 54 | } 55 | } 56 | } 57 | ``` 58 | 59 | 5. Save the file and restart Claude Desktop 60 | 61 |
62 | 63 |
64 | Cursor 65 | 66 | #### (Recommended) Via one-click install 67 | 68 | 1. Click [![Install MCP Server](https://cursor.com/deeplink/mcp-install-dark.svg)](https://cursor.com/install-mcp?name=computer-use&config=JTdCJTIyY29tbWFuZCUyMiUzQSUyMm5weCUyMC15JTIwY29tcHV0ZXItdXNlLW1jcCUyMiU3RA%3D%3D) 69 | 70 | #### (Advanced) Alternative: Via JSON configuration 71 | 72 | Create either a global (`~/.cursor/mcp.json`) or project-specific (`.cursor/mcp.json`) configuration file: 73 | 74 | ```json 75 | { 76 | "mcpServers": { 77 | "computer-use": { 78 | "command": "npx", 79 | "args": ["-y", "computer-use-mcp"] 80 | } 81 | } 82 | } 83 | ``` 84 | 85 |
86 | 87 |
88 | Cline 89 | 90 | #### (Recommended) Via marketplace 91 | 92 | 1. Click the "MCP Servers" icon in the Cline extension 93 | 2. Search for "Computer Use" and click "Install" 94 | 3. Follow the prompts to install the server 95 | 96 | #### (Advanced) Alternative: Via JSON configuration 97 | 98 | 1. Click the "MCP Servers" icon in the Cline extension 99 | 2. Click on the "Installed" tab, then the "Configure MCP Servers" button at the bottom 100 | 3. Add the following configuration to the "mcpServers" section: 101 | 102 | ```json 103 | { 104 | "mcpServers": { 105 | "computer-use": { 106 | "type": "stdio", 107 | "command": "npx", 108 | "args": ["-y", "computer-use-mcp"] 109 | } 110 | } 111 | } 112 | ``` 113 | 114 |
115 | 116 | ## Tips 117 | 118 | This should just work out of the box. 119 | 120 | However, to get best results: 121 | - Use a model good at computer use - I recommend [the latest Claude models](https://platform.claude.com/docs/en/about-claude/models/overview). 122 | - Use a small, common resolution - 720p works particularly well. On macOS, you can use [displayoverride-mac](https://github.com/domdomegg/displayoverride-mac) to do this. If you can't use a different resolution, try zooming in to active windows. 123 | - Install and enable the [Rango browser extension](https://chromewebstore.google.com/detail/rango/lnemjdnjjofijemhdogofbpcedhgcpmb). This enables keyboard navigation for websites, which is far more reliable than Claude trying to click coordinates. You can bump up the font size setting in Rango to make the hints more visible. 124 | 125 | ## How it works 126 | 127 | We implement a near-identical computer use tool to [Anthropic's official computer use guide](https://docs.anthropic.com/en/docs/build-with-claude/computer-use), with some more nudging to prefer keyboard shortcuts. 128 | 129 | This talks to your computer using [nut.js](https://github.com/nut-tree/nut.js). 130 | 131 | ## Contributing 132 | 133 | Pull requests are welcomed on GitHub! To get started: 134 | 135 | 1. Install Git and Node.js 136 | 2. Clone the repository 137 | 3. Install dependencies with `npm install` 138 | 4. Run `npm run test` to run tests 139 | 5. Build with `npm run build` 140 | 141 | ## Releases 142 | 143 | Versions follow the [semantic versioning spec](https://semver.org/). 144 | 145 | To release: 146 | 147 | 1. Use `npm version <major | minor | patch>` to bump the version 148 | 2. Run `git push --follow-tags` to push with tags 149 | 3. Wait for GitHub Actions to publish to the NPM registry. 
150 |
--------------------------------------------------------------------------------
/src/e2e.test.ts:
--------------------------------------------------------------------------------
import {
  describe, test, expect, beforeEach, afterEach,
} from 'vitest';
import type {
  JSONRPCMessage,
  JSONRPCRequest,
  JSONRPCResponse,
  ListToolsResult,
} from '@modelcontextprotocol/sdk/types.js';
import {InMemoryTransport} from '@modelcontextprotocol/sdk/inMemory.js';
import {execSync, spawn} from 'node:child_process';
import {existsSync} from 'node:fs';
import * as fs from 'node:fs';
import * as path from 'node:path';
import {createServer} from './index.js';

/** Minimal JSON-RPC client surface shared by every deployment method under test. */
type MCPClient = {
  sendRequest: <T>(message: JSONRPCRequest) => Promise<T>;
  close: () => Promise<void>;
};

/** Book-keeping for a request that has been written to the server but not answered yet. */
type PendingRequest = {
  resolve: (value: unknown) => void;
  reject: (error: Error) => void;
  timer: ReturnType<typeof setTimeout>;
};

/** How long a process-based request may stay unanswered before it is rejected. */
const requestTimeoutMs = 10_000;

/**
 * Creates an MCP client that communicates with a spawned process via stdin/stdout.
 *
 * Requests are written as newline-delimited JSON; responses are matched to
 * requests by `id`. Each request is rejected with "Request timeout" if no
 * response arrives within 10 seconds.
 *
 * @param serverProcess The spawned server process (stdin/stdout must be piped).
 * @param cleanup Optional callback run after the process is killed in `close()`.
 * @returns A client whose `sendRequest` resolves with the JSON-RPC `result`.
 */
function createProcessBasedClient(
  serverProcess: ReturnType<typeof spawn>,
  cleanup?: () => void,
): MCPClient {
  let requestId = 1;
  const pendingRequests = new Map<string, PendingRequest>();

  // Removes a pending request and cancels its timeout so no timer outlives the request.
  const takePending = (id: string): PendingRequest | undefined => {
    const pending = pendingRequests.get(id);
    if (pending) {
      clearTimeout(pending.timer);
      pendingRequests.delete(id);
    }

    return pending;
  };

  // Handles one complete line of server output.
  const handleLine = (line: string): void => {
    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Ignore non-JSON lines
      return;
    }

    if (typeof parsed !== 'object' || parsed === null) {
      return;
    }

    const response = parsed as {id?: unknown; result?: unknown; error?: {message?: string}};
    if (typeof response.id !== 'string' || !pendingRequests.has(response.id)) {
      return;
    }

    const pending = takePending(response.id);
    if (!pending) {
      return;
    }

    if ('result' in response) {
      pending.resolve(response.result);
    } else if ('error' in response) {
      pending.reject(new Error(response.error?.message || 'Unknown error'));
    }
  };

  // stdout is a byte stream: a single 'data' chunk may end in the middle of a
  // JSON line. Keep the unterminated tail and prepend it to the next chunk.
  let stdoutBuffer = '';
  serverProcess.stdout?.on('data', (data: Buffer | string) => {
    stdoutBuffer += data.toString();
    const lines = stdoutBuffer.split('\n');
    stdoutBuffer = lines.pop() ?? '';

    for (const line of lines) {
      if (line.trim()) {
        handleLine(line);
      }
    }
  });

  const sendRequest = async <T>(message: JSONRPCRequest): Promise<T> => {
    return new Promise<T>((resolve, reject) => {
      const id = String(requestId);
      requestId += 1;
      const requestWithId = {...message, id};

      // Timeout
      const timer = setTimeout(() => {
        if (takePending(id)) {
          reject(new Error('Request timeout'));
        }
      }, requestTimeoutMs);

      pendingRequests.set(id, {resolve: resolve as (value: unknown) => void, reject, timer});

      try {
        serverProcess.stdin?.write(`${JSON.stringify(requestWithId)}\n`);
      } catch (e: unknown) {
        takePending(id);
        reject(e instanceof Error ? e : new Error(String(e)));
      }
    });
  };

  return {
    sendRequest,
    async close() {
      // Cancel outstanding timeouts so they cannot fire after teardown.
      for (const pending of pendingRequests.values()) {
        clearTimeout(pending.timer);
      }

      pendingRequests.clear();

      try {
        serverProcess.kill();
      } catch {
        // Process might already be dead
      }

      // Run any additional cleanup
      if (cleanup) {
        cleanup();
      }
    },
  };
}

/**
 * Main test suite that runs the same tests across different deployment methods
 */
describe.each([
  {
    name: 'InMemory Transport',
    condition: true,
    async createClient(): Promise<MCPClient> {
      const server = createServer();
      const [serverTransport, clientTransport] = InMemoryTransport.createLinkedPair();
      await server.connect(serverTransport);

      // Note: handlers are replaced on every call, so only one request may be in flight at a time.
      const sendRequest = async <T>(message: JSONRPCRequest): Promise<T> => {
        return new Promise<T>((resolve, reject) => {
          clientTransport.onmessage = (response: JSONRPCMessage) => {
            const typedResponse = response as JSONRPCResponse;
            if ('result' in typedResponse) {
              resolve(typedResponse.result as T);
              return;
            }

            reject(new Error('No result in response'));
          };

          clientTransport.onerror = (err: Error) => {
            reject(err);
          };

          clientTransport.send(message).catch((err: unknown) => {
            reject(err instanceof Error ? err : new Error(String(err)));
          });
        });
      };

      return {
        sendRequest,
        close: async () => server.close(),
      };
    },
  },
  {
    name: 'DXT Package',
    // Only runs when RUN_DXT_TEST is set, since it needs the packaged build and `unzip`.
    condition: process.env.RUN_DXT_TEST,
    async createClient(): Promise<MCPClient> {
      // Build DXT package if it doesn't exist
      if (!existsSync('computer-use-mcp.dxt')) {
        execSync('./build-dxt.sh', {stdio: 'inherit'});
      }

      // Extract DXT package to a fresh test directory
      const testDir = 'test-dxt-client';
      fs.rmSync(testDir, {recursive: true, force: true});
      fs.mkdirSync(testDir, {recursive: true});
      execSync(`unzip -q computer-use-mcp.dxt -d ${testDir}`);

      // Start the MCP server from the extracted DXT package
      const serverProcess = spawn('node', [path.join(testDir, 'dist/index.js')], {
        stdio: ['pipe', 'pipe', 'pipe'],
        env: {...process.env},
      });

      return createProcessBasedClient(
        serverProcess,
        () => {
          // Clean up test directory (force: true makes this a no-op if it is already gone)
          fs.rmSync(testDir, {recursive: true, force: true});
        },
      );
    },
  },
])('MCP Server Tests - $name', ({name, condition, createClient}) => {
  (condition ? describe : describe.skip)(`${name} Integration`, () => {
    let client: MCPClient;

    beforeEach(async () => {
      client = await createClient();
    }, 60_000);

    afterEach(async () => {
      if (client) {
        await client.close();
      }
    });

    test('should list available tools', async () => {
      const result = await client.sendRequest<ListToolsResult>({
        jsonrpc: '2.0',
        id: '1',
        method: 'tools/list',
        params: {},
      });

      expect(result.tools.map((t) => t.name)).toEqual([
        'computer',
      ]);
      expect(result.tools[0]).toMatchObject({
        name: 'computer',
        description: expect.any(String),
        inputSchema: expect.objectContaining({
          type: 'object',
        }),
      });
    }, 30_000);
  });
});

--------------------------------------------------------------------------------
/src/tools/computer.ts:
--------------------------------------------------------------------------------
import type {McpServer} from '@modelcontextprotocol/sdk/server/mcp.js';
import {z} from 'zod';
import {
  mouse,
  keyboard,
  Point,
  screen,
  Button,
  imageToJimp,
} from '@nut-tree-fork/nut-js';
import {setTimeout} from 'node:timers/promises';
import sharp from 'sharp';
import {toKeys} from '../xdotoolStringToKeys.js';
import {jsonResult} from '../utils/response.js';

// Configure nut-js
mouse.config.autoDelayMs = 100;
mouse.config.mouseSpeed = 1000;
keyboard.config.autoDelayMs = 10;

// The Claude API automatically downsamples images larger than ~1.15MP or 1568px on the long edge.
// We already downsampled screenshots to fit these limits and reported the original screen
// dimensions via display_width_px/display_height_px, but Claude wasn't correctly using those
// reported dimensions - it was using coordinates from the downsampled image space directly.
// As a workaround, we now report the actual image dimensions and scale Claude's coordinates
// back up to logical screen coordinates.
// See: https://docs.anthropic.com/en/docs/build-with-claude/vision#evaluate-image-size
const maxLongEdge = 1568;
const maxPixels = 1.15 * 1024 * 1024; // 1.15 megapixels

/** Image type produced by nut-js' `imageToJimp`, derived so no extra import is needed. */
type ScreenshotImage = ReturnType<typeof imageToJimp>;

/**
 * Calculate the scale factor to downsample an image to fit API limits.
 * Returns a value <= 1 representing how much to shrink the image.
 */
function getSizeToApiScale(width: number, height: number): number {
  const longEdge = Math.max(width, height);
  const totalPixels = width * height;

  const longEdgeScale = longEdge > maxLongEdge ? maxLongEdge / longEdge : 1;
  const pixelScale = totalPixels > maxPixels ? Math.sqrt(maxPixels / totalPixels) : 1;

  return Math.min(longEdgeScale, pixelScale);
}

/** Read the screen size as reported by nut-js (the space mouse coordinates live in). */
async function getLogicalScreenSize(): Promise<{width: number; height: number}> {
  const width = await screen.width();
  const height = await screen.height();
  return {width, height};
}

/**
 * Get the scale factor from API image coordinates to logical screen coordinates.
 * This is the inverse of the downsampling we apply to fit API limits.
 */
async function getApiToLogicalScale(): Promise<number> {
  const {width, height} = await getLogicalScreenSize();
  return 1 / getSizeToApiScale(width, height);
}

/**
 * Convert a coordinate given in API image space into logical screen space.
 *
 * @throws Error if the scaled coordinate falls outside the display bounds.
 */
async function toLogicalCoordinate(coordinate: [number, number]): Promise<[number, number]> {
  const {width, height} = await getLogicalScreenSize();
  const scale = 1 / getSizeToApiScale(width, height);
  const x = Math.round(coordinate[0] * scale);
  const y = Math.round(coordinate[1] * scale);

  // Validate coordinates are within display bounds
  if (x < 0 || x >= width || y < 0 || y >= height) {
    throw new Error(`Coordinates (${x}, ${y}) are outside display bounds of ${width}x${height}`);
  }

  return [x, y];
}

/** Move the cursor to a logical screen coordinate. */
async function moveTo(coordinate: [number, number]): Promise<void> {
  await mouse.setPosition(new Point(coordinate[0], coordinate[1]));
}

/** Scroll handlers keyed by (lower-case) direction name. */
const scrollByDirection = new Map<string, (amount: number) => Promise<unknown>>([
  ['up', async (amount) => mouse.scrollUp(amount)],
  ['down', async (amount) => mouse.scrollDown(amount)],
  ['left', async (amount) => mouse.scrollLeft(amount)],
  ['right', async (amount) => mouse.scrollRight(amount)],
]);

/**
 * Draw a red, 3px-thick crosshair centred on (centerX, centerY).
 * Pixels that would fall outside the image are skipped; an arm is only drawn
 * when its own axis position lies inside the image.
 */
function drawCrosshair(image: ScreenshotImage, centerX: number, centerY: number): void {
  const crosshairSize = 20;
  const crosshairColor = 0xFF0000FF; // Red with full opacity (RGBA)
  const width = image.getWidth();
  const height = image.getHeight();
  const rowVisible = centerY >= 0 && centerY < height;
  const columnVisible = centerX >= 0 && centerX < width;

  const paint = (x: number, y: number): void => {
    if (x >= 0 && x < width && y >= 0 && y < height) {
      image.setPixelColor(crosshairColor, x, y);
    }
  };

  for (let offset = -crosshairSize; offset <= crosshairSize; offset++) {
    // thickness -1..1 makes each arm three pixels wide
    for (let thickness = -1; thickness <= 1; thickness++) {
      if (rowVisible) {
        paint(centerX + offset, centerY + thickness); // horizontal arm
      }

      if (columnVisible) {
        paint(centerX + thickness, centerY + offset); // vertical arm
      }
    }
  }
}

/**
 * Capture the screen, downsample it to API image space, mark the cursor with
 * a crosshair and return it as a base64-encoded PNG.
 */
async function captureAnnotatedScreenshot(): Promise<{base64Data: string; imageWidth: number; imageHeight: number}> {
  // Get cursor position in logical coordinates
  const cursorPos = await mouse.getPosition();

  // Capture the entire screen (may be at Retina resolution)
  const image = imageToJimp(await screen.grab());

  // Size the image from the LOGICAL screen size, not the captured pixel size.
  // Incoming coordinates are scaled using the logical size, so the image must
  // share that space; otherwise a HiDPI capture of a small logical display
  // yields an image larger than the coordinate space clicks are mapped from.
  // NOTE(review): assumes screen.width()/height() report logical size while
  // screen.grab() may return physical pixels - confirm against nut-js docs.
  const {width: logicalWidth, height: logicalHeight} = await getLogicalScreenSize();
  const apiScaleFactor = getSizeToApiScale(logicalWidth, logicalHeight);
  const targetWidth = Math.floor(logicalWidth * apiScaleFactor);
  const targetHeight = Math.floor(logicalHeight * apiScaleFactor);
  if (image.getWidth() !== targetWidth || image.getHeight() !== targetHeight) {
    image.resize(targetWidth, targetHeight);
  }

  // Cursor is in logical coords; convert to API image coords
  drawCrosshair(
    image,
    Math.floor(cursorPos.x * apiScaleFactor),
    Math.floor(cursorPos.y * apiScaleFactor),
  );

  // Get PNG buffer from Jimp
  const pngBuffer = await image.getBufferAsync('image/png');

  // Compress PNG using sharp, to fit size limits
  const optimizedBuffer = await sharp(pngBuffer)
    .png({quality: 80, compressionLevel: 9})
    .toBuffer();

  return {
    base64Data: optimizedBuffer.toString('base64'),
    imageWidth: image.getWidth(),
    imageHeight: image.getHeight(),
  };
}

// Define the action enum values
const ActionEnum = z.enum([
  'key',
  'type',
  'mouse_move',
  'left_click',
  'left_click_drag',
  'right_click',
  'middle_click',
  'double_click',
  'scroll',
  'get_screenshot',
  'get_cursor_position',
]);

const actionDescription = `The action to perform. The available actions are:
* key: Press a key or key-combination on the keyboard.
* type: Type a string of text on the keyboard.
* get_cursor_position: Get the current (x, y) pixel coordinate of the cursor on the screen.
* mouse_move: Move the cursor to a specified (x, y) pixel coordinate on the screen.
* left_click: Click the left mouse button. If coordinate is provided, moves to that position first.
* left_click_drag: Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.
* right_click: Click the right mouse button. If coordinate is provided, moves to that position first.
* middle_click: Click the middle mouse button. If coordinate is provided, moves to that position first.
* double_click: Double-click the left mouse button. If coordinate is provided, moves to that position first.
* scroll: Scroll the screen in a specified direction. Requires coordinate (moves there first) and text parameter with direction: "up", "down", "left", or "right". Optionally append ":N" to scroll N pixels (default 300), e.g. "down:500".
* get_screenshot: Take a screenshot of the screen.`;

const toolDescription = `Use a mouse and keyboard to interact with a computer, and take screenshots.
* This is an interface to a desktop GUI. You do not have access to a terminal or applications menu. You must click on desktop icons to start applications.
* Always prefer using keyboard shortcuts rather than clicking, where possible.
* If you see boxes with two letters in them, typing these letters will click that element. Use this instead of other shortcuts or clicking, where possible.
* Some applications may take time to start or process actions, so you may need to wait and take successive screenshots to see the results of your actions. E.g. if you click on Firefox and a window doesn't open, try taking another screenshot.
* Whenever you intend to move the cursor to click on an element like an icon, you should consult a screenshot to determine the coordinates of the element before moving the cursor.
* If you tried clicking on a program or link but it failed to load, even after waiting, try adjusting your cursor position so that the tip of the cursor visually falls on the element that you want to click.
* Make sure to click any buttons, links, icons, etc with the cursor tip in the center of the element. Don't click boxes on their edges unless asked.

Using the crosshair:
* Screenshots show a red crosshair at the current cursor position.
* After clicking, check where the crosshair appears vs your target. If it missed, adjust coordinates proportionally to the distance - start with large adjustments and refine. Avoid small incremental changes when the crosshair is far from the target (distances are often further than you expect).
* Consider display dimensions when estimating positions. E.g. if it's 90% to the bottom of the screen, the coordinates should reflect this.`;

/**
 * Register the `computer` tool on the given MCP server.
 *
 * The tool accepts an `action`, an optional `coordinate` (in screenshot image
 * space) and optional `text`, and drives the mouse/keyboard via nut-js.
 * Handlers throw an Error when a required argument is missing or invalid.
 */
export function registerComputer(server: McpServer): void {
  server.registerTool(
    'computer',
    {
      title: 'Computer Control',
      description: toolDescription,
      inputSchema: {
        action: ActionEnum.describe(actionDescription),
        coordinate: z.tuple([z.number(), z.number()]).optional().describe('(x, y): The x (pixels from the left edge) and y (pixels from the top edge) coordinates'),
        text: z.string().optional().describe('Text to type or key command to execute'),
      },
      // Note: No outputSchema because this tool returns varying content types including images
      annotations: {
        readOnlyHint: false,
      },
    },
    async ({action, coordinate, text}) => {
      // Scale coordinates from API image space to logical screen space (and bounds-check them)
      const scaledCoordinate = coordinate ? await toLogicalCoordinate(coordinate) : undefined;

      // Implement system actions using nut-js
      switch (action) {
        case 'key': {
          if (!text) {
            throw new Error('Text required for key');
          }

          const keys = toKeys(text);
          try {
            await keyboard.pressKey(...keys);
          } finally {
            // Always release, so a failed press cannot leave modifier keys held down
            await keyboard.releaseKey(...keys);
          }

          return jsonResult({ok: true});
        }

        case 'type': {
          if (!text) {
            throw new Error('Text required for type');
          }

          await keyboard.type(text);
          return jsonResult({ok: true});
        }

        case 'get_cursor_position': {
          const pos = await mouse.getPosition();
          const scale = await getApiToLogicalScale();
          // Return coordinates in API image space (scaled down from logical)
          // so Claude can correlate with what it sees in screenshots
          return jsonResult({
            x: Math.round(pos.x / scale),
            y: Math.round(pos.y / scale),
          });
        }

        case 'mouse_move': {
          if (!scaledCoordinate) {
            throw new Error('Coordinate required for mouse_move');
          }

          await moveTo(scaledCoordinate);
          return jsonResult({ok: true});
        }

        case 'left_click': {
          if (scaledCoordinate) {
            await moveTo(scaledCoordinate);
          }

          await mouse.leftClick();
          return jsonResult({ok: true});
        }

        case 'left_click_drag': {
          if (!scaledCoordinate) {
            throw new Error('Coordinate required for left_click_drag');
          }

          await mouse.pressButton(Button.LEFT);
          try {
            await moveTo(scaledCoordinate);
          } finally {
            // Always release, so a failed move cannot leave the button held down
            await mouse.releaseButton(Button.LEFT);
          }

          return jsonResult({ok: true});
        }

        case 'right_click': {
          if (scaledCoordinate) {
            await moveTo(scaledCoordinate);
          }

          await mouse.rightClick();
          return jsonResult({ok: true});
        }

        case 'middle_click': {
          if (scaledCoordinate) {
            await moveTo(scaledCoordinate);
          }

          await mouse.click(Button.MIDDLE);
          return jsonResult({ok: true});
        }

        case 'double_click': {
          if (scaledCoordinate) {
            await moveTo(scaledCoordinate);
          }

          await mouse.doubleClick(Button.LEFT);
          return jsonResult({ok: true});
        }

        case 'scroll': {
          if (!scaledCoordinate) {
            throw new Error('Coordinate required for scroll');
          }

          if (!text) {
            throw new Error('Text required for scroll (direction like "up", "down:5")');
          }

          // Parse direction and optional amount from text (e.g. "down" or "down:5")
          const [direction, amountStr] = text.split(':');
          const amount = amountStr ? parseInt(amountStr, 10) : 300;

          if (!direction) {
            throw new Error('Scroll direction required');
          }

          if (amountStr !== undefined && (isNaN(amount) || amount <= 0)) {
            throw new Error(`Invalid scroll amount: ${amountStr}`);
          }

          // Validate the direction before touching the mouse, so an invalid
          // request has no side effects
          const scroll = scrollByDirection.get(direction.toLowerCase());
          if (!scroll) {
            throw new Error(`Invalid scroll direction: ${direction}. Use "up", "down", "left", or "right"`);
          }

          // Move to position first, then scroll in the specified direction
          await moveTo(scaledCoordinate);
          await scroll(amount);

          return jsonResult({ok: true});
        }

        case 'get_screenshot': {
          // Wait a bit to let things load before showing it to Claude
          await setTimeout(1000);

          const {base64Data, imageWidth, imageHeight} = await captureAnnotatedScreenshot();

          return {
            content: [
              {
                type: 'text',
                text: JSON.stringify({
                  // Report the image dimensions - Claude should use coordinates within this space
                  // These may differ from the actual display due to scaling for API limits
                  image_width: imageWidth,
                  image_height: imageHeight,
                }),
              },
              {
                type: 'image',
                data: base64Data,
                mimeType: 'image/png',
              },
            ],
          };
        }
      }
    },
  );
}

--------------------------------------------------------------------------------