├── .nvmrc
├── .prettierignore
├── .dockerignore
├── .gitignore
├── .prettierrc
├── .env.example
├── assets
│   ├── ai-chat.jpg
│   └── raycast-settings.jpg
├── src
│   ├── logger.ts
│   ├── errors
│   │   └── index.ts
│   ├── routes
│   │   └── api.ts
│   ├── config.ts
│   ├── app.ts
│   ├── index.ts
│   ├── middleware
│   │   └── index.ts
│   ├── data
│   │   └── models.ts
│   ├── controllers
│   │   └── api.ts
│   └── util.ts
├── Dockerfile
├── tsconfig.json
├── .github
│   └── workflows
│       └── ci.yml
├── eslint.config.mjs
├── models.json
├── docker-compose.yml
├── package.json
└── README.md
/.nvmrc:
--------------------------------------------------------------------------------
22
--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
node_modules
dist
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
node_modules
.env
.git
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
/node_modules
.env
/dist
--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
{
  "singleQuote": true,
  "printWidth": 100
}
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
PORT="3000"
API_KEY="sk-or-..."
BASE_URL="https://openrouter.ai/api/v1"
--------------------------------------------------------------------------------
/assets/ai-chat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miikkaylisiurunen/raycast-ai-openrouter-proxy/HEAD/assets/ai-chat.jpg
--------------------------------------------------------------------------------
/assets/raycast-settings.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/miikkaylisiurunen/raycast-ai-openrouter-proxy/HEAD/assets/raycast-settings.jpg
--------------------------------------------------------------------------------
/src/logger.ts:
--------------------------------------------------------------------------------
import pino, { Logger } from 'pino';

export function makeLogger(): Logger {
  return pino({
    redact: ['req.headers.authorization', 'req.headers.cookie'],
  });
}
--------------------------------------------------------------------------------
/src/errors/index.ts:
--------------------------------------------------------------------------------
export class HttpError extends Error {
  readonly status: number;
  readonly message: string;

  constructor(status: number, message: string) {
    super(message);
    this.name = 'HttpError';
    this.status = status;
    this.message = message;
  }
}
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM node:22-slim AS base
WORKDIR /app
COPY package*.json ./
RUN npm ci
COPY . .
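# Build stage: compile the TypeScript sources; only the resulting dist/ output
# is copied into the smaller runtime image below.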
RUN npm run build

FROM node:22-slim
WORKDIR /app
COPY package*.json ./
ENV NODE_ENV=production
RUN npm ci --omit=dev
COPY --from=base /app/dist ./dist
CMD ["npm", "start"]
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
{
  "compilerOptions": {
    "lib": ["es2022"],
    "target": "es2022",
    "module": "NodeNext",
    "moduleResolution": "node16",

    "esModuleInterop": true,
    "forceConsistentCasingInFileNames": true,
    "strict": true,
    "skipLibCheck": true,
    "resolveJsonModule": true,

    "rootDir": "src",
    "outDir": "dist"
  },
  "include": ["src"]
}
--------------------------------------------------------------------------------
/src/routes/api.ts:
--------------------------------------------------------------------------------
import { Router } from 'express';
import { makeApiController } from '../controllers/api';
import { AppContext } from '../app';

export const makeApiRoutes = (ctx: AppContext): Router => {
  const router = Router();
  const controller = makeApiController(ctx);

  router.get('/tags', controller.getTags);
  router.post('/show', controller.getModelInfo);
  router.post('/chat', controller.chatCompletion);

  return router;
};
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
name: CI

on:
  push:
    branches:
      - main
  pull_request:
  workflow_call:
  workflow_dispatch:

jobs:
  build:
    name: Build
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 22

      - name: Install dependencies
        run: npm ci

      - name: Build project
        run: npm run build
--------------------------------------------------------------------------------
/src/config.ts:
--------------------------------------------------------------------------------
import * as dotenv from 'dotenv';
import { z } from 'zod/v4';

export const Config = z.object({
  port: z.coerce.number().int().positive().default(3000),
  apiKey: z.string().trim().min(1, 'API key is required'),
  baseUrl: z.url().default('https://openrouter.ai/api/v1'),
});
export type Config = z.infer<typeof Config>;

export const getConfig = (): Config => {
  dotenv.config();
  return Config.parse({
    port: process.env.PORT,
    apiKey: process.env.API_KEY,
    baseUrl: process.env.BASE_URL,
  });
};
--------------------------------------------------------------------------------
/eslint.config.mjs:
--------------------------------------------------------------------------------
// @ts-check

import eslint from '@eslint/js';
import tseslint from 'typescript-eslint';
import eslintPluginPrettierRecommended from 'eslint-plugin-prettier/recommended';

export default tseslint.config(
  { ignores: ['dist/', 'node_modules/'] },
  eslint.configs.recommended,
  tseslint.configs.recommended,
  eslintPluginPrettierRecommended,
  {
    rules: {
      'prettier/prettier': 'warn',
      '@typescript-eslint/no-unused-vars': [
        'error',
        {
          argsIgnorePattern: '^_',
          caughtErrorsIgnorePattern: '^_',
          destructuredArrayIgnorePattern: '^_',
        },
      ],
    },
  },
);
--------------------------------------------------------------------------------
/src/app.ts:
--------------------------------------------------------------------------------
import express, { Express } from 'express';
import { Middleware } from './middleware';
import { makeApiRoutes } from './routes/api';
import { Config } from './config';
import { ModelConfig } from './data/models';
import OpenAI from 'openai';

export interface AppContext {
  middleware: Middleware;
  config: Config;
  models: ModelConfig[];
  openai: OpenAI;
}

export function makeApp(ctx: AppContext): Express {
  const app = express();
  app.use(express.json({ limit: '100mb' }));
  app.use(ctx.middleware.logger);

  app.use('/api', makeApiRoutes(ctx));

  app.use(ctx.middleware.routeNotFound);
  app.use(ctx.middleware.errorHandler);

  return app;
}
--------------------------------------------------------------------------------
/models.json:
--------------------------------------------------------------------------------
[
  {
    "name": "Gemini 2.5 Flash",
    "id": "google/gemini-2.5-flash-preview-05-20",
    "contextLength": 1000000,
    "capabilities": ["vision", "tools"],
    "temperature": 0
  },
  {
    "name": "Gemini 2.5 Flash Thinking",
    "id": "google/gemini-2.5-flash-preview-05-20:thinking",
    "contextLength": 1000000,
    "capabilities": ["vision", "tools"],
    "temperature": 0
  },
  {
    "name": "DeepSeek V3",
    "id": "deepseek/deepseek-chat-v3-0324",
    "contextLength": 128000,
    "capabilities": ["tools"]
  },
  {
    "name": "GPT-4o Mini",
    "id": "openai/gpt-4o-mini",
    "contextLength": 128000,
    "capabilities": ["vision", "tools"]
  },
  {
    "name": "Claude Sonnet 4",
    "id": "anthropic/claude-sonnet-4",
    "contextLength": 200000,
    "capabilities": ["vision", "tools"],
    "temperature": 0.7
  }
]
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
services:
  raycast-ai-proxy:
    restart: unless-stopped
    build: .
    # The proxy runs on port 3000 inside the container.
    # Change the host port if needed.
    # You need to set this port in Raycast settings.
    ports:
      - "11435:3000"
    # Mount the local 'models.json' file into the container.
    # This file contains the model definitions used by the proxy.
    volumes:
      - ./models.json:/app/models.json:ro
    environment:
      # Set the API key as an environment variable.
      # For production environments, it's highly recommended to use
      # a .env file or Docker secrets for sensitive information like API keys,
      # rather than hardcoding them here.
      - API_KEY=YOUR_API_KEY
      # Set the base URL for the API.
      # This should be an OpenAI-compatible API endpoint.
      # The default is OpenRouter.
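      # For example, to point the proxy at OpenAI instead of OpenRouter
      # (assuming you have an OpenAI API key), you could set:
      #   - BASE_URL=https://api.openai.com/v1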
      - BASE_URL=https://openrouter.ai/api/v1
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "raycast-ai-openrouter-proxy",
  "version": "0.0.2",
  "description": "",
  "main": "src/index.ts",
  "scripts": {
    "start": "node dist/index.js",
    "dev": "tsx watch src/index.ts",
    "build": "tsc",
    "lint": "eslint . --max-warnings 0",
    "lint:fix": "npm run lint -- --fix"
  },
  "dependencies": {
    "dotenv": "^16.5.0",
    "express": "^5.1.0",
    "openai": "^5.0.1",
    "pino": "^9.7.0",
    "pino-http": "^10.4.0",
    "zod": "^3.25.42"
  },
  "devDependencies": {
    "@eslint/js": "^9.27.0",
    "@types/express": "^5.0.2",
    "@types/node": "^22.15.28",
    "eslint": "^9.27.0",
    "eslint-config-prettier": "^10.1.5",
    "eslint-plugin-prettier": "^5.4.1",
    "prettier": "^3.5.3",
    "tsx": "^4.19.4",
    "typescript": "^5.8.3",
    "typescript-eslint": "^8.33.0"
  },
  "engines": {
    "node": ">=20"
  }
}
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
import OpenAI from 'openai';
import { z } from 'zod/v4';
import { makeApp } from './app';
import { getConfig } from './config';
import { loadModels } from './data/models';
import { makeLogger } from './logger';
import { makeMiddleware } from './middleware';

async function main() {
  const config = getConfig();
  const models = loadModels();

  if (models instanceof z.ZodError) {
    console.log(`Invalid model configuration:\n${z.prettifyError(models)}`);
    process.exit(1);
  }

  const logger = makeLogger();
  const middleware = makeMiddleware(logger);
  const openai = new OpenAI({
    baseURL: config.baseUrl,
    apiKey: config.apiKey,
  });
  const app = makeApp({ config, middleware, models, openai });

  app.listen(config.port, () => {
    logger.info(`Server is up on port ${config.port}`);
  });
}

main().catch((error) => {
  console.log(error);
  process.exit(1);
});
--------------------------------------------------------------------------------
/src/middleware/index.ts:
--------------------------------------------------------------------------------
import { NextFunction, Request, Response } from 'express';
import { ZodError } from 'zod/v4';
import { HttpError } from '../errors';
import { HttpLogger, pinoHttp } from 'pino-http';
import { Logger } from 'pino';
import { randomUUID } from 'node:crypto';
import { makeOllamaChunk, makeSSEMessage } from '../util';

interface ErrorBody {
  status: number;
  error: string;
  name: string;
}

export interface Middleware {
  logger: HttpLogger;
  routeNotFound(req: Request, res: Response, next: NextFunction): void;
  errorHandler(err: Error, req: Request, res: Response, next: NextFunction): void;
}

export const makeMiddleware = (logger: Logger): Middleware => {
  return {
    logger: pinoHttp({
      logger: logger.child({ category: 'HttpEvent' }),
      genReqId: function (req, res) {
        const id = randomUUID();
        res.setHeader('X-Request-Id', id);
        return id;
      },
      customLogLevel: function (req, res, err) {
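        // Log 4xx responses as warnings and 5xx responses or thrown errors as errors;
        // everything else is logged at info level.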
        if (res.statusCode >= 400 && res.statusCode < 500) {
          return 'warn';
        } else if (res.statusCode >= 500 || err) {
          return 'error';
        }
        return 'info';
      },
      quietReqLogger: true,
    }),

    routeNotFound: (_req, _res, _next) => {
      throw new HttpError(404, 'Route not found');
    },

    errorHandler: (err, req, res, _next) => {
      const loggerMsg = 'ErrorHandler';
      const isChatCompletionEndpoint = req.path === '/api/chat';

      if (res.writableEnded) {
        req.log.error(err, loggerMsg);
        return;
      }

      // For chat completion endpoint, check if streaming has started
      if (isChatCompletionEndpoint && res.headersSent) {
        req.log.error(err, loggerMsg);
        const ollamaChunk = makeOllamaChunk('Unknown', { content: '' }, true);
        res.write(makeSSEMessage(ollamaChunk));
        res.end();
        return;
      }

      // Use JSON responses for other cases
      if (err instanceof HttpError) {
        req.log.warn(err, loggerMsg);
        res.status(err.status).send({ status: err.status, error: err.message, name: err.name });
        return;
      } else if (err instanceof ZodError) {
        req.log.warn(err, loggerMsg);
        res.status(400).send({ status: 400, error: 'Invalid request', name: 'ZodError' });
        return;
      }

      req.log.error(err, loggerMsg);
      res.status(500).send({
        status: 500,
        error: err.message ?? 'Something went wrong',
        name: 'InternalServerError',
      });
    },
  };
};
--------------------------------------------------------------------------------
/src/data/models.ts:
--------------------------------------------------------------------------------
import crypto from 'crypto';
import path from 'path';
import fs from 'fs';
import { z } from 'zod/v4';

export const ModelConfig = z.object({
  name: z.string('Model name is required'),
  id: z.string('Model ID is required'),
  contextLength: z
    .int('Context length must be a positive integer')
    .positive('Context length must be a positive integer'),
  capabilities: z.array(
    z.enum(['vision', 'tools'], 'Capabilities must be an array of "vision" and/or "tools"'),
    'Capabilities array is required',
  ),
  temperature: z
    .number('Temperature must be a number between 0 and 2')
    .min(0, 'Temperature must be a number between 0 and 2')
    .max(2, 'Temperature must be a number between 0 and 2')
    .optional(),
  topP: z
    .number('Top P must be a number between 0 and 1')
    .min(0, 'Top P must be a number between 0 and 1')
    .max(1, 'Top P must be a number between 0 and 1')
    .optional(),
  max_tokens: z
    .int('Max tokens must be a positive integer')
    .positive('Max tokens must be a positive integer')
    .optional(),
  extra: z
    .record(z.string(), z.any(), 'Extra properties must be a record of string keys and any values')
    .optional(),
});
export type ModelConfig = z.infer<typeof ModelConfig>;

export function loadModels(): ModelConfig[] | z.ZodError {
  const filePath = path.resolve(__dirname, '../../models.json');
  const fileContent = fs.readFileSync(filePath, 'utf8');
  const models = JSON.parse(fileContent);
  const parsedModels = z.array(ModelConfig).safeParse(models);

  if (!parsedModels.success) {
    return parsedModels.error;
  }

  const names = new Set<string>();
  for (const model of parsedModels.data) {
    const lowerName = model.name.toLowerCase();
    if (names.has(lowerName)) {
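      // Names are compared case-insensitively, so two entries that differ only in casing are rejected.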
      throw new Error(`Duplicate model name found: ${model.name}`);
    }
    names.add(lowerName);
  }

  return parsedModels.data;
}

export const findModelConfig = (
  models: ModelConfig[],
  modelName: string,
): ModelConfig | undefined => {
  return models.find((config) => config.name === modelName);
};

function generateDigest(config: ModelConfig): string {
  const data = JSON.stringify({
    name: config.name,
    id: config.id,
    contextLength: config.contextLength,
    capabilities: config.capabilities,
  });
  return crypto.createHash('sha256').update(data).digest('hex');
}

export const generateModelsList = (models: ModelConfig[]) => {
  return {
    models: models.map((config) => ({
      name: config.name,
      model: config.id,
      modified_at: new Date().toISOString(),
      size: 500000000, // Fixed size
      digest: generateDigest(config),
      details: {
        parent_model: '',
        format: 'gguf',
        family: 'llama',
        families: ['llama'],
        parameter_size: '7B',
        quantization_level: 'Q4_K_M',
      },
    })),
  };
};

export const generateModelInfo = (models: ModelConfig[], modelName: string) => {
  const config = findModelConfig(models, modelName);

  if (!config) {
    throw new Error(`Model ${modelName} not found`);
  }

  return {
    modelfile: `FROM ${config.name}`,
    parameters: 'stop "<|eot_id|>"',
    template: '{{ .Prompt }}',
    details: {
      parent_model: '',
      format: 'gguf',
      family: 'llama',
      families: ['llama'],
      parameter_size: '7B',
      quantization_level: 'Q4_K_M',
    },
    model_info: {
      'general.architecture': 'llama',
      'general.file_type': 2,
      'general.parameter_count': 7000000000,
      'llama.context_length': config.contextLength,
      'llama.embedding_length': 4096,
      'tokenizer.ggml.model': 'gpt2',
    },
    capabilities: ['completion', ...config.capabilities],
  };
};
--------------------------------------------------------------------------------
/src/controllers/api.ts:
--------------------------------------------------------------------------------
import { NextFunction, Request, Response } from 'express';
import { generateModelsList, generateModelInfo, findModelConfig } from '../data/models';
import { HttpError } from '../errors';
import { AppContext } from '../app';
import {
  convertOllamaMessagesToOpenAI,
  convertRaycastToolsToOpenAI,
  getThoughtsFromResponseDelta,
  makeOllamaChunk,
  makeSSEMessage,
  OllamaChatRequest,
  OllamaChunkResponse,
} from '../util';
import { ChatCompletionCreateParamsStreaming, ChatCompletionChunk } from 'openai/resources';
import { z } from 'zod/v4';

export interface ApiController {
  getTags(req: Request, res: Response, next: NextFunction): void;
  getModelInfo(req: Request, res: Response, next: NextFunction): void;
  chatCompletion(req: Request, res: Response, next: NextFunction): Promise<void>;
}

export const makeApiController = ({ openai, models }: AppContext): ApiController => {
  return {
    getTags: (req, res) => {
      res.send(generateModelsList(models));
    },

    getModelInfo: (req, res) => {
      const { model } = z.object({ model: z.string() }).parse(req.body);
      const modelInfo = generateModelInfo(models, model);
      res.send(modelInfo);
    },

    chatCompletion: async (req, res) => {
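      // Validate the Ollama-style request Raycast sends before translating it into an OpenAI call.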
      const { messages, model: requestedModel, tools } = OllamaChatRequest.parse(req.body);

      const modelConfig = findModelConfig(models, requestedModel);

      if (!modelConfig) {
        throw new HttpError(400, `Model ${requestedModel} not found`);
      }

      const openaiMessages = convertOllamaMessagesToOpenAI(messages);
      const openaiTools = convertRaycastToolsToOpenAI(tools);

      const chatConfig: ChatCompletionCreateParamsStreaming = {
        ...modelConfig.extra,
        model: modelConfig.id,
        messages: openaiMessages,
        stream: true,
        stream_options: { include_usage: true },
        temperature: modelConfig.temperature,
        top_p: modelConfig.topP,
        max_completion_tokens: modelConfig.max_tokens,
        ...(openaiTools && { tools: openaiTools }),
      };

      // eslint-disable-next-line @typescript-eslint/no-unused-vars
      const { messages: _, ...configWithoutMessages } = chatConfig;
      req.log.info({ configWithoutMessages }, 'ChatCompletionRequest');

      let pingInterval: NodeJS.Timeout | undefined = undefined;
      const abortController = new AbortController();

      const cleanup = () => {
        if (!abortController.signal.aborted) {
          abortController.abort();
        }
        clearInterval(pingInterval);
        req.log.info('ConnectionCleanup');
      };

      try {
        const stream = await openai.chat.completions.create(chatConfig, {
          signal: abortController.signal,
        });

        res.writeHead(200, {
          'Content-Type': 'application/json',
          'Transfer-Encoding': 'chunked',
          'Cache-Control': 'no-cache',
          Connection: 'keep-alive',
        });

        pingInterval = setInterval(() => {
          res.write('\n');
          req.log.info('ConnectionPing');
        }, 10000);

        res.on('close', () => {
          cleanup();
        });

        const finalToolCalls: Record<string, ChatCompletionChunk.Choice.Delta.ToolCall> = {};
        let finish_reason: OllamaChunkResponse['done_reason'] = undefined;

        for await (const chunk of stream) {
          const delta = chunk.choices[0]?.delta;
          const content = delta?.content;
          const toolCalls = delta?.tool_calls;
          const thinking = getThoughtsFromResponseDelta(delta);

          if (thinking) {
            const ollamaChunk = makeOllamaChunk(requestedModel, { content: '', thinking }, false);
            res.write(makeSSEMessage(ollamaChunk));
          } else if (content) {
            const ollamaChunk = makeOllamaChunk(requestedModel, { content }, false);
            res.write(makeSSEMessage(ollamaChunk));
          }

          if (toolCalls) {
            for (const toolCall of toolCalls) {
              const { index } = toolCall;

              if (!finalToolCalls[index]) {
                finalToolCalls[index] = {
                  index: toolCall.index,
                  id: toolCall.id,
                  type: toolCall.type,
                  function: {
                    name: toolCall.function?.name || '',
                    arguments: toolCall.function?.arguments || '',
                  },
                };
              } else {
                if (finalToolCalls[index]?.function) {
                  finalToolCalls[index].function.arguments += toolCall.function?.arguments || '';
                }
              }
            }
          }

          const reason = chunk.choices[0]?.finish_reason;
          if (reason) {
            if (reason === 'stop' || reason === 'tool_calls') {
              finish_reason = reason;
            } else {
              finish_reason = 'stop';
            }
          }

          if (chunk.usage) {
            req.log.info({ usage: chunk.usage }, 'CompletionUsage');
          }
        }

        // Send final chunk with tool calls
        const finalChunk = makeOllamaChunk(
          requestedModel,
          { content: '' },
          true,
          finish_reason,
          finalToolCalls,
        );
        res.write(makeSSEMessage(finalChunk));
        res.end();
      } finally {
        cleanup();
      }
    },
  };
};
--------------------------------------------------------------------------------
/src/util.ts:
--------------------------------------------------------------------------------
import { randomBytes } from 'node:crypto';
import {
  ChatCompletionChunk,
  ChatCompletionMessageParam,
  ChatCompletionTool,
} from 'openai/resources';
import { z } from 'zod/v4';

const RaycastRequestTool = z.discriminatedUnion('type', [
  z.object({
    name: z.string(),
    type: z.literal('remote_tool'),
  }),
  z.object({
    type: z.literal('local_tool'),
    function: z.object({
      name: z.string(),
      description: z.string(),
      parameters: z.record(z.string(), z.any()).transform((value) => {
        if (Object.keys(value).length === 0) {
          return {
            type: 'object',
            properties: {},
            required: [],
          };
        }
        return value;
      }),
    }),
  }),
]);
type RaycastRequestTool = z.infer<typeof RaycastRequestTool>;

export const OllamaChatMessage = z.object({
  role: z.enum(['user', 'assistant', 'system', 'tool']),
  images: z.array(z.string()).optional(),
  content: z.string(),
  tool_calls: z
    .array(
      z.record(
        z.literal('function'),
        z.object({
          name: z.string(),
          arguments: z.record(z.string(), z.any()),
        }),
      ),
    )
    .optional(),
});
type OllamaChatMessage = z.infer<typeof OllamaChatMessage>;

export const OllamaChatRequest = z.object({
  model: z.string(),
  messages: z.array(OllamaChatMessage),
  tools: z.array(RaycastRequestTool).default([]),
});

export interface OllamaChunkResponse {
  model: string;
  created_at: string;
  message: {
    role: 'assistant';
    content: string;
    thinking?: string;
    tool_calls?: {
      function: {
        name: string;
        arguments: Record<string, unknown>;
      };
    }[];
  };
  done: boolean;
  done_reason?: 'stop' | 'tool_calls';
}

export function makeOllamaChunk(
  model: string,
  data: { content: string; thinking?: string },
  done: boolean,
  done_reason?: OllamaChunkResponse['done_reason'],
  toolCalls?: Record<string, ChatCompletionChunk.Choice.Delta.ToolCall>,
): OllamaChunkResponse {
  // Convert tools to Ollama format
  const finalToolCalls: OllamaChunkResponse['message']['tool_calls'] = [];
  if (toolCalls) {
    for (const key in toolCalls) {
      const tc = toolCalls[key];
      if (!tc.function?.name) {
        continue;
      }

      const args = tc.function.arguments || '{}';
      try {
        const parsedArgs = JSON.parse(args);
        finalToolCalls.push({
          function: {
            name: tc.function.name,
            arguments: parsedArgs,
          },
        });
      } catch {
        continue;
      }
    }
  }

  return {
    model,
    created_at: new Date().toISOString(),
    message: {
      role: 'assistant',
      ...data,
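      // Omit tool_calls entirely when no complete tool calls were accumulated.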
      tool_calls: finalToolCalls.length > 0 ? finalToolCalls : undefined,
    },
    done,
    done_reason,
  };
}

export function convertOllamaMessagesToOpenAI(
  messages: OllamaChatMessage[],
): ChatCompletionMessageParam[] {
  // Store all tool call IDs in order
  const toolCallIds: string[] = [];

  const makeToolCallId = (): string => {
    return randomBytes(5).toString('hex').slice(0, 9);
  };

  return messages.map((msg): ChatCompletionMessageParam => {
    // Handle tool calls in assistant messages
    if (msg.role === 'assistant' && msg.tool_calls) {
      // Clear previous tool call IDs and generate new ones
      toolCallIds.length = 0;

      return {
        role: 'assistant',
        content: msg.content,
        tool_calls: msg.tool_calls.map((tc) => {
          const toolCallId = makeToolCallId();
          toolCallIds.push(toolCallId); // Store each tool call ID
          return {
            id: toolCallId,
            type: 'function',
            function: {
              name: tc.function.name,
              arguments: JSON.stringify(tc.function.arguments),
            },
          };
        }),
      };
    }

    // Handle tool responses
    if (msg.role === 'tool') {
      // Use the next available tool call ID in sequence
      const toolCallId = toolCallIds.shift() || makeToolCallId();
      return {
        role: 'tool',
        content: msg.content,
        tool_call_id: toolCallId,
      };
    }

    // Handle images if present
    if (msg.images && msg.images.length > 0 && msg.role === 'user') {
      return {
        role: 'user',
        content: [
          { type: 'text', text: msg.content },
          ...msg.images.map((img) => ({
            type: 'image_url' as const,
            image_url: { url: `data:image/jpeg;base64,${img}` },
          })),
        ],
      };
    }

    // Handle regular messages
    return {
      role: msg.role,
      content: msg.content,
    };
  });
}

export function convertRaycastToolsToOpenAI(
  raycastTools?: RaycastRequestTool[],
): ChatCompletionTool[] | undefined {
  const filteredTools = raycastTools?.filter((tool) => tool.type === 'local_tool');

  if (!filteredTools || filteredTools.length === 0) {
    return undefined;
  }

  return filteredTools.map((tool) => {
    return {
      type: 'function',
      function: tool.function,
    };
  });
}

export function makeSSEMessage(message: OllamaChunkResponse): string {
  return `${JSON.stringify(message)}\n\n`;
}

type ThinkingDelta =
  | (ChatCompletionChunk.Choice.Delta & {
      reasoning?: unknown;
      reasoning_content?: unknown;
      extra_content?: {
        google?: {
          thought?: unknown;
        };
      };
    })
  | undefined;

export function getThoughtsFromResponseDelta(delta: ThinkingDelta): string | null {
  if (!delta) {
    return null;
  }

  // Some providers use reasoning or reasoning_content
  const reasoning = delta.reasoning ?? delta.reasoning_content;
  if (typeof reasoning === 'string' && reasoning.length > 0) {
    return reasoning;
  }

  // Gemini API
  const googleThought = delta.extra_content?.google?.thought;
  if (typeof googleThought === 'boolean' && googleThought) {
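    // When the provider flags the delta as a thought, the reasoning text arrives in content,
    // sometimes wrapped in <thought> tags that are stripped below.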
    const content = delta.content ?? '';
    const thinking = content.replace(/<thought>|<\/thought>/g, '');
    if (thinking.length > 0) {
      return thinking;
    }
  }

  return null;
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Raycast AI OpenRouter Proxy

This project provides a proxy server that allows Raycast AI to use models from any OpenAI-compatible API (OpenAI, Gemini, OpenRouter, etc.). This brings "Bring Your Own Key" (BYOK) functionality to Raycast AI, meaning you can use your own API key and models from your chosen provider. By default, the proxy is configured to use OpenRouter.

**No Raycast Pro subscription required!** 🎉

This proxy allows using custom models inside Raycast, including **AI Chat**, **AI Commands**, **Quick AI**, and **AI Presets**, giving you Raycast's native AI experience with the flexibility of custom models and your own API key.

> [!WARNING]
>
> **Work In Progress**: This project is still in development. While it works well for many use cases, there may be bugs and rough edges. Use with caution.

![AI Chat](./assets/ai-chat.jpg)

## Features

This proxy aims to provide a seamless experience for using custom models within Raycast. Here's what is supported and what is not:

### Supported:

- 🧠 **Any model**: Access the wide range of models offered by OpenAI-compatible providers. OpenRouter is used by default.
- 👀 **Vision support**: Use models capable of processing images.
- 🛠️ **AI Extensions & MCP**: Use your favorite AI Extensions and MCP servers. Note that Ollama tool calling support is experimental in Raycast.
- 📝 **System instructions**: Provide system-level prompts to guide model behavior.
- 📎 **Attachments**: Attach the same things as with the official models.
- 🔨 **Parallel tool use**: Make multiple tool calls simultaneously.
- ⚡ **Streaming**: Get real-time responses from models.
- 🔤 **Chat title generation**: Automatically generate chat titles.
- 🛑 **Stream cancellation**: Stop ongoing responses from models.

### Partial Support:

- 💭 **Displaying thinking process**: See the model's thinking process.
  - This feature isn't supported by all providers because the OpenAI API specification does not define a standard for it. For example, when using OpenRouter, the thinking process is always shown by default for supported models. Other providers may not send it by default and may require extra setup via the `extra` field in the model's [configuration](#configuration), as described in the provider's documentation.

### Not Supported:

- 🌐 **Remote tools**: Some AI Extensions are classified as "remote tools" and are not supported. These include web search and image generation, among others. You can replace these with MCP servers if you want similar functionality.

## Requirements

- Docker
- API key for your chosen provider (e.g., OpenRouter)

## Getting Started

To get started, follow these steps:

1. Clone the repository:

   ```bash
   git clone https://github.com/miikkaylisiurunen/raycast-ai-openrouter-proxy.git
   ```

2. Change into the project directory:

   ```bash
   cd raycast-ai-openrouter-proxy
   ```

3. Configure your provider in the Docker Compose file.
   Open the `docker-compose.yml` file and replace `YOUR_API_KEY` with your API key. By default, the proxy uses OpenRouter. To use a different OpenAI-compatible provider, change the `BASE_URL` to your provider's API endpoint.

4. Update the models configuration file. An example `models.json` file is included in the project root for configuring models. Refer to the [Configuration](#configuration) section for details on its structure.

5. Start the proxy server:

   ```bash
   docker compose up -d --build
   ```

   This starts the proxy server in the background. By default, it runs on port `11435`. If that port is already in use, you can change it in the `docker-compose.yml` file.

6. Set the Ollama host in Raycast settings: open Raycast Settings, go to AI > Ollama Host, and set the host to `localhost:11435`. If you changed the port in the `docker-compose.yml` file, make sure to update it here as well. You can also enable experimental AI Extension and MCP support in the Raycast settings. See the [Configuration](#configuration) section for more details.

![Raycast settings](./assets/raycast-settings.jpg)

## Configuration

The proxy's behavior is primarily configured through a `models.json` file in the root directory of the project. This file defines the models available to Raycast and their specific settings. An example `models.json` file is included in this repository. Each entry in the JSON array represents a model and can include the following properties:

- `name`: The name of the model as it will appear in Raycast.
- `id`: The model ID in the format expected by your provider.
- `contextLength`: The maximum context length (in tokens) the model supports. This only affects Raycast's UI, not the model itself.
- `capabilities`: An array of strings indicating the model's capabilities.
  - `"vision"`: The model can process images.
  - `"tools"`: The model supports AI Extensions and MCP (tool calling). You need to enable the experimental AI Extensions for Ollama Models in Raycast settings for this to work. This option is at the bottom of the AI settings in Raycast.
- `temperature`: (Optional) Controls the creativity of the model. A value between 0 and 2.
- `topP`: (Optional) Another parameter that controls the randomness of the output. A value between 0 and 1.
- `max_tokens`: (Optional) The maximum number of tokens the model is allowed to generate in a single response.
- `extra`: (Optional) An object for advanced, provider-specific configurations. These options are passed directly to the provider's API. For example, you can use it for OpenRouter-specific settings like specifying a preferred provider (`"provider": { "only": ["openai"] }`) or setting the reasoning effort for supported models (`"reasoning": { "effort": "high" }`). Refer to your provider's documentation for available parameters. Note that `extra` properties are not validated at startup. If you encounter issues, check the container logs after sending a request for any errors related to these settings.

When you modify the `models.json` file, you need to restart the proxy server for the changes to take effect.
You can do this by running:

```bash
docker compose restart
```

Example `models.json` structure for OpenRouter:

```json
[
  {
    "name": "Gemini 2.5 Flash",
    "id": "google/gemini-2.5-flash",
    "contextLength": 1000000,
    "capabilities": ["vision", "tools"],
    "temperature": 0
  },
  {
    "name": "Gemini 2.5 Flash Thinking",
    "id": "google/gemini-2.5-flash:thinking",
    "contextLength": 1000000,
    "capabilities": ["vision", "tools"],
    "temperature": 1
  },
  {
    "name": "GPT-4o Mini",
    "id": "openai/gpt-4o-mini",
    "contextLength": 128000,
    "capabilities": ["vision", "tools"]
  },
  {
    "name": "Claude Sonnet 4 (Thinking)",
    "id": "anthropic/claude-sonnet-4",
    "contextLength": 200000,
    "capabilities": ["vision", "tools"],
    "extra": {
      "reasoning": {
        "max_tokens": 4000
      }
    }
  }
]
```

## FAQ

### How does this compare to the official Raycast BYOK feature?

Raycast released a built-in BYOK feature in v1.100.0. The official implementation has a few differences compared to this proxy:

- It only supports Anthropic, Google, and OpenAI. This proxy supports any OpenAI-compatible provider.
- All your messages go through Raycast's servers.
- Your API keys are sent to Raycast's servers.
- You have less control over the models and their configurations.

### What works/does not work?

Refer to the [Features](#features) section for a list of supported and unsupported functionality.

### Is a Raycast Pro subscription required to use this?

No. One of the main benefits of this proxy is that it enables the use of custom models within Raycast without a Raycast Pro subscription.

### Can I deploy this on a remote server?

Yes, but it is generally not recommended. There is currently no authentication implemented, meaning anyone with access to your server's address could potentially make requests using your API key. You would need to implement your own authentication mechanism if you want to secure it for remote access.

### Do I need to install Ollama?

No, you do not need to install Ollama.

### How do I configure Raycast to use this proxy?

See the [Getting Started](#getting-started) section.

### How does this work?

This proxy acts as an Ollama server, allowing Raycast to communicate with it. It translates requests from Raycast into a format that the target OpenAI-compatible API understands.

### I updated the model configuration, but it doesn't seem to take effect. What should I do?

If you modify the `models.json` file, you need to restart the proxy server for the changes to take effect. You can do this by running:

```bash
docker compose restart
```

### What if something doesn't work?

If you encounter issues, a good first step is to check the container logs. You can do this by running the command:

```bash
docker compose logs
```
--------------------------------------------------------------------------------