├── docs ├── logo.png ├── deployment.zh-CN.md ├── deployment.en-US.md ├── contributing.zh-CN.md ├── contributing.en-US.md ├── guide.zh-CN.md └── guide.en-US.md ├── packages ├── serverless │ ├── app │ │ ├── client.ts │ │ ├── server.ts │ │ ├── routes │ │ │ ├── _404.tsx │ │ │ ├── _error.tsx │ │ │ ├── reports │ │ │ │ └── create.tsx │ │ │ ├── api │ │ │ │ └── index.ts │ │ │ ├── _renderer.tsx │ │ │ └── index.tsx │ │ ├── global.d.ts │ │ ├── components │ │ │ ├── CircleCheckIcon.tsx │ │ │ ├── CircleXIcon.tsx │ │ │ ├── Navigation.tsx │ │ │ ├── YamlEditor.tsx │ │ │ ├── Roadmap.tsx │ │ │ ├── ErrorText.tsx │ │ │ ├── ReportTable.tsx │ │ │ └── PlanEditor.tsx │ │ ├── islands │ │ │ ├── use-case.tsx │ │ │ ├── headline.tsx │ │ │ └── create-form.tsx │ │ └── reports │ │ │ ├── u2.json │ │ │ ├── example.json │ │ │ ├── u1.json │ │ │ └── u3.json │ ├── public │ │ └── favicon.ico │ ├── .prettierrc │ ├── .editorconfig │ ├── vitest.config.ts │ ├── vite.config.ts │ ├── package.json │ ├── wrangler.toml │ └── tsconfig.json └── core │ ├── src │ ├── client.ts │ ├── runner.ts │ ├── types.ts │ └── index.ts │ ├── scripts │ └── build.js │ ├── package.json │ └── tsconfig.json ├── package.json ├── README.zh-CN.md ├── examples ├── basic.yaml ├── optimize-prompts.yaml ├── multi-providers.yaml └── continuous-testing.yaml ├── README.md ├── .gitignore └── LICENSE /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yuyz0112/relia/HEAD/docs/logo.png -------------------------------------------------------------------------------- /packages/serverless/app/client.ts: -------------------------------------------------------------------------------- 1 | import { createClient } from 'honox/client' 2 | 3 | createClient() 4 | -------------------------------------------------------------------------------- /packages/serverless/public/favicon.ico: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Yuyz0112/relia/HEAD/packages/serverless/public/favicon.ico -------------------------------------------------------------------------------- /packages/serverless/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "printWidth": 140, 3 | "singleQuote": true, 4 | "semi": true, 5 | "useTabs": true 6 | } 7 | -------------------------------------------------------------------------------- /packages/serverless/app/server.ts: -------------------------------------------------------------------------------- 1 | import { createApp } from 'honox/server'; 2 | import { showRoutes } from 'hono/dev'; 3 | 4 | const app = createApp(); 5 | 6 | showRoutes(app); 7 | 8 | export default app; 9 | -------------------------------------------------------------------------------- /packages/serverless/.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | indent_style = tab 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | 11 | [*.yml] 12 | indent_style = space 13 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "relia", 3 | "private": true, 4 | "version": "0.0.0", 5 | "repository": "git@github.com:Yuyz0112/relia.git", 6 | "author": "Yanzhen Yu ", 7 | "license": "Apache-2.0", 8 | "workspaces": [ 9 | "packages/*" 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /packages/serverless/app/routes/_404.tsx: -------------------------------------------------------------------------------- 1 | import { NotFoundHandler } from 'hono'; 2 | 3 | const handler: NotFoundHandler = (c) => { 4 | return c.render( 5 |
6 |

Sorry, Not Found...

7 |
8 | ); 9 | }; 10 | 11 | export default handler; 12 | -------------------------------------------------------------------------------- /packages/serverless/vitest.config.ts: -------------------------------------------------------------------------------- 1 | import { defineWorkersConfig } from "@cloudflare/vitest-pool-workers/config"; 2 | 3 | export default defineWorkersConfig({ 4 | test: { 5 | poolOptions: { 6 | workers: { 7 | wrangler: { configPath: "./wrangler.toml" }, 8 | }, 9 | }, 10 | }, 11 | }); 12 | -------------------------------------------------------------------------------- /packages/serverless/app/routes/_error.tsx: -------------------------------------------------------------------------------- 1 | // app/routes/_error.tsx 2 | import { ErrorHandler } from 'hono'; 3 | 4 | const handler: ErrorHandler = (e, c) => { 5 | return c.render( 6 |
7 |

Error! {e.message}

8 |
9 | ); 10 | }; 11 | 12 | export default handler; 13 | -------------------------------------------------------------------------------- /packages/serverless/app/routes/reports/create.tsx: -------------------------------------------------------------------------------- 1 | import { createRoute } from 'honox/factory'; 2 | import CreateForm from '../../islands/create-form'; 3 | 4 | export default createRoute((c) => { 5 | return c.render( 6 |
7 | 8 |
, 9 | { title: 'Relia: create report.' } 10 | ); 11 | }); 12 | -------------------------------------------------------------------------------- /packages/serverless/app/global.d.ts: -------------------------------------------------------------------------------- 1 | import {} from 'hono'; 2 | 3 | type Head = { 4 | title?: string; 5 | }; 6 | 7 | declare module 'hono' { 8 | interface Env { 9 | Variables: {}; 10 | Bindings: {}; 11 | } 12 | interface ContextRenderer { 13 | (content: string | Promise<string>, head?: Head): Response | Promise<Response>; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /docs/deployment.zh-CN.md: -------------------------------------------------------------------------------- 1 | # 部署 2 | 3 | ## 部署到 Cloudflare 4 | 5 | 如果您还没有 Cloudflare 账户，请[在这里创建](https://dash.cloudflare.com/sign-up)。 6 | 7 | 1. 进入 Cloudflare 仪表板并设置 Cloudflare Workers 子域名。 8 | 2. 运行 `yarn` 9 | 3. 运行 `cd packages/serverless` 10 | 4. 运行 `yarn deploy` – 这将创建一个新的 Page `relia-pages`，现在可以在 Cloudflare 的 _Workers and Pages_ 下看到。 11 | 5. 
它现在应该已经上线。访问 `https://relia-pages.{您的子域名}.pages.dev`。 12 | -------------------------------------------------------------------------------- /packages/core/src/client.ts: -------------------------------------------------------------------------------- 1 | import OpenAI from "openai"; 2 | import { Provider } from "./types"; 3 | 4 | export const createClient = (provider: Provider) => { 5 | switch (provider.name) { 6 | case "openAI": 7 | case "fireworks": 8 | case "groq": 9 | default: 10 | return new OpenAI({ 11 | baseURL: provider.baseURL, 12 | apiKey: provider.apiKey, 13 | }); 14 | } 15 | }; 16 | -------------------------------------------------------------------------------- /packages/serverless/app/routes/api/index.ts: -------------------------------------------------------------------------------- 1 | import { Hono } from 'hono'; 2 | import { load } from 'js-yaml'; 3 | import * as core from '@relia/core'; 4 | 5 | const app = new Hono(); 6 | 7 | app.post('/v0/reports', async (c) => { 8 | const b = await c.req.json<{ yamlPlan: string }>(); 9 | const plan = load(b.yamlPlan); 10 | // FIXME: remove hack 11 | const messages = await (core as any).default.runTests(plan); 12 | 13 | return c.json(messages); 14 | }); 15 | 16 | export default app; 17 | -------------------------------------------------------------------------------- /packages/serverless/app/components/CircleCheckIcon.tsx: -------------------------------------------------------------------------------- 1 | export default function CircleCheckIcon() { 2 | return ( 3 | 15 | 16 | 17 | 18 | ); 19 | } 20 | -------------------------------------------------------------------------------- /docs/deployment.en-US.md: -------------------------------------------------------------------------------- 1 | # Deployment 2 | 3 | ## Deploy to Cloudflare 4 | 5 | If you don't have one already, [create a Cloudflare account here](https://dash.cloudflare.com/sign-up). 6 | 7 | 1. 
Go to your Cloudflare dashboard and set up a Cloudflare Workers subdomain 8 | 2. Run `yarn` 9 | 3. Run `cd packages/serverless` 10 | 4. Run `yarn deploy` – this will create a new Page, `relia-pages`, now visible under _Workers and Pages_ in Cloudflare 11 | 5. It should now be live. Visit `https://relia-pages.{yoursubdomain}.pages.dev`. 12 | -------------------------------------------------------------------------------- /packages/serverless/app/components/CircleXIcon.tsx: -------------------------------------------------------------------------------- 1 | export default function CircleCheckIcon() { 2 | return ( 3 | 15 | 16 | 17 | 18 | 19 | ); 20 | } 21 | -------------------------------------------------------------------------------- /packages/core/scripts/build.js: -------------------------------------------------------------------------------- 1 | const fs = require("fs"); 2 | const path = require("path"); 3 | const { build } = require("tsup"); 4 | 5 | (async () => { 6 | await build({ 7 | entry: ["src/index.ts"], 8 | dts: true, 9 | format: ["cjs"], 10 | }); 11 | 12 | const filePath = path.resolve(__dirname, "../dist/index.js"); 13 | const fileContent = fs.readFileSync(filePath, "utf-8"); 14 | const modifiedContent = fileContent.replace( 15 | 'require("assert")', 16 | 'require("node:assert")' 17 | ); 18 | fs.writeFileSync(filePath, modifiedContent); 19 | })(); 20 | -------------------------------------------------------------------------------- /packages/core/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@relia/core", 3 | "version": "0.1.3", 4 | "description": "", 5 | "main": "dist/index.js", 6 | "typings": "dist/index.d.ts", 7 | "type": "commonjs", 8 | "scripts": { 9 | "build": "node scripts/build.js", 10 | "prepublish": "npm run build" 11 | }, 12 | "keywords": [], 13 | "files": [ 14 | "dist" 15 | ], 16 | "author": "yanzhen@smartx.com", 17 | "license": "Apache-2.0", 18 | "dependencies": { 19 | 
"openai": "^4.47.3", 20 | "p-map": "^7.0.2" 21 | }, 22 | "devDependencies": { 23 | "tsup": "^8.1.0", 24 | "typescript": "^5.4.5" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /docs/contributing.zh-CN.md: -------------------------------------------------------------------------------- 1 | # 贡献代码 2 | 3 | ## 本地启动 Relia 4 | 5 | 1. 运行 `yarn` 6 | 2. 构建 core package 7 | 2-1. 运行 `cd packages/core` 8 | 2-2. 运行 `yarn build` 9 | 2-3. 运行 `cd ../../` 返回根目录 10 | 3. 启动 Relia 服务 11 | 3-1. 运行 `cd packages/serverless` 12 | 3-2. 运行 `yarn dev` 13 | 14 | 如需开发修改 core package 代码，则重复步骤 2。如需开发 Relia 服务，则保持 3-2 dev server 运行的情况下修改代码即可。 15 | 16 | ## 添加模型 API 客户端 17 | 18 | 许多模型 API 都实现了与 OpenAI API 兼容，Relia 默认也会使用 OpenAI SDK 调用 API。 19 | 20 | 但如果您需要测试的模型 API 与 OpenAI API 不兼容，则需要实现对应的 API 客户端，将 Relia 测试计划的数据结构转换为该模型 API 需要的结构。 21 | 22 | 具体为修改 [client.ts](../packages/core/src/client.ts)，增加一个 `provider.name` case 及对应的客户端代码。 23 | -------------------------------------------------------------------------------- /packages/serverless/vite.config.ts: -------------------------------------------------------------------------------- 1 | import pages from '@hono/vite-cloudflare-pages'; 2 | import adapter from '@hono/vite-dev-server/cloudflare'; 3 | import honox from 'honox/vite'; 4 | import client from 'honox/vite/client'; 5 | import { defineConfig } from 'vite'; 6 | import commonjs from 'vite-plugin-commonjs'; 7 | 8 | export default defineConfig(({ mode }) => { 9 | if (mode === 'client') { 10 | return { 11 | define: { 12 | exports: {}, 13 | }, 14 | plugins: [commonjs(), client()], 15 | }; 16 | } else { 17 | return { 18 | ssr: { 19 | external: ['openai', 'highlight.js'], 20 | }, 21 | define: { 22 | exports: {}, 23 | }, 24 | plugins: [ 25 | commonjs(), 26 | honox({ 27 | devServer: { 28 | adapter, 29 | }, 30 | }), 31 | pages(), 32 | ], 33 | }; 34 | } 35 | }); 36 | -------------------------------------------------------------------------------- 
/packages/serverless/app/components/Navigation.tsx: -------------------------------------------------------------------------------- 1 | export default function Navigation() { 2 | return ( 3 |
4 | 11 | 35 |
36 | ); 37 | } 38 | -------------------------------------------------------------------------------- /README.zh-CN.md: -------------------------------------------------------------------------------- 1 | # Relia 2 | 3 | [立刻试用](https://relia.dev/) 4 | 5 | Relia 是一个针对 LLM 的 E2E 测试框架,帮助您构建适合特定使用场景的 AI 基准测试。 6 | 7 | 它可以识别最适合您的 LLM 模型,并通过持续测试确保模型升级不会导致性能回退。 8 | 9 | Relia 专门为 function calling(或“tool use”)场景设计,这也是代理类 AI 应用程序的核心能力。 10 | 11 | ## 文档 12 | 13 | [使用指南](./docs/guide.zh-CN.md) 14 | 15 | [自部署](./docs/deployment.zh-CN.md) 16 | 17 | [如何贡献](./docs/contributing.zh-CN.md) 18 | 19 | ## 适用场景 20 | 21 | ### 选择最合适的 LLM 22 | 23 | 在模型选型阶段,基于测试结果识别最适合特定使用场景的 LLM,兼顾性能与成本。 24 | 25 | ### 优化提示词 26 | 27 | 在应用开发阶段,在相同模型上比较多组提示词的结果,理解不同提示词对结果的影响并完成优化。 28 | 29 | ### 持续测试防止回退 30 | 31 | 在应用发布之后,持续测试同一模型的不同版本,以避免模型升级带来的性能回退。 32 | 33 | ## 路线图 34 | 35 | - [ ] 允许在测试报告中自定义 provider 标题和 suite 标题,以更好地组织和清晰展示。 36 | - [ ] 提高大规模测试计划执行的效率和可靠性。 37 | - [ ] 扩展支持更多的 LLM provider。 38 | - [x] 开发用于编辑测试计划的表单 UI,使创建和管理测试更容易、更直观。 39 | - [ ] 实现测试计划和报告的持久存储。 40 | - [ ] 允许为不同的 suite 自定义评分,以更好地评估和比较测试用例的性能。 41 | 42 | 欢迎在 [GitHub](https://github.com/Yuyz0112/relia)、[X](https://x.com/Aryu0112) 和 [Bilibili](https://space.bilibili.com/489667127) 上关注我们的项目。 43 | -------------------------------------------------------------------------------- /docs/contributing.en-US.md: -------------------------------------------------------------------------------- 1 | # Contributing Code 2 | 3 | ## Running Relia Locally 4 | 5 | 1. Run `yarn` 6 | 2. Build the core package 7 | 2-1. Run `cd packages/core` 8 | 2-2. Run `yarn build` 9 | 2-3. Run `cd ../../` to return to the root directory 10 | 3. Start the Relia service 11 | 3-1. Run `cd packages/serverless` 12 | 3-2. Run `yarn dev` 13 | 14 | To develop and modify the core package code, repeat step 2. To develop the Relia service, keep the dev server from step 3-2 running while modifying the code. 
15 | 16 | ## Adding a Model API Client 17 | 18 | Many model APIs are compatible with the OpenAI API, and Relia will use the OpenAI SDK to call these APIs by default. 19 | 20 | However, if you need to test a model API that requires a different structure, you need to implement a corresponding API client to convert the Relia test plan data structure to the required format for that model API. 21 | 22 | Specifically, modify [client.ts](../packages/core/src/client.ts) to add a new `provider.name` case and the corresponding client code. 23 | -------------------------------------------------------------------------------- /packages/serverless/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@relia/serverless", 3 | "type": "module", 4 | "version": "0.0.0", 5 | "private": true, 6 | "scripts": { 7 | "dev": "vite", 8 | "prebuild": "npm run build --prefix ../core", 9 | "build": "vite build --mode client && vite build", 10 | "preview": "wrangler pages dev", 11 | "deploy": "npm run build && wrangler pages deploy", 12 | "wrangler": "wrangler" 13 | }, 14 | "devDependencies": { 15 | "@cloudflare/vitest-pool-workers": "^0.1.0", 16 | "@cloudflare/workers-types": "^4.20240603.0", 17 | "@hono/vite-cloudflare-pages": "^0.4.0", 18 | "@hono/vite-dev-server": "^0.12.1", 19 | "@types/diff": "^5.2.1", 20 | "typescript": "^5.0.4", 21 | "vite-plugin-commonjs": "^0.10.1", 22 | "vitest": "1.3.0", 23 | "wrangler": "^3.0.0" 24 | }, 25 | "dependencies": { 26 | "@relia/core": "*", 27 | "diff": "^5.2.0", 28 | "highlight.js": "^11.9.0", 29 | "hono": "^4.4.3", 30 | "honox": "^0.1.20", 31 | "js-yaml": "^4.1.0" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /packages/serverless/app/components/YamlEditor.tsx: -------------------------------------------------------------------------------- 1 | import { css, cx } from 'hono/css'; 2 | import { useState } from 'hono/jsx'; 3 | import { dump, 
load } from 'js-yaml'; 4 | 5 | type YamlEditorProps = { 6 | initialValue: T; 7 | onChange: (value: T) => void; 8 | rows?: number; 9 | cols?: number; 10 | }; 11 | 12 | const YamlEditorStyle = css` 13 | &.error { 14 | border-color: var(--error-color); 15 | } 16 | `; 17 | 18 | function YamlEditor({ initialValue, onChange, rows = 3, cols = 30 }: YamlEditorProps) { 19 | const [value, setValue] = useState(dump(initialValue)); 20 | const [error, setError] = useState(null); 21 | 22 | const handleInputChange = (event: Event) => { 23 | const newValue = (event.target as HTMLTextAreaElement).value; 24 | setValue(newValue); 25 | 26 | try { 27 | onChange(load(newValue) as T); 28 | setError(null); 29 | } catch (err) { 30 | setError(err); 31 | } 32 | }; 33 | 34 | return ( 35 |