├── packages ├── examples │ └── example-dsl-evaluator │ │ ├── .gitignore │ │ ├── tsconfig.json │ │ ├── package.json │ │ ├── README.md │ │ └── src │ │ ├── example-splitter.ts │ │ ├── index.ts │ │ ├── embedding-evaluator.ts │ │ ├── example-program-map.ts │ │ ├── helloworld-cases.ts │ │ ├── eval-langium.ts │ │ ├── eval-langdev.ts │ │ ├── runners.ts │ │ └── langium-cases.ts ├── langium-ai-mcp │ ├── tsconfig.tests.json │ ├── vitest.config.ts │ ├── README.MD │ ├── tsconfig.json │ ├── src │ │ ├── mcp-client.ts │ │ └── mcp-server.ts │ ├── tests │ │ └── mcp-server.test.ts │ └── package.json └── langium-ai-tools │ ├── tsconfig.tests.json │ ├── vitest.config.ts │ ├── src │ ├── analyzer │ │ ├── index.ts │ │ └── document-analyzer.ts │ ├── splitter │ │ ├── index.ts │ │ ├── program-map.ts │ │ └── splitter.ts │ ├── index.ts │ └── evaluator │ │ ├── index.ts │ │ ├── edit-distance-evaluator.ts │ │ ├── document-evaluator.ts │ │ ├── langium-evaluator.ts │ │ ├── eval-matrix.ts │ │ ├── evaluator.ts │ │ └── chart.ts │ ├── tsconfig.json │ ├── package.json │ ├── tests │ └── document-analyzer.test.ts │ └── README.md ├── .gitignore ├── .github └── workflows │ └── ci.yml ├── package.json ├── LICENSE ├── README.md └── .vscode └── launch.json /packages/examples/example-dsl-evaluator/.gitignore: -------------------------------------------------------------------------------- 1 | *.html -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | .DS_Store 3 | logs 4 | *.log 5 | npm-debug.log* 6 | .npm 7 | .eslintcache 8 | .env 9 | dist 10 | modelfiles 11 | results 12 | downloads/ 13 | stores/ 14 | data/ 15 | .langium-ai -------------------------------------------------------------------------------- /packages/langium-ai-mcp/tsconfig.tests.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.json", 
3 | "compilerOptions": { 4 | "types": ["vitest/globals", "node"] 5 | }, 6 | "include": [ 7 | "src/**/*", 8 | "tests/**/*" 9 | ], 10 | "exclude": [ 11 | "node_modules", 12 | "dist" 13 | ] 14 | } -------------------------------------------------------------------------------- /packages/langium-ai-tools/tsconfig.tests.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.json", 3 | "compilerOptions": { 4 | "types": ["vitest/globals", "node"] 5 | }, 6 | "include": [ 7 | "src/**/*", 8 | "tests/**/*" 9 | ], 10 | "exclude": [ 11 | "node_modules", 12 | "dist" 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/vitest.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vitest/config'; 2 | 3 | export default defineConfig({ 4 | test: { 5 | environment: 'node', 6 | globals: true, 7 | include: ['tests/**/*.test.ts'], 8 | typecheck: { 9 | tsconfig: 'tsconfig.tests.json' 10 | } 11 | }, 12 | esbuild: { 13 | target: 'node20' 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /packages/langium-ai-mcp/vitest.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vitest/config'; 2 | 3 | export default defineConfig({ 4 | test: { 5 | environment: 'node', 6 | globals: true, 7 | include: ['tests/**/*.test.ts'], 8 | typecheck: { 9 | tsconfig: 'tsconfig.tests.json' 10 | } 11 | }, 12 | esbuild: { 13 | target: 'node20' 14 | } 15 | }); 16 | 17 | 18 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/analyzer/index.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2025 
TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | export * from './document-analyzer.js'; -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/splitter/index.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | export * from './splitter.js'; 8 | export * from './program-map.js'; 9 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/index.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 
5 | ******************************************************************************/ 6 | 7 | export * from './evaluator/index.js'; 8 | export * from './analyzer/index.js'; 9 | export * from './splitter/index.js'; 10 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/evaluator/index.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | export * from './evaluator.js'; 8 | export * from './langium-evaluator.js'; 9 | export * from './edit-distance-evaluator.js'; 10 | export * from './eval-matrix.js'; 11 | export * from './chart.js'; 12 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "NodeNext", 5 | "sourceMap": true, 6 | "declaration": true, 7 | "declarationMap": true, 8 | "outDir": "./dist", 9 | "strict": true, 10 | "noImplicitAny": true, 11 | "moduleResolution": "NodeNext", 12 | "esModuleInterop": true, 13 | "skipLibCheck": true, 14 | "forceConsistentCasingInFileNames": true, 15 | "rootDir": "./src" 16 | }, 17 | "include": ["src/**/*.ts"], 18 | "exclude": [ 19 | "node_modules" 20 | ] 21 | } -------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "NodeNext", 5 | 
"sourceMap": true, 6 | "declaration": true, 7 | "declarationMap": true, 8 | "outDir": "./dist", 9 | "strict": true, 10 | "noImplicitAny": true, 11 | "moduleResolution": "NodeNext", 12 | "esModuleInterop": true, 13 | "skipLibCheck": true, 14 | "forceConsistentCasingInFileNames": true, 15 | }, 16 | "include": [ 17 | "src/**/*.ts" 18 | ], 19 | "exclude": [ 20 | "node_modules" 21 | ] 22 | } -------------------------------------------------------------------------------- /packages/langium-ai-mcp/README.MD: -------------------------------------------------------------------------------- 1 | How to try-out: 2 | 3 | - `cd packages/langium-ai-mcp` 4 | - Start MCP server with IO transport `npm run start` 5 | - Run example client code `npm run cstart` - you should see the tool result containing the errors. 6 | 7 | Example usage in Cursor: 8 | 9 | - Open Cursor MCP settings 10 | - Add new server using following setup (user or workspace specific `.cursor/mcp.json` ): 11 | 12 | ```json 13 | "mcpServers": { 14 | "Langium MCP": { 15 | "command": "node", 16 | "args": [ 17 | "~/git/langium-ai/packages/langium-ai-mcp/dist/mcp-server.js" 18 | ] 19 | } 20 | } 21 | ``` 22 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build-and-test: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Checkout code 15 | uses: actions/checkout@v4 16 | 17 | - name: Setup Node.js 18 | uses: actions/setup-node@v4 19 | with: 20 | node-version: '20.x' 21 | cache: 'npm' 22 | 23 | - name: Install dependencies 24 | run: npm ci 25 | 26 | - name: Build packages 27 | run: npm run build 28 | 29 | - name: Run tests 30 | run: npm run test:ci 31 | 32 | --------------------------------------------------------------------------------
/packages/langium-ai-mcp/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "rootDir": "./src", 4 | "outDir": "./dist", 5 | "module": "nodenext", 6 | "target": "esnext", 7 | "types": [ 8 | "node" 9 | ], 10 | "sourceMap": true, 11 | "declaration": true, 12 | "declarationMap": true, 13 | "noUncheckedIndexedAccess": true, 14 | "exactOptionalPropertyTypes": true, 15 | "strict": true, 16 | "jsx": "react-jsx", 17 | "verbatimModuleSyntax": true, 18 | "isolatedModules": true, 19 | "noUncheckedSideEffectImports": true, 20 | "moduleDetection": "force", 21 | "skipLibCheck": true, 22 | "forceConsistentCasingInFileNames": true 23 | }, 24 | "include": [ 25 | "src" 26 | ], 27 | "exclude": [ 28 | "node_modules", 29 | "tests", 30 | "dist" 31 | ] 32 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "langium-ai", 3 | "private": true, 4 | "version": "0.0.1", 5 | "description": "Monorepo for Langium AI and its associated projects", 6 | "scripts": { 7 | "build": "npm run build --workspaces", 8 | "test": "npm run test --workspaces --if-present --continue-on-error", 9 | "test:ci": "npm run test --workspaces --if-present", 10 | "clean": "npm run clean --workspaces --if-present", 11 | "watch": "npm run watch --workspaces --if-present" 12 | }, 13 | "type": "module", 14 | "author": { 15 | "name": "TypeFox", 16 | "url": "https://www.typefox.io" 17 | }, 18 | "license": "MIT", 19 | "workspaces": [ 20 | "packages/langium-ai-tools", 21 | "packages/langium-ai-mcp", 22 | "packages/examples/*" 23 | ], 24 | "devDependencies": { 25 | "typescript": "^5.4.5", 26 | "vitest": "^3.0.9", 27 | "rimraf": "^6.0.1" 28 | }, 29 | "volta": { 30 | "node": "20.10.0", 31 | "npm": "10.2.3" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- 
/packages/examples/example-dsl-evaluator/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "example-dsl-evaluator", 3 | "description": "Example evaluator for a targeted DSL", 4 | "private": true, 5 | "version": "0.0.1", 6 | "main": "dist/index.js", 7 | "scripts": { 8 | "build": "tsc", 9 | "start": "node ./dist/index.js", 10 | "demo": "npm run build && npm run start -- run-langdev && open ./radar-chart.html", 11 | "clean": "rimraf ./dist", 12 | "test": "echo \"No tests yet...\"" 13 | }, 14 | "type": "module", 15 | "author": { 16 | "name": "TypeFox", 17 | "url": "https://www.typefox.io" 18 | }, 19 | "license": "MIT", 20 | "dependencies": { 21 | "chromadb": "^1.9.2", 22 | "chromadb-default-embed": "^2.13.2", 23 | "dotenv": "^16.4.5", 24 | "langium-ai-tools": "^0.0.2", 25 | "ollama": "^0.5.9", 26 | "openai": "^4.67.3" 27 | }, 28 | "volta": { 29 | "node": "20.10.0", 30 | "npm": "10.2.3" 31 | }, 32 | "devDependencies": { 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/evaluator/edit-distance-evaluator.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root.
5 | ******************************************************************************/ 6 | 7 | import {levenshteinEditDistance} from 'levenshtein-edit-distance'; 8 | import { Evaluator, EvaluatorResult, EvaluatorResultData } from './evaluator.js'; 9 | 10 | export interface EditDistanceEvaluatorResultData extends EvaluatorResultData { 11 | edit_distance: number; 12 | } 13 | 14 | export class EditDistanceEvaluator extends Evaluator { 15 | async evaluate(response: string, expected_response: string): Promise> { 16 | const distance = levenshteinEditDistance(response, expected_response); 17 | return { 18 | data: { 19 | edit_distance: distance 20 | } 21 | }; 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /packages/langium-ai-mcp/src/mcp-client.ts: -------------------------------------------------------------------------------- 1 | import { Client } from "@modelcontextprotocol/sdk/client/index.js"; 2 | import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; 3 | import { getDisplayName } from "@modelcontextprotocol/sdk/shared/metadataUtils.js"; 4 | 5 | const debug = false 6 | 7 | const transport = new StdioClientTransport({ 8 | command: "node", 9 | args: [ "./dist/mcp-server.js"] 10 | }); 11 | 12 | const client = new Client( 13 | { 14 | name: "example-client", 15 | version: "1.0.0" 16 | } 17 | ); 18 | 19 | await client.connect(transport); 20 | 21 | const tools = await client.listTools(); 22 | console.log("Available tools:", "\n", ...tools.tools.map(t => getDisplayName(t) + "\n")); 23 | 24 | const theTool = tools.tools[0]; 25 | if (!theTool) { 26 | throw new Error("No tool available"); 27 | } 28 | 29 | const result = await client.callTool({ 30 | name: theTool.name, 31 | arguments: { 32 | code: 'syntax error' 33 | } 34 | }); 35 | 36 | console.log("Tool result:", result.content); 37 | 38 | // exit the process 39 | process.exit(0); 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 TypeFox 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /packages/langium-ai-mcp/tests/mcp-server.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, expect, it } from 'vitest'; 2 | 3 | import { validateLangiumCode } from '../src/mcp-server'; 4 | 5 | describe('validateLangiumCode', () => { 6 | 7 | 8 | it('should return undefined for valid grammar code', async () => { 9 | const validCode = ` 10 | grammar HelloWorld 11 | 12 | entry Model: persons+=Person*; 13 | Person: 'person' name=ID; 14 | hidden terminal WS: /\\s+/; 15 | terminal ID: /[_a-zA-Z][\\w_]*/; 16 | `; 17 | 18 | const result = await validateLangiumCode(validCode); 19 | expect(result).toBeUndefined(); 20 | }); 21 | 22 | it('should return diagnostics for invalid grammar code', async () => { 23 | const invalidCode = ` 24 | grammar HelloWorld 25 | entry Model: persons+=Person*; 26 | `; 27 | 28 | const result = await validateLangiumCode(invalidCode); 29 | expect(result).toBeDefined(); 30 | expect(result).toContain("Error: Could not resolve reference to AbstractRule named 'Person'. at line 3, column 35"); 31 | }); 32 | }); 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Langium AI 2 | 3 | ## Overview 4 | 5 | Langium AI is a suite of tools that makes it easier to build AI applications for Langium DSLs, by providing helpful tools to leverage your existing Langium DSL services, and to integrate LLMs tightly with your Langium DSL. In doing so, existing or new Langium DSLs can benefit from first-class LLM support just as well as other common programming languages. 6 | 7 | You can read more about Langium AI here as well, [Langium AI: The fusion of DSLs and LLMs](https://typefox.io/blog/langium-ai-the-fusion-of-dsls-and-llms/). 
8 | 9 | In particular, Langium AI helps to solve the following problems: 10 | 11 | - Selecting good models with regards to your DSL 12 | - Evaluating DSL output from an LLM 13 | - Processing DSL programs as data, while respecting the structure of your DSL 14 | - Developing good natural language interfaces for DSLs 15 | 16 | This is achieved by using a two pronged approach. 17 | 18 | - [Core tools](/packages/langium-ai-tools/README.md): which leverage Langium's service set to help you build, evaluate, and trust your AI applications. 19 | - _Coming soon_: AI-based tools which leverage LLMs directly in tandem with your DSL. 20 | 21 | For more information you can refer to the [langium-ai-tools](/packages/langium-ai-tools/README.md) package, which leverages much of your DSL's existing Langium services to make it easier to evaluate your application, as well as to process DSL documents. 22 | -------------------------------------------------------------------------------- /packages/langium-ai-mcp/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "langium-ai-mcp", 3 | "version": "0.0.2", 4 | "displayName": "Langium AI - MCP", 5 | "publisher": "TypeFox", 6 | "description": "MCP server for Langium AI", 7 | "repository": { 8 | "type": "git", 9 | "url": "git+https://github.com/eclipse-langium/langium-ai.git", 10 | "directory": "packages/langium-ai-mcp" 11 | }, 12 | "bugs": "https://github.com/eclipse-langium/langium-ai/issues", 13 | "type": "module", 14 | "main": "dist/mcp-server.js", 15 | "private": false, 16 | "files": [ 17 | "dist" 18 | ], 19 | "scripts": { 20 | "clean": "rm -rf ./dist", 21 | "build": "npm run clean && tsc", 22 | "watch": "tsc -w", 23 | "start": "node .", 24 | "cstart": "node ./dist/mcp-client.js", 25 | "prepare": "npm run build", 26 | "test": "vitest run" 27 | }, 28 | "author": { 29 | "name": "TypeFox", 30 | "url": "https://www.typefox.io" 31 | }, 32 | "keywords": [ 33 | "langium", 34 | "ai", 35 
| "mcp", 36 | "server", 37 | "llm" 38 | ], 39 | "license": "MIT", 40 | "dependencies": { 41 | "@modelcontextprotocol/sdk": "^1.17.4", 42 | "langium-ai-tools": "0.0.2" 43 | }, 44 | "volta": { 45 | "node": "20.10.0", 46 | "npm": "10.2.3" 47 | }, 48 | "devDependencies": { 49 | } 50 | } -------------------------------------------------------------------------------- /packages/langium-ai-tools/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "langium-ai-tools", 3 | "version": "4.1.3", 4 | "description": "Tooling for building AI Applications that leverage Langium DSLs", 5 | "repository": { 6 | "type": "git", 7 | "url": "git+https://github.com/eclipse-langium/langium-ai.git", 8 | "directory": "packages/langium-ai-tools" 9 | }, 10 | "bugs": "https://github.com/eclipse-langium/langium-ai/issues", 11 | "type": "module", 12 | "main": "dist/index.js", 13 | "private": false, 14 | "files": [ 15 | "dist" 16 | ], 17 | "exports": { 18 | ".": { 19 | "import": "./dist/index.js", 20 | "types": "./dist/index.d.ts" 21 | }, 22 | "./splitter": { 23 | "import": "./dist/splitter/index.js", 24 | "types": "./dist/splitter/index.d.ts" 25 | }, 26 | "./evaluator": { 27 | "import": "./dist/evaluator/index.js", 28 | "types": "./dist/evaluator/index.d.ts" 29 | } 30 | }, 31 | "scripts": { 32 | "build": "tsc", 33 | "watch": "tsc -w", 34 | "test": "vitest run", 35 | "clean": "rimraf ./dist" 36 | }, 37 | "author": { 38 | "name": "TypeFox", 39 | "url": "https://www.typefox.io" 40 | }, 41 | "keywords": [ 42 | "langium", 43 | "ai", 44 | "tools", 45 | "llm" 46 | ], 47 | "license": "MIT", 48 | "dependencies": { 49 | "langium": "~4.1.0", 50 | "levenshtein-edit-distance": "^3.0.1" 51 | }, 52 | "volta": { 53 | "node": "20.10.0", 54 | "npm": "10.2.3" 55 | }, 56 | "devDependencies": {} 57 | } 58 | -------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/README.md: 
-------------------------------------------------------------------------------- 1 | # Example DSL Evaluator 2 | 3 | A concrete usage of langium-ai-tools to help evaluate output from an LLM (and its related setup) in the context of the example dsl in this project. 4 | 5 | ## Installation 6 | 7 | Install via npm to setup the project & build 8 | 9 | ```sh 10 | npm install 11 | npm run build 12 | ``` 13 | 14 | This demo needs **Ollama** installed & running, with the following models pulled (but feel free to adjust these to your needs): 15 | 16 | ```sh 17 | # chat + code gen models 18 | ollama pull codellama 19 | ollama pull llama3.2 20 | ollama pull codegemma 21 | 22 | # embedding model 23 | ollama pull mxbai-embed-large 24 | ``` 25 | 26 | ## Building 27 | 28 | To build the project, run the following command: 29 | 30 | ```sh 31 | npm run build 32 | ``` 33 | 34 | ## Running 35 | 36 | You can run an example evaluation with `npm run demo`. This will run a pre-defined validation suite for an example (shown at LangDev 24'), and open up the generated radar chart report. 37 | 38 | You can also run specific evaluations with the following commands: 39 | 40 | ```sh 41 | # runs the langdev evaluation example 42 | npm run start -- run-langdev 43 | ``` 44 | 45 | If you want to run a more comprehensive langium example with & without RAG, you can run: 46 | 47 | ```sh 48 | # make sure to first build embeddings in the example-dsl-splitter project! 
49 | # these will be utilized in this phase 50 | 51 | # additionally make sure to pull down llama3.1 for this one 52 | ollama pull llama3.1 53 | 54 | npm run start -- run-langium 55 | ``` 56 | 57 | And lastly you can skip right to generating a radar chart report from the last results (which is automatically generated at the end of each of these runs): 58 | 59 | ```sh 60 | npm run start -- report 61 | ``` 62 | -------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/src/example-splitter.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | import { NodeFileSystem } from "langium/node"; 8 | import { splitByNode } from "langium-ai-tools/splitter"; 9 | import { createLangiumGrammarServices } from "langium/grammar"; 10 | import { LangiumServices } from "langium/lsp"; 11 | 12 | /** 13 | * An example of utilizing the splitter in Langium AI 14 | */ 15 | 16 | export function runSplitterExample() { 17 | const exampleLangiumDoc = ` 18 | grammar Test 19 | 20 | entry Model: A | B | C | D | E; 21 | 22 | A: 'A' ID; 23 | 24 | /** 25 | * Info about B (one line above) 26 | */ 27 | 28 | B: 'B' ID; 29 | 30 | /** 31 | * Info about C 32 | */ 33 | C: 'C' ID; 34 | 35 | 36 | // info about D (one line above) 37 | 38 | D: 'D' ID; 39 | 40 | // info about E 41 | E: 'E' ID; 42 | 43 | hidden terminal WS: /\\s+/; 44 | terminal ID: /[_a-zA-Z][\\w_]*/; 45 | `; // NOTE: backslashes are doubled so the grammar text really contains \s and \w — a single \s in a template literal collapses to plain "s" 46 | 47 | const langiumServices = createLangiumGrammarServices(NodeFileSystem); 48 | 49 | // split by ParserRule (w/ comments included) 50 | const splits = splitByNode( 51 |
exampleLangiumDoc, 52 | [(node) => node.$type === "ParserRule"], 53 | langiumServices.grammar, 54 | ); 55 | 56 | console.log("Split by ParserRule w/ comments:"); 57 | console.dir(splits); 58 | 59 | // split by ParserRule (w/ comments excluded) 60 | const splitsNoComments = splitByNode( 61 | exampleLangiumDoc, 62 | [(node) => node.$type === "ParserRule"], 63 | langiumServices.grammar, 64 | { commentRuleNames: [] }, 65 | ); 66 | 67 | console.log("Split by ParserRule without comments:"); 68 | console.dir(splitsNoComments); 69 | } 70 | -------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/src/index.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root.
5 | ******************************************************************************/ 6 | 7 | import { 8 | runLangDevDemo, 9 | generateChartFromLastResults, 10 | } from "./eval-langdev.js"; 11 | import { runLangiumEvals } from "./eval-langium.js"; 12 | import { runExampleProgramMap } from "./example-program-map.js"; 13 | import { runSplitterExample } from "./example-splitter.js"; 14 | 15 | function printHelp() { 16 | console.log( 17 | "Usage: node dist/index.js [run-langium|run-langdev|report|splitter|program-map|help]", 18 | ); 19 | console.log(" run-langium: Run Langium evaluations"); 20 | console.log(" run-langdev: Run LangDev evaluations"); 21 | console.log(" report: Generate a report from the last results"); 22 | console.log(" splitter: Run the splitter example"); 23 | console.log(" program-map: Generate a program map"); 24 | console.log(" help: Show this help message"); 25 | } 26 | 27 | async function main() { 28 | const args = process.argv.slice(2); 29 | if (args.length === 0) { 30 | printHelp(); 31 | return; 32 | } 33 | 34 | const command = args[0]; 35 | 36 | switch (command) { 37 | case "run-langium": 38 | await runLangiumEvals(); 39 | generateChartFromLastResults(); 40 | break; 41 | case "run-langdev": 42 | await runLangDevDemo(); 43 | generateChartFromLastResults(); 44 | break; 45 | case "report": 46 | generateChartFromLastResults(); 47 | break; 48 | case "splitter": 49 | runSplitterExample(); 50 | break; 51 | case "program-map": 52 | runExampleProgramMap(); 53 | break; 54 | case "help": 55 | default: 56 | printHelp(); 57 | break; 58 | } 59 | } 60 | 61 | main().catch((err) => { console.error(err); process.exit(1); }); // do not leave the entry-point promise floating: surface failures as a non-zero exit 62 | 63 | -------------------------------------------------------------------------------- /packages/langium-ai-mcp/src/mcp-server.ts: -------------------------------------------------------------------------------- 1 | import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; 2 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; 3 | import { LangiumEvaluator, type
LangiumEvaluatorResultData } from 'langium-ai-tools'; 4 | import { createLangiumGrammarServices } from 'langium/grammar'; 5 | 6 | import { NodeFileSystem } from 'langium/node'; 7 | import { z } from 'zod'; 8 | 9 | const server = new McpServer({ 10 | name: 'langium-mcp-server', 11 | version: '1.0.0' 12 | }); 13 | 14 | server.registerTool('langium-syntax-checker', 15 | { 16 | title: 'Langium Evaluator Tool', 17 | description: 'Checks Langium code for errors', 18 | inputSchema: { code: z.string() } 19 | }, 20 | async ({ code }) => { 21 | const validationResult = await validateLangiumCode(code); 22 | return { 23 | content: [ 24 | { 25 | type: 'text', 26 | text: validationResult ?? 'The provided Langium code has no issues.' 27 | } 28 | ] 29 | } 30 | } 31 | ); 32 | 33 | export const langiumEvaluator = new LangiumEvaluator(createLangiumGrammarServices(NodeFileSystem).grammar); 34 | 35 | export async function validateLangiumCode(code: string): Promise<string | undefined> { 36 | const evalResult = await langiumEvaluator.evaluate(code); 37 | if (evalResult.data) { 38 | const langiumData = evalResult.data as LangiumEvaluatorResultData; 39 | if (langiumData.diagnostics.length > 0) { 40 | return langiumData.diagnostics.map(d => 41 | `${asText(d.severity)}: ${d.message} at line ${d.range.start.line + 1}, column ${d.range.start.character + 1}` 42 | ).join('\n'); 43 | } 44 | } 45 | return undefined; 46 | } 47 | 48 | function asText(severity: number | undefined): string { 49 | 50 | switch (severity) { 51 | case 1: return 'Error'; 52 | case 2: return 'Warning'; 53 | case 3: return 'Information'; 54 | case 4: return 'Hint'; 55 | default: return 'Unknown'; 56 | } 57 | } 58 | 59 | const transport = new StdioServerTransport(); 60 | await server.connect(transport); 61 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/splitter/program-map.ts: -------------------------------------------------------------------------------- 1 |
/****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | import { AstNode } from "langium"; 8 | import { LangiumServices } from "langium/lsp"; 9 | import { splitByNodeToAst } from "./splitter.js"; 10 | 11 | interface MappingRule { 12 | /** 13 | * Determines which nodes to map with this rule 14 | */ 15 | predicate: (node: AstNode) => boolean; 16 | 17 | /** 18 | * Determines how to map the node's textual split 19 | */ 20 | map: (node: AstNode) => string; 21 | } 22 | 23 | interface ProgramMapOptions { 24 | /** 25 | * List of mapping rules to apply to the document. 26 | * Each rule is a predicate that determines which nodes to map & how to map them 27 | */ 28 | mappingRules: MappingRule[] 29 | } 30 | 31 | /** 32 | * Mapper generates a repo-map like structure from a given Langium document. 33 | * Leverages the splitter to produce a mapping from split chunks. 34 | */ 35 | export class ProgramMapper { 36 | 37 | private services: LangiumServices; 38 | private options: ProgramMapOptions; 39 | 40 | constructor(services: LangiumServices, options: ProgramMapOptions) { 41 | this.services = services; 42 | this.options = options; 43 | } 44 | 45 | /** 46 | * Produces a map from the given document 47 | * @param document - The text document to be mapped.
48 | * @returns The mapped document as a list of strings, one for each mapped element 49 | */ 50 | public map(document: string): string[] { 51 | const mappingRules = this.options.mappingRules; 52 | const mapChunks: string[] = []; 53 | 54 | // get all predicates 55 | const predicates = mappingRules.map(rule => rule.predicate); 56 | 57 | const nodes = splitByNodeToAst(document, predicates, this.services); // AST nodes that matched at least one of the predicates 58 | 59 | for (const node of nodes) { 60 | // apply the mapping rule to each node 61 | for (const rule of mappingRules) { 62 | if (rule.predicate(node)) { // NOTE: a node matching several rules is mapped once per matching rule (yields multiple chunks) 63 | mapChunks.push(rule.map(node)); 64 | } 65 | } 66 | } 67 | 68 | return mapChunks; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/src/embedding-evaluator.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root.
5 | ******************************************************************************/ 6 | 7 | /** 8 | * Simple evaluator that computes the embedding for two strings, and returns the cosine similarity 9 | */ 10 | 11 | import { Evaluator, EvaluatorResult } from "langium-ai-tools/evaluator"; 12 | import ollama from 'ollama'; 13 | 14 | export interface EmbeddingEvaluatorResultData extends EvaluatorResult { 15 | similarity: number; 16 | } 17 | 18 | /** 19 | * Computes embeddings by leveraging embedding models from Ollama 20 | */ 21 | export class OllamaEmbeddingEvaluator extends Evaluator { 22 | private embeddingModel: string; 23 | 24 | constructor(model: string) { 25 | super(); 26 | this.embeddingModel = model; 27 | } 28 | 29 | /** 30 | * Evaluate the similarity between two strings by comparing their embeddings 31 | */ 32 | async evaluate(response: string, expected_response: string): Promise> { 33 | // compute the embedding for both strings 34 | const responseEmbedding = (await this.computeEmbedding(response)); 35 | const expectedEmbedding = (await this.computeEmbedding(expected_response)); 36 | 37 | // compute the cosine similarity between the two embeddings 38 | const similarity = this.cosineSimilarity(responseEmbedding, expectedEmbedding); 39 | 40 | return { 41 | data: { 42 | similarity 43 | } 44 | }; 45 | } 46 | 47 | /** 48 | * Computes the embedding for a given text 49 | * @returns 50 | */ 51 | private async computeEmbedding(text: string): Promise { 52 | return (await ollama.embed({ 53 | model: this.embeddingModel, 54 | input: [text], 55 | keep_alive: 30 56 | })).embeddings[0]; 57 | } 58 | 59 | /** 60 | * Compute the cosine similarity between two vectors 61 | */ 62 | private cosineSimilarity(a: number[], b: number[]): number { 63 | const dotProduct = a.reduce((acc, val, i) => acc + val * b[i], 0); 64 | 65 | const aMagnitude = Math.sqrt(a.reduce((acc, val) => acc + val * val, 0)); 66 | const bMagnitude = Math.sqrt(b.reduce((acc, val) => acc + val * val, 0)); 67 | 68 
| if (aMagnitude === 0 || bMagnitude === 0) { 69 | return 0; 70 | } 71 | 72 | return dotProduct / (aMagnitude * bMagnitude); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | 8 | { 9 | "type": "node", 10 | "request": "launch", 11 | "name": "Launch Example: Splitter", 12 | "skipFiles": [ 13 | "/**" 14 | ], 15 | "program": "${workspaceFolder}/packages/examples/example-dsl-evaluator/src/index.ts", 16 | "args": [ 17 | "splitter" 18 | ], 19 | "outFiles": [ 20 | "${workspaceFolder}/**/*.js" 21 | ] 22 | }, 23 | { 24 | "type": "node", 25 | "request": "launch", 26 | "name": "Launch Example: Program Map", 27 | "skipFiles": [ 28 | "/**" 29 | ], 30 | "program": "${workspaceFolder}/packages/examples/example-dsl-evaluator/src/index.ts", 31 | "args": [ 32 | "program-map" 33 | ], 34 | "outFiles": [ 35 | "${workspaceFolder}/**/*.js" 36 | ] 37 | }, 38 | { 39 | "type": "node", 40 | "request": "launch", 41 | "name": "Launch Example: LangDev Demo (with Ollama)", 42 | "skipFiles": [ 43 | "/**" 44 | ], 45 | "args": [ 46 | "run-langdev" 47 | ], 48 | "program": "${workspaceFolder}/packages/examples/example-dsl-evaluator/src/index.ts", 49 | "outFiles": [ 50 | "${workspaceFolder}/**/*.js" 51 | ] 52 | }, 53 | { 54 | "type": "node", 55 | "request": "launch", 56 | "name": "Launch Example: Langium Demo (with Ollama)", 57 | "skipFiles": [ 58 | "/**" 59 | ], 60 | "program": "${workspaceFolder}/packages/examples/example-dsl-evaluator/src/index.ts", 61 | "outFiles": [ 62 | "${workspaceFolder}/**/*.js" 63 | ] 64 | }, 65 | { 66 | "type": "node", 67 | "request": "launch", 68 | "name": "Run Current 
Test File", 69 | "skipFiles": [ 70 | "/**" 71 | ], 72 | "program": "${workspaceFolder}/node_modules/vitest/vitest.mjs", 73 | "args": [ 74 | "run", 75 | "${relativeFile}" 76 | ], 77 | "cwd": "${workspaceFolder}", 78 | "console": "integratedTerminal" 79 | } 80 | ] 81 | } -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/evaluator/document-evaluator.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | /** 8 | * Langium Document Evaluator (evaluates on a Langium document) 9 | */ 10 | 11 | import { LangiumDocument, URI } from "langium"; 12 | import { LangiumServices } from "langium/lsp"; 13 | import { Evaluator, EvaluatorResult, EvaluatorResultData } from "./evaluator.js"; 14 | 15 | 16 | export abstract class AbstractDocumentEvaluator extends Evaluator { 17 | 18 | /** 19 | * Services to use for evaluation 20 | */ 21 | protected services: T; 22 | 23 | constructor(services: T) { 24 | super(); 25 | this.services = services; 26 | } 27 | 28 | /** 29 | * Validate an agent response as if it's a langium program. If we can parse it, we attempt to validate it. 30 | */ 31 | async evaluate(input: string, fileExtension: string | undefined = undefined): Promise>> { 32 | 33 | if (input.includes('```')) { 34 | // take the first code block instead, if present (assuming it's a langium grammar) 35 | const codeBlock = input.split(/```[a-z-]*/)[1]; 36 | input = codeBlock; 37 | } 38 | const fileExt = fileExtension ? 
fileExtension : this.services.LanguageMetaData.fileExtensions[0]; 39 | const doc = this.services.shared.workspace.LangiumDocumentFactory.fromString(input, URI.parse(`memory:/test.${fileExt}`)); 40 | const context: EvaluationContext = { 41 | input: input 42 | }; 43 | try { 44 | await this.services.shared.workspace.DocumentBuilder.build([doc], { validation: true }); 45 | return this.evaluateDocument(doc, context); 46 | } catch (e) { 47 | return this.handleBuildError(e, context) 48 | } 49 | } 50 | 51 | abstract evaluateDocument(doc: LangiumDocument, ctx: EvaluationContext): Partial>; 52 | 53 | 54 | protected handleBuildError(e: unknown, ctx: EvaluationContext): Partial> { 55 | console.error('Error during evaluation: ', e); 56 | return { 57 | name: this.constructor.name, 58 | data: { 59 | failures: 1 60 | } as unknown as RD 61 | }; 62 | } 63 | } 64 | 65 | export type FailureAwarenessData = EvaluatorResultData & { 66 | /** 67 | * Number of validation failures 68 | */ 69 | failures: number; 70 | }; 71 | 72 | export type EvaluationContext = { 73 | input: string; 74 | }; -------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/src/example-program-map.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 
5 | ******************************************************************************/ 6 | 7 | import { ProgramMapper } from "langium-ai-tools"; 8 | import { createLangiumGrammarServices } from "langium/grammar"; 9 | import { NodeFileSystem } from "langium/node"; 10 | 11 | export function runExampleProgramMap() { 12 | 13 | // simple langium grammar, as an example 14 | const exampleLangiumDoc = ` 15 | grammar Test 16 | 17 | entry Model: A | B | C | D | E; 18 | 19 | A: 'A' ID; 20 | 21 | /** 22 | * Info about B (one line above) 23 | */ 24 | 25 | B: 'B' ID; 26 | 27 | /** 28 | * Info about C 29 | */ 30 | C: 'C' ID; 31 | 32 | 33 | // info about D (one line above) 34 | 35 | D: 'D' ID; 36 | 37 | // info about E 38 | E: 'E' ID; 39 | 40 | // Datatype rule 41 | DT returns string: ID; 42 | 43 | hidden terminal WS: /\s+/; 44 | terminal ID: /[_a-zA-Z][\w_]*/; 45 | `; 46 | 47 | // setup your language services (langium's, in this case) 48 | const langiumServices = createLangiumGrammarServices(NodeFileSystem); 49 | const grammarServices = langiumServices.grammar; 50 | 51 | // instantiate 52 | const mapper = new ProgramMapper(grammarServices, { 53 | mappingRules: [ 54 | { 55 | predicate: (node) => node.$type === 'ParserRule', 56 | map: (node: any) => { 57 | const ruleName = node.name; 58 | const modifiers = [ 59 | node.entry ? 'entry' : undefined, 60 | node.fragment ? 'fragment' : undefined, 61 | node.definesHiddenTokens ? 'hidden' : undefined, 62 | node.dataType ? 'datatype' : undefined 63 | ].filter(v => v !== undefined); 64 | const modifierString = modifiers.length > 0 ? `(${modifiers.join(', ')}) ` : ''; 65 | return `${modifierString}rule ${ruleName}`; 66 | } 67 | }, 68 | { 69 | predicate: (node) => node.$type === 'TerminalRule', 70 | map: (node: any) => { 71 | const modifiers = [ 72 | node.fragment ? 'fragment' : undefined, 73 | node.hidden ? 'hidden' : undefined, 74 | ].filter(v => v !== undefined); 75 | const modifierString = modifiers.length > 0 ? 
`(${modifiers.join(', ')}) ` : ''; 76 | return `${modifierString}terminal ${node.name}`; 77 | } 78 | } 79 | ] 80 | }); 81 | const programMap = mapper.map(exampleLangiumDoc); 82 | console.log('Program Map Output:'); 83 | console.log(programMap.join('\n')); 84 | } 85 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/evaluator/langium-evaluator.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | /** 8 | * Base Langium DSL validator (taps into Langium's validator messages to provide better results) 9 | */ 10 | 11 | import { LangiumDocument } from "langium"; 12 | import { LangiumServices } from "langium/lsp"; 13 | import { Diagnostic } from "vscode-languageserver-types"; 14 | import { AbstractDocumentEvaluator, EvaluationContext, FailureAwarenessData } from "./document-evaluator.js"; 15 | import { EvaluatorResult, EvaluatorResultData } from "./evaluator.js"; 16 | 17 | /** 18 | * Langium-specific evaluator result data 19 | */ 20 | export interface LangiumEvaluatorResultData extends FailureAwarenessData { 21 | 22 | /** 23 | * Number of errors 24 | */ 25 | errors: number; 26 | 27 | /** 28 | * Number of warnings 29 | */ 30 | warnings: number; 31 | 32 | /** 33 | * Number of infos 34 | */ 35 | infos: number; 36 | 37 | /** 38 | * Number of hints 39 | */ 40 | hints: number; 41 | 42 | /** 43 | * Number of unassigned diagnostics 44 | */ 45 | unassigned: number; 46 | 47 | /** 48 | * Length of the response in chars 49 | */ 50 | response_length: number; 51 | 52 | /** 53 | * Raw diagnostic data, same which is used to compute 
the other values above 54 | */ 55 | diagnostics: Diagnostic[]; 56 | } 57 | 58 | export class LangiumEvaluator extends AbstractDocumentEvaluator { 59 | 60 | 61 | /** 62 | * Validate an agent response as if it's a langium program. If we can parse it, we attempt to validate it. 63 | */ 64 | evaluateDocument(doc: LangiumDocument, ctx: EvaluationContext): EvaluatorResult { 65 | 66 | const validationResults = doc.diagnostics ?? []; 67 | 68 | const evalData: LangiumEvaluatorResultData = this.createEmptyResultData(); 69 | // include length of the response for checking 70 | evalData.response_length = ctx.input.length; 71 | // include the diagnostics for debugging if desired 72 | evalData.diagnostics = validationResults; 73 | 74 | 75 | for (const diagnostic of validationResults) { 76 | if (diagnostic.severity) { 77 | switch (diagnostic.severity) { 78 | case 1: 79 | evalData.errors++; 80 | break; 81 | case 2: 82 | evalData.warnings++; 83 | break; 84 | case 3: 85 | evalData.infos++; 86 | break; 87 | case 4: 88 | evalData.hints++; 89 | break; 90 | default: 91 | evalData.unassigned++; 92 | break; 93 | } 94 | } 95 | } 96 | 97 | return { 98 | name: this.constructor.name, 99 | metadata: {}, 100 | data: evalData 101 | }; 102 | } 103 | 104 | protected createEmptyResultData(): LangiumEvaluatorResultData { 105 | return { 106 | failures: 0, 107 | errors: 0, 108 | warnings: 0, 109 | infos: 0, 110 | hints: 0, 111 | unassigned: 0, 112 | response_length: 0, 113 | diagnostics: [] 114 | }; 115 | } 116 | } -------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/src/helloworld-cases.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the 
/**
 * Benchmark cases for generating "Hello World" grammars/programs with
 * progressively richer prompting: a bare prompt, a generic Langium system
 * prompt, the grammar supplied in context, and a fully commented grammar
 * with worked examples. Expected responses are compared by the evaluators.
 */
export const cases: Case[] = [
    // Case 1: bare prompt, no system message or context
    {
        name: 'No Prompt',
        prompt: 'Generate a simple hello world grammar written in Langium',
        context: [],
        expected_response: `
Here's a simple Hello World written in Langium:

\`\`\`langium
grammar HelloWorld

entry Model:
    (persons+=Person | greetings+=Greeting)*;

Person:
    'person' name=ID;

Greeting:
    'Hello' person=[Person:ID] '!';

hidden terminal WS: /\s+/;
terminal ID: /[_a-zA-Z][\w_]*/;

hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//;
hidden terminal SL_COMMENT: /\/\/[^\n\r]*/;
\`\`\`
`
    },
    // Case 2: generic "Langium expert" system prompt, same expected grammar
    {
        name: 'With Langium System Prompt',
        history: [{
            role: 'system',
            content: `You are an expert in Langium grammars.
Your role is to provide accurate, detailed, and insightful explanations and advice on Langium grammar-related topics.
You will answer questions, provide examples, and offer guidance on best practices, ensuring clarity and precision in all responses.
`
        }],
        context: [],
        prompt: `Generate a simple hello world program in the Hello World language.`,
        expected_response: `
Here's a simple Hello World written in Langium:

\`\`\`langium
grammar HelloWorld

entry Model:
    (persons+=Person | greetings+=Greeting)*;

Person:
    'person' name=ID;

Greeting:
    'Hello' person=[Person:ID] '!';

hidden terminal WS: /\s+/;
terminal ID: /[_a-zA-Z][\w_]*/;

hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//;
hidden terminal SL_COMMENT: /\/\/[^\n\r]*/;
\`\`\`
`
    },
    // Case 3: full grammar plus an example program in the system prompt;
    // expectation switches from grammar generation to program generation
    {
        name: 'With Hello World Grammar & Prompt',
        history: [{
            role: 'system',
            content: `You are an expert in the Hello World language, which is written in Langium.
When prompted you will provide a simple hello world program.
For reference, here's the Langium grammar that defines the Hello World language:
\`\`\`langium
grammar HelloWorld

entry Model:
    (persons+=Person | greetings+=Greeting)*;

Person:
    'person' name=ID;

Greeting:
    'Hello' person=[Person:ID] '!';

hidden terminal WS: /\s+/;
terminal ID: /[_a-zA-Z][\w_]*/;
terminal INT returns number: /[0-9]+/;
terminal STRING: /"(\\.|[^"\\])*"|'(\\.|[^'\\])*'/;

hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//;
hidden terminal SL_COMMENT: /\/\/[^\n\r]*/;
\`\`\`

And here is a simple Hello World program written in that grammar:
\`\`\`hello-world
person Alice
person Bob

Hello Alice!
Hello Bob!
\`\`\``
        }],
        prompt: 'Generate a simple program for the Hello World language',
        context: [],
        expected_response: `
person John
person Carly

Hello Carly!
Hello John!`
    },
    // Case 4: same as case 3 but the grammar and example are annotated with
    // explanatory comments, testing whether richer context helps
    {
        name: 'With Improved Hello World Prompt',
        history: [{
            role: 'system',
            content: `You are an expert in the Hello World language, which is written in Langium.
When prompted you will provide a simple hello world program.
For reference, here's the Langium grammar that defines the Hello World language:
\`\`\`langium
// name of the grammar
grammar HelloWorld

// entry point for the AST model
// composed of a list of persons and greetings, in any order
entry Model:
    (persons+=Person | greetings+=Greeting)*;

// declare a person with a given name
Person:
    'person' name=ID;

// greet a previously defined person (by name), ending with an exclamation mark
Greeting:
    'Hello' person=[Person:ID] '!';

// whitespace is ignored in this language
hidden terminal WS: /\s+/;

// regex for ID terminals
terminal ID: /[_a-zA-Z][\w_]*/;

// regex for multi-line & single line comments
// however we do not support nested comments
hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//;
hidden terminal SL_COMMENT: /\/\/[^\n\r]*/;
\`\`\`

And here is a simple Hello World program written in that grammar:
\`\`\`hello-world
// declare two persons, Alice & Bob
person Alice
person Bob

// greet Alice & Bob
Hello Alice!
Hello Bob!

// if we wanted to greet John & Carly, we would write:
person John
person Carly

Hello John!
Hello Carly!
\`\`\`
`}],
        prompt: 'Generate a simple program for the Hello World language',
        context: [],
        expected_response: `
person John
person Carly

Hello Carly!
Hello John!`
    }
];
178 | Hello John!` 179 | } 180 | ]; -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/splitter/splitter.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | import { AstNode, CstUtils, LangiumDocument, URI } from "langium"; 8 | import { LangiumServices } from "langium/lsp"; 9 | import { AstUtils } from 'langium'; 10 | 11 | interface SplitterOptions { 12 | /** 13 | * List of comment rule names to include in the chunk. 14 | * If not provided comments are ignored. 15 | * Default: ['ML_COMMENT', 'SL_COMMENT'] 16 | */ 17 | commentRuleNames?: string[] 18 | } 19 | 20 | /** 21 | * Helper function to parse a document string into a LangiumDocument object 22 | * @param document String to be parsed 23 | * @param services Associated Langium services for parsing 24 | * @returns The parsed LangiumDocument or undefined if there were errors 25 | */ 26 | function parseDocument(document: string, services: LangiumServices): LangiumDocument | undefined { 27 | const langiumDoc = services.shared.workspace.LangiumDocumentFactory.fromString(document, URI.parse('memory://document.langium')); 28 | if (langiumDoc.parseResult.lexerErrors.length > 0) { 29 | console.error('Lexer errors:', langiumDoc.parseResult.lexerErrors); 30 | return undefined; 31 | } 32 | if (langiumDoc.parseResult.parserErrors.length > 0) { 33 | console.error('Parser errors:', langiumDoc.parseResult.parserErrors); 34 | return undefined; 35 | } 36 | return langiumDoc; 37 | } 38 | 39 | /** 40 | * Extracts matching AST nodes from a document based on provided predicates. 
41 | * This function is used as a shared utility for splitting documents into AST nodes or text chunks. 42 | * @param document - The text document to be processed. 43 | * @param nodePredicates - The predicates to determine the nodes for matching. 44 | * @param services - The Langium services used for parsing the document. 45 | * @returns The matching AST nodes. 46 | */ 47 | function getMatchingAstNodes( 48 | document: string, 49 | nodePredicates: Array<(node: AstNode) => boolean> | ((node: AstNode) => boolean), 50 | services: LangiumServices 51 | ): AstNode[] { 52 | if (document.trim() === '') { 53 | return []; 54 | } 55 | 56 | const langiumDoc = parseDocument(document, services); 57 | if (!langiumDoc) { 58 | console.error('Failed to parse document'); 59 | return []; 60 | } 61 | 62 | const astNodes: AstNode[] = []; 63 | 64 | const predicates = Array.isArray(nodePredicates) ? nodePredicates : [nodePredicates]; 65 | 66 | // Stream nodes from the AST and filter them based on the predicates 67 | const stream = AstUtils.streamAst(langiumDoc.parseResult.value); 68 | for (const node of stream) { 69 | if (predicates.some(p => p(node))) { 70 | astNodes.push(node); 71 | } 72 | } 73 | return astNodes; 74 | } 75 | 76 | /** 77 | * Splits a document into text chunks based on AST nodes that match the provided predicates. 78 | * Finds the relevant nodes and then extracts corresponding text chunks from them 79 | * @param document - Document to split 80 | * @param nodePredicates - Predicates to determine the nodes for splitting 81 | * @param services - Langium grammar services used for parsing 82 | * @param options - The splitter configuration. 
See {@link SplitterOptions} 83 | * @returns Decoded text chunks from the document 84 | */ 85 | export function splitByNode( 86 | document: string, 87 | nodePredicates: Array<(node: AstNode) => boolean> | ((node: AstNode) => boolean), 88 | services: LangiumServices, 89 | options: SplitterOptions = { commentRuleNames: ['ML_COMMENT', 'SL_COMMENT'] } 90 | ): string[] { 91 | const astNodes = getMatchingAstNodes(document, nodePredicates, services); 92 | 93 | const langiumDoc = parseDocument(document, services); 94 | if (!langiumDoc) { 95 | console.error('Failed to parse document'); 96 | return []; 97 | } 98 | 99 | const txtDoc = langiumDoc.textDocument; 100 | const chunks: string[] = []; 101 | 102 | for (const node of astNodes) { 103 | let start = node.$cstNode?.range.start; 104 | 105 | if (options?.commentRuleNames) { 106 | const cstNode = node.$cstNode; 107 | const commentNode = CstUtils.findCommentNode(cstNode, options.commentRuleNames); 108 | if (commentNode) { 109 | start = commentNode.range.start; 110 | } 111 | } 112 | 113 | const end = node.$cstNode?.range.end; 114 | const chunk = txtDoc.getText({ 115 | start: { 116 | line: start?.line || 0, 117 | character: start?.character || 0 118 | }, 119 | end: { 120 | line: end?.line || 0, 121 | character: end?.character || 0 122 | } 123 | }); 124 | 125 | if (chunk.trim().length > 0) { 126 | chunks.push(chunk); 127 | } 128 | } 129 | return chunks; 130 | } 131 | 132 | /** 133 | * Splits a document into AST nodes based on the given predicates. 
134 | * Directly returns the matched AST nodes 135 | * @param document - The text document to be split 136 | * @param nodePredicates - The predicates to determine which nodes to include 137 | * @param services - Langium grammar services used for parsing 138 | * @returns The nodes picked up by the splitter 139 | */ 140 | export function splitByNodeToAst( 141 | document: string, 142 | nodePredicates: Array<(node: AstNode) => boolean> | ((node: AstNode) => boolean), 143 | services: LangiumServices 144 | ): AstNode[] { 145 | return getMatchingAstNodes(document, nodePredicates, services); 146 | } 147 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/tests/document-analyzer.test.ts: -------------------------------------------------------------------------------- 1 | import { URI } from 'langium'; 2 | import { createServicesForGrammar } from 'langium/grammar'; 3 | import { describe, expect, it } from 'vitest'; 4 | import { AnalysisMode, LangiumDocumentAnalyzer } from '../src/analyzer/document-analyzer.js'; 5 | 6 | const domainModelServices = await createServicesForGrammar({ 7 | grammar: ` 8 | grammar DomainModel 9 | 10 | entry Domainmodel: 11 | (elements+=AbstractElement)*; 12 | 13 | AbstractElement: 14 | PackageDeclaration | Type; 15 | 16 | PackageDeclaration: 17 | 'package' name=QualifiedName '{' 18 | (elements+=AbstractElement)* 19 | '}'; 20 | 21 | Type: 22 | DataType | Entity; 23 | 24 | DataType: 25 | 'datatype' name=ID; 26 | 27 | Entity: 28 | 'entity' name=ID ('extends' superType=[+Entity:QualifiedName])? '{' 29 | (features+=Feature)* 30 | '}'; 31 | 32 | Feature: 33 | (many?='many')? name=ID ':' type=[Type:QualifiedName]; 34 | 35 | QualifiedName returns string: 36 | ID ('.' 
// Test suite for LangiumDocumentAnalyzer, exercised against the DomainModel
// example grammar and the shared `docAnalyzer` / `exampleModel` fixtures above.
describe('LangiumDocumentAnalyzer', () => {

    // Basic end-to-end: evaluate a raw string and inspect per-rule usage counts
    it('should collect syntax usage statistics from string', async () => {
        const result = await docAnalyzer.evaluate('package foo.bar {}');
        const statistics = docAnalyzer.extractStatisticsFromResult(result)!;

        expect(statistics).toBeDefined();
        expect(statistics.ruleUsage['AbstractElement']).toBe(1);
        expect(statistics.ruleUsage['PackageDeclaration']).toBe(1);
        expect(statistics.ruleUsage['QualifiedName']).toBe(1);
        expect(statistics.ruleUsage['ID']).toBe(2);
    });


    // Coverage = used rules / total rules (as a percentage)
    it('should compute coverage correctly', () => {
        const statistics = collectSyntaxUsageStatistics('package foo.bar {}');
        expect(Object.values(statistics.ruleUsage).filter(count => count > 0).length, 'Used rules number').toBe(4);
        expect(statistics.coverage).toBeCloseTo(40.0, 1);
    });

    // Hidden terminals (comments) are counted by default and omitted when
    // the analyzer is constructed with includeHiddenRules: false
    it('should handle "includeHiddenRules" flag', () => {
        const model = 'package bar { /** Multi-line comment */ }';

        const statistics = collectSyntaxUsageStatistics(model);
        expect(statistics.ruleUsage['ML_COMMENT']).toBe(1);

        const noHidden = new LangiumDocumentAnalyzer(domainModelServices, {
            includeHiddenRules: false
        });
        const statisticsNoHidden = collectSyntaxUsageStatistics(model, noHidden);
        expect(statisticsNoHidden.ruleUsage['ML_COMMENT']).toBeUndefined();
    });


    // Diversity metrics over the shared exampleModel fixture
    it('should compute entropy correctly', () => {
        const statistics = collectSyntaxUsageStatistics(exampleModel);
        expect(statistics.diversity.entropy).toBeCloseTo(2.28, 1);
    });

    it('should compute gini coefficient correctly', () => {
        const statistics = collectSyntaxUsageStatistics(exampleModel);
        expect(statistics.diversity.giniCoefficient).toBeCloseTo(0.65, 1);
    });

    it('should compute simpson index correctly', () => {
        const statistics = collectSyntaxUsageStatistics(exampleModel);
        expect(statistics.diversity.simpsonIndex).toBeCloseTo(0.7, 1);
    });

    // Excluded rules disappear from ruleUsage entirely, which in turn
    // changes the coverage denominator
    it('should handle excluded rules', () => {
        const analyzerWithExcludedRules = new LangiumDocumentAnalyzer(domainModelServices, {
            excludeRules: ['Feature', 'DataType']
        });

        const testModel = 'package foo.bar { entity TestEntity { } }';

        const stats = collectSyntaxUsageStatistics(testModel);
        const statsWithExclude = collectSyntaxUsageStatistics(testModel, analyzerWithExcludedRules);

        expect(stats.ruleUsage).toHaveProperty('DataType');
        expect(stats.ruleUsage).toHaveProperty('Feature');
        // Verify that excluded rules are not present in the statistics
        expect(statsWithExclude.ruleUsage).not.toHaveProperty('DataType');
        expect(statsWithExclude.ruleUsage).not.toHaveProperty('Feature');

        // Check stats with exclude still has all rules from original except excluded ones
        expect(Object.keys(statsWithExclude.ruleUsage).length).toBe(Object.keys(stats.ruleUsage).length - 2);

        // Less rules (unused) will result in higher coverage
        expect(statsWithExclude.coverage).toBeGreaterThan(stats.coverage);
    });

    // Empty input: rule table still lists all rules, every metric is zero
    it('should handle empty documents', () => {
        const statistics = collectSyntaxUsageStatistics('');
        expect(Object.keys(statistics.ruleUsage).length).toBeGreaterThan(0);
        expect(statistics.coverage).toBe(0);
        expect(statistics.diversity.entropy).toBe(0);
        expect(statistics.diversity.giniCoefficient).toBe(0);
        expect(statistics.diversity.simpsonIndex).toBe(0);
    });

    // NO_STATISTIC mode: validation still runs, but no statistics are produced
    it('should handle analysis mode NO_STATISTIC', async () => {
        const noStatAnalyzer = new LangiumDocumentAnalyzer(domainModelServices, {
            analysisMode: AnalysisMode.NO_STATISTIC
        });
        const result = await noStatAnalyzer.evaluate('package foo.bar { entity TestEntity { } }');
        const statistics = noStatAnalyzer.extractStatisticsFromResult(result);

        expect(statistics).toBeUndefined();
        expect(result.data?.failures, 'Expected validation passed.').toBe(0);
    });
});
5 | ******************************************************************************/ 6 | 7 | import { config } from 'dotenv'; 8 | import { EmptyFileSystem } from 'langium'; 9 | import { averageAcrossCases, averageAcrossRunners, EditDistanceEvaluator, EditDistanceEvaluatorResultData, EvalMatrix, generateHistogram, generateHistoricalChart, generateRadarChart, LangiumEvaluator, LangiumEvaluatorResultData, loadLastResults, mergeEvaluators, normalizeData } from 'langium-ai-tools/evaluator'; 10 | import { createLangiumGrammarServices } from 'langium/grammar'; 11 | import { EmbeddingEvaluatorResultData, OllamaEmbeddingEvaluator } from './embedding-evaluator.js'; 12 | import { cases as langiumCases } from './langium-cases.js'; 13 | import { runner_codegemma, runner_codegemma_rag, runner_codellama, runner_codellama_rag, runner_llama3_1_rag, runner_llama3_2_3b, runner_llama3_2_3b_rag } from './runners.js'; 14 | config(); 15 | 16 | const langiumServices = createLangiumGrammarServices(EmptyFileSystem); 17 | 18 | type MergedEvaluatorResultType = LangiumEvaluatorResultData & EditDistanceEvaluatorResultData & EmbeddingEvaluatorResultData; 19 | 20 | /** 21 | * Runs a first set of evals for generating hello-world grammars 22 | * This is done with just a prompt 23 | */ 24 | export async function runLangiumEvals() { 25 | 26 | const eMat = new EvalMatrix({ 27 | config: { 28 | // various config options 29 | name: 'Langium Grammar Evaluator w/ System Prompt', 30 | description: 'Evaluates Various Generated Grammars using Various Model Stacks', 31 | history_folder: '.langium-ai', 32 | // number of runs to average across 33 | num_runs: 3 34 | }, 35 | runners: [ 36 | runner_llama3_2_3b, 37 | runner_codellama, 38 | runner_codegemma, 39 | 40 | // rag versions 41 | runner_llama3_2_3b_rag, 42 | runner_codellama_rag, 43 | runner_codegemma_rag, 44 | runner_llama3_1_rag, 45 | ], 46 | evaluators: [ 47 | { 48 | name: 'Langium + Edit Distance + Embedding Evaluator (merged)', 49 | eval: 
mergeEvaluators( 50 | new LangiumEvaluator(langiumServices.grammar), 51 | new EditDistanceEvaluator(), 52 | new OllamaEmbeddingEvaluator('nomic-embed-text') 53 | ) 54 | } 55 | ], 56 | cases: langiumCases 57 | }); 58 | 59 | // run & report 60 | const results = await eMat.run(); 61 | console.log('Evaluation report: '); 62 | console.table(results.map(r => { 63 | return { 64 | name: r.name, 65 | ...r.data 66 | } 67 | }), ['name', 'errors', 'warnings', 'infos', 'hints', 'unassigned', 'edit_distance']); 68 | 69 | // get average scores too 70 | const processedResults = averageAcrossCases(results); 71 | console.log('Average Evaluation report: '); 72 | console.table(processedResults.map(r => { 73 | return { 74 | name: r.name, 75 | ...r.data 76 | } 77 | }), ['name', 'errors', 'warnings', 'infos', 'hints', 'unassigned', 'edit_distance']); 78 | 79 | } 80 | 81 | export function generateChartFromLastResults() { 82 | const rawResults = loadLastResults('.langium-ai', 3); 83 | console.log('Last Evaluation report: '); 84 | 85 | // generate a radar chart for this run 86 | generateRadarChart( 87 | 'Radar Chart (smaller is better)', 88 | normalizeData(rawResults), 89 | './radar-chart.html', 90 | (data: MergedEvaluatorResultType, metadata: Record) => { 91 | return { 92 | 'Failures': data.failures, 93 | 'Errors': data.errors, 94 | 'Warnings': data.warnings, 95 | 'Semantic Diff': 1.0 - data.similarity, // inverse similarity 96 | 'Total Diagnostics': (data.errors + data.warnings + data.infos + data.hints + data.unassigned) / 5.0, 97 | 'Response Size': data.response_length ?? 0, 98 | 'Edit Distance': data.edit_distance, 99 | 'Time': data._runtime ?? 
0 100 | } 101 | } 102 | ); 103 | 104 | // generate a histogram chart for this run 105 | generateHistogram( 106 | 'Histogram Chart (smaller is better)', 107 | normalizeData(rawResults), 108 | './histogram-chart.html', 109 | (data: MergedEvaluatorResultType, metadata: Record) => { 110 | return { 111 | 'Failures': data.failures, 112 | 'Errors': data.errors, 113 | 'Warnings': data.warnings, 114 | 'Semantic Diff': 1.0 - data.similarity, 115 | 'Total Diagnostics': (data.errors + data.warnings + data.infos + data.hints + data.unassigned) / 5.0, 116 | 'Response Size': data.response_length ?? 0, 117 | 'Edit Distance': data.edit_distance, 118 | 'Time': data._runtime ?? 0 119 | } 120 | } 121 | ); 122 | 123 | // generate a historical chart for all unique runners over time 124 | generateHistoricalChart( 125 | 'Historical Chart (Approx. Area of Radar Chart)', 126 | '.langium-ai', // src folder for data 127 | './historical-chart.html', 128 | (data: MergedEvaluatorResultType, metadata: Record) => { 129 | return calculateTriangleAreas(data).reduce((a, b) => a + b, 0); 130 | }, 131 | { 132 | preprocess: averageAcrossRunners, 133 | // take: 30, 134 | // filter: (r) => r.name.match(/rag/i) === null 135 | } 136 | ); 137 | } 138 | 139 | function calculateTriangleAreas(data: MergedEvaluatorResultType): number[] { 140 | // order changes area, so be consistent here when checking! 141 | const values = [ 142 | data.failures ?? 0, 143 | data.errors, 144 | data.warnings, 145 | 1.0 - (data.similarity ?? 0), // Semantic Diff 146 | (data.errors + data.warnings + data.infos + data.hints + data.unassigned) / 5.0, // Total Diagnostics 147 | data.response_length ?? 0, 148 | data.edit_distance, 149 | data._runtime ?? 
0 150 | ]; 151 | 152 | const n = values.length; 153 | const angle = 2 * Math.PI / n; 154 | const areas = []; 155 | 156 | for (let i = 0; i < n; i++) { 157 | const r1 = values[i]; 158 | const r2 = values[(i + 1) % n]; 159 | const area = 0.5 * r1 * r2 * Math.sin(angle); 160 | areas.push(area); 161 | } 162 | 163 | return areas; 164 | } 165 | 166 | 167 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/evaluator/eval-matrix.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | import { Evaluator, EvaluatorResult } from "./evaluator.js"; 8 | import fs from 'fs'; 9 | import * as path from 'path'; 10 | 11 | /** 12 | * Configuration for the evaluation matrix 13 | */ 14 | export interface EvalMatrixConfig { 15 | config: { 16 | /** 17 | * Name of the evaluation matrix 18 | */ 19 | name: string; 20 | 21 | /** 22 | * Helpful description of the evaluation matrix 23 | */ 24 | description: string; 25 | 26 | /** 27 | * Where to store run history 28 | */ 29 | history_folder: string; 30 | 31 | /** 32 | * The number of runs to perform for each case 33 | * Note this will trigger evaluation for all registered evaluators for each run 34 | */ 35 | num_runs: number; 36 | }, 37 | 38 | /** 39 | * Runners to evaluate 40 | */ 41 | runners: Runner[]; 42 | 43 | /** 44 | * Evaluators to evaluate with 45 | */ 46 | evaluators: NamedEvaluator[]; 47 | 48 | /** 49 | * Cases to evaluate 50 | */ 51 | cases: Case[]; 52 | } 53 | 54 | /** 55 | * Evaluation matrix for running multiple runners on multiple cases with multiple evaluators 56 | */ 57 | 
export class EvalMatrix { 58 | private config: EvalMatrixConfig; 59 | 60 | constructor(config: EvalMatrixConfig) { 61 | this.config = config; 62 | } 63 | 64 | /** 65 | * Run the evaluation matrix, getting all results back 66 | */ 67 | async run(): Promise { 68 | 69 | // get the current timestamp 70 | const start = new Date(); 71 | 72 | const results: EvaluatorResult[] = []; 73 | 74 | // verify that all runners have unique names first 75 | const runnerNames = this.config.runners.map(r => r.name); 76 | const uniqueRunnerNames = new Set(); 77 | for (const name of runnerNames) { 78 | if (uniqueRunnerNames.has(name)) { 79 | throw new Error(`Runner names must be unique, found duplicate: ${name}`); 80 | } 81 | uniqueRunnerNames.add(name); 82 | } 83 | 84 | console.log(`Running evaluation matrix: ${this.config.config.name}`); 85 | console.log(`Found ${this.config.runners.length * this.config.cases.length * this.config.evaluators.length} runner-evaluator-case combinations to handle`); 86 | 87 | // run all runners 88 | for (const runner of this.config.runners) { 89 | 90 | console.log(`* Runner: ${runner.name}`); 91 | 92 | // run all cases for this runner 93 | for (const testCase of this.config.cases) { 94 | console.log(` * Case: ${testCase.name}`); 95 | 96 | const runCount = this.config.config.num_runs ?? 1; 97 | for (let iteration = 0; iteration < runCount; iteration++) { 98 | const runnerStartTime = new Date(); 99 | const response = await runner.runner(testCase.prompt, testCase.history ?? 
[]); 100 | const runnerEndTime = new Date(); 101 | 102 | // run all evaluators on this response 103 | for (const evaluator of this.config.evaluators) { 104 | console.log(` * Evaluator: ${evaluator.name} (run ${iteration + 1})`); 105 | const result = await evaluator.eval.evaluate(response, testCase.expected_response); 106 | if (!result.name) { 107 | result.name = `${runner.name} - ${testCase.name} - ${evaluator.name}`; 108 | } 109 | // add runtime there too, so we have access to it 110 | result.data!._runtime = (runnerEndTime.getTime() - runnerStartTime.getTime()) / 1000.0; // in seconds 111 | 112 | result.metadata = { 113 | runner: runner.name, 114 | evaluator: evaluator.name, 115 | testCase: { ...testCase }, 116 | actual_response: response, 117 | duration: (runnerEndTime.getTime() - runnerStartTime.getTime()) / 1000.0, // in seconds 118 | run_count: iteration + 1 119 | }; 120 | 121 | results.push(result as EvaluatorResult); 122 | } 123 | } 124 | } 125 | } 126 | 127 | // check if the folder exists first 128 | if (!fs.existsSync(this.config.config.history_folder)) { 129 | fs.mkdirSync(this.config.config.history_folder); 130 | } 131 | 132 | const dateStr = new Date().toISOString(); 133 | const sanitizedDateStr = dateStr.replace(/:/g, '-').replace(/\./g, '-'); 134 | let fileName = `${sanitizedDateStr}-${this.config.config.name.toLowerCase().replace(/\s+/g, '-')}.json`; 135 | // escape any slashes too 136 | fileName = fileName.replace(/\//g, '-'); 137 | 138 | console.log(`Writing results to file: ${path.join(this.config.config.history_folder, fileName)}`); 139 | 140 | // run time in seconds 141 | const runTime = (new Date().getTime() - start.getTime()) / 1000; 142 | console.log(`Evaluation matrix completed in ${runTime} seconds (${runTime / 60} minutes)`); 143 | 144 | // prepare & write results to file 145 | const report = { 146 | config: this.config.config, 147 | date: dateStr, 148 | runTime: `${runTime}s`, 149 | results 150 | }; 151 | 
fs.writeFileSync(path.join(this.config.config.history_folder, fileName), JSON.stringify(report, null, 2)); 152 | 153 | // write the name of this last report into last.txt 154 | fs.writeFileSync(path.join(this.config.config.history_folder, 'last.txt'), fileName); 155 | 156 | return results; 157 | } 158 | } 159 | 160 | /** 161 | * General format for histories when prompting 162 | */ 163 | export interface Message { 164 | role: 'user' | 'system' | 'assistant'; 165 | content: string; 166 | } 167 | 168 | /** 169 | * Runner interface for running a prompt against a mode, a service, or something else that provides a response 170 | */ 171 | export interface Runner { 172 | name: string; 173 | runner: (prompt: string, messages: Message[]) => Promise; 174 | } 175 | 176 | /** 177 | * Generic evaluator interface w/ a name to identify it 178 | */ 179 | export interface NamedEvaluator { 180 | name: string; 181 | eval: Evaluator; 182 | } 183 | 184 | /** 185 | * Case interface for defining an evaluation case 186 | */ 187 | export interface Case { 188 | /** 189 | * Name of the case 190 | */ 191 | name: string; 192 | 193 | /** 194 | * Options Message history, used for system, user & assistant messages 195 | */ 196 | history?: Message[]; 197 | 198 | /** 199 | * Core prompt to run with 200 | */ 201 | prompt: string; 202 | 203 | /** 204 | * Context for the prompt, used for RAG applications 205 | */ 206 | context: string[]; 207 | 208 | /** 209 | * Expected response 210 | */ 211 | expected_response: string; 212 | } -------------------------------------------------------------------------------- /packages/langium-ai-tools/README.md: -------------------------------------------------------------------------------- 1 | # Langium AI Tools 2 | 3 | ## Overview 4 | 5 | This project provides core tools that make it easier to build AI applications for Langium DSLs. 
These core tools help to solve the following problems around building AI applications by making it easier to: 6 | 7 | - Determine which models work well for your DSL 8 | - Evaluate which changes to your tooling actually improve your generation results 9 | - How to process DSL documents in a way that makes sense for your DSL & target application 10 | 11 | To solve these problems this package provides: 12 | 13 | - Splitting Support: Using your DSL's parser to make it easier to pre-process documents before ingest (such as into a vector DB) 14 | - Training & Evaluation Support: Assess the output of your model + RAG + whatever else you have in your stack with regards to a structured input/output evaluation phase. 15 | - Constraint Support: Synthesize BNF-style grammars from your Langium grammar, which can be used to control the token output from an LLM to conform to your DSL's expected structure (this feature has been added directly into the **langium-cli** itself, as it has wider general applications). 16 | 17 | What's also important is what is not provided: 18 | - *We don't choose your model for you.* We believe this is your choice, and we don't want to presume we know best or lock you in. All we assume is that you have a model (or stack) that we can use. For tooling that leverages models directly, we'll be providing a separate package under Langium AI that will be separate from the core here. 19 | - *We don't choose your stack for you.* There are many excellent choices for hosting providers, databases, caches, and other supporting services (local & remote). There's so many, and they change so often, that we decided it was best to not assume what works here, and rather support preparing information for whatever stack you choose. 20 | 21 | LLMs (and transformers in general), are evolving quite rapidly. With this approach, these tools help you build your own specific approach, whilst letting you keep up with the latest and greatest in model developments. 
22 | 23 | ## Installation 24 | 25 | Langium AI tools tracks tightly with Langium releases. If you're using Langium 3.X or 4.X in your project, you'll want to install the corresponding version of Langium AI Tools that matches it. 26 | 27 | ```bash 28 | # if you're using Langium 4.1.X 29 | npm i --save langium-ai-tools@^4.1.0 30 | 31 | # or 3.5.X 32 | npm i --save langium-ai-tools@^3.5.0 33 | ``` 34 | 35 | We don't actively support Langium 2.X or earlier. 36 | 37 | ## Usage 38 | 39 | ### Splitting 40 | 41 | Langium AI Tools presents various splitting utilities that are simple but flexible. This includes: 42 | - retrieving all nodes of a specific type from a document 43 | - splitting a document into chunks by node type (with or without comments) 44 | - generating program maps 45 | 46 | For example, if you have a DSL that looks like this: 47 | 48 | ``` 49 | // A dsl that allows writing functions... 50 | function foo() { ... } 51 | function bar() { ... } 52 | ``` 53 | 54 | Assuming your Langium parser rule for these functions is called `Func`, you could write a splitter like so to chunk by functions: 55 | 56 | ```ts 57 | const chunks: string[] = splitByNode( 58 | dslProgramString, 59 | [ 60 | (node) => node.$type === 'Func' 61 | ], 62 | myLanguageServices.grammar, 63 | // default options, comment rule names can be set to [] to avoid including comments in chunks 64 | // { commentRuleNames: ['ML_COMMENT', 'SL_COMMENT'] } 65 | ); 66 | ``` 67 | 68 | And you would get back something like this: 69 | ``` 70 | [ 71 | 'function foo() { ... }', 72 | 'function bar() { ... }' 73 | ] 74 | ``` 75 | 76 | In case you want just the AST node, and don't want it transformed to text, you can use the `splitByNodeToAst` function instead, giving you back the raw AST nodes to do with as you want. 77 | 78 | If you want a program map (like in the case of generating a mapping of your repo), you can utilize the `ProgramMapper` class. 
79 | 80 | ```ts 81 | import { ProgramMapper } from 'langium-ai-tools/splitter'; 82 | 83 | const myProg = ` 84 | function foo() { ... } 85 | function bar() { ... } 86 | `; 87 | 88 | const services = createMyDSLServices(EmptyFileSystem).mydsl; // will differ based on the name of your DSL 89 | const mapper = new ProgramMapper(services, { 90 | mappingRules: [ 91 | { 92 | predicate: (node) => node.$type === 'Func', 93 | map: (node) => `func ${node.name}` 94 | }, 95 | ... 96 | ] 97 | }); 98 | const programMap: string[] = mapper.map(myProg); 99 | ``` 100 | 101 | Which would give you an output like so: 102 | 103 | ``` 104 | [ 105 | 'func foo', 106 | 'func bar' 107 | ] 108 | ``` 109 | 110 | In both cases, you can provide predicates for the nodes that are of interest to you. The splitter chunking is a bit more opinionated, but the program mapper allows fine-grained generation when needed. 111 | 112 | For examples you can check out the [example-dsl-evaluator](../examples/example-dsl-evaluator/README.md) project, which also contains splitting & mapping examples. 113 | 114 | ### Evaluation 115 | 116 | Regardless of how you've sourced your model, you'll need a metric for determining the quality of your output. 117 | 118 | For Langium DSLs, we provide a series of *evaluator* utilities to help in assessing the correctness of DSL output. 119 | 120 | It's important to point out that evaluations are *not* tests, instead this is more similar to [OpenAI's evals framework](https://github.com/openai/evals). The idea is that we're grading or scoring outputs with regards to an expected output from a known input. This is a simple but effective approach to determining if your model is generally doing what you expect it to in a structured way, and *not* doing something else as well. 121 | 122 | Take the following evaluator for example. Let's assume you have [Ollama](https://ollama.com/) running locally, and the [ollama-js](https://github.com/ollama/ollama-js) package installed.
From a given base model you can define evaluations like so. 123 | 124 | ```ts 125 | import { Evaluator, EvaluatorScore } from 'langium-ai-tools/evaluator'; 126 | import ollama from 'ollama'; 127 | 128 | // get your language's services 129 | const services = createMyDSLServices(EmptyFileSystem).MyDSL; 130 | 131 | // define an evaluator using your language's services 132 | // this effectively uses your existing parser & validations to 'grade' the response 133 | const evaluator = new LangiumEvaluator(services); 134 | 135 | // make some prompt 136 | const response = await ollama.chat({ 137 | model: 'llama3.2', 138 | messages: [{ 139 | role: 'user', 140 | content: 'Write me a hello world program written in MyDSL.' 141 | }] 142 | }); 143 | 144 | const es: EvaluatorScore = await evaluator.evaluate(response.message.content); 145 | 146 | // print out your score! 147 | console.log(es); 148 | ``` 149 | 150 | You can also define custom evaluators that are more tuned to the needs of your DSL. This could be handling diagnostics in a very specific fashion, extracting code out of the response itself to check, using an evaluation model to grade the response, or using a combination of techniques to get a more accurate score for your model's output. 151 | 152 | In general we stick to focusing on what Langium can do to help with evaluation, but leave the opportunity open for you to extend, supplement, or modify evaluation logic as you see fit. 153 | 154 | ## Contributing 155 | 156 | If you want to help feel free to open an issue or a PR. As a general note we're open to accepting changes that focus on improving how we can support AI application development for Langium DSLs. But we don't want to provide explicit bindings to actual services/providers at this time, such as LLamaIndex, Ollama, LangChain, or others. Similarly this package doesn't provide direct bindings for AI providers such as OpenAI and Anthropic here.
Instead these changes will go into a separate package under Langium AI that is intended for this purpose. 157 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/evaluator/evaluator.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | /** 8 | * Baseline Validator Class 9 | */ 10 | 11 | import { readFileSync, existsSync, readdirSync } from 'fs'; 12 | import * as path from 'path'; 13 | 14 | export type EvaluatorResultData = Record & { 15 | _runtime?: number; 16 | }; 17 | 18 | /** 19 | * Evaluator result type 20 | */ 21 | export type EvaluatorResult = { 22 | /** 23 | * Name of this evaluation 24 | */ 25 | name: string; 26 | 27 | /** 28 | * Optional metadata, can be used to store additional information 29 | */ 30 | metadata: Record; 31 | 32 | /** 33 | * Data for this evaluation 34 | */ 35 | data: T; 36 | 37 | }; 38 | 39 | /** 40 | * Helper to process a set of results, averaging all runs of each runner-evaluator-case combination 41 | */ 42 | export function averageAcrossCases(results: EvaluatorResult[]): EvaluatorResult[] { 43 | const mappedResults: Map = new Map(); 44 | 45 | const averagedResults: EvaluatorResult[] = []; 46 | 47 | // collect like-results 48 | for (const result of results) { 49 | // add this result to the map (grouping by runner & case) 50 | const name = result.name; 51 | const existingResult = mappedResults.get(name) ?? 
[]; 52 | existingResult.push(result); 53 | mappedResults.set(name, existingResult); 54 | } 55 | 56 | // average the results 57 | for (const [_key, groupedResults] of mappedResults) { 58 | const avgData = groupedResults[0].data; 59 | 60 | // sum all results except the first 61 | for (const result of groupedResults.slice(1)) { 62 | const resultData = result.data; 63 | for (const [key, value] of Object.entries(resultData)) { 64 | if (typeof value === 'number') { 65 | avgData[key] = (avgData[key] as number ?? 0) + value; 66 | } 67 | } 68 | } 69 | 70 | // lastly, divide each entry by the number of 'groupedResults' 71 | for (const [key, value] of Object.entries(avgData)) { 72 | if (typeof value === 'number') { 73 | avgData[key] = value / groupedResults.length; 74 | // round to 2 decimal places 75 | avgData[key] = Math.round((avgData[key] as number) * 100) / 100; 76 | } 77 | } 78 | 79 | averagedResults.push({ 80 | name: groupedResults[0].name, 81 | metadata: groupedResults[0].metadata, 82 | data: avgData 83 | }); 84 | } 85 | return averagedResults; 86 | } 87 | 88 | /** 89 | * Averages all results across runners at the highest level, to get a single result for each runner 90 | */ 91 | export function averageAcrossRunners(results: EvaluatorResult[]): EvaluatorResult[] { 92 | // first average across runs 93 | const processedResults = averageAcrossCases(results); 94 | 95 | // now average across runners 96 | const mappedResults: Map = new Map(); 97 | 98 | const averagedResults: EvaluatorResult[] = []; 99 | 100 | // collect like-results 101 | for (const result of processedResults) { 102 | // add this result to the map (grouping by runner) 103 | const name = result.metadata.runner; 104 | const existingResult = mappedResults.get(name) ?? 
[]; 105 | existingResult.push(result); 106 | mappedResults.set(name, existingResult); 107 | } 108 | 109 | // average the results 110 | for (const [_key, groupedResults] of mappedResults) { 111 | const avgData = groupedResults[0].data; 112 | 113 | // sum all results except the first 114 | for (const result of groupedResults.slice(1)) { 115 | const resultData = result.data; 116 | for (const [key, value] of Object.entries(resultData)) { 117 | if (typeof value === 'number') { 118 | avgData[key] = (avgData[key] as number ?? 0) + value; 119 | } 120 | } 121 | } 122 | 123 | // lastly, divide each entry by the number of 'groupedResults' 124 | for (const [key, value] of Object.entries(avgData)) { 125 | if (typeof value === 'number') { 126 | avgData[key] = value / groupedResults.length; 127 | // round to 2 decimal places 128 | avgData[key] = Math.round((avgData[key] as number) * 100) / 100; 129 | } 130 | } 131 | 132 | averagedResults.push({ 133 | name: groupedResults[0].metadata.runner, 134 | metadata: groupedResults[0].metadata, 135 | data: avgData 136 | }); 137 | } 138 | 139 | return averagedResults; 140 | } 141 | 142 | /** 143 | * Report 144 | */ 145 | export interface Report { 146 | config: { 147 | name: string; 148 | description: string; 149 | history_folder: string; 150 | num_runs: number; 151 | }; 152 | date: string; 153 | runTime: string; 154 | results: EvaluatorResult[]; 155 | } 156 | 157 | /** 158 | * Loads a specific report, containing evaluator results from a file & returns it 159 | */ 160 | export function loadReport(file: string): Report { 161 | return JSON.parse(readFileSync(file, 'utf-8')) as Report; 162 | } 163 | 164 | /** 165 | * Attempts to load the most recent evaluator results from the given file 166 | */ 167 | export function loadLastResults(dir: string, take?: number): EvaluatorResult[] { 168 | if (!existsSync(dir)) { 169 | throw new Error(`Directory does not exist: ${dir}`); 170 | } 171 | 172 | let files = readdirSync(dir).filter(f => 
f.endsWith('.json')); 173 | 174 | if (!take) { 175 | const lastFile = path.join(dir, 'last.txt'); 176 | 177 | if (!existsSync(lastFile)) { 178 | throw new Error(`Last file does not exist in directory: ${dir}. Try running an evaluation matrix first.`); 179 | } 180 | // read name from last file 181 | const lastFileName = readFileSync(lastFile).toString(); 182 | 183 | files.push(lastFileName); 184 | 185 | } else { 186 | // read the most recent files 187 | files = files.sort().reverse().slice(0, take); 188 | 189 | } 190 | 191 | const results: EvaluatorResult[] = []; 192 | 193 | for (const file of files) { 194 | const report = loadReport(path.join(dir, file)); 195 | results.push(...report.results); 196 | } 197 | 198 | return results; 199 | } 200 | 201 | /** 202 | * Evaluator class for evaluating agent responses 203 | */ 204 | export abstract class Evaluator { 205 | /** 206 | * Validate some agent response 207 | */ 208 | abstract evaluate(response: string, expected_response: string): Promise>; 209 | 210 | } 211 | 212 | export function mergeEvaluators(...evaluators: Evaluator[]): Evaluator { 213 | // merge evaluators in sequence 214 | return evaluators.reduce((acc, val) => mergeEvaluatorsInternal(acc, val)); 215 | } 216 | 217 | /** 218 | * Merges two evaluators together in sequence, such that results of a are combined with b (b takes precedence in key overrides) 219 | * @param a First evaluator to merge 220 | * @param b Second evaluator to merge 221 | */ 222 | function mergeEvaluatorsInternal(a: Evaluator, b: Evaluator): Evaluator { 223 | return { 224 | async evaluate(response: string, expected_response: string): Promise> { 225 | const r1 = await a.evaluate(response, expected_response); 226 | const r2 = await b.evaluate(response, expected_response); 227 | return { 228 | metadata: { 229 | ...r1.metadata, 230 | ...r2.metadata 231 | }, 232 | data: { 233 | ...r1.data, 234 | ...r2.data 235 | } 236 | }; 237 | } 238 | }; 239 | } 240 | 
-------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/src/eval-langdev.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | import { EmptyFileSystem } from 'langium'; 8 | import { averageAcrossCases, Case, EvalMatrix, EvaluatorResult, generateRadarChart, LangiumEvaluator, LangiumEvaluatorResultData, loadLastResults, mergeEvaluators, Message, normalizeData, Runner } from 'langium-ai-tools/evaluator'; 9 | import { createLangiumGrammarServices } from 'langium/grammar'; 10 | import ollama from 'ollama'; 11 | import { EmbeddingEvaluatorResultData, OllamaEmbeddingEvaluator } from './embedding-evaluator.js'; 12 | import * as readline from 'readline/promises'; 13 | 14 | const rl = readline.createInterface({ 15 | input: process.stdin, 16 | output: process.stdout 17 | }); 18 | 19 | /** 20 | * Create services for the Langium grammar language. 
21 | * 22 | * In your case, you would do the same for your own language instead (using your module) 23 | */ 24 | const langiumServices = createLangiumGrammarServices(EmptyFileSystem); 25 | 26 | /** 27 | * Runners 28 | */ 29 | namespace Runners { 30 | 31 | /** 32 | * llama3.2 3b runner 33 | */ 34 | export const llama3_2_3b: Runner = { 35 | name: 'llama3.2 3B', 36 | runner: async (content: string, messages: Message[] = []) => { 37 | const newMsgs = [...messages, { role: 'user', content }]; 38 | return (await prompt('llama3.2:latest', newMsgs)).message.content; 39 | } 40 | }; 41 | 42 | /** 43 | * Codellama runner 44 | */ 45 | export const codellama: Runner = { 46 | name: 'codellama', 47 | runner: async (content: string, messages: Message[] = []) => { 48 | const newMsgs = [...messages, { role: 'user', content }]; 49 | return (await prompt('codellama:latest', newMsgs)).message.content; 50 | } 51 | }; 52 | 53 | /** 54 | * And a codegemma runner 55 | */ 56 | export const codegemma: Runner = { 57 | name: 'codegemma', 58 | runner: async (content: string, messages: Message[] = []) => { 59 | const newMsgs = [...messages, { role: 'user', content }]; 60 | return (await prompt('codegemma:latest', newMsgs)).message.content; 61 | } 62 | }; 63 | 64 | /** 65 | * Helper to prompt models hosted in Ollama 66 | */ 67 | async function prompt(model: string, messages: any[]) { 68 | const response = await ollama.chat({ 69 | model, messages 70 | }); 71 | return response; 72 | } 73 | } 74 | 75 | /** 76 | * A simple case for generating a HelloWorld grammar 77 | */ 78 | const caseHelloWorld: Case = { 79 | name: "Hello World Grammar", 80 | context: [], 81 | history: [], 82 | prompt: "Generate a simple HelloWorld grammar in Langium.", 83 | expected_response: `Certainly. 
Here's an example of a possible HelloWorld grammar written in the Langium grammar language: 84 | 85 | \`\`\`langium 86 | grammar HelloWorld 87 | 88 | entry Model: 89 | (persons+=Person | greetings+=Greeting)*; 90 | 91 | Person: 92 | 'person' name=ID; 93 | 94 | Greeting: 95 | 'Hello' person=[Person:ID] '!'; 96 | 97 | hidden terminal WS: /\s+/; 98 | terminal ID: /[_a-zA-Z][\w_]*/; 99 | 100 | hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//; 101 | hidden terminal SL_COMMENT: /\/\/[^\n\r]*/; 102 | \`\`\` 103 | 104 | This grammar defines a simple HelloWorld language with two parser rules: Person and Greeting. The entry rule Model allows for an arbitrary sequence of Person and Greeting elements. A Person is defined by the keyword 'person' followed by an ID terminal, which represents the person's name. A Greeting consists of the word 'Hello' followed by a reference to a Person and an exclamation mark. The grammar also includes terminal rules for whitespace, identifiers, and comments.` 105 | }; 106 | 107 | /** 108 | * An example evaluator for evaluating Langium grammars & checking for embedding similarity (using Ollama) 109 | */ 110 | const langiumAndEmbeddingEvaluator = mergeEvaluators( 111 | 112 | // built-in evaluator for Langium grammars (i.e. the Langium DSL itself) 113 | new LangiumEvaluator(langiumServices.grammar), 114 | 115 | // then run the Ollama embedding evaluator to compare expected vs. 
actual
    new OllamaEmbeddingEvaluator('nomic-embed-text')
);

/**
 * Demo entry point: verifies the required Ollama models are installed
 * (offering to pull any that are missing), then runs a small evaluation
 * matrix over a single case and prints both the raw and the
 * per-case-averaged results.
 */
export async function runLangDevDemo() {

    // check if all the necessary models are installed via ollama
    const models = ['llama3.2:latest', 'codellama:latest', 'codegemma:latest', 'nomic-embed-text:latest'];
    const listedModels = (await ollama.list()).models;
    console.log('Available models: ', listedModels.map(m => m.name));
    // a model is "missing" only on an exact tag match failure
    const missingModels = models.filter(model => !listedModels.some(m => m.name === model));
    if (missingModels.length > 0) {
        console.error(`The following models are missing: ${missingModels.join(', ')}.`);
        // prompt to install
        const answer = await rl.question(`Do you want to install these missing models for this demo? (y/n) `);
        if (answer.toLowerCase() === 'y') {
            for (const model of missingModels) {
                console.log(`Installing model ${model}...`);
                // pull blocks until the model download completes
                await ollama.pull({
                    model: model
                });
            }
        } else {
            // user declined: abort the demo rather than run with missing models
            console.error('Please install missing models and try again.');
            return;
        }
    }

    const eMat = new EvalMatrix({

        // basic configuration
        config: {
            name: 'LangDev Demo',
            description: 'Showing basic evaluation capabilities of Langium AI',
            history_folder: '.langium-ai',
            num_runs: 3
        },

        // just a few runners
        runners: [
            Runners.llama3_2_3b,
            Runners.codellama,
            Runners.codegemma,
        ],

        // using our merged evaluator
        evaluators: [
            {
                name: 'Langium + Embedding Evaluator (merged)',
                eval: langiumAndEmbeddingEvaluator
            }
        ],

        // single case
        cases: [
            caseHelloWorld
        ]
    });

    // run the matrix
    const results = await eMat.run();

    // print the full results (as saved)
    console.log('Evaluation report: ');
    printResults(results);

    // print the average results
    const processedResults = averageAcrossCases(results);
    console.log('Average Evaluation report: ');
    printResults(processedResults);

}

/**
 * Helper to print results to the console.
 * Renders one table row per result; the listed columns must match the
 * keys produced by the merged Langium + embedding evaluator.
 */
function printResults(results: EvaluatorResult[]) {
    console.table(results.map(r => {
        return {
            name: r.name,
            ...r.data
        }
    }), ['name', 'errors', 'warnings', 'infos', 'hints', 'unassigned', 'similarity']);
}

/**
 * Helper type to describe the merged evaluator results
 */
type MergedEvaluatorResultType = LangiumEvaluatorResultData & EmbeddingEvaluatorResultData;

/**
 * Takes the last results we obtained, and builds a quick radar chart from them.
 * Reads the most recent report from the '.langium-ai' history folder and
 * writes './radar-chart.html'.
 */
export function generateChartFromLastResults() {
    const rawResults = loadLastResults('.langium-ai', 1);
    console.log('Last Evaluation report: ');

    // generate a radar chart for this run
    // all axes are normalized so that smaller is better
    generateRadarChart(
        'Radar Chart (smaller is better)',
        normalizeData(rawResults),
        './radar-chart.html',
        (data: MergedEvaluatorResultType) => {
            return {
                'Failures': data.failures,
                'Errors': data.errors,
                'Warnings': data.warnings,
                'Semantic Diff': 1.0 - data.similarity, // inverse similarity
                'Response Size': data.response_length ?? 0,
                'Time': data._runtime ?? 0
            }
        }
    );
}


--------------------------------------------------------------------------------
/packages/examples/example-dsl-evaluator/src/runners.ts:
--------------------------------------------------------------------------------
/******************************************************************************
 * Copyright 2024 - 2025 TypeFox GmbH
 * This program and the accompanying materials are made available under the
 * terms of the MIT License, which is available in the project root.
5 | ******************************************************************************/ 6 | 7 | import OpenAI from 'openai'; 8 | import ollama from 'ollama'; 9 | import { Message, Runner } from 'langium-ai-tools'; 10 | import { ChromaClient } from 'chromadb'; 11 | 12 | async function prompt(model: string, messages: any[]) { 13 | const response = await ollama.chat({ 14 | model, messages 15 | }); 16 | return response; 17 | } 18 | 19 | /** 20 | * Construct a runner for a model provided by Ollama 21 | */ 22 | function getOllamaRunner(name: string, model: string): Runner { 23 | return { 24 | name, 25 | runner: async (content: string, messages: Message[] = []) => { 26 | const newMsgs = [...messages, { role: 'user', content }]; 27 | return (await prompt(model, newMsgs)).message.content; 28 | } 29 | }; 30 | } 31 | 32 | /** 33 | * Helper to wrap retrieved documents for RAG in a system message 34 | */ 35 | async function getRagSystemMessage(content: string): Promise { 36 | return { 37 | role: 'system', 38 | content: `Additionally, utilize the following context to answer the user's question: \n\n${(await ragLookup(content)).join('\n======\n')}` 39 | }; 40 | } 41 | 42 | const LangiumDSLCollection = "langium-collection"; 43 | 44 | /** 45 | * Helper to perform a ChromaDB lookup to provide context for a RAG application, given some content 46 | */ 47 | async function ragLookup(content: string): Promise { 48 | // perform a database lookup first here via chroma 49 | const client = new ChromaClient({ path: 'http://localhost:8000' }); 50 | 51 | // create a collection 52 | const collection = await client.getCollection({ 53 | name: LangiumDSLCollection, 54 | embeddingFunction: { 55 | generate: async (texts: string[]) => { 56 | return (await ollama.embed({ 57 | model: 'nomic-embed-text', 58 | input: texts, 59 | keep_alive: 30 60 | })).embeddings; 61 | } 62 | } 63 | }); 64 | 65 | // embed 66 | const queryEmbeddings = (await ollama.embed({ 67 | model: 'nomic-embed-text', 68 | input: content, 
69 | keep_alive: 30 70 | })).embeddings; 71 | 72 | // query 73 | const results = await collection.query({ 74 | queryEmbeddings, 75 | nResults: 3, 76 | }); 77 | 78 | return results.documents as string[][]; 79 | } 80 | 81 | /** 82 | * Construct a runner w/ RAG for a model provided by Ollama 83 | */ 84 | function getOllamaRAGRunner(name: string, model: string): Runner { 85 | return { 86 | name, 87 | runner: async (content: string, messages: Message[] = []) => { 88 | const newMsgs = [await getRagSystemMessage(content), ...messages, { role: 'user', content }]; 89 | return (await prompt(model, newMsgs)).message.content; 90 | } 91 | }; 92 | } 93 | 94 | // ollama runners 95 | export const runner_llama3_2_3b = getOllamaRunner('llama3.2 3B', 'llama3.2:latest'); 96 | export const runner_llama3_2_1b = getOllamaRunner('llama3.2 1B', 'llama3.2:1b'); 97 | export const runner_llama3_1 = getOllamaRunner('llama3.1', 'llama3.1'); 98 | export const runner_codellama = getOllamaRunner('codellama', 'codellama'); 99 | export const runner_codegemma = getOllamaRunner('codegemma', 'codegemma'); 100 | export const runner_deepseek_coder_v2 = getOllamaRunner('deepseek-coder-v2', 'deepseek-coder-v2'); 101 | export const runner_qwen_2_5_coder = getOllamaRunner('qwen-2.5-coder 7B', 'qwen2.5-coder'); 102 | 103 | // RAG ollama runners 104 | export const runner_llama3_2_3b_rag = getOllamaRAGRunner('llama3.2 3B w/ RAG', 'llama3.2:latest'); 105 | export const runner_llama3_2_1b_rag = getOllamaRAGRunner('llama3.2 1B w/ RAG', 'llama3.2:1b'); 106 | export const runner_llama3_1_rag = getOllamaRAGRunner('llama3.1 w/ RAG', 'llama3.1'); 107 | export const runner_codellama_rag = getOllamaRAGRunner('codellama w/ RAG', 'codellama:latest'); 108 | export const runner_codegemma_rag = getOllamaRAGRunner('codegemma w/ RAG', 'codegemma:latest'); 109 | export const runner_deepseek_coder_v2_rag = getOllamaRAGRunner('deepseek-coder-v2 w/ RAG', 'deepseek-coder-v2'); 110 | export const runner_qwen_2_5_coder_rag = 
getOllamaRAGRunner('qwen-2.5-coder 7B w/ RAG', 'qwen2.5-coder');



// NOTE(review): the OpenAI runners below are copy/paste variants that differ only
// in runner name, model id, and whether a RAG system message is prepended — good
// candidates for a factory helper analogous to getOllamaRunner/getOllamaRAGRunner.

/**
 * GPT-3.5-turbo runner (no retrieval); reads OPENAI_API_KEY from the environment.
 */
export const runner_openai_gpt3_5_turbo: Runner = {
    name: 'openai-gpt3',
    runner: async (content: string, messages: Message[] = []) => {
        const newMsgs = [...messages, { role: 'user', content }];

        // client is created per call so the env var is read lazily
        const client = new OpenAI({
            apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted
        });

        // prompt & return the first choice
        const chatCompletion = await client.chat.completions.create({
            messages: newMsgs as any,
            model: 'gpt-3.5-turbo-0125',
        });
        return chatCompletion.choices[0].message.content as string;
    }
};

/**
 * GPT-3.5-turbo runner with a RAG system message prepended to the conversation.
 */
export const runner_openai_gpt3_5_turbo_rag: Runner = {
    name: 'openai-gpt3.5-turbo-rag',
    runner: async (content: string, messages: Message[] = []) => {
        const newMsgs = [await getRagSystemMessage(content), ...messages, { role: 'user', content }];

        const client = new OpenAI({
            apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted
        });

        // prompt & return the first choice
        const chatCompletion = await client.chat.completions.create({
            messages: newMsgs as any,
            model: 'gpt-3.5-turbo-0125',
        });
        return chatCompletion.choices[0].message.content as string;
    }
};

/**
 * GPT-4o-mini runner (no retrieval).
 */
export const runner_openai_gpt4o_mini: Runner = {
    name: 'openai-gpt4o-mini',
    runner: async (content: string, messages: Message[] = []) => {
        const newMsgs = [...messages, { role: 'user', content }];

        const client = new OpenAI({
            apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted
        });

        // prompt & return the first choice
        const chatCompletion = await client.chat.completions.create({
            messages: newMsgs as any,
            model: 'gpt-4o-mini',
        });
        return chatCompletion.choices[0].message.content as string;
    }
};

/**
 * GPT-4o runner (no retrieval).
 */
export const runner_openai_gpt4o: Runner = {
    name: 'openai-gpt4o',
    runner: async (content: string, messages: Message[] = []) => {
        const newMsgs = [...messages, { role: 'user', content }];

        const client = new OpenAI({
            apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted
        });

        // prompt & return the first choice
        const chatCompletion = await client.chat.completions.create({
            messages: newMsgs as any,
            model: 'gpt-4o',
        });
        return chatCompletion.choices[0].message.content as string;
    }
};

/**
 * GPT-4o runner with a RAG system message prepended.
 */
export const runner_openai_gpt4o_rag: Runner = {
    name: 'openai-gpt4o-rag',
    runner: async (content: string, messages: Message[] = []) => {
        const newMsgs = [await getRagSystemMessage(content), ...messages, { role: 'user', content }];

        const client = new OpenAI({
            apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted
        });

        // prompt & return the first choice
        const chatCompletion = await client.chat.completions.create({
            messages: newMsgs as any,
            model: 'gpt-4o',
        });
        return chatCompletion.choices[0].message.content as string;
    }
};

/**
 * GPT-4o-mini runner with a RAG system message prepended.
 */
export const runner_openai_gpt4o_mini_rag: Runner = {
    name: 'openai-gpt4o-mini-rag',
    runner: async (content: string, messages: Message[] = []) => {
        const newMsgs = [await getRagSystemMessage(content), ...messages, { role: 'user', content }];

        const client = new OpenAI({
            apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted
        });

        // prompt & return the first choice
        const chatCompletion = await client.chat.completions.create({
            messages: newMsgs as any,
            model: 'gpt-4o-mini',
        });
        return
chatCompletion.choices[0].message.content as string; 219 | } 220 | }; -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/evaluator/chart.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | /** 8 | * Generates & exports an HTML radar chart report using plotly JS 9 | */ 10 | 11 | import { EvaluatorResult, EvaluatorResultData, averageAcrossRunners, loadReport } from "./evaluator.js"; 12 | import { writeFileSync, readdirSync, readFileSync } from 'fs'; 13 | import * as path from 'path'; 14 | 15 | /** 16 | * Generates an HTML radar chart from the provided data 17 | * @param evalResults Evaluator results to chart 18 | * @param dest Output file to write the chart to 19 | * @param rFunc polar r function, used to extract the r values from the data 20 | * @param theta theta values, i.e. the property names to use for the radar chart 21 | */ 22 | export function generateRadarChart( 23 | chartName: string, 24 | evalResults: EvaluatorResult[], 25 | dest: string, 26 | rFunc: (d: T, metadata: Record) => Record, 27 | preprocess?: (arr: EvaluatorResult[]) => EvaluatorResult[] 28 | ): void { 29 | 30 | // process results first to average out data (either using the user supplied function, or defaulting to average across runners) 31 | const processedResults = preprocess ? 
preprocess(evalResults) : averageAcrossRunners(evalResults); 32 | 33 | const data = processedResults.map((result) => { 34 | const resultData = result.data as T; 35 | const rfuncResult = rFunc(resultData, result.metadata); 36 | const theta = Object.keys(rfuncResult); 37 | const r = Object.values(rfuncResult); 38 | 39 | return { 40 | type: 'scatterpolar', 41 | r, 42 | theta, 43 | fill: 'toself', 44 | name: result.name 45 | }; 46 | }); 47 | 48 | const layout = { 49 | title: chartName, 50 | name: chartName, 51 | polar: { 52 | radialaxis: { 53 | visible: true, 54 | range: [0, 1] 55 | } 56 | }, 57 | showlegend: true, 58 | width: 1000, 59 | height: 800 60 | }; 61 | 62 | const html = ` 63 | 64 | 65 | 66 | ${chartName} 67 | 68 | 69 | 70 |
71 | 76 | 77 | 78 | `; 79 | 80 | writeFileSync(dest, html); 81 | console.log(`Radar chart report written to: ${dest}`); 82 | } 83 | 84 | export function generateHistogram( 85 | chartName: string, 86 | evalResults: EvaluatorResult[], 87 | dest: string, 88 | dataFunc: (d: T, metadata: Record) => Record, 89 | preprocess?: (arr: EvaluatorResult[]) => EvaluatorResult[] 90 | ) { 91 | 92 | // process results first to average out data (either using the user supplied function, or defaulting to average across runners) 93 | const processedResults = preprocess ? preprocess(evalResults) : averageAcrossRunners(evalResults); 94 | 95 | const data = processedResults.map((result) => { 96 | const data = result.data as T; 97 | const dd = dataFunc(data, result.metadata); 98 | const yLabels = Object.keys(dd); 99 | const xData = Object.values(dd); 100 | return { 101 | type: 'bar', 102 | x: xData, 103 | y: yLabels, 104 | orientation: 'h', 105 | name: result.name 106 | }; 107 | }); 108 | 109 | const layout = { 110 | title: chartName, 111 | barmode: 'group', 112 | showlegend: true, 113 | width: 1000, 114 | height: 800 115 | }; 116 | 117 | const html = ` 118 | 119 | 120 | 121 | ${chartName} 122 | 123 | 124 | 125 |
126 | 131 | 132 | 133 | `; 134 | 135 | writeFileSync(dest, html); 136 | console.log(`Histogram report written to: ${dest}`); 137 | } 138 | 139 | type RunnerName = string; 140 | 141 | /** 142 | * Normalizes all numeric data entries in results (while also retaining non-numeric entries) 143 | */ 144 | export function normalizeData(data: EvaluatorResult[]): EvaluatorResult[] { 145 | const maxValues = new Map(); 146 | 147 | for (const result of data) { 148 | const d = result.data as EvaluatorResultData; 149 | for (const [key, value] of Object.entries(d)) { 150 | if (typeof value !== 'number') { 151 | continue; 152 | } 153 | const existingMax = maxValues.get(key) ?? 0; 154 | if (value > existingMax) { 155 | maxValues.set(key, value); 156 | } 157 | } 158 | } 159 | 160 | for (const result of data) { 161 | const d = result.data as EvaluatorResultData; 162 | for (const [key, value] of Object.entries(d)) { 163 | if (typeof value === 'number') { 164 | const max = maxValues.get(key) ?? 1; 165 | d[key] = value / max; 166 | } 167 | } 168 | } 169 | 170 | return data; 171 | } 172 | 173 | /** 174 | * Generates a historical chart from the provided data, showing runners along the X, and their performance over time along the X axis 175 | * @param chartName 176 | * @param folder 177 | * @param dest 178 | * @param dataFunc 179 | * @param options 180 | */ 181 | export function generateHistoricalChart( 182 | chartName: string, 183 | folder: string, 184 | dest: string, 185 | dataFunc: (d: T, metadata: Record) => number, 186 | options?: { 187 | preprocess?: (arr: EvaluatorResult[]) => EvaluatorResult[], 188 | filter?: (r: EvaluatorResult) => boolean, 189 | take?: number, 190 | chartType?: string 191 | } 192 | ) { 193 | // generate a historical chart by calculating the average for runners in all previous reports, and organizing them in ascending date order 194 | let files = readdirSync(folder).filter(f => f.endsWith('.json')); 195 | 196 | // array of results, where each array of results is 
presumed to be a stream of results from a collection of historical runs 197 | const runnerResultsMap: Map = new Map(); 198 | 199 | // take the most recent files if take is set 200 | if (options?.take) { 201 | files = files.sort().slice(0, options.take); 202 | } 203 | 204 | for (const file of files) { 205 | // retrieve results from this file 206 | const report = loadReport(path.join(folder, file)); 207 | const results = report.results; 208 | const date: string = report.date; 209 | console.log(`Processing historical results from: ${date}`); 210 | 211 | // process results first 212 | let processedResults = options?.preprocess ? options.preprocess(results) : averageAcrossRunners(results); 213 | // normalize 214 | processedResults = normalizeData(processedResults); 215 | 216 | // add to the map based by runner name 217 | for (const result of processedResults) { 218 | if (options?.filter && !options.filter(result)) { 219 | // skip 220 | continue; 221 | } 222 | 223 | const name = result.metadata.runner; 224 | const existingResults = runnerResultsMap.get(name) ?? []; 225 | 226 | const rc = { 227 | ...result 228 | }; 229 | rc.metadata.date = new Date(date).toISOString(); 230 | 231 | existingResults.push(result); 232 | runnerResultsMap.set(name, existingResults); 233 | } 234 | } 235 | 236 | const allData: unknown[] = []; 237 | 238 | // organize by date in ascending order 239 | for (let [name, results] of runnerResultsMap) { 240 | results.sort((a, b) => { 241 | return new Date(a.metadata.date).getTime() - new Date(b.metadata.date).getTime(); 242 | }); 243 | 244 | const runners = results.map(r => r.metadata.runner); 245 | const data = results.map(r => dataFunc(r.data as T, r.metadata)).sort(); 246 | 247 | allData.push({ 248 | type: options?.chartType ? 
options.chartType : 'scatter', 249 | x: runners, 250 | y: data, 251 | name 252 | }); 253 | } 254 | 255 | const layout = { 256 | title: chartName, 257 | showlegend: true, 258 | width: 1000, 259 | height: 1000 260 | }; 261 | 262 | const html = ` 263 | 264 | 265 | 266 | ${chartName} 267 | 268 | 269 | 270 |
271 | 276 | 277 | 278 | `; 279 | 280 | writeFileSync(dest, html); 281 | console.log(`Historical report written to: ${dest}`); 282 | 283 | } -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/analyzer/document-analyzer.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | * 6 | * @author Dennis Hübner 7 | ******************************************************************************/ 8 | 9 | import { CstUtils, Grammar, GrammarAST, LangiumDocument, isLeafCstNode } from "langium"; 10 | import { resolveTransitiveImports, } from 'langium/grammar'; 11 | import { LangiumServices } from "langium/lsp"; 12 | import { EvaluationContext } from "../evaluator/document-evaluator.js"; 13 | import { EvaluatorResult } from "../evaluator/evaluator.js"; 14 | import { LangiumEvaluator, LangiumEvaluatorResultData } from "../evaluator/langium-evaluator.js"; 15 | 16 | /** 17 | * Extends LangiumEvaluator and adds analysis capabilities. 18 | */ 19 | export class LangiumDocumentAnalyzer extends LangiumEvaluator { 20 | 21 | public static readonly METADATA_KEY = 'syntax_statistics'; 22 | 23 | private readonly analysisOptions: AnalysisOptions; 24 | 25 | /** 26 | * Creates an instance of LangiumDocumentAnalyzer. 
27 | * @param services Langium services 28 | * @param analysisOptions Analysis options 29 | * @example 30 | * ```typescript 31 | * const analyzer = new LangiumDocumentAnalyzer(services, { 32 | * analysisMode: AnalysisMode.ALL, 33 | * excludeRules: ['DeprecatedRule'], 34 | * computeDiversity: false 35 | * }); 36 | * ``` 37 | */ 38 | constructor(services: T, analysisOptions: Partial = {}) { 39 | super(services); 40 | this.analysisOptions = { ...DEFAULT_OPTIONS, ...analysisOptions }; 41 | } 42 | 43 | evaluateDocument(doc: LangiumDocument, ctx: EvaluationContext): EvaluatorResult { 44 | const validationResult = super.evaluateDocument(doc, ctx); 45 | if (this.analysisOptions.analysisMode !== AnalysisMode.NO_STATISTIC && validationResult.data.failures === 0) { 46 | // Add syntax usage statistics only if build was successful 47 | const statistics = this.collectSyntaxUsageStatistics(doc, this.services.Grammar); 48 | validationResult.metadata[LangiumDocumentAnalyzer.METADATA_KEY] = statistics; 49 | } 50 | return validationResult; 51 | } 52 | 53 | collectSyntaxUsageStatistics(doc: LangiumDocument, grammar: Grammar): SyntaxStatistic { 54 | const rootCstNode = doc.parseResult.value.$cstNode; 55 | if (!rootCstNode) { 56 | return this.createEmptySyntaxStatistic(); 57 | } 58 | const { includeImportedRules, excludeRules, computeDiversity, includeHiddenRules } = this.analysisOptions; 59 | const excludedRules = new Set(excludeRules); 60 | const isRuleExcluded = (ruleName: string) => ruleName === 'WS' || excludedRules.has(ruleName); 61 | 62 | const allRules = includeImportedRules ? this.collectAllRules(grammar) : grammar.rules; 63 | const ruleUsage: Record = {}; 64 | // Initialize rule usage map, excluding rules specified in excludeRules. Also skip entry rule. 
65 | for (const rule of allRules) { 66 | if (!isRuleExcluded(rule.name)) { 67 | if ( 68 | (GrammarAST.isParserRule(rule) && rule.entry) 69 | || (GrammarAST.isTerminalRule(rule) && rule.hidden && !includeHiddenRules) 70 | ) { 71 | continue; 72 | } 73 | ruleUsage[rule.name] = 0; 74 | } 75 | } 76 | 77 | for (const cstNode of CstUtils.streamCst(rootCstNode)) { 78 | const grammarSource = cstNode.grammarSource; 79 | 80 | const addIfNotExcluded = (ruleName: string) => { 81 | if (!isRuleExcluded(ruleName)) { 82 | ruleUsage[ruleName] = (ruleUsage[ruleName] ?? 0) + 1; 83 | } 84 | }; 85 | 86 | if (grammarSource && GrammarAST.isRuleCall(grammarSource)) { 87 | // For now handle only RuleCalls 88 | addIfNotExcluded(grammarSource.rule.ref?.name ?? 'unknown'); 89 | } else if (includeHiddenRules && cstNode.hidden && isLeafCstNode(cstNode)) { 90 | addIfNotExcluded(cstNode.tokenType.name); 91 | } 92 | } 93 | 94 | let diversity = { entropy: 0, giniCoefficient: 0, simpsonIndex: 0 }; 95 | if (computeDiversity) { 96 | diversity = { 97 | entropy: this.computeEntropy(ruleUsage), 98 | giniCoefficient: this.computeGiniCoefficient(ruleUsage), 99 | simpsonIndex: this.computeSimpsonIndex(ruleUsage) 100 | }; 101 | } 102 | const coverage = this.computeCoverage(ruleUsage); 103 | return { ruleUsage, coverage, diversity }; 104 | } 105 | 106 | /** 107 | * Computes coverage as percentage of used rules over all available rules 108 | */ 109 | computeCoverage(ruleUsage: Record): number { 110 | const usedRules = Object.values(ruleUsage).filter(count => count > 0).length; 111 | return usedRules > 0 ? 
(usedRules / Object.keys(ruleUsage).length) * 100 : 0; 112 | } 113 | 114 | /** 115 | * Computes Shannon entropy - measure of information diversity 116 | * Higher values indicate more diverse usage patterns 117 | */ 118 | computeEntropy(ruleUsage: Record): number { 119 | const totalUsage = Object.values(ruleUsage).reduce((sum, count) => sum + count, 0); 120 | if (totalUsage === 0) return 0; 121 | 122 | let entropy = 0; 123 | for (const count of Object.values(ruleUsage)) { 124 | if (count > 0) { 125 | const probability = count / totalUsage; 126 | entropy -= probability * Math.log2(probability); 127 | } 128 | } 129 | return entropy; 130 | } 131 | 132 | /** 133 | * Computes Gini coefficient - measure of inequality in rule usage 134 | * 0 = perfect equality, 1 = maximum inequality 135 | */ 136 | computeGiniCoefficient(ruleUsage: Record): number { 137 | const counts = Object.values(ruleUsage).sort((a, b) => a - b); 138 | const n = counts.length; 139 | if (n === 0) return 0; 140 | 141 | const sum = counts.reduce((acc, val) => acc + val, 0); 142 | if (sum === 0) return 0; 143 | 144 | let numerator = 0; 145 | for (let i = 0; i < n; i++) { 146 | numerator += (2 * (i + 1) - n - 1) * counts[i]; 147 | } 148 | return numerator / (n * sum); 149 | } 150 | 151 | /** 152 | * Computes Simpson's diversity index - probability that two randomly selected items are different 153 | * Higher values indicate more diversity 154 | */ 155 | computeSimpsonIndex(ruleUsage: Record): number { 156 | const totalUsage = Object.values(ruleUsage).reduce((sum, count) => sum + count, 0); 157 | if (totalUsage === 0) return 0; 158 | 159 | let sum = 0; 160 | for (const count of Object.values(ruleUsage)) { 161 | const probability = count / totalUsage; 162 | sum += probability * probability; 163 | } 164 | 165 | return 1 - sum; // Simpson's diversity index (1-D) 166 | } 167 | 168 | /** 169 | * Extracts syntax statistics from the evaluation result. 170 | * @param result The evaluation result. 
171 | * @returns The extracted syntax statistics or undefined if not found. 172 | */ 173 | extractStatisticsFromResult(result: Partial | undefined): SyntaxStatistic | undefined { 174 | const metadata = result?.metadata; 175 | if (metadata && metadata[LangiumDocumentAnalyzer.METADATA_KEY]) { 176 | return metadata[LangiumDocumentAnalyzer.METADATA_KEY] as SyntaxStatistic; 177 | } 178 | return undefined; 179 | } 180 | 181 | protected collectAllRules(grammar: Grammar): GrammarAST.AbstractRule[] { 182 | try { 183 | return grammar.rules.concat( 184 | resolveTransitiveImports(this.services.shared.workspace.LangiumDocuments, grammar).map(g => g.rules).flat() 185 | ); 186 | } catch (e) { 187 | console.error('Error resolving imports: ', e); 188 | return []; 189 | } 190 | } 191 | 192 | protected createEmptySyntaxStatistic(): SyntaxStatistic { 193 | return { 194 | ruleUsage: {}, 195 | coverage: 0, 196 | diversity: { 197 | entropy: 0, 198 | giniCoefficient: 0, 199 | simpsonIndex: 0 200 | } 201 | }; 202 | } 203 | } 204 | 205 | /** 206 | * Analysis mode for controlling what analysis operations to perform 207 | */ 208 | export enum AnalysisMode { 209 | ALL = 'ALL', 210 | NO_STATISTIC = 'NO_STATISTIC' 211 | } 212 | 213 | interface AnalysisOptions { 214 | analysisMode: AnalysisMode; 215 | /** 216 | * Filter for specific rules (e.g deprecated) to exclude in the analysis. 217 | * Rule WS (whitespace) is always excluded. 218 | */ 219 | excludeRules: string[]; 220 | /** 221 | * Whether to include rules from imported grammars. Default is true. 222 | */ 223 | includeImportedRules: boolean; 224 | /** 225 | * Whether to include hidden tokens (like comments, whitespace) in the analysis. Default is false. 226 | * Rule WS (whitespace) is always excluded. 227 | */ 228 | includeHiddenRules: boolean; 229 | /** 230 | * Whether to compute diversity metrics for rule usage. Default is true. 
231 | */ 232 | computeDiversity: boolean; 233 | } 234 | 235 | const DEFAULT_OPTIONS: AnalysisOptions = { 236 | analysisMode: AnalysisMode.ALL, 237 | excludeRules: [], 238 | includeImportedRules: true, 239 | includeHiddenRules: true, 240 | computeDiversity: true 241 | }; 242 | 243 | /** 244 | * Type representing syntax usage statistics. 245 | */ 246 | export type SyntaxStatistic = { 247 | /** Map of rule names to their usage counts */ 248 | ruleUsage: Record; 249 | 250 | /** Percentage of used rules compared to all available rules */ 251 | coverage: number; 252 | 253 | /** Diversity metrics for rule usage patterns */ 254 | diversity: { 255 | 256 | /** 257 | * Shannon entropy - information diversity measure. 258 | * **Range:** 0 to log₂(n) where n = number of rules. 259 | * - **Low (0-1):** dominated by few rules 260 | * - **Medium (1-3):** moderate diversity 261 | * - **High (>3):** high diversity 262 | */ 263 | entropy: number; 264 | 265 | /** 266 | * Gini coefficient - inequality measure. Range: 0 to 1. 267 | * - **Low (0-0.3):** equal distribution 268 | * - **Medium (0.3-0.7):** moderate inequality 269 | * - **High (0.7-1):** high inequality 270 | */ 271 | giniCoefficient: number; 272 | 273 | /** 274 | * Simpson's diversity index - probability that two randomly selected items are different. **Range:** 0 to 1. 
275 | * - **Low (0-0.3):** low diversity 276 | * - **Medium (0.3-0.7):** moderate diversity 277 | * - **High (0.7-1):** high diversity 278 | */ 279 | simpsonIndex: number; 280 | }; 281 | } -------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/src/langium-cases.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | import { Case, Message } from "langium-ai-tools"; 8 | 9 | /** 10 | * Common history for all cases here 11 | */ 12 | const history: Message[] = [ 13 | { 14 | role: 'system', 15 | content: `You are an expert in Langium grammars. 16 | Your role is to provide accurate, detailed, and insightful explanations and advice on Langium grammar-related topics. 17 | You will answer questions, provide examples, and offer guidance on best practices, ensuring clarity and precision in all responses. 18 | For reference, here is a simple Langium grammar: 19 | 20 | \`\`\`langium 21 | // starts a grammar w/ a name 22 | grammar HelloWorld 23 | 24 | // entry parser rule 25 | entry Model: 26 | // parses a list of zero or more persons & greetings in any order 27 | (persons+=Person | greetings+=Greeting)*; 28 | 29 | // person parser rule 30 | Person: 31 | // parses a person keyword followed by an ID terminal (the person's name) 32 | 'person' name=ID; 33 | 34 | // greeting parser rule 35 | Greeting: 36 | // parses Hello followed by a person reference and an exclamation mark at the end 37 | 'Hello' person=[Person:ID] '!'; 38 | 39 | // whitespace is a hidden terminal, i.e. 
ignored and not part of the AST 40 | hidden terminal WS: /\s+/; 41 | // ID is a terminal rule that matches this regex 42 | terminal ID: /[_a-zA-Z][\w_]*/; 43 | 44 | // multi-line & single-line comments are also hidden terminals 45 | hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//; 46 | hidden terminal SL_COMMENT: /\/\/[^\n\r]*/; 47 | \`\`\` 48 | 49 | Use this information to help answer questions about Langium grammars. 50 | ` } 51 | ]; 52 | 53 | export const cases: Case[] = [ 54 | { 55 | name: "Hello World Grammar", 56 | context: [], 57 | history, 58 | prompt: "Generate a simple HelloWorld grammar in Langium.", 59 | expected_response: `Certainly. Here's an example of a possible HelloWorld grammar written in the Langium grammar language: 60 | 61 | \`\`\`langium 62 | grammar HelloWorld 63 | 64 | entry Model: 65 | (persons+=Person | greetings+=Greeting)*; 66 | 67 | Person: 68 | 'person' name=ID; 69 | 70 | Greeting: 71 | 'Hello' person=[Person:ID] '!'; 72 | 73 | hidden terminal WS: /\s+/; 74 | terminal ID: /[_a-zA-Z][\w_]*/; 75 | 76 | hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//; 77 | hidden terminal SL_COMMENT: /\/\/[^\n\r]*/; 78 | \`\`\` 79 | 80 | This grammar defines a simple HelloWorld language with two parser rules: Person and Greeting. The entry rule Model allows for an arbitrary sequence of Person and Greeting elements. A Person is defined by the keyword 'person' followed by an ID terminal, which represents the person's name. A Greeting consists of the word 'Hello' followed by a reference to a Person and an exclamation mark. The grammar also includes terminal rules for whitespace, identifiers, and comments.` 81 | }, 82 | { 83 | name: "StateMachine Grammar", 84 | context: [], 85 | history, 86 | prompt: "Create a simple Statemachine grammar in Langium.", 87 | expected_response: `Sure. 
Here's an example of a basic calculator grammar written in the Langium grammar language: 88 | 89 | \`\`\`langium 90 | grammar Statemachine 91 | 92 | entry Statemachine: 93 | 'statemachine' name=ID 94 | ('events' events+=Event+)? 95 | ('commands' commands+=Command+)? 96 | 'initialState' init=[State] 97 | states+=State*; 98 | 99 | Event: 100 | name=ID; 101 | 102 | Command: 103 | name=ID; 104 | 105 | State: 106 | 'state' name=ID 107 | ('actions' '{' actions+=[Command]+ '}')? 108 | transitions+=Transition* 109 | 'end'; 110 | 111 | Transition: 112 | event=[Event] '=>' state=[State]; 113 | 114 | hidden terminal WS: /\s+/; 115 | terminal ID: /[_a-zA-Z][\w_]*/; 116 | 117 | hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//; 118 | hidden terminal SL_COMMENT: /\/\/[^\n\r]*/; 119 | 120 | \`\`\` 121 | 122 | This grammar defines a simple Statemachine language with events, commands, states, and an initial state. The Statemachine rule starts with the keyword 'statemachine' followed by an ID representing the name of the statemachine. It can include lists of events and commands, an initial state, and a sequence of states. Each event and command is defined by an ID. A state consists of a name, optional actions, and a list of zero or more transitions, followed by 'end'. 
123 | ` 124 | }, 125 | { 126 | name: 'Domain Model', 127 | context: [], 128 | history, 129 | prompt: 'Create a simple Domain Model grammar in Langium.', 130 | expected_response: `Here's an example of a Domain Model grammar written in the Langium grammar language: 131 | 132 | \`\`\`langium 133 | grammar DomainModel 134 | 135 | entry Domainmodel: 136 | (elements+=AbstractElement)*; 137 | 138 | AbstractElement: 139 | PackageDeclaration | Type; 140 | 141 | PackageDeclaration: 142 | 'package' name=QualifiedName '{' 143 | (elements+=AbstractElement)* 144 | '}'; 145 | 146 | Type: 147 | DataType | Entity; 148 | 149 | DataType: 150 | 'datatype' name=ID; 151 | 152 | Entity: 153 | 'entity' name=ID ('extends' superType=[Entity:QualifiedName])? '{' 154 | (features+=Feature)* 155 | '}'; 156 | 157 | Feature: 158 | (many?='many')? name=ID ':' type=[Type:QualifiedName]; 159 | 160 | QualifiedName returns string: 161 | ID ('.' ID)*; 162 | 163 | hidden terminal WS: /\s+/; 164 | terminal ID: /[_a-zA-Z][\w_]*/; 165 | 166 | hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//; 167 | hidden terminal SL_COMMENT: /\/\/[^\n\r]*/; 168 | \`\`\` 169 | 170 | This grammar defines a Domain Model language with elements, package declarations, types, data types, entities, and features. 171 | The entry rule Domainmodel allows for an arbitrary sequence of AbstractElement elements. 172 | An AbstractElement can be a PackageDeclaration or a Type. 173 | A PackageDeclaration starts with the keyword 'package' followed by a qualified name and a block of zero or more elements. 174 | A Type can be a DataType or an Entity. 175 | A DataType is defined by the keyword 'datatype' followed by an ID terminal. 176 | An Entity is defined by the keyword 'entity' followed by an ID terminal, an optional super type, and a block of zero or more features. 177 | A Feature can be a single or multiple association with a type. 
178 | ` 179 | }, 180 | { 181 | name: 'Arithmetics', 182 | context: [], 183 | history, 184 | prompt: 'Create a simple Arithmetics grammar in Langium.', 185 | expected_response: `Here's an example of an Arithmetics grammar written in the Langium grammar language: 186 | 187 | \`\`\`langium 188 | grammar Arithmetics 189 | 190 | entry Module: 191 | 'module' name=ID 192 | (statements+=Statement)*; 193 | 194 | Statement: 195 | Definition | Evaluation; 196 | 197 | Definition: 198 | 'def' name=ID ('(' args+=DeclaredParameter (',' args+=DeclaredParameter)* ')')? 199 | ':' expr=Expression ';'; 200 | 201 | DeclaredParameter: 202 | name=ID; 203 | 204 | type AbstractDefinition = Definition | DeclaredParameter; 205 | 206 | Evaluation: 207 | expression=Expression ';'; 208 | 209 | Expression: 210 | Addition; 211 | 212 | Addition infers Expression: 213 | Multiplication ({infer BinaryExpression.left=current} operator=('+' | '-') right=Multiplication)*; 214 | 215 | Multiplication infers Expression: 216 | Exponentiation ({infer BinaryExpression.left=current} operator=('*' | '/') right=Exponentiation)*; 217 | 218 | Exponentiation infers Expression: 219 | Modulo ({infer BinaryExpression.left=current} operator='^' right=Modulo)*; 220 | 221 | Modulo infers Expression: 222 | PrimaryExpression ({infer BinaryExpression.left=current} operator='%' right=PrimaryExpression)*; 223 | 224 | PrimaryExpression infers Expression: 225 | '(' Expression ')' | 226 | {infer NumberLiteral} value=NUMBER | 227 | {infer FunctionCall} func=[AbstractDefinition] ('(' args+=Expression (',' args+=Expression)* ')')?; 228 | 229 | hidden terminal WS: /\s+/; 230 | terminal ID: /[_a-zA-Z][\w_]*/; 231 | terminal NUMBER returns number: /[0-9]+(\.[0-9]*)?/; 232 | 233 | hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//; 234 | hidden terminal SL_COMMENT: /\/\/[^\n\r]*/; 235 | \`\`\` 236 | 237 | This grammar defines an Arithmetics language with expressions & definitions (functions). 
It effectively allows for regular arithmetic operations, including addition, subtraction, multiplication, division, exponentiation, and modulo operations.` 238 | }, 239 | { 240 | name: 'MiniLogo', 241 | context: [], 242 | history, 243 | prompt: `Create a simple MiniLogo grammar in Langium, using the MiniLogo language (very akin to Turtle in Python and the original Logo language). 244 | It should support statements & definitions. The statements should be commands & macros, while the definitions should be named functions. 245 | The only commands are pen, move, color, and for (a very simple loop mechanic like \`for x = 1 to 10 { ... }\`)`, 246 | expected_response: `Here's an example of a MiniLogo grammar written in the Langium grammar language. 247 | This grammar supports statements and definitions for the MiniLogo language, which is similar to Turtle graphics. 248 | The statements include commands like pen, move, color, and for, as well as macros for defining functions. 249 | Here's the grammar: 250 | 251 | \`\`\`langium 252 | grammar MiniLogo 253 | 254 | entry Model: (stmts+=Stmt | defs+=Def)*; 255 | 256 | // Statements 257 | Stmt: Cmd | Macro; 258 | Cmd: Pen | Move | Color | For; 259 | Macro: def=[Def:ID] '(' (args+=Expr (',' args+=Expr)*)? ')'; 260 | 261 | // Definitions 262 | Def: 'def' name=ID '(' (params+=Param (',' params+=Param)*)? 
')' Block; 263 | 264 | // Commands 265 | Pen: 'pen' '(' mode=('up' | 'down') ')'; 266 | Move: 'move' '(' ex=Expr ',' ey=Expr ')'; 267 | Color: 'color' '(' ((r = Expr ',' g=Expr ',' b=Expr) | color=ID | color=HEX) ')'; 268 | For: 'for' var=Param '=' e1=Expr 'to' e2=Expr Block; 269 | 270 | 271 | fragment Block: '{' body+=Stmt* '}'; 272 | 273 | Param: name=ID; 274 | 275 | 276 | // Expressions 277 | Expr: Add; 278 | 279 | Add infers Expr: 280 | Mult ({infer BinExpr.e1=current} op=('+'|'-') e2=Mult)*; 281 | Mult infers Expr: 282 | PrimExpr ({infer BinExpr.e1=current} op=('*'|'/') e2=PrimExpr)*; 283 | 284 | PrimExpr: Lit | Ref | Group | NegExpr; 285 | 286 | Lit: val=NUMBER; 287 | Ref: val=[Param:ID]; 288 | Group: '(' ge=Expr ')'; 289 | NegExpr: '-' ne=Expr; 290 | 291 | // Terminals 292 | terminal HEX returns string: /#(\d|[a-fA-F]){3,6}/; 293 | terminal ID returns string: /[_a-zA-Z][\w_]*/; 294 | terminal NUMBER returns number: /(?:(?:-?[0-9]+)?\.[0-9]+)|-?[0-9]+/; 295 | hidden terminal WS: /\s+/; 296 | hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//; 297 | hidden terminal SL_COMMENT: /\/\/[^\n\r]*/; 298 | \`\`\` 299 | 300 | This grammar defines a MiniLogo language with statements and definitions. The Model rule allows for an arbitrary sequence of statements and definitions. A statement can be a command or a macro, while a definition is a named function. The commands include pen, move, color, and for, which is a simple loop mechanic. The expressions support arithmetic operations like addition, subtraction, multiplication, and division. 301 | You can use this grammar to create MiniLogo programs for drawing shapes and patterns. 302 | ` 303 | } 304 | ]; --------------------------------------------------------------------------------