├── packages ├── examples │ └── example-dsl-evaluator │ │ ├── .gitignore │ │ ├── tsconfig.json │ │ ├── package.json │ │ ├── README.md │ │ └── src │ │ ├── example-splitter.ts │ │ ├── index.ts │ │ ├── embedding-evaluator.ts │ │ ├── example-program-map.ts │ │ ├── helloworld-cases.ts │ │ ├── eval-langium.ts │ │ ├── eval-langdev.ts │ │ ├── runners.ts │ │ └── langium-cases.ts ├── langium-ai-mcp │ ├── tsconfig.tests.json │ ├── vitest.config.ts │ ├── README.MD │ ├── tsconfig.json │ ├── src │ │ ├── mcp-client.ts │ │ └── mcp-server.ts │ ├── tests │ │ └── mcp-server.test.ts │ └── package.json └── langium-ai-tools │ ├── tsconfig.tests.json │ ├── vitest.config.ts │ ├── src │ ├── analyzer │ │ ├── index.ts │ │ └── document-analyzer.ts │ ├── splitter │ │ ├── index.ts │ │ ├── program-map.ts │ │ └── splitter.ts │ ├── index.ts │ └── evaluator │ │ ├── index.ts │ │ ├── edit-distance-evaluator.ts │ │ ├── document-evaluator.ts │ │ ├── langium-evaluator.ts │ │ ├── eval-matrix.ts │ │ ├── evaluator.ts │ │ └── chart.ts │ ├── tsconfig.json │ ├── package.json │ ├── tests │ └── document-analyzer.test.ts │ └── README.md ├── .gitignore ├── .github └── workflows │ └── ci.yml ├── package.json ├── LICENSE ├── README.md └── .vscode └── launch.json /packages/examples/example-dsl-evaluator/.gitignore: -------------------------------------------------------------------------------- 1 | *.html -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | .DS_Store 3 | logs 4 | *.log 5 | npm-debug.log* 6 | .npm 7 | .eslintcache 8 | .env 9 | dist 10 | modelfiles 11 | results 12 | downloads/ 13 | stores/ 14 | data/ 15 | .langium-ai -------------------------------------------------------------------------------- /packages/langium-ai-mcp/tsconfig.tests.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.json", 
3 | "compilerOptions": { 4 | "types": ["vitest/globals", "node"] 5 | }, 6 | "include": [ 7 | "src/**/*", 8 | "tests/**/*" 9 | ], 10 | "exclude": [ 11 | "node_modules", 12 | "dist" 13 | ] 14 | } -------------------------------------------------------------------------------- /packages/langium-ai-tools/tsconfig.tests.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.json", 3 | "compilerOptions": { 4 | "types": ["vitest/globals", "node"] 5 | }, 6 | "include": [ 7 | "src/**/*", 8 | "tests/**/*" 9 | ], 10 | "exclude": [ 11 | "node_modules", 12 | "dist" 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/vitest.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vitest/config'; 2 | 3 | export default defineConfig({ 4 | test: { 5 | environment: 'node', 6 | globals: true, 7 | include: ['tests/**/*.test.ts'], 8 | typecheck: { 9 | tsconfig: 'tsconfig.tests.json' 10 | } 11 | }, 12 | esbuild: { 13 | target: 'node20' 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /packages/langium-ai-mcp/vitest.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vitest/config'; 2 | 3 | export default defineConfig({ 4 | test: { 5 | environment: 'node', 6 | globals: true, 7 | include: ['tests/**/*.test.ts'], 8 | typecheck: { 9 | tsconfig: 'tsconfig.tests.json' 10 | } 11 | }, 12 | esbuild: { 13 | target: 'node20' 14 | } 15 | }); 16 | 17 | 18 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/analyzer/index.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2025 
TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | export * from './document-analyzer.js'; -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/splitter/index.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | export * from './splitter.js'; 8 | export * from './program-map.js'; 9 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/index.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 
5 | ******************************************************************************/ 6 | 7 | export * from './evaluator/index.js'; 8 | export * from './analyzer/index.js'; 9 | export * from './splitter/index.js'; 10 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/evaluator/index.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | export * from './evaluator.js'; 8 | export * from './langium-evaluator.js'; 9 | export * from './edit-distance-evaluator.js'; 10 | export * from './eval-matrix.js'; 11 | export * from './chart.js'; 12 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "NodeNext", 5 | "sourceMap": true, 6 | "declaration": true, 7 | "declarationMap": true, 8 | "outDir": "./dist", 9 | "strict": true, 10 | "noImplicitAny": true, 11 | "moduleResolution": "NodeNext", 12 | "esModuleInterop": true, 13 | "skipLibCheck": true, 14 | "forceConsistentCasingInFileNames": true, 15 | "rootDir": "./src" 16 | }, 17 | "include": ["src/**/*.ts"], 18 | "exclude": [ 19 | "node_modules" 20 | ] 21 | } -------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "NodeNext", 5 | 
"sourceMap": true, 6 | "declaration": true, 7 | "declarationMap": true, 8 | "outDir": "./dist", 9 | "strict": true, 10 | "noImplicitAny": true, 11 | "moduleResolution": "NodeNext", 12 | "esModuleInterop": true, 13 | "skipLibCheck": true, 14 | "forceConsistentCasingInFileNames": true, 15 | }, 16 | "include": [ 17 | "src/**/*.ts" 18 | ], 19 | "exclude": [ 20 | "node_modules" 21 | ] 22 | } -------------------------------------------------------------------------------- /packages/langium-ai-mcp/README.MD: -------------------------------------------------------------------------------- 1 | How to try-out: 2 | 3 | - `cd packages/langium-ai-mcp` 4 | - Start MCP server with IO transport `npm run start` 5 | - Run example client code `npm run cstart` - you should see the tool result containing the errors. 6 | 7 | Example usage in Cursor: 8 | 9 | - Open Cursor MCP settings 10 | - Add new server using following setup (user or workspace specific `.cursor/mcp.json` ): 11 | 12 | ```json 13 | "mcpServers": { 14 | "Langium MCP": { 15 | "command": "node", 16 | "args": [ 17 | "~/git/langium-ai/packages/langium-ai-mcp/dist/mcp-server.js" 18 | ] 19 | } 20 | } 21 | ``` 22 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build-and-test: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Checkout code 15 | uses: actions/checkout@v4 16 | 17 | - name: Setup Node.js 18 | uses: actions/setup-node@v4 19 | with: 20 | node-version: '20.x' 21 | cache: 'npm' 22 | 23 | - name: Install dependencies 24 | run: npm ci 25 | 26 | - name: Build packages 27 | run: npm run build 28 | 29 | - name: Run tests 30 | run: npm run test:ci 31 | 32 | --------------------------------------------------------------------------------
/packages/langium-ai-mcp/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "rootDir": "./src", 4 | "outDir": "./dist", 5 | "module": "nodenext", 6 | "target": "esnext", 7 | "types": [ 8 | "node" 9 | ], 10 | "sourceMap": true, 11 | "declaration": true, 12 | "declarationMap": true, 13 | "noUncheckedIndexedAccess": true, 14 | "exactOptionalPropertyTypes": true, 15 | "strict": true, 16 | "jsx": "react-jsx", 17 | "verbatimModuleSyntax": true, 18 | "isolatedModules": true, 19 | "noUncheckedSideEffectImports": true, 20 | "moduleDetection": "force", 21 | "skipLibCheck": true, 22 | "forceConsistentCasingInFileNames": true 23 | }, 24 | "include": [ 25 | "src" 26 | ], 27 | "exclude": [ 28 | "node_modules", 29 | "tests", 30 | "dist" 31 | ] 32 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "langium-ai", 3 | "private": true, 4 | "version": "0.0.1", 5 | "description": "Monorepo for Langium AI and its associated projects", 6 | "scripts": { 7 | "build": "npm run build --workspaces", 8 | "test": "npm run test --workspaces --if-present --continue-on-error", 9 | "test:ci": "npm run test --workspaces --if-present", 10 | "clean": "npm run clean --workspaces --if-present", 11 | "watch": "npm run watch --workspaces --if-present" 12 | }, 13 | "type": "module", 14 | "author": { 15 | "name": "TypeFox", 16 | "url": "https://www.typefox.io" 17 | }, 18 | "license": "MIT", 19 | "workspaces": [ 20 | "packages/langium-ai-tools", 21 | "packages/langium-ai-mcp", 22 | "packages/examples/*" 23 | ], 24 | "devDependencies": { 25 | "typescript": "^5.4.5", 26 | "vitest": "^3.0.9", 27 | "rimraf": "^6.0.1" 28 | }, 29 | "volta": { 30 | "node": "20.10.0", 31 | "npm": "10.2.3" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- 
/packages/examples/example-dsl-evaluator/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "example-dsl-evaluator", 3 | "description": "Example evaluator for a targeted DSL", 4 | "private": true, 5 | "version": "0.0.1", 6 | "main": "dist/index.js", 7 | "scripts": { 8 | "build": "tsc", 9 | "start": "node ./dist/index.js", 10 | "demo": "npm run build && npm run start -- run-langdev && open ./radar-chart.html", 11 | "clean": "rimraf ./dist", 12 | "test": "echo \"No tests yet...\"" 13 | }, 14 | "type": "module", 15 | "author": { 16 | "name": "TypeFox", 17 | "url": "https://www.typefox.io" 18 | }, 19 | "license": "MIT", 20 | "dependencies": { 21 | "chromadb": "^1.9.2", 22 | "chromadb-default-embed": "^2.13.2", 23 | "dotenv": "^16.4.5", 24 | "langium-ai-tools": "^0.0.2", 25 | "ollama": "^0.5.9", 26 | "openai": "^4.67.3" 27 | }, 28 | "volta": { 29 | "node": "20.10.0", 30 | "npm": "10.2.3" 31 | }, 32 | "devDependencies": { 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/evaluator/edit-distance-evaluator.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root.
5 | ******************************************************************************/ 6 | 7 | import {levenshteinEditDistance} from 'levenshtein-edit-distance'; 8 | import { Evaluator, EvaluatorResult, EvaluatorResultData } from './evaluator.js'; 9 | 10 | export interface EditDistanceEvaluatorResultData extends EvaluatorResultData { 11 | edit_distance: number; 12 | } 13 | 14 | export class EditDistanceEvaluator extends Evaluator { 15 | async evaluate(response: string, expected_response: string): Promise> { 16 | const distance = levenshteinEditDistance(response, expected_response); 17 | return { 18 | data: { 19 | edit_distance: distance 20 | } 21 | }; 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /packages/langium-ai-mcp/src/mcp-client.ts: -------------------------------------------------------------------------------- 1 | import { Client } from "@modelcontextprotocol/sdk/client/index.js"; 2 | import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; 3 | import { getDisplayName } from "@modelcontextprotocol/sdk/shared/metadataUtils.js"; 4 | 5 | const debug = false 6 | 7 | const transport = new StdioClientTransport({ 8 | command: "node", 9 | args: [ "./dist/mcp-server.js"] 10 | }); 11 | 12 | const client = new Client( 13 | { 14 | name: "example-client", 15 | version: "1.0.0" 16 | } 17 | ); 18 | 19 | await client.connect(transport); 20 | 21 | const tools = await client.listTools(); 22 | console.log("Available tools:", "\n", ...tools.tools.map(t => getDisplayName(t) + "\n")); 23 | 24 | const theTool = tools.tools[0]; 25 | if (!theTool) { 26 | throw new Error("No tool available"); 27 | } 28 | 29 | const result = await client.callTool({ 30 | name: theTool.name, 31 | arguments: { 32 | code: 'syntax error' 33 | } 34 | }); 35 | 36 | console.log("Tool result:", result.content); 37 | 38 | // exit the process 39 | process.exit(0); 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 TypeFox 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /packages/langium-ai-mcp/tests/mcp-server.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, expect, it } from 'vitest'; 2 | 3 | import { validateLangiumCode } from '../src/mcp-server'; 4 | 5 | describe('validateLangiumCode', () => { 6 | 7 | 8 | it('should return undefined for valid grammar code', async () => { 9 | const validCode = ` 10 | grammar HelloWorld 11 | 12 | entry Model: persons+=Person*; 13 | Person: 'person' name=ID; 14 | hidden terminal WS: /\\s+/; 15 | terminal ID: /[_a-zA-Z][\\w_]*/; 16 | `; 17 | 18 | const result = await validateLangiumCode(validCode); 19 | expect(result).toBeUndefined(); 20 | }); 21 | 22 | it('should return diagnostics for invalid grammar code', async () => { 23 | const invalidCode = ` 24 | grammar HelloWorld 25 | entry Model: persons+=Person*; 26 | `; 27 | 28 | const result = await validateLangiumCode(invalidCode); 29 | expect(result).toBeDefined(); 30 | expect(result).toContain("Error: Could not resolve reference to AbstractRule named 'Person'. at line 3, column 35"); 31 | }); 32 | }); 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Langium AI 2 | 3 | ## Overview 4 | 5 | Langium AI is a suite of tools that makes it easier to build AI applications for Langium DSLs, by providing helpful tools to leverage your existing Langium DSL services, and to integrate LLMs tightly with your Langium DSL. In doing so, existing or new Langium DSLs can benefit from first-class LLM support just as well as other common programming languages. 6 | 7 | You can read more about Langium AI here as well, [Langium AI: The fusion of DSLs and LLMs](https://typefox.io/blog/langium-ai-the-fusion-of-dsls-and-llms/). 
8 | 9 | In particular, Langium AI helps to solve the following problems: 10 | 11 | - Selecting good models with regards to your DSL 12 | - Evaluating DSL output from an LLM 13 | - Processing DSL programs as data, while respecting the structure of your DSL 14 | - Developing good natural language interfaces for DSLs 15 | 16 | This is achieved by using a two pronged approach. 17 | 18 | - [Core tools](/packages/langium-ai-tools/README.md): which leverage Langium's service set to help you build, evaluate, and trust your AI applications. 19 | - _Coming soon_: AI-based tools which leverage LLMs directly in tandem with your DSL. 20 | 21 | For more information you can refer to the [langium-ai-tools](/packages/langium-ai-tools/README.md) package, which leverages much of your DSL's existing Langium services to make it easier to evaluate your application, as well as to process DSL documents. 22 | -------------------------------------------------------------------------------- /packages/langium-ai-mcp/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "langium-ai-mcp", 3 | "version": "0.0.2", 4 | "displayName": "Langium AI - MCP", 5 | "publisher": "TypeFox", 6 | "description": "MCP server for Langium AI", 7 | "repository": { 8 | "type": "git", 9 | "url": "git+https://github.com/eclipse-langium/langium-ai.git", 10 | "directory": "packages/langium-ai-mcp" 11 | }, 12 | "bugs": "https://github.com/eclipse-langium/langium-ai/issues", 13 | "type": "module", 14 | "main": "dist/mcp-server.js", 15 | "private": false, 16 | "files": [ 17 | "dist" 18 | ], 19 | "scripts": { 20 | "clean": "rm -rf ./dist", 21 | "build": "npm run clean && tsc", 22 | "watch": "tsc -w", 23 | "start": "node .", 24 | "cstart": "node ./dist/mcp-client.js", 25 | "prepare": "npm run build", 26 | "test": "vitest run" 27 | }, 28 | "author": { 29 | "name": "TypeFox", 30 | "url": "https://www.typefox.io" 31 | }, 32 | "keywords": [ 33 | "langium", 34 | "ai", 35 
| "mcp", 36 | "server", 37 | "llm" 38 | ], 39 | "license": "MIT", 40 | "dependencies": { 41 | "@modelcontextprotocol/sdk": "^1.17.4", 42 | "langium-ai-tools": "0.0.2" 43 | }, 44 | "volta": { 45 | "node": "20.10.0", 46 | "npm": "10.2.3" 47 | }, 48 | "devDependencies": { 49 | } 50 | } -------------------------------------------------------------------------------- /packages/langium-ai-tools/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "langium-ai-tools", 3 | "version": "4.1.3", 4 | "description": "Tooling for building AI Applications that leverage Langium DSLs", 5 | "repository": { 6 | "type": "git", 7 | "url": "git+https://github.com/eclipse-langium/langium-ai.git", 8 | "directory": "packages/langium-ai-tools" 9 | }, 10 | "bugs": "https://github.com/eclipse-langium/langium-ai/issues", 11 | "type": "module", 12 | "main": "dist/index.js", 13 | "private": false, 14 | "files": [ 15 | "dist" 16 | ], 17 | "exports": { 18 | ".": { 19 | "import": "./dist/index.js", 20 | "types": "./dist/index.d.ts" 21 | }, 22 | "./splitter": { 23 | "import": "./dist/splitter/index.js", 24 | "types": "./dist/splitter/index.d.ts" 25 | }, 26 | "./evaluator": { 27 | "import": "./dist/evaluator/index.js", 28 | "types": "./dist/evaluator/index.d.ts" 29 | } 30 | }, 31 | "scripts": { 32 | "build": "tsc", 33 | "watch": "tsc -w", 34 | "test": "vitest run", 35 | "clean": "rimraf ./dist" 36 | }, 37 | "author": { 38 | "name": "TypeFox", 39 | "url": "https://www.typefox.io" 40 | }, 41 | "keywords": [ 42 | "langium", 43 | "ai", 44 | "tools", 45 | "llm" 46 | ], 47 | "license": "MIT", 48 | "dependencies": { 49 | "langium": "~4.1.0", 50 | "levenshtein-edit-distance": "^3.0.1" 51 | }, 52 | "volta": { 53 | "node": "20.10.0", 54 | "npm": "10.2.3" 55 | }, 56 | "devDependencies": {} 57 | } 58 | -------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/README.md: 
-------------------------------------------------------------------------------- 1 | # Example DSL Evaluator 2 | 3 | A concrete usage of langium-ai-tools to help evaluate output from an LLM (and its related setup) in the context of the example dsl in this project. 4 | 5 | ## Installation 6 | 7 | Install via npm to setup the project & build 8 | 9 | ```sh 10 | npm install 11 | npm run build 12 | ``` 13 | 14 | This demo needs **Ollama** installed & running, with the following models pulled (but feel free to adjust these to your needs): 15 | 16 | ```sh 17 | # chat + code gen models 18 | ollama pull codellama 19 | ollama pull llama3.2 20 | ollama pull codegemma 21 | 22 | # embedding model 23 | ollama pull mxbai-embed-large 24 | ``` 25 | 26 | ## Building 27 | 28 | To build the project, run the following command: 29 | 30 | ```sh 31 | npm run build 32 | ``` 33 | 34 | ## Running 35 | 36 | You can run an example evaluation with `npm run demo`. This will run a pre-defined validation suite for an example (shown at LangDev 24'), and open up the generated radar chart report. 37 | 38 | You can also run specific evaluations with the following commands: 39 | 40 | ```sh 41 | # runs the langdev evaluation example 42 | npm run start -- run-langdev 43 | ``` 44 | 45 | If you want to run a more comprehensive langium example with & without RAG, you can run: 46 | 47 | ```sh 48 | # make sure to first build embeddings in the example-dsl-splitter project! 
49 | # these will be utilized in this phase 50 | 51 | # additionally make sure to pull down llama3.1 for this one 52 | ollama pull llama3.1 53 | 54 | npm run start -- run-langium 55 | ``` 56 | 57 | And lastly you can skip right to generating a radar chart report from the last results (which is automatically generated at the end of each of these runs): 58 | 59 | ```sh 60 | npm run start -- report 61 | ``` 62 | -------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/src/example-splitter.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | import { NodeFileSystem } from "langium/node"; 8 | import { splitByNode } from "langium-ai-tools/splitter"; 9 | import { createLangiumGrammarServices } from "langium/grammar"; 10 | import { LangiumServices } from "langium/lsp"; 11 | 12 | /** 13 | * An example of utilizing the splitter in Langium AI 14 | */ 15 | 16 | export function runSplitterExample() { 17 | const exampleLangiumDoc = ` 18 | grammar Test 19 | 20 | entry Model: A | B | C | D | E; 21 | 22 | A: 'A' ID; 23 | 24 | /** 25 | * Info about B (one line above) 26 | */ 27 | 28 | B: 'B' ID; 29 | 30 | /** 31 | * Info about C 32 | */ 33 | C: 'C' ID; 34 | 35 | 36 | // info about D (one line above) 37 | 38 | D: 'D' ID; 39 | 40 | // info about E 41 | E: 'E' ID; 42 | 43 | hidden terminal WS: /\\s+/; 44 | terminal ID: /[_a-zA-Z][\\w_]*/; 45 | `; // NOTE: backslashes are doubled so the grammar text really contains \s and \w — a single \s in a template literal collapses to plain "s" 46 | 47 | const langiumServices = createLangiumGrammarServices(NodeFileSystem); 48 | 49 | // split by ParserRule (w/ comments included) 50 | const splits = splitByNode( 51 |
exampleLangiumDoc, 52 | [(node) => node.$type === "ParserRule"], 53 | langiumServices.grammar, 54 | ); 55 | 56 | console.log("Split by ParserRule w/ comments:"); 57 | console.dir(splits); 58 | 59 | // split by ParserRule (w/ comments excluded) 60 | const splitsNoComments = splitByNode( 61 | exampleLangiumDoc, 62 | [(node) => node.$type === "ParserRule"], 63 | langiumServices.grammar, 64 | { commentRuleNames: [] }, 65 | ); 66 | 67 | console.log("Split by ParserRule without comments:"); 68 | console.dir(splitsNoComments); 69 | } 70 | -------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/src/index.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root.
5 | ******************************************************************************/ 6 | 7 | import { 8 | runLangDevDemo, 9 | generateChartFromLastResults, 10 | } from "./eval-langdev.js"; 11 | import { runLangiumEvals } from "./eval-langium.js"; 12 | import { runExampleProgramMap } from "./example-program-map.js"; 13 | import { runSplitterExample } from "./example-splitter.js"; 14 | 15 | function printHelp() { 16 | console.log( 17 | "Usage: node dist/index.js [run-langium|run-langdev|report|splitter|program-map|help]", 18 | ); 19 | console.log(" run-langium: Run Langium evaluations"); 20 | console.log(" run-langdev: Run LangDev evaluations"); 21 | console.log(" report: Generate a report from the last results"); 22 | console.log(" splitter: Run the splitter example"); 23 | console.log(" program-map: Generate a program map"); 24 | console.log(" help: Show this help message"); 25 | } 26 | 27 | async function main() { 28 | const args = process.argv.slice(2); 29 | if (args.length === 0) { 30 | printHelp(); 31 | return; 32 | } 33 | 34 | const command = args[0]; 35 | 36 | switch (command) { 37 | case "run-langium": 38 | await runLangiumEvals(); 39 | generateChartFromLastResults(); 40 | break; 41 | case "run-langdev": 42 | await runLangDevDemo(); 43 | generateChartFromLastResults(); 44 | break; 45 | case "report": 46 | generateChartFromLastResults(); 47 | break; 48 | case "splitter": 49 | runSplitterExample(); 50 | break; 51 | case "program-map": 52 | runExampleProgramMap(); 53 | break; 54 | case "help": 55 | default: 56 | printHelp(); 57 | break; 58 | } 59 | } 60 | 61 | main().catch((err) => { console.error(err); process.exit(1); }); // do not leave the entry-point promise floating: surface failures as a non-zero exit 62 | 63 | -------------------------------------------------------------------------------- /packages/langium-ai-mcp/src/mcp-server.ts: -------------------------------------------------------------------------------- 1 | import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; 2 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; 3 | import { LangiumEvaluator, type
LangiumEvaluatorResultData } from 'langium-ai-tools'; 4 | import { createLangiumGrammarServices } from 'langium/grammar'; 5 | 6 | import { NodeFileSystem } from 'langium/node'; 7 | import { z } from 'zod'; 8 | 9 | const server = new McpServer({ 10 | name: 'langium-mcp-server', 11 | version: '1.0.0' 12 | }); 13 | 14 | server.registerTool('langium-syntax-checker', 15 | { 16 | title: 'Langium Evaluator Tool', 17 | description: 'Checks Langium code for errors', 18 | inputSchema: { code: z.string() } 19 | }, 20 | async ({ code }) => { 21 | const validationResult = await validateLangiumCode(code); 22 | return { 23 | content: [ 24 | { 25 | type: 'text', 26 | text: validationResult ?? 'The provided Langium code has no issues.' 27 | } 28 | ] 29 | } 30 | } 31 | ); 32 | 33 | export const langiumEvaluator = new LangiumEvaluator(createLangiumGrammarServices(NodeFileSystem).grammar); 34 | 35 | export async function validateLangiumCode(code: string): Promise<string | undefined> { 36 | const evalResult = await langiumEvaluator.evaluate(code); 37 | if (evalResult.data) { 38 | const langiumData = evalResult.data as LangiumEvaluatorResultData; 39 | if (langiumData.diagnostics.length > 0) { 40 | return langiumData.diagnostics.map(d => 41 | `${asText(d.severity)}: ${d.message} at line ${d.range.start.line + 1}, column ${d.range.start.character + 1}` 42 | ).join('\n'); 43 | } 44 | } 45 | return undefined; 46 | } 47 | 48 | function asText(severity: number | undefined): string { 49 | 50 | switch (severity) { 51 | case 1: return 'Error'; 52 | case 2: return 'Warning'; 53 | case 3: return 'Information'; 54 | case 4: return 'Hint'; 55 | default: return 'Unknown'; 56 | } 57 | } 58 | 59 | const transport = new StdioServerTransport(); 60 | await server.connect(transport); 61 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/splitter/program-map.ts: -------------------------------------------------------------------------------- 1 |
/****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | import { AstNode } from "langium"; 8 | import { LangiumServices } from "langium/lsp"; 9 | import { splitByNodeToAst } from "./splitter.js"; 10 | 11 | interface MappingRule { 12 | /** 13 | * Determines which nodes to map with this rule 14 | */ 15 | predicate: (node: AstNode) => boolean; 16 | 17 | /** 18 | * Determines how to map the node's textual split 19 | */ 20 | map: (node: AstNode) => string; 21 | } 22 | 23 | interface ProgramMapOptions { 24 | /** 25 | * List of mapping rules to apply to the document. 26 | * Each rule is a predicate that determines which nodes to map & how to map them 27 | */ 28 | mappingRules: MappingRule[] 29 | } 30 | 31 | /** 32 | * Mapper generates a repo-map like structure from a given Langium document. 33 | * Leverages the splitter to produce a mapping from split chunks. 34 | */ 35 | export class ProgramMapper { 36 | 37 | private services: LangiumServices; 38 | private options: ProgramMapOptions; 39 | 40 | constructor(services: LangiumServices, options: ProgramMapOptions) { 41 | this.services = services; 42 | this.options = options; 43 | } 44 | 45 | /** 46 | * Produces a map from the given document 47 | * @param document - The text document to be mapped.
48 | * @returns The mapped document as a list of strings, one for each mapped element 49 | */ 50 | public map(document: string): string[] { 51 | const mappingRules = this.options.mappingRules; 52 | const mapChunks: string[] = []; 53 | 54 | // get all predicates 55 | const predicates = mappingRules.map(rule => rule.predicate); 56 | 57 | const nodes = splitByNodeToAst(document, predicates, this.services); // AST nodes that matched at least one of the predicates 58 | 59 | for (const node of nodes) { 60 | // apply the mapping rule to each node 61 | for (const rule of mappingRules) { 62 | if (rule.predicate(node)) { // NOTE: a node matching several rules is mapped once per matching rule (yields multiple chunks) 63 | mapChunks.push(rule.map(node)); 64 | } 65 | } 66 | } 67 | 68 | return mapChunks; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/src/embedding-evaluator.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root.
5 | ******************************************************************************/ 6 | 7 | /** 8 | * Simple evaluator that computes the embedding for two strings, and returns the cosine similarity 9 | */ 10 | 11 | import { Evaluator, EvaluatorResult } from "langium-ai-tools/evaluator"; 12 | import ollama from 'ollama'; 13 | 14 | export interface EmbeddingEvaluatorResultData extends EvaluatorResult { 15 | similarity: number; 16 | } 17 | 18 | /** 19 | * Computes embeddings by leveraging embedding models from Ollama 20 | */ 21 | export class OllamaEmbeddingEvaluator extends Evaluator { 22 | private embeddingModel: string; 23 | 24 | constructor(model: string) { 25 | super(); 26 | this.embeddingModel = model; 27 | } 28 | 29 | /** 30 | * Evaluate the similarity between two strings by comparing their embeddings 31 | */ 32 | async evaluate(response: string, expected_response: string): Promise> { 33 | // compute the embedding for both strings 34 | const responseEmbedding = (await this.computeEmbedding(response)); 35 | const expectedEmbedding = (await this.computeEmbedding(expected_response)); 36 | 37 | // compute the cosine similarity between the two embeddings 38 | const similarity = this.cosineSimilarity(responseEmbedding, expectedEmbedding); 39 | 40 | return { 41 | data: { 42 | similarity 43 | } 44 | }; 45 | } 46 | 47 | /** 48 | * Computes the embedding for a given text 49 | * @returns 50 | */ 51 | private async computeEmbedding(text: string): Promise { 52 | return (await ollama.embed({ 53 | model: this.embeddingModel, 54 | input: [text], 55 | keep_alive: 30 56 | })).embeddings[0]; 57 | } 58 | 59 | /** 60 | * Compute the cosine similarity between two vectors 61 | */ 62 | private cosineSimilarity(a: number[], b: number[]): number { 63 | const dotProduct = a.reduce((acc, val, i) => acc + val * b[i], 0); 64 | 65 | const aMagnitude = Math.sqrt(a.reduce((acc, val) => acc + val * val, 0)); 66 | const bMagnitude = Math.sqrt(b.reduce((acc, val) => acc + val * val, 0)); 67 | 68 
| if (aMagnitude === 0 || bMagnitude === 0) { 69 | return 0; 70 | } 71 | 72 | return dotProduct / (aMagnitude * bMagnitude); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | 8 | { 9 | "type": "node", 10 | "request": "launch", 11 | "name": "Launch Example: Splitter", 12 | "skipFiles": [ 13 | "/**" 14 | ], 15 | "program": "${workspaceFolder}/packages/examples/example-dsl-evaluator/src/index.ts", 16 | "args": [ 17 | "splitter" 18 | ], 19 | "outFiles": [ 20 | "${workspaceFolder}/**/*.js" 21 | ] 22 | }, 23 | { 24 | "type": "node", 25 | "request": "launch", 26 | "name": "Launch Example: Program Map", 27 | "skipFiles": [ 28 | "/**" 29 | ], 30 | "program": "${workspaceFolder}/packages/examples/example-dsl-evaluator/src/index.ts", 31 | "args": [ 32 | "program-map" 33 | ], 34 | "outFiles": [ 35 | "${workspaceFolder}/**/*.js" 36 | ] 37 | }, 38 | { 39 | "type": "node", 40 | "request": "launch", 41 | "name": "Launch Example: LangDev Demo (with Ollama)", 42 | "skipFiles": [ 43 | "/**" 44 | ], 45 | "args": [ 46 | "run-langdev" 47 | ], 48 | "program": "${workspaceFolder}/packages/examples/example-dsl-evaluator/src/index.ts", 49 | "outFiles": [ 50 | "${workspaceFolder}/**/*.js" 51 | ] 52 | }, 53 | { 54 | "type": "node", 55 | "request": "launch", 56 | "name": "Launch Example: Langium Demo (with Ollama)", 57 | "skipFiles": [ 58 | "/**" 59 | ], 60 | "program": "${workspaceFolder}/packages/examples/example-dsl-evaluator/src/index.ts", 61 | "outFiles": [ 62 | "${workspaceFolder}/**/*.js" 63 | ] 64 | }, 65 | { 66 | "type": "node", 67 | "request": "launch", 68 | "name": "Run Current 
Test File", 69 | "skipFiles": [ 70 | "/**" 71 | ], 72 | "program": "${workspaceFolder}/node_modules/vitest/vitest.mjs", 73 | "args": [ 74 | "run", 75 | "${relativeFile}" 76 | ], 77 | "cwd": "${workspaceFolder}", 78 | "console": "integratedTerminal" 79 | } 80 | ] 81 | } -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/evaluator/document-evaluator.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | /** 8 | * Langium Document Evaluator (evaluates on a Langium document) 9 | */ 10 | 11 | import { LangiumDocument, URI } from "langium"; 12 | import { LangiumServices } from "langium/lsp"; 13 | import { Evaluator, EvaluatorResult, EvaluatorResultData } from "./evaluator.js"; 14 | 15 | 16 | export abstract class AbstractDocumentEvaluator extends Evaluator { 17 | 18 | /** 19 | * Services to use for evaluation 20 | */ 21 | protected services: T; 22 | 23 | constructor(services: T) { 24 | super(); 25 | this.services = services; 26 | } 27 | 28 | /** 29 | * Validate an agent response as if it's a langium program. If we can parse it, we attempt to validate it. 30 | */ 31 | async evaluate(input: string, fileExtension: string | undefined = undefined): Promise>> { 32 | 33 | if (input.includes('```')) { 34 | // take the first code block instead, if present (assuming it's a langium grammar) 35 | const codeBlock = input.split(/```[a-z-]*/)[1]; 36 | input = codeBlock; 37 | } 38 | const fileExt = fileExtension ? 
fileExtension : this.services.LanguageMetaData.fileExtensions[0]; 39 | const doc = this.services.shared.workspace.LangiumDocumentFactory.fromString(input, URI.parse(`memory:/test.${fileExt}`)); 40 | const context: EvaluationContext = { 41 | input: input 42 | }; 43 | try { 44 | await this.services.shared.workspace.DocumentBuilder.build([doc], { validation: true }); 45 | return this.evaluateDocument(doc, context); 46 | } catch (e) { 47 | return this.handleBuildError(e, context) 48 | } 49 | } 50 | 51 | abstract evaluateDocument(doc: LangiumDocument, ctx: EvaluationContext): Partial>; 52 | 53 | 54 | protected handleBuildError(e: unknown, ctx: EvaluationContext): Partial> { 55 | console.error('Error during evaluation: ', e); 56 | return { 57 | name: this.constructor.name, 58 | data: { 59 | failures: 1 60 | } as unknown as RD 61 | }; 62 | } 63 | } 64 | 65 | export type FailureAwarenessData = EvaluatorResultData & { 66 | /** 67 | * Number of validation failures 68 | */ 69 | failures: number; 70 | }; 71 | 72 | export type EvaluationContext = { 73 | input: string; 74 | }; -------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/src/example-program-map.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 
5 | ******************************************************************************/ 6 | 7 | import { ProgramMapper } from "langium-ai-tools"; 8 | import { createLangiumGrammarServices } from "langium/grammar"; 9 | import { NodeFileSystem } from "langium/node"; 10 | 11 | export function runExampleProgramMap() { 12 | 13 | // simple langium grammar, as an example 14 | const exampleLangiumDoc = ` 15 | grammar Test 16 | 17 | entry Model: A | B | C | D | E; 18 | 19 | A: 'A' ID; 20 | 21 | /** 22 | * Info about B (one line above) 23 | */ 24 | 25 | B: 'B' ID; 26 | 27 | /** 28 | * Info about C 29 | */ 30 | C: 'C' ID; 31 | 32 | 33 | // info about D (one line above) 34 | 35 | D: 'D' ID; 36 | 37 | // info about E 38 | E: 'E' ID; 39 | 40 | // Datatype rule 41 | DT returns string: ID; 42 | 43 | hidden terminal WS: /\s+/; 44 | terminal ID: /[_a-zA-Z][\w_]*/; 45 | `; 46 | 47 | // setup your language services (langium's, in this case) 48 | const langiumServices = createLangiumGrammarServices(NodeFileSystem); 49 | const grammarServices = langiumServices.grammar; 50 | 51 | // instantiate 52 | const mapper = new ProgramMapper(grammarServices, { 53 | mappingRules: [ 54 | { 55 | predicate: (node) => node.$type === 'ParserRule', 56 | map: (node: any) => { 57 | const ruleName = node.name; 58 | const modifiers = [ 59 | node.entry ? 'entry' : undefined, 60 | node.fragment ? 'fragment' : undefined, 61 | node.definesHiddenTokens ? 'hidden' : undefined, 62 | node.dataType ? 'datatype' : undefined 63 | ].filter(v => v !== undefined); 64 | const modifierString = modifiers.length > 0 ? `(${modifiers.join(', ')}) ` : ''; 65 | return `${modifierString}rule ${ruleName}`; 66 | } 67 | }, 68 | { 69 | predicate: (node) => node.$type === 'TerminalRule', 70 | map: (node: any) => { 71 | const modifiers = [ 72 | node.fragment ? 'fragment' : undefined, 73 | node.hidden ? 'hidden' : undefined, 74 | ].filter(v => v !== undefined); 75 | const modifierString = modifiers.length > 0 ? 
`(${modifiers.join(', ')}) ` : ''; 76 | return `${modifierString}terminal ${node.name}`; 77 | } 78 | } 79 | ] 80 | }); 81 | const programMap = mapper.map(exampleLangiumDoc); 82 | console.log('Program Map Output:'); 83 | console.log(programMap.join('\n')); 84 | } 85 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/evaluator/langium-evaluator.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | /** 8 | * Base Langium DSL validator (taps into Langium's validator messages to provide better results) 9 | */ 10 | 11 | import { LangiumDocument } from "langium"; 12 | import { LangiumServices } from "langium/lsp"; 13 | import { Diagnostic } from "vscode-languageserver-types"; 14 | import { AbstractDocumentEvaluator, EvaluationContext, FailureAwarenessData } from "./document-evaluator.js"; 15 | import { EvaluatorResult, EvaluatorResultData } from "./evaluator.js"; 16 | 17 | /** 18 | * Langium-specific evaluator result data 19 | */ 20 | export interface LangiumEvaluatorResultData extends FailureAwarenessData { 21 | 22 | /** 23 | * Number of errors 24 | */ 25 | errors: number; 26 | 27 | /** 28 | * Number of warnings 29 | */ 30 | warnings: number; 31 | 32 | /** 33 | * Number of infos 34 | */ 35 | infos: number; 36 | 37 | /** 38 | * Number of hints 39 | */ 40 | hints: number; 41 | 42 | /** 43 | * Number of unassigned diagnostics 44 | */ 45 | unassigned: number; 46 | 47 | /** 48 | * Length of the response in chars 49 | */ 50 | response_length: number; 51 | 52 | /** 53 | * Raw diagnostic data, same which is used to compute 
the other values above 54 | */ 55 | diagnostics: Diagnostic[]; 56 | } 57 | 58 | export class LangiumEvaluator extends AbstractDocumentEvaluator { 59 | 60 | 61 | /** 62 | * Validate an agent response as if it's a langium program. If we can parse it, we attempt to validate it. 63 | */ 64 | evaluateDocument(doc: LangiumDocument, ctx: EvaluationContext): EvaluatorResult { 65 | 66 | const validationResults = doc.diagnostics ?? []; 67 | 68 | const evalData: LangiumEvaluatorResultData = this.createEmptyResultData(); 69 | // include length of the response for checking 70 | evalData.response_length = ctx.input.length; 71 | // include the diagnostics for debugging if desired 72 | evalData.diagnostics = validationResults; 73 | 74 | 75 | for (const diagnostic of validationResults) { 76 | if (diagnostic.severity) { 77 | switch (diagnostic.severity) { 78 | case 1: 79 | evalData.errors++; 80 | break; 81 | case 2: 82 | evalData.warnings++; 83 | break; 84 | case 3: 85 | evalData.infos++; 86 | break; 87 | case 4: 88 | evalData.hints++; 89 | break; 90 | default: 91 | evalData.unassigned++; 92 | break; 93 | } 94 | } 95 | } 96 | 97 | return { 98 | name: this.constructor.name, 99 | metadata: {}, 100 | data: evalData 101 | }; 102 | } 103 | 104 | protected createEmptyResultData(): LangiumEvaluatorResultData { 105 | return { 106 | failures: 0, 107 | errors: 0, 108 | warnings: 0, 109 | infos: 0, 110 | hints: 0, 111 | unassigned: 0, 112 | response_length: 0, 113 | diagnostics: [] 114 | }; 115 | } 116 | } -------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/src/helloworld-cases.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the 
/**
 * Benchmark cases for generating "Hello World" grammars/programs with
 * progressively richer prompting: a bare prompt, a generic Langium system
 * prompt, the grammar supplied in context, and a fully commented grammar
 * with worked examples. Expected responses are compared by the evaluators.
 */
export const cases: Case[] = [
    // Case 1: bare prompt, no system message or context
    {
        name: 'No Prompt',
        prompt: 'Generate a simple hello world grammar written in Langium',
        context: [],
        expected_response: `
Here's a simple Hello World written in Langium:

\`\`\`langium
grammar HelloWorld

entry Model:
    (persons+=Person | greetings+=Greeting)*;

Person:
    'person' name=ID;

Greeting:
    'Hello' person=[Person:ID] '!';

hidden terminal WS: /\s+/;
terminal ID: /[_a-zA-Z][\w_]*/;

hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//;
hidden terminal SL_COMMENT: /\/\/[^\n\r]*/;
\`\`\`
`
    },
    // Case 2: generic "Langium expert" system prompt, same expected grammar
    {
        name: 'With Langium System Prompt',
        history: [{
            role: 'system',
            content: `You are an expert in Langium grammars.
Your role is to provide accurate, detailed, and insightful explanations and advice on Langium grammar-related topics.
You will answer questions, provide examples, and offer guidance on best practices, ensuring clarity and precision in all responses.
`
        }],
        context: [],
        prompt: `Generate a simple hello world program in the Hello World language.`,
        expected_response: `
Here's a simple Hello World written in Langium:

\`\`\`langium
grammar HelloWorld

entry Model:
    (persons+=Person | greetings+=Greeting)*;

Person:
    'person' name=ID;

Greeting:
    'Hello' person=[Person:ID] '!';

hidden terminal WS: /\s+/;
terminal ID: /[_a-zA-Z][\w_]*/;

hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//;
hidden terminal SL_COMMENT: /\/\/[^\n\r]*/;
\`\`\`
`
    },
    // Case 3: full grammar plus an example program in the system prompt;
    // expectation switches from grammar generation to program generation
    {
        name: 'With Hello World Grammar & Prompt',
        history: [{
            role: 'system',
            content: `You are an expert in the Hello World language, which is written in Langium.
When prompted you will provide a simple hello world program.
For reference, here's the Langium grammar that defines the Hello World language:
\`\`\`langium
grammar HelloWorld

entry Model:
    (persons+=Person | greetings+=Greeting)*;

Person:
    'person' name=ID;

Greeting:
    'Hello' person=[Person:ID] '!';

hidden terminal WS: /\s+/;
terminal ID: /[_a-zA-Z][\w_]*/;
terminal INT returns number: /[0-9]+/;
terminal STRING: /"(\\.|[^"\\])*"|'(\\.|[^'\\])*'/;

hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//;
hidden terminal SL_COMMENT: /\/\/[^\n\r]*/;
\`\`\`

And here is a simple Hello World program written in that grammar:
\`\`\`hello-world
person Alice
person Bob

Hello Alice!
Hello Bob!
\`\`\``
        }],
        prompt: 'Generate a simple program for the Hello World language',
        context: [],
        expected_response: `
person John
person Carly

Hello Carly!
Hello John!`
    },
    // Case 4: same as case 3 but the grammar and example are annotated with
    // explanatory comments, testing whether richer context helps
    {
        name: 'With Improved Hello World Prompt',
        history: [{
            role: 'system',
            content: `You are an expert in the Hello World language, which is written in Langium.
When prompted you will provide a simple hello world program.
For reference, here's the Langium grammar that defines the Hello World language:
\`\`\`langium
// name of the grammar
grammar HelloWorld

// entry point for the AST model
// composed of a list of persons and greetings, in any order
entry Model:
    (persons+=Person | greetings+=Greeting)*;

// declare a person with a given name
Person:
    'person' name=ID;

// greet a previously defined person (by name), ending with an exclamation mark
Greeting:
    'Hello' person=[Person:ID] '!';

// whitespace is ignored in this language
hidden terminal WS: /\s+/;

// regex for ID terminals
terminal ID: /[_a-zA-Z][\w_]*/;

// regex for multi-line & single line comments
// however we do not support nested comments
hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//;
hidden terminal SL_COMMENT: /\/\/[^\n\r]*/;
\`\`\`

And here is a simple Hello World program written in that grammar:
\`\`\`hello-world
// declare two persons, Alice & Bob
person Alice
person Bob

// greet Alice & Bob
Hello Alice!
Hello Bob!

// if we wanted to greet John & Carly, we would write:
person John
person Carly

Hello John!
Hello Carly!
\`\`\`
`}],
        prompt: 'Generate a simple program for the Hello World language',
        context: [],
        expected_response: `
person John
person Carly

Hello Carly!
Hello John!`
    }
];
178 | Hello John!` 179 | } 180 | ]; -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/splitter/splitter.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | import { AstNode, CstUtils, LangiumDocument, URI } from "langium"; 8 | import { LangiumServices } from "langium/lsp"; 9 | import { AstUtils } from 'langium'; 10 | 11 | interface SplitterOptions { 12 | /** 13 | * List of comment rule names to include in the chunk. 14 | * If not provided comments are ignored. 15 | * Default: ['ML_COMMENT', 'SL_COMMENT'] 16 | */ 17 | commentRuleNames?: string[] 18 | } 19 | 20 | /** 21 | * Helper function to parse a document string into a LangiumDocument object 22 | * @param document String to be parsed 23 | * @param services Associated Langium services for parsing 24 | * @returns The parsed LangiumDocument or undefined if there were errors 25 | */ 26 | function parseDocument(document: string, services: LangiumServices): LangiumDocument | undefined { 27 | const langiumDoc = services.shared.workspace.LangiumDocumentFactory.fromString(document, URI.parse('memory://document.langium')); 28 | if (langiumDoc.parseResult.lexerErrors.length > 0) { 29 | console.error('Lexer errors:', langiumDoc.parseResult.lexerErrors); 30 | return undefined; 31 | } 32 | if (langiumDoc.parseResult.parserErrors.length > 0) { 33 | console.error('Parser errors:', langiumDoc.parseResult.parserErrors); 34 | return undefined; 35 | } 36 | return langiumDoc; 37 | } 38 | 39 | /** 40 | * Extracts matching AST nodes from a document based on provided predicates. 
41 | * This function is used as a shared utility for splitting documents into AST nodes or text chunks. 42 | * @param document - The text document to be processed. 43 | * @param nodePredicates - The predicates to determine the nodes for matching. 44 | * @param services - The Langium services used for parsing the document. 45 | * @returns The matching AST nodes. 46 | */ 47 | function getMatchingAstNodes( 48 | document: string, 49 | nodePredicates: Array<(node: AstNode) => boolean> | ((node: AstNode) => boolean), 50 | services: LangiumServices 51 | ): AstNode[] { 52 | if (document.trim() === '') { 53 | return []; 54 | } 55 | 56 | const langiumDoc = parseDocument(document, services); 57 | if (!langiumDoc) { 58 | console.error('Failed to parse document'); 59 | return []; 60 | } 61 | 62 | const astNodes: AstNode[] = []; 63 | 64 | const predicates = Array.isArray(nodePredicates) ? nodePredicates : [nodePredicates]; 65 | 66 | // Stream nodes from the AST and filter them based on the predicates 67 | const stream = AstUtils.streamAst(langiumDoc.parseResult.value); 68 | for (const node of stream) { 69 | if (predicates.some(p => p(node))) { 70 | astNodes.push(node); 71 | } 72 | } 73 | return astNodes; 74 | } 75 | 76 | /** 77 | * Splits a document into text chunks based on AST nodes that match the provided predicates. 78 | * Finds the relevant nodes and then extracts corresponding text chunks from them 79 | * @param document - Document to split 80 | * @param nodePredicates - Predicates to determine the nodes for splitting 81 | * @param services - Langium grammar services used for parsing 82 | * @param options - The splitter configuration. 
See {@link SplitterOptions} 83 | * @returns Decoded text chunks from the document 84 | */ 85 | export function splitByNode( 86 | document: string, 87 | nodePredicates: Array<(node: AstNode) => boolean> | ((node: AstNode) => boolean), 88 | services: LangiumServices, 89 | options: SplitterOptions = { commentRuleNames: ['ML_COMMENT', 'SL_COMMENT'] } 90 | ): string[] { 91 | const astNodes = getMatchingAstNodes(document, nodePredicates, services); 92 | 93 | const langiumDoc = parseDocument(document, services); 94 | if (!langiumDoc) { 95 | console.error('Failed to parse document'); 96 | return []; 97 | } 98 | 99 | const txtDoc = langiumDoc.textDocument; 100 | const chunks: string[] = []; 101 | 102 | for (const node of astNodes) { 103 | let start = node.$cstNode?.range.start; 104 | 105 | if (options?.commentRuleNames) { 106 | const cstNode = node.$cstNode; 107 | const commentNode = CstUtils.findCommentNode(cstNode, options.commentRuleNames); 108 | if (commentNode) { 109 | start = commentNode.range.start; 110 | } 111 | } 112 | 113 | const end = node.$cstNode?.range.end; 114 | const chunk = txtDoc.getText({ 115 | start: { 116 | line: start?.line || 0, 117 | character: start?.character || 0 118 | }, 119 | end: { 120 | line: end?.line || 0, 121 | character: end?.character || 0 122 | } 123 | }); 124 | 125 | if (chunk.trim().length > 0) { 126 | chunks.push(chunk); 127 | } 128 | } 129 | return chunks; 130 | } 131 | 132 | /** 133 | * Splits a document into AST nodes based on the given predicates. 
134 | * Directly returns the matched AST nodes 135 | * @param document - The text document to be split 136 | * @param nodePredicates - The predicates to determine which nodes to include 137 | * @param services - Langium grammar services used for parsing 138 | * @returns The nodes picked up by the splitter 139 | */ 140 | export function splitByNodeToAst( 141 | document: string, 142 | nodePredicates: Array<(node: AstNode) => boolean> | ((node: AstNode) => boolean), 143 | services: LangiumServices 144 | ): AstNode[] { 145 | return getMatchingAstNodes(document, nodePredicates, services); 146 | } 147 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/tests/document-analyzer.test.ts: -------------------------------------------------------------------------------- 1 | import { URI } from 'langium'; 2 | import { createServicesForGrammar } from 'langium/grammar'; 3 | import { describe, expect, it } from 'vitest'; 4 | import { AnalysisMode, LangiumDocumentAnalyzer } from '../src/analyzer/document-analyzer.js'; 5 | 6 | const domainModelServices = await createServicesForGrammar({ 7 | grammar: ` 8 | grammar DomainModel 9 | 10 | entry Domainmodel: 11 | (elements+=AbstractElement)*; 12 | 13 | AbstractElement: 14 | PackageDeclaration | Type; 15 | 16 | PackageDeclaration: 17 | 'package' name=QualifiedName '{' 18 | (elements+=AbstractElement)* 19 | '}'; 20 | 21 | Type: 22 | DataType | Entity; 23 | 24 | DataType: 25 | 'datatype' name=ID; 26 | 27 | Entity: 28 | 'entity' name=ID ('extends' superType=[+Entity:QualifiedName])? '{' 29 | (features+=Feature)* 30 | '}'; 31 | 32 | Feature: 33 | (many?='many')? name=ID ':' type=[Type:QualifiedName]; 34 | 35 | QualifiedName returns string: 36 | ID ('.' 
// Test suite for LangiumDocumentAnalyzer, exercised against the DomainModel
// example grammar and the shared `docAnalyzer` / `exampleModel` fixtures above.
describe('LangiumDocumentAnalyzer', () => {

    // Basic end-to-end: evaluate a raw string and inspect per-rule usage counts
    it('should collect syntax usage statistics from string', async () => {
        const result = await docAnalyzer.evaluate('package foo.bar {}');
        const statistics = docAnalyzer.extractStatisticsFromResult(result)!;

        expect(statistics).toBeDefined();
        expect(statistics.ruleUsage['AbstractElement']).toBe(1);
        expect(statistics.ruleUsage['PackageDeclaration']).toBe(1);
        expect(statistics.ruleUsage['QualifiedName']).toBe(1);
        expect(statistics.ruleUsage['ID']).toBe(2);
    });


    // Coverage = used rules / total rules (as a percentage)
    it('should compute coverage correctly', () => {
        const statistics = collectSyntaxUsageStatistics('package foo.bar {}');
        expect(Object.values(statistics.ruleUsage).filter(count => count > 0).length, 'Used rules number').toBe(4);
        expect(statistics.coverage).toBeCloseTo(40.0, 1);
    });

    // Hidden terminals (comments) are counted by default and omitted when
    // the analyzer is constructed with includeHiddenRules: false
    it('should handle "includeHiddenRules" flag', () => {
        const model = 'package bar { /** Multi-line comment */ }';

        const statistics = collectSyntaxUsageStatistics(model);
        expect(statistics.ruleUsage['ML_COMMENT']).toBe(1);

        const noHidden = new LangiumDocumentAnalyzer(domainModelServices, {
            includeHiddenRules: false
        });
        const statisticsNoHidden = collectSyntaxUsageStatistics(model, noHidden);
        expect(statisticsNoHidden.ruleUsage['ML_COMMENT']).toBeUndefined();
    });


    // Diversity metrics over the shared exampleModel fixture
    it('should compute entropy correctly', () => {
        const statistics = collectSyntaxUsageStatistics(exampleModel);
        expect(statistics.diversity.entropy).toBeCloseTo(2.28, 1);
    });

    it('should compute gini coefficient correctly', () => {
        const statistics = collectSyntaxUsageStatistics(exampleModel);
        expect(statistics.diversity.giniCoefficient).toBeCloseTo(0.65, 1);
    });

    it('should compute simpson index correctly', () => {
        const statistics = collectSyntaxUsageStatistics(exampleModel);
        expect(statistics.diversity.simpsonIndex).toBeCloseTo(0.7, 1);
    });

    // Excluded rules disappear from ruleUsage entirely, which in turn
    // changes the coverage denominator
    it('should handle excluded rules', () => {
        const analyzerWithExcludedRules = new LangiumDocumentAnalyzer(domainModelServices, {
            excludeRules: ['Feature', 'DataType']
        });

        const testModel = 'package foo.bar { entity TestEntity { } }';

        const stats = collectSyntaxUsageStatistics(testModel);
        const statsWithExclude = collectSyntaxUsageStatistics(testModel, analyzerWithExcludedRules);

        expect(stats.ruleUsage).toHaveProperty('DataType');
        expect(stats.ruleUsage).toHaveProperty('Feature');
        // Verify that excluded rules are not present in the statistics
        expect(statsWithExclude.ruleUsage).not.toHaveProperty('DataType');
        expect(statsWithExclude.ruleUsage).not.toHaveProperty('Feature');

        // Check stats with exclude still has all rules from original except excluded ones
        expect(Object.keys(statsWithExclude.ruleUsage).length).toBe(Object.keys(stats.ruleUsage).length - 2);

        // Less rules (unused) will result in higher coverage
        expect(statsWithExclude.coverage).toBeGreaterThan(stats.coverage);
    });

    // Empty input: rule table still lists all rules, every metric is zero
    it('should handle empty documents', () => {
        const statistics = collectSyntaxUsageStatistics('');
        expect(Object.keys(statistics.ruleUsage).length).toBeGreaterThan(0);
        expect(statistics.coverage).toBe(0);
        expect(statistics.diversity.entropy).toBe(0);
        expect(statistics.diversity.giniCoefficient).toBe(0);
        expect(statistics.diversity.simpsonIndex).toBe(0);
    });

    // NO_STATISTIC mode: validation still runs, but no statistics are produced
    it('should handle analysis mode NO_STATISTIC', async () => {
        const noStatAnalyzer = new LangiumDocumentAnalyzer(domainModelServices, {
            analysisMode: AnalysisMode.NO_STATISTIC
        });
        const result = await noStatAnalyzer.evaluate('package foo.bar { entity TestEntity { } }');
        const statistics = noStatAnalyzer.extractStatisticsFromResult(result);

        expect(statistics).toBeUndefined();
        expect(result.data?.failures, 'Expected validation passed.').toBe(0);
    });
});
5 | ******************************************************************************/ 6 | 7 | import { config } from 'dotenv'; 8 | import { EmptyFileSystem } from 'langium'; 9 | import { averageAcrossCases, averageAcrossRunners, EditDistanceEvaluator, EditDistanceEvaluatorResultData, EvalMatrix, generateHistogram, generateHistoricalChart, generateRadarChart, LangiumEvaluator, LangiumEvaluatorResultData, loadLastResults, mergeEvaluators, normalizeData } from 'langium-ai-tools/evaluator'; 10 | import { createLangiumGrammarServices } from 'langium/grammar'; 11 | import { EmbeddingEvaluatorResultData, OllamaEmbeddingEvaluator } from './embedding-evaluator.js'; 12 | import { cases as langiumCases } from './langium-cases.js'; 13 | import { runner_codegemma, runner_codegemma_rag, runner_codellama, runner_codellama_rag, runner_llama3_1_rag, runner_llama3_2_3b, runner_llama3_2_3b_rag } from './runners.js'; 14 | config(); 15 | 16 | const langiumServices = createLangiumGrammarServices(EmptyFileSystem); 17 | 18 | type MergedEvaluatorResultType = LangiumEvaluatorResultData & EditDistanceEvaluatorResultData & EmbeddingEvaluatorResultData; 19 | 20 | /** 21 | * Runs a first set of evals for generating hello-world grammars 22 | * This is done with just a prompt 23 | */ 24 | export async function runLangiumEvals() { 25 | 26 | const eMat = new EvalMatrix({ 27 | config: { 28 | // various config options 29 | name: 'Langium Grammar Evaluator w/ System Prompt', 30 | description: 'Evaluates Various Generated Grammars using Various Model Stacks', 31 | history_folder: '.langium-ai', 32 | // number of runs to average across 33 | num_runs: 3 34 | }, 35 | runners: [ 36 | runner_llama3_2_3b, 37 | runner_codellama, 38 | runner_codegemma, 39 | 40 | // rag versions 41 | runner_llama3_2_3b_rag, 42 | runner_codellama_rag, 43 | runner_codegemma_rag, 44 | runner_llama3_1_rag, 45 | ], 46 | evaluators: [ 47 | { 48 | name: 'Langium + Edit Distance + Embedding Evaluator (merged)', 49 | eval: 
mergeEvaluators( 50 | new LangiumEvaluator(langiumServices.grammar), 51 | new EditDistanceEvaluator(), 52 | new OllamaEmbeddingEvaluator('nomic-embed-text') 53 | ) 54 | } 55 | ], 56 | cases: langiumCases 57 | }); 58 | 59 | // run & report 60 | const results = await eMat.run(); 61 | console.log('Evaluation report: '); 62 | console.table(results.map(r => { 63 | return { 64 | name: r.name, 65 | ...r.data 66 | } 67 | }), ['name', 'errors', 'warnings', 'infos', 'hints', 'unassigned', 'edit_distance']); 68 | 69 | // get average scores too 70 | const processedResults = averageAcrossCases(results); 71 | console.log('Average Evaluation report: '); 72 | console.table(processedResults.map(r => { 73 | return { 74 | name: r.name, 75 | ...r.data 76 | } 77 | }), ['name', 'errors', 'warnings', 'infos', 'hints', 'unassigned', 'edit_distance']); 78 | 79 | } 80 | 81 | export function generateChartFromLastResults() { 82 | const rawResults = loadLastResults('.langium-ai', 3); 83 | console.log('Last Evaluation report: '); 84 | 85 | // generate a radar chart for this run 86 | generateRadarChart( 87 | 'Radar Chart (smaller is better)', 88 | normalizeData(rawResults), 89 | './radar-chart.html', 90 | (data: MergedEvaluatorResultType, metadata: Record) => { 91 | return { 92 | 'Failures': data.failures, 93 | 'Errors': data.errors, 94 | 'Warnings': data.warnings, 95 | 'Semantic Diff': 1.0 - data.similarity, // inverse similarity 96 | 'Total Diagnostics': (data.errors + data.warnings + data.infos + data.hints + data.unassigned) / 5.0, 97 | 'Response Size': data.response_length ?? 0, 98 | 'Edit Distance': data.edit_distance, 99 | 'Time': data._runtime ?? 
0 100 | } 101 | } 102 | ); 103 | 104 | // generate a histogram chart for this run 105 | generateHistogram( 106 | 'Histogram Chart (smaller is better)', 107 | normalizeData(rawResults), 108 | './histogram-chart.html', 109 | (data: MergedEvaluatorResultType, metadata: Record) => { 110 | return { 111 | 'Failures': data.failures, 112 | 'Errors': data.errors, 113 | 'Warnings': data.warnings, 114 | 'Semantic Diff': 1.0 - data.similarity, 115 | 'Total Diagnostics': (data.errors + data.warnings + data.infos + data.hints + data.unassigned) / 5.0, 116 | 'Response Size': data.response_length ?? 0, 117 | 'Edit Distance': data.edit_distance, 118 | 'Time': data._runtime ?? 0 119 | } 120 | } 121 | ); 122 | 123 | // generate a historical chart for all unique runners over time 124 | generateHistoricalChart( 125 | 'Historical Chart (Approx. Area of Radar Chart)', 126 | '.langium-ai', // src folder for data 127 | './historical-chart.html', 128 | (data: MergedEvaluatorResultType, metadata: Record) => { 129 | return calculateTriangleAreas(data).reduce((a, b) => a + b, 0); 130 | }, 131 | { 132 | preprocess: averageAcrossRunners, 133 | // take: 30, 134 | // filter: (r) => r.name.match(/rag/i) === null 135 | } 136 | ); 137 | } 138 | 139 | function calculateTriangleAreas(data: MergedEvaluatorResultType): number[] { 140 | // order changes area, so be consistent here when checking! 141 | const values = [ 142 | data.failures ?? 0, 143 | data.errors, 144 | data.warnings, 145 | 1.0 - (data.similarity ?? 0), // Semantic Diff 146 | (data.errors + data.warnings + data.infos + data.hints + data.unassigned) / 5.0, // Total Diagnostics 147 | data.response_length ?? 0, 148 | data.edit_distance, 149 | data._runtime ?? 
0 150 | ]; 151 | 152 | const n = values.length; 153 | const angle = 2 * Math.PI / n; 154 | const areas = []; 155 | 156 | for (let i = 0; i < n; i++) { 157 | const r1 = values[i]; 158 | const r2 = values[(i + 1) % n]; 159 | const area = 0.5 * r1 * r2 * Math.sin(angle); 160 | areas.push(area); 161 | } 162 | 163 | return areas; 164 | } 165 | 166 | 167 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/evaluator/eval-matrix.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | import { Evaluator, EvaluatorResult } from "./evaluator.js"; 8 | import fs from 'fs'; 9 | import * as path from 'path'; 10 | 11 | /** 12 | * Configuration for the evaluation matrix 13 | */ 14 | export interface EvalMatrixConfig { 15 | config: { 16 | /** 17 | * Name of the evaluation matrix 18 | */ 19 | name: string; 20 | 21 | /** 22 | * Helpful description of the evaluation matrix 23 | */ 24 | description: string; 25 | 26 | /** 27 | * Where to store run history 28 | */ 29 | history_folder: string; 30 | 31 | /** 32 | * The number of runs to perform for each case 33 | * Note this will trigger evaluation for all registered evaluators for each run 34 | */ 35 | num_runs: number; 36 | }, 37 | 38 | /** 39 | * Runners to evaluate 40 | */ 41 | runners: Runner[]; 42 | 43 | /** 44 | * Evaluators to evaluate with 45 | */ 46 | evaluators: NamedEvaluator[]; 47 | 48 | /** 49 | * Cases to evaluate 50 | */ 51 | cases: Case[]; 52 | } 53 | 54 | /** 55 | * Evaluation matrix for running multiple runners on multiple cases with multiple evaluators 56 | */ 57 | 
export class EvalMatrix { 58 | private config: EvalMatrixConfig; 59 | 60 | constructor(config: EvalMatrixConfig) { 61 | this.config = config; 62 | } 63 | 64 | /** 65 | * Run the evaluation matrix, getting all results back 66 | */ 67 | async run(): Promise { 68 | 69 | // get the current timestamp 70 | const start = new Date(); 71 | 72 | const results: EvaluatorResult[] = []; 73 | 74 | // verify that all runners have unique names first 75 | const runnerNames = this.config.runners.map(r => r.name); 76 | const uniqueRunnerNames = new Set(); 77 | for (const name of runnerNames) { 78 | if (uniqueRunnerNames.has(name)) { 79 | throw new Error(`Runner names must be unique, found duplicate: ${name}`); 80 | } 81 | uniqueRunnerNames.add(name); 82 | } 83 | 84 | console.log(`Running evaluation matrix: ${this.config.config.name}`); 85 | console.log(`Found ${this.config.runners.length * this.config.cases.length * this.config.evaluators.length} runner-evaluator-case combinations to handle`); 86 | 87 | // run all runners 88 | for (const runner of this.config.runners) { 89 | 90 | console.log(`* Runner: ${runner.name}`); 91 | 92 | // run all cases for this runner 93 | for (const testCase of this.config.cases) { 94 | console.log(` * Case: ${testCase.name}`); 95 | 96 | const runCount = this.config.config.num_runs ?? 1; 97 | for (let iteration = 0; iteration < runCount; iteration++) { 98 | const runnerStartTime = new Date(); 99 | const response = await runner.runner(testCase.prompt, testCase.history ?? 
[]); 100 | const runnerEndTime = new Date(); 101 | 102 | // run all evaluators on this response 103 | for (const evaluator of this.config.evaluators) { 104 | console.log(` * Evaluator: ${evaluator.name} (run ${iteration + 1})`); 105 | const result = await evaluator.eval.evaluate(response, testCase.expected_response); 106 | if (!result.name) { 107 | result.name = `${runner.name} - ${testCase.name} - ${evaluator.name}`; 108 | } 109 | // add runtime there too, so we have access to it 110 | result.data!._runtime = (runnerEndTime.getTime() - runnerStartTime.getTime()) / 1000.0; // in seconds 111 | 112 | result.metadata = { 113 | runner: runner.name, 114 | evaluator: evaluator.name, 115 | testCase: { ...testCase }, 116 | actual_response: response, 117 | duration: (runnerEndTime.getTime() - runnerStartTime.getTime()) / 1000.0, // in seconds 118 | run_count: iteration + 1 119 | }; 120 | 121 | results.push(result as EvaluatorResult); 122 | } 123 | } 124 | } 125 | } 126 | 127 | // check if the folder exists first 128 | if (!fs.existsSync(this.config.config.history_folder)) { 129 | fs.mkdirSync(this.config.config.history_folder); 130 | } 131 | 132 | const dateStr = new Date().toISOString(); 133 | const sanitizedDateStr = dateStr.replace(/:/g, '-').replace(/\./g, '-'); 134 | let fileName = `${sanitizedDateStr}-${this.config.config.name.toLowerCase().replace(/\s+/g, '-')}.json`; 135 | // escape any slashes too 136 | fileName = fileName.replace(/\//g, '-'); 137 | 138 | console.log(`Writing results to file: ${path.join(this.config.config.history_folder, fileName)}`); 139 | 140 | // run time in seconds 141 | const runTime = (new Date().getTime() - start.getTime()) / 1000; 142 | console.log(`Evaluation matrix completed in ${runTime} seconds (${runTime / 60} minutes)`); 143 | 144 | // prepare & write results to file 145 | const report = { 146 | config: this.config.config, 147 | date: dateStr, 148 | runTime: `${runTime}s`, 149 | results 150 | }; 151 | 
fs.writeFileSync(path.join(this.config.config.history_folder, fileName), JSON.stringify(report, null, 2)); 152 | 153 | // write the name of this last report into last.txt 154 | fs.writeFileSync(path.join(this.config.config.history_folder, 'last.txt'), fileName); 155 | 156 | return results; 157 | } 158 | } 159 | 160 | /** 161 | * General format for histories when prompting 162 | */ 163 | export interface Message { 164 | role: 'user' | 'system' | 'assistant'; 165 | content: string; 166 | } 167 | 168 | /** 169 | * Runner interface for running a prompt against a mode, a service, or something else that provides a response 170 | */ 171 | export interface Runner { 172 | name: string; 173 | runner: (prompt: string, messages: Message[]) => Promise; 174 | } 175 | 176 | /** 177 | * Generic evaluator interface w/ a name to identify it 178 | */ 179 | export interface NamedEvaluator { 180 | name: string; 181 | eval: Evaluator; 182 | } 183 | 184 | /** 185 | * Case interface for defining an evaluation case 186 | */ 187 | export interface Case { 188 | /** 189 | * Name of the case 190 | */ 191 | name: string; 192 | 193 | /** 194 | * Options Message history, used for system, user & assistant messages 195 | */ 196 | history?: Message[]; 197 | 198 | /** 199 | * Core prompt to run with 200 | */ 201 | prompt: string; 202 | 203 | /** 204 | * Context for the prompt, used for RAG applications 205 | */ 206 | context: string[]; 207 | 208 | /** 209 | * Expected response 210 | */ 211 | expected_response: string; 212 | } -------------------------------------------------------------------------------- /packages/langium-ai-tools/README.md: -------------------------------------------------------------------------------- 1 | # Langium AI Tools 2 | 3 | ## Overview 4 | 5 | This project provides core tools that make it easier to build AI applications for Langium DSLs. 
These core tools help to solve the following problems around building AI applications by making it easier to: 6 | 7 | - Determine which models work well for your DSL 8 | - Evaluate which changes to your tooling actually improve your generation results 9 | - How to process DSL documents in a way that makes sense for your DSL & target application 10 | 11 | To solve these problems this package provides: 12 | 13 | - Splitting Support: Using your DSL's parser to make it easier to pre-process documents before ingest (such as into a vector DB) 14 | - Training & Evaluation Support: Assess the output of your model + RAG + whatever else you have in your stack with regards to a structured input/output evaluation phase. 15 | - Constraint Support: Synthesize BNF-style grammars from your Langium grammar, which can be used to control the token output from an LLM to conform to your DSL's expected structure (this feature has been added directly into the **langium-cli** itself, as it has wider general applications). 16 | 17 | What's also important is what is not provided: 18 | - *We don't choose your model for you.* We believe this is your choice, and we don't want to presume we know best or lock you in. All we assume is that you have a model (or stack) that we can use. For tooling that leverages models directly, we'll be providing a separate package under Langium AI that will be separate from the core here. 19 | - *We don't choose your stack for you.* There are many excellent choices for hosting providers, databases, caches, and other supporting services (local & remote). There's so many, and they change so often, that we decided it was best to not assume what works here, and rather support preparing information for whatever stack you choose. 20 | 21 | LLMs (and transformers in general), are evolving quite rapidly. With this approach, these tools help you build your own specific approach, whilst letting you keep up with the latest and greatest in model developments. 
22 | 23 | ## Installation 24 | 25 | Langium AI tools tracks tightly with Langium releases. If you're using Langium 3.X or 4.X in your project, you'll want to install the corresponding version of Langium AI Tools that matches it. 26 | 27 | ```bash 28 | # if you're using Langium 4.1.X 29 | npm i --save langium-ai-tools@^4.1.0 30 | 31 | # or 3.5.X 32 | npm i --save langium-ai-tools@^3.5.0 33 | ``` 34 | 35 | We don't actively support Langium 2.X or earlier. 36 | 37 | ## Usage 38 | 39 | ### Splitting 40 | 41 | Langium AI Tools presents various splitting utilities that are simple but flexible. This includes: 42 | - retrieving all nodes of a specific type from a document 43 | - splitting a document into chunks by node type (with or without comments) 44 | - generating program maps 45 | 46 | For example, if you have a DSL that looks like this: 47 | 48 | ``` 49 | // A dsl that allows writing functions... 50 | function foo() { ... } 51 | function bar() { ... } 52 | ``` 53 | 54 | Assuming your Langium parser rule for these functions is called `Func`, you could write a splitter like so to chunk by functions: 55 | 56 | ```ts 57 | const chunks: string[] = splitByNode( 58 | dslProgramString, 59 | [ 60 | (node) => node.$type === 'Func' 61 | ], 62 | myLanguageServices.grammar, 63 | // default options, comment rule names can be set to [] to avoid including comments in chunks 64 | // { commentRuleNames: ['ML_COMMENT', 'SL_COMMENT'] } 65 | ); 66 | ``` 67 | 68 | And you would get back something like this: 69 | ``` 70 | [ 71 | 'function foo() { ... }', 72 | 'function bar() { ... }' 73 | ] 74 | ``` 75 | 76 | In case you want just the AST node, and don't want it transformed to text, you can use the `splitByNodeToAst` function instead, giving you back the raw AST nodes to do with as you want. 77 | 78 | If you want a program map (like in the case of generating a mapping of your repo), you can utilize the `ProgramMapper` class. 
79 | 80 | ```ts 81 | import { ProgramMapper } from 'langium-ai-tools/splitter'; 82 | 83 | const myProg = ` 84 | function foo() { ... } 85 | function bar() { ... } 86 | `; 87 | 88 | const services = createMyDSLServices(EmptyFileSystem).mydsl; // will differ based on the name of your DSL 89 | const mapper = new ProgramMapper(services, { 90 | mappingRules: [ 91 | { 92 | predicate: (node) => node.$type === 'Func', 93 | map: (node) => `func ${node.name}` 94 | }, 95 | ... 96 | ] 97 | }); 98 | const programMap: string[] = mapper.map(myProg); 99 | ``` 100 | 101 | Which would give you an output like so: 102 | 103 | ``` 104 | [ 105 | 'func foo', 106 | 'func bar' 107 | ] 108 | ``` 109 | 110 | In both cases, you can provide predicates for the nodes that are of interest to you. The splitter chunking is a bit more opinionated, but the program mapper allows fine-grained generation when needed. 111 | 112 | For examples you can check out the [example-dsl-evaluator](../examples/example-dsl-evaluator/README.md) project, which also contains splitting & mapping examples. 113 | 114 | ### Evaluation 115 | 116 | Regardless of how you've sourced your model, you'll need a metric for determining the quality of your output. 117 | 118 | For Langium DSLs, we provide a series of *evaluator* utilities to help in assessing the correctness of DSL output. 119 | 120 | It's important to point out that evaluations are *not* tests, instead this is more similar to [OpenAI's evals framework](https://github.com/openai/evals). The idea is that we're grading or scoring outputs with regards to an expected output from a known input. This is a simple but effective approach to determining if your model is generally doing what you expect it to in a structured way, and *not* doing something else as well. 121 | 122 | Take the following evaluator for example. Let's assume you have [Ollama](https://ollama.com/) running locally, and the [ollama-js](https://github.com/ollama/ollama-js) package installed.
From a given base model you can define evaluations like so. 123 | 124 | ```ts 125 | import { Evaluator, EvaluatorScore } from 'langium-ai-tools/evaluator'; 126 | import ollama from 'ollama'; 127 | 128 | // get your language's services 129 | const services = createMyDSLServices(EmptyFileSystem).MyDSL; 130 | 131 | // define an evaluator using your language's services 132 | // this effectively uses your existing parser & validations to 'grade' the response 133 | const evaluator = new LangiumEvaluator(services); 134 | 135 | // make some prompt 136 | const response = await ollama.chat({ 137 | model: 'llama3.2', 138 | messages: [{ 139 | role: 'user', 140 | content: 'Write me a hello world program written in MyDSL.' 141 | }] 142 | }); 143 | 144 | const es: EvaluatorScore = await evaluator.evaluate(response.message.content); 145 | 146 | // print out your score! 147 | console.log(es); 148 | ``` 149 | 150 | You can also define custom evaluators that are more tuned to the needs of your DSL. This could be handling diagnostics in a very specific fashion, extracting code out of the response itself to check, using an evaluation model to grade the response, or using a combination of techniques to get a more accurate score for your model's output. 151 | 152 | In general we stick to focusing on what Langium can do to help with evaluation, but leave the opportunity open for you to extend, supplement, or modify evaluation logic as you see fit. 153 | 154 | ## Contributing 155 | 156 | If you want to help feel free to open an issue or a PR. As a general note we're open to accepting changes that focus on improving how we can support AI application development for Langium DSLs. But we don't want to provide explicit bindings to actual services/providers at this time, such as LLamaIndex, Ollama, LangChain, or others. Similarly this package doesn't provide direct bindings for AI providers such as OpenAI and Anthropic here.
Instead these changes will go into a separate package under Langium AI that is intended for this purpose. 157 | -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/evaluator/evaluator.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | /** 8 | * Baseline Validator Class 9 | */ 10 | 11 | import { readFileSync, existsSync, readdirSync } from 'fs'; 12 | import * as path from 'path'; 13 | 14 | export type EvaluatorResultData = Record & { 15 | _runtime?: number; 16 | }; 17 | 18 | /** 19 | * Evaluator result type 20 | */ 21 | export type EvaluatorResult = { 22 | /** 23 | * Name of this evaluation 24 | */ 25 | name: string; 26 | 27 | /** 28 | * Optional metadata, can be used to store additional information 29 | */ 30 | metadata: Record; 31 | 32 | /** 33 | * Data for this evaluation 34 | */ 35 | data: T; 36 | 37 | }; 38 | 39 | /** 40 | * Helper to process a set of results, averaging all runs of each runner-evaluator-case combination 41 | */ 42 | export function averageAcrossCases(results: EvaluatorResult[]): EvaluatorResult[] { 43 | const mappedResults: Map = new Map(); 44 | 45 | const averagedResults: EvaluatorResult[] = []; 46 | 47 | // collect like-results 48 | for (const result of results) { 49 | // add this result to the map (grouping by runner & case) 50 | const name = result.name; 51 | const existingResult = mappedResults.get(name) ?? 
[]; 52 | existingResult.push(result); 53 | mappedResults.set(name, existingResult); 54 | } 55 | 56 | // average the results 57 | for (const [_key, groupedResults] of mappedResults) { 58 | const avgData = groupedResults[0].data; 59 | 60 | // sum all results except the first 61 | for (const result of groupedResults.slice(1)) { 62 | const resultData = result.data; 63 | for (const [key, value] of Object.entries(resultData)) { 64 | if (typeof value === 'number') { 65 | avgData[key] = (avgData[key] as number ?? 0) + value; 66 | } 67 | } 68 | } 69 | 70 | // lastly, divide each entry by the number of 'groupedResults' 71 | for (const [key, value] of Object.entries(avgData)) { 72 | if (typeof value === 'number') { 73 | avgData[key] = value / groupedResults.length; 74 | // round to 2 decimal places 75 | avgData[key] = Math.round((avgData[key] as number) * 100) / 100; 76 | } 77 | } 78 | 79 | averagedResults.push({ 80 | name: groupedResults[0].name, 81 | metadata: groupedResults[0].metadata, 82 | data: avgData 83 | }); 84 | } 85 | return averagedResults; 86 | } 87 | 88 | /** 89 | * Averages all results across runners at the highest level, to get a single result for each runner 90 | */ 91 | export function averageAcrossRunners(results: EvaluatorResult[]): EvaluatorResult[] { 92 | // first average across runs 93 | const processedResults = averageAcrossCases(results); 94 | 95 | // now average across runners 96 | const mappedResults: Map = new Map(); 97 | 98 | const averagedResults: EvaluatorResult[] = []; 99 | 100 | // collect like-results 101 | for (const result of processedResults) { 102 | // add this result to the map (grouping by runner) 103 | const name = result.metadata.runner; 104 | const existingResult = mappedResults.get(name) ?? 
[]; 105 | existingResult.push(result); 106 | mappedResults.set(name, existingResult); 107 | } 108 | 109 | // average the results 110 | for (const [_key, groupedResults] of mappedResults) { 111 | const avgData = groupedResults[0].data; 112 | 113 | // sum all results except the first 114 | for (const result of groupedResults.slice(1)) { 115 | const resultData = result.data; 116 | for (const [key, value] of Object.entries(resultData)) { 117 | if (typeof value === 'number') { 118 | avgData[key] = (avgData[key] as number ?? 0) + value; 119 | } 120 | } 121 | } 122 | 123 | // lastly, divide each entry by the number of 'groupedResults' 124 | for (const [key, value] of Object.entries(avgData)) { 125 | if (typeof value === 'number') { 126 | avgData[key] = value / groupedResults.length; 127 | // round to 2 decimal places 128 | avgData[key] = Math.round((avgData[key] as number) * 100) / 100; 129 | } 130 | } 131 | 132 | averagedResults.push({ 133 | name: groupedResults[0].metadata.runner, 134 | metadata: groupedResults[0].metadata, 135 | data: avgData 136 | }); 137 | } 138 | 139 | return averagedResults; 140 | } 141 | 142 | /** 143 | * Report 144 | */ 145 | export interface Report { 146 | config: { 147 | name: string; 148 | description: string; 149 | history_folder: string; 150 | num_runs: number; 151 | }; 152 | date: string; 153 | runTime: string; 154 | results: EvaluatorResult[]; 155 | } 156 | 157 | /** 158 | * Loads a specific report, containing evaluator results from a file & returns it 159 | */ 160 | export function loadReport(file: string): Report { 161 | return JSON.parse(readFileSync(file, 'utf-8')) as Report; 162 | } 163 | 164 | /** 165 | * Attempts to load the most recent evaluator results from the given file 166 | */ 167 | export function loadLastResults(dir: string, take?: number): EvaluatorResult[] { 168 | if (!existsSync(dir)) { 169 | throw new Error(`Directory does not exist: ${dir}`); 170 | } 171 | 172 | let files = readdirSync(dir).filter(f => 
f.endsWith('.json')); 173 | 174 | if (!take) { 175 | const lastFile = path.join(dir, 'last.txt'); 176 | 177 | if (!existsSync(lastFile)) { 178 | throw new Error(`Last file does not exist in directory: ${dir}. Try running an evaluation matrix first.`); 179 | } 180 | // read name from last file 181 | const lastFileName = readFileSync(lastFile).toString(); 182 | 183 | files.push(lastFileName); 184 | 185 | } else { 186 | // read the most recent files 187 | files = files.sort().reverse().slice(0, take); 188 | 189 | } 190 | 191 | const results: EvaluatorResult[] = []; 192 | 193 | for (const file of files) { 194 | const report = loadReport(path.join(dir, file)); 195 | results.push(...report.results); 196 | } 197 | 198 | return results; 199 | } 200 | 201 | /** 202 | * Evaluator class for evaluating agent responses 203 | */ 204 | export abstract class Evaluator { 205 | /** 206 | * Validate some agent response 207 | */ 208 | abstract evaluate(response: string, expected_response: string): Promise>; 209 | 210 | } 211 | 212 | export function mergeEvaluators(...evaluators: Evaluator[]): Evaluator { 213 | // merge evaluators in sequence 214 | return evaluators.reduce((acc, val) => mergeEvaluatorsInternal(acc, val)); 215 | } 216 | 217 | /** 218 | * Merges two evaluators together in sequence, such that results of a are combined with b (b takes precedence in key overrides) 219 | * @param a First evaluator to merge 220 | * @param b Second evaluator to merge 221 | */ 222 | function mergeEvaluatorsInternal(a: Evaluator, b: Evaluator): Evaluator { 223 | return { 224 | async evaluate(response: string, expected_response: string): Promise> { 225 | const r1 = await a.evaluate(response, expected_response); 226 | const r2 = await b.evaluate(response, expected_response); 227 | return { 228 | metadata: { 229 | ...r1.metadata, 230 | ...r2.metadata 231 | }, 232 | data: { 233 | ...r1.data, 234 | ...r2.data 235 | } 236 | }; 237 | } 238 | }; 239 | } 240 | 
-------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/src/eval-langdev.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | import { EmptyFileSystem } from 'langium'; 8 | import { averageAcrossCases, Case, EvalMatrix, EvaluatorResult, generateRadarChart, LangiumEvaluator, LangiumEvaluatorResultData, loadLastResults, mergeEvaluators, Message, normalizeData, Runner } from 'langium-ai-tools/evaluator'; 9 | import { createLangiumGrammarServices } from 'langium/grammar'; 10 | import ollama from 'ollama'; 11 | import { EmbeddingEvaluatorResultData, OllamaEmbeddingEvaluator } from './embedding-evaluator.js'; 12 | import * as readline from 'readline/promises'; 13 | 14 | const rl = readline.createInterface({ 15 | input: process.stdin, 16 | output: process.stdout 17 | }); 18 | 19 | /** 20 | * Create services for the Langium grammar language. 
21 | * 22 | * In your case, you would do the same for your own language instead (using your module) 23 | */ 24 | const langiumServices = createLangiumGrammarServices(EmptyFileSystem); 25 | 26 | /** 27 | * Runners 28 | */ 29 | namespace Runners { 30 | 31 | /** 32 | * llama3.2 3b runner 33 | */ 34 | export const llama3_2_3b: Runner = { 35 | name: 'llama3.2 3B', 36 | runner: async (content: string, messages: Message[] = []) => { 37 | const newMsgs = [...messages, { role: 'user', content }]; 38 | return (await prompt('llama3.2:latest', newMsgs)).message.content; 39 | } 40 | }; 41 | 42 | /** 43 | * Codellama runner 44 | */ 45 | export const codellama: Runner = { 46 | name: 'codellama', 47 | runner: async (content: string, messages: Message[] = []) => { 48 | const newMsgs = [...messages, { role: 'user', content }]; 49 | return (await prompt('codellama:latest', newMsgs)).message.content; 50 | } 51 | }; 52 | 53 | /** 54 | * And a codegemma runner 55 | */ 56 | export const codegemma: Runner = { 57 | name: 'codegemma', 58 | runner: async (content: string, messages: Message[] = []) => { 59 | const newMsgs = [...messages, { role: 'user', content }]; 60 | return (await prompt('codegemma:latest', newMsgs)).message.content; 61 | } 62 | }; 63 | 64 | /** 65 | * Helper to prompt models hosted in Ollama 66 | */ 67 | async function prompt(model: string, messages: any[]) { 68 | const response = await ollama.chat({ 69 | model, messages 70 | }); 71 | return response; 72 | } 73 | } 74 | 75 | /** 76 | * A simple case for generating a HelloWorld grammar 77 | */ 78 | const caseHelloWorld: Case = { 79 | name: "Hello World Grammar", 80 | context: [], 81 | history: [], 82 | prompt: "Generate a simple HelloWorld grammar in Langium.", 83 | expected_response: `Certainly. 
Here's an example of a possible HelloWorld grammar written in the Langium grammar language: 84 | 85 | \`\`\`langium 86 | grammar HelloWorld 87 | 88 | entry Model: 89 | (persons+=Person | greetings+=Greeting)*; 90 | 91 | Person: 92 | 'person' name=ID; 93 | 94 | Greeting: 95 | 'Hello' person=[Person:ID] '!'; 96 | 97 | hidden terminal WS: /\s+/; 98 | terminal ID: /[_a-zA-Z][\w_]*/; 99 | 100 | hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//; 101 | hidden terminal SL_COMMENT: /\/\/[^\n\r]*/; 102 | \`\`\` 103 | 104 | This grammar defines a simple HelloWorld language with two parser rules: Person and Greeting. The entry rule Model allows for an arbitrary sequence of Person and Greeting elements. A Person is defined by the keyword 'person' followed by an ID terminal, which represents the person's name. A Greeting consists of the word 'Hello' followed by a reference to a Person and an exclamation mark. The grammar also includes terminal rules for whitespace, identifiers, and comments.` 105 | }; 106 | 107 | /** 108 | * An example evaluator for evaluating Langium grammars & checking for embedding similarity (using Ollama) 109 | */ 110 | const langiumAndEmbeddingEvaluator = mergeEvaluators( 111 | 112 | // built-in evaluator for Langium grammars (i.e. the Langium DSL itself) 113 | new LangiumEvaluator(langiumServices.grammar), 114 | 115 | // then run the Ollama embedding evaluator to compare expected vs. 
actual
    new OllamaEmbeddingEvaluator('nomic-embed-text')
);

/**
 * Demo entry point: verifies the required Ollama models are installed
 * (offering to pull any that are missing), then runs a small evaluation
 * matrix over a single case and prints both the raw and the
 * per-case-averaged results.
 */
export async function runLangDevDemo() {

    // check if all the necessary models are installed via ollama
    const models = ['llama3.2:latest', 'codellama:latest', 'codegemma:latest', 'nomic-embed-text:latest'];
    const listedModels = (await ollama.list()).models;
    console.log('Available models: ', listedModels.map(m => m.name));
    // a model is "missing" only on an exact tag match failure
    const missingModels = models.filter(model => !listedModels.some(m => m.name === model));
    if (missingModels.length > 0) {
        console.error(`The following models are missing: ${missingModels.join(', ')}.`);
        // prompt to install
        const answer = await rl.question(`Do you want to install these missing models for this demo? (y/n) `);
        if (answer.toLowerCase() === 'y') {
            for (const model of missingModels) {
                console.log(`Installing model ${model}...`);
                // pull blocks until the model download completes
                await ollama.pull({
                    model: model
                });
            }
        } else {
            // user declined: abort the demo rather than run with missing models
            console.error('Please install missing models and try again.');
            return;
        }
    }

    const eMat = new EvalMatrix({

        // basic configuration
        config: {
            name: 'LangDev Demo',
            description: 'Showing basic evaluation capabilities of Langium AI',
            history_folder: '.langium-ai',
            num_runs: 3
        },

        // just a few runners
        runners: [
            Runners.llama3_2_3b,
            Runners.codellama,
            Runners.codegemma,
        ],

        // using our merged evaluator
        evaluators: [
            {
                name: 'Langium + Embedding Evaluator (merged)',
                eval: langiumAndEmbeddingEvaluator
            }
        ],

        // single case
        cases: [
            caseHelloWorld
        ]
    });

    // run the matrix
    const results = await eMat.run();

    // print the full results (as saved)
    console.log('Evaluation report: ');
    printResults(results);

    // print the average results
    const processedResults = averageAcrossCases(results);
    console.log('Average Evaluation report: ');
    printResults(processedResults);

}

/**
 * Helper to print results to the console.
 * Renders one table row per result; the listed columns must match the
 * keys produced by the merged Langium + embedding evaluator.
 */
function printResults(results: EvaluatorResult[]) {
    console.table(results.map(r => {
        return {
            name: r.name,
            ...r.data
        }
    }), ['name', 'errors', 'warnings', 'infos', 'hints', 'unassigned', 'similarity']);
}

/**
 * Helper type to describe the merged evaluator results
 */
type MergedEvaluatorResultType = LangiumEvaluatorResultData & EmbeddingEvaluatorResultData;

/**
 * Takes the last results we obtained, and builds a quick radar chart from them.
 * Reads the most recent report from the '.langium-ai' history folder and
 * writes './radar-chart.html'.
 */
export function generateChartFromLastResults() {
    const rawResults = loadLastResults('.langium-ai', 1);
    console.log('Last Evaluation report: ');

    // generate a radar chart for this run
    // all axes are normalized so that smaller is better
    generateRadarChart(
        'Radar Chart (smaller is better)',
        normalizeData(rawResults),
        './radar-chart.html',
        (data: MergedEvaluatorResultType) => {
            return {
                'Failures': data.failures,
                'Errors': data.errors,
                'Warnings': data.warnings,
                'Semantic Diff': 1.0 - data.similarity, // inverse similarity
                'Response Size': data.response_length ?? 0,
                'Time': data._runtime ?? 0
            }
        }
    );
}


--------------------------------------------------------------------------------
/packages/examples/example-dsl-evaluator/src/runners.ts:
--------------------------------------------------------------------------------
/******************************************************************************
 * Copyright 2024 - 2025 TypeFox GmbH
 * This program and the accompanying materials are made available under the
 * terms of the MIT License, which is available in the project root.
5 | ******************************************************************************/ 6 | 7 | import OpenAI from 'openai'; 8 | import ollama from 'ollama'; 9 | import { Message, Runner } from 'langium-ai-tools'; 10 | import { ChromaClient } from 'chromadb'; 11 | 12 | async function prompt(model: string, messages: any[]) { 13 | const response = await ollama.chat({ 14 | model, messages 15 | }); 16 | return response; 17 | } 18 | 19 | /** 20 | * Construct a runner for a model provided by Ollama 21 | */ 22 | function getOllamaRunner(name: string, model: string): Runner { 23 | return { 24 | name, 25 | runner: async (content: string, messages: Message[] = []) => { 26 | const newMsgs = [...messages, { role: 'user', content }]; 27 | return (await prompt(model, newMsgs)).message.content; 28 | } 29 | }; 30 | } 31 | 32 | /** 33 | * Helper to wrap retrieved documents for RAG in a system message 34 | */ 35 | async function getRagSystemMessage(content: string): Promise { 36 | return { 37 | role: 'system', 38 | content: `Additionally, utilize the following context to answer the user's question: \n\n${(await ragLookup(content)).join('\n======\n')}` 39 | }; 40 | } 41 | 42 | const LangiumDSLCollection = "langium-collection"; 43 | 44 | /** 45 | * Helper to perform a ChromaDB lookup to provide context for a RAG application, given some content 46 | */ 47 | async function ragLookup(content: string): Promise { 48 | // perform a database lookup first here via chroma 49 | const client = new ChromaClient({ path: 'http://localhost:8000' }); 50 | 51 | // create a collection 52 | const collection = await client.getCollection({ 53 | name: LangiumDSLCollection, 54 | embeddingFunction: { 55 | generate: async (texts: string[]) => { 56 | return (await ollama.embed({ 57 | model: 'nomic-embed-text', 58 | input: texts, 59 | keep_alive: 30 60 | })).embeddings; 61 | } 62 | } 63 | }); 64 | 65 | // embed 66 | const queryEmbeddings = (await ollama.embed({ 67 | model: 'nomic-embed-text', 68 | input: content, 
69 | keep_alive: 30 70 | })).embeddings; 71 | 72 | // query 73 | const results = await collection.query({ 74 | queryEmbeddings, 75 | nResults: 3, 76 | }); 77 | 78 | return results.documents as string[][]; 79 | } 80 | 81 | /** 82 | * Construct a runner w/ RAG for a model provided by Ollama 83 | */ 84 | function getOllamaRAGRunner(name: string, model: string): Runner { 85 | return { 86 | name, 87 | runner: async (content: string, messages: Message[] = []) => { 88 | const newMsgs = [await getRagSystemMessage(content), ...messages, { role: 'user', content }]; 89 | return (await prompt(model, newMsgs)).message.content; 90 | } 91 | }; 92 | } 93 | 94 | // ollama runners 95 | export const runner_llama3_2_3b = getOllamaRunner('llama3.2 3B', 'llama3.2:latest'); 96 | export const runner_llama3_2_1b = getOllamaRunner('llama3.2 1B', 'llama3.2:1b'); 97 | export const runner_llama3_1 = getOllamaRunner('llama3.1', 'llama3.1'); 98 | export const runner_codellama = getOllamaRunner('codellama', 'codellama'); 99 | export const runner_codegemma = getOllamaRunner('codegemma', 'codegemma'); 100 | export const runner_deepseek_coder_v2 = getOllamaRunner('deepseek-coder-v2', 'deepseek-coder-v2'); 101 | export const runner_qwen_2_5_coder = getOllamaRunner('qwen-2.5-coder 7B', 'qwen2.5-coder'); 102 | 103 | // RAG ollama runners 104 | export const runner_llama3_2_3b_rag = getOllamaRAGRunner('llama3.2 3B w/ RAG', 'llama3.2:latest'); 105 | export const runner_llama3_2_1b_rag = getOllamaRAGRunner('llama3.2 1B w/ RAG', 'llama3.2:1b'); 106 | export const runner_llama3_1_rag = getOllamaRAGRunner('llama3.1 w/ RAG', 'llama3.1'); 107 | export const runner_codellama_rag = getOllamaRAGRunner('codellama w/ RAG', 'codellama:latest'); 108 | export const runner_codegemma_rag = getOllamaRAGRunner('codegemma w/ RAG', 'codegemma:latest'); 109 | export const runner_deepseek_coder_v2_rag = getOllamaRAGRunner('deepseek-coder-v2 w/ RAG', 'deepseek-coder-v2'); 110 | export const runner_qwen_2_5_coder_rag = 
getOllamaRAGRunner('qwen-2.5-coder 7B w/ RAG', 'qwen2.5-coder');



// NOTE(review): the OpenAI runners below are copy/paste variants that differ only
// in runner name, model id, and whether a RAG system message is prepended — good
// candidates for a factory helper analogous to getOllamaRunner/getOllamaRAGRunner.

/**
 * GPT-3.5-turbo runner (no retrieval); reads OPENAI_API_KEY from the environment.
 */
export const runner_openai_gpt3_5_turbo: Runner = {
    name: 'openai-gpt3',
    runner: async (content: string, messages: Message[] = []) => {
        const newMsgs = [...messages, { role: 'user', content }];

        // client is created per call so the env var is read lazily
        const client = new OpenAI({
            apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted
        });

        // prompt & return the first choice
        const chatCompletion = await client.chat.completions.create({
            messages: newMsgs as any,
            model: 'gpt-3.5-turbo-0125',
        });
        return chatCompletion.choices[0].message.content as string;
    }
};

/**
 * GPT-3.5-turbo runner with a RAG system message prepended to the conversation.
 */
export const runner_openai_gpt3_5_turbo_rag: Runner = {
    name: 'openai-gpt3.5-turbo-rag',
    runner: async (content: string, messages: Message[] = []) => {
        const newMsgs = [await getRagSystemMessage(content), ...messages, { role: 'user', content }];

        const client = new OpenAI({
            apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted
        });

        // prompt & return the first choice
        const chatCompletion = await client.chat.completions.create({
            messages: newMsgs as any,
            model: 'gpt-3.5-turbo-0125',
        });
        return chatCompletion.choices[0].message.content as string;
    }
};

/**
 * GPT-4o-mini runner (no retrieval).
 */
export const runner_openai_gpt4o_mini: Runner = {
    name: 'openai-gpt4o-mini',
    runner: async (content: string, messages: Message[] = []) => {
        const newMsgs = [...messages, { role: 'user', content }];

        const client = new OpenAI({
            apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted
        });

        // prompt & return the first choice
        const chatCompletion = await client.chat.completions.create({
            messages: newMsgs as any,
            model: 'gpt-4o-mini',
        });
        return chatCompletion.choices[0].message.content as string;
    }
};

/**
 * GPT-4o runner (no retrieval).
 */
export const runner_openai_gpt4o: Runner = {
    name: 'openai-gpt4o',
    runner: async (content: string, messages: Message[] = []) => {
        const newMsgs = [...messages, { role: 'user', content }];

        const client = new OpenAI({
            apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted
        });

        // prompt & return the first choice
        const chatCompletion = await client.chat.completions.create({
            messages: newMsgs as any,
            model: 'gpt-4o',
        });
        return chatCompletion.choices[0].message.content as string;
    }
};

/**
 * GPT-4o runner with a RAG system message prepended.
 */
export const runner_openai_gpt4o_rag: Runner = {
    name: 'openai-gpt4o-rag',
    runner: async (content: string, messages: Message[] = []) => {
        const newMsgs = [await getRagSystemMessage(content), ...messages, { role: 'user', content }];

        const client = new OpenAI({
            apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted
        });

        // prompt & return the first choice
        const chatCompletion = await client.chat.completions.create({
            messages: newMsgs as any,
            model: 'gpt-4o',
        });
        return chatCompletion.choices[0].message.content as string;
    }
};

/**
 * GPT-4o-mini runner with a RAG system message prepended.
 */
export const runner_openai_gpt4o_mini_rag: Runner = {
    name: 'openai-gpt4o-mini-rag',
    runner: async (content: string, messages: Message[] = []) => {
        const newMsgs = [await getRagSystemMessage(content), ...messages, { role: 'user', content }];

        const client = new OpenAI({
            apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted
        });

        // prompt & return the first choice
        const chatCompletion = await client.chat.completions.create({
            messages: newMsgs as any,
            model: 'gpt-4o-mini',
        });
        return
chatCompletion.choices[0].message.content as string; 219 | } 220 | }; -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/evaluator/chart.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | /** 8 | * Generates & exports an HTML radar chart report using plotly JS 9 | */ 10 | 11 | import { EvaluatorResult, EvaluatorResultData, averageAcrossRunners, loadReport } from "./evaluator.js"; 12 | import { writeFileSync, readdirSync, readFileSync } from 'fs'; 13 | import * as path from 'path'; 14 | 15 | /** 16 | * Generates an HTML radar chart from the provided data 17 | * @param evalResults Evaluator results to chart 18 | * @param dest Output file to write the chart to 19 | * @param rFunc polar r function, used to extract the r values from the data 20 | * @param theta theta values, i.e. the property names to use for the radar chart 21 | */ 22 | export function generateRadarChart( 23 | chartName: string, 24 | evalResults: EvaluatorResult[], 25 | dest: string, 26 | rFunc: (d: T, metadata: Record) => Record, 27 | preprocess?: (arr: EvaluatorResult[]) => EvaluatorResult[] 28 | ): void { 29 | 30 | // process results first to average out data (either using the user supplied function, or defaulting to average across runners) 31 | const processedResults = preprocess ? 
preprocess(evalResults) : averageAcrossRunners(evalResults); 32 | 33 | const data = processedResults.map((result) => { 34 | const resultData = result.data as T; 35 | const rfuncResult = rFunc(resultData, result.metadata); 36 | const theta = Object.keys(rfuncResult); 37 | const r = Object.values(rfuncResult); 38 | 39 | return { 40 | type: 'scatterpolar', 41 | r, 42 | theta, 43 | fill: 'toself', 44 | name: result.name 45 | }; 46 | }); 47 | 48 | const layout = { 49 | title: chartName, 50 | name: chartName, 51 | polar: { 52 | radialaxis: { 53 | visible: true, 54 | range: [0, 1] 55 | } 56 | }, 57 | showlegend: true, 58 | width: 1000, 59 | height: 800 60 | }; 61 | 62 | const html = ` 63 | 64 | 65 | 66 | ${chartName} 67 | 68 | 69 | 70 |
71 | 76 | 77 | 78 | `; 79 | 80 | writeFileSync(dest, html); 81 | console.log(`Radar chart report written to: ${dest}`); 82 | } 83 | 84 | export function generateHistogram( 85 | chartName: string, 86 | evalResults: EvaluatorResult[], 87 | dest: string, 88 | dataFunc: (d: T, metadata: Record) => Record, 89 | preprocess?: (arr: EvaluatorResult[]) => EvaluatorResult[] 90 | ) { 91 | 92 | // process results first to average out data (either using the user supplied function, or defaulting to average across runners) 93 | const processedResults = preprocess ? preprocess(evalResults) : averageAcrossRunners(evalResults); 94 | 95 | const data = processedResults.map((result) => { 96 | const data = result.data as T; 97 | const dd = dataFunc(data, result.metadata); 98 | const yLabels = Object.keys(dd); 99 | const xData = Object.values(dd); 100 | return { 101 | type: 'bar', 102 | x: xData, 103 | y: yLabels, 104 | orientation: 'h', 105 | name: result.name 106 | }; 107 | }); 108 | 109 | const layout = { 110 | title: chartName, 111 | barmode: 'group', 112 | showlegend: true, 113 | width: 1000, 114 | height: 800 115 | }; 116 | 117 | const html = ` 118 | 119 | 120 | 121 | ${chartName} 122 | 123 | 124 | 125 |
126 | 131 | 132 | 133 | `; 134 | 135 | writeFileSync(dest, html); 136 | console.log(`Histogram report written to: ${dest}`); 137 | } 138 | 139 | type RunnerName = string; 140 | 141 | /** 142 | * Normalizes all numeric data entries in results (while also retaining non-numeric entries) 143 | */ 144 | export function normalizeData(data: EvaluatorResult[]): EvaluatorResult[] { 145 | const maxValues = new Map(); 146 | 147 | for (const result of data) { 148 | const d = result.data as EvaluatorResultData; 149 | for (const [key, value] of Object.entries(d)) { 150 | if (typeof value !== 'number') { 151 | continue; 152 | } 153 | const existingMax = maxValues.get(key) ?? 0; 154 | if (value > existingMax) { 155 | maxValues.set(key, value); 156 | } 157 | } 158 | } 159 | 160 | for (const result of data) { 161 | const d = result.data as EvaluatorResultData; 162 | for (const [key, value] of Object.entries(d)) { 163 | if (typeof value === 'number') { 164 | const max = maxValues.get(key) ?? 1; 165 | d[key] = value / max; 166 | } 167 | } 168 | } 169 | 170 | return data; 171 | } 172 | 173 | /** 174 | * Generates a historical chart from the provided data, showing runners along the X, and their performance over time along the X axis 175 | * @param chartName 176 | * @param folder 177 | * @param dest 178 | * @param dataFunc 179 | * @param options 180 | */ 181 | export function generateHistoricalChart( 182 | chartName: string, 183 | folder: string, 184 | dest: string, 185 | dataFunc: (d: T, metadata: Record) => number, 186 | options?: { 187 | preprocess?: (arr: EvaluatorResult[]) => EvaluatorResult[], 188 | filter?: (r: EvaluatorResult) => boolean, 189 | take?: number, 190 | chartType?: string 191 | } 192 | ) { 193 | // generate a historical chart by calculating the average for runners in all previous reports, and organizing them in ascending date order 194 | let files = readdirSync(folder).filter(f => f.endsWith('.json')); 195 | 196 | // array of results, where each array of results is 
presumed to be a stream of results from a collection of historical runs 197 | const runnerResultsMap: Map = new Map(); 198 | 199 | // take the most recent files if take is set 200 | if (options?.take) { 201 | files = files.sort().slice(0, options.take); 202 | } 203 | 204 | for (const file of files) { 205 | // retrieve results from this file 206 | const report = loadReport(path.join(folder, file)); 207 | const results = report.results; 208 | const date: string = report.date; 209 | console.log(`Processing historical results from: ${date}`); 210 | 211 | // process results first 212 | let processedResults = options?.preprocess ? options.preprocess(results) : averageAcrossRunners(results); 213 | // normalize 214 | processedResults = normalizeData(processedResults); 215 | 216 | // add to the map based by runner name 217 | for (const result of processedResults) { 218 | if (options?.filter && !options.filter(result)) { 219 | // skip 220 | continue; 221 | } 222 | 223 | const name = result.metadata.runner; 224 | const existingResults = runnerResultsMap.get(name) ?? []; 225 | 226 | const rc = { 227 | ...result 228 | }; 229 | rc.metadata.date = new Date(date).toISOString(); 230 | 231 | existingResults.push(result); 232 | runnerResultsMap.set(name, existingResults); 233 | } 234 | } 235 | 236 | const allData: unknown[] = []; 237 | 238 | // organize by date in ascending order 239 | for (let [name, results] of runnerResultsMap) { 240 | results.sort((a, b) => { 241 | return new Date(a.metadata.date).getTime() - new Date(b.metadata.date).getTime(); 242 | }); 243 | 244 | const runners = results.map(r => r.metadata.runner); 245 | const data = results.map(r => dataFunc(r.data as T, r.metadata)).sort(); 246 | 247 | allData.push({ 248 | type: options?.chartType ? 
options.chartType : 'scatter', 249 | x: runners, 250 | y: data, 251 | name 252 | }); 253 | } 254 | 255 | const layout = { 256 | title: chartName, 257 | showlegend: true, 258 | width: 1000, 259 | height: 1000 260 | }; 261 | 262 | const html = ` 263 | 264 | 265 | 266 | ${chartName} 267 | 268 | 269 | 270 |
271 | 276 | 277 | 278 | `; 279 | 280 | writeFileSync(dest, html); 281 | console.log(`Historical report written to: ${dest}`); 282 | 283 | } -------------------------------------------------------------------------------- /packages/langium-ai-tools/src/analyzer/document-analyzer.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | * 6 | * @author Dennis Hübner 7 | ******************************************************************************/ 8 | 9 | import { CstUtils, Grammar, GrammarAST, LangiumDocument, isLeafCstNode } from "langium"; 10 | import { resolveTransitiveImports, } from 'langium/grammar'; 11 | import { LangiumServices } from "langium/lsp"; 12 | import { EvaluationContext } from "../evaluator/document-evaluator.js"; 13 | import { EvaluatorResult } from "../evaluator/evaluator.js"; 14 | import { LangiumEvaluator, LangiumEvaluatorResultData } from "../evaluator/langium-evaluator.js"; 15 | 16 | /** 17 | * Extends LangiumEvaluator and adds analysis capabilities. 18 | */ 19 | export class LangiumDocumentAnalyzer extends LangiumEvaluator { 20 | 21 | public static readonly METADATA_KEY = 'syntax_statistics'; 22 | 23 | private readonly analysisOptions: AnalysisOptions; 24 | 25 | /** 26 | * Creates an instance of LangiumDocumentAnalyzer. 
27 | * @param services Langium services 28 | * @param analysisOptions Analysis options 29 | * @example 30 | * ```typescript 31 | * const analyzer = new LangiumDocumentAnalyzer(services, { 32 | * analysisMode: AnalysisMode.ALL, 33 | * excludeRules: ['DeprecatedRule'], 34 | * computeDiversity: false 35 | * }); 36 | * ``` 37 | */ 38 | constructor(services: T, analysisOptions: Partial = {}) { 39 | super(services); 40 | this.analysisOptions = { ...DEFAULT_OPTIONS, ...analysisOptions }; 41 | } 42 | 43 | evaluateDocument(doc: LangiumDocument, ctx: EvaluationContext): EvaluatorResult { 44 | const validationResult = super.evaluateDocument(doc, ctx); 45 | if (this.analysisOptions.analysisMode !== AnalysisMode.NO_STATISTIC && validationResult.data.failures === 0) { 46 | // Add syntax usage statistics only if build was successful 47 | const statistics = this.collectSyntaxUsageStatistics(doc, this.services.Grammar); 48 | validationResult.metadata[LangiumDocumentAnalyzer.METADATA_KEY] = statistics; 49 | } 50 | return validationResult; 51 | } 52 | 53 | collectSyntaxUsageStatistics(doc: LangiumDocument, grammar: Grammar): SyntaxStatistic { 54 | const rootCstNode = doc.parseResult.value.$cstNode; 55 | if (!rootCstNode) { 56 | return this.createEmptySyntaxStatistic(); 57 | } 58 | const { includeImportedRules, excludeRules, computeDiversity, includeHiddenRules } = this.analysisOptions; 59 | const excludedRules = new Set(excludeRules); 60 | const isRuleExcluded = (ruleName: string) => ruleName === 'WS' || excludedRules.has(ruleName); 61 | 62 | const allRules = includeImportedRules ? this.collectAllRules(grammar) : grammar.rules; 63 | const ruleUsage: Record = {}; 64 | // Initialize rule usage map, excluding rules specified in excludeRules. Also skip entry rule. 
65 | for (const rule of allRules) { 66 | if (!isRuleExcluded(rule.name)) { 67 | if ( 68 | (GrammarAST.isParserRule(rule) && rule.entry) 69 | || (GrammarAST.isTerminalRule(rule) && rule.hidden && !includeHiddenRules) 70 | ) { 71 | continue; 72 | } 73 | ruleUsage[rule.name] = 0; 74 | } 75 | } 76 | 77 | for (const cstNode of CstUtils.streamCst(rootCstNode)) { 78 | const grammarSource = cstNode.grammarSource; 79 | 80 | const addIfNotExcluded = (ruleName: string) => { 81 | if (!isRuleExcluded(ruleName)) { 82 | ruleUsage[ruleName] = (ruleUsage[ruleName] ?? 0) + 1; 83 | } 84 | }; 85 | 86 | if (grammarSource && GrammarAST.isRuleCall(grammarSource)) { 87 | // For now handle only RuleCalls 88 | addIfNotExcluded(grammarSource.rule.ref?.name ?? 'unknown'); 89 | } else if (includeHiddenRules && cstNode.hidden && isLeafCstNode(cstNode)) { 90 | addIfNotExcluded(cstNode.tokenType.name); 91 | } 92 | } 93 | 94 | let diversity = { entropy: 0, giniCoefficient: 0, simpsonIndex: 0 }; 95 | if (computeDiversity) { 96 | diversity = { 97 | entropy: this.computeEntropy(ruleUsage), 98 | giniCoefficient: this.computeGiniCoefficient(ruleUsage), 99 | simpsonIndex: this.computeSimpsonIndex(ruleUsage) 100 | }; 101 | } 102 | const coverage = this.computeCoverage(ruleUsage); 103 | return { ruleUsage, coverage, diversity }; 104 | } 105 | 106 | /** 107 | * Computes coverage as percentage of used rules over all available rules 108 | */ 109 | computeCoverage(ruleUsage: Record): number { 110 | const usedRules = Object.values(ruleUsage).filter(count => count > 0).length; 111 | return usedRules > 0 ? 
(usedRules / Object.keys(ruleUsage).length) * 100 : 0; 112 | } 113 | 114 | /** 115 | * Computes Shannon entropy - measure of information diversity 116 | * Higher values indicate more diverse usage patterns 117 | */ 118 | computeEntropy(ruleUsage: Record): number { 119 | const totalUsage = Object.values(ruleUsage).reduce((sum, count) => sum + count, 0); 120 | if (totalUsage === 0) return 0; 121 | 122 | let entropy = 0; 123 | for (const count of Object.values(ruleUsage)) { 124 | if (count > 0) { 125 | const probability = count / totalUsage; 126 | entropy -= probability * Math.log2(probability); 127 | } 128 | } 129 | return entropy; 130 | } 131 | 132 | /** 133 | * Computes Gini coefficient - measure of inequality in rule usage 134 | * 0 = perfect equality, 1 = maximum inequality 135 | */ 136 | computeGiniCoefficient(ruleUsage: Record): number { 137 | const counts = Object.values(ruleUsage).sort((a, b) => a - b); 138 | const n = counts.length; 139 | if (n === 0) return 0; 140 | 141 | const sum = counts.reduce((acc, val) => acc + val, 0); 142 | if (sum === 0) return 0; 143 | 144 | let numerator = 0; 145 | for (let i = 0; i < n; i++) { 146 | numerator += (2 * (i + 1) - n - 1) * counts[i]; 147 | } 148 | return numerator / (n * sum); 149 | } 150 | 151 | /** 152 | * Computes Simpson's diversity index - probability that two randomly selected items are different 153 | * Higher values indicate more diversity 154 | */ 155 | computeSimpsonIndex(ruleUsage: Record): number { 156 | const totalUsage = Object.values(ruleUsage).reduce((sum, count) => sum + count, 0); 157 | if (totalUsage === 0) return 0; 158 | 159 | let sum = 0; 160 | for (const count of Object.values(ruleUsage)) { 161 | const probability = count / totalUsage; 162 | sum += probability * probability; 163 | } 164 | 165 | return 1 - sum; // Simpson's diversity index (1-D) 166 | } 167 | 168 | /** 169 | * Extracts syntax statistics from the evaluation result. 170 | * @param result The evaluation result. 
171 | * @returns The extracted syntax statistics or undefined if not found. 172 | */ 173 | extractStatisticsFromResult(result: Partial | undefined): SyntaxStatistic | undefined { 174 | const metadata = result?.metadata; 175 | if (metadata && metadata[LangiumDocumentAnalyzer.METADATA_KEY]) { 176 | return metadata[LangiumDocumentAnalyzer.METADATA_KEY] as SyntaxStatistic; 177 | } 178 | return undefined; 179 | } 180 | 181 | protected collectAllRules(grammar: Grammar): GrammarAST.AbstractRule[] { 182 | try { 183 | return grammar.rules.concat( 184 | resolveTransitiveImports(this.services.shared.workspace.LangiumDocuments, grammar).map(g => g.rules).flat() 185 | ); 186 | } catch (e) { 187 | console.error('Error resolving imports: ', e); 188 | return []; 189 | } 190 | } 191 | 192 | protected createEmptySyntaxStatistic(): SyntaxStatistic { 193 | return { 194 | ruleUsage: {}, 195 | coverage: 0, 196 | diversity: { 197 | entropy: 0, 198 | giniCoefficient: 0, 199 | simpsonIndex: 0 200 | } 201 | }; 202 | } 203 | } 204 | 205 | /** 206 | * Analysis mode for controlling what analysis operations to perform 207 | */ 208 | export enum AnalysisMode { 209 | ALL = 'ALL', 210 | NO_STATISTIC = 'NO_STATISTIC' 211 | } 212 | 213 | interface AnalysisOptions { 214 | analysisMode: AnalysisMode; 215 | /** 216 | * Filter for specific rules (e.g deprecated) to exclude in the analysis. 217 | * Rule WS (whitespace) is always excluded. 218 | */ 219 | excludeRules: string[]; 220 | /** 221 | * Whether to include rules from imported grammars. Default is true. 222 | */ 223 | includeImportedRules: boolean; 224 | /** 225 | * Whether to include hidden tokens (like comments, whitespace) in the analysis. Default is false. 226 | * Rule WS (whitespace) is always excluded. 227 | */ 228 | includeHiddenRules: boolean; 229 | /** 230 | * Whether to compute diversity metrics for rule usage. Default is true. 
231 | */ 232 | computeDiversity: boolean; 233 | } 234 | 235 | const DEFAULT_OPTIONS: AnalysisOptions = { 236 | analysisMode: AnalysisMode.ALL, 237 | excludeRules: [], 238 | includeImportedRules: true, 239 | includeHiddenRules: true, 240 | computeDiversity: true 241 | }; 242 | 243 | /** 244 | * Type representing syntax usage statistics. 245 | */ 246 | export type SyntaxStatistic = { 247 | /** Map of rule names to their usage counts */ 248 | ruleUsage: Record; 249 | 250 | /** Percentage of used rules compared to all available rules */ 251 | coverage: number; 252 | 253 | /** Diversity metrics for rule usage patterns */ 254 | diversity: { 255 | 256 | /** 257 | * Shannon entropy - information diversity measure. 258 | * **Range:** 0 to log₂(n) where n = number of rules. 259 | * - **Low (0-1):** dominated by few rules 260 | * - **Medium (1-3):** moderate diversity 261 | * - **High (>3):** high diversity 262 | */ 263 | entropy: number; 264 | 265 | /** 266 | * Gini coefficient - inequality measure. Range: 0 to 1. 267 | * - **Low (0-0.3):** equal distribution 268 | * - **Medium (0.3-0.7):** moderate inequality 269 | * - **High (0.7-1):** high inequality 270 | */ 271 | giniCoefficient: number; 272 | 273 | /** 274 | * Simpson's diversity index - probability that two randomly selected items are different. **Range:** 0 to 1. 
275 | * - **Low (0-0.3):** low diversity 276 | * - **Medium (0.3-0.7):** moderate diversity 277 | * - **High (0.7-1):** high diversity 278 | */ 279 | simpsonIndex: number; 280 | }; 281 | } -------------------------------------------------------------------------------- /packages/examples/example-dsl-evaluator/src/langium-cases.ts: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright 2024 - 2025 TypeFox GmbH 3 | * This program and the accompanying materials are made available under the 4 | * terms of the MIT License, which is available in the project root. 5 | ******************************************************************************/ 6 | 7 | import { Case, Message } from "langium-ai-tools"; 8 | 9 | /** 10 | * Common history for all cases here 11 | */ 12 | const history: Message[] = [ 13 | { 14 | role: 'system', 15 | content: `You are an expert in Langium grammars. 16 | Your role is to provide accurate, detailed, and insightful explanations and advice on Langium grammar-related topics. 17 | You will answer questions, provide examples, and offer guidance on best practices, ensuring clarity and precision in all responses. 18 | For reference, here is a simple Langium grammar: 19 | 20 | \`\`\`langium 21 | // starts a grammar w/ a name 22 | grammar HelloWorld 23 | 24 | // entry parser rule 25 | entry Model: 26 | // parses a list of zero or more persons & greetings in any order 27 | (persons+=Person | greetings+=Greeting)*; 28 | 29 | // person parser rule 30 | Person: 31 | // parses a person keyword followed by an ID terminal (the person's name) 32 | 'person' name=ID; 33 | 34 | // greeting parser rule 35 | Greeting: 36 | // parses Hello followed by a person reference and an exclamation mark at the end 37 | 'Hello' person=[Person:ID] '!'; 38 | 39 | // whitespace is a hidden terminal, i.e. 
ignored and not part of the AST 40 | hidden terminal WS: /\s+/; 41 | // ID is a terminal rule that matches this regex 42 | terminal ID: /[_a-zA-Z][\w_]*/; 43 | 44 | // multi-line & single-line comments are also hidden terminals 45 | hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//; 46 | hidden terminal SL_COMMENT: /\/\/[^\n\r]*/; 47 | \`\`\` 48 | 49 | Use this information to help answer questions about Langium grammars. 50 | ` } 51 | ]; 52 | 53 | export const cases: Case[] = [ 54 | { 55 | name: "Hello World Grammar", 56 | context: [], 57 | history, 58 | prompt: "Generate a simple HelloWorld grammar in Langium.", 59 | expected_response: `Certainly. Here's an example of a possible HelloWorld grammar written in the Langium grammar language: 60 | 61 | \`\`\`langium 62 | grammar HelloWorld 63 | 64 | entry Model: 65 | (persons+=Person | greetings+=Greeting)*; 66 | 67 | Person: 68 | 'person' name=ID; 69 | 70 | Greeting: 71 | 'Hello' person=[Person:ID] '!'; 72 | 73 | hidden terminal WS: /\s+/; 74 | terminal ID: /[_a-zA-Z][\w_]*/; 75 | 76 | hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//; 77 | hidden terminal SL_COMMENT: /\/\/[^\n\r]*/; 78 | \`\`\` 79 | 80 | This grammar defines a simple HelloWorld language with two parser rules: Person and Greeting. The entry rule Model allows for an arbitrary sequence of Person and Greeting elements. A Person is defined by the keyword 'person' followed by an ID terminal, which represents the person's name. A Greeting consists of the word 'Hello' followed by a reference to a Person and an exclamation mark. The grammar also includes terminal rules for whitespace, identifiers, and comments.` 81 | }, 82 | { 83 | name: "StateMachine Grammar", 84 | context: [], 85 | history, 86 | prompt: "Create a simple Statemachine grammar in Langium.", 87 | expected_response: `Sure. 
Here's an example of a basic calculator grammar written in the Langium grammar language: 88 | 89 | \`\`\`langium 90 | grammar Statemachine 91 | 92 | entry Statemachine: 93 | 'statemachine' name=ID 94 | ('events' events+=Event+)? 95 | ('commands' commands+=Command+)? 96 | 'initialState' init=[State] 97 | states+=State*; 98 | 99 | Event: 100 | name=ID; 101 | 102 | Command: 103 | name=ID; 104 | 105 | State: 106 | 'state' name=ID 107 | ('actions' '{' actions+=[Command]+ '}')? 108 | transitions+=Transition* 109 | 'end'; 110 | 111 | Transition: 112 | event=[Event] '=>' state=[State]; 113 | 114 | hidden terminal WS: /\s+/; 115 | terminal ID: /[_a-zA-Z][\w_]*/; 116 | 117 | hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//; 118 | hidden terminal SL_COMMENT: /\/\/[^\n\r]*/; 119 | 120 | \`\`\` 121 | 122 | This grammar defines a simple Statemachine language with events, commands, states, and an initial state. The Statemachine rule starts with the keyword 'statemachine' followed by an ID representing the name of the statemachine. It can include lists of events and commands, an initial state, and a sequence of states. Each event and command is defined by an ID. A state consists of a name, optional actions, and a list of zero or more transitions, followed by 'end'. 
123 | ` 124 | }, 125 | { 126 | name: 'Domain Model', 127 | context: [], 128 | history, 129 | prompt: 'Create a simple Domain Model grammar in Langium.', 130 | expected_response: `Here's an example of a Domain Model grammar written in the Langium grammar language: 131 | 132 | \`\`\`langium 133 | grammar DomainModel 134 | 135 | entry Domainmodel: 136 | (elements+=AbstractElement)*; 137 | 138 | AbstractElement: 139 | PackageDeclaration | Type; 140 | 141 | PackageDeclaration: 142 | 'package' name=QualifiedName '{' 143 | (elements+=AbstractElement)* 144 | '}'; 145 | 146 | Type: 147 | DataType | Entity; 148 | 149 | DataType: 150 | 'datatype' name=ID; 151 | 152 | Entity: 153 | 'entity' name=ID ('extends' superType=[Entity:QualifiedName])? '{' 154 | (features+=Feature)* 155 | '}'; 156 | 157 | Feature: 158 | (many?='many')? name=ID ':' type=[Type:QualifiedName]; 159 | 160 | QualifiedName returns string: 161 | ID ('.' ID)*; 162 | 163 | hidden terminal WS: /\s+/; 164 | terminal ID: /[_a-zA-Z][\w_]*/; 165 | 166 | hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//; 167 | hidden terminal SL_COMMENT: /\/\/[^\n\r]*/; 168 | \`\`\` 169 | 170 | This grammar defines a Domain Model language with elements, package declarations, types, data types, entities, and features. 171 | The entry rule Domainmodel allows for an arbitrary sequence of AbstractElement elements. 172 | An AbstractElement can be a PackageDeclaration or a Type. 173 | A PackageDeclaration starts with the keyword 'package' followed by a qualified name and a block of zero or more elements. 174 | A Type can be a DataType or an Entity. 175 | A DataType is defined by the keyword 'datatype' followed by an ID terminal. 176 | An Entity is defined by the keyword 'entity' followed by an ID terminal, an optional super type, and a block of zero or more features. 177 | A Feature can be a single or multiple association with a type. 
178 | ` 179 | }, 180 | { 181 | name: 'Arithmetics', 182 | context: [], 183 | history, 184 | prompt: 'Create a simple Arithmetics grammar in Langium.', 185 | expected_response: `Here's an example of an Arithmetics grammar written in the Langium grammar language: 186 | 187 | \`\`\`langium 188 | grammar Arithmetics 189 | 190 | entry Module: 191 | 'module' name=ID 192 | (statements+=Statement)*; 193 | 194 | Statement: 195 | Definition | Evaluation; 196 | 197 | Definition: 198 | 'def' name=ID ('(' args+=DeclaredParameter (',' args+=DeclaredParameter)* ')')? 199 | ':' expr=Expression ';'; 200 | 201 | DeclaredParameter: 202 | name=ID; 203 | 204 | type AbstractDefinition = Definition | DeclaredParameter; 205 | 206 | Evaluation: 207 | expression=Expression ';'; 208 | 209 | Expression: 210 | Addition; 211 | 212 | Addition infers Expression: 213 | Multiplication ({infer BinaryExpression.left=current} operator=('+' | '-') right=Multiplication)*; 214 | 215 | Multiplication infers Expression: 216 | Exponentiation ({infer BinaryExpression.left=current} operator=('*' | '/') right=Exponentiation)*; 217 | 218 | Exponentiation infers Expression: 219 | Modulo ({infer BinaryExpression.left=current} operator='^' right=Modulo)*; 220 | 221 | Modulo infers Expression: 222 | PrimaryExpression ({infer BinaryExpression.left=current} operator='%' right=PrimaryExpression)*; 223 | 224 | PrimaryExpression infers Expression: 225 | '(' Expression ')' | 226 | {infer NumberLiteral} value=NUMBER | 227 | {infer FunctionCall} func=[AbstractDefinition] ('(' args+=Expression (',' args+=Expression)* ')')?; 228 | 229 | hidden terminal WS: /\s+/; 230 | terminal ID: /[_a-zA-Z][\w_]*/; 231 | terminal NUMBER returns number: /[0-9]+(\.[0-9]*)?/; 232 | 233 | hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//; 234 | hidden terminal SL_COMMENT: /\/\/[^\n\r]*/; 235 | \`\`\` 236 | 237 | This grammar defines an Arithmetics language with expressions & definitions (functions). 
It effectively allows for regular arithmetic operations, including addition, subtraction, multiplication, division, exponentiation, and modulo operations.` 238 | }, 239 | { 240 | name: 'MiniLogo', 241 | context: [], 242 | history, 243 | prompt: `Create a simple MiniLogo grammar in Langium, using the MiniLogo language (very akin to Turtle in Python and the original Logo language). 244 | It should support statements & definitions. The statements should be commands & macros, while the definitions should be named functions. 245 | The only commands are pen, move, color, and for (a very simple loop mechanic like \`for x = 1 to 10 { ... }\`)`, 246 | expected_response: `Here's an example of a MiniLogo grammar written in the Langium grammar language. 247 | This grammar supports statements and definitions for the MiniLogo language, which is similar to Turtle graphics. 248 | The statements include commands like pen, move, color, and for, as well as macros for defining functions. 249 | Here's the grammar: 250 | 251 | \`\`\`langium 252 | grammar MiniLogo 253 | 254 | entry Model: (stmts+=Stmt | defs+=Def)*; 255 | 256 | // Statements 257 | Stmt: Cmd | Macro; 258 | Cmd: Pen | Move | Color | For; 259 | Macro: def=[Def:ID] '(' (args+=Expr (',' args+=Expr)*)? ')'; 260 | 261 | // Definitions 262 | Def: 'def' name=ID '(' (params+=Param (',' params+=Param)*)? 
')' Block; 263 | 264 | // Commands 265 | Pen: 'pen' '(' mode=('up' | 'down') ')'; 266 | Move: 'move' '(' ex=Expr ',' ey=Expr ')'; 267 | Color: 'color' '(' ((r = Expr ',' g=Expr ',' b=Expr) | color=ID | color=HEX) ')'; 268 | For: 'for' var=Param '=' e1=Expr 'to' e2=Expr Block; 269 | 270 | 271 | fragment Block: '{' body+=Stmt* '}'; 272 | 273 | Param: name=ID; 274 | 275 | 276 | // Expressions 277 | Expr: Add; 278 | 279 | Add infers Expr: 280 | Mult ({infer BinExpr.e1=current} op=('+'|'-') e2=Mult)*; 281 | Mult infers Expr: 282 | PrimExpr ({infer BinExpr.e1=current} op=('*'|'/') e2=PrimExpr)*; 283 | 284 | PrimExpr: Lit | Ref | Group | NegExpr; 285 | 286 | Lit: val=NUMBER; 287 | Ref: val=[Param:ID]; 288 | Group: '(' ge=Expr ')'; 289 | NegExpr: '-' ne=Expr; 290 | 291 | // Terminals 292 | terminal HEX returns string: /#(\d|[a-fA-F]){3,6}/; 293 | terminal ID returns string: /[_a-zA-Z][\w_]*/; 294 | terminal NUMBER returns number: /(?:(?:-?[0-9]+)?\.[0-9]+)|-?[0-9]+/; 295 | hidden terminal WS: /\s+/; 296 | hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//; 297 | hidden terminal SL_COMMENT: /\/\/[^\n\r]*/; 298 | \`\`\` 299 | 300 | This grammar defines a MiniLogo language with statements and definitions. The Model rule allows for an arbitrary sequence of statements and definitions. A statement can be a command or a macro, while a definition is a named function. The commands include pen, move, color, and for, which is a simple loop mechanic. The expressions support arithmetic operations like addition, subtraction, multiplication, and division. 301 | You can use this grammar to create MiniLogo programs for drawing shapes and patterns. 302 | ` 303 | } 304 | ]; --------------------------------------------------------------------------------