├── .gitignore
├── Cargo.lock
├── Cargo.toml
├── README.md
├── crabwalk-web
│   ├── .gitignore
│   ├── DEBUG.md
│   ├── README.md
│   ├── bin
│   │   └── crabwalk-web.js
│   ├── eslint.config.js
│   ├── index.html
│   ├── package-lock.json
│   ├── package.json
│   ├── perspective.d.ts
│   ├── postcss.config.js
│   ├── public
│   │   ├── perspective-init.html
│   │   ├── vite.svg
│   │   ├── wasm-worker.js
│   │   └── wasm
│   │       ├── perspective-client.wasm
│   │       ├── perspective-js.wasm
│   │       ├── perspective-server.wasm
│   │       ├── perspective-server.worker.js
│   │       ├── perspective-view.wasm
│   │       ├── perspective-viewer.wasm
│   │       ├── perspective.esm.js
│   │       ├── perspective.js
│   │       └── perspective.wasm
│   ├── run-react-app.js
│   ├── scripts
│   │   ├── debug_mermaid.sh
│   │   ├── run-with-db.sh
│   │   └── setup-wasm.js
│   ├── serve-perspective-test.js
│   ├── src
│   │   ├── App.tsx
│   │   ├── assets
│   │   │   └── react.svg
│   │   ├── components
│   │   │   ├── DatabaseExplorer.tsx
│   │   │   ├── MermaidDiagram.tsx
│   │   │   ├── SqlQueryPanel.tsx
│   │   │   ├── SqlViewer.tsx
│   │   │   ├── TableViewer.css
│   │   │   └── TableViewer.tsx
│   │   ├── global.d.ts
│   │   ├── index.css
│   │   ├── main.tsx
│   │   ├── perspective.d.ts
│   │   ├── server
│   │   │   ├── api.ts
│   │   │   └── index.ts
│   │   ├── test
│   │   │   ├── MermaidTest.tsx
│   │   │   ├── PerspectiveTest.tsx
│   │   │   ├── perspective-cdn-script-tags.html
│   │   │   ├── perspective-cdn.html
│   │   │   ├── perspective-direct.html
│   │   │   ├── perspective-simple.html
│   │   │   ├── perspective-test-fixed.html
│   │   │   ├── perspective-test-page.html
│   │   │   ├── perspective-test.html
│   │   │   └── test.html
│   │   ├── types.ts
│   │   ├── types
│   │   │   └── perspective.d.ts
│   │   ├── utils
│   │   │   ├── chroma-shim.js
│   │   │   ├── duckdb.ts
│   │   │   ├── lineageProcessor.ts
│   │   │   ├── projectLoader.ts
│   │   │   ├── schemaParser.ts
│   │   │   └── sqliteFallback.ts
│   │   └── vite-env.d.ts
│   ├── tsconfig.app.json
│   ├── tsconfig.json
│   ├── tsconfig.node.json
│   ├── tsconfig.server.json
│   └── vite.config.ts
├── crabwalk_schema.html
├── database_schema.xml
├── examples
│   ├── jaffle_shop
│   │   ├── README.md
│   │   ├── config.json
│   │   ├── database_schema.xml
│   │   ├── lineage.mmd
│   │   ├── lineage
│   │   │   └── lineage.mmd
│   │   ├── marts
│   │   │   ├── customers.sql
│   │   │   ├── locations.sql
│   │   │   ├── order_items.sql
│   │   │   ├── orders.sql
│   │   │   ├── products.sql
│   │   │   └── supplies.sql
│   │   ├── run-jaffle
│   │   ├── seeds
│   │   │   ├── raw_customers.sql
│   │   │   ├── raw_orders.sql
│   │   │   └── raw_payments.sql
│   │   ├── sources
│   │   │   ├── lineage.mmd
│   │   │   ├── raw_customers.csv
│   │   │   ├── raw_customers.sql
│   │   │   ├── raw_customers.sql.bak
│   │   │   ├── raw_items.csv
│   │   │   ├── raw_items.sql
│   │   │   ├── raw_items.sql.bak
│   │   │   ├── raw_orders.csv
│   │   │   ├── raw_orders.sql
│   │   │   ├── raw_orders.sql.bak
│   │   │   ├── raw_products.csv
│   │   │   ├── raw_products.sql
│   │   │   ├── raw_products.sql.bak
│   │   │   ├── raw_stores.csv
│   │   │   ├── raw_stores.sql
│   │   │   ├── raw_stores.sql.bak
│   │   │   ├── raw_supplies.csv
│   │   │   ├── raw_supplies.sql
│   │   │   └── raw_supplies.sql.bak
│   │   └── staging
│   │       ├── lineage.mmd
│   │       ├── stg_customers.sql
│   │       ├── stg_locations.sql
│   │       ├── stg_order_items.sql
│   │       ├── stg_orders.sql
│   │       ├── stg_products.sql
│   │       └── stg_supplies.sql
│   ├── race_data
│   │   ├── database_schema.xml
│   │   ├── driver_fact.sql
│   │   ├── lineage.mmd
│   │   ├── race_summary.sql
│   │   ├── races.sql
│   │   └── sample_parquet.sql
│   ├── run_ordered.sql
│   └── simple
│       ├── database_schema.xml
│       ├── lineage.mmd
│       ├── lineage
│       │   └── lineage.mmd
│       ├── marts
│       │   ├── customer_orders.sql
│       │   └── order_summary.sql
│       ├── output
│       │   └── .gitkeep
│       └── staging
│           ├── lineage.mmd
│           ├── stg_customers.sql
│           └── stg_orders.sql
├── output
│   └── .gitkeep
├── run-simple-example
├── run_jaffle_shop.sh
├── src
│   ├── bin
│   │   └── ast_test.rs
│   ├── cli
│   │   └── mod.rs
│   ├── config
│   │   ├── mod.rs
│   │   └── output.rs
│   ├── executor
│   │   ├── mod.rs
│   │   └── output.rs
│   ├── lib.rs
│   ├── main.rs
│   ├── parser
│   │   ├── ast_test.rs
│   │   ├── config.rs
│   │   ├── dependencies.rs
│   │   ├── lineage.rs
│   │   ├── mod.rs
│   │   └── sql.rs
│   ├── schema
│   │   ├── mod.rs
│   │   └── visualization.rs
│   └── storage
│       └── mod.rs
├── test_extract.rs
├── test_query.sql
├── test_sql.sql
├── tests
│   ├── config_test.rs
│   ├── jaffle_shop_lineage_test.rs
│   ├── parser_dependencies_test.rs
│   ├── parser_lineage_test.rs
│   ├── parser_sql_test.rs
│   └── race_data_lineage_test.rs
└── transform
    └── lineage.mmd
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | target/
3 | *.db
4 | *.parquet
5 | duckdb_ast_debug.json
6 |
7 | # Keep directory structure but ignore contents
8 | /output/*
9 | !/output/.gitkeep
10 |
11 | !examples/simple/output/
12 | examples/simple/output/*
13 | !examples/simple/output/.gitkeep
14 |
15 | # Ignore tmp directories
16 | **/tmp/
17 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "crabwalk"
3 | version = "0.1.0"
4 | edition = "2021"
5 | description = "A SQL transformation orchestrator written in Rust"
6 | authors = ["Crabwalk Contributors"]
7 | license = "MIT"
8 | default-run = "crabwalk"
9 |
10 | [dependencies]
11 | # Command line argument parsing
12 | clap = { version = "4.4", features = ["derive"] }
13 | # DuckDB integration
14 | duckdb = { version = "1.2.0", features = ["bundled"] }
15 | # SQL parsing and manipulation
16 | sqlparser = "0.49.0"
17 | # File system operations
18 | walkdir = "2.4"
19 | # Error handling
20 | anyhow = "1.0"
21 | thiserror = "1.0"
22 | # Serialization/deserialization
23 | serde = { version = "1.0", features = ["derive"] }
24 | serde_yaml = "0.9"
25 | serde_json = "1.0"
26 | base64 = "0.21"
27 | # Logging
28 | tracing = "0.1"
29 | tracing-subscriber = { version = "0.3", features = ["env-filter"] }
30 | # Async runtime
31 | tokio = { version = "1.32", features = ["full"] }
32 | # Regular expressions
33 | regex = "1.9"
34 | # Path handling
35 | pathdiff = "0.2"
36 | # Graph algorithms
37 | petgraph = "0.6"
38 | # Terminal UI
39 | crossterm = "0.27"
40 | console = "0.15"
41 | # Temporary files
42 | tempfile = "3.10"
43 | # AWS S3 integration (optional)
44 | rusoto_core = { version = "0.48", optional = true }
45 | rusoto_s3 = { version = "0.48", optional = true }
46 | # System bindings for handling error output
47 | libc = "0.2"
48 | # Compression for Mermaid diagrams
49 | flate2 = "1.0"
50 |
51 | [features]
52 | default = []
53 | s3 = ["rusoto_core", "rusoto_s3"]
54 |
--------------------------------------------------------------------------------
/crabwalk-web/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 | pnpm-debug.log*
8 | lerna-debug.log*
9 |
10 | node_modules
11 | dist
12 | dist-ssr
13 | *.local
14 |
15 | # Editor directories and files
16 | .vscode/*
17 | !.vscode/extensions.json
18 | .idea
19 | .DS_Store
20 | *.suo
21 | *.ntvs*
22 | *.njsproj
23 | *.sln
24 | *.sw?
25 |
--------------------------------------------------------------------------------
/crabwalk-web/DEBUG.md:
--------------------------------------------------------------------------------
1 | # Debugging Mermaid Diagrams
2 |
3 | This guide helps you diagnose and fix issues with Mermaid diagram rendering in the Crabwalk web visualizer.
4 |
5 | ## Common Error: "Cannot read properties of null (reading 'firstChild')"
6 |
7 | This error typically occurs when:
8 | 1. The Mermaid library cannot parse the diagram content
9 | 2. The DOM element for rendering isn't properly set up
10 | 3. There's a race condition in the rendering process
11 |
12 | ## How to Debug
13 |
14 | ### 1. Use the Test Page
15 |
16 | We've created a standalone test page to isolate and debug Mermaid rendering:
17 |
18 | ```bash
19 | # Run the Mermaid test page
20 | cd crabwalk-web   # from the repository root
21 | ./scripts/debug_mermaid.sh
22 | ```
23 |
24 | This will open a browser with a test page that:
25 | - Shows multiple test cases for Mermaid diagrams
26 | - Displays detailed error messages
27 | - Allows you to test both valid and invalid content
28 |
29 | ### 2. Check Your Diagram Content
30 |
31 | If you're seeing errors with a specific diagram:
32 |
33 | 1. Copy the problematic diagram content
34 | 2. Start the test page (as shown above)
35 | 3. Add a new test case with your content
36 | 4. Look for syntax errors in the Mermaid content
37 |
38 | ### 3. Fix Options
39 |
40 | The most reliable way to fix Mermaid rendering issues is to:
41 |
42 | 1. Import Mermaid directly rather than dynamically loading it
43 | 2. Use the render method with a unique ID
44 | 3. Directly use the returned SVG content
45 | 4. Add robust error handling
46 |
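Taken together, those four steps look roughly like the sketch below (a minimal TypeScript example using the `mermaid` npm package; the actual component in `src/components/MermaidDiagram.tsx` adds React state management on top of this):

```ts
import mermaid from 'mermaid';

// Initialize once; render only on demand
mermaid.initialize({ startOnLoad: false });

async function renderInto(container: HTMLElement, content: string): Promise<void> {
  try {
    // A unique ID per render avoids collisions between diagrams on the same page
    const id = `mermaid-${Date.now()}-${Math.floor(Math.random() * 10000)}`;
    const { svg } = await mermaid.render(id, content);
    container.innerHTML = svg; // use the returned SVG directly
  } catch (err) {
    // Robust error handling: surface the parse/render error instead of crashing
    container.textContent = `Failed to render diagram: ${err}`;
  }
}
```
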
47 | ## Current Implementation
48 |
49 | The current implementation in `src/components/MermaidDiagram.tsx` has been updated to:
50 |
51 | 1. Use a proper render loop with state management
52 | 2. Properly handle errors and display them
53 | 3. Use unique IDs for each rendering
54 | 4. Show a loading state during processing
55 |
56 | ## Testing Your Own Diagrams
57 |
58 | To test your specific diagrams:
59 |
60 | 1. Edit `src/test/MermaidTest.tsx`
61 | 2. Add your diagram content to the `samples` array
62 | 3. Run the test script
63 | 4. Check the output and error messages
64 |
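For example, a new entry might look like the following (illustrative only — check `MermaidTest.tsx` for the actual shape of the `samples` array):

```ts
// Hypothetical addition to the samples array in src/test/MermaidTest.tsx
const samples: string[] = [
  // ...existing test cases...
  `graph TD
    stg_orders --> customer_orders
    stg_customers --> customer_orders`,
];
```
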
65 | ## Getting Additional Help
66 |
67 | If you continue to have issues:
68 |
69 | 1. Check Mermaid's official syntax guide: https://mermaid.js.org/intro/
70 | 2. Look at Mermaid's live editor: https://mermaid.live/
71 | 3. Try simplifying your diagram to identify problem areas
72 |
73 | ## Known Limitations
74 |
75 | - Very complex diagrams might be slow to render
76 | - Some advanced features may not be supported
77 | - Auto-generated connections work best with standard naming conventions
--------------------------------------------------------------------------------
/crabwalk-web/README.md:
--------------------------------------------------------------------------------
1 | # Crabwalk Web
2 |
3 | A web interface for the Crabwalk SQL transformation orchestrator.
4 |
5 | ## Getting Started
6 |
7 | ```bash
8 | # Install dependencies
9 | npm install
10 |
11 | # Start development server
12 | npm run dev
13 |
14 | # Build for production
15 | npm run build
16 |
17 | # Start production server
18 | npm run start
19 | ```
20 |
21 | ## Build and Run After Making Changes
22 |
23 | When you make changes to the codebase, follow these steps to build and run the application:
24 |
25 | ```bash
26 | # Compile TypeScript and build the application
27 | npm run build
28 |
29 | # Start the server with the updated build
30 | npm run server
31 |
32 | # Or, build and start in one command
33 | npm run start
34 | ```
35 |
36 | The build process will:
37 | 1. Compile TypeScript (`tsc -b`)
38 | 2. Build the frontend with Vite (`vite build`)
39 | 3. Compile server TypeScript (`tsc -p tsconfig.server.json`)
40 |
41 | After building, the application will be available at http://localhost:3000 (or the configured port).
42 |
43 | ## Relationship with Cargo/Rust App
44 |
45 | This web interface is a companion to the main Crabwalk CLI tool, which is built with Rust/Cargo and located in the parent directory. To build and use both components:
46 |
47 | ### Building the Rust CLI
48 |
49 | Navigate to the parent directory and build the Rust application:
50 |
51 | ```bash
52 | # From the crabwalk-web directory
53 | cd ..
54 |
55 | # Build the Rust CLI
56 | cargo build --release
57 |
58 | # Run examples with the Rust CLI
59 | cargo run
60 | ```
61 |
62 | ### Using Both Together
63 |
64 | The web application can visualize projects created by the Rust CLI. A typical workflow:
65 |
66 | 1. Use the Rust CLI to process SQL files and generate schema/lineage information:
67 | ```bash
68 | cargo run -- run ./path/to/sql/files
69 | ```
70 |
71 | 2. Run the web application to visualize the output:
72 | ```bash
73 | npm run start
74 | ```
75 |
76 | 3. Or use the CLI command to launch the web interface directly:
77 | ```bash
78 | cargo run -- app --open
79 | ```
80 |
81 | ## Troubleshooting
82 |
83 | ### Perspective WebAssembly Setup
84 |
85 | The application uses Perspective.js for data visualization, which requires WebAssembly files. We've implemented a robust solution to ensure all WebAssembly files are correctly loaded:
86 |
87 | 1. **WebAssembly File Management**:
88 | - A script (`scripts/setup-wasm.js`) copies necessary WebAssembly files from node_modules to the `public/wasm` directory
89 | - The script also creates aliases for the WebAssembly files with alternative names that Perspective might look for
90 | - This includes specific handling for `perspective-client.wasm` which is required but not directly provided
91 |
92 | 2. **Path Configuration**:
93 | - We inject WebAssembly paths into the window object in the HTML files
94 | - This ensures Perspective can find the WebAssembly files even when using different naming conventions
95 | - We use `window.PERSPECTIVE_ASSETS` to specify exact paths for each WebAssembly file (see the sketch after this list)
96 |
97 | 3. **Testing Perspective**:
98 | - A dedicated test component (`/src/test/PerspectiveTest.tsx`) verifies WebAssembly loading
99 | - Run `npm run test:perspective` to check if Perspective is working correctly
100 | - This helps diagnose WebAssembly loading issues independently of the main application
101 |
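The path injection described in point 2 looks roughly like this (a sketch only — the real keys and paths are defined in the HTML files and mirror what `scripts/setup-wasm.js` copies into `public/wasm`):

```ts
// Illustrative: tell Perspective where to find each WebAssembly asset.
(window as any).PERSPECTIVE_ASSETS = {
  'perspective-js.wasm': '/wasm/perspective-js.wasm',
  'perspective-client.wasm': '/wasm/perspective-client.wasm',
  'perspective-viewer.wasm': '/wasm/perspective-viewer.wasm',
  'perspective-server.wasm': '/wasm/perspective-server.wasm',
};
```
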
102 | If you encounter errors like "Missing perspective-client.wasm":
103 |
104 | 1. Check that all WebAssembly files and aliases were created:
105 | ```bash
106 | npm run setup-wasm
107 | ls -la public/wasm
108 | ```
109 |
110 | 2. Make sure your server has the correct CORS headers (see the sketch after this list):
111 | ```
112 | Cross-Origin-Opener-Policy: same-origin
113 | Cross-Origin-Embedder-Policy: require-corp
114 | ```
115 |
116 | 3. Try clearing browser cache and storage:
117 | - Clear browser cache
118 | - Clear IndexedDB and WebAssembly storage
119 | - Restart your browser
120 |
121 | 4. Check for console errors about disallowed WebAssembly features:
122 | - Some browsers restrict WebAssembly features
123 | - Ensure SharedArrayBuffer is available and allowed
124 |
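For step 2, a minimal Express sketch that sets the cross-origin isolation headers (the project's real server lives in `src/server/index.ts`; this is only an illustration):

```ts
import express from 'express';

const app = express();

// SharedArrayBuffer (needed by Perspective's WebAssembly workers) requires
// cross-origin isolation, which these two headers enable.
app.use((_req, res, next) => {
  res.setHeader('Cross-Origin-Opener-Policy', 'same-origin');
  res.setHeader('Cross-Origin-Embedder-Policy', 'require-corp');
  next();
});

app.use(express.static('dist'));
app.listen(3000, () => console.log('Listening on http://localhost:3000'));
```
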
125 | ### DuckDB WebAssembly Implementation
126 |
127 | The application uses DuckDB-wasm to provide SQL database capabilities directly in the browser. Here's how it works:
128 |
129 | 1. **WebAssembly Loading**: DuckDB is compiled to WebAssembly, which runs in the browser with near-native performance.
130 |
131 | 2. **Web Worker**: DuckDB runs in a dedicated Web Worker thread to avoid freezing the UI during intensive operations.
132 |
133 | 3. **Blob URL Creation**: We use a Blob URL to create the worker, which resolves cross-origin issues and provides better compatibility across browsers (see the sketch after this list).
134 |
135 | 4. **Memory Database**: By default, an in-memory database is created, and you can load external database files.
136 |
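A condensed sketch of points 2–4, following the documented `@duckdb/duckdb-wasm` bundle API (the project's actual implementation is in `src/utils/duckdb.ts` and may differ in detail):

```ts
import * as duckdb from '@duckdb/duckdb-wasm';

export async function initDuckDB(): Promise<duckdb.AsyncDuckDB> {
  // Pick the wasm/worker bundle best suited to this browser
  const bundle = await duckdb.selectBundle(duckdb.getJsDelivrBundles());

  // Wrap the worker script in a Blob URL to avoid cross-origin worker restrictions
  const workerUrl = URL.createObjectURL(
    new Blob([`importScripts("${bundle.mainWorker!}");`], { type: 'text/javascript' })
  );
  const worker = new Worker(workerUrl);

  // DuckDB runs inside the worker; the main thread talks to it asynchronously
  const db = new duckdb.AsyncDuckDB(new duckdb.ConsoleLogger(), worker);
  await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
  URL.revokeObjectURL(workerUrl);

  return db; // an in-memory database by default
}
```
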
137 | If you encounter any issues:
138 |
139 | 1. **Clear Browser Cache**: Clear your browser cache and reload the application.
140 |
141 | 2. **Use a Modern Browser**: Ensure you're using a recent version of Chrome, Firefox, Edge, or Safari.
142 |
143 | 3. **Check Console Logs**: Open your browser developer tools (F12) to check for error messages.
144 |
145 | 4. **WebAssembly Support**: Your browser must support WebAssembly. All modern browsers support this feature.
146 |
147 | 5. **Cross-Origin Issues**: When running locally, use a proper web server (like the Vite dev server) rather than opening the HTML file directly.
148 |
149 | ### Using Example Files
150 |
151 | Example database files are available in the `examples` directory of the Crabwalk project. Try loading these files first to ensure the application is working correctly.
--------------------------------------------------------------------------------
/crabwalk-web/bin/crabwalk-web.js:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | // CLI entry point for crabwalk-web
4 | // This allows users to run 'crabwalk-web' from any directory
5 | // to visualize their Crabwalk project
6 |
7 | import { spawn } from 'child_process';
8 | import path from 'path';
9 | import { fileURLToPath } from 'url';
10 | import fs from 'fs';
11 |
12 | const __filename = fileURLToPath(import.meta.url);
13 | const __dirname = path.dirname(__filename);
14 | const rootDir = path.resolve(__dirname, '..');
15 |
16 | console.log('🦀 Starting Crabwalk Web Visualizer...');
17 | console.log('Scanning for project files in current directory...');
18 |
19 | // Build the app if dist directory doesn't exist
20 | if (!fs.existsSync(path.join(rootDir, 'dist'))) {
21 | console.log('Building application (one-time process)...');
22 |
23 | const buildProcess = spawn('npm', ['run', 'build'], {
24 | cwd: rootDir,
25 | stdio: 'inherit',
26 | });
27 |
28 | buildProcess.on('close', (code) => {
29 | if (code !== 0) {
30 | console.error('Error building application. Exiting.');
31 | process.exit(1);
32 | }
33 |
34 | startServer();
35 | });
36 | } else {
37 | startServer();
38 | }
39 |
40 | function startServer() {
41 | console.log('Starting server...');
42 |
43 | // For production use, we should directly run the JS file in dist folder
44 | const serverProcess = spawn('node', ['dist/server/index.js'], {
45 | cwd: rootDir,
46 | stdio: 'inherit',
47 | });
48 |
49 | // Handle process termination
50 | process.on('SIGINT', () => {
51 | serverProcess.kill('SIGINT');
52 | process.exit(0);
53 | });
54 |
55 | process.on('SIGTERM', () => {
56 | serverProcess.kill('SIGTERM');
57 | process.exit(0);
58 | });
59 |
60 | serverProcess.on('close', (code) => {
61 | console.log(`Server process exited with code ${code}`);
62 | process.exit(code || 0);
63 | });
64 | }
--------------------------------------------------------------------------------
/crabwalk-web/eslint.config.js:
--------------------------------------------------------------------------------
1 | import js from '@eslint/js'
2 | import globals from 'globals'
3 | import reactHooks from 'eslint-plugin-react-hooks'
4 | import reactRefresh from 'eslint-plugin-react-refresh'
5 | import tseslint from 'typescript-eslint'
6 |
7 | export default tseslint.config(
8 | { ignores: ['dist'] },
9 | {
10 | extends: [js.configs.recommended, ...tseslint.configs.recommended],
11 | files: ['**/*.{ts,tsx}'],
12 | languageOptions: {
13 | ecmaVersion: 2020,
14 | globals: globals.browser,
15 | },
16 | plugins: {
17 | 'react-hooks': reactHooks,
18 | 'react-refresh': reactRefresh,
19 | },
20 | rules: {
21 | ...reactHooks.configs.recommended.rules,
22 | 'react-refresh/only-export-components': [
23 | 'warn',
24 | { allowConstantExport: true },
25 | ],
26 | },
27 | },
28 | )
29 |
--------------------------------------------------------------------------------
/crabwalk-web/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | Crabwalk Web Visualizer
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
34 |
35 |
36 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
--------------------------------------------------------------------------------
/crabwalk-web/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "crabwalk-web",
3 | "private": true,
4 | "version": "0.1.0",
5 | "type": "module",
6 | "bin": {
7 | "crabwalk-web": "./bin/crabwalk-web.js"
8 | },
9 | "scripts": {
10 | "dev": "vite",
11 | "build": "tsc -b && vite build && tsc -p tsconfig.server.json",
12 | "lint": "eslint .",
13 | "preview": "vite preview",
14 | "server": "node dist/server/index.js",
15 | "start": "npm run build && npm run server",
16 | "test:mermaid": "vite --open src/test/test.html",
17 | "test:perspective": "vite --open src/test/perspective-test.html",
18 | "test:perspective:fixed": "vite --open src/test/perspective-test-fixed.html",
19 | "test:perspective:direct": "vite --open src/test/perspective-direct.html",
20 | "test:perspective:simple": "vite --open src/test/perspective-simple.html",
21 | "setup-wasm": "node scripts/setup-wasm.js"
22 | },
23 | "overrides": {
24 | "d3-color": "3.1.0"
25 | },
26 | "dependencies": {
27 | "@duckdb/duckdb-wasm": "^1.29.0",
28 | "@finos/perspective": "^3.4.0",
29 | "@finos/perspective-viewer": "^3.4.0",
30 | "@finos/perspective-viewer-d3fc": "^3.4.0",
31 | "@finos/perspective-viewer-datagrid": "^3.4.0",
32 | "d3-color": "3.1.0",
33 | "express": "^4.19.2",
34 | "mermaid": "^11.4.1",
35 | "react": "^19.0.0",
36 | "react-dom": "^19.0.0",
37 | "sql.js": "^1.12.0"
38 | },
39 | "devDependencies": {
40 | "@eslint/js": "^9.21.0",
41 | "@tailwindcss/postcss": "^4.0.12",
42 | "@types/express": "^4.17.21",
43 | "@types/node": "^20.11.30",
44 | "@types/react": "^19.0.10",
45 | "@types/react-dom": "^19.0.4",
46 | "@types/sql.js": "^1.4.9",
47 | "@vitejs/plugin-react": "^4.3.4",
48 | "autoprefixer": "^10.4.21",
49 | "eslint": "^9.21.0",
50 | "eslint-plugin-react-hooks": "^5.1.0",
51 | "eslint-plugin-react-refresh": "^0.4.19",
52 | "globals": "^15.15.0",
53 | "postcss": "^8.5.3",
54 | "ts-node": "^10.9.2",
55 | "typescript": "~5.7.2",
56 | "typescript-eslint": "^8.24.1",
57 | "vite": "^6.2.0"
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/crabwalk-web/perspective.d.ts:
--------------------------------------------------------------------------------
1 | declare namespace JSX {
2 | interface IntrinsicElements {
3 | 'perspective-viewer': any;
4 | }
5 | }
--------------------------------------------------------------------------------
/crabwalk-web/postcss.config.js:
--------------------------------------------------------------------------------
1 | export default {
2 | plugins: {
3 | '@tailwindcss/postcss': {},
4 | autoprefixer: {},
5 | },
6 | }
--------------------------------------------------------------------------------
/crabwalk-web/public/perspective-init.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Perspective Initialization
6 |
7 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/crabwalk-web/public/vite.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/crabwalk-web/public/wasm-worker.js:
--------------------------------------------------------------------------------
1 | // Custom WebAssembly worker for Perspective.js
2 | // This file is loaded by Perspective when creating a worker
3 |
4 | // Set the paths to WebAssembly files
5 | const paths = {
6 | wasmBinary: '/wasm/perspective-js.wasm',
7 | wasmPath: '/wasm/',
8 | };
9 |
10 | // Listen for messages from the main thread
11 | self.addEventListener('message', async function(event) {
12 | if (event.data && event.data.cmd === 'init') {
13 | // Respond with the initialized state
14 | self.postMessage({
15 | id: event.data.id || 0,
16 | data: {
17 | initialized: true
18 | }
19 | });
20 | } else {
21 | // Forward other messages to the actual worker implementation
22 | try {
23 | // Process the message (should be implemented by the actual worker)
24 | // ...
25 |
26 | // Send a response (even if empty)
27 | self.postMessage({
28 | id: event.data.id || 0,
29 | data: {}
30 | });
31 | } catch (e) {
32 | // Send error message
33 | self.postMessage({
34 | id: event.data.id || 0,
35 | error: e.message
36 | });
37 | }
38 | }
39 | });
--------------------------------------------------------------------------------
/crabwalk-web/public/wasm/perspective-client.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/definite-app/crabwalk/57acc9391fd8e5c7df9f9bd57358855a9d504d1c/crabwalk-web/public/wasm/perspective-client.wasm
--------------------------------------------------------------------------------
/crabwalk-web/public/wasm/perspective-js.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/definite-app/crabwalk/57acc9391fd8e5c7df9f9bd57358855a9d504d1c/crabwalk-web/public/wasm/perspective-js.wasm
--------------------------------------------------------------------------------
/crabwalk-web/public/wasm/perspective-server.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/definite-app/crabwalk/57acc9391fd8e5c7df9f9bd57358855a9d504d1c/crabwalk-web/public/wasm/perspective-server.wasm
--------------------------------------------------------------------------------
/crabwalk-web/public/wasm/perspective-server.worker.js:
--------------------------------------------------------------------------------
1 | var d=class{clients;server;module;constructor(t){this.clients=new Map,this.module=t,this.server=t._psp_new_server()}make_session(t){let n=this.module._psp_new_session(this.server);return this.clients.set(n,t),new v(this.module,this.server,n,this.clients)}delete(){this.module._psp_delete_server(this.server)}},v=class{constructor(t,n,s,i){this.mod=t;this.server=n;this.client_id=s;this.client_map=i}async handle_request(t){let n=await M(this.mod,t,async s=>this.mod._psp_handle_request(this.server,this.client_id,s,this.mod._psp_is_memory64()?BigInt(t.byteLength):t.byteLength));await w(this.mod,n,async s=>{await this.client_map.get(s.client_id)(s.data)})}poll(){let t=this.mod._psp_poll(this.server);w(this.mod,t,async n=>{await this.client_map.get(n.client_id)(n.data)})}close(){this.mod._psp_close_session(this.server,this.client_id)}};async function M(a,t,n){let s=a._psp_alloc(a._psp_is_memory64()?BigInt(t.byteLength):t.byteLength);a.HEAPU8.set(t,Number(s));let i=await n(s);return a._psp_free(s),i}async function w(a,t,n){let s=a._psp_is_memory64(),i=new DataView(a.HEAPU8.buffer,Number(t),s?12:8),c=i.getUint32(0,!0),l=s?i.getBigInt64(4,!0):i.getUint32(4,!0),e=new DataView(a.HEAPU8.buffer,Number(l),c*(s?16:12));try{for(let r=0;r=s);)++i;return x.decode(a instanceof Uint8Array?a.subarray(t,i):new Uint8Array(a.slice(t,i)))}function A(a,t){var n=H[a];t===0||t===10?((a===1?B:I)(N(n,0)),n.length=0):n.push(t)}async function P(a){let t,n=!1,s,i={HaveOffsetConverter(){console.error("HaveOffsetConverter")},__syscall_ftruncate64(...e){console.error("__syscall_frtuncate64",e)},__syscall_getdents64(...e){console.error("__syscall_frtuncate64",e)},__syscall_unlinkat(...e){console.error("__syscall_frtuncate64",e)},__throw_exception_with_stack_trace(e){let r=new WebAssembly.Exception(t.__cpp_exception,[e],{traceStack:!0});throw r.message="Unexpected internal error",r},clock_time_get(e,r,o){if(n){if(o=o,o=Number(o),!(e==0||e==1||e==2||e==3))return 28;var p;e===0?p=Date.now():p=performance.now();let _=Math.round(p*1e3*1e3),u=new BigInt64Array(s.buffer);return u[o/8]=BigInt(_),0}else{if(o=o,o>>>=0,!(e==0||e==1||e==2||e==3))return 28;var p;e===0?p=Date.now():p=performance.now();var f=Math.round(p*1e6);let u=new BigInt64Array(s.buffer);return u[o>>>3]=BigInt(f),0}},emscripten_asm_const_int(...e){return 0},emscripten_notify_memory_growth(e){n?e=Number(e):(e=e,e>>>=0),e!=0&&console.error("abort")},environ_get(...e){return 0},environ_sizes_get(...e){return 0},fd_close(...e){return console.error("fd_close",e),0},fd_read(...e){return console.error("fd_read",e),0},fd_seek(...e){return console.error("fs_seek",e),0},fd_write(e,r,o,p){let f=new Uint8Array(s.buffer);if(n){r=Number(r),o=Number(o),p=Number(p);let _=0,u=new BigUint64Array(s.buffer);for(let y=0;y>>=0,o>>>=0,p>>>=0;let _=0,u=new Uint32Array(s.buffer);for(let y=0;y>>2>>>0],b=u[r+4>>>2>>>0];r+=8;for(let m=0;m>>0]);_+=b}return u[p>>>2>>>0]=_,0}},proc_exit(e){return console.error("proc_exit",e),0}},c=await a.instantiateWasm({env:i,wasi_snapshot_preview1:i},e=>{t=e.exports,n=!!t.psp_is_memory64(),s=e.exports.memory,t._initialize()}),l={};for(let[e,r]of Object.entries(c))l[`_${e}`]=r;return{...c,...l,get HEAPU8(){return new Uint8Array(s.buffer)}}}async function U(a){let t=await P({locateFile(n){return n},instantiateWasm:async(n,s)=>{n.env={...n.env,psp_stack_trace(){let c=Error().stack||"",e=new TextEncoder().encode(c),r=t._psp_alloc(t._psp_is_memory64()?BigInt(e.byteLength+1):e.byteLength+1);return 
t.HEAPU8.set(e,Number(r)),t.HEAPU8[Number(r)+e.byteLength]=0,r},psp_heap_size(){return t._psp_is_memory64()?BigInt(t.HEAPU8.buffer.byteLength):t.HEAPU8.buffer.byteLength}};let i=await WebAssembly.instantiate(a,n);return s(i.instance),i.instance.exports}});return t}var h;function E(a){let t=a.ports[0],n;t.addEventListener("message",async s=>{if(s.data.cmd==="init"){let i=s.data.id;if(!h){let c=await U(s.data.args[0]);h=new d(c)}n=h.make_session(async c=>{let l=c.slice().buffer;t.postMessage(l,{transfer:[l]})}),t.postMessage({id:i})}else n.handle_request(new Uint8Array(s.data)),setTimeout(()=>n.poll())}),t.start()}self.addEventListener("connect",E);self.addEventListener("message",E);
2 | //# sourceMappingURL=perspective-server.worker.js.map
3 |
--------------------------------------------------------------------------------
/crabwalk-web/public/wasm/perspective-view.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/definite-app/crabwalk/57acc9391fd8e5c7df9f9bd57358855a9d504d1c/crabwalk-web/public/wasm/perspective-view.wasm
--------------------------------------------------------------------------------
/crabwalk-web/public/wasm/perspective-viewer.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/definite-app/crabwalk/57acc9391fd8e5c7df9f9bd57358855a9d504d1c/crabwalk-web/public/wasm/perspective-viewer.wasm
--------------------------------------------------------------------------------
/crabwalk-web/public/wasm/perspective.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/definite-app/crabwalk/57acc9391fd8e5c7df9f9bd57358855a9d504d1c/crabwalk-web/public/wasm/perspective.wasm
--------------------------------------------------------------------------------
/crabwalk-web/run-react-app.js:
--------------------------------------------------------------------------------
1 | // Script to run the React application using Vite
2 | import { spawn } from 'child_process';
3 | import { fileURLToPath } from 'url';
4 | import path from 'path';
5 | import fs from 'fs';
6 |
7 | // Get current directory
8 | const __filename = fileURLToPath(import.meta.url);
9 | const __dirname = path.dirname(__filename);
10 |
11 | // Check if package.json exists
12 | const packageJsonPath = path.join(__dirname, 'package.json');
13 | if (!fs.existsSync(packageJsonPath)) {
14 | console.error('Error: package.json not found. Make sure you are in the correct directory.');
15 | process.exit(1);
16 | }
17 |
18 | console.log('Starting React application with Vite...');
19 |
20 | // Run npm run dev
21 | const viteProcess = spawn('npm', ['run', 'dev'], {
22 | cwd: __dirname,
23 | stdio: 'inherit',
24 | shell: true
25 | });
26 |
27 | viteProcess.on('error', (error) => {
28 | console.error('Failed to start Vite server:', error);
29 | });
30 |
31 | viteProcess.on('close', (code) => {
32 | if (code !== 0) {
33 | console.log(`Vite process exited with code ${code}`);
34 | }
35 | });
36 |
37 | console.log('Vite server starting. Once ready, open the URL shown in the terminal.');
38 | console.log('To test the Perspective component, click on the "Perspective" tab in the navigation bar.');
--------------------------------------------------------------------------------
/crabwalk-web/scripts/debug_mermaid.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Kill any existing processes on port 3000
4 | echo "Stopping any existing web servers..."
5 | kill $(lsof -t -i:3000) 2>/dev/null || true
6 |
7 | # Change to the crabwalk-web directory
8 | cd "$(dirname "$0")/.."
9 |
10 | # Start the Mermaid test server
11 | echo "Starting Mermaid testing server..."
12 | npm run test:mermaid
--------------------------------------------------------------------------------
/crabwalk-web/scripts/run-with-db.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This script will start the crabwalk web viewer with database integration
4 | # It will look for .duckdb or .db files in the current directory
5 |
6 | echo "🦀 Starting Crabwalk Web with DuckDB Integration"
7 | echo "==============================================="
8 |
9 | # Check if a DuckDB file exists in the current directory
10 | DB_FILES=$(find . -maxdepth 1 -type f \( -name "*.db" -o -name "*.duckdb" -o -name "*.sqlite" \))
11 |
12 | if [ -n "$DB_FILES" ]; then
13 | echo "Found database files in current directory:"
14 | echo "$DB_FILES"
15 | echo ""
16 | echo "These will be accessible from the Tables tab."
17 | fi
18 |
19 | # Start the web server
20 | echo "Starting web interface. Press Ctrl+C to exit."
21 | crabwalk-web
--------------------------------------------------------------------------------
/crabwalk-web/scripts/setup-wasm.js:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | // This script copies WebAssembly files needed by perspective.js to the public directory
4 | // so they can be served by the web server and loaded by the browser
5 |
6 | import fs from 'fs';
7 | import path from 'path';
8 | import { fileURLToPath } from 'url';
9 |
10 | const __filename = fileURLToPath(import.meta.url);
11 | const __dirname = path.dirname(__filename);
12 |
13 | const WASM_SOURCE_DIRS = [
14 | path.resolve(__dirname, '../node_modules/@finos/perspective/dist/wasm'),
15 | path.resolve(__dirname, '../node_modules/@finos/perspective-viewer/dist/wasm')
16 | ];
17 |
18 | // Also copy Javascript files
19 | const JS_FILES = [
20 | {
21 | src: path.resolve(__dirname, '../node_modules/@finos/perspective/dist/cdn/perspective.js'),
22 | dest: path.resolve(__dirname, '../public/wasm/perspective.js')
23 | },
24 | {
25 | src: path.resolve(__dirname, '../node_modules/@finos/perspective/dist/cdn/perspective-server.worker.js'),
26 | dest: path.resolve(__dirname, '../public/wasm/perspective-server.worker.js')
27 | },
28 | {
29 | src: path.resolve(__dirname, '../node_modules/@finos/perspective/dist/esm/perspective.js'),
30 | dest: path.resolve(__dirname, '../public/wasm/perspective.esm.js')
31 | }
32 | ];
33 |
34 | // Create aliases for WebAssembly files that may be required by Perspective with different names
35 | const WASM_ALIASES = [
36 | {
37 | src: path.resolve(__dirname, '../node_modules/@finos/perspective/dist/wasm/perspective-js.wasm'),
38 | dest: path.resolve(__dirname, '../public/wasm/perspective-client.wasm')
39 | },
40 | {
41 | src: path.resolve(__dirname, '../node_modules/@finos/perspective/dist/wasm/perspective-js.wasm'),
42 | dest: path.resolve(__dirname, '../public/wasm/perspective.wasm')
43 | },
44 | {
45 | src: path.resolve(__dirname, '../node_modules/@finos/perspective-viewer/dist/wasm/perspective-viewer.wasm'),
46 | dest: path.resolve(__dirname, '../public/wasm/perspective-view.wasm')
47 | }
48 | ];
49 |
50 | // Copy essential worker files - different formats for browser compatibility
51 | const WORKER_FILES = [
52 | // UMD format - easier to use directly in browser
53 | {
54 | src: path.resolve(__dirname, '../node_modules/@finos/perspective/dist/umd/perspective.js'),
55 | dest: path.resolve(__dirname, '../public/wasm/perspective-umd.js')
56 | },
57 | {
58 | src: path.resolve(__dirname, '../node_modules/@finos/perspective/dist/umd/perspective.worker.js'),
59 | dest: path.resolve(__dirname, '../public/wasm/perspective.worker.js')
60 | }
61 | ];
62 |
63 | const WASM_DEST_DIR = path.resolve(__dirname, '../public/wasm');
64 |
65 | // Create destination directory if it doesn't exist
66 | if (!fs.existsSync(WASM_DEST_DIR)) {
67 | fs.mkdirSync(WASM_DEST_DIR, { recursive: true });
68 | console.log(`Created directory: ${WASM_DEST_DIR}`);
69 | }
70 |
71 | // Copy all .wasm files
72 | let copiedFiles = 0;
73 | for (const sourceDir of WASM_SOURCE_DIRS) {
74 | if (fs.existsSync(sourceDir)) {
75 | const files = fs.readdirSync(sourceDir);
76 | for (const file of files) {
77 | if (file.endsWith('.wasm')) {
78 | const sourcePath = path.join(sourceDir, file);
79 | const destPath = path.join(WASM_DEST_DIR, file);
80 | fs.copyFileSync(sourcePath, destPath);
81 | copiedFiles++;
82 | console.log(`Copied: ${sourcePath} -> ${destPath}`);
83 | }
84 | }
85 | } else {
86 | console.warn(`Source directory not found: ${sourceDir}`);
87 | }
88 | }
89 |
90 | console.log(`Copied ${copiedFiles} WebAssembly files to ${WASM_DEST_DIR}`);
91 |
92 | // Copy JS files
93 | let copiedJsFiles = 0;
94 | for (const file of JS_FILES) {
95 | if (fs.existsSync(file.src)) {
96 | fs.copyFileSync(file.src, file.dest);
97 | copiedJsFiles++;
98 | console.log(`Copied: ${file.src} -> ${file.dest}`);
99 | } else {
100 | console.warn(`Source file not found: ${file.src}`);
101 | }
102 | }
103 |
104 | console.log(`Copied ${copiedJsFiles} JavaScript files to ${WASM_DEST_DIR}`);
105 |
106 | // Copy WebAssembly aliases
107 | let copiedAliases = 0;
108 | for (const file of WASM_ALIASES) {
109 | if (fs.existsSync(file.src)) {
110 | fs.copyFileSync(file.src, file.dest);
111 | copiedAliases++;
112 | console.log(`Created alias: ${file.src} -> ${file.dest}`);
113 | } else {
114 | console.warn(`Source file for alias not found: ${file.src}`);
115 | }
116 | }
117 |
118 | console.log(`Created ${copiedAliases} WebAssembly file aliases in ${WASM_DEST_DIR}`);
119 |
120 | // Copy worker files
121 | let copiedWorkerFiles = 0;
122 | for (const file of WORKER_FILES) {
123 | if (fs.existsSync(file.src)) {
124 | try {
125 | fs.copyFileSync(file.src, file.dest);
126 | copiedWorkerFiles++;
127 | console.log(`Copied worker file: ${file.src} -> ${file.dest}`);
128 | } catch (err) {
129 | console.warn(`Failed to copy worker file ${file.src}: ${err}`);
130 | }
131 | } else {
132 | console.warn(`Worker file not found: ${file.src}`);
133 | }
134 | }
135 |
136 | console.log(`Copied ${copiedWorkerFiles} WebWorker files to ${WASM_DEST_DIR}`);
--------------------------------------------------------------------------------
/crabwalk-web/serve-perspective-test.js:
--------------------------------------------------------------------------------
1 | // Simple HTTP server to serve the Perspective test HTML file
2 | import http from 'http';
3 | import fs from 'fs';
4 | import path from 'path';
5 | import { fileURLToPath } from 'url';
6 |
7 | // Get current directory
8 | const __filename = fileURLToPath(import.meta.url);
9 | const __dirname = path.dirname(__filename);
10 |
11 | const PORT = 3000;
12 |
13 | const MIME_TYPES = {
14 | '.html': 'text/html',
15 | '.js': 'text/javascript',
16 | '.css': 'text/css',
17 | '.json': 'application/json',
18 | '.wasm': 'application/wasm',
19 | };
20 |
21 | const server = http.createServer((req, res) => {
22 | console.log(`Request: ${req.method} ${req.url}`);
23 |
24 | let filePath;
25 |
26 | // Handle root path
27 | if (req.url === '/') {
28 | filePath = path.join(__dirname, 'src/test/perspective-test-page.html');
29 | }
30 | // Handle direct file requests in the test directory
31 | else if (req.url.endsWith('.html') && !req.url.includes('/')) {
32 | // If it's just a filename without a path, look in the test directory
33 | filePath = path.join(__dirname, 'src/test', req.url);
34 | console.log(`Looking for HTML file in test directory: ${filePath}`);
35 | }
36 | // Handle all other paths
37 | else {
38 | // For other paths, try both with and without src prefix
39 | const directPath = path.join(__dirname, req.url.startsWith('/') ? req.url.slice(1) : req.url);
40 | const srcPath = path.join(__dirname, 'src', req.url.startsWith('/') ? req.url.slice(1) : req.url);
41 |
42 | // Check if the file exists with src prefix first
43 | if (fs.existsSync(srcPath)) {
44 | filePath = srcPath;
45 | console.log(`Found file with src prefix: ${filePath}`);
46 | } else {
47 | filePath = directPath;
48 | console.log(`Trying direct path: ${filePath}`);
49 | }
50 | }
51 |
52 | const extname = path.extname(filePath);
53 | const contentType = MIME_TYPES[extname] || 'text/plain';
54 |
55 | fs.readFile(filePath, (err, content) => {
56 | if (err) {
57 | if (err.code === 'ENOENT') {
58 | console.error(`File not found: ${filePath}`);
59 |
60 | // If the file wasn't found and it's an HTML file, try in the test directory as a fallback
61 | if (req.url.endsWith('.html')) {
62 | const testDirPath = path.join(__dirname, 'src/test', req.url.startsWith('/') ? req.url.slice(1) : req.url);
63 | console.log(`Trying test directory as fallback: ${testDirPath}`);
64 |
65 | fs.readFile(testDirPath, (testErr, testContent) => {
66 | if (testErr) {
67 | res.writeHead(404);
68 | res.end('File not found');
69 | } else {
70 | res.writeHead(200, {
71 | 'Content-Type': contentType,
72 | 'Access-Control-Allow-Origin': '*',
73 | 'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
74 | 'Access-Control-Allow-Headers': 'Content-Type'
75 | });
76 | res.end(testContent, 'utf-8');
77 | }
78 | });
79 | } else {
80 | res.writeHead(404);
81 | res.end('File not found');
82 | }
83 | } else {
84 | console.error(`Server error: ${err.code}`);
85 | res.writeHead(500);
86 | res.end(`Server Error: ${err.code}`);
87 | }
88 | } else {
89 | // Add CORS headers to allow loading from CDN
90 | res.writeHead(200, {
91 | 'Content-Type': contentType,
92 | 'Access-Control-Allow-Origin': '*',
93 | 'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
94 | 'Access-Control-Allow-Headers': 'Content-Type'
95 | });
96 | res.end(content, 'utf-8');
97 | }
98 | });
99 | });
100 |
101 | server.listen(PORT, () => {
102 | console.log(`Server running at http://localhost:${PORT}/`);
103 | console.log(`Open http://localhost:${PORT}/ to view the Perspective test options`);
104 | });
--------------------------------------------------------------------------------
/crabwalk-web/src/assets/react.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/crabwalk-web/src/components/DatabaseExplorer.tsx:
--------------------------------------------------------------------------------
1 | import { useEffect, useState } from 'react';
2 | import { listTables, TableInfo } from '../utils/duckdb';
3 | import TableViewer from './TableViewer';
4 |
5 | interface DatabaseExplorerProps {
6 | className?: string;
7 | }
8 |
9 | const styles = {
10 | container: {
11 | display: 'flex',
12 | flexDirection: 'column' as const,
13 | height: '100%',
14 | padding: '1rem',
15 | },
16 | header: {
17 | display: 'flex',
18 | justifyContent: 'space-between',
19 | alignItems: 'center',
20 | marginBottom: '1rem',
21 | },
22 | title: {
23 | fontSize: '1.25rem',
24 | fontWeight: 600,
25 | margin: 0,
26 | },
27 | uploadButton: {
28 | backgroundColor: '#2563eb',
29 | color: 'white',
30 | border: 'none',
31 | borderRadius: '0.375rem',
32 | padding: '0.5rem 1rem',
33 | fontSize: '0.875rem',
34 | fontWeight: 500,
35 | cursor: 'pointer',
36 | },
37 | tableList: {
38 | display: 'grid',
39 | gridTemplateColumns: 'repeat(auto-fill, minmax(300px, 1fr))',
40 | gap: '1rem',
41 | flex: 1,
42 | overflowY: 'auto' as const,
43 | },
44 | tableCard: {
45 | backgroundColor: 'white',
46 | borderRadius: '0.5rem',
47 | border: '1px solid #e5e7eb',
48 | padding: '1rem',
49 | cursor: 'pointer',
50 | transition: 'transform 0.1s, box-shadow 0.1s',
51 | ':hover': {
52 | transform: 'translateY(-2px)',
53 | boxShadow: '0 4px 6px rgba(0, 0, 0, 0.1)',
54 | },
55 | },
56 | tableName: {
57 | fontSize: '1rem',
58 | fontWeight: 600,
59 | marginBottom: '0.5rem',
60 | },
61 | tableInfo: {
62 | fontSize: '0.875rem',
63 | color: '#6b7280',
64 | },
65 | loadingIndicator: {
66 | display: 'flex',
67 | alignItems: 'center',
68 | justifyContent: 'center',
69 | height: '200px',
70 | color: '#6b7280',
71 | },
72 | loadingSpinner: {
73 | border: '4px solid #e5e7eb',
74 | borderTopColor: '#3b82f6',
75 | borderRadius: '50%',
76 | width: '24px',
77 | height: '24px',
78 | animation: 'spin 1s linear infinite',
79 | marginRight: '0.5rem',
80 | },
81 | error: {
82 | color: '#ef4444',
83 | backgroundColor: '#fee2e2',
84 | padding: '1rem',
85 | borderRadius: '0.5rem',
86 | marginTop: '1rem',
87 | },
88 | noTables: {
89 | textAlign: 'center' as const,
90 | padding: '2rem',
91 | color: '#6b7280',
92 | },
93 | fileInput: {
94 | display: 'none',
95 | },
96 | badge: (schema: string) => ({
97 | fontSize: '0.75rem',
98 | fontWeight: 500,
99 | padding: '0.125rem 0.375rem',
100 | borderRadius: '0.25rem',
101 | backgroundColor: schema === 'main' ? '#e0f2fe' : '#f0fdf4',
102 | color: schema === 'main' ? '#0369a1' : '#166534',
103 | marginLeft: '0.5rem',
104 | }),
105 | };
106 |
107 | const DatabaseExplorer: React.FC<DatabaseExplorerProps> = ({ className }) => {
108 | const [tables, setTables] = useState<TableInfo[]>([]);
109 | const [loading, setLoading] = useState(true);
110 | const [error, setError] = useState<string | null>(null);
111 | const [selectedTable, setSelectedTable] = useState<string | null>(null);
112 | // Using just refreshCounter for the dependency array in useEffect
113 | const [refreshCounter] = useState(0);
114 |
115 | // Load the list of tables
116 | useEffect(() => {
117 | const fetchTables = async () => {
118 | setLoading(true);
119 | setError(null);
120 |
121 | try {
122 | const tablesList = await listTables();
123 | setTables(tablesList);
124 | } catch (err) {
125 | console.error('Error fetching tables:', err);
126 | setError(`Failed to fetch tables: ${err instanceof Error ? err.message : String(err)}`);
127 | } finally {
128 | setLoading(false);
129 | }
130 | };
131 |
132 | fetchTables();
133 | }, [refreshCounter]);
134 |
135 | // This function was removed as we now handle database file uploads through the main App component
136 |
137 | return (
138 |
139 |
140 |
Database Tables
141 |
142 |
143 | {error && (
144 |
{error}
145 | )}
146 |
147 | {loading ? (
148 |
149 |
150 |
Loading tables...
151 |
152 | ) : tables.length === 0 ? (
153 |
154 |
No tables found. Click "Upload Files" in the top bar to upload a database file (.db, .sqlite, or .duckdb).
155 |
156 | ) : (
157 |
158 | {tables.map((table) => {
159 | // Use the displayName from the table info object if available
160 | // Otherwise fall back to the old behavior
161 | let tableName = table.displayName || table.name;
162 | let schema = 'main';
163 | let database = null;
164 |
165 | // Parse the full identifier to extract database, schema, and table parts
166 | const parts = tableName.split('.');
167 | if (parts.length === 3) {
168 | // Format: database.schema.table
169 | database = parts[0];
170 | schema = parts[1];
171 | tableName = parts[2];
172 | } else if (parts.length === 2) {
173 | // Format: schema.table
174 | schema = parts[0];
175 | tableName = parts[1];
176 | }
177 |
178 | return (
179 |
setSelectedTable(table.name)}
183 | role="button"
184 | tabIndex={0}
185 | >
186 |
187 | {tableName}
188 | {schema !== 'main' && {schema}}
189 | {database && {database}}
190 |
191 |
192 | {table.rowCount.toLocaleString()} rows • {table.columnCount} columns
193 |
194 |
195 | );
196 | })}
197 |
198 | )}
199 |
200 | {selectedTable && (
201 |
setSelectedTable(null)}
204 | />
205 | )}
206 |
207 | );
208 | };
209 |
210 | export default DatabaseExplorer;
--------------------------------------------------------------------------------
/crabwalk-web/src/components/MermaidDiagram.tsx:
--------------------------------------------------------------------------------
1 | import { useEffect, useState } from 'react';
2 | import mermaid from 'mermaid';
3 | import { processLineageDiagram } from '../utils/lineageProcessor';
4 |
5 | interface MermaidDiagramProps {
6 | content: string;
7 | }
8 |
9 | // Initialize mermaid once to prevent multiple initializations
10 | mermaid.initialize({
11 | startOnLoad: false,
12 | theme: 'default',
13 | securityLevel: 'loose',
14 | fontFamily: 'system-ui, sans-serif',
15 | });
16 |
17 | // Inline styles for MermaidDiagram
18 | const styles = {
19 | container: {
20 | backgroundColor: 'white',
21 | border: '1px solid #e5e7eb',
22 | borderRadius: '8px',
23 | padding: '1.5rem',
24 | overflow: 'auto',
25 | marginBottom: '2rem',
26 | },
27 | errorMessage: {
28 | color: '#dc2626',
29 | backgroundColor: '#fee2e2',
30 | border: '1px solid #fecaca',
31 | borderRadius: '4px',
32 | padding: '1rem',
33 | marginTop: '1rem',
34 | },
35 | errorPre: {
36 | marginTop: '1rem',
37 | whiteSpace: 'pre-wrap' as const,
38 | fontSize: '0.75rem',
39 | backgroundColor: 'rgba(0, 0, 0, 0.05)',
40 | padding: '0.5rem',
41 | borderRadius: '4px',
42 | },
43 | toggleContainer: {
44 | marginBottom: '1rem',
45 | display: 'flex',
46 | justifyContent: 'space-between',
47 | alignItems: 'center',
48 | backgroundColor: '#f0f9ff',
49 | border: '1px solid #bae6fd',
50 | borderRadius: '4px',
51 | padding: '0.75rem 1rem'
52 | },
53 | toggleBtn: (active: boolean) => ({
54 | backgroundColor: active ? '#0ea5e9' : '#e0f2fe',
55 | color: active ? 'white' : '#0369a1',
56 | border: 'none',
57 | borderRadius: '4px',
58 | padding: '0.5rem 0.75rem',
59 | fontSize: '0.875rem',
60 | cursor: 'pointer'
61 | }),
62 | diagramContent: {
63 | width: '100%',
64 | minHeight: '200px',
65 | }
66 | };
67 |
68 | const MermaidDiagram: React.FC<MermaidDiagramProps> = ({ content }) => {
69 | const [svg, setSvg] = useState('');
70 | const [error, setError] = useState('');
71 | const [processedContent, setProcessedContent] = useState(content);
72 | const [hasConnections, setHasConnections] = useState(false);
73 | const [showEnhanced, setShowEnhanced] = useState(true);
74 | const [isProcessing, setIsProcessing] = useState(true);
75 |
76 | // Process the content to add connections if needed
77 | useEffect(() => {
78 | try {
79 | if (!content || typeof content !== 'string') {
80 | setProcessedContent('');
81 | setIsProcessing(false);
82 | return;
83 | }
84 |
85 | // Check if the diagram already has connections
86 | const hasExistingConnections =
87 | content.includes('-->') ||
88 | content.includes('->') ||
89 | content.includes('---');
90 |
91 | setHasConnections(hasExistingConnections);
92 |
93 | // Process the content to add connections if none exist
94 | const processed = processLineageDiagram(content);
95 | setProcessedContent(processed);
96 | setIsProcessing(false);
97 | } catch (err) {
98 | console.error('Error processing diagram content:', err);
99 | setProcessedContent(content); // Fallback to original
100 | setIsProcessing(false);
101 | }
102 | }, [content]);
103 |
104 | // Render the mermaid diagram when content changes
105 | useEffect(() => {
106 | const renderDiagram = async () => {
107 | if (isProcessing) return;
108 |
109 | setError('');
110 | setSvg('');
111 |
112 | try {
113 | // Get the content to display (original or processed)
114 | const displayContent = showEnhanced ? processedContent : content;
115 |
116 | if (!displayContent || typeof displayContent !== 'string') {
117 | throw new Error('No valid diagram content to render');
118 | }
119 |
120 | // Generate a unique ID to avoid conflicts
121 | const id = `mermaid-${Date.now()}-${Math.floor(Math.random() * 10000)}`;
122 |
123 | // Render the diagram
124 | const { svg } = await mermaid.render(id, displayContent);
125 | setSvg(svg);
126 | } catch (err) {
127 | console.error('Error rendering Mermaid diagram:', err);
128 | setError(String(err));
129 | }
130 | };
131 |
132 | renderDiagram();
133 | }, [content, processedContent, showEnhanced, isProcessing]);
134 |
135 | return (
136 |
137 | {!hasConnections && processedContent !== content && (
138 |
139 |
140 |
141 | Enhanced Diagram
142 |
143 |
144 | Connections between tables have been automatically generated.
145 |
146 |
147 |
153 |
154 | )}
155 |
156 | {error && (
157 |
158 |
Error rendering diagram
159 |
{error}
160 |
{showEnhanced ? processedContent : content}
161 |
162 | )}
163 |
164 | {isProcessing ? (
165 |
170 | Processing diagram...
171 |
172 | ) : !error && (
173 |
177 | )}
178 |
179 | );
180 | };
181 |
182 | export default MermaidDiagram;
--------------------------------------------------------------------------------
/crabwalk-web/src/components/SqlViewer.tsx:
--------------------------------------------------------------------------------
1 | import { useState, useEffect } from 'react';
2 |
3 | interface SqlViewerProps {
4 | filePath: string;
5 | fileName: string;
6 | onClose?: () => void;
7 | }
8 |
9 | // Inline styles
10 | const styles = {
11 | overlay: {
12 | position: 'fixed' as const,
13 | top: 0,
14 | left: 0,
15 | right: 0,
16 | bottom: 0,
17 | backgroundColor: 'rgba(0, 0, 0, 0.5)',
18 | display: 'flex',
19 | alignItems: 'center',
20 | justifyContent: 'center',
21 | zIndex: 50,
22 | padding: '1rem',
23 | },
24 | modal: {
25 | backgroundColor: 'white',
26 | borderRadius: '0.5rem',
27 | boxShadow: '0 25px 50px -12px rgba(0, 0, 0, 0.25)',
28 | width: '100%',
29 | maxWidth: '56rem',
30 | maxHeight: '90vh',
31 | display: 'flex',
32 | flexDirection: 'column' as const,
33 | },
34 | header: {
35 | display: 'flex',
36 | justifyContent: 'space-between',
37 | alignItems: 'center',
38 | borderBottom: '1px solid #e5e7eb',
39 | padding: '1rem',
40 | },
41 | title: {
42 | fontSize: '1.125rem',
43 | fontWeight: 500,
44 | },
45 | closeButton: {
46 | color: '#6b7280',
47 | border: 'none',
48 | background: 'none',
49 | cursor: 'pointer',
50 | },
51 | content: {
52 | flexGrow: 1,
53 | overflowY: 'auto' as const,
54 | padding: '1rem',
55 | },
56 | loadingContainer: {
57 | display: 'flex',
58 | justifyContent: 'center',
59 | alignItems: 'center',
60 | height: '16rem',
61 | },
62 | spinner: {
63 | height: '2rem',
64 | width: '2rem',
65 | borderRadius: '9999px',
66 | borderBottom: '2px solid #3b82f6',
67 | animation: 'spin 1s linear infinite',
68 | },
69 | errorMessage: {
70 | color: '#ef4444',
71 | padding: '1rem',
72 | },
73 | codeBlock: {
74 | backgroundColor: '#f3f4f6',
75 | padding: '1rem',
76 | borderRadius: '0.375rem',
77 | overflowX: 'auto' as const,
78 | whiteSpace: 'pre-wrap' as const,
79 | fontSize: '0.875rem',
80 | fontFamily: 'monospace',
81 | },
82 | footer: {
83 | borderTop: '1px solid #e5e7eb',
84 | padding: '1rem',
85 | display: 'flex',
86 | justifyContent: 'flex-end',
87 | },
88 | button: {
89 | padding: '0.5rem 1rem',
90 | backgroundColor: '#e5e7eb',
91 | color: '#1f2937',
92 | borderRadius: '0.375rem',
93 | border: 'none',
94 | cursor: 'pointer',
95 | },
96 | };
97 |
98 | const SqlViewer = ({ filePath, fileName, onClose }: SqlViewerProps) => {
99 | const [content, setContent] = useState('');
100 | const [isLoading, setIsLoading] = useState(true);
101 | const [error, setError] = useState<string | null>(null);
102 |
103 | useEffect(() => {
104 | const fetchContent = async () => {
105 | setIsLoading(true);
106 | setError(null);
107 |
108 | try {
109 | const response = await fetch(filePath);
110 | if (!response.ok) {
111 | throw new Error(`Failed to fetch file: ${response.statusText}`);
112 | }
113 |
114 | const text = await response.text();
115 | setContent(text);
116 | } catch (err) {
117 | console.error('Error loading SQL file:', err);
118 | setError(err instanceof Error ? err.message : 'Failed to load SQL file');
119 | } finally {
120 | setIsLoading(false);
121 | }
122 | };
123 |
124 | fetchContent();
125 | }, [filePath]);
126 |
127 | return (
128 |
129 |
130 |
131 |
{fileName}
132 |
141 |
142 |
143 |
144 | {isLoading ? (
145 |
151 | ) : error ? (
152 |
153 | Error: {error}
154 |
155 | ) : (
156 |
157 | {content}
158 |
159 | )}
160 |
161 |
162 |
163 |
169 |
170 |
171 |
172 | );
173 | };
174 |
175 | export default SqlViewer;
--------------------------------------------------------------------------------
/crabwalk-web/src/components/TableViewer.css:
--------------------------------------------------------------------------------
1 | /* TableViewer.css */
2 | perspective-viewer {
3 | margin-top: 68px;
4 | }
--------------------------------------------------------------------------------
/crabwalk-web/src/global.d.ts:
--------------------------------------------------------------------------------
1 | // Custom elements for Perspective
2 | import React from 'react';
3 |
4 | declare global {
5 | namespace JSX {
6 | interface IntrinsicElements {
7 | 'perspective-viewer': React.DetailedHTMLProps<React.HTMLAttributes<HTMLElement>, HTMLElement> & {
8 | ref?: React.RefObject<HTMLElement>;
9 | };
10 | }
11 | }
12 | }
--------------------------------------------------------------------------------
/crabwalk-web/src/index.css:
--------------------------------------------------------------------------------
1 | /* Reset styles */
2 | html, body {
3 | margin: 0;
4 | padding: 0;
5 | width: 100%;
6 | height: 100%;
7 | }
8 |
9 | #root {
10 | min-height: 100vh;
11 | display: flex;
12 | flex-direction: column;
13 | }
14 |
15 | /* Spinner animation for loading states */
16 | @keyframes spin {
17 | from {
18 | transform: rotate(0deg);
19 | }
20 | to {
21 | transform: rotate(360deg);
22 | }
23 | }
24 |
25 | /* Perspective Viewer Styles */
26 | perspective-viewer {
27 | height: 100%;
28 | width: 100%;
29 | overflow: hidden;
30 | resize: none;
31 | position: absolute;
32 | top: 0;
33 | left: 0;
34 | right: 0;
35 | bottom: 0;
36 | }
--------------------------------------------------------------------------------
/crabwalk-web/src/main.tsx:
--------------------------------------------------------------------------------
1 | import { StrictMode } from 'react'
2 | import { createRoot } from 'react-dom/client'
3 | import './index.css'
4 | import App from './App.tsx'
5 |
6 | createRoot(document.getElementById('root')!).render(
7 | <StrictMode>
8 | <App />
9 | </StrictMode>,
10 | )
11 |
--------------------------------------------------------------------------------
/crabwalk-web/src/perspective.d.ts:
--------------------------------------------------------------------------------
1 | import * as React from 'react';
2 |
3 | declare global {
4 | namespace JSX {
5 | interface IntrinsicElements {
6 | 'perspective-viewer': React.DetailedHTMLProps<React.HTMLAttributes<HTMLElement>, HTMLElement>;
7 | }
8 | }
9 | }
--------------------------------------------------------------------------------
/crabwalk-web/src/server/api.ts:
--------------------------------------------------------------------------------
1 | import fs from 'fs';
2 | import path from 'path';
3 | import express from 'express';
4 | import { Request, Response } from 'express';
5 |
6 | // Create router for API endpoints
7 | const apiRouter = express.Router();
8 |
9 | // Common file patterns for Crabwalk projects
10 | const PROJECT_INDICATORS = [
11 | /database_schema\.xml$/i,
12 | /lineage\.mmd$/i,
13 | /\.sql$/i,
14 | ];
15 |
16 | // API endpoint to list files in current directory
17 | apiRouter.get('/files', (_req: Request, res: Response) => {
18 | try {
19 | const currentDir = process.cwd();
20 | const files: string[] = [];
21 |
22 | // Recursive function to scan directories
23 | const scanDir = (dir: string, relativePath: string = '') => {
24 | const entries = fs.readdirSync(dir, { withFileTypes: true });
25 |
26 | for (const entry of entries) {
27 | const fullPath = path.join(dir, entry.name);
28 | const relativeName = path.join(relativePath, entry.name);
29 |
30 | // Skip node_modules and other hidden directories
31 | if (entry.name.startsWith('.') || entry.name === 'node_modules') {
32 | continue;
33 | }
34 |
35 | if (entry.isDirectory()) {
36 | scanDir(fullPath, relativeName);
37 | } else {
38 | files.push(relativeName);
39 | }
40 | }
41 | };
42 |
43 | scanDir(currentDir);
44 |
45 | res.json(files);
46 | } catch (error) {
47 | console.error('Error scanning directory:', error);
48 | res.status(500).json({ error: 'Failed to scan directory' });
49 | }
50 | });
51 |
52 | // API endpoint to check if current directory is a Crabwalk project
53 | apiRouter.get('/check-project', (_req: Request, res: Response) => {
54 | try {
55 | const currentDir = process.cwd();
56 | const files = fs.readdirSync(currentDir);
57 |
58 | // Check if any of the key project indicators exist
59 | const isProject = files.some(file => {
60 | return PROJECT_INDICATORS.some(pattern => pattern.test(file));
61 | });
62 |
63 | res.json({ isProject });
64 | } catch (error) {
65 | console.error('Error checking project directory:', error);
66 | res.status(500).json({ error: 'Failed to check project directory' });
67 | }
68 | });
69 |
70 | // API endpoint to read a file from the project
71 | apiRouter.get('/file/:filename(*)', (req: Request, res: Response) => {
72 | try {
73 | const { filename } = req.params;
74 | const filePath = path.join(process.cwd(), filename);
75 |
76 | // Security check - prevent directory traversal
77 | if (!filePath.startsWith(process.cwd())) {
78 | return res.status(403).json({ error: 'Access denied' });
79 | }
80 |
81 | // Check if file exists
82 | if (!fs.existsSync(filePath)) {
83 | return res.status(404).json({ error: 'File not found' });
84 | }
85 |
86 | // Read file content
87 | const content = fs.readFileSync(filePath, 'utf8');
88 | res.send(content);
89 | } catch (error) {
90 | console.error('Error reading file:', error);
91 | res.status(500).json({ error: 'Failed to read file' });
92 | }
93 | });
94 |
95 | export default apiRouter;
--------------------------------------------------------------------------------
/crabwalk-web/src/server/index.ts:
--------------------------------------------------------------------------------
1 | // Simple server to serve the app and APIs
2 | import path from 'path';
3 | import express from 'express';
4 | import { fileURLToPath } from 'url';
5 | import apiRouter from './api.js';
6 |
7 | const __filename = fileURLToPath(import.meta.url);
8 | const __dirname = path.dirname(__filename);
9 |
10 | // Create Express app
11 | const app = express();
12 | const PORT = process.env.PORT || 3000;
13 |
14 | // Serve static files from the dist directory
15 | app.use(express.static(path.resolve(__dirname, '../../dist')));
16 |
17 | // Serve test directory for debugging
18 | app.use('/test', express.static(path.resolve(__dirname, '../../src/test')));
19 |
20 | // Mount API routes
21 | app.use('/api', apiRouter);
22 |
23 | // Serve the index.html for any other route (SPA)
24 | app.get('*', (_req, res) => {
25 | res.sendFile(path.resolve(__dirname, '../../dist/index.html'));
26 | });
27 |
28 | // Function to open browser
29 | const openBrowser = async (url: string) => {
30 | // Use dynamic import for ES modules compatibility
31 | const { spawn } = await import('child_process');
32 | let command;
33 | let args;
34 |
35 | switch (process.platform) {
36 | case 'darwin': // macOS
37 | command = 'open';
38 | args = [url];
39 | break;
40 | case 'win32': // Windows
41 | command = 'cmd';
42 | args = ['/c', 'start', url];
43 | break;
44 | default: // Linux and others
45 | command = 'xdg-open';
46 | args = [url];
47 | break;
48 | }
49 |
50 | spawn(command, args, { stdio: 'ignore' });
51 | };
52 |
53 | // Start the server
54 | app.listen(PORT, () => {
55 | const url = `http://localhost:${PORT}`;
56 | console.log(`Crabwalk Web server running at ${url}`);
57 |
58 | // Open browser automatically
59 | setTimeout(async () => {
60 | console.log('Opening web browser...');
61 | await openBrowser(url);
62 | }, 500);
63 | });
64 |
65 | export default app;
--------------------------------------------------------------------------------
/crabwalk-web/src/test/MermaidTest.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import { createRoot } from 'react-dom/client';
3 | import mermaid from 'mermaid';
4 |
5 | // Simple test component for Mermaid
6 | const MermaidTest = () => {
7 | const [svg, setSvg] = React.useState('');
8 | const [error, setError] = React.useState('');
9 |
10 | // Test samples
11 | const samples = [
12 | {
13 | name: 'Simple Graph',
14 | content: `graph TD
15 | A[Client] --> B[Load Balancer]
16 | B --> C[Server1]
17 | B --> D[Server2]`
18 | },
19 | {
20 | name: 'Simple Table List',
21 | content: `graph TD
22 | driver_fact
23 | races
24 | race_summary`
25 | },
26 | {
27 | name: 'Auto-generated connections',
28 | content: `graph TD
29 | driver_fact
30 | races
31 | race_summary
32 | drivers`
33 | },
34 | {
35 | name: 'Invalid content',
36 | content: 'This is not valid mermaid'
37 | }
38 | ];
39 |
40 | const renderDiagram = async (content: string) => {
41 | try {
42 | setError('');
43 |
44 | // Initialize mermaid
45 | mermaid.initialize({
46 | startOnLoad: false,
47 | theme: 'default',
48 | securityLevel: 'loose',
49 | });
50 |
51 | // Generate SVG
52 | const { svg } = await mermaid.render('mermaid-test', content);
53 | setSvg(svg);
54 | } catch (err) {
55 | console.error('Error rendering diagram:', err);
56 | setError(String(err));
57 | setSvg('');
58 | }
59 | };
60 |
61 | return (
62 |
63 |
Mermaid Rendering Test
64 |
65 |
66 |
67 |
Select Test Case
68 | {samples.map((sample, index) => (
69 |
70 |
85 |
86 | ))}
87 |
88 |
89 |
90 |
Output
91 | {error ? (
92 |
99 |
Error:
100 |
{error}
101 |
102 | ) : null}
103 |
104 |
114 |
115 |
116 |
117 | );
118 | };
119 |
120 | // Only render in browser, not during SSR
121 | if (typeof window !== 'undefined') {
122 | const rootElement = document.createElement('div');
123 | document.body.appendChild(rootElement);
124 | createRoot(rootElement).render(<MermaidTest />);
125 | }
126 |
127 | export default MermaidTest;
--------------------------------------------------------------------------------
/crabwalk-web/src/test/PerspectiveTest.tsx:
--------------------------------------------------------------------------------
1 | import { useEffect, useRef, useState } from 'react';
2 |
3 | // Test component for Perspective WebAssembly loading via CDN
4 | export default function PerspectiveTest() {
5 | const [status, setStatus] = useState('Initializing...');
6 | const [error, setError] = useState<string | null>(null);
7 | const viewerRef = useRef<any>(null);
8 | const [isLoaded, setIsLoaded] = useState(false);
9 |
10 | // Load scripts in the head once when the component mounts
11 | useEffect(() => {
12 | // Only load scripts once
13 | if (document.querySelector('script[data-perspective-cdn]')) {
14 | console.log('Perspective CDN scripts already loaded');
15 | setIsLoaded(true);
16 | return;
17 | }
18 |
19 | const scripts = [
20 | { src: 'https://cdn.jsdelivr.net/npm/@finos/perspective/dist/cdn/perspective.js', id: 'perspective-core' },
21 | { src: 'https://cdn.jsdelivr.net/npm/@finos/perspective-viewer/dist/cdn/perspective-viewer.js', id: 'perspective-viewer' },
22 | { src: 'https://cdn.jsdelivr.net/npm/@finos/perspective-viewer-datagrid/dist/cdn/perspective-viewer-datagrid.js', id: 'perspective-datagrid' },
23 | { src: 'https://cdn.jsdelivr.net/npm/@finos/perspective-viewer-d3fc/dist/cdn/perspective-viewer-d3fc.js', id: 'perspective-d3fc' }
24 | ];
25 |
26 | // Add CSS for Perspective
27 | const link = document.createElement('link');
28 | link.rel = 'stylesheet';
29 | link.href = 'https://cdn.jsdelivr.net/npm/@finos/perspective-viewer/dist/css/themes.css';
30 | link.id = 'perspective-css';
31 | document.head.appendChild(link);
32 |
33 | const loadScript = (scriptInfo: { src: string, id: string }) => {
34 | return new Promise<void>((resolve, reject) => {
35 | // Check if script already exists
36 | if (document.getElementById(scriptInfo.id)) {
37 | resolve();
38 | return;
39 | }
40 |
41 | const script = document.createElement('script');
42 | script.id = scriptInfo.id;
43 | script.src = scriptInfo.src;
44 | script.setAttribute('data-perspective-cdn', 'true');
45 | script.async = true;
46 | script.onload = () => {
47 | console.log(`Loaded ${scriptInfo.id}`);
48 | resolve();
49 | };
50 | script.onerror = () => reject(new Error(`Failed to load ${scriptInfo.src}`));
51 | document.head.appendChild(script);
52 | });
53 | };
54 |
55 | // Load scripts sequentially
56 | const loadAllScripts = async () => {
57 | try {
58 | setStatus('Loading Perspective libraries from CDN...');
59 | for (const scriptInfo of scripts) {
60 | await loadScript(scriptInfo);
61 | }
62 | console.log('All Perspective CDN scripts loaded successfully');
63 | setIsLoaded(true);
64 | setStatus('Perspective libraries loaded');
65 | } catch (err) {
66 | console.error('Failed to load Perspective scripts:', err);
67 | setError(`Error loading scripts: ${err instanceof Error ? err.message : String(err)}`);
68 | setStatus('Failed to load scripts');
69 | }
70 | };
71 |
72 | loadAllScripts();
73 |
74 | // No cleanup needed - we want to keep the scripts loaded for other components
75 | }, []);
76 |
77 | // Initialize Perspective and load data once scripts are loaded
78 | useEffect(() => {
79 | if (!isLoaded) return;
80 |
81 | const initPerspective = async () => {
82 | try {
83 | setStatus('Initializing Perspective...');
84 |
85 | // Access the perspective object from the window
86 | // @ts-ignore - perspective is loaded globally
87 | if (!window.perspective) {
88 | throw new Error('Perspective not loaded correctly');
89 | }
90 |
91 | // @ts-ignore - perspective is loaded globally
92 | const worker = await window.perspective.worker();
93 | setStatus('Perspective worker initialized');
94 |
95 | // Fetch sample data from Superstore Arrow dataset
96 | setStatus('Fetching sample data...');
97 | const WASM_URL = "https://cdn.jsdelivr.net/npm/superstore-arrow/superstore.lz4.arrow";
98 |
99 | const table = await fetch(WASM_URL)
100 | .then((x) => x.arrayBuffer())
101 | .then((x) => worker.table(x));
102 |
103 | setStatus('Data loaded successfully');
104 |
105 | // Load into viewer
106 | if (viewerRef.current) {
107 | await viewerRef.current.load(table);
108 | setStatus('Data loaded into viewer successfully');
109 | }
110 | } catch (err) {
111 | console.error('Perspective test failed:', err);
112 | setError(`Error: ${err instanceof Error ? err.message : String(err)}`);
113 | setStatus('Failed');
114 | }
115 | };
116 |
117 | initPerspective();
118 | }, [isLoaded]);
119 |
120 | return (
121 |
122 |
Perspective WebAssembly Test (CDN)
123 |
124 | {/* @ts-ignore */}
125 |
126 |
127 |
128 |
129 | Status: {status}
130 |
131 |
132 | {error && (
133 |
134 | Error: {error}
135 |
136 | )}
137 |
138 | );
139 | }
--------------------------------------------------------------------------------
/crabwalk-web/src/test/perspective-cdn-script-tags.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Perspective CDN Test (Script Tags)
7 |
8 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
Perspective CDN Test (Script Tags)
74 |
75 |
76 |
Loading Perspective from CDN...
77 |
78 |
79 |
82 |
83 |
84 |
135 |
136 |
--------------------------------------------------------------------------------
/crabwalk-web/src/test/perspective-cdn.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Perspective CDN Test
7 |
8 |
64 |
65 |
66 |
67 |
Perspective CDN Test
68 |
69 |
70 |
Loading Perspective from CDN...
71 |
72 |
73 |
76 |
77 |
78 |
134 |
135 |
--------------------------------------------------------------------------------
/crabwalk-web/src/test/perspective-direct.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Perspective Direct CDN Test
7 |
8 |
64 |
65 |
66 |
67 |
Perspective Direct CDN Test
68 |
69 |
70 |
Loading Perspective from CDN...
71 |
72 |
73 |
76 |
77 |
78 |
134 |
135 |
--------------------------------------------------------------------------------
/crabwalk-web/src/test/perspective-test-fixed.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Perspective WebAssembly Test (Fixed)
7 |
8 |
9 |
56 |
57 |
58 |
107 |
108 |
109 |
110 |
Perspective WebAssembly Test
111 |
112 |
113 |
114 |
122 |
123 |
--------------------------------------------------------------------------------
/crabwalk-web/src/test/perspective-test-page.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Perspective Test Options
7 |
79 |
80 |
81 |
82 |
Perspective Test Options
83 |
Choose one of the following test implementations to try out Perspective:
84 |
85 |
86 |
87 |
Direct CDN Implementation
88 |
Uses the exact code from the user's query to load Perspective from CDN.
89 |
Try Direct Implementation
90 |
91 |
92 |
93 |
ES Modules Approach
94 |
Uses ES modules to import Perspective from CDN. Works best in modern browsers.
95 |
Try ES Modules Approach
96 |
97 |
98 |
99 |
Script Tags Approach
100 |
Uses traditional script tags to load Perspective from CDN. More compatible with older browsers.
101 |
Try Script Tags Approach
102 |
103 |
104 |
105 |
Simple Mock Implementation
106 |
Uses a simple mock implementation of Perspective for testing without WebAssembly.
107 |
Try Simple Mock Implementation
108 |
109 |
110 |
111 |
112 |
--------------------------------------------------------------------------------
/crabwalk-web/src/test/perspective-test.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Perspective WebAssembly Test
7 |
8 |
54 |
55 |
56 |
57 |
65 |
66 |
--------------------------------------------------------------------------------
/crabwalk-web/src/test/test.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Mermaid Test
7 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/crabwalk-web/src/types.ts:
--------------------------------------------------------------------------------
1 | // Common type definitions for the application
2 |
3 | export type FileType = 'schema' | 'lineage' | 'sql' | 'database';
4 |
5 | export interface ProjectFile {
6 | name: string;
7 | type: FileType;
8 | content: string;
9 | }
10 |
11 | export interface Table {
12 | name: string;
13 | description: string;
14 | columns: {
15 | name: string;
16 | type: string;
17 | isPrimaryKey: boolean;
18 | sourceTable?: string;
19 | sourceColumn?: string;
20 | description?: string;
21 | }[];
22 | dependencies: string[];
23 | }
--------------------------------------------------------------------------------
/crabwalk-web/src/types/perspective.d.ts:
--------------------------------------------------------------------------------
1 | // Type definitions for @finos/perspective
2 | declare module '@finos/perspective' {
3 | export function worker(): {
4 | table: (data: any, options?: any) => Promise<Table>;
5 | };
6 |
7 | export interface Table {
8 | schema(): Promise<Record<string, string>>;
9 | size(): Promise<number>;
10 | view(config?: any): Promise<View>;
11 | delete(): void;
12 | }
13 |
14 | export interface View {
15 | to_columns(): Promise<Record<string, any[]>>;
16 | to_json(): Promise<any[]>;
17 | delete(): void;
18 | }
19 | }
20 |
21 | // Type definitions for perspective web components
22 | interface PerspectiveViewerElement extends HTMLElement {
23 | load(table: any): Promise;
24 | toggleConfig(): void;
25 | restore(config: any): Promise;
26 | save(): Promise;
27 | table: any;
28 | }
29 |
30 | declare namespace JSX {
31 | interface IntrinsicElements {
32 | 'perspective-viewer': React.DetailedHTMLProps<React.HTMLAttributes<HTMLElement>, HTMLElement>;
33 | }
34 | }
--------------------------------------------------------------------------------
/crabwalk-web/src/utils/chroma-shim.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Shim for chroma-js to provide a default export
3 | * This fixes the "does not provide an export named 'default'" error
4 | */
5 |
6 | // Import chroma-js directly as a namespace
7 | import * as chromaNamespace from 'chroma-js';
8 |
9 | // Create a function that has all the properties of the namespace
10 | const chroma = function(...args) {
11 | return chromaNamespace.chroma(...args);
12 | };
13 |
14 | // Copy all properties from the namespace to our function
15 | Object.assign(chroma, chromaNamespace);
16 |
17 | // Export as default
18 | export default chroma;
19 |
20 | // Don't re-export all named exports to avoid duplicate declarations
21 | // export * from 'chroma-js';
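22 |
23 | // Usage sketch (illustrative): consumers import this shim instead of
24 | // 'chroma-js' directly, so the default import resolves under ESM, e.g.:
25 | //
26 | //   import chroma from './chroma-shim.js';
27 | //   const accent = chroma('#3b82f6').darken(1).hex();
28 | //   const ramp = chroma.scale(['#ffffff', '#3b82f6']).colors(5);
29 | //
30 | // chroma(...) delegates to the chroma-js namespace, and static helpers such
31 | // as chroma.scale remain available because of the Object.assign call above.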
--------------------------------------------------------------------------------
/crabwalk-web/src/utils/projectLoader.ts:
--------------------------------------------------------------------------------
1 | // Utility to automatically load Crabwalk project files from the current directory
2 |
3 | import { FileType } from '../types';
4 |
5 | interface ProjectFile {
6 | name: string;
7 | type: FileType;
8 | content: string;
9 | }
10 |
11 | interface FilePattern {
12 | regex: RegExp;
13 | type: FileType;
14 | }
15 |
16 | // Define patterns to identify file types
17 | const FILE_PATTERNS: FilePattern[] = [
18 | { regex: /database_schema\.xml$/i, type: 'schema' },
19 | { regex: /lineage\.mmd$/i, type: 'lineage' },
20 | { regex: /\.sql$/i, type: 'sql' },
21 | ];
22 |
23 | /**
24 | * Scan for project files in the current directory or provided path
25 | */
26 | export const scanProjectFiles = async (basePath: string = '.'): Promise<ProjectFile[]> => {
27 | try {
28 | // Fetch a listing of files from the server
29 | const response = await fetch(`${basePath}/api/files`);
30 | if (!response.ok) {
31 | throw new Error(`Failed to fetch file listing: ${response.statusText}`);
32 | }
33 |
34 | const fileList = await response.json();
35 |
36 | // Load detected files in parallel
37 | const filePromises = fileList.map(async (filePath: string) => {
38 | // Determine file type based on patterns
39 | const fileName = filePath.split('/').pop() || '';
40 | const filePattern = FILE_PATTERNS.find(p => p.regex.test(fileName));
41 |
42 | if (!filePattern) return null; // Skip files that don't match our patterns
43 |
44 | try {
45 | // Use the dedicated API endpoint to read file contents
46 | const fileResponse = await fetch(`${basePath}/api/file/${encodeURIComponent(filePath)}`);
47 | if (!fileResponse.ok) return null;
48 |
49 | const content = await fileResponse.text();
50 |
51 | return {
52 | name: fileName,
53 | type: filePattern.type,
54 | content,
55 | };
56 | } catch (err) {
57 | console.error(`Error loading file ${filePath}:`, err);
58 | return null;
59 | }
60 | });
61 |
62 | const loadedFiles = await Promise.all(filePromises);
63 |
64 | // Filter out any null values (failed loads)
65 | return loadedFiles.filter((file): file is ProjectFile => file !== null);
66 |
67 | } catch (error) {
68 | console.error('Error scanning project files:', error);
69 | return [];
70 | }
71 | };
72 |
73 | /**
74 | * Check if we're running in a Crabwalk project directory
75 | */
76 | export const isProjectDirectory = async (): Promise<boolean> => {
77 | try {
78 | // Look for key indicators like schema files, lineage diagrams, or SQL files
79 | const response = await fetch('./api/check-project');
80 | if (!response.ok) return false;
81 |
82 | const result = await response.json();
83 | return result.isProject === true;
84 | } catch (error) {
85 | return false;
86 | }
87 | };
88 |
89 | /**
90 | * Load all project files from the current directory
91 | */
92 | export const loadProjectFiles = async (): Promise<ProjectFile[]> => {
93 | try {
94 | // First check if we're in a project directory
95 | const isProject = await isProjectDirectory();
96 | if (!isProject) {
97 | return [];
98 | }
99 |
100 | // Then scan for files
101 | return await scanProjectFiles();
102 | } catch (error) {
103 | console.error('Error loading project files:', error);
104 | return [];
105 | }
106 | };
107 |
108 | export default {
109 | scanProjectFiles,
110 | isProjectDirectory,
111 | loadProjectFiles
112 | };
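113 |
114 | // Usage sketch (illustrative), e.g. from application startup code:
115 | //
116 | //   import { loadProjectFiles } from './utils/projectLoader';
117 | //
118 | //   const files = await loadProjectFiles();          // [] when not in a project dir
119 | //   const lineage = files.find(f => f.type === 'lineage');
120 | //   const sqlModels = files.filter(f => f.type === 'sql');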
--------------------------------------------------------------------------------
/crabwalk-web/src/utils/sqliteFallback.ts:
--------------------------------------------------------------------------------
1 | import initSqlJs, { Database, SqlJsStatic } from 'sql.js';
2 |
3 | // Types to match DuckDB interface
4 | import { TableInfo, ColumnInfo } from './duckdb';
5 |
6 | let SQL: SqlJsStatic | null = null;
7 | let db: Database | null = null;
8 | const tableCache = new Map<string, TableInfo>();
9 |
10 | // Load SQL.js
11 | export const initSqlite = async (): Promise<SqlJsStatic> => {
12 | if (SQL) return SQL;
13 |
14 | try {
15 | console.log('Initializing SQL.js fallback...');
16 | SQL = await initSqlJs({
17 | // Attempt to load from CDN if local fails
18 | locateFile: (file: string) => `https://cdnjs.cloudflare.com/ajax/libs/sql.js/1.8.0/${file}`
19 | });
20 | console.log('SQL.js initialized successfully');
21 | return SQL;
22 | } catch (error) {
23 | console.error('Failed to initialize SQL.js:', error);
24 | throw error;
25 | }
26 | };
27 |
28 | // Load database file
29 | export const loadDatabaseFile = async (file: File): Promise<void> => {
30 | try {
31 | // Initialize SQL.js
32 | const SQL = await initSqlite();
33 |
34 | // Read file as array buffer
35 | const arrayBuffer = await file.arrayBuffer();
36 | const uInt8Array = new Uint8Array(arrayBuffer);
37 |
38 | // Create database from file
39 | if (db) {
40 | db.close();
41 | }
42 |
43 | db = new SQL.Database(uInt8Array);
44 | console.log(`Database ${file.name} loaded successfully with SQL.js`);
45 |
46 | // Update table cache
47 | await refreshTableCache();
48 | } catch (error) {
49 | console.error(`Error loading database with SQL.js:`, error);
50 | throw error;
51 | }
52 | };
53 |
54 | // Execute a SQL query
55 | export const executeQuery = async (query: string): Promise<any[]> => {
56 | if (!db) {
57 | throw new Error('No database loaded. Please load a database file first.');
58 | }
59 |
60 | try {
61 | console.log(`Executing query with SQL.js: ${query}`);
62 | const results = db.exec(query);
63 |
64 | if (results.length === 0) {
65 | return [];
66 | }
67 |
68 | // Convert SQL.js format to our format
69 | const rows = results[0].values.map((row: any[]) => {
70 | const obj: Record<string, any> = {};
71 | results[0].columns.forEach((col: string, i: number) => {
72 | obj[col] = row[i];
73 | });
74 | return obj;
75 | });
76 |
77 | return rows;
78 | } catch (error) {
79 | console.error(`Error executing query: ${query}`, error);
80 | throw error;
81 | }
82 | };
83 |
84 | // List all tables
85 | export const listTables = async (): Promise<TableInfo[]> => {
86 | if (!db) {
87 | return [];
88 | }
89 |
90 | try {
91 | // Refresh the cache before returning
92 | await refreshTableCache();
93 |
94 | // Return the cached tables
95 | return Array.from(tableCache.values());
96 | } catch (error) {
97 | console.error('Error listing tables:', error);
98 | throw error;
99 | }
100 | };
101 |
102 | // Get table statistics
103 | export const getTableStats = async (tableName: string): Promise<TableInfo> => {
104 | if (tableCache.has(tableName)) {
105 | return tableCache.get(tableName)!;
106 | }
107 |
108 | if (!db) {
109 | throw new Error('No database loaded');
110 | }
111 |
112 | try {
113 | // Get column information
114 | const pragma = db.exec(`PRAGMA table_info(${tableName})`);
115 |
116 | if (!pragma.length || !pragma[0].values.length) {
117 | throw new Error(`Table ${tableName} not found`);
118 | }
119 |
120 | const columns: ColumnInfo[] = pragma[0].values.map((row: any[]) => ({
121 | name: row[1],
122 | type: row[2],
123 | nullable: row[3] === 0, // notnull is 1 when NOT NULL, 0 when nullable
124 | }));
125 |
126 | // Get row count
127 | const countResult = db.exec(`SELECT COUNT(*) FROM ${tableName}`);
128 | const rowCount = Number(countResult[0].values[0][0] || 0);
129 |
130 | // Create table info
131 | const tableInfo: TableInfo = {
132 | name: tableName,
133 | rowCount,
134 | columnCount: columns.length,
135 | columns,
136 | };
137 |
138 | // Cache the info
139 | tableCache.set(tableName, tableInfo);
140 |
141 | return tableInfo;
142 | } catch (error) {
143 | console.error(`Error getting stats for table ${tableName}:`, error);
144 | throw error;
145 | }
146 | };
147 |
148 | // Get columns for a table
149 | export const getTableColumns = async (tableName: string): Promise<ColumnInfo[]> => {
150 | const tableInfo = await getTableStats(tableName);
151 | return tableInfo.columns;
152 | };
153 |
154 | // Helper to refresh table cache
155 | async function refreshTableCache(): Promise<void> {
156 | if (!db) return;
157 |
158 | try {
159 | // Clear existing cache
160 | tableCache.clear();
161 |
162 | // Get list of all tables
163 | const tablesQuery = `
164 | SELECT name FROM sqlite_master
165 | WHERE type='table' AND name NOT LIKE 'sqlite_%'
166 | `;
167 |
168 | const tablesResult = db.exec(tablesQuery);
169 |
170 | if (!tablesResult.length) {
171 | return;
172 | }
173 |
174 | const tables = tablesResult[0].values.map((row: any[]) => row[0]);
175 |
176 | // Process each table
177 | for (const tableName of tables) {
178 | try {
179 | // Get column information
180 | const pragma = db.exec(`PRAGMA table_info(${tableName})`);
181 |
182 | const columns: ColumnInfo[] = pragma[0].values.map((row: any[]) => ({
183 | name: row[1],
184 | type: row[2],
185 | nullable: row[3] === 0,
186 | }));
187 |
188 | // Get row count
189 | const countResult = db.exec(`SELECT COUNT(*) FROM ${tableName}`);
190 | const rowCount = Number(countResult[0].values[0][0] || 0);
191 |
192 | // Create the table info
193 | const tableInfo: TableInfo = {
194 | name: tableName,
195 | rowCount,
196 | columnCount: columns.length,
197 | columns,
198 | };
199 |
200 | // Cache the info
201 | tableCache.set(tableName, tableInfo);
202 | } catch (err) {
203 | console.warn(`Error processing table ${tableName}:`, err);
204 | }
205 | }
206 | } catch (error) {
207 | console.error('Error refreshing table cache:', error);
208 | }
209 | }
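210 |
211 | // Usage sketch (illustrative): the fallback mirrors the DuckDB helper surface,
212 | // so callers can switch to it without changing their call sequence:
213 | //
214 | //   await loadDatabaseFile(file);                 // file: File from an upload input
215 | //   const tables = await listTables();            // TableInfo[] with row/column counts
216 | //   const rows = await executeQuery('SELECT * FROM customers LIMIT 5');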
--------------------------------------------------------------------------------
/crabwalk-web/src/vite-env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="vite/client" />
2 |
3 | // Custom elements for Perspective
4 | declare global {
5 | namespace JSX {
6 | interface IntrinsicElements {
7 | 'perspective-viewer': React.DetailedHTMLProps<React.HTMLAttributes<HTMLElement>, HTMLElement>;
8 | }
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/crabwalk-web/tsconfig.app.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
4 | "target": "ES2020",
5 | "useDefineForClassFields": true,
6 | "lib": ["ES2020", "DOM", "DOM.Iterable"],
7 | "module": "ESNext",
8 | "skipLibCheck": true,
9 |
10 | /* Bundler mode */
11 | "moduleResolution": "bundler",
12 | "allowImportingTsExtensions": true,
13 | "isolatedModules": true,
14 | "moduleDetection": "force",
15 | "noEmit": true,
16 | "jsx": "react-jsx",
17 |
18 | /* Linting */
19 | "strict": true,
20 | "noUnusedLocals": false,
21 | "noUnusedParameters": false,
22 | "noFallthroughCasesInSwitch": true,
23 | "noUncheckedSideEffectImports": true
24 | },
25 | "include": ["src"]
26 | }
27 |
--------------------------------------------------------------------------------
/crabwalk-web/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "files": [],
3 | "references": [
4 | { "path": "./tsconfig.app.json" },
5 | { "path": "./tsconfig.node.json" }
6 | ]
7 | }
8 |
--------------------------------------------------------------------------------
/crabwalk-web/tsconfig.node.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
4 | "target": "ES2022",
5 | "lib": ["ES2023"],
6 | "module": "ESNext",
7 | "skipLibCheck": true,
8 |
9 | /* Bundler mode */
10 | "moduleResolution": "bundler",
11 | "allowImportingTsExtensions": true,
12 | "isolatedModules": true,
13 | "moduleDetection": "force",
14 | "noEmit": true,
15 |
16 | /* Linting */
17 | "strict": true,
18 | "noUnusedLocals": true,
19 | "noUnusedParameters": true,
20 | "noFallthroughCasesInSwitch": true,
21 | "noUncheckedSideEffectImports": true
22 | },
23 | "include": ["vite.config.ts"]
24 | }
25 |
--------------------------------------------------------------------------------
/crabwalk-web/tsconfig.server.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ES2020",
4 | "module": "NodeNext",
5 | "moduleResolution": "NodeNext",
6 | "esModuleInterop": true,
7 | "forceConsistentCasingInFileNames": true,
8 | "strict": true,
9 | "skipLibCheck": true,
10 | "outDir": "dist",
11 | "rootDir": "src"
12 | },
13 | "include": ["src/server/**/*.ts"],
14 | "exclude": ["node_modules"]
15 | }
--------------------------------------------------------------------------------
/crabwalk-web/vite.config.ts:
--------------------------------------------------------------------------------
1 | import { defineConfig } from 'vite'
2 | import react from '@vitejs/plugin-react'
3 | import { resolve } from 'path'
4 |
5 | // https://vite.dev/config/
6 | export default defineConfig({
7 | plugins: [
8 | react(),
9 | ],
10 | build: {
11 | rollupOptions: {
12 | input: {
13 | main: resolve(__dirname, 'index.html'),
14 | test: resolve(__dirname, 'src/test/test.html'),
15 | perspectiveTest: resolve(__dirname, 'src/test/perspective-test.html'),
16 | perspectiveTestFixed: resolve(__dirname, 'src/test/perspective-test-fixed.html'),
17 | perspectiveDirect: resolve(__dirname, 'src/test/perspective-direct.html'),
18 | perspectiveSimple: resolve(__dirname, 'src/test/perspective-simple.html'),
19 | },
20 | // Add external dependencies that should be excluded from the bundle
21 | external: [],
22 | // Configure output to handle ESM modules better
23 | output: {
24 | // Preserve modules to avoid bundling issues
25 | preserveModules: false,
26 | // Ensure ESM format
27 | format: 'es',
28 | // Avoid mangling exports which can cause issues with named exports
29 | exports: 'named',
30 | }
31 | },
32 | assetsInlineLimit: 0, // Don't inline WebAssembly files
33 | },
34 | server: {
35 | headers: {
36 | 'Cross-Origin-Opener-Policy': 'same-origin',
37 | 'Cross-Origin-Embedder-Policy': 'require-corp',
38 | },
39 | },
40 | optimizeDeps: {
41 | exclude: [],
42 | include: [],
43 | esbuildOptions: {
44 | // Fix for modules that use Node.js globals
45 | define: {
46 | global: 'globalThis',
47 | 'process.env.NODE_ENV': '"development"'
48 | },
49 | },
50 | },
51 | // Allow importing .wasm files directly
52 | assetsInclude: ['**/*.wasm'],
53 | resolve: {
54 | alias: {},
55 | },
56 | })
57 |
--------------------------------------------------------------------------------
/examples/jaffle_shop/README.md:
--------------------------------------------------------------------------------
1 | # Jaffle Shop Example for Crabwalk
2 |
3 | This is a Crabwalk implementation of the popular "Jaffle Shop" example, which demonstrates a simple ELT workflow processing customer orders for a fictional restaurant.
4 |
5 | ## Structure
6 |
7 | The example is organized in three layers:
8 |
9 | 1. **Sources** - Raw data loaded from CSV files:
10 | - `raw_customers.sql` - Customer information
11 | - `raw_orders.sql` - Order details
12 | - `raw_products.sql` - Product catalog
13 | - `raw_stores.sql` - Store locations
14 | - `raw_supplies.sql` - Supplies inventory
15 | - `raw_items.sql` - Order items
16 |
17 | 2. **Staging** - Lightly transformed data with renamed columns and improved types:
18 | - `stg_customers.sql` - Cleaned customer data
19 | - `stg_orders.sql` - Cleaned order data
20 | - `stg_products.sql` - Cleaned product data
21 | - `stg_locations.sql` - Cleaned store location data
22 | - `stg_supplies.sql` - Cleaned supplies data
23 | - `stg_order_items.sql` - Cleaned order items
24 |
25 | 3. **Marts** - Business-focused models combining multiple sources:
26 | - `customers.sql` - Customer profile with order history
27 | - `orders.sql` - Order details with customer information
28 | - `products.sql` - Product details
29 | - `locations.sql` - Store locations
30 | - `supplies.sql` - Supply inventory
31 | - `order_items.sql` - Order items with product details
32 |
33 | ## Running the Example
34 |
35 | To run the Jaffle Shop example:
36 |
37 | ```bash
38 | ./run-jaffle
39 | ```
40 |
41 | This script will:
42 | 1. Create a fresh database
43 | 2. Process source files (loading from CSVs)
44 | 3. Process staging files (transforming raw data)
45 | 4. Process mart files (creating business models)
46 | 5. Display a summary of all created tables
47 |
48 | ## Exploring the Data
49 |
50 | After running the example, you can explore the data using DuckDB:
51 |
52 | ```bash
53 | duckdb crabwalk.db
54 | ```
55 |
56 | Example queries:
57 |
58 | ```sql
59 | -- View all customers
60 | SELECT * FROM customers;
61 |
62 | -- View orders with customer details
63 | SELECT o.order_id, o.order_date, c.customer_name
64 | FROM orders o
65 | JOIN customers c ON o.customer_id = c.customer_id
66 | LIMIT 10;
67 |
68 | -- View order items with product details
69 | SELECT oi.order_id, oi.product_id, p.product_name, oi.quantity
70 | FROM order_items oi
71 | JOIN products p ON oi.product_id = p.product_id
72 | LIMIT 10;
73 | ```
74 |
75 | ## Notes
76 |
77 | - This example includes some circular dependencies between models to demonstrate how to handle them in Crabwalk (see the sketch below).
78 | - The lineage feature may show errors for file paths, but this doesn't affect the data processing.
79 | - All tables are created in the `crabwalk.db` DuckDB database.
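80 |
81 | For example, `marts/orders.sql` selects from the `order_items` model, while `marts/order_items.sql` defines a CTE named `orders`, so dependency extraction can see a cycle between the two; this is why `run-jaffle` processes each mart file individually. A trimmed illustration of the pattern (not the full models):
82 |
83 | ```sql
84 | -- marts/orders.sql: reads from the order_items model
85 | with order_items_cte as (select * from order_items)
86 | select order_id, count(*) as count_order_items
87 | from order_items_cte
88 | group by 1;
89 |
90 | -- marts/order_items.sql: defines a CTE named "orders" over stg_orders
91 | with orders as (select * from stg_orders)
92 | select oi.*, orders.ordered_at
93 | from stg_order_items as oi
94 | left join orders on oi.order_id = orders.order_id;
95 | ```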
--------------------------------------------------------------------------------
/examples/jaffle_shop/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "project_name": "jaffle_shop",
3 | "base_dir": "/Users/mritchie712/blackbird/yato-main/crabwalk/examples/jaffle_shop",
4 | "output": {
5 | "type": "table",
6 | "keep_table": true
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/examples/jaffle_shop/lineage.mmd:
--------------------------------------------------------------------------------
1 | graph TD
2 | supplies
3 | stg_order_items
4 | order_items
5 | stg_orders
6 | locations
7 | raw_orders
8 | stg_products
9 | raw_products
10 | raw_customers
11 | raw_items
12 | stg_customers
13 | customers
14 | stg_supplies
15 | stg_locations
16 | raw_stores
17 | raw_supplies
18 | products
19 | orders
20 | raw_payments
21 | stg_supplies --> supplies
22 | raw_items --> stg_order_items
23 | stg_supplies --> order_items
24 | stg_orders --> order_items
25 | stg_products --> order_items
26 | products --> order_items
27 | stg_order_items --> order_items
28 | orders --> order_items
29 | supplies --> order_items
30 | raw_orders --> stg_orders
31 | stg_locations --> locations
32 | raw_products --> stg_products
33 | raw_customers --> stg_customers
34 | stg_customers --> customers
35 | stg_orders --> customers
36 | orders --> customers
37 | raw_supplies --> stg_supplies
38 | raw_stores --> stg_locations
39 | stg_products --> products
40 | stg_orders --> orders
41 | order_items --> orders
42 |
--------------------------------------------------------------------------------
/examples/jaffle_shop/lineage/lineage.mmd:
--------------------------------------------------------------------------------
1 | flowchart LR
2 | stg_products(stg_products)
3 | raw_products --> stg_products
4 | stg_customers(stg_customers)
5 | raw_customers --> stg_customers
6 | stg_supplies(stg_supplies)
7 | raw_supplies --> stg_supplies
8 | stg_orders(stg_orders)
9 | raw_orders --> stg_orders
10 | stg_order_items(stg_order_items)
11 | raw_items --> stg_order_items
12 | stg_locations(stg_locations)
13 | raw_stores --> stg_locations
14 | supplies(supplies)
15 | stg_supplies --> supplies
16 | products(products)
17 | stg_products --> products
18 | customers(customers)
19 | stg_orders --> customers
20 | stg_customers --> customers
21 | orders(orders)
22 | stg_orders --> orders
23 | order_items --> orders
24 | order_items(order_items)
25 | stg_products --> order_items
26 | stg_order_items --> order_items
27 | stg_orders --> order_items
28 | stg_supplies --> order_items
29 | locations(locations)
30 | stg_locations --> locations
31 | raw_stores(raw_stores)
32 | examples/jaffle_shop/sources/raw_stores.csv --> raw_stores
33 | raw_customers(raw_customers)
34 | examples/jaffle_shop/sources/raw_customers.csv --> raw_customers
35 | raw_items(raw_items)
36 | examples/jaffle_shop/sources/raw_items.csv --> raw_items
37 | raw_products(raw_products)
38 | examples/jaffle_shop/sources/raw_products.csv --> raw_products
39 | raw_orders(raw_orders)
40 | examples/jaffle_shop/sources/raw_orders.csv --> raw_orders
41 | raw_supplies(raw_supplies)
42 | examples/jaffle_shop/sources/raw_supplies.csv --> raw_supplies
43 |
--------------------------------------------------------------------------------
/examples/jaffle_shop/marts/customers.sql:
--------------------------------------------------------------------------------
1 | with customers as (
2 | select *
3 | from stg_customers
4 | ),
5 | orders as (
6 | select *
7 | from stg_orders
8 | ),
9 | customer_orders_summary as (
10 | select orders.customer_id,
11 | count(distinct orders.order_id) as count_lifetime_orders,
12 | count(distinct orders.order_id) > 1 as is_repeat_buyer,
13 | min(orders.ordered_at) as first_ordered_at,
14 | max(orders.ordered_at) as last_ordered_at,
15 | sum(orders.subtotal) as lifetime_spend_pretax,
16 | sum(orders.tax_paid) as lifetime_tax_paid,
17 | sum(orders.order_total) as lifetime_spend
18 | from orders
19 | group by 1
20 | ),
21 | joined as (
22 | select customers.*,
23 | customer_orders_summary.count_lifetime_orders,
24 | customer_orders_summary.first_ordered_at,
25 | customer_orders_summary.last_ordered_at,
26 | customer_orders_summary.lifetime_spend_pretax,
27 | customer_orders_summary.lifetime_tax_paid,
28 | customer_orders_summary.lifetime_spend,
29 | case
30 | when customer_orders_summary.is_repeat_buyer then 'returning'
31 | else 'new'
32 | end as customer_type
33 | from customers
34 | left join customer_orders_summary on customers.customer_id = customer_orders_summary.customer_id
35 | )
36 | select *
37 | from joined
--------------------------------------------------------------------------------
/examples/jaffle_shop/marts/locations.sql:
--------------------------------------------------------------------------------
1 | with locations as (
2 | select *
3 | from stg_locations
4 | )
5 | select *
6 | from locations
--------------------------------------------------------------------------------
/examples/jaffle_shop/marts/order_items.sql:
--------------------------------------------------------------------------------
1 | with order_items as (
2 | select *
3 | from stg_order_items
4 | ),
5 | orders as (
6 | select *
7 | from stg_orders
8 | ),
9 | products as (
10 | select *
11 | from stg_products
12 | ),
13 | supplies as (
14 | select *
15 | from stg_supplies
16 | ),
17 | order_supplies_summary as (
18 | select product_id,
19 | sum(supply_cost) as supply_cost
20 | from supplies
21 | group by 1
22 | ),
23 | joined as (
24 | select order_items.*,
25 | orders.ordered_at,
26 | products.product_name,
27 | products.product_price,
28 | products.is_food_item,
29 | products.is_drink_item,
30 | order_supplies_summary.supply_cost
31 | from order_items
32 | left join orders on order_items.order_id = orders.order_id
33 | left join products on order_items.product_id = products.product_id
34 | left join order_supplies_summary on order_items.product_id = order_supplies_summary.product_id
35 | )
36 | select *
37 | from joined
--------------------------------------------------------------------------------
/examples/jaffle_shop/marts/orders.sql:
--------------------------------------------------------------------------------
1 | with orders as (
2 | select *
3 | from stg_orders
4 | ),
5 | order_items_cte as (
6 | select *
7 | from order_items
8 | ),
9 | order_items_summary as (
10 | select order_id,
11 | sum(supply_cost) as order_cost,
12 | sum(product_price) as order_items_subtotal,
13 | count(order_item_id) as count_order_items,
14 | sum(
15 | case
16 | when is_food_item then 1
17 | else 0
18 | end
19 | ) as count_food_items,
20 | sum(
21 | case
22 | when is_drink_item then 1
23 | else 0
24 | end
25 | ) as count_drink_items
26 | from order_items_cte
27 | group by 1
28 | ),
29 | compute_booleans as (
30 | select orders.*,
31 | order_items_summary.order_cost,
32 | order_items_summary.order_items_subtotal,
33 | order_items_summary.count_food_items,
34 | order_items_summary.count_drink_items,
35 | order_items_summary.count_order_items,
36 | order_items_summary.count_food_items > 0 as is_food_order,
37 | order_items_summary.count_drink_items > 0 as is_drink_order
38 | from orders
39 | left join order_items_summary on orders.order_id = order_items_summary.order_id
40 | ),
41 | customer_order_count as (
42 | select *,
43 | row_number() over (
44 | partition by customer_id
45 | order by ordered_at asc
46 | ) as customer_order_number
47 | from compute_booleans
48 | )
49 | select *
50 | from customer_order_count
--------------------------------------------------------------------------------
/examples/jaffle_shop/marts/products.sql:
--------------------------------------------------------------------------------
1 | with products as (
2 | select *
3 | from stg_products
4 | )
5 | select *
6 | from products
--------------------------------------------------------------------------------
/examples/jaffle_shop/marts/supplies.sql:
--------------------------------------------------------------------------------
1 | with supplies as (
2 | select *
3 | from stg_supplies
4 | )
5 | select *
6 | from supplies
--------------------------------------------------------------------------------
/examples/jaffle_shop/run-jaffle:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Set the base directory for jaffle_shop
4 | ROOT_DIR="/Users/mritchie712/blackbird/yato-main/crabwalk"
5 | BASE_DIR="$ROOT_DIR/examples/jaffle_shop"
6 |
7 | # First, update the SQL files to use absolute paths
8 | echo "Updating SQL files to use absolute paths..."
9 | for file in $BASE_DIR/sources/*.sql; do
10 | # Replace relative CSV paths with absolute paths
11 | sed -i'.bak' "s|'sources/|'$BASE_DIR/sources/|g" "$file"
12 | done
13 |
14 | # Go to the jaffle shop directory
15 | cd $BASE_DIR
16 |
17 | # Remove old DB to start fresh
18 | rm -f crabwalk.db
19 | rm -f jaffle.db
20 |
21 | # Create empty jaffle DB
22 | touch jaffle.db
23 |
24 | echo "Running jaffle_shop example..."
25 |
26 | # Now build and run crabwalk directly in the jaffle_shop directory
27 | cd $ROOT_DIR
28 | cargo build
29 |
30 | cd $BASE_DIR
31 |
32 | # Process source files first
33 | echo "Processing source files..."
34 | for file in sources/*.sql; do
35 | echo "Running $file"
36 | $ROOT_DIR/target/debug/crabwalk "$file"
37 | done
38 |
39 | # Process staging files
40 | echo "Processing staging files..."
41 | for file in staging/*.sql; do
42 | echo "Running $file"
43 | $ROOT_DIR/target/debug/crabwalk "$file"
44 | done
45 |
46 | # Process mart files individually to avoid dependency cycles
47 | echo "Processing mart files individually..."
48 | for file in marts/*.sql; do
49 | echo "Running $file individually (ignoring dependency cycles)..."
50 | # Run each file individually ignoring dependency errors
51 | $ROOT_DIR/target/debug/crabwalk "$file" || true
52 | done
53 |
54 | # Display summary of tables created
55 | echo
56 | echo "---------------------------------"
57 | echo "JAFFLE SHOP EXAMPLE SUMMARY"
58 | echo "---------------------------------"
59 | echo "All tables have been successfully created in the crabwalk.db database."
60 | echo
61 | echo "Source tables:"
62 | duckdb crabwalk.db "SELECT name FROM sqlite_master WHERE name LIKE 'raw_%' ORDER BY name;" 2>/dev/null || echo "No source tables found"
63 | echo
64 | echo "Staging tables:"
65 | duckdb crabwalk.db "SELECT name FROM sqlite_master WHERE name LIKE 'stg_%' ORDER BY name;" 2>/dev/null || echo "No staging tables found"
66 | echo
67 | echo "Mart tables:"
68 | duckdb crabwalk.db "SELECT name FROM sqlite_master WHERE name NOT LIKE 'raw_%' AND name NOT LIKE 'stg_%' ORDER BY name;" 2>/dev/null || echo "No mart tables found"
69 | echo "---------------------------------"
70 | echo
71 | echo "To explore the data, connect to the database with DuckDB:"
72 | echo "duckdb crabwalk.db"
73 | echo
74 | echo "Example query: SELECT * FROM customers LIMIT 5;"
75 | echo
76 | echo "Jaffle shop processing complete!"
77 |
--------------------------------------------------------------------------------
/examples/jaffle_shop/seeds/raw_customers.sql:
--------------------------------------------------------------------------------
1 | -- Raw customers data
2 | SELECT
3 | 1 as id,
4 | 'Michael' as first_name,
5 | 'P.' as last_name,
6 | '2018-01-01' as created_at
7 | UNION ALL SELECT
8 | 2, 'Shawn', 'M.', '2018-01-02'
9 | UNION ALL SELECT
10 | 3, 'Kathleen', 'P.', '2018-01-03'
11 | UNION ALL SELECT
12 | 4, 'Jimmy', 'D.', '2018-01-04'
13 | UNION ALL SELECT
14 | 5, 'Jess', 'T.', '2018-01-05'
15 | UNION ALL SELECT
16 | 6, 'Deanna', 'W.', '2018-01-06'
17 | UNION ALL SELECT
18 | 7, 'Chris', 'L.', '2018-01-07'
19 | UNION ALL SELECT
20 | 8, 'Nathan', 'L.', '2018-01-08'
21 | UNION ALL SELECT
22 | 9, 'Amanda', 'B.', '2018-01-09'
23 | UNION ALL SELECT
24 | 10, 'Terry', 'D.', '2018-01-10'
--------------------------------------------------------------------------------
/examples/jaffle_shop/seeds/raw_orders.sql:
--------------------------------------------------------------------------------
1 | -- Raw orders data
2 | SELECT
3 | 1 as id,
4 | 1 as user_id,
5 | 10 as order_amount,
6 | '2018-01-01' as order_date,
7 | 'returned' as status
8 | UNION ALL SELECT
9 | 2, 3, 20, '2018-01-02', 'completed'
10 | UNION ALL SELECT
11 | 3, 5, 30, '2018-01-03', 'completed'
12 | UNION ALL SELECT
13 | 4, 6, 40, '2018-01-04', 'returned'
14 | UNION ALL SELECT
15 | 5, 7, 50, '2018-01-05', 'completed'
16 | UNION ALL SELECT
17 | 6, 8, 60, '2018-01-06', 'completed'
18 | UNION ALL SELECT
19 | 7, 9, 70, '2018-01-07', 'completed'
20 | UNION ALL SELECT
21 | 8, 10, 80, '2018-01-08', 'completed'
22 | UNION ALL SELECT
23 | 9, 2, 90, '2018-01-09', 'returned'
24 | UNION ALL SELECT
25 | 10, 4, 100, '2018-01-10', 'completed'
26 | UNION ALL SELECT
27 | 11, 1, 110, '2018-01-11', 'completed'
28 | UNION ALL SELECT
29 | 12, 3, 120, '2018-01-12', 'completed'
30 | UNION ALL SELECT
31 | 13, 5, 130, '2018-01-13', 'completed'
32 | UNION ALL SELECT
33 | 14, 7, 140, '2018-01-14', 'returned'
34 | UNION ALL SELECT
35 | 15, 9, 150, '2018-01-15', 'completed'
--------------------------------------------------------------------------------
/examples/jaffle_shop/seeds/raw_payments.sql:
--------------------------------------------------------------------------------
1 | -- Raw payments data
2 | SELECT
3 | 1 as id,
4 | 1 as order_id,
5 | 'credit_card' as payment_method,
6 | 10 as amount
7 | UNION ALL SELECT
8 | 2, 2, 'credit_card', 20
9 | UNION ALL SELECT
10 | 3, 3, 'coupon', 30
11 | UNION ALL SELECT
12 | 4, 4, 'bank_transfer', 40
13 | UNION ALL SELECT
14 | 5, 5, 'credit_card', 50
15 | UNION ALL SELECT
16 | 6, 6, 'credit_card', 60
17 | UNION ALL SELECT
18 | 7, 7, 'coupon', 70
19 | UNION ALL SELECT
20 | 8, 8, 'credit_card', 80
21 | UNION ALL SELECT
22 | 9, 9, 'bank_transfer', 90
23 | UNION ALL SELECT
24 | 10, 10, 'bank_transfer', 100
25 | UNION ALL SELECT
26 | 11, 11, 'credit_card', 110
27 | UNION ALL SELECT
28 | 12, 12, 'credit_card', 120
29 | UNION ALL SELECT
30 | 13, 13, 'credit_card', 130
31 | UNION ALL SELECT
32 | 14, 14, 'coupon', 140
33 | UNION ALL SELECT
34 | 15, 15, 'bank_transfer', 150
--------------------------------------------------------------------------------
/examples/jaffle_shop/sources/lineage.mmd:
--------------------------------------------------------------------------------
1 | graph TD
2 | raw_items
3 | raw_supplies
4 | raw_products
5 | raw_customers
6 | raw_orders
7 | raw_stores
8 |
--------------------------------------------------------------------------------
/examples/jaffle_shop/sources/raw_customers.sql:
--------------------------------------------------------------------------------
1 | SELECT *
2 | FROM read_csv('/Users/mritchie712/blackbird/yato-main/crabwalk/examples/jaffle_shop/sources/raw_customers.csv')
--------------------------------------------------------------------------------
/examples/jaffle_shop/sources/raw_customers.sql.bak:
--------------------------------------------------------------------------------
1 | SELECT *
2 | FROM read_csv('sources/raw_customers.csv')
--------------------------------------------------------------------------------
/examples/jaffle_shop/sources/raw_items.sql:
--------------------------------------------------------------------------------
1 | SELECT *
2 | FROM read_csv('/Users/mritchie712/blackbird/yato-main/crabwalk/examples/jaffle_shop/sources/raw_items.csv')
--------------------------------------------------------------------------------
/examples/jaffle_shop/sources/raw_items.sql.bak:
--------------------------------------------------------------------------------
1 | SELECT *
2 | FROM read_csv('sources/raw_items.csv')
--------------------------------------------------------------------------------
/examples/jaffle_shop/sources/raw_orders.sql:
--------------------------------------------------------------------------------
1 | SELECT *
2 | FROM read_csv('/Users/mritchie712/blackbird/yato-main/crabwalk/examples/jaffle_shop/sources/raw_orders.csv')
--------------------------------------------------------------------------------
/examples/jaffle_shop/sources/raw_orders.sql.bak:
--------------------------------------------------------------------------------
1 | SELECT *
2 | FROM read_csv('sources/raw_orders.csv')
--------------------------------------------------------------------------------
/examples/jaffle_shop/sources/raw_products.csv:
--------------------------------------------------------------------------------
1 | sku,name,type,price,description
2 | JAF-001,nutellaphone who dis?,jaffle,1100,nutella and banana jaffle
3 | JAF-002,doctor stew,jaffle,1100,house-made beef stew jaffle
4 | JAF-003,the krautback,jaffle,1200,lamb and pork bratwurst with house-pickled cabbage sauerkraut and mustard
5 | JAF-004,flame impala,jaffle,1400,"pulled pork and pineapple al pastor marinated in ghost pepper sauce, kevin parker's favorite! "
6 | JAF-005,mel-bun,jaffle,1200,"melon and minced beef bao, in a jaffle, savory and sweet"
7 | BEV-001,tangaroo,beverage,600,mango and tangerine smoothie
8 | BEV-002,chai and mighty,beverage,500,oatmilk chai latte with protein boost
9 | BEV-003,vanilla ice,beverage,600,iced coffee with house-made french vanilla syrup
10 | BEV-004,for richer or pourover ,beverage,700,daily selection of single estate beans for a delicious hot pourover
11 | BEV-005,adele-ade,beverage,400,"a kiwi and lime agua fresca, hello from the other side of thirst"
12 |
--------------------------------------------------------------------------------
/examples/jaffle_shop/sources/raw_products.sql:
--------------------------------------------------------------------------------
1 | SELECT *
2 | FROM read_csv('/Users/mritchie712/blackbird/yato-main/crabwalk/examples/jaffle_shop/sources/raw_products.csv')
--------------------------------------------------------------------------------
/examples/jaffle_shop/sources/raw_products.sql.bak:
--------------------------------------------------------------------------------
1 | SELECT *
2 | FROM read_csv('sources/raw_products.csv')
--------------------------------------------------------------------------------
/examples/jaffle_shop/sources/raw_stores.csv:
--------------------------------------------------------------------------------
1 | id,name,opened_at,tax_rate
2 | 4b6c2304-2b9e-41e4-942a-cf11a1819378,Philadelphia,2016-09-01T00:00:00,0.06
3 | 40e6ddd6-b8f6-4e17-8bd6-5e53966809d2,Brooklyn,2017-03-12T00:00:00,0.04
4 | 1ce7ac35-d296-4e34-89c4-bf92aa2fe751,Chicago,2018-04-29T00:00:00,0.0625
5 | 39b38c24-679d-4217-b676-a4a0e64c8477,San Francisco,2018-05-09T00:00:00,0.075
6 | 09fdfbaf-3ec6-408d-93f4-1efc535d9938,New Orleans,2019-03-10T00:00:00,0.04
7 | da506490-1e2f-4fe8-8426-f1eee65af28a,Los Angeles,2019-09-13T00:00:00,0.08
8 |
--------------------------------------------------------------------------------
/examples/jaffle_shop/sources/raw_stores.sql:
--------------------------------------------------------------------------------
1 | SELECT *
2 | FROM read_csv('/Users/mritchie712/blackbird/yato-main/crabwalk/examples/jaffle_shop/sources/raw_stores.csv')
--------------------------------------------------------------------------------
/examples/jaffle_shop/sources/raw_stores.sql.bak:
--------------------------------------------------------------------------------
1 | SELECT *
2 | FROM read_csv('sources/raw_stores.csv')
--------------------------------------------------------------------------------
/examples/jaffle_shop/sources/raw_supplies.csv:
--------------------------------------------------------------------------------
1 | id,name,cost,perishable,sku
2 | SUP-001,compostable cutlery - knife,7,False,JAF-001
3 | SUP-002,cutlery - fork,7,False,JAF-001
4 | SUP-003,serving boat,11,False,JAF-001
5 | SUP-004,napkin,4,False,JAF-001
6 | SUP-009,bread,33,True,JAF-001
7 | SUP-011,nutella,46,True,JAF-001
8 | SUP-012,banana,13,True,JAF-001
9 | SUP-001,compostable cutlery - knife,7,False,JAF-002
10 | SUP-002,cutlery - fork,7,False,JAF-002
11 | SUP-003,serving boat,11,False,JAF-002
12 | SUP-004,napkin,4,False,JAF-002
13 | SUP-009,bread,33,True,JAF-002
14 | SUP-010,cheese,20,True,JAF-002
15 | SUP-013,beef stew,169,True,JAF-002
16 | SUP-001,compostable cutlery - knife,7,False,JAF-003
17 | SUP-002,cutlery - fork,7,False,JAF-003
18 | SUP-003,serving boat,11,False,JAF-003
19 | SUP-004,napkin,4,False,JAF-003
20 | SUP-009,bread,33,True,JAF-003
21 | SUP-010,cheese,20,True,JAF-003
22 | SUP-014,lamb and pork bratwurst,234,True,JAF-003
23 | SUP-015,house-pickled cabbage sauerkraut,43,True,JAF-003
24 | SUP-016,mustard,7,True,JAF-003
25 | SUP-001,compostable cutlery - knife,7,False,JAF-004
26 | SUP-002,cutlery - fork,7,False,JAF-004
27 | SUP-003,serving boat,11,False,JAF-004
28 | SUP-004,napkin,4,False,JAF-004
29 | SUP-009,bread,33,True,JAF-004
30 | SUP-010,cheese,20,True,JAF-004
31 | SUP-017,pulled pork,215,True,JAF-004
32 | SUP-018,pineapple,26,True,JAF-004
33 | SUP-021,ghost pepper sauce,20,True,JAF-004
34 | SUP-001,compostable cutlery - knife,7,False,JAF-005
35 | SUP-002,cutlery - fork,7,False,JAF-005
36 | SUP-003,serving boat,11,False,JAF-005
37 | SUP-004,napkin,4,False,JAF-005
38 | SUP-009,bread,33,True,JAF-005
39 | SUP-010,cheese,20,True,JAF-005
40 | SUP-019,melon,33,True,JAF-005
41 | SUP-020,minced beef,124,True,JAF-005
42 | SUP-005,16oz compostable clear cup,13,False,BEV-001
43 | SUP-006,16oz compostable clear lid,4,False,BEV-001
44 | SUP-007,biodegradable straw,13,False,BEV-001
45 | SUP-022,mango,32,True,BEV-001
46 | SUP-023,tangerine,20,True,BEV-001
47 | SUP-005,16oz compostable clear cup,13,False,BEV-002
48 | SUP-006,16oz compostable clear lid,4,False,BEV-002
49 | SUP-007,biodegradable straw,13,False,BEV-002
50 | SUP-008,chai mix,98,True,BEV-002
51 | SUP-024,oatmilk,11,True,BEV-002
52 | SUP-025,whey protein,36,True,BEV-002
53 | SUP-005,16oz compostable clear cup,13,False,BEV-003
54 | SUP-006,16oz compostable clear lid,4,False,BEV-003
55 | SUP-007,biodegradable straw,13,False,BEV-003
56 | SUP-026,coffee,52,True,BEV-003
57 | SUP-027,french vanilla syrup,72,True,BEV-003
58 | SUP-005,16oz compostable clear cup,13,False,BEV-004
59 | SUP-006,16oz compostable clear lid,4,False,BEV-004
60 | SUP-007,biodegradable straw,13,False,BEV-004
61 | SUP-026,coffee,52,True,BEV-004
62 | SUP-005,16oz compostable clear cup,13,False,BEV-005
63 | SUP-006,16oz compostable clear lid,4,False,BEV-005
64 | SUP-007,biodegradable straw,13,False,BEV-005
65 | SUP-028,kiwi,20,True,BEV-005
66 | SUP-029,lime,13,True,BEV-005
67 |
--------------------------------------------------------------------------------
/examples/jaffle_shop/sources/raw_supplies.sql:
--------------------------------------------------------------------------------
1 | SELECT *
2 | FROM read_csv('/Users/mritchie712/blackbird/yato-main/crabwalk/examples/jaffle_shop/sources/raw_supplies.csv')
--------------------------------------------------------------------------------
/examples/jaffle_shop/sources/raw_supplies.sql.bak:
--------------------------------------------------------------------------------
1 | SELECT *
2 | FROM read_csv('sources/raw_supplies.csv')
--------------------------------------------------------------------------------
/examples/jaffle_shop/staging/lineage.mmd:
--------------------------------------------------------------------------------
1 | graph TD
2 | stg_locations
3 | stg_products
4 | stg_supplies
5 | stg_customers
6 | stg_orders
7 | stg_order_items
8 |
--------------------------------------------------------------------------------
/examples/jaffle_shop/staging/stg_customers.sql:
--------------------------------------------------------------------------------
1 | with source as (
2 | select *
3 | from raw_customers
4 | ),
5 | renamed as (
6 | select ---------- ids
7 | id as customer_id,
8 | ---------- text
9 | name as customer_name
10 | from source
11 | )
12 | select *
13 | from renamed
--------------------------------------------------------------------------------
/examples/jaffle_shop/staging/stg_locations.sql:
--------------------------------------------------------------------------------
1 | with source as (
2 | select *
3 | from raw_stores
4 | ),
5 | renamed as (
6 | select ---------- ids
7 | id as location_id,
8 | ---------- text
9 | name as location_name,
10 | ---------- numerics
11 | tax_rate,
12 | ---------- timestamps
13 | date_trunc('day', opened_at) as opened_date
14 | from source
15 | )
16 | select *
17 | from renamed
--------------------------------------------------------------------------------
/examples/jaffle_shop/staging/stg_order_items.sql:
--------------------------------------------------------------------------------
1 | with source as (
2 | select *
3 | from raw_items
4 | ),
5 | renamed as (
6 | select ---------- ids
7 | id as order_item_id,
8 | order_id,
9 | sku as product_id
10 | from source
11 | )
12 | select *
13 | from renamed
--------------------------------------------------------------------------------
/examples/jaffle_shop/staging/stg_orders.sql:
--------------------------------------------------------------------------------
1 | with source as (
2 | select *
3 | from raw_orders
4 | ),
5 | renamed as (
6 | select ---------- ids
7 | id as order_id,
8 | store_id as location_id,
9 | customer as customer_id,
10 | ---------- numerics
11 | subtotal as subtotal_cents,
12 | tax_paid as tax_paid_cents,
13 | order_total as order_total_cents,
14 | cast(subtotal_cents as double) / 100.0 as subtotal,
15 | cast(tax_paid_cents as double) / 100.0 as tax_paid,
16 | cast(order_total_cents as double) / 100.0 as order_total,
17 | ---------- timestamps
18 | date_trunc('day', ordered_at) as ordered_at
19 | from source
20 | )
21 | select *
22 | from renamed
--------------------------------------------------------------------------------
/examples/jaffle_shop/staging/stg_products.sql:
--------------------------------------------------------------------------------
1 | with source as (
2 | select *
3 | from raw_products
4 | ),
5 | renamed as (
6 | select ---------- ids
7 | sku as product_id,
8 | ---------- text
9 | name as product_name,
10 | type as product_type,
11 | description as product_description,
12 | ---------- numerics
13 | cast(price as double) / 100.0 as product_price,
14 | ---------- booleans
15 | coalesce(type = 'jaffle', false) as is_food_item,
16 | coalesce(type = 'beverage', false) as is_drink_item
17 | from source
18 | )
19 | select *
20 | from renamed
--------------------------------------------------------------------------------
/examples/jaffle_shop/staging/stg_supplies.sql:
--------------------------------------------------------------------------------
1 | with source as (
2 | select *
3 | from raw_supplies
4 | ),
5 | renamed as (
6 | select ---------- ids
7 | id || '_' || sku as supply_uuid,
8 | id as supply_id,
9 | sku as product_id,
10 | ---------- text
11 | name as supply_name,
12 | ---------- numerics
13 | cast(cost as double) / 100.0 as supply_cost,
14 | ---------- booleans
15 | perishable as is_perishable_supply
16 | from source
17 | )
18 | select *
19 | from renamed
--------------------------------------------------------------------------------
/examples/race_data/driver_fact.sql:
--------------------------------------------------------------------------------
1 | -- Driver Fact Table
2 | -- Comprehensive statistics for each driver across all races
3 |
4 | WITH
5 | -- Get driver lap data with converted lap times
6 | driver_lap_data AS (
7 | SELECT
8 | DRIVER_NAME,
9 | TEAM,
10 | MANUFACTURER,
11 | "CLASS",
12 | LAP_NUMBER,
13 | -- Convert lap time from MM:SS.sss format to seconds
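   | -- (worked example: '1:23.456' parses as (1 * 60) + 23.456 = 83.456 seconds)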
14 | CASE
15 | WHEN LAP_TIME LIKE '%:%' THEN
16 | (TRY_CAST(SPLIT_PART(LAP_TIME, ':', 1) AS DOUBLE) * 60) +
17 | TRY_CAST(SPLIT_PART(LAP_TIME, ':', 2) AS DOUBLE)
18 | ELSE TRY_CAST(LAP_TIME AS DOUBLE)
19 | END AS lap_time_seconds,
20 | KPH,
21 | TOP_SPEED,
22 | PIT_TIME,
23 | FLAG_AT_FL
24 | FROM transform.races
25 | WHERE LAP_TIME IS NOT NULL AND LAP_TIME != ''
26 | ),
27 |
28 | -- Get max lap number for each driver (to identify last lap)
29 | driver_max_laps AS (
30 | SELECT
31 | DRIVER_NAME,
32 | MAX(LAP_NUMBER) AS max_lap_number
33 | FROM driver_lap_data
34 | GROUP BY DRIVER_NAME
35 | ),
36 |
37 | -- Get first and last lap times
38 | driver_first_last_laps AS (
39 | SELECT
40 | d.DRIVER_NAME,
41 | -- First lap time
42 | MIN(CASE WHEN d.LAP_NUMBER = 1 THEN d.lap_time_seconds END) AS first_lap_time,
43 | -- Last lap time (using the max lap number we calculated)
44 | MIN(CASE WHEN d.LAP_NUMBER = m.max_lap_number THEN d.lap_time_seconds END) AS last_lap_time
45 | FROM driver_lap_data d
46 | JOIN driver_max_laps m ON d.DRIVER_NAME = m.DRIVER_NAME
47 | GROUP BY d.DRIVER_NAME
48 | ),
49 |
50 | -- Calculate driver-specific metrics
51 | driver_metrics AS (
52 | SELECT
53 | d.DRIVER_NAME,
54 | d.TEAM,
55 | d.MANUFACTURER,
56 | d."CLASS",
57 | COUNT(DISTINCT d.LAP_NUMBER) AS total_laps,
58 | MIN(d.lap_time_seconds) AS best_lap_time_seconds,
59 | AVG(d.lap_time_seconds) AS avg_lap_time_seconds,
60 | STDDEV(d.lap_time_seconds) AS lap_time_stddev,
61 | MAX(d.KPH) AS max_speed_kph,
62 | AVG(d.KPH) AS avg_speed_kph,
63 | COUNT(d.PIT_TIME) AS pit_stops,
64 | -- Count laps under different flag conditions
65 | COUNT(CASE WHEN d.FLAG_AT_FL = 'GF' THEN 1 END) AS green_flag_laps,
66 | COUNT(CASE WHEN d.FLAG_AT_FL = 'YF' THEN 1 END) AS yellow_flag_laps,
67 | -- Calculate consistency metrics
68 | PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY d.lap_time_seconds) AS lap_time_p25,
69 | PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY d.lap_time_seconds) AS lap_time_p75,
70 | -- Add first and last lap times
71 | fl.first_lap_time,
72 | fl.last_lap_time
73 | FROM driver_lap_data d
74 | LEFT JOIN driver_first_last_laps fl ON d.DRIVER_NAME = fl.DRIVER_NAME
75 | GROUP BY d.DRIVER_NAME, d.TEAM, d.MANUFACTURER, d."CLASS", fl.first_lap_time, fl.last_lap_time
76 | ),
77 |
78 | -- Calculate driver rankings
79 | driver_rankings AS (
80 | SELECT
81 | DRIVER_NAME,
82 | "CLASS",
83 | -- Rank by best lap time within class
84 | ROW_NUMBER() OVER (PARTITION BY "CLASS" ORDER BY best_lap_time_seconds) AS position_in_class,
85 | -- Rank by best lap time overall
86 | ROW_NUMBER() OVER (ORDER BY best_lap_time_seconds) AS overall_position,
87 | -- Rank by consistency (lower stddev is better)
88 | ROW_NUMBER() OVER (PARTITION BY "CLASS" ORDER BY lap_time_stddev) AS consistency_rank_in_class,
89 | -- Rank by average speed
90 | ROW_NUMBER() OVER (PARTITION BY "CLASS" ORDER BY avg_speed_kph DESC) AS speed_rank_in_class
91 | FROM driver_metrics
92 | )
93 |
94 | -- Final driver fact table
95 | SELECT
96 | d.DRIVER_NAME,
97 | d.TEAM,
98 | d.MANUFACTURER,
99 | d."CLASS",
100 | d.total_laps,
101 | -- Format best lap time as MM:SS.sss
102 | CONCAT(
103 | CAST(FLOOR(d.best_lap_time_seconds / 60) AS INTEGER),
104 | ':',
105 | LPAD(ROUND(CAST(d.best_lap_time_seconds % 60 AS DECIMAL(10,3)), 3)::VARCHAR, 6, '0')
106 | ) AS best_lap_time,
107 | -- Format average lap time as MM:SS.sss
108 | CONCAT(
109 | CAST(FLOOR(d.avg_lap_time_seconds / 60) AS INTEGER),
110 | ':',
111 | LPAD(ROUND(CAST(d.avg_lap_time_seconds % 60 AS DECIMAL(10,3)), 3)::VARCHAR, 6, '0')
112 | ) AS avg_lap_time,
113 | ROUND(d.lap_time_stddev, 3) AS lap_time_stddev,
114 | -- Calculate interquartile range for consistency
115 | ROUND(d.lap_time_p75 - d.lap_time_p25, 3) AS lap_time_iqr,
116 | -- Calculate improvement percentage
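   | -- (worked example: first lap 95.00s, last lap 90.25s -> ((95.00 - 90.25) / 95.00) * 100 = 5.00% improvement)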
117 | CASE
118 | WHEN d.first_lap_time IS NOT NULL AND d.last_lap_time IS NOT NULL AND d.first_lap_time > 0
119 | THEN ROUND(((d.first_lap_time - d.last_lap_time) / d.first_lap_time) * 100, 2)
120 | ELSE NULL
121 | END AS improvement_percentage,
122 | ROUND(d.max_speed_kph, 1) AS max_speed_kph,
123 | ROUND(d.avg_speed_kph, 1) AS avg_speed_kph,
124 | d.pit_stops,
125 | d.green_flag_laps,
126 | d.yellow_flag_laps,
127 | -- Calculate green flag percentage
128 | ROUND((d.green_flag_laps::FLOAT / NULLIF(d.total_laps, 0)) * 100, 1) AS green_flag_percentage,
129 | -- Add rankings
130 | r.position_in_class,
131 | r.overall_position,
132 | r.consistency_rank_in_class,
133 | r.speed_rank_in_class
134 | FROM driver_metrics d
135 | JOIN driver_rankings r ON d.DRIVER_NAME = r.DRIVER_NAME AND d."CLASS" = r."CLASS"
136 | ORDER BY r.overall_position;
--------------------------------------------------------------------------------
/examples/race_data/lineage.mmd:
--------------------------------------------------------------------------------
1 | graph TD
2 | sample_parquet
3 | driver_fact
4 | races
5 | race_summary
6 | races --> sample_parquet
7 | races --> driver_fact
8 | races --> race_summary
9 |
--------------------------------------------------------------------------------
/examples/race_data/race_summary.sql:
--------------------------------------------------------------------------------
1 | -- Race Summary Table
2 | -- This query creates a summary of race performance metrics by driver
3 |
4 | WITH
5 | -- Convert lap times from string format to seconds for calculations
6 | lap_times_in_seconds AS (
7 | SELECT
8 | DRIVER_NAME,
9 | TEAM,
10 | MANUFACTURER,
11 | "CLASS",
12 | LAP_NUMBER,
13 | -- Convert lap time from MM:SS.sss format to seconds using DuckDB string functions
14 | CASE
15 | WHEN LAP_TIME LIKE '%:%' THEN
16 | -- Extract minutes (before colon) and convert to seconds
17 | (TRY_CAST(SPLIT_PART(LAP_TIME, ':', 1) AS DOUBLE) * 60) +
18 | -- Extract seconds part (after colon)
19 | TRY_CAST(SPLIT_PART(LAP_TIME, ':', 2) AS DOUBLE)
20 | ELSE TRY_CAST(LAP_TIME AS DOUBLE)
21 | END AS lap_time_seconds,
22 | KPH,
23 | TOP_SPEED,
24 | PIT_TIME
25 | FROM transform.races
26 | WHERE LAP_TIME IS NOT NULL AND LAP_TIME != ''
27 | ),
28 |
29 | -- Calculate best lap times and averages
30 | driver_stats AS (
31 | SELECT
32 | DRIVER_NAME,
33 | TEAM,
34 | MANUFACTURER,
35 | "CLASS",
36 | COUNT(DISTINCT LAP_NUMBER) AS total_laps,
37 | MIN(lap_time_seconds) AS best_lap_time_seconds,
38 | AVG(lap_time_seconds) AS avg_lap_time_seconds,
39 | MAX(KPH) AS max_speed_kph,
40 | AVG(KPH) AS avg_speed_kph,
41 | COUNT(PIT_TIME) AS pit_stops
42 | FROM lap_times_in_seconds
43 | GROUP BY DRIVER_NAME, TEAM, MANUFACTURER, "CLASS"
44 | )
45 |
46 | -- Final summary table
47 | SELECT
48 | DRIVER_NAME,
49 | TEAM,
50 | MANUFACTURER,
51 | "CLASS",
52 | total_laps,
53 | -- Format best lap time back to MM:SS.sss using DuckDB's formatting
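   | -- (worked example: 83.456 seconds formats back to '1:23.456')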
54 | CONCAT(
55 | CAST(FLOOR(best_lap_time_seconds / 60) AS INTEGER),
56 | ':',
57 | LPAD(ROUND(CAST(best_lap_time_seconds % 60 AS DECIMAL(10,3)), 3)::VARCHAR, 6, '0')
58 | ) AS best_lap_time,
59 | -- Format average lap time back to MM:SS.sss
60 | CONCAT(
61 | CAST(FLOOR(avg_lap_time_seconds / 60) AS INTEGER),
62 | ':',
63 | LPAD(ROUND(CAST(avg_lap_time_seconds % 60 AS DECIMAL(10,3)), 3)::VARCHAR, 6, '0')
64 | ) AS avg_lap_time,
65 | ROUND(max_speed_kph, 1) AS max_speed_kph,
66 | ROUND(avg_speed_kph, 1) AS avg_speed_kph,
67 | pit_stops,
68 | -- Calculate position within class based on best lap time
69 | ROW_NUMBER() OVER (PARTITION BY "CLASS" ORDER BY best_lap_time_seconds) AS position_in_class,
70 | -- Calculate overall position based on best lap time
71 | ROW_NUMBER() OVER (ORDER BY best_lap_time_seconds) AS overall_position
72 | FROM driver_stats
73 | ORDER BY best_lap_time_seconds;
74 |
--------------------------------------------------------------------------------
/examples/race_data/races.sql:
--------------------------------------------------------------------------------
1 | SELECT *
2 | FROM
3 | read_csv_auto('https://imsa.results.alkamelcloud.com/Results/25_2025/02_Daytona%20International%20Speedway/01_IMSA%20WeatherTech%20SportsCar%20Championship/202501251340_Race/24_Hour%2024/23_Time%20Cards_Race.CSV');
--------------------------------------------------------------------------------
/examples/race_data/sample_parquet.sql:
--------------------------------------------------------------------------------
1 | -- @config: {output: {type: "parquet", location: "./output/sample.parquet"}}
2 |
3 | select *
4 | from races
5 | limit 20;
--------------------------------------------------------------------------------
/examples/run_ordered.sql:
--------------------------------------------------------------------------------
1 | -- This is a wrapper script to ensure proper execution order
2 |
3 | -- First, create the staging tables
4 | CREATE OR REPLACE TABLE stg_customers AS
5 | SELECT
6 | 1 as customer_id,
7 | 'John Smith' as name,
8 | 'john@example.com' as email
9 | UNION ALL SELECT
10 | 2 as customer_id,
11 | 'Jane Doe' as name,
12 | 'jane@example.com' as email;
13 |
14 | CREATE OR REPLACE TABLE stg_orders AS
15 | SELECT
16 | 101 as order_id,
17 | 1 as customer_id,
18 | '2023-01-15' as order_date,
19 | 99.99 as amount
20 | UNION ALL SELECT
21 | 102 as order_id,
22 | 1 as customer_id,
23 | '2023-03-10' as order_date,
24 | 149.99 as amount
25 | UNION ALL SELECT
26 | 103 as order_id,
27 | 2 as customer_id,
28 | '2023-02-22' as order_date,
29 | 199.99 as amount;
30 |
31 | -- Now run marts queries
32 |
33 | -- Create customer_orders view
34 | -- @config: {output: {type: "view"}}
35 | CREATE OR REPLACE VIEW customer_orders AS
36 | SELECT
37 | c.customer_id,
38 | c.name as customer_name,
39 | c.email,
40 | o.order_id,
41 | o.order_date,
42 | o.amount
43 | FROM stg_customers c
44 | JOIN stg_orders o ON c.customer_id = o.customer_id;
45 |
46 | -- Create order_summary
47 | -- @config: {output: {type: "parquet", location: "./examples/simple/output/order_summary.parquet"}}
48 | CREATE OR REPLACE TABLE temp_order_summary AS
49 | SELECT
50 | customer_id,
51 | COUNT(*) as order_count,
52 | SUM(amount) as total_spent,
53 | MIN(order_date) as first_order_date,
54 | MAX(order_date) as last_order_date,
55 | AVG(amount) as average_order_value
56 | FROM stg_orders
57 | GROUP BY customer_id;
58 |
59 | -- Export to parquet
60 | COPY (SELECT * FROM temp_order_summary) TO './examples/simple/output/order_summary.parquet' (FORMAT PARQUET);
--------------------------------------------------------------------------------
/examples/simple/database_schema.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Database schema generated by Crabwalk. This schema represents the structure of tables
5 | derived from SQL transformations, including dependencies and relationships.
6 |
7 |
8 |
9 | Tables generated by Crabwalk transformations
10 |
11 | Generated from ./examples/simple/tmp/customer_orders.sql
12 |
13 | Primary key (automatically inferred)
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 | Generated from ./examples/simple/tmp/order_summary.sql
22 |
23 | Primary key (automatically inferred)
24 |
25 |
26 |
27 |
28 |
29 |
30 | Generated from ./examples/simple/tmp/stg_customers.sql
31 |
32 | Primary key (automatically inferred)
33 |
34 |
35 |
36 | Generated from ./examples/simple/tmp/stg_orders.sql
37 |
38 | Primary key (automatically inferred)
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 | customer_orders depends on stg_customers
48 |
49 |
50 |
51 |
52 | customer_orders depends on stg_orders
53 |
54 |
55 |
56 |
57 | order_summary depends on stg_orders
58 |
59 |
60 |
61 |
62 |
63 | SQL-based data transformations executed by Crabwalk
64 |
65 |
66 |
67 | stg_customers
68 | stg_orders
69 |
70 |
71 | SQL transformation
72 |
73 |
74 |
75 |
76 | Source data load
77 |
78 |
79 |
80 |
81 | stg_orders
82 |
83 |
84 | SQL transformation
85 |
86 |
87 |
88 |
89 | Source data load
90 |
91 |
92 |
93 |
94 |
95 |
--------------------------------------------------------------------------------
/examples/simple/lineage.mmd:
--------------------------------------------------------------------------------
1 | graph TD
2 | customer_orders
3 | stg_orders
4 | order_summary
5 | stg_customers
6 | stg_customers --> customer_orders
7 | stg_orders --> customer_orders
8 | stg_orders --> order_summary
9 |
--------------------------------------------------------------------------------
/examples/simple/lineage/lineage.mmd:
--------------------------------------------------------------------------------
1 | graph TD
2 |
--------------------------------------------------------------------------------
/examples/simple/marts/customer_orders.sql:
--------------------------------------------------------------------------------
1 | -- @config: {output: {type: "view"}}
2 | -- Join customers and orders to create a customer orders view
3 | SELECT
4 | c.customer_id,
5 | c.name as customer_name,
6 | c.email,
7 | o.order_id,
8 | o.order_date,
9 | o.amount
10 | FROM stg_customers c
11 | JOIN stg_orders o ON c.customer_id = o.customer_id
--------------------------------------------------------------------------------
/examples/simple/marts/order_summary.sql:
--------------------------------------------------------------------------------
1 | -- @config: {output: {type: "parquet", location: "./output/order_summary.parquet"}}
2 | -- Create an order summary with aggregate metrics
3 | SELECT
4 | customer_id,
5 | COUNT(*) as order_count,
6 | SUM(amount) as total_spent,
7 | MIN(order_date) as first_order_date,
8 | MAX(order_date) as last_order_date,
9 | AVG(amount) as average_order_value
10 | FROM stg_orders
11 | GROUP BY customer_id
--------------------------------------------------------------------------------
/examples/simple/output/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/definite-app/crabwalk/57acc9391fd8e5c7df9f9bd57358855a9d504d1c/examples/simple/output/.gitkeep
--------------------------------------------------------------------------------
/examples/simple/staging/lineage.mmd:
--------------------------------------------------------------------------------
1 | graph TD
2 | stg_customers
3 | stg_orders
4 |
--------------------------------------------------------------------------------
/examples/simple/staging/stg_customers.sql:
--------------------------------------------------------------------------------
1 | -- Create a simple customers staging table
2 | SELECT
3 | 1 as customer_id,
4 | 'John Smith' as name,
5 | 'john@example.com' as email
6 | UNION ALL SELECT
7 | 2 as customer_id,
8 | 'Jane Doe' as name,
9 | 'jane@example.com' as email
--------------------------------------------------------------------------------
/examples/simple/staging/stg_orders.sql:
--------------------------------------------------------------------------------
1 | -- Create a simple orders staging table
2 | SELECT
3 | 101 as order_id,
4 | 1 as customer_id,
5 | '2023-01-15' as order_date,
6 | 99.99 as amount
7 | UNION ALL SELECT
8 | 102 as order_id,
9 | 1 as customer_id,
10 | '2023-03-10' as order_date,
11 | 149.99 as amount
12 | UNION ALL SELECT
13 | 103 as order_id,
14 | 2 as customer_id,
15 | '2023-02-22' as order_date,
16 | 199.99 as amount
--------------------------------------------------------------------------------
/output/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/definite-app/crabwalk/57acc9391fd8e5c7df9f9bd57358855a9d504d1c/output/.gitkeep
--------------------------------------------------------------------------------
/run-simple-example:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Run the simple example that comes with crabwalk
4 | cd /Users/mritchie712/blackbird/yato-main/crabwalk
5 |
6 | # Make sure the build is fresh
7 | cargo build --release
8 |
9 | # Run the simple example which is guaranteed to work
10 | cargo run
11 |
12 | # Check the results
13 | echo -e "\nExamining output files:"
14 | ls -la output/
15 |
16 | # Provide a lineage link
17 | echo -e "\nView the lineage diagram at:"
18 | cat examples/simple/lineage.mmd | grep "Mermaid Live Editor URL"
19 |
--------------------------------------------------------------------------------
/run_jaffle_shop.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Run the jaffle_shop example script directly
4 | cd /Users/mritchie712/blackbird/yato-main/crabwalk
5 | echo "Running jaffle_shop example using the run-jaffle script..."
6 | ./examples/jaffle_shop/run-jaffle
7 |
--------------------------------------------------------------------------------
/src/bin/ast_test.rs:
--------------------------------------------------------------------------------
1 | use anyhow::Result;
2 | use tracing_subscriber::EnvFilter;
3 | use crabwalk::parser::sql::{parse_sql, extract_tables};
4 | use std::fs;
5 |
6 | fn main() -> Result<()> {
7 | // Initialize tracing with filter to show all debug logs
8 | tracing_subscriber::fmt()
9 | .with_env_filter(
10 | EnvFilter::new("debug,duckdb=error")
11 | )
12 | .init();
13 |
14 | // Get the SQL file from command-line arguments
15 | let args: Vec<String> = std::env::args().collect();
16 | if args.len() < 2 {
17 | println!("Usage: {} ", args[0]);
18 | std::process::exit(1);
19 | }
20 |
21 | let sql_file = &args[1];
22 |
23 | // Run the AST test for DuckDB parser
24 | crabwalk::parser::ast_test::test_duckdb_ast(sql_file)?;
25 |
26 | // Additionally, test table extraction
27 | println!("\nTesting table extraction:");
28 | let sql_content = fs::read_to_string(sql_file)?;
29 |
30 | // Parse the SQL and extract tables
31 | let statements = parse_sql(&sql_content, "duckdb")?;
32 |
33 | // Extract tables from each statement
34 | for (i, stmt) in statements.iter().enumerate() {
35 | println!("Extracting tables from statement {}:", i + 1);
36 | let tables = extract_tables(stmt);
37 |
38 | println!("Extracted tables: {:?}", tables);
39 | if tables.is_empty() {
40 | println!("WARNING: No tables extracted!");
41 | }
42 | }
43 |
44 | Ok(())
45 | }
--------------------------------------------------------------------------------
/src/config/mod.rs:
--------------------------------------------------------------------------------
1 | mod output;
2 |
3 | pub use output::OutputConfig;
4 | pub use output::OutputType;
5 |
6 | use serde::{Deserialize, Serialize};
7 |
8 | /// Model configuration settings
9 | #[derive(Debug, Clone, Serialize, Deserialize, Default)]
10 | pub struct ModelConfig {
11 | /// Output configuration for the model
12 | #[serde(default)]
13 | pub output: Option<OutputConfig>,
14 | // Can be extended with additional configuration options
15 | }
16 |
17 | /// Command line arguments for the crabwalk CLI
18 | #[derive(Debug, Clone)]
19 | pub struct CliArgs {
20 | /// Path to the DuckDB database file
21 | pub database_path: String,
22 | /// Path to the SQL folder
23 | pub sql_folder: String,
24 | /// Schema name in the DuckDB database
25 | pub schema: String,
26 | /// Default output type
27 | pub output_type: OutputType,
28 | /// Default output location for file outputs
29 | pub output_location: Option<String>,
30 | /// Whether to overwrite existing database during restore
31 | pub overwrite: bool,
32 | }
--------------------------------------------------------------------------------
/src/config/output.rs:
--------------------------------------------------------------------------------
1 | use serde::{Deserialize, Serialize};
2 | use std::fmt;
3 |
4 | /// Output type for the model
5 | #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
6 | #[serde(rename_all = "lowercase")]
7 | pub enum OutputType {
8 | /// Create a DuckDB table
9 | Table,
10 | /// Create a DuckDB view
11 | View,
12 | /// Export to Parquet file
13 | Parquet,
14 | /// Export to CSV file
15 | Csv,
16 | /// Export to JSON file
17 | Json,
18 | }
19 |
20 | impl Default for OutputType {
21 | fn default() -> Self {
22 | OutputType::Table
23 | }
24 | }
25 |
26 | impl fmt::Display for OutputType {
27 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
28 | match self {
29 | OutputType::Table => write!(f, "table"),
30 | OutputType::View => write!(f, "view"),
31 | OutputType::Parquet => write!(f, "parquet"),
32 | OutputType::Csv => write!(f, "csv"),
33 | OutputType::Json => write!(f, "json"),
34 | }
35 | }
36 | }
37 |
38 | impl std::str::FromStr for OutputType {
39 | type Err = String;
40 |
41 | fn from_str(s: &str) -> Result {
42 | match s.to_lowercase().as_str() {
43 | "table" => Ok(OutputType::Table),
44 | "view" => Ok(OutputType::View),
45 | "parquet" => Ok(OutputType::Parquet),
46 | "csv" => Ok(OutputType::Csv),
47 | "json" => Ok(OutputType::Json),
48 | _ => Err(format!("Unknown output type: {}", s)),
49 | }
50 | }
51 | }
52 |
53 | /// Output configuration for a model
54 | #[derive(Debug, Clone, Serialize, Deserialize)]
55 | pub struct OutputConfig {
56 | /// Type of output (table, view, parquet, csv, json)
57 | #[serde(default)]
58 | #[serde(alias = "type")]
59 | pub output_type: OutputType,
60 | /// Location for file outputs (parquet, csv, json)
61 | pub location: Option<String>,
62 | /// Whether to keep temporary tables for file outputs
63 | #[serde(default)]
64 | pub keep_table: bool,
65 | }
66 |
67 | impl Default for OutputConfig {
68 | fn default() -> Self {
69 | Self {
70 | output_type: OutputType::default(),
71 | location: None,
72 | keep_table: false,
73 | }
74 | }
75 | }
76 |
77 | impl OutputConfig {
78 | /// Create a new output configuration
79 | pub fn new(output_type: OutputType, location: Option<String>, keep_table: bool) -> Self {
80 | Self {
81 | output_type,
82 | location,
83 | keep_table,
84 | }
85 | }
86 |
87 | /// Update this config from another one, only changing non-None values
88 | pub fn update_from(&mut self, other: &OutputConfig) {
89 | self.output_type = other.output_type.clone();
90 | if other.location.is_some() {
91 | self.location = other.location.clone();
92 | }
93 | self.keep_table = other.keep_table;
94 | }
95 |
96 | /// Get the location, replacing {table_name} placeholder if present
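   | ///
   | /// e.g. a configured location of "./output/{table_name}.parquet" resolves to
   | /// "./output/orders.parquet" for a table named "orders"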
97 | pub fn get_location(&self, table_name: &str) -> Option {
98 | self.location.as_ref().map(|loc| loc.replace("{table_name}", table_name))
99 | }
100 |
101 | /// Get default location for a given output type and table name
102 | pub fn default_location(&self, table_name: &str) -> String {
103 | match self.output_type {
104 | OutputType::Parquet => format!("./output/{}.parquet", table_name),
105 | OutputType::Csv => format!("./output/{}.csv", table_name),
106 | OutputType::Json => format!("./output/{}.json", table_name),
107 | _ => String::new(),
108 | }
109 | }
110 | }
--------------------------------------------------------------------------------
/src/executor/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod output;
2 |
3 | use anyhow::{Context, Result};
4 | use duckdb::Connection;
5 | use std::path::Path;
6 |
7 | /// Connect to DuckDB database
8 | ///
9 | /// # Arguments
10 | ///
11 | /// * `database_path` - Path to the DuckDB database file
12 | ///
13 | /// # Returns
14 | ///
15 | /// * `Result<Connection>` - DuckDB connection
16 | pub fn connect_to_duckdb(database_path: &str) -> Result<Connection> {
17 | let path = Path::new(database_path);
18 |
19 | // Ensure parent directory exists
20 | if let Some(parent) = path.parent() {
21 | if !parent.exists() {
22 | std::fs::create_dir_all(parent)
23 | .context(format!("Failed to create directory: {}", parent.display()))?;
24 | }
25 | }
26 |
27 | // Connect to DuckDB
28 | let conn = Connection::open(path)
29 | .context(format!("Failed to connect to DuckDB database: {}", database_path))?;
30 |
31 | Ok(conn)
32 | }
33 |
34 | /// Runtime context for SQL execution
35 | pub struct RunContext {
36 | /// DuckDB connection
37 | conn: Connection,
38 | }
39 |
40 | impl RunContext {
41 | /// Create a new run context
42 | pub fn new(conn: Connection) -> Self {
43 | Self { conn }
44 | }
45 |
46 | /// Execute a SQL statement with environment variable replacement
47 | pub fn execute(&self, sql: &str) -> Result<()> {
48 | // Replace environment variables
49 | let sql_with_env = replace_env_vars(sql)?;
50 |
51 | // Execute the SQL
52 | // Note: DuckDB error codes are output to stderr and can't be easily suppressed
53 | // in a cross-platform way without external dependencies.
54 | self.conn.execute(&sql_with_env, [])
55 | .context(format!("Failed to execute SQL: {}", sql_with_env))?;
56 |
57 | Ok(())
58 | }
59 |
60 | /// Get the DuckDB connection
61 | pub fn get_connection(&self) -> &Connection {
62 | &self.conn
63 | }
64 | }
65 |
66 | /// Replace environment variables in SQL
67 | ///
68 | /// # Arguments
69 | ///
70 | /// * `sql` - SQL with potential environment variables in the format {{VAR_NAME}}
71 | ///
72 | /// # Returns
73 | ///
74 | /// * `Result<String>` - SQL with environment variables replaced
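   | ///
   | /// Illustrative example (`DATA_DIR` is a made-up variable name, not one used in this repo):
   | /// `SELECT * FROM '{{DATA_DIR}}/raw.csv'` becomes `SELECT * FROM '/tmp/data/raw.csv'`
   | /// when the environment variable `DATA_DIR` is set to `/tmp/data`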
75 | fn replace_env_vars(sql: &str) -> Result {
76 | let re = regex::Regex::new(r"\{\{\s*(\w+)\s*\}\}")
77 | .context("Failed to compile environment variable regex")?;
78 |
79 | let result = re.replace_all(sql, |caps: &regex::Captures| {
80 | let var_name = &caps[1];
81 | match std::env::var(var_name) {
82 | Ok(value) => value,
83 | Err(_) => {
84 | tracing::warn!("Environment variable not set: {}", var_name);
85 | format!("{{{{{}}}}}", var_name) // Return original if not set
86 | }
87 | }
88 | });
89 |
90 | Ok(result.to_string())
91 | }
--------------------------------------------------------------------------------
/src/executor/output.rs:
--------------------------------------------------------------------------------
1 | use anyhow::{Context, Result};
2 | use std::fs;
3 | use std::path::Path;
4 |
5 | use crate::config::{OutputConfig, OutputType};
6 | use crate::executor::RunContext;
7 |
8 | /// Handle different output types based on configuration
9 | ///
10 | /// # Arguments
11 | ///
12 | /// * `table_name` - Name of the model
13 | /// * `sql_query` - SQL query string
14 | /// * `output_config` - Output configuration
15 | /// * `schema` - Database schema
16 | /// * `context` - RunContext for SQL execution
17 | ///
18 | /// # Returns
19 | ///
20 | /// * `Result<()>` - Success or error
21 | #[allow(unused_variables)]
22 | pub fn handle_output(
23 | table_name: &str,
24 | sql_query: &str,
25 | output_config: &OutputConfig,
26 | _schema: &str,
27 | context: &RunContext,
28 | ) -> Result<()> {
29 | tracing::info!("Handling output for {}, type: {}", table_name, output_config.output_type);
30 |
31 | match output_config.output_type {
32 | OutputType::Table => {
33 | // Default behavior - create a table
34 | let create_table_sql = format!("CREATE OR REPLACE TABLE {}.{} AS {}", _schema, table_name, sql_query);
35 | context.execute(&create_table_sql)?;
36 | }
37 | OutputType::View => {
38 | // Create a view instead of a table
39 | let create_view_sql = format!("CREATE OR REPLACE VIEW {}.{} AS {}", _schema, table_name, sql_query);
40 | context.execute(&create_view_sql)?;
41 | }
42 | OutputType::Parquet => {
43 | // Write to a Parquet file
44 | tracing::info!("Output type is Parquet for {}", table_name);
45 | handle_file_output(table_name, sql_query, output_config, _schema, context, "parquet")?;
46 | }
47 | OutputType::Csv => {
48 | // Write to a CSV file
49 | handle_file_output(table_name, sql_query, output_config, _schema, context, "csv")?;
50 | }
51 | OutputType::Json => {
52 | // Write to a JSON file
53 | handle_file_output(table_name, sql_query, output_config, _schema, context, "json")?;
54 | }
55 | }
56 |
57 | Ok(())
58 | }
59 |
60 | /// Handle file outputs (Parquet, CSV, JSON)
61 | fn handle_file_output(
62 | table_name: &str,
63 | sql_query: &str,
64 | output_config: &OutputConfig,
65 | _schema: &str,
66 | context: &RunContext,
67 | format: &str,
68 | ) -> Result<()> {
69 | // Get location, with fallback to default
70 | let location = output_config
71 | .get_location(table_name)
72 | .unwrap_or_else(|| output_config.default_location(table_name));
73 |
74 | tracing::info!("File output location: {}", location);
75 |
76 | // Ensure output directory exists
77 | if let Some(parent) = Path::new(&location).parent() {
78 | if !parent.exists() {
79 | tracing::info!("Creating directory: {}", parent.display());
80 | fs::create_dir_all(parent)
81 | .context(format!("Failed to create directory: {}", parent.display()))?;
82 | }
83 | }
84 |
85 | // First create a temporary table
86 | let temp_table = format!("temp_{}", table_name);
87 | let create_temp_table_sql = format!("CREATE OR REPLACE TABLE {} AS {}", temp_table, sql_query);
88 | tracing::info!("Creating temp table with SQL: {}", create_temp_table_sql);
89 | context.execute(&create_temp_table_sql)?;
90 |
91 | // Then export to file
92 | let format_options = match format {
93 | "csv" => "(FORMAT CSV, HEADER)",
94 | "json" => "(FORMAT JSON)",
95 | "parquet" => "(FORMAT PARQUET)",
96 | _ => "(FORMAT PARQUET)",
97 | };
98 |
99 | let export_sql = format!("COPY (SELECT * FROM {}) TO '{}' {}", temp_table, location, format_options);
100 | tracing::info!("Export SQL: {}", export_sql);
101 | let result = context.execute(&export_sql);
102 |
103 | if let Err(ref e) = result {
104 | tracing::error!("Error exporting data: {}", e);
105 | }
106 |
107 | result?;
108 |
109 | // Clean up the temporary table if not keeping it
110 | if !output_config.keep_table {
111 | let drop_sql = format!("DROP TABLE IF EXISTS {}", temp_table);
112 | context.execute(&drop_sql)?;
113 | }
114 |
115 | tracing::info!("Wrote {} file to {}", format, location);
116 |
117 | Ok(())
118 | }
--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
1 | use anyhow::Result;
2 | use tracing_subscriber::EnvFilter;
3 |
4 | /// Main entry point for the crabwalk CLI
5 | fn main() -> Result<()> {
6 | // Initialize tracing with filter to show info level logs by default
7 | // Get logging level from environment or use a less verbose default
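   | // e.g. `RUST_LOG=debug cargo run` enables debug-level logging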
8 | let env_filter = std::env::var("RUST_LOG")
9 | .unwrap_or_else(|_| "info,sqlparser=warn,duckdb=error".to_string());
10 |
11 | tracing_subscriber::fmt()
12 | .with_env_filter(EnvFilter::new(env_filter))
13 | .init();
14 |
15 | // Run the CLI
16 | crabwalk::cli::run()
17 | }
18 |
--------------------------------------------------------------------------------
/src/parser/ast_test.rs:
--------------------------------------------------------------------------------
1 | use anyhow::{Context, Result};
2 | use crate::parser::sql;
3 | use duckdb::Connection;
4 | use std::fs;
5 |
6 | /// Test tool for exploring DuckDB's AST output
7 | pub fn test_duckdb_ast(sql_file: &str) -> Result<()> {
8 | // Read SQL file
9 | println!("Reading SQL file: {}", sql_file);
10 | let sql_content = fs::read_to_string(sql_file)?;
11 |
12 | // Print DuckDB version information
13 | let conn = Connection::open_in_memory().context("Failed to open DuckDB connection")?;
14 |
15 | // Print DuckDB version
16 | if let Ok(mut stmt) = conn.prepare("SELECT version()") {
17 | if let Ok(mut rows) = stmt.query([]) {
18 | if let Ok(Some(row)) = rows.next() {
19 | let version: String = row.get(0)?;
20 | println!("DuckDB version: {}", version);
21 | }
22 | }
23 | }
24 |
25 | // Try to install JSON extension
26 | println!("Attempting to install JSON extension...");
27 | if let Ok(_) = conn.execute("INSTALL 'json'; LOAD 'json';", []) {
28 | println!("Successfully installed and loaded JSON extension");
29 |
30 | // Try direct test of json_serialize_sql
31 | println!("Testing json_serialize_sql with literal SQL...");
32 | if let Ok(mut stmt) = conn.prepare("SELECT json_serialize_sql('SELECT 1 AS test')") {
33 | if let Ok(mut rows) = stmt.query([]) {
34 | if let Ok(Some(row)) = rows.next() {
35 | let result: String = row.get(0)?;
36 | println!("Direct json_serialize_sql test succeeded");
37 | println!("Result: {}", result);
38 |
39 | // Save the result to a file
40 | let output_file = format!("{}_direct_test.json", sql_file);
41 | fs::write(&output_file, &result)?;
42 | println!("Saved result to: {}", output_file);
43 | } else {
44 | println!("Direct json_serialize_sql test: no results");
45 | }
46 | } else {
47 | println!("Direct json_serialize_sql test query failed");
48 | }
49 | } else {
50 | println!("Direct json_serialize_sql test prepare failed");
51 | }
52 | } else {
53 | println!("Failed to install JSON extension. This function might not be available in your DuckDB version.");
54 | }
55 |
56 | // Try to parse with sqlparser
57 | println!("\nParsing with sqlparser:");
58 | match sql::parse_sql(&sql_content, "duckdb") {
59 | Ok(statements) => {
60 | println!("Successfully parsed with sqlparser:");
61 | for (i, stmt) in statements.iter().enumerate() {
62 | println!("Statement {}: {}", i + 1, stmt);
63 | }
64 | },
65 | Err(e) => {
66 | println!("Failed with sqlparser: {}", e);
67 | return Err(e);
68 | }
69 | }
70 |
71 | println!("\nImplementing DuckDB AST parsing may require a newer version of DuckDB with the json_serialize_sql function.");
72 | println!("You should be able to see the output format in the examples you shared.");
73 |
74 | Ok(())
75 | }
--------------------------------------------------------------------------------
/src/parser/config.rs:
--------------------------------------------------------------------------------
1 | use anyhow::{Context, Result};
2 | use regex::Regex;
3 | use crate::config::ModelConfig;
4 |
5 | /// Extract model-level configuration from SQL comments with @config directive
6 | ///
7 | /// Configuration should be in YAML format:
8 | /// -- @config: {output: {type: "view"}}
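   | /// A file output may also specify a location (as used in the bundled examples):
   | /// -- @config: {output: {type: "parquet", location: "./output/order_summary.parquet"}}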
9 | ///
10 | /// # Arguments
11 | ///
12 | /// * `sql` - SQL content with possible @config comments
13 | ///
14 | /// # Returns
15 | ///
16 | /// * `Result