Percival

85 | 86 | 97 | -------------------------------------------------------------------------------- /src/components/App.svelte: -------------------------------------------------------------------------------- 1 | 66 | 67 | 68 | 69 |

(sharing = "none")} 73 | /> 74 |

75 | {#if notebook === undefined} 76 |

77 |

78 |

79 |

80 |

81 |

82 |

83 | {:else} 84 | 85 | {/if} 86 |

87 | -------------------------------------------------------------------------------- /src/lib/runtime.worker.ts: -------------------------------------------------------------------------------- 1 | import Immutable from "immutable"; 2 | import { autoType, csvParse, tsvParse } from "d3-dsv"; 3 | 4 | /** Load data from an external source. */ 5 | async function load(url: string): Promise { 6 | const resp = await fetch(url); 7 | if (!resp.ok) { 8 | throw new Error(`Failed to fetch ${url}:\n${await resp.text()}`); 9 | } 10 | const contentType = resp.headers.get("Content-Type"); 11 | if (url.endsWith(".json") || contentType?.match(/application\/json/i)) { 12 | return resp.json(); 13 | } else if (url.endsWith(".csv") || contentType?.match(/text\/csv/i)) { 14 | return csvParse(await resp.text(), autoType); 15 | } else if ( 16 | url.endsWith(".tsv") || 17 | contentType?.match(/text\/tab-separated-values/i) 18 | ) { 19 | return tsvParse(await resp.text(), autoType); 20 | } else { 21 | throw new Error( 22 | `Unknown file format for ${url}. Only JSON, CSV, and TSV are supported. 23 | Try adding a file extension to the URL or providing a MIME Content-Type header.`, 24 | ); 25 | } 26 | } 27 | 28 | /** Implementations of aggregates. Keep this in sync with `codegen.rs`. 
*/ 29 | const aggregates: Record any> = { 30 | count(results) { 31 | return results.length; 32 | }, 33 | sum(results) { 34 | return results.reduce((x, y) => x + y, 0); 35 | }, 36 | mean(results) { 37 | return results.reduce((x, y) => x + y, 0) / results.length; 38 | }, 39 | min(results) { 40 | let min = null; 41 | for (const x of results) { 42 | if (min === null || x < min) { 43 | min = x; 44 | } 45 | } 46 | return min; 47 | }, 48 | max(results) { 49 | let max = null; 50 | for (const x of results) { 51 | if (max === null || x > max) { 52 | max = x; 53 | } 54 | } 55 | return max; 56 | }, 57 | }; 58 | 59 | const AsyncFunction = Object.getPrototypeOf(async function () {}).constructor; 60 | 61 | let evaluate: 62 | | undefined 63 | | ((deps: Record) => Promise>); 64 | 65 | function initialize(js: string) { 66 | if (evaluate) { 67 | throw new Error("internal: worker was already initialized"); 68 | } 69 | const fn = new AsyncFunction("__percival_deps", "__percival", js); 70 | evaluate = (deps: Record) => 71 | fn(deps, { Immutable, load, aggregates }); 72 | } 73 | 74 | onmessage = (event) => { 75 | if (event.data.type === "source") { 76 | initialize(event.data.code); 77 | } else if (event.data.type === "eval") { 78 | if (!evaluate) { 79 | throw new Error("internal: worker was not initialized"); 80 | } 81 | evaluate(event.data.deps) 82 | .then((results) => { 83 | postMessage(results); 84 | }) 85 | .catch((error: unknown) => { 86 | // Bubble up asynchronous errors to the global worker context. 87 | setTimeout(() => { 88 | throw error; 89 | }); 90 | }); 91 | } else { 92 | throw new Error(`internal: unknown event type: ${event.data.type}`); 93 | } 94 | }; 95 | -------------------------------------------------------------------------------- /crates/percival-wasm/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Datalog compiler for Percival, shared with the client through WebAssembly. 
2 | 3 | #![warn(missing_docs)] 4 | 5 | use wasm_bindgen::prelude::*; 6 | use yansi::Paint; 7 | 8 | use percival::{ast::Program, codegen, errors::format_errors, parser::Grammar}; 9 | 10 | /// Set a panic listener to display better error messages. 11 | #[wasm_bindgen(start)] 12 | pub fn start() { 13 | // When the `console_error_panic_hook` feature is enabled, we call 14 | // `console_error_panic_hook::set_once` during initialization, and then 15 | // we will get better error messages if our code ever panics. 16 | // 17 | // For more details see 18 | // https://github.com/rustwasm/console_error_panic_hook#readme 19 | #[cfg(feature = "console_error_panic_hook")] 20 | console_error_panic_hook::set_once(); 21 | } 22 | 23 | /// Compile a Percival program and return the result. 24 | #[wasm_bindgen] 25 | pub fn compile(src: &str) -> CompilerResult { 26 | thread_local! { 27 | static GRAMMAR: Grammar = Grammar::new(); 28 | } 29 | 30 | let mut src = String::from(src); 31 | if !src.ends_with('\n') { 32 | src += "\n"; 33 | } 34 | CompilerResult(GRAMMAR.with(|grammar| { 35 | grammar 36 | .parse(&src[..]) 37 | .map_err(|err| format_errors(&src[..], err)) 38 | .and_then(|prog| { 39 | let js = codegen::compile(&prog) 40 | .map_err(|err| format!("{} {}", Paint::red("Error:"), err))?; 41 | Ok((prog, js)) 42 | }) 43 | })) 44 | } 45 | 46 | /// The result of a compilation. 47 | #[wasm_bindgen] 48 | pub struct CompilerResult(Result<(Program, String), String>); 49 | 50 | #[wasm_bindgen] 51 | impl CompilerResult { 52 | /// Returns the compiled JavaScript program. 53 | pub fn js(&self) -> Option { 54 | self.0.as_ref().ok().map(|(_, js)| js.clone()) 55 | } 56 | 57 | /// Returns the names of relations that are dependencies of this program. 
58 | pub fn deps(&self) -> Option> { 59 | self.0.as_ref().ok().map(|(prog, _)| { 60 | prog.deps() 61 | .into_iter() 62 | .map(|s| JsValue::from_str(&s)) 63 | .collect() 64 | }) 65 | } 66 | 67 | /// Returns the names of relations produced by this program, including imports. 68 | pub fn results(&self) -> Option> { 69 | self.0.as_ref().ok().map(|(prog, _)| { 70 | prog.results() 71 | .into_iter() 72 | .chain(prog.imports().into_iter()) 73 | .map(|s| JsValue::from_str(&s)) 74 | .collect() 75 | }) 76 | } 77 | 78 | /// Returns a string representation of any errors during compilation. 79 | pub fn err(&self) -> Option { 80 | self.0.as_ref().err().cloned() 81 | } 82 | 83 | /// Returns `true` if the result is `Ok`. 84 | pub fn is_ok(&self) -> bool { 85 | self.0.is_ok() 86 | } 87 | 88 | /// Returns `true` if the result is `Err`. 89 | pub fn is_err(&self) -> bool { 90 | self.0.is_err() 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /crates/percival/src/errors.rs: -------------------------------------------------------------------------------- 1 | //! Module for human-readable error handling with Ariadne. 2 | 3 | use ariadne::{Color, Fmt, Label, Report, ReportKind, Source}; 4 | use chumsky::prelude::*; 5 | 6 | /// Format parser errors into a human-readable message. 
7 | pub fn format_errors(src: &str, errors: Vec>) -> String { 8 | let mut reports = vec![]; 9 | 10 | for e in errors { 11 | let report = Report::build(ReportKind::Error, (), e.span().start); 12 | 13 | let report = match e.reason() { 14 | chumsky::error::SimpleReason::Unclosed { span, delimiter } => report 15 | .with_message(format!( 16 | "Unclosed delimiter {}", 17 | delimiter.fg(Color::Yellow) 18 | )) 19 | .with_label( 20 | Label::new(span.clone()) 21 | .with_message(format!( 22 | "Unclosed delimiter {}", 23 | delimiter.fg(Color::Yellow) 24 | )) 25 | .with_color(Color::Yellow), 26 | ) 27 | .with_label( 28 | Label::new(e.span()) 29 | .with_message(format!( 30 | "Must be closed before this {}", 31 | e.found() 32 | .unwrap_or(&"end of file".to_string()) 33 | .fg(Color::Red) 34 | )) 35 | .with_color(Color::Red), 36 | ), 37 | chumsky::error::SimpleReason::Unexpected => report 38 | .with_message(format!( 39 | "{}, expected {}", 40 | if e.found().is_some() { 41 | "Unexpected token in input" 42 | } else { 43 | "Unexpected end of input" 44 | }, 45 | if e.expected().len() == 0 { 46 | "end of input".to_string() 47 | } else { 48 | e.expected() 49 | .map(|expected| match expected { 50 | Some(expected) => expected.to_string(), 51 | None => "end of input".to_string(), 52 | }) 53 | .collect::>() 54 | .join(", ") 55 | } 56 | )) 57 | .with_label( 58 | Label::new(e.span()) 59 | .with_message(format!( 60 | "Unexpected token {}", 61 | e.found() 62 | .unwrap_or(&"end of file".to_string()) 63 | .fg(Color::Red) 64 | )) 65 | .with_color(Color::Red), 66 | ), 67 | chumsky::error::SimpleReason::Custom(msg) => report.with_message(msg).with_label( 68 | Label::new(e.span()) 69 | .with_message(format!("{}", msg.fg(Color::Red))) 70 | .with_color(Color::Red), 71 | ), 72 | }; 73 | 74 | let mut buf = vec![]; 75 | report.finish().write(Source::from(&src), &mut buf).unwrap(); 76 | reports.push(std::str::from_utf8(&buf[..]).unwrap().to_string()); 77 | } 78 | 79 | reports.join("\n") 80 | } 81 | 
-------------------------------------------------------------------------------- /src/components/cell/CellOutput.svelte: -------------------------------------------------------------------------------- 1 | 9 | 10 | {#if state.type === "markdown"} 11 |

12 | {@html markdownToHtml(state.value)} 13 |

14 | {:else if state.result.ok === false} 15 | {#if state.type === "code"} 16 |

{@html ansiToHtml(state.result.errors)}

17 | {:else} 18 |

19 | Error: 20 | {state.result.error} 21 |

22 | {/if} 23 | {:else if state.graphErrors !== undefined} 24 |

25 | Graph Error: 26 | {state.graphErrors} 27 |

28 | {:else if state.runtimeErrors !== undefined} 29 |

30 | Runtime Error: 31 | {state.runtimeErrors} 32 |

33 | {:else} 34 |

39 | {#if state.output !== undefined} 40 | {#if state.type === "code"} 41 | 42 | {:else} 43 | 44 | {/if} 45 | {/if} 46 |

47 | {/if} 48 | 49 | 137 | -------------------------------------------------------------------------------- /src/components/Header.svelte: -------------------------------------------------------------------------------- 1 | 17 | 18 |

19 |

20 | 21 |

22 | 23 |

24 | 33 | 36 |

37 |

38 |

39 | 40 | {#if sharing !== "none" && sharing !== "pending"} 41 | 56 | {/if} 57 | 58 | {#if about} 59 | 95 | {/if} 96 | 97 | 117 | -------------------------------------------------------------------------------- /crates/percival/src/ast.rs: -------------------------------------------------------------------------------- 1 | //! Abstract syntax tree definitions for the Percival language. 2 | 3 | use std::collections::{BTreeMap, BTreeSet}; 4 | 5 | /// A program translation unit in the Percival language. 6 | #[derive(Clone, Debug, PartialEq, Eq)] 7 | pub struct Program { 8 | /// Rules that make up the program. 9 | pub rules: Vec, 10 | /// Imports prefixed with the `import` keyword. 11 | pub imports: Vec, 12 | } 13 | 14 | /// Represents a single Horn clause. 15 | #[derive(Clone, Debug, PartialEq, Eq)] 16 | pub struct Rule { 17 | /// Head or implicand of the Horn clause. 18 | pub goal: Fact, 19 | /// Tail or conditional assumptions of the Horn clause. 20 | pub clauses: Vec, 21 | } 22 | 23 | /// An element of the right-hand side of a rule. 24 | #[derive(Clone, Debug, PartialEq, Eq)] 25 | pub enum Clause { 26 | /// Relational assumption in the rule. 27 | Fact(Fact), 28 | /// Raw JavaScript conditional expression between backticks. 29 | Expr(String), 30 | /// Local variable binding within a rule. 31 | Binding(String, Value), 32 | } 33 | 34 | /// Literal part of a Horn clause, written in terms of relations. 35 | #[derive(Clone, Debug, PartialEq, Eq)] 36 | pub struct Fact { 37 | /// Name of the relation being referenced. 38 | pub name: String, 39 | /// Named properties of the relation. 40 | pub props: BTreeMap, 41 | } 42 | 43 | /// A bound or unbound value assigned to part of a relation. 44 | #[derive(Clone, Debug, PartialEq, Eq)] 45 | pub enum Value { 46 | /// A simple identifier, which can be either bound or unbound. 47 | Id(String), 48 | /// A literal value, translated directly to JavaScript. 49 | Literal(Literal), 50 | /// A raw JavaScript expression between backticks. 
51 | Expr(String), 52 | /// A custom aggregate operation over a subquery. 53 | Aggregate(Aggregate), 54 | } 55 | 56 | /// Literal values supported by the Percival grammar. 57 | #[derive(Clone, Debug, PartialEq, Eq)] 58 | pub enum Literal { 59 | /// A standard floating-point number literal. 60 | Number(String), 61 | /// A string literal, with escape sequences unevaluated. 62 | String(String), 63 | /// A boolean literal in simplest form. 64 | Boolean(bool), 65 | } 66 | 67 | /// An aggregate operation over stratified dependency relations. 68 | #[derive(Clone, Debug, PartialEq, Eq)] 69 | pub struct Aggregate { 70 | /// Name of the aggregate operator, such as `min` or `sum`. 71 | pub operator: String, 72 | /// Value being aggregated. 73 | pub value: Box, 74 | /// List of clauses to treat as a subquery for the aggregate. 75 | pub subquery: Vec, 76 | } 77 | 78 | /// An external import from a static JSON dataset. 79 | #[derive(Clone, Debug, PartialEq, Eq)] 80 | pub struct Import { 81 | /// Name of the relation being imported. 82 | pub name: String, 83 | /// Source URI of the import. 84 | pub uri: String, 85 | } 86 | 87 | impl Value { 88 | /// Returns all relations referenced by this value. 89 | pub fn deps(&self) -> BTreeSet { 90 | match self { 91 | Value::Aggregate(aggregate) => { 92 | let mut deps: BTreeSet<_> = aggregate 93 | .subquery 94 | .iter() 95 | .flat_map(|clause| clause.deps()) 96 | .collect(); 97 | deps.extend(aggregate.value.deps()); 98 | deps 99 | } 100 | _ => BTreeSet::new(), 101 | } 102 | } 103 | } 104 | 105 | impl Clause { 106 | /// Returns all relations referenced by this clause. 
107 | pub fn deps(&self) -> BTreeSet { 108 | match self { 109 | Clause::Fact(fact) => { 110 | let mut deps = BTreeSet::new(); 111 | deps.insert(fact.name.clone()); 112 | for value in fact.props.values() { 113 | deps.extend(value.deps()); 114 | } 115 | deps 116 | } 117 | Clause::Expr(_) => BTreeSet::new(), 118 | Clause::Binding(_, value) => value.deps(), 119 | } 120 | } 121 | } 122 | 123 | impl Program { 124 | /// Returns the names of all relations produced by this program. 125 | pub fn results(&self) -> BTreeSet { 126 | self.rules 127 | .iter() 128 | .map(|rule| rule.goal.name.clone()) 129 | .collect() 130 | } 131 | 132 | /// Returns the names of all external relations that this program uses. 133 | pub fn deps(&self) -> BTreeSet { 134 | let results = self.results(); 135 | let imports = self.imports(); 136 | self.rules 137 | .iter() 138 | .flat_map(|rule| { 139 | let mut deps: BTreeSet = rule 140 | .clauses 141 | .iter() 142 | .flat_map(|clause| clause.deps()) 143 | .collect(); 144 | deps.extend(rule.goal.props.values().flat_map(|value| value.deps())); 145 | deps 146 | }) 147 | .filter(|name| !results.contains(name) && !imports.contains(name)) 148 | .collect() 149 | } 150 | 151 | /// Returns the names of all external imports made by the program. 152 | pub fn imports(&self) -> BTreeSet { 153 | self.imports 154 | .iter() 155 | .map(|import| import.name.clone()) 156 | .collect() 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Percival 2 | 3 | [Percival](https://percival.ink/) is a **declarative data query and 4 | visualization language**. It provides a reactive, web-based notebook environment 5 | for exploring complex datasets, producing interactive graphics, and sharing 6 | results. 7 | 8 |

9 | 10 |
11 | percival.ink 12 | 13 |

14 | 15 | Percival combines the flexibility of 16 | [_Datalog_](https://en.wikipedia.org/wiki/Datalog) as a query language for 17 | relational data with the beauty of 18 | [_exploratory visualization grammars_](https://observablehq.com/@observablehq/plot). 19 | These declarative components interact through a reactive dataflow system. 20 | Because Percival uses web technologies (including Web Workers for multithreaded, 21 | sandboxed execution), fully interactive notebooks can be shared with anyone on 22 | the Internet, making data analyses more tangible to others. 23 | 24 | At the core of Percival is a custom Datalog compiler, built with Rust and 25 | WebAssembly, which integrates with its notebook runtime. The compiler translates the query 26 | language to JavaScript through a staged evaluation process that also allows 27 | users to embed their own JavaScript code. The interface aims to be lightweight, 28 | friendly, and accessible, and there is no hidden workspace state. 29 | 30 | This is an early-stage research project, and we welcome your feedback, so please 31 | feel free to say hello at our 32 | [discussions page](https://github.com/ekzhang/percival/discussions)! 33 | 34 | ## Getting Started 35 | 36 | If you've gotten to this point in the README, please first try out the web 37 | application and demo notebook at [percival.ink](https://percival.ink/)! The 38 | information below is technical documentation intended for contributors. 39 | 40 | Building Percival from scratch requires [Node v16+](https://nodejs.org/en/), 41 | [NPM v8+](https://www.npmjs.com/), [Rust 1.56+](https://www.rust-lang.org/), 42 | [Cargo](https://crates.io/), and 43 | [Wasm-Pack](https://rustwasm.github.io/wasm-pack/) to be installed on your machine.
To 44 | build the Rust/WebAssembly portion of the project, use the command: 45 | 46 | ```shell 47 | wasm-pack build --target web crates/percival-wasm 48 | ``` 49 | 50 | Next, run `npm install` to install JavaScript dependencies, then run the 51 | following command to start the development server: 52 | 53 | ```shell 54 | npm run dev 55 | ``` 56 | 57 | This should open a Percival notebook in your browser, with live reloading. 58 | 59 | ## Architecture 60 | 61 | This section outlines the high-level technical design of Percival. 62 | 63 | ### User Interface 64 | 65 | Percival is a client-side web application running fully in the user's browser. 66 | The notebook interface is built with [Svelte](https://svelte.dev/) and styled 67 | with [Tailwind CSS](https://tailwindcss.com/). It relies on numerous other 68 | open-source libraries, including [CodeMirror 6](https://codemirror.net/6/) for live 69 | code editing and syntax highlighting, 70 | [Remark](https://github.com/remarkjs/remark) and [KaTeX](https://katex.org/) for 71 | Markdown rendering, and [Vite](https://vitejs.dev/) for frontend bundling. 72 | 73 | The code for the web frontend is located in `src/`, which contains a mix of 74 | Svelte (in `src/components/`) and TypeScript (in `src/lib/`). These modules are 75 | bundled into a static website at build time, and there is no dynamic server-side 76 | rendering. 77 | 78 | ### JIT Compiler 79 | 80 | Users write code cells in a custom dialect of Datalog. These cells are translated 81 | to JavaScript by a Rust compiler, which is itself compiled to WebAssembly using 82 | [wasm-bindgen](https://github.com/rustwasm/wasm-bindgen). The Percival 83 | compiler's code is located in the `crates/` folder. For ergonomic parsing with 84 | human-readable error messages, the compiler relies on 85 | [chumsky](https://github.com/zesterer/chumsky), a parser combinator library. 86 | 87 | After the `percival-wasm` crate is compiled to WebAssembly, it can be used by 88 | client-side code. 
The compiler processes code cells, then sends the resulting 89 | JavaScript to separate 90 | [web workers](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API) 91 | that sandbox the code and execute it just-in-time. As the user writes queries, 92 | their notebook automatically tracks inter-cell dependencies and evaluates cells 93 | in topological order, spawning / terminating worker threads on demand. 94 | 95 | ### Data Visualization 96 | 97 | Plotting is done using a specialized web worker that runs JavaScript code with 98 | access to the [Observable Plot](https://observablehq.com/@observablehq/plot) 99 | library. In order for this library (and D3) to run in a worker context, we patch 100 | the global document with a lightweight virtual DOM implementation ported from 101 | [Domino](https://github.com/fgnass/domino). 102 | 103 | ### Deployment 104 | 105 | In production, the `main` branch of this repository is continuously deployed to 106 | [percival.ink](https://percival.ink/) via [Vercel](https://vercel.com/), which 107 | hosts the static website. It also runs a serverless function (see 108 | `api/index.go`) that allows users to share notebooks through the GitHub Gist 109 | API. 110 | 111 | ## Development 112 | 113 | To build, lint, and format the Svelte project, use the corresponding scripts: 114 | 115 | ```shell 116 | npm run build 117 | npm run check 118 | npm run format 119 | ``` 120 | 121 | For the Rust crates, you can run unit tests for the core functionality with: 122 | 123 | ```shell 124 | cargo test 125 | ``` 126 | 127 | You can also run tests for the WebAssembly component using a headless Chrome or 128 | Firefox browser: 129 | 130 | ```shell 131 | wasm-pack test --chrome --headless crates/percival-wasm 132 | ``` 133 | 134 | Since Percival uses a Rust-based compiler but outputs JavaScript, the easiest 135 | way to test code generation functionality is within the browser. 
We use Mocha 136 | and Puppeteer for this, and tests can be run with: 137 | 138 | ```shell 139 | npm test 140 | ``` 141 | 142 | ## Acknowledgements 143 | 144 | Created by Eric Zhang ([@ekzhang1](https://twitter.com/ekzhang1)). Licensed 145 | under the [MIT license](LICENSE). 146 | -------------------------------------------------------------------------------- /patches/domino+2.1.6.patch: -------------------------------------------------------------------------------- 1 | diff --git a/node_modules/domino/lib/DOMImplementation.js b/node_modules/domino/lib/DOMImplementation.js 2 | index 675b687..8b64565 100644 3 | --- a/node_modules/domino/lib/DOMImplementation.js 4 | +++ b/node_modules/domino/lib/DOMImplementation.js 5 | @@ -41,7 +41,7 @@ DOMImplementation.prototype = { 6 | // namespace and doctype are propertly set. See this thread: 7 | // http://lists.w3.org/Archives/Public/www-dom/2011AprJun/0132.html 8 | // 9 | - var d = new Document(false, null); 10 | + var d = new Document(false, null, DOMImplementation); 11 | var e; 12 | 13 | if (qualifiedName) 14 | @@ -66,7 +66,7 @@ DOMImplementation.prototype = { 15 | }, 16 | 17 | createHTMLDocument: function createHTMLDocument(titleText) { 18 | - var d = new Document(true, null); 19 | + var d = new Document(true, null, DOMImplementation); 20 | d.appendChild(new DocumentType(d, 'html')); 21 | var html = d.createElement('html'); 22 | d.appendChild(html); 23 | diff --git a/node_modules/domino/lib/Document.js b/node_modules/domino/lib/Document.js 24 | index 8f9cbe5..cb725a9 100644 25 | --- a/node_modules/domino/lib/Document.js 26 | +++ b/node_modules/domino/lib/Document.js 27 | @@ -10,7 +10,6 @@ var Comment = require('./Comment'); 28 | var Event = require('./Event'); 29 | var DocumentFragment = require('./DocumentFragment'); 30 | var ProcessingInstruction = require('./ProcessingInstruction'); 31 | -var DOMImplementation = require('./DOMImplementation'); 32 | var TreeWalker = require('./TreeWalker'); 33 | var NodeIterator = 
require('./NodeIterator'); 34 | var NodeFilter = require('./NodeFilter'); 35 | @@ -25,13 +24,14 @@ var MUTATE = require('./MutationConstants'); 36 | var NAMESPACE = utils.NAMESPACE; 37 | var isApiWritable = require("./config").isApiWritable; 38 | 39 | -function Document(isHTML, address) { 40 | +function Document(isHTML, address, DomImpl) { 41 | ContainerNode.call(this); 42 | + this._DomImpl = DomImpl; 43 | this.nodeType = Node.DOCUMENT_NODE; 44 | this.isHTML = isHTML; 45 | this._address = address || 'about:blank'; 46 | this.readyState = 'loading'; 47 | - this.implementation = new DOMImplementation(this); 48 | + this.implementation = new DomImpl(this); 49 | 50 | // DOMCore says that documents are always associated with themselves 51 | this.ownerDocument = null; // ... but W3C tests expect null 52 | @@ -507,7 +507,7 @@ Document.prototype = Object.create(ContainerNode.prototype, { 53 | 54 | // Utility methods 55 | clone: { value: function clone() { 56 | - var d = new Document(this.isHTML, this._address); 57 | + var d = new Document(this.isHTML, this._address, this._DomImpl); 58 | d._quirks = this._quirks; 59 | d._contentType = this._contentType; 60 | return d; 61 | @@ -711,7 +711,7 @@ Document.prototype = Object.create(ContainerNode.prototype, { 62 | _templateDoc: { get: function() { 63 | if (!this._templateDocCache) { 64 | // "associated inert template document" 65 | - var newDoc = new Document(this.isHTML, this._address); 66 | + var newDoc = new Document(this.isHTML, this._address, this._DomImpl); 67 | this._templateDocCache = newDoc._templateDocCache = newDoc; 68 | } 69 | return this._templateDocCache; 70 | diff --git a/node_modules/domino/lib/Element.js b/node_modules/domino/lib/Element.js 71 | index ecc90a8..9fc2666 100644 72 | --- a/node_modules/domino/lib/Element.js 73 | +++ b/node_modules/domino/lib/Element.js 74 | @@ -1075,8 +1075,8 @@ AttributesArray.prototype = Object.create(NamedNodeMap.prototype, { 75 | 76 | // We can't make direct array access work 
(without Proxies, node >=6) 77 | // but we can make `Array.from(node.attributes)` and for-of loops work. 78 | -if (global.Symbol && global.Symbol.iterator) { 79 | - AttributesArray.prototype[global.Symbol.iterator] = function() { 80 | +if (globalThis.Symbol && globalThis.Symbol.iterator) { 81 | + AttributesArray.prototype[globalThis.Symbol.iterator] = function() { 82 | var i=0, n=this.length, self=this; 83 | return { 84 | next: function() { 85 | diff --git a/node_modules/domino/lib/HTMLParser.js b/node_modules/domino/lib/HTMLParser.js 86 | index 2d80093..f448b01 100644 87 | --- a/node_modules/domino/lib/HTMLParser.js 88 | +++ b/node_modules/domino/lib/HTMLParser.js 89 | @@ -1,6 +1,7 @@ 90 | "use strict"; 91 | module.exports = HTMLParser; 92 | 93 | +var DOMImplementation = require('./DOMImplementation'); 94 | var Document = require('./Document'); 95 | var DocumentType = require('./DocumentType'); 96 | var Node = require('./Node'); 97 | @@ -2126,7 +2127,7 @@ function HTMLParser(address, fragmentContext, options) { 98 | 99 | 100 | // This is the document we'll be building up 101 | - var doc = new Document(true, address); 102 | + var doc = new Document(true, address, DOMImplementation); 103 | 104 | // The document needs to know about the parser, for document.write(). 105 | // This _parser property will be deleted when we're done parsing. 106 | diff --git a/node_modules/domino/lib/config.js b/node_modules/domino/lib/config.js 107 | index abd3475..559a847 100644 108 | --- a/node_modules/domino/lib/config.js 109 | +++ b/node_modules/domino/lib/config.js 110 | @@ -4,4 +4,4 @@ 111 | * you call `require("domino")`. 
112 | */ 113 | 114 | -exports.isApiWritable = !global.__domino_frozen__; 115 | +exports.isApiWritable = !globalThis.__domino_frozen__; 116 | diff --git a/node_modules/domino/lib/sloppy.js b/node_modules/domino/lib/sloppy.js 117 | index b5d8950..e920db1 100644 118 | --- a/node_modules/domino/lib/sloppy.js 119 | +++ b/node_modules/domino/lib/sloppy.js 120 | @@ -6,19 +6,9 @@ 121 | /* jshint -W085 */ 122 | module.exports = { 123 | Window_run: function _run(code, file) { 124 | - if (file) code += '\n//@ sourceURL=' + file; 125 | - with(this) eval(code); 126 | + console.log("Window_run removed") 127 | }, 128 | EventHandlerBuilder_build: function build() { 129 | - try { 130 | - with(this.document.defaultView || Object.create(null)) 131 | - with(this.document) 132 | - with(this.form) 133 | - with(this.element) 134 | - return eval("(function(event){" + this.body + "})"); 135 | - } 136 | - catch (err) { 137 | - return function() { throw err; }; 138 | - } 139 | + console.log("EventHandlerBuilder_build removed") 140 | } 141 | }; 142 | -------------------------------------------------------------------------------- /src/assets/logo.svg: -------------------------------------------------------------------------------- 1 | 8 | -------------------------------------------------------------------------------- /src/lib/notebook.ts: -------------------------------------------------------------------------------- 1 | import { nanoid } from "nanoid"; 2 | import { build } from "./runtime"; 3 | import type { CompilerResult } from "./runtime"; 4 | import { buildPlot } from "./plot"; 5 | import type { PlotResult } from "./plot"; 6 | 7 | export type MarkdownCell = { 8 | type: "markdown"; 9 | hidden: boolean; 10 | value: string; 11 | }; 12 | 13 | export type CodeCellData = { 14 | type: "code"; 15 | hidden: boolean; 16 | value: string; 17 | }; 18 | 19 | export type PlotCellData = { 20 | type: "plot"; 21 | hidden: boolean; 22 | value: string; 23 | }; 24 | 25 | export type CellData = MarkdownCell 
| CodeCellData | PlotCellData; 26 | 27 | export type CodeCellState = CodeCellData & { 28 | result: CompilerResult; 29 | status: "stale" | "pending" | "done"; 30 | output?: Record; 31 | graphErrors?: string; 32 | runtimeErrors?: string; 33 | evaluateHandle?: () => void; 34 | }; 35 | 36 | export type PlotCellState = PlotCellData & { 37 | result: PlotResult; 38 | status: "stale" | "pending" | "done"; 39 | output?: string; 40 | graphErrors?: string; 41 | runtimeErrors?: string; 42 | evaluateHandle?: () => void; 43 | }; 44 | 45 | export type CellState = MarkdownCell | CodeCellState | PlotCellState; 46 | 47 | function clear( 48 | cell: CodeCellState | PlotCellState, 49 | status: CodeCellState["status"], 50 | ) { 51 | cell.evaluateHandle?.(); // cancel evaluation 52 | cell.graphErrors = cell.runtimeErrors = cell.evaluateHandle = undefined; 53 | cell.status = status; 54 | } 55 | 56 | export class NotebookState { 57 | /** Order of cells by ID. */ 58 | private order: string[]; 59 | 60 | /** Current state of each cell. */ 61 | private cells: Map; 62 | 63 | /** Callbacks on notebook state change. 
*/ 64 | private callbacks: Map void>; 65 | 66 | constructor() { 67 | this.order = []; 68 | this.cells = new Map(); 69 | this.callbacks = new Map(); 70 | } 71 | 72 | get length() { 73 | return this.order.length; 74 | } 75 | 76 | addCell(cell: CellData) { 77 | this.insertCell(this.order.length, cell); 78 | this.rebuildGraph(); 79 | } 80 | 81 | addCellBefore(id: string, cell: CellData) { 82 | const index = this.order.findIndex((v) => v === id); 83 | this.insertCell(index, cell); 84 | this.rebuildGraph(); 85 | } 86 | 87 | private insertCell(index: number, cell: CellData) { 88 | if (index < 0 || index > this.order.length) { 89 | throw new Error(`Invalid cell index: ${index}`); 90 | } 91 | const id = nanoid(); 92 | this.order.splice(index, 0, id); 93 | if (cell.type === "markdown") { 94 | this.cells.set(id, cell); 95 | } else if (cell.type === "code") { 96 | this.cells.set(id, { 97 | ...cell, 98 | result: build(cell.value), 99 | status: "stale", 100 | }); 101 | } else { 102 | this.cells.set(id, { 103 | ...cell, 104 | result: buildPlot(cell.value), 105 | status: "stale", 106 | }); 107 | } 108 | } 109 | 110 | deleteCell(id: string) { 111 | const index = this.order.findIndex((v) => v === id); 112 | if (index === -1) { 113 | throw new Error(`Invalid cell ID: ${id}`); 114 | } 115 | this.order.splice(index, 1); 116 | this.cells.delete(id); 117 | this.rebuildGraph(); 118 | } 119 | 120 | editCell(id: string, value: string) { 121 | const cell = this.getCell(id); 122 | cell.value = value; 123 | if (cell.type === "code") { 124 | clear(cell, "stale"); 125 | cell.result = build(value); 126 | this.rebuildGraph(); 127 | } else if (cell.type === "plot") { 128 | clear(cell, "stale"); 129 | cell.result = buildPlot(value); 130 | this.rebuildGraph(); 131 | } else { 132 | this.revalidate(); 133 | } 134 | } 135 | 136 | toggleHidden(id: string) { 137 | const cell = this.getCell(id); 138 | cell.hidden = !cell.hidden; 139 | this.revalidate(); 140 | } 141 | 142 | private getCell(id: string): 
CellState { 143 | const cell = this.cells.get(id); 144 | if (!cell) { 145 | throw new Error(`Invalid cell ID: ${id}`); 146 | } 147 | return cell; 148 | } 149 | 150 | /** 151 | * Update graph dependencies and evaluate pending/running cells. 152 | * 153 | * This is a fairly complex function. Roughly speaking, it is responsible for 154 | * the following execution strategy: 155 | * 156 | * 1. Find orphaned cells and duplicate outputs, set error messages. 157 | * 2. Set to "pending" - all stale cells that need to be re-evaluated. Cancel 158 | * execution of all previously running cells. 159 | * 3. Revalidate to track changes. 160 | * 4. Start evaluating those stale cells asynchronously in separate worker 161 | * processes. On error, set the "runtimeErrors" property, and otherwise set 162 | * the output on success while marking dependents as stale. 163 | */ 164 | private rebuildGraph() { 165 | // For each relation, a list of all cells that create that relation. 166 | const creators = new Map(); 167 | 168 | for (const [id, cell] of this.executableCells()) { 169 | if (cell.graphErrors !== undefined) { 170 | delete cell.graphErrors; 171 | } 172 | if (cell.result.ok && cell.type === "code") { 173 | for (const relation of cell.result.results) { 174 | const array = creators.get(relation) ?? []; 175 | array.push(id); 176 | creators.set(relation, array); 177 | } 178 | } 179 | } 180 | 181 | // Check for duplicate outputs. 182 | for (const [relation, cellIds] of creators) { 183 | if (cellIds.length > 1) { 184 | for (const id of cellIds) { 185 | const cell = this.getCell(id); 186 | if (cell.type !== "code") throw new Error("unreachable"); 187 | clear(cell, "stale"); 188 | cell.graphErrors = `Relation "${relation}" is defined in multiple cells.`; 189 | } 190 | } 191 | } 192 | 193 | // Check for orphaned cells. 
194 | for (const [, cell] of this.executableCells()) { 195 | if (cell.result.ok) { 196 | for (const relation of cell.result.deps) { 197 | if (!creators.has(relation)) { 198 | clear(cell, "stale"); 199 | cell.graphErrors = `Dependency "${relation}" was not found in any cell.`; 200 | break; 201 | } 202 | } 203 | } 204 | } 205 | 206 | // Asynchronously evaluate all stale cells that have dependencies met. 207 | for (const [, cell] of this.executableCells()) { 208 | if ( 209 | cell.result.ok && 210 | cell.graphErrors === undefined && 211 | cell.status === "stale" 212 | ) { 213 | let depsOk = true; 214 | const deps: Record = {}; 215 | for (const relation of cell.result.deps) { 216 | const cellIds = creators.get(relation); 217 | if (!cellIds || cellIds.length != 1) { 218 | depsOk = false; 219 | break; 220 | } 221 | const prev = this.getCell(cellIds[0]); 222 | if (prev.type !== "code") throw new Error("unreachable"); 223 | if ( 224 | prev.status === "done" && 225 | prev.result.ok && 226 | prev.graphErrors === undefined && 227 | prev.runtimeErrors === undefined && 228 | prev.output?.[relation] 229 | ) { 230 | deps[relation] = prev.output[relation]; 231 | } else { 232 | depsOk = false; 233 | break; 234 | } 235 | } 236 | 237 | if (depsOk) { 238 | clear(cell, "pending"); 239 | if (cell.type === "code") { 240 | const promise = cell.result.evaluate(deps); 241 | cell.evaluateHandle = () => promise.cancel(); 242 | const results = cell.result.results; // storing for async callback 243 | promise 244 | .then((data) => { 245 | cell.output = data; 246 | cell.status = "done"; 247 | this.markUpdate(results); 248 | }) 249 | .catch((err: Error) => { 250 | if (err.message !== "Promise was cancelled by user") { 251 | cell.status = "done"; 252 | cell.runtimeErrors = err.message; 253 | this.revalidate(); 254 | } 255 | }); 256 | } else { 257 | const promise = cell.result.evaluate(deps[cell.result.deps[0]]); 258 | cell.evaluateHandle = () => promise.cancel(); 259 | promise 260 | .then((figure) 
=> { 261 | cell.output = figure; 262 | cell.status = "done"; 263 | this.revalidate(); 264 | }) 265 | .catch((err: Error) => { 266 | if (err.message !== "Promise was cancelled by user") { 267 | cell.status = "done"; 268 | cell.runtimeErrors = err.message; 269 | this.revalidate(); 270 | } 271 | }); 272 | } 273 | } 274 | } 275 | } 276 | 277 | this.revalidate(); 278 | } 279 | 280 | private markUpdate(relations: string[]) { 281 | const changed = new Set(relations); 282 | for (const [, cell] of this.executableCells()) { 283 | if ( 284 | cell.result.ok && 285 | cell.result.deps.filter((relation) => changed.has(relation)).length > 0 286 | ) { 287 | clear(cell, "stale"); 288 | } 289 | } 290 | this.rebuildGraph(); 291 | } 292 | 293 | [Symbol.iterator](): IterableIterator<[string, Readonly]> { 294 | return this.iter(); 295 | } 296 | 297 | private *iter(): IterableIterator<[string, CellState]> { 298 | for (const id of this.order) { 299 | yield [id, this.getCell(id)]; 300 | } 301 | } 302 | 303 | private *executableCells(): IterableIterator< 304 | [string, CodeCellState | PlotCellState] 305 | > { 306 | for (const [id, cell] of this.iter()) { 307 | if (cell.type === "code" || cell.type === "plot") { 308 | yield [id, cell]; 309 | } 310 | } 311 | } 312 | 313 | /** 314 | * Listen to changes in the notebook, returning a function that can be used to 315 | * dispose of the listener when completed. 316 | */ 317 | listen(callback: () => void): () => void { 318 | const callbackId = nanoid(); 319 | this.callbacks.set(callbackId, callback); 320 | return () => { 321 | this.callbacks.delete(callbackId); 322 | }; 323 | } 324 | 325 | /** Send a message to all listeners that the state was changed. */ 326 | private revalidate() { 327 | this.callbacks.forEach((callback) => { 328 | callback(); 329 | }); 330 | } 331 | 332 | /** Save the notebook data in a reproducible format for storage. 
*/ 333 | save(): Readonly[] { 334 | const data = []; 335 | for (const [, cell] of this) { 336 | data.push({ 337 | type: cell.type, 338 | hidden: cell.hidden, 339 | value: cell.value, 340 | }); 341 | } 342 | return data; 343 | } 344 | 345 | /** Load a notebook from cell data. */ 346 | static load(data: Readonly[]): NotebookState { 347 | const notebook = new NotebookState(); 348 | for (let i = 0; i < data.length; i++) { 349 | notebook.insertCell(i, data[i]); 350 | } 351 | notebook.rebuildGraph(); 352 | return notebook; 353 | } 354 | } 355 | -------------------------------------------------------------------------------- /crates/percival/tests/parse.rs: -------------------------------------------------------------------------------- 1 | use maplit::btreemap; 2 | 3 | use percival::{ 4 | ast::{Aggregate, Clause, Fact, Import, Literal, Program, Rule, Value}, 5 | errors::format_errors, 6 | parser::Grammar, 7 | }; 8 | 9 | #[test] 10 | fn parse_single_rule() { 11 | let grammar = Grammar::new(); 12 | let result = grammar.parse("tc(x, y) :- tc(x, y: z), edge(x: z, y)."); 13 | assert!(result.is_ok()); 14 | assert_eq!( 15 | result.unwrap(), 16 | Program { 17 | rules: vec![Rule { 18 | goal: Fact { 19 | name: "tc".into(), 20 | props: btreemap! { 21 | "x".into() => Value::Id("x".into()), 22 | "y".into() => Value::Id("y".into()), 23 | }, 24 | }, 25 | clauses: vec![ 26 | Clause::Fact(Fact { 27 | name: "tc".into(), 28 | props: btreemap! { 29 | "x".into() => Value::Id("x".into()), 30 | "y".into() => Value::Id("z".into()), 31 | }, 32 | }), 33 | Clause::Fact(Fact { 34 | name: "edge".into(), 35 | props: btreemap! 
{ 36 | "x".into() => Value::Id("z".into()), 37 | "y".into() => Value::Id("y".into()), 38 | }, 39 | }), 40 | ], 41 | }], 42 | imports: vec![], 43 | }, 44 | ); 45 | } 46 | 47 | #[test] 48 | fn parse_no_clauses() { 49 | let grammar = Grammar::new(); 50 | let result = grammar.parse("person(name, age)."); 51 | assert!(result.is_ok()); 52 | let result = grammar.parse("person(name, age) :-."); 53 | assert!(result.is_err()); 54 | } 55 | 56 | #[test] 57 | fn parse_literal() { 58 | let grammar = Grammar::new(); 59 | let result = grammar.parse("person(name: \"eric\\t\", age: 20, weight: 1.234e+2)."); 60 | assert!(result.is_ok()); 61 | assert_eq!( 62 | result.unwrap(), 63 | Program { 64 | rules: vec![Rule { 65 | goal: Fact { 66 | name: "person".into(), 67 | props: btreemap! { 68 | "name".into() => Value::Literal(Literal::String("eric\\t".into())), 69 | "age".into() => Value::Literal(Literal::Number("20".into())), 70 | "weight".into() => Value::Literal(Literal::Number("1.234e+2".into())), 71 | }, 72 | }, 73 | clauses: vec![], 74 | }], 75 | imports: vec![], 76 | }, 77 | ); 78 | } 79 | 80 | #[test] 81 | fn parse_err() { 82 | let grammar = Grammar::new(); 83 | let text = "tc(x, y) :- f(. 
84 | tc(z) :- tc(z, &)."; 85 | let errors = grammar.parse(text).unwrap_err(); 86 | assert!(errors.len() == 2); 87 | let message = format_errors(text, errors); 88 | assert!(message.contains("Unexpected token in input, expected ")); 89 | } 90 | 91 | #[test] 92 | fn parse_reserved_word() { 93 | let grammar = Grammar::new(); 94 | let text = "bad(x: continue)."; 95 | let errors = grammar.parse(text).unwrap_err(); 96 | assert!(errors.len() == 1); 97 | let message = format_errors(text, errors); 98 | assert!(message.contains("Cannot use reserved word as a variable binding")); 99 | 100 | let text = "bad(x: __percival_first_iteration)."; 101 | let errors = grammar.parse(text).unwrap_err(); 102 | assert!(errors.len() == 1); 103 | 104 | // It is okay to use a reserved word as a field name, just not a variable. 105 | let text = "ok(continue: x)."; 106 | let result = grammar.parse(text); 107 | assert!(result.is_ok()); 108 | } 109 | 110 | #[test] 111 | fn parse_js_expr() { 112 | let grammar = Grammar::new(); 113 | let result = grammar.parse("ok(x: `2 * num`) :- input(x: num), `num < 10`."); 114 | assert!(result.is_ok()); 115 | assert_eq!( 116 | result.unwrap(), 117 | Program { 118 | rules: vec![Rule { 119 | goal: Fact { 120 | name: "ok".into(), 121 | props: btreemap! { 122 | "x".into() => Value::Expr("2 * num".into()), 123 | }, 124 | }, 125 | clauses: vec![ 126 | Clause::Fact(Fact { 127 | name: "input".into(), 128 | props: btreemap! { 129 | "x".into() => Value::Id("num".into()), 130 | }, 131 | }), 132 | Clause::Expr("num < 10".into()), 133 | ], 134 | }], 135 | imports: vec![], 136 | }, 137 | ); 138 | } 139 | 140 | #[test] 141 | fn parse_comments() { 142 | let grammar = Grammar::new(); 143 | let result = grammar.parse( 144 | " 145 | hello(x: /* asdf */ 3) :- 146 | // a comment! 147 | world(k) /* another comment */, 148 | `k < 10`. 
149 | " 150 | .trim(), 151 | ); 152 | assert!(result.is_ok()); 153 | } 154 | 155 | #[test] 156 | fn parse_whitespace() { 157 | let grammar = Grammar::new(); 158 | let result = grammar.parse("\n\n\n"); 159 | assert!(result.is_ok()); 160 | } 161 | 162 | #[test] 163 | fn parse_trailing_eof_comment() { 164 | // This example technically invalid under our grammar; however, most 165 | // users would usually want to allow for comments at the end of a cell. 166 | // To fix this, Percival programs should be terminated by newlines. 167 | let grammar = Grammar::new(); 168 | let result = grammar.parse("// this comment has no trailing newline"); 169 | assert!(result.is_err()); 170 | 171 | let result = grammar.parse("// this comment has a trailing newline\n"); 172 | assert!(result.is_ok()); 173 | } 174 | 175 | #[test] 176 | fn parse_empty() { 177 | let grammar = Grammar::new(); 178 | let result = grammar.parse("any() :- ok()."); 179 | assert!(result.is_ok()); 180 | assert_eq!( 181 | result.unwrap(), 182 | Program { 183 | rules: vec![Rule { 184 | goal: Fact { 185 | name: "any".into(), 186 | props: btreemap! {}, 187 | }, 188 | clauses: vec![Clause::Fact(Fact { 189 | name: "ok".into(), 190 | props: btreemap! 
{}, 191 | })], 192 | }], 193 | imports: vec![], 194 | }, 195 | ); 196 | } 197 | 198 | #[test] 199 | fn parse_imports() { 200 | let grammar = Grammar::new(); 201 | let result = grammar.parse( 202 | r#" 203 | import hello from "https://example.com/hello.json" 204 | import barley from "npm://vega-datasets/data/barley.json" 205 | import football from "gh://vega/vega-datasets@next/data/football.json" 206 | "# 207 | .trim(), 208 | ); 209 | assert!(result.is_ok()); 210 | assert_eq!( 211 | result.unwrap(), 212 | Program { 213 | rules: vec![], 214 | imports: vec![ 215 | Import { 216 | name: "hello".into(), 217 | uri: "https://example.com/hello.json".into() 218 | }, 219 | Import { 220 | name: "barley".into(), 221 | uri: "npm://vega-datasets/data/barley.json".into() 222 | }, 223 | Import { 224 | name: "football".into(), 225 | uri: "gh://vega/vega-datasets@next/data/football.json".into() 226 | }, 227 | ], 228 | }, 229 | ); 230 | } 231 | 232 | #[test] 233 | fn parse_boolean() { 234 | let grammar = Grammar::new(); 235 | let result = grammar.parse("hello(x: true, y: false)."); 236 | assert!(result.is_ok()); 237 | assert_eq!( 238 | result.unwrap(), 239 | Program { 240 | rules: vec![Rule { 241 | goal: Fact { 242 | name: "hello".into(), 243 | props: btreemap! { 244 | "x".into() => Value::Literal(Literal::Boolean(true)), 245 | "y".into() => Value::Literal(Literal::Boolean(false)), 246 | }, 247 | }, 248 | clauses: vec![], 249 | }], 250 | imports: vec![], 251 | }, 252 | ); 253 | } 254 | 255 | #[test] 256 | fn parse_import_edge_cases() { 257 | let grammar = Grammar::new(); 258 | let result = grammar.parse("importhello from \"gh://hello\""); 259 | assert!(result.is_err()); 260 | 261 | let result = grammar.parse("importa(value: 3)."); 262 | assert!(result.is_ok()); 263 | } 264 | 265 | #[test] 266 | fn parse_binding() { 267 | let grammar = Grammar::new(); 268 | let result = grammar.parse( 269 | r#" 270 | ok(val) :- 271 | attempt(x), 272 | val = `3 * x`. 
273 | "#, 274 | ); 275 | assert!(result.is_ok()); 276 | assert_eq!( 277 | result.unwrap(), 278 | Program { 279 | rules: vec![Rule { 280 | goal: Fact { 281 | name: "ok".into(), 282 | props: btreemap! { 283 | "val".into() => Value::Id("val".into()), 284 | }, 285 | }, 286 | clauses: vec![ 287 | Clause::Fact(Fact { 288 | name: "attempt".into(), 289 | props: btreemap! { 290 | "x".into() => Value::Id("x".into()), 291 | }, 292 | }), 293 | Clause::Binding("val".into(), Value::Expr("3 * x".into())), 294 | ], 295 | }], 296 | imports: vec![], 297 | }, 298 | ); 299 | } 300 | 301 | #[test] 302 | fn parse_aggregate() { 303 | let grammar = Grammar::new(); 304 | let result = grammar.parse( 305 | r#" 306 | ok(value) :- 307 | year(year), 308 | value = mean[mpg] { 309 | cars(Year: year, mpg) 310 | }. 311 | "#, 312 | ); 313 | assert!(result.is_ok()); 314 | assert_eq!( 315 | result.unwrap(), 316 | Program { 317 | rules: vec![Rule { 318 | goal: Fact { 319 | name: "ok".into(), 320 | props: btreemap! { 321 | "value".into() => Value::Id("value".into()), 322 | }, 323 | }, 324 | clauses: vec![ 325 | Clause::Fact(Fact { 326 | name: "year".into(), 327 | props: btreemap! { 328 | "year".into() => Value::Id("year".into()), 329 | }, 330 | }), 331 | Clause::Binding( 332 | "value".into(), 333 | Value::Aggregate(Aggregate { 334 | operator: "mean".into(), 335 | value: Box::new(Value::Id("mpg".into())), 336 | subquery: vec![Clause::Fact(Fact { 337 | name: "cars".into(), 338 | props: btreemap! { 339 | "Year".into() => Value::Id("year".into()), 340 | "mpg".into() => Value::Id("mpg".into()), 341 | }, 342 | }),], 343 | }), 344 | ), 345 | ], 346 | }], 347 | imports: vec![], 348 | }, 349 | ); 350 | } 351 | -------------------------------------------------------------------------------- /src/samples/starter.percival: -------------------------------------------------------------------------------- 1 | This is a Percival notebook (https://percival.ink/). 2 | 3 | ╔═╣ Markdown 4 | # Welcome to Percival! 
5 | 6 | Percival is an interactive in-browser notebook for **declarative data analysis** and **visualization**. It combines the power of compiled [Datalog](https://en.wikipedia.org/wiki/Datalog) queries with the flexibility of [modern plotting libraries](https://observablehq.com/@observablehq/plot) for the web. 7 | 8 | ![Picture of a landscape](https://upload.wikimedia.org/wikipedia/commons/e/ee/Lake_Geneva_after_storm.jpg) 9 | 10 | This notebook is fully interactive! Here are the controls: 11 | 12 | - Edit the contents of any cell in the code editor, and press Shift+Enter to save. 13 | - Toggle source code visibility and delete cells in the left gutter. 14 | - Create new cells by hovering your mouse in an empty space and clicking the popup. 15 | 16 | To get started, let's dive into the basics of the language. 17 | 18 | ╔═╣ Markdown 19 | ## Intro to Datalog 20 | 21 | Datalog is a fully-featured database query language, similar to SQL. It originates from logic programming as a subset of Prolog. The basic object in Datalog is called a _relation_, and it is the equivalent of a table in traditional databases. 22 | 23 | Let's create a very simple relation that stores edges in a directed graph. This relation has two named fields, `x` and `y`. 24 | 25 | ╔═╡ Code 26 | // Edge relation: each line is a database entry. 27 | edge(x: 1, y: 2). 28 | edge(x: 2, y: 3). 29 | edge(x: 2, y: 4). 30 | 31 | ╔═╣ Markdown 32 | With Datalog, you can compute all paths within this graph by writing the query in the following code cell. This query consists of two _rules_, which use the `:-` notation. When we run this query, its outputs are displayed above the cell. 33 | 34 | ╔═╡ Code 35 | // Given an edge x -> y, there is a path x -> y. 36 | path(x, y) :- edge(x, y). 37 | 38 | // Given an edge x -> z and a path z -> y, there is a path x -> y. 39 | path(x, y) :- edge(x, y: z), path(x: z, y). 40 | 41 | ╔═╣ Markdown 42 | One of Percival's key features is _reactivity_. 
Try changing the first code cell by adding the following line, which introduces a new entry to the database: 43 | ``` 44 | edge(x: 4, y: 5). 45 | ``` 46 | After that, press Shift+Enter to save your work. What happens? You should see the results of the `path` cell change as well, since the `edge` relation it depends on was updated. 47 | 48 | **Exercise:** Now it's your turn. See if you can understand what the following query is doing, and try to modify it to also return direct connections to node 1. 49 | 50 | ╔═╡ Code 51 | // Find all "friends of friends" of node 1 in the graph. 52 | friends1(friend_of_friend) :- 53 | edge(x: 1, y: friend), 54 | edge(x: friend, y: friend_of_friend). 55 | 56 | ╔═╣ Markdown 57 | ## Embedding Code 58 | 59 | These examples show the core of Datalog in its purest form, a programming language based on the relational algebra. However, to do real-world data analysis, we need to support other operations, like arithmetic, strings, and other standard data types! 60 | 61 | Percival handles this situation by allowing you to seamlessly embed JavaScript expressions within queries. These are delimited by backquotes. 62 | 63 | ╔═╡ Code 64 | name(full_name: `first + " " + last`, sqrt_age) :- 65 | person(first, last, age), 66 | sqrt_age = `Math.sqrt(age)`. 67 | 68 | person(first: "Alice", last: "Carol", age: 20). 69 | person(first: "Foo", last: "Bar", age: 45). 70 | person(first: "Baz", last: "Lam", age: 12). 71 | 72 | ╔═╣ Markdown 73 | For a more complex example, here is how you would find all paths of length _at most 10_ inside a directed graph. 74 | 75 | ╔═╡ Code 76 | walk(x: v, y: v, len: 0) :- edge(x: v). 77 | walk(x: v, y: v, len: 0) :- edge(y: v). 78 | 79 | // What happens to the output of this rule if we add a cycle to the graph? 80 | walk(x, y, len) :- 81 | walk(x, y: z, len: len1), 82 | edge(x: z, y), 83 | len = `len1 + 1`, 84 | `len <= 10`. 
85 | 86 | ╔═╣ Markdown 87 | Here's one more fun example of arithmetic in rules: computing Fibonacci numbers! You can try changing the value of `max_n` to see how quickly the result is updated. 88 | 89 | Don't worry about slowing down the website, since Percival runs on Web Workers isolated from your browser's main render thread. 90 | 91 | ╔═╡ Code 92 | max_n(value: 30). 93 | 94 | ╔═╡ Code 95 | fib(n: 0, v: 0). 96 | fib(n: 1, v: 1). 97 | fib(n: `n + 1`, v) :- 98 | fib(n, v: v1), 99 | fib(n: `n - 1`, v: v2), 100 | v = `v1 + v2`, 101 | max_n(value), 102 | `n < value`. 103 | 104 | ╔═╣ Markdown 105 | ## Aggregates 106 | 107 | Not only can you do mathematical operations in queries, but you can also perform _aggregates_. In this version, supported aggregates include `sum`, `min`, `max`, `mean`, and `count`. 108 | 109 | For this example, we're going to import a publicly available dataset about cars from NPM. Percival allows you to load any public JSON, CSV, or TSV dataset from GitHub, NPM, or a standard HTTPS web link. 110 | 111 | ╔═╡ Code 112 | import cars from "npm://vega-datasets@2.1.0/data/cars.json" 113 | 114 | country(name: Origin) :- cars(Origin). 115 | 116 | ╔═╣ Markdown 117 | For each year and country of origin in the dataset, we will query for the average fuel economy of cars. This might let us answer questions about how fuel economy changes over time between the countries. 118 | 119 | ╔═╡ Code 120 | average_mpg(country, year: `new Date(year)`, value) :- 121 | country(name: country), 122 | cars(Year: year), 123 | value = mean[Miles_per_Gallon] { 124 | cars(Origin: country, Year: year, Miles_per_Gallon) 125 | }. 126 | 127 | ╔═╣ Markdown 128 | With support for aggregates, we can now answer a lot of analytical questions about the data. One key tool for exploring datasets is visualization. Percival supports declarative data visualization through _Plot_ cells, which run JavaScript code that generates diagrams using the [Observable Plot](https://github.com/observablehq/plot) library. 
129 | 130 | ╔═╡ Plot 131 | average_mpg => Plot.line(average_mpg, { 132 | sort: "year", 133 | x: "year", 134 | y: "value", 135 | stroke: "country", 136 | }).plot({ grid: true }) 137 | 138 | ╔═╣ Markdown 139 | Here's another example of a plot on our dataset. This time, we'll make a simple scatter plot on the entire cars dataset, faceted by the country of origin. 140 | 141 | ╔═╡ Plot 142 | cars => Plot.plot({ 143 | marks: [ 144 | Plot.dot(cars, { 145 | x: "Horsepower", 146 | y: "Miles_per_Gallon", 147 | stroke: "Weight_in_lbs", 148 | strokeWidth: 1.5, 149 | }), 150 | Plot.ruleX([40]), 151 | Plot.ruleY([5]), 152 | ], 153 | facet: { 154 | data: cars, 155 | y: "Origin", 156 | }, 157 | color: { 158 | type: "linear", 159 | range: ["steelblue", "orange"], 160 | }, 161 | fy: { tickPadding: -8 }, 162 | grid: true, 163 | }) 164 | 165 | ╔═╣ Markdown 166 | ## Real-World Case Study 167 | 168 | Let's see how all of these pieces fit together to work on a real-world dataset, where you might want to combine data from multiple different sources. 169 | 170 | ╔═╣ Markdown 171 | ### Initial Exploration 172 | 173 | Suppose that you just got access to a collection of data about airports, and you're eager to start exploring it. The dataset is tabular and contains information such as name, geographical location, city, state, and country. 174 | 175 | ╔═╡ Code 176 | import airports from "npm://vega-datasets@2.1.0/data/airports.csv" 177 | 178 | ╔═╣ Markdown 179 | From looking at the rows, it seems like there are airports from multiple different countries in this dataset! Let's figure out what the value counts in the `country` column look like. 180 | 181 | ╔═╡ Code 182 | airports_per_country(country, count) :- 183 | airports(country), 184 | count = count[1] { airports(country) }. 185 | 186 | ╔═╣ Markdown 187 | It turns out that **all but 4 of the airports are in the United States**. 
To make the rest of our analysis simpler, we're going to filter only those airports that have country equal to `"USA"`. We're also going to reduce our columns to only the necessary ones. 188 | 189 | ╔═╡ Code 190 | us_airports(state, iata, name) :- 191 | airports(state, iata, name, country: "USA"). 192 | 193 | ╔═╣ Markdown 194 | Cool, that was really simple! Let's use another aggregate query to see how many airports are in each US state. 195 | 196 | ╔═╡ Code 197 | airports_per_state(state, count) :- 198 | us_airports(state), 199 | count = count[1] { us_airports(state) }. 200 | 201 | ╔═╡ Plot 202 | airports_per_state => Plot.plot({ 203 | marks: [ 204 | Plot.dot(airports_per_state, { 205 | x: "count", 206 | fill: "steelblue", 207 | fillOpacity: 0.6, 208 | }), 209 | ], 210 | grid: true, 211 | }) 212 | 213 | ╔═╣ Markdown 214 | It seems like most states have between 0 and 100 airports, with a few outliers having 200-300 airports. This makes sense, given that some states are much smaller than others, and even between states of the same size, population density can be very different! 215 | 216 | ╔═╣ Markdown 217 | ### Loading More Data 218 | 219 | We might wonder if states with higher populations have more airports. However, we don't have this information in our current table, so we'll need to find a new dataset for this. [Here's one](https://github.com/jakevdp/data-USstates) that we found, off-the-shelf, on GitHub. 
220 | 221 | _(I quickly updated some of the column names in these tables to make them compatible with Percival, which is why the latter two tables are imported from Gists.)_ 222 | 223 | ╔═╡ Code 224 | import state_abbrevs from "gh://jakevdp/data-USstates@b9c5dfa/state-abbrevs.csv" 225 | import state_areas from "https://gist.githubusercontent.com/ekzhang/a68794f064594cf0ab56a317c3b7d121/raw/state-areas.csv" 226 | import state_population from "https://gist.githubusercontent.com/ekzhang/a68794f064594cf0ab56a317c3b7d121/raw/state-population.csv" 227 | 228 | ╔═╣ Markdown 229 | Since this dataset consists of multiple tables in a slightly different format, we'll need to construct an inner join between these tables and our airports to combine them together. Luckily, this is very simple to do with a Datalog query! 230 | 231 | ╔═╡ Code 232 | airports_state_info(state, count, population, area) :- 233 | state_abbrevs(state: name, abbreviation: state), 234 | airports_per_state(count, state), 235 | state_population(state, population, ages: "total", year: 2013), 236 | state_areas(state: name, area_sq_mi: area). 237 | 238 | ╔═╡ Plot 239 | airports_state_info => Plot.plot({ 240 | marks: [ 241 | Plot.dot(airports_state_info, { 242 | x: "population", 243 | y: "count", 244 | r: "area", 245 | fill: "steelblue", 246 | fillOpacity: 0.8, 247 | title: "state", 248 | }), 249 | Plot.text(airports_state_info, { 250 | x: "population", 251 | y: "count", 252 | textAnchor: "start", 253 | dx: 6, 254 | text: "state", 255 | fillColor: "#222", 256 | fillOpacity: 0.8, 257 | fontSize: d => Math.sqrt(d.area) / 50, 258 | }), 259 | Plot.ruleY([0]), 260 | Plot.ruleX([0]), 261 | ], 262 | grid: true, 263 | }) 264 | 265 | ╔═╣ Markdown 266 | As you can see, there is a clear direct relationship between the size of a state, its population, and the number of airports in that state. The one exception to this relationship is **Alaska (AK)**, where although the population is very small, it has over 260 airports! 
We're also able to see that **Texas (TX)** and **California (CA)** have the second and third-largest number of airports, respectively. 267 | 268 | ╔═╣ Markdown 269 | ## Closing 270 | 271 | Percival is an early-stage research project. If you have any comments or feedback, you can reach me at the public [GitHub repository](https://github.com/ekzhang/percival) or on Twitter [@ekzhang1](https://twitter.com/ekzhang1). 272 | 273 | If you like Percival, feel free to try using it on your own problems! To create a new, blank notebook from scratch, [click here](/?new). 274 | 275 | By the way, if you press the "Share" button at the top of this page, you'll get a permanent link to the current notebook. Unlike Jupyter or R exports, these documents are fully interactive, and you only need a browser to continue exploring where you left off. ✨ 276 | -------------------------------------------------------------------------------- /src/lib/runtime.test.ts: -------------------------------------------------------------------------------- 1 | import { expect } from "chai"; 2 | import init from "percival-wasm"; 3 | import { build } from "./runtime"; 4 | 5 | async function checkProgram({ 6 | src, 7 | deps, 8 | results, 9 | input, 10 | output, 11 | }: { 12 | src: string; 13 | deps: string[]; 14 | results: string[]; 15 | input: Record; 16 | output: Record; 17 | }) { 18 | const result = build(src); 19 | expect(result.ok).to.be.true; 20 | if (!result.ok) { 21 | // unreachable, needed for type inference 22 | throw null; 23 | } 24 | expect(result.deps).to.have.members(deps); 25 | expect(result.results).to.have.members(results); 26 | const observed = await result.evaluate(input); 27 | for (const key of Object.keys(output)) { 28 | expect(observed[key]).to.have.deep.members(output[key]); 29 | } 30 | } 31 | 32 | describe("basic compilation", () => { 33 | it("can build code", async () => { 34 | await init(); 35 | expect(build("tc(x: 3).").ok).to.be.true; 36 | 
expect(build("tc(x:).").ok).to.be.false; 37 | }); 38 | 39 | it("evaluates a simple program", async () => { 40 | await init(); 41 | const result = build("tc(x: 3)."); 42 | expect(result.ok).to.be.true; 43 | if (!result.ok) throw null; // unreachable 44 | expect(await result.evaluate({})).to.deep.equal({ 45 | tc: [{ x: 3 }], 46 | }); 47 | }); 48 | 49 | it("evaluates transitive closure from input", async () => { 50 | await init(); 51 | await checkProgram({ 52 | src: ` 53 | tc(x, y) :- edge(x, y). 54 | tc(x, y) :- tc(x, y: z), edge(x: z, y). 55 | `, 56 | deps: ["edge"], 57 | results: ["tc"], 58 | input: { 59 | edge: [ 60 | { x: 2, y: 3 }, 61 | { x: 3, y: 4 }, 62 | ], 63 | }, 64 | output: { 65 | tc: [ 66 | { x: 2, y: 3 }, 67 | { x: 2, y: 4 }, 68 | { x: 3, y: 4 }, 69 | ], 70 | }, 71 | }); 72 | }); 73 | 74 | it("evaluates a bigger transitive closure", async () => { 75 | await init(); 76 | await checkProgram({ 77 | src: ` 78 | tc(x, y) :- tc(x, y: z), edge(x: z, y). 79 | tc(x, y) :- edge(x, y). 80 | `, 81 | deps: ["edge"], 82 | results: ["tc"], 83 | input: { 84 | edge: [ 85 | { x: "hello", y: "world" }, 86 | { x: "world", y: "foo" }, 87 | { x: "foo", y: "baz" }, 88 | { x: "world", y: "bar" }, 89 | { x: "alt-src", y: "foo" }, 90 | ], 91 | }, 92 | output: { 93 | tc: [ 94 | { x: "hello", y: "world" }, 95 | { x: "hello", y: "foo" }, 96 | { x: "hello", y: "baz" }, 97 | { x: "hello", y: "bar" }, 98 | { x: "world", y: "foo" }, 99 | { x: "world", y: "baz" }, 100 | { x: "world", y: "bar" }, 101 | { x: "alt-src", y: "foo" }, 102 | { x: "alt-src", y: "baz" }, 103 | { x: "foo", y: "baz" }, 104 | ], 105 | }, 106 | }); 107 | }); 108 | 109 | it("evaluates transitive closure inline", async () => { 110 | await init(); 111 | await checkProgram({ 112 | src: ` 113 | edge(x: "foo", y: "bar"). 114 | edge(x: "bar", y: "baz"). 115 | tc(x, y) :- edge(x, y). 116 | tc(x, y) :- tc(x, y: z), edge(x: z, y). 
// Programs that embed raw JavaScript between backticks, both as computed
// property values and as boolean filter clauses.
describe("embedded backtick expressions", () => {
  it("evaluates backtick expressions", async () => {
    await init();
    await checkProgram({
      src: `
name(value: \`first + " " + last\`) :- person(first, last).
`,
      deps: ["person"],
      results: ["name"],
      input: {
        person: [
          {
            first: "eric",
            last: "zhang",
          },
          {
            first: "john",
            last: "doe",
          },
        ],
      },
      output: {
        name: [{ value: "eric zhang" }, { value: "john doe" }],
      },
    });
  });

  it("evaluates fibonacci numbers", async () => {
    await init();
    await checkProgram({
      // The trailing `n < 10` clause bounds the recursion, so the largest
      // derived fact is fib(n: 10).
      src: `
fib(n: 0, x: 0).
fib(n: 1, x: 1).
fib(n: \`n + 1\`, x: \`x1 + x2\`) :-
  fib(n, x: x1),
  fib(n: \`n - 1\`, x: x2),
  x = \`x1 + x2\`,
  \`n < 10\`.
`,
      deps: [],
      results: ["fib"],
      input: {},
      output: {
        fib: [
          { n: 0, x: 0 },
          { n: 1, x: 1 },
          { n: 2, x: 1 },
          { n: 3, x: 2 },
          { n: 4, x: 3 },
          { n: 5, x: 5 },
          { n: 6, x: 8 },
          { n: 7, x: 13 },
          { n: 8, x: 21 },
          { n: 9, x: 34 },
          { n: 10, x: 55 },
        ],
      },
    });
  });
});

// Cancelling the promise returned by `evaluate` must reject it with a fixed,
// user-facing message.
describe("promise cancellation", () => {
  it("can cancel evaluation", async () => {
    await init();
    const result = build("ok().");
    expect(result.ok).to.be.true;
    if (!result.ok) throw null; // unreachable
    const promise = result.evaluate({});
    promise.cancel();
    try {
      await promise;
      // If the await resolves, this error lands in the catch block below and
      // fails the message comparison there.
      throw new Error("Promise should have thrown");
    } catch (error: any) {
      expect(error.message).to.equal("Promise was cancelled by user");
    }
  });
});

// `import name from "uri"` directives that pull JSON and CSV datasets from a
// version-pinned npm package.
describe("import directives", () => {
  it("can load crimea.json", async () => {
    await init();
    await checkProgram({
      src: `import crimea from "npm://vega-datasets@2.1.0/data/crimea.json"`,
      deps: [],
      results: ["crimea"],
      input: {},
      output: {
        crimea: [
          { date: "1854-04-01", wounds: 0, other: 110, disease: 110 },
          { date: "1854-05-01", wounds: 0, other: 95, disease: 105 },
          { date: "1854-06-01", wounds: 0, other: 40, disease: 95 },
          { date: "1854-07-01", wounds: 0, other: 140, disease: 520 },
          { date: "1854-08-01", wounds: 20, other: 150, disease: 800 },
          { date: "1854-09-01", wounds: 220, other: 230, disease: 740 },
          { date: "1854-10-01", wounds: 305, other: 310, disease: 600 },
          { date: "1854-11-01", wounds: 480, other: 290, disease: 820 },
          { date: "1854-12-01", wounds: 295, other: 310, disease: 1100 },
          { date: "1855-01-01", wounds: 230, other: 460, disease: 1440 },
          { date: "1855-02-01", wounds: 180, other: 520, disease: 1270 },
          { date: "1855-03-01", wounds: 155, other: 350, disease: 935 },
          { date: "1855-04-01", wounds: 195, other: 195, disease: 560 },
          { date: "1855-05-01", wounds: 180, other: 155, disease: 550 },
          { date: "1855-06-01", wounds: 330, other: 130, disease: 650 },
          { date: "1855-07-01", wounds: 260, other: 130, disease: 430 },
          { date: "1855-08-01", wounds: 290, other: 110, disease: 490 },
          { date: "1855-09-01", wounds: 355, other: 100, disease: 290 },
          { date: "1855-10-01", wounds: 135, other: 95, disease: 245 },
          { date: "1855-11-01", wounds: 100, other: 140, disease: 325 },
          { date: "1855-12-01", wounds: 40, other: 120, disease: 215 },
          { date: "1856-01-01", wounds: 0, other: 160, disease: 160 },
          { date: "1856-02-01", wounds: 0, other: 100, disease: 100 },
          { date: "1856-03-01", wounds: 0, other: 125, disease: 90 },
        ],
      },
    });
  });

  it("can load iowa-electricity.csv", async () => {
    await init();
    await checkProgram({
      // Only the derived row count is listed in `output`; no expected rows
      // are spelled out for `iowa` itself.
      src: `
import iowa from "npm://vega-datasets@2.1.0/data/iowa-electricity.csv"
count(value: count[1] { iowa() }).
`,
      deps: [],
      results: ["iowa", "count"],
      input: {},
      output: {
        count: [{ value: 51 }],
      },
    });
  });
});
/// A range of character positions in a parser input.
pub type Span = std::ops::Range<usize>;

/// A token emitted from the initial lexical analysis phase.
///
/// Every payload keeps the source text of the token; numbers in particular
/// are stored as strings rather than parsed into a numeric type.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Token {
    /// An identifier, such as for a variable.
    Ident(String),
    /// A numerical constant literal.
    Number(String),
    /// A string literal, with optional escape sequences.
    String(String),
    /// A raw JavaScript expression delimited by backquotes.
    Expr(String),
    /// A control character understood by Percival.
    Ctrl(&'static str),
}

/// Renders a token roughly as it would appear in source: string literals are
/// wrapped in double quotes again and expressions in backquotes, while every
/// other variant prints its payload verbatim.
impl std::fmt::Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Token::Ident(s) => write!(f, "{}", s),
            Token::Number(n) => write!(f, "{}", n),
            Token::String(s) => write!(f, "\"{}\"", s),
            Token::Expr(e) => write!(f, "`{}`", e),
            Token::Ctrl(c) => write!(f, "{}", c),
        }
    }
}
43 | pub fn lexer() -> BoxedParser<'static, char, Vec<(Token, Span)>, Simple> { 44 | let ident = text::ident().labelled("ident"); 45 | 46 | let number = { 47 | // We only support decimal literals for now, not the full scope of numbers. 48 | // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#numeric_literals 49 | let digit = one_of("0123456789"); 50 | let digits = digit.then_ignore(just('_').or_not()).repeated().at_least(1); 51 | let sign = just('-') 52 | .or(just('+')) 53 | .map(|c| c.to_string()) 54 | .or_not() 55 | .map(Option::unwrap_or_default); 56 | let integer = sign.chain(digits.clone()); 57 | 58 | let fraction = just('.') 59 | .chain(digits.clone()) 60 | .or_not() 61 | .map(Option::unwrap_or_default); 62 | let exponent = just('e') 63 | .or(just('E')) 64 | .chain(sign.chain(digits)) 65 | .or_not() 66 | .map(Option::unwrap_or_default); 67 | integer 68 | .chain::(fraction) 69 | .chain::(exponent) 70 | .collect() 71 | }; 72 | 73 | let string = { 74 | let normal_char = filter(|&c: &char| c != '"' && c != '\\' && !c.is_control()); 75 | let hex_digit = filter(|&c: &char| c.is_ascii_hexdigit()); 76 | let control_char = just('\\') 77 | .chain( 78 | one_of("\"\\/bfnrt") 79 | .map(|c| vec![c]) 80 | .or(just('u').chain(hex_digit.repeated().at_least(4).at_most(4))), 81 | ) 82 | .collect::(); 83 | let chars = normal_char 84 | .map(|c| c.to_string()) 85 | .or(control_char) 86 | .repeated() 87 | .collect(); 88 | just('"').ignore_then(chars).then_ignore(just('"')) 89 | }; 90 | 91 | let expr = just('`') 92 | .ignore_then(take_until(just('`'))) 93 | .map(|(s, _)| s) 94 | .collect() 95 | .labelled("expr"); 96 | 97 | let ctrl = choice::<_, Simple>(( 98 | just::<_, _, Simple>(":-"), 99 | just::<_, _, Simple>("("), 100 | just::<_, _, Simple>(")"), 101 | just::<_, _, Simple>("["), 102 | just::<_, _, Simple>("]"), 103 | just::<_, _, Simple>("{"), 104 | just::<_, _, Simple>("}"), 105 | just::<_, _, Simple>(":"), 106 | just::<_, _, Simple>("."), 107 
| just::<_, _, Simple>(","), 108 | just::<_, _, Simple>("="), 109 | )); 110 | 111 | let token = choice(( 112 | ident.map(Token::Ident), 113 | number.map(Token::Number), 114 | string.map(Token::String), 115 | expr.map(Token::Expr), 116 | ctrl.map(Token::Ctrl), 117 | )) 118 | .boxed() 119 | .recover_with(skip_then_retry_until([])); 120 | 121 | let comments = { 122 | let single_line = just("//").then_ignore(take_until(text::newline())); 123 | let multi_line = just("/*").then_ignore(take_until(just("*/"))); 124 | single_line 125 | .or(multi_line) 126 | .padded() 127 | .repeated() 128 | .map_err(|e: Simple| Simple::custom(e.span(), "Not a valid comment")) 129 | }; 130 | 131 | token 132 | .padded() 133 | .padded_by(comments) 134 | .map_with_span(|tok, span| (tok, span)) 135 | .repeated() 136 | .boxed() 137 | } 138 | 139 | /// Construct a parser combinator for syntactic analysis (stage 2). 140 | /// 141 | /// If possible, prefer to use the higher-level `Grammar` API directly, rather 142 | /// than this low-level implementation of a parser combinator. 143 | pub fn parser() -> BoxedParser<'static, Token, Program, Simple> { 144 | use Token::*; 145 | 146 | let ident = select! { Ident(id) => id }; 147 | 148 | let literal = select! { 149 | Number(n) => Literal::Number(n), 150 | String(s) => Literal::String(s), 151 | Ident(b) if b == "true" => Literal::Boolean(true), 152 | Ident(b) if b == "false" => Literal::Boolean(false), 153 | } 154 | .labelled("literal"); 155 | 156 | let jc = |s: &'static str| just(Ctrl(s)); 157 | 158 | // Declared here so that we can use it for aggregate subqueries. 
159 | let mut clauses = Recursive::<_, Vec, Simple>::declare(); 160 | 161 | let value = recursive(|value| { 162 | let aggregate = ident 163 | .then(value.delimited_by(jc("["), jc("]"))) 164 | .then(clauses.clone().delimited_by(jc("{"), jc("}"))) 165 | .map(|((operator, value), subquery)| Aggregate { 166 | operator, 167 | value: Box::new(value), 168 | subquery, 169 | }); 170 | 171 | choice(( 172 | aggregate.map(Value::Aggregate), 173 | literal.map(Value::Literal), 174 | select! { 175 | Expr(e) => Value::Expr(e), 176 | Ident(id) => Value::Id(id), 177 | }, 178 | )) 179 | .labelled("value") 180 | }); 181 | 182 | let prop = ident 183 | .then(jc(":").ignore_then(value.clone()).or_not()) 184 | .try_map(|(id, value), span| { 185 | let value = value.unwrap_or_else(|| Value::Id(id.clone())); 186 | match &value { 187 | Value::Id(name) if is_reserved_word(name) => Err(Simple::custom( 188 | span, 189 | "Cannot use reserved word as a variable binding", 190 | )), 191 | _ => Ok((id, value)), 192 | } 193 | }) 194 | .labelled("prop"); 195 | 196 | let fact = ident 197 | .then(prop.separated_by(jc(",")).delimited_by(jc("("), jc(")"))) 198 | .map(|(name, props)| Fact { 199 | name, 200 | props: props.into_iter().collect(), 201 | }) 202 | .labelled("fact"); 203 | 204 | let expr = select! 
{ Expr(e) => e }; 205 | 206 | let binding = ident.then_ignore(jc("=")).then(value).labelled("binding"); 207 | 208 | let clause = choice(( 209 | fact.clone().map(Clause::Fact), 210 | expr.map(Clause::Expr), 211 | binding.map(|(name, value)| Clause::Binding(name, value)), 212 | )) 213 | .labelled("clause"); 214 | 215 | clauses.define(clause.clone().separated_by(jc(","))); 216 | 217 | let rule = fact 218 | .then( 219 | jc(":-") 220 | .ignore_then(clauses) 221 | .then_ignore(jc(".")) 222 | .try_map(|clauses, span| { 223 | if clauses.is_empty() { 224 | Err(Simple::custom(span, "Rule needs at least one clause")) 225 | } else { 226 | Ok(clauses) 227 | } 228 | }) 229 | .or(jc(".").to(Vec::new())), 230 | ) 231 | .map(|(goal, clauses)| Rule { goal, clauses }) 232 | .labelled("rule"); 233 | 234 | let import = select! { Ident(k) if k == "import" => () } 235 | .ignore_then(ident) 236 | .then_ignore(select! { Ident(k) if k == "from" => () }) 237 | .then(select! { String(s) => s }) 238 | .map(|(name, uri)| Import { name, uri }); 239 | 240 | enum Entry { 241 | Rule(Rule), 242 | Import(Import), 243 | } 244 | 245 | let program = choice((rule.map(Entry::Rule), import.map(Entry::Import))) 246 | .repeated() 247 | .map(|entries| { 248 | let mut rules = Vec::new(); 249 | let mut imports = Vec::new(); 250 | for entry in entries { 251 | match entry { 252 | Entry::Rule(rule) => rules.push(rule), 253 | Entry::Import(import) => imports.push(import), 254 | } 255 | } 256 | Program { rules, imports } 257 | }); 258 | 259 | program.then_ignore(end()).boxed() 260 | } 261 | 262 | /// Checks if a token is reserved, which cannot be used as an identifier. 263 | /// 264 | /// See [https://262.ecma-international.org/6.0/#sec-reserved-words] for 265 | /// JavaScript reserved words. The rest of the tokens listed here are prohibited 266 | /// for internal reasons, or because they mean other things in the context of 267 | /// the Percival language. 
268 | fn is_reserved_word(name: &str) -> bool { 269 | match name { 270 | // Reserved words in the ECMAScript standard 271 | "break" | "do" | "in" | "typeof" | "case" | "else" | "instanceof" | "var" | "catch" 272 | | "export" | "new" | "void" | "class" | "extends" | "return" | "while" | "const" 273 | | "finally" | "super" | "with" | "continue" | "for" | "switch" | "yield" | "debugger" 274 | | "function" | "this" | "default" | "if" | "throw" | "delete" | "import" | "try" 275 | | "enum" | "await" | "implements" | "package" | "protected" | "interface" | "private" 276 | | "public" | "null" | "true" | "false" | "let" => true, 277 | 278 | // Internal names, reserved to avoid conflicts 279 | _ => name.starts_with("__percival"), 280 | } 281 | } 282 | 283 | /// An end-to-end grammar, combining lexing and parsing stages. 284 | #[derive(Clone)] 285 | pub struct Grammar { 286 | lexer: BoxedParser<'static, char, Vec<(Token, Span)>, Simple>, 287 | parser: BoxedParser<'static, Token, Program, Simple>, 288 | } 289 | 290 | impl Grammar { 291 | /// Construct a new grammar for the Percival language. 292 | pub fn new() -> Self { 293 | Self { 294 | lexer: lexer(), 295 | parser: parser(), 296 | } 297 | } 298 | 299 | /// Parse an input source file, returning the program or a list of errors. 
300 | pub fn parse(&self, src: &str) -> Result>> { 301 | let (tokens, errs) = self.lexer.parse_recovery(src); 302 | let mut errs: Vec<_> = errs.into_iter().map(|e| e.map(|c| c.to_string())).collect(); 303 | 304 | if let Some(tokens) = tokens { 305 | // println!("Tokens = {:?}", tokens); 306 | let len = src.chars().count(); 307 | let stream = Stream::from_iter(len..len + 1, tokens.into_iter()); 308 | let (prog, parse_errs) = self.parser.parse_recovery(stream); 309 | match prog { 310 | Some(prog) if errs.is_empty() && parse_errs.is_empty() => Ok(prog), 311 | _ => { 312 | errs.extend(parse_errs.into_iter().map(|e| e.map(|c| c.to_string()))); 313 | Err(errs) 314 | } 315 | } 316 | } else { 317 | Err(errs) 318 | } 319 | } 320 | } 321 | 322 | impl Default for Grammar { 323 | fn default() -> Self { 324 | Self::new() 325 | } 326 | } 327 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "ahash" 7 | version = "0.3.8" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217" 10 | dependencies = [ 11 | "const-random", 12 | ] 13 | 14 | [[package]] 15 | name = "archery" 16 | version = "0.4.0" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "0a8da9bc4c4053ee067669762bcaeea6e241841295a2b6c948312dad6ef4cc02" 19 | dependencies = [ 20 | "static_assertions", 21 | ] 22 | 23 | [[package]] 24 | name = "ariadne" 25 | version = "0.1.5" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | checksum = "f1cb2a2046bea8ce5e875551f5772024882de0b540c7f93dfc5d6cf1ca8b030c" 28 | dependencies = [ 29 | "yansi", 30 | ] 31 | 32 | [[package]] 33 | name = "atty" 34 | version = "0.2.14" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 37 | dependencies = [ 38 | "hermit-abi", 39 | "libc", 40 | "winapi", 41 | ] 42 | 43 | [[package]] 44 | name = "autocfg" 45 | version = "1.1.0" 46 | source = "registry+https://github.com/rust-lang/crates.io-index" 47 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 48 | 49 | [[package]] 50 | name = "bitflags" 51 | version = "1.3.2" 52 | source = "registry+https://github.com/rust-lang/crates.io-index" 53 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 54 | 55 | [[package]] 56 | name = "bumpalo" 57 | version = "3.11.1" 58 | source = "registry+https://github.com/rust-lang/crates.io-index" 59 | checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" 60 | 61 | [[package]] 62 | name = "cfg-if" 63 | version = "1.0.0" 64 | source = "registry+https://github.com/rust-lang/crates.io-index" 65 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 66 | 67 
| [[package]] 68 | name = "chumsky" 69 | version = "0.8.0" 70 | source = "registry+https://github.com/rust-lang/crates.io-index" 71 | checksum = "8d02796e4586c6c41aeb68eae9bfb4558a522c35f1430c14b40136c3706e09e4" 72 | dependencies = [ 73 | "ahash", 74 | ] 75 | 76 | [[package]] 77 | name = "clap" 78 | version = "3.2.23" 79 | source = "registry+https://github.com/rust-lang/crates.io-index" 80 | checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" 81 | dependencies = [ 82 | "atty", 83 | "bitflags", 84 | "clap_derive", 85 | "clap_lex", 86 | "indexmap", 87 | "once_cell", 88 | "strsim", 89 | "termcolor", 90 | "textwrap", 91 | ] 92 | 93 | [[package]] 94 | name = "clap_derive" 95 | version = "3.2.18" 96 | source = "registry+https://github.com/rust-lang/crates.io-index" 97 | checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65" 98 | dependencies = [ 99 | "heck", 100 | "proc-macro-error", 101 | "proc-macro2", 102 | "quote", 103 | "syn", 104 | ] 105 | 106 | [[package]] 107 | name = "clap_lex" 108 | version = "0.2.4" 109 | source = "registry+https://github.com/rust-lang/crates.io-index" 110 | checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" 111 | dependencies = [ 112 | "os_str_bytes", 113 | ] 114 | 115 | [[package]] 116 | name = "console_error_panic_hook" 117 | version = "0.1.7" 118 | source = "registry+https://github.com/rust-lang/crates.io-index" 119 | checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" 120 | dependencies = [ 121 | "cfg-if", 122 | "wasm-bindgen", 123 | ] 124 | 125 | [[package]] 126 | name = "const-random" 127 | version = "0.1.15" 128 | source = "registry+https://github.com/rust-lang/crates.io-index" 129 | checksum = "368a7a772ead6ce7e1de82bfb04c485f3db8ec744f72925af5735e29a22cc18e" 130 | dependencies = [ 131 | "const-random-macro", 132 | "proc-macro-hack", 133 | ] 134 | 135 | [[package]] 136 | name = "const-random-macro" 137 | version = "0.1.15" 138 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 139 | checksum = "9d7d6ab3c3a2282db210df5f02c4dab6e0a7057af0fb7ebd4070f30fe05c0ddb" 140 | dependencies = [ 141 | "getrandom", 142 | "once_cell", 143 | "proc-macro-hack", 144 | "tiny-keccak", 145 | ] 146 | 147 | [[package]] 148 | name = "crunchy" 149 | version = "0.2.2" 150 | source = "registry+https://github.com/rust-lang/crates.io-index" 151 | checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" 152 | 153 | [[package]] 154 | name = "getrandom" 155 | version = "0.2.8" 156 | source = "registry+https://github.com/rust-lang/crates.io-index" 157 | checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" 158 | dependencies = [ 159 | "cfg-if", 160 | "libc", 161 | "wasi", 162 | ] 163 | 164 | [[package]] 165 | name = "hashbrown" 166 | version = "0.12.3" 167 | source = "registry+https://github.com/rust-lang/crates.io-index" 168 | checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" 169 | 170 | [[package]] 171 | name = "heck" 172 | version = "0.4.0" 173 | source = "registry+https://github.com/rust-lang/crates.io-index" 174 | checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" 175 | 176 | [[package]] 177 | name = "hermit-abi" 178 | version = "0.1.19" 179 | source = "registry+https://github.com/rust-lang/crates.io-index" 180 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 181 | dependencies = [ 182 | "libc", 183 | ] 184 | 185 | [[package]] 186 | name = "indexmap" 187 | version = "1.9.2" 188 | source = "registry+https://github.com/rust-lang/crates.io-index" 189 | checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" 190 | dependencies = [ 191 | "autocfg", 192 | "hashbrown", 193 | ] 194 | 195 | [[package]] 196 | name = "js-sys" 197 | version = "0.3.60" 198 | source = "registry+https://github.com/rust-lang/crates.io-index" 199 | checksum = 
"49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" 200 | dependencies = [ 201 | "wasm-bindgen", 202 | ] 203 | 204 | [[package]] 205 | name = "libc" 206 | version = "0.2.138" 207 | source = "registry+https://github.com/rust-lang/crates.io-index" 208 | checksum = "db6d7e329c562c5dfab7a46a2afabc8b987ab9a4834c9d1ca04dc54c1546cef8" 209 | 210 | [[package]] 211 | name = "log" 212 | version = "0.4.17" 213 | source = "registry+https://github.com/rust-lang/crates.io-index" 214 | checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" 215 | dependencies = [ 216 | "cfg-if", 217 | ] 218 | 219 | [[package]] 220 | name = "maplit" 221 | version = "1.0.2" 222 | source = "registry+https://github.com/rust-lang/crates.io-index" 223 | checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" 224 | 225 | [[package]] 226 | name = "once_cell" 227 | version = "1.16.0" 228 | source = "registry+https://github.com/rust-lang/crates.io-index" 229 | checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" 230 | 231 | [[package]] 232 | name = "os_str_bytes" 233 | version = "6.4.1" 234 | source = "registry+https://github.com/rust-lang/crates.io-index" 235 | checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" 236 | 237 | [[package]] 238 | name = "percival" 239 | version = "0.1.0" 240 | dependencies = [ 241 | "ariadne", 242 | "chumsky", 243 | "maplit", 244 | "rpds", 245 | "thiserror", 246 | ] 247 | 248 | [[package]] 249 | name = "percival-cli" 250 | version = "0.1.0" 251 | dependencies = [ 252 | "clap", 253 | "percival", 254 | ] 255 | 256 | [[package]] 257 | name = "percival-wasm" 258 | version = "0.1.0" 259 | dependencies = [ 260 | "console_error_panic_hook", 261 | "percival", 262 | "wasm-bindgen", 263 | "wasm-bindgen-test", 264 | "yansi", 265 | ] 266 | 267 | [[package]] 268 | name = "proc-macro-error" 269 | version = "1.0.4" 270 | source = "registry+https://github.com/rust-lang/crates.io-index" 
271 | checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" 272 | dependencies = [ 273 | "proc-macro-error-attr", 274 | "proc-macro2", 275 | "quote", 276 | "syn", 277 | "version_check", 278 | ] 279 | 280 | [[package]] 281 | name = "proc-macro-error-attr" 282 | version = "1.0.4" 283 | source = "registry+https://github.com/rust-lang/crates.io-index" 284 | checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" 285 | dependencies = [ 286 | "proc-macro2", 287 | "quote", 288 | "version_check", 289 | ] 290 | 291 | [[package]] 292 | name = "proc-macro-hack" 293 | version = "0.5.19" 294 | source = "registry+https://github.com/rust-lang/crates.io-index" 295 | checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" 296 | 297 | [[package]] 298 | name = "proc-macro2" 299 | version = "1.0.49" 300 | source = "registry+https://github.com/rust-lang/crates.io-index" 301 | checksum = "57a8eca9f9c4ffde41714334dee777596264c7825420f521abc92b5b5deb63a5" 302 | dependencies = [ 303 | "unicode-ident", 304 | ] 305 | 306 | [[package]] 307 | name = "quote" 308 | version = "1.0.23" 309 | source = "registry+https://github.com/rust-lang/crates.io-index" 310 | checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" 311 | dependencies = [ 312 | "proc-macro2", 313 | ] 314 | 315 | [[package]] 316 | name = "rpds" 317 | version = "0.12.0" 318 | source = "registry+https://github.com/rust-lang/crates.io-index" 319 | checksum = "66262ea963eff99163e6b741fbc3417a52cc13074728c1047e9911789df9b000" 320 | dependencies = [ 321 | "archery", 322 | ] 323 | 324 | [[package]] 325 | name = "scoped-tls" 326 | version = "1.0.1" 327 | source = "registry+https://github.com/rust-lang/crates.io-index" 328 | checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" 329 | 330 | [[package]] 331 | name = "static_assertions" 332 | version = "1.1.0" 333 | source = "registry+https://github.com/rust-lang/crates.io-index" 
334 | checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" 335 | 336 | [[package]] 337 | name = "strsim" 338 | version = "0.10.0" 339 | source = "registry+https://github.com/rust-lang/crates.io-index" 340 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" 341 | 342 | [[package]] 343 | name = "syn" 344 | version = "1.0.107" 345 | source = "registry+https://github.com/rust-lang/crates.io-index" 346 | checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" 347 | dependencies = [ 348 | "proc-macro2", 349 | "quote", 350 | "unicode-ident", 351 | ] 352 | 353 | [[package]] 354 | name = "termcolor" 355 | version = "1.1.3" 356 | source = "registry+https://github.com/rust-lang/crates.io-index" 357 | checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" 358 | dependencies = [ 359 | "winapi-util", 360 | ] 361 | 362 | [[package]] 363 | name = "textwrap" 364 | version = "0.16.0" 365 | source = "registry+https://github.com/rust-lang/crates.io-index" 366 | checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" 367 | 368 | [[package]] 369 | name = "thiserror" 370 | version = "1.0.38" 371 | source = "registry+https://github.com/rust-lang/crates.io-index" 372 | checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" 373 | dependencies = [ 374 | "thiserror-impl", 375 | ] 376 | 377 | [[package]] 378 | name = "thiserror-impl" 379 | version = "1.0.38" 380 | source = "registry+https://github.com/rust-lang/crates.io-index" 381 | checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" 382 | dependencies = [ 383 | "proc-macro2", 384 | "quote", 385 | "syn", 386 | ] 387 | 388 | [[package]] 389 | name = "tiny-keccak" 390 | version = "2.0.2" 391 | source = "registry+https://github.com/rust-lang/crates.io-index" 392 | checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" 393 | dependencies = [ 394 | "crunchy", 
395 | ] 396 | 397 | [[package]] 398 | name = "unicode-ident" 399 | version = "1.0.6" 400 | source = "registry+https://github.com/rust-lang/crates.io-index" 401 | checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" 402 | 403 | [[package]] 404 | name = "version_check" 405 | version = "0.9.4" 406 | source = "registry+https://github.com/rust-lang/crates.io-index" 407 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 408 | 409 | [[package]] 410 | name = "wasi" 411 | version = "0.11.0+wasi-snapshot-preview1" 412 | source = "registry+https://github.com/rust-lang/crates.io-index" 413 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 414 | 415 | [[package]] 416 | name = "wasm-bindgen" 417 | version = "0.2.83" 418 | source = "registry+https://github.com/rust-lang/crates.io-index" 419 | checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" 420 | dependencies = [ 421 | "cfg-if", 422 | "wasm-bindgen-macro", 423 | ] 424 | 425 | [[package]] 426 | name = "wasm-bindgen-backend" 427 | version = "0.2.83" 428 | source = "registry+https://github.com/rust-lang/crates.io-index" 429 | checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" 430 | dependencies = [ 431 | "bumpalo", 432 | "log", 433 | "once_cell", 434 | "proc-macro2", 435 | "quote", 436 | "syn", 437 | "wasm-bindgen-shared", 438 | ] 439 | 440 | [[package]] 441 | name = "wasm-bindgen-futures" 442 | version = "0.4.33" 443 | source = "registry+https://github.com/rust-lang/crates.io-index" 444 | checksum = "23639446165ca5a5de86ae1d8896b737ae80319560fbaa4c2887b7da6e7ebd7d" 445 | dependencies = [ 446 | "cfg-if", 447 | "js-sys", 448 | "wasm-bindgen", 449 | "web-sys", 450 | ] 451 | 452 | [[package]] 453 | name = "wasm-bindgen-macro" 454 | version = "0.2.83" 455 | source = "registry+https://github.com/rust-lang/crates.io-index" 456 | checksum = 
"052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" 457 | dependencies = [ 458 | "quote", 459 | "wasm-bindgen-macro-support", 460 | ] 461 | 462 | [[package]] 463 | name = "wasm-bindgen-macro-support" 464 | version = "0.2.83" 465 | source = "registry+https://github.com/rust-lang/crates.io-index" 466 | checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" 467 | dependencies = [ 468 | "proc-macro2", 469 | "quote", 470 | "syn", 471 | "wasm-bindgen-backend", 472 | "wasm-bindgen-shared", 473 | ] 474 | 475 | [[package]] 476 | name = "wasm-bindgen-shared" 477 | version = "0.2.83" 478 | source = "registry+https://github.com/rust-lang/crates.io-index" 479 | checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" 480 | 481 | [[package]] 482 | name = "wasm-bindgen-test" 483 | version = "0.3.33" 484 | source = "registry+https://github.com/rust-lang/crates.io-index" 485 | checksum = "09d2fff962180c3fadf677438054b1db62bee4aa32af26a45388af07d1287e1d" 486 | dependencies = [ 487 | "console_error_panic_hook", 488 | "js-sys", 489 | "scoped-tls", 490 | "wasm-bindgen", 491 | "wasm-bindgen-futures", 492 | "wasm-bindgen-test-macro", 493 | ] 494 | 495 | [[package]] 496 | name = "wasm-bindgen-test-macro" 497 | version = "0.3.33" 498 | source = "registry+https://github.com/rust-lang/crates.io-index" 499 | checksum = "4683da3dfc016f704c9f82cf401520c4f1cb3ee440f7f52b3d6ac29506a49ca7" 500 | dependencies = [ 501 | "proc-macro2", 502 | "quote", 503 | ] 504 | 505 | [[package]] 506 | name = "web-sys" 507 | version = "0.3.60" 508 | source = "registry+https://github.com/rust-lang/crates.io-index" 509 | checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f" 510 | dependencies = [ 511 | "js-sys", 512 | "wasm-bindgen", 513 | ] 514 | 515 | [[package]] 516 | name = "winapi" 517 | version = "0.3.9" 518 | source = "registry+https://github.com/rust-lang/crates.io-index" 519 | checksum = 
"5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 520 | dependencies = [ 521 | "winapi-i686-pc-windows-gnu", 522 | "winapi-x86_64-pc-windows-gnu", 523 | ] 524 | 525 | [[package]] 526 | name = "winapi-i686-pc-windows-gnu" 527 | version = "0.4.0" 528 | source = "registry+https://github.com/rust-lang/crates.io-index" 529 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 530 | 531 | [[package]] 532 | name = "winapi-util" 533 | version = "0.1.5" 534 | source = "registry+https://github.com/rust-lang/crates.io-index" 535 | checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 536 | dependencies = [ 537 | "winapi", 538 | ] 539 | 540 | [[package]] 541 | name = "winapi-x86_64-pc-windows-gnu" 542 | version = "0.4.0" 543 | source = "registry+https://github.com/rust-lang/crates.io-index" 544 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 545 | 546 | [[package]] 547 | name = "yansi" 548 | version = "0.5.1" 549 | source = "registry+https://github.com/rust-lang/crates.io-index" 550 | checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" 551 | -------------------------------------------------------------------------------- /crates/percival/src/codegen.rs: -------------------------------------------------------------------------------- 1 | //! JavaScript dynamic code generation facilities for Percival. 
2 | 3 | use std::{ 4 | collections::{BTreeMap, BTreeSet}, 5 | fmt::Display, 6 | rc::Rc, 7 | }; 8 | 9 | use rpds::{RedBlackTreeMap, RedBlackTreeSet}; 10 | use thiserror::Error; 11 | 12 | use crate::ast::{Aggregate, Clause, Literal, Program, Rule, Value}; 13 | 14 | const VAR_DEPS: &str = "__percival_deps"; 15 | const VAR_IMMUTABLE: &str = "__percival.Immutable"; 16 | const VAR_LOAD: &str = "__percival.load"; 17 | const VAR_AGGREGATES: &str = "__percival.aggregates"; 18 | const VAR_IMPORTS: &str = "__percival_imports"; 19 | 20 | const VAR_FIRST_ITERATION: &str = "__percival_first_iteration"; 21 | const VAR_OBJ: &str = "__percival_obj"; 22 | const VAR_GOAL: &str = "__percival_goal"; 23 | 24 | /// List of aggregate operators. Keep this in sync with `worker.ts`. 25 | const OPERATORS: [&str; 5] = ["count", "sum", "mean", "min", "max"]; 26 | 27 | /// An error during code generation. 28 | #[derive(Error, Debug)] 29 | pub enum Error { 30 | /// A given variable was not found in context. 31 | #[error("Could not find definition of `{0:?}` in context")] 32 | UndefVar(VarId), 33 | 34 | /// Two conflicting imports were found with the same name. 35 | #[error("Multiple imports found with name \"{0}\"")] 36 | DuplicateImport(String), 37 | 38 | /// Tried to put an import on the left-hand side of a rule. 39 | #[error("Imported relation \"{0}\" cannot be used as the goal of a rule")] 40 | GoalImportConflict(String), 41 | 42 | /// Import protocol not understood in directive. 43 | #[error("Unknown import protocol \"{0}\"")] 44 | UnknownProtocol(String), 45 | 46 | /// Two conflicting variables were defined with the same name. 47 | #[error("Conflicting declaration of variable \"{0}\"")] 48 | DuplicateVariable(String), 49 | 50 | /// Unknown aggregate operator was referenced. 51 | #[error("Aggregate operator \"{0}\" is not in {OPERATORS:?}")] 52 | UnknownAggregate(String), 53 | 54 | /// Aggregate references relation that is declared in this cell. 
55 | #[error("Relation \"{0}\" is queried in the same cell that it is declared")] 56 | CircularReference(String), 57 | } 58 | 59 | /// Result returned by the compiler. 60 | pub type Result = std::result::Result; 61 | 62 | /// An index created on a subset of relation fields. 63 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] 64 | pub struct Index { 65 | /// Name of the relation being indexed. 66 | name: String, 67 | 68 | /// Bound fields of the relation. 69 | bound: BTreeSet, 70 | } 71 | 72 | /// Abstract identifier for variables stored in JavaScript objects. 73 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] 74 | pub enum VarId { 75 | /// Active sets of current relations. 76 | Set(String), 77 | 78 | /// Index maps of current relations. 79 | Index(Index), 80 | 81 | /// Updated relations in the current iteration. 82 | Update(String), 83 | 84 | /// Updates to index maps of current relations. 85 | IndexUpdate(Index), 86 | 87 | /// New relations in the current iteration. 88 | New(String), 89 | 90 | /// A bound local variable in Datalog. 91 | Var(String), 92 | } 93 | 94 | /// Context storing mappings of [`VarId`] to their JavaScript identifiers. 95 | /// 96 | /// This is implemented using a persistent data structure, so it can be cheaply 97 | /// cloned to produce nested subcontexts. 98 | #[derive(Clone, Debug)] 99 | struct Context { 100 | map: RedBlackTreeMap, 101 | deps: Rc>, 102 | results: Rc>, 103 | imports: Rc>, 104 | counter: u32, 105 | } 106 | 107 | impl Context { 108 | fn new(prog: &Program) -> Self { 109 | Context { 110 | map: RedBlackTreeMap::new(), 111 | deps: Rc::new(prog.deps()), 112 | results: Rc::new(prog.results()), 113 | imports: Rc::new(prog.imports()), 114 | counter: 0, 115 | } 116 | } 117 | 118 | /// Produce a new, globally unique symbol for compilation. 
119 | fn gensym(&mut self, key: &str) -> String { 120 | let counter = self.counter; 121 | self.counter += 1; 122 | format!("__percival_{}_{}", key, counter) 123 | } 124 | 125 | /// Get an entry of the map. 126 | fn get(&self, key: &VarId) -> Result { 127 | self.map 128 | .get(key) 129 | .map(String::clone) 130 | .ok_or_else(|| Error::UndefVar(key.clone())) 131 | } 132 | 133 | /// Add a new entry to the map, returning a new map. 134 | fn add(&self, key: VarId, value: String) -> Self { 135 | if self.map.contains_key(&key) { 136 | panic!("Tried to add duplicate key {:?} to context", key); 137 | } 138 | Self { 139 | map: self.map.insert(key, value), 140 | ..self.clone() 141 | } 142 | } 143 | 144 | /// Check is a fact value is bound or free, given the current context. 145 | fn is_bound(&self, value: &Value) -> bool { 146 | match value { 147 | Value::Id(id) => self.map.contains_key(&VarId::Var(id.clone())), 148 | Value::Literal(_) | Value::Expr(_) | Value::Aggregate(_) => true, 149 | } 150 | } 151 | } 152 | 153 | /// Generates a JavaScript function body that evaluates the program. 154 | pub fn compile(prog: &Program) -> Result { 155 | let ctx = make_global_context(prog)?; 156 | let code = [ 157 | cmp_imports(prog)?, 158 | cmp_decls(&ctx)?, 159 | cmp_main_loop(&ctx, prog)?, 160 | cmp_output(&ctx)?, 161 | ]; 162 | Ok(code.join("\n")) 163 | } 164 | 165 | fn make_global_context(prog: &Program) -> Result { 166 | let mut ctx = Context::new(prog); 167 | 168 | if ctx.imports.len() < prog.imports.len() { 169 | // Some duplicate import during parsing, find and return it. 
170 | let mut names = BTreeSet::new(); 171 | for import in &prog.imports { 172 | if names.contains(&import.name) { 173 | return Err(Error::DuplicateImport(import.name.clone())); 174 | } 175 | names.insert(import.name.clone()); 176 | } 177 | unreachable!("At least one import must be duplicated"); 178 | } 179 | 180 | for name in Rc::clone(&ctx.imports).iter() { 181 | if ctx.results.contains(name) { 182 | return Err(Error::GoalImportConflict(name.clone())); 183 | } 184 | let set_name = ctx.gensym(name); 185 | ctx = ctx.add(VarId::Set(name.clone()), set_name); 186 | } 187 | 188 | for name in Rc::clone(&ctx.deps).iter() { 189 | let set_name = ctx.gensym(name); 190 | ctx = ctx.add(VarId::Set(name.clone()), set_name); 191 | } 192 | 193 | for name in Rc::clone(&ctx.results).iter() { 194 | let set_name = ctx.gensym(name); 195 | let update_name = ctx.gensym(&format!("{}_update", name)); 196 | ctx = ctx 197 | .add(VarId::Set(name.clone()), set_name) 198 | .add(VarId::Update(name.clone()), update_name); 199 | } 200 | 201 | for index in make_indices(prog) { 202 | let index_name = ctx.gensym(&format!("{}_index", index.name)); 203 | ctx = ctx.add(VarId::Index(index.clone()), index_name); 204 | if ctx.results.contains(&index.name) { 205 | let update_name = ctx.gensym(&format!("{}_index_update", index.name)); 206 | ctx = ctx.add(VarId::IndexUpdate(index), update_name); 207 | } 208 | } 209 | 210 | Ok(ctx) 211 | } 212 | 213 | fn make_indices(prog: &Program) -> BTreeSet { 214 | fn walk_clause<'a>( 215 | indices: &mut BTreeSet, 216 | vars: &mut RedBlackTreeSet<&'a str>, 217 | clause: &'a Clause, 218 | ) { 219 | match clause { 220 | Clause::Fact(fact) => { 221 | for value in fact.props.values() { 222 | walk_value(indices, vars, value); 223 | } 224 | let mut bound = BTreeSet::new(); 225 | for (key, value) in &fact.props { 226 | match value { 227 | Value::Id(id) => { 228 | if vars.contains(&id[..]) { 229 | bound.insert(key.to_owned()); 230 | } else { 231 | *vars = vars.insert(id); 232 | } 
233 | } 234 | Value::Literal(_) | Value::Expr(_) | Value::Aggregate(_) => { 235 | bound.insert(key.to_owned()); 236 | } 237 | } 238 | } 239 | if !bound.is_empty() { 240 | indices.insert(Index { 241 | name: fact.name.clone(), 242 | bound, 243 | }); 244 | } 245 | } 246 | Clause::Expr(_) => (), 247 | Clause::Binding(_, value) => { 248 | walk_value(indices, vars, value); 249 | } 250 | } 251 | } 252 | 253 | fn walk_clauses<'a>( 254 | indices: &mut BTreeSet, 255 | vars: &mut RedBlackTreeSet<&'a str>, 256 | clauses: &'a [Clause], 257 | ) { 258 | for clause in clauses { 259 | walk_clause(indices, vars, clause); 260 | } 261 | } 262 | 263 | fn walk_value(indices: &mut BTreeSet, vars: &RedBlackTreeSet<&str>, value: &Value) { 264 | if let Value::Aggregate(aggregate) = value { 265 | let mut vars = vars.clone(); 266 | walk_clauses(indices, &mut vars, &aggregate.subquery); 267 | walk_value(indices, &vars, &aggregate.value); 268 | } 269 | } 270 | 271 | fn walk_rule(indices: &mut BTreeSet, rule: &Rule) { 272 | let mut vars = RedBlackTreeSet::new(); 273 | walk_clauses(indices, &mut vars, &rule.clauses); 274 | for value in rule.goal.props.values() { 275 | walk_value(indices, &vars, value); 276 | } 277 | } 278 | 279 | let mut indices = BTreeSet::new(); 280 | for rule in &prog.rules { 281 | walk_rule(&mut indices, rule); 282 | } 283 | indices 284 | } 285 | 286 | fn cmp_imports(prog: &Program) -> Result { 287 | if prog.imports.is_empty() { 288 | return Ok("".into()); 289 | } 290 | let mut fields = Vec::new(); 291 | for import in &prog.imports { 292 | let index = import.uri.find("://"); 293 | let index = index.ok_or_else(|| Error::UnknownProtocol("".into()))?; 294 | let (protocol, address) = import.uri.split_at(index + 3); 295 | let url = match protocol { 296 | "http://" | "https://" => import.uri.clone(), 297 | "gh://" => format!("https://cdn.jsdelivr.net/gh/{}", address), 298 | "npm://" => format!("https://cdn.jsdelivr.net/npm/{}", address), 299 | _ => return 
Err(Error::UnknownProtocol(protocol.into())), 300 | }; 301 | fields.push(format!( 302 | "{}: await {}(\"{}\"),\n", 303 | import.name, VAR_LOAD, url, 304 | )); 305 | } 306 | Ok(format!( 307 | "const {} = {{\n{}}};", 308 | VAR_IMPORTS, 309 | fields.join(""), 310 | )) 311 | } 312 | 313 | fn cmp_decls(ctx: &Context) -> Result { 314 | let mut decls = Vec::new(); 315 | for (id, js_name) in &ctx.map { 316 | match id { 317 | VarId::Set(name) | VarId::Update(name) => { 318 | decls.push(format!("let {} = {}.Set();", js_name, VAR_IMMUTABLE)); 319 | if ctx.deps.contains(name) || ctx.imports.contains(name) { 320 | // Initialize sets - need to move to Immutable.Map objects. 321 | let source = if ctx.deps.contains(name) { 322 | VAR_DEPS 323 | } else { 324 | VAR_IMPORTS 325 | }; 326 | let init_set = format!( 327 | " 328 | {v} = {v}.withMutations({v} => {{ 329 | for (const {obj} of {source}.{name}) {{ 330 | {v}.add({imm}.Map({obj})); 331 | }} 332 | }}); 333 | ", 334 | v = js_name, 335 | obj = VAR_OBJ, 336 | source = source, 337 | imm = VAR_IMMUTABLE, 338 | name = name, 339 | ); 340 | decls.push(init_set.trim().into()); 341 | } 342 | } 343 | VarId::Index(index) => { 344 | decls.push(format!("let {} = {}.Map();", js_name, VAR_IMMUTABLE)); 345 | if ctx.deps.contains(&index.name) || ctx.imports.contains(&index.name) { 346 | // Initialize index in the declarations. 
347 | let source = if ctx.deps.contains(&index.name) { 348 | VAR_DEPS 349 | } else { 350 | VAR_IMPORTS 351 | }; 352 | let init_index = format!( 353 | " 354 | {v} = {v}.withMutations({v} => {{ 355 | for (const {obj} of {source}.{name}) {{ 356 | {v}.update({imm}.Map({bindings}), value => {{ 357 | if (value === undefined) value = []; 358 | value.push({imm}.Map({obj})); 359 | return value; 360 | }}); 361 | }} 362 | }});", 363 | v = js_name, 364 | obj = VAR_OBJ, 365 | source = source, 366 | imm = VAR_IMMUTABLE, 367 | name = index.name, 368 | bindings = cmp_object(&index.bound, |field| { 369 | Ok(format!("{}.{}", VAR_OBJ, field)) 370 | })?, 371 | ); 372 | decls.push(init_index.trim().into()); 373 | } 374 | } 375 | _ => (), 376 | } 377 | } 378 | Ok(decls.join("\n")) 379 | } 380 | 381 | fn cmp_main_loop(ctx: &Context, prog: &Program) -> Result { 382 | let updates = cmp_updates(ctx)?; 383 | let (ctx, new_decls) = cmp_new_decls(ctx); 384 | let rules = cmp_rules(&ctx, prog)?; 385 | let set_update_to_new = cmp_set_update_to_new(&ctx)?; 386 | let main_loop = format!( 387 | " 388 | let {first_iter} = true; 389 | while ({first_iter} || !({no_updates})) {{ 390 | {updates} 391 | {new_decls} 392 | {rules} 393 | {set_update_to_new} 394 | {first_iter} = false; 395 | }}", 396 | first_iter = VAR_FIRST_ITERATION, 397 | no_updates = ctx 398 | .results 399 | .iter() 400 | .map(|name| format!( 401 | "{}.size === 0 && ", 402 | ctx.get(&VarId::Update(name.into())) 403 | .expect("could not find name in main loop no_updates") 404 | )) 405 | .collect::>() 406 | .join("") 407 | + "true", 408 | updates = updates, 409 | new_decls = new_decls, 410 | rules = rules, 411 | set_update_to_new = set_update_to_new, 412 | ); 413 | Ok(main_loop.trim().into()) 414 | } 415 | 416 | fn cmp_updates(ctx: &Context) -> Result { 417 | let mut updates = Vec::new(); 418 | for (id, js_name) in &ctx.map { 419 | match id { 420 | VarId::Update(name) => { 421 | updates.push(format!( 422 | "{v} = {v}.merge({upd});", 423 | v 
= ctx.get(&VarId::Set(name.into()))?, 424 | upd = js_name, 425 | )); 426 | } 427 | VarId::Index(index) if ctx.results.contains(&index.name) => { 428 | let upd_name = ctx.get(&VarId::Update(index.name.clone()))?; 429 | let ind_upd_name = ctx.get(&VarId::IndexUpdate(index.clone()))?; 430 | let code = format!( 431 | " 432 | {v} = {v}.asMutable(); 433 | let {ind_upd} = {imm}.Map().asMutable(); 434 | for (const {obj} of {upd}) {{ 435 | const key = {imm}.Map({key}); 436 | {v}.update(key, value => {{ 437 | if (value === undefined) value = []; 438 | value.push({obj}); 439 | return value; 440 | }}); 441 | {ind_upd}.update(key, value => {{ 442 | if (value === undefined) value = []; 443 | value.push({obj}); 444 | return value; 445 | }}); 446 | }} 447 | {v} = {v}.asImmutable(); 448 | {ind_upd} = {ind_upd}.asImmutable(); 449 | ", 450 | imm = VAR_IMMUTABLE, 451 | obj = VAR_OBJ, 452 | v = js_name, 453 | upd = upd_name, 454 | ind_upd = ind_upd_name, 455 | key = cmp_object(&index.bound, |field| { 456 | Ok(format!("{}.get('{}')", VAR_OBJ, field)) 457 | })?, 458 | ); 459 | updates.push(code.trim().into()); 460 | } 461 | _ => (), 462 | } 463 | } 464 | Ok(updates.join("\n")) 465 | } 466 | 467 | fn cmp_new_decls(ctx: &Context) -> (Context, String) { 468 | let mut ctx = ctx.clone(); 469 | let mut decls = Vec::new(); 470 | for result in Rc::clone(&ctx.results).iter() { 471 | let name = ctx.gensym(&format!("{}_new", result)); 472 | decls.push(format!( 473 | "const {} = {}.Set().asMutable();", 474 | name, VAR_IMMUTABLE, 475 | )); 476 | ctx = ctx.add(VarId::New(result.clone()), name); 477 | } 478 | (ctx, decls.join("\n")) 479 | } 480 | 481 | fn cmp_rules(ctx: &Context, prog: &Program) -> Result { 482 | Ok(prog 483 | .rules 484 | .iter() 485 | .map(|rule| cmp_rule(ctx, rule)) 486 | .collect::>>()? 487 | .join("\n")) 488 | } 489 | 490 | /// Compile a single Datalog rule into a collection of loops. 
491 | fn cmp_rule(ctx: &Context, rule: &Rule) -> Result { 492 | let fact_positions: Vec<_> = rule 493 | .clauses 494 | .iter() 495 | .enumerate() 496 | .filter_map(|(i, clause)| match clause { 497 | Clause::Fact(fact) if ctx.results.contains(&fact.name) => Some(i), 498 | _ => None, 499 | }) 500 | .collect(); 501 | 502 | if fact_positions.is_empty() { 503 | // Will not change, so we only need to evaluate it once 504 | let eval_loop = cmp_rule_incremental(ctx, rule, None)?; 505 | Ok(format!( 506 | "if ({first_iter}) {{\n{eval_loop}\n}}", 507 | first_iter = VAR_FIRST_ITERATION, 508 | eval_loop = eval_loop 509 | )) 510 | } else { 511 | // Rule has one or more facts, so we use semi-naive evaluation 512 | let variants = fact_positions 513 | .into_iter() 514 | .map(|update_position| cmp_rule_incremental(ctx, rule, Some(update_position))) 515 | .collect::>>()?; 516 | Ok(variants.join("\n")) 517 | } 518 | } 519 | 520 | /// Compile a single incremental semi-naive evaluation loop for a rule. 521 | fn cmp_rule_incremental( 522 | ctx: &Context, 523 | rule: &Rule, 524 | update_position: Option, 525 | ) -> Result { 526 | let mut ctx = ctx.clone(); 527 | 528 | let mut clauses = Vec::new(); 529 | for (i, clause) in rule.clauses.iter().enumerate() { 530 | let only_update = update_position == Some(i); 531 | clauses.push(cmp_clause(&mut ctx, clause, only_update, false)?); 532 | } 533 | 534 | let goal = format!( 535 | " 536 | const {goal} = {imm}.Map({goal_obj}); 537 | if (!{set}.includes({goal})) {new}.add({goal}); 538 | ", 539 | goal = VAR_GOAL, 540 | imm = VAR_IMMUTABLE, 541 | goal_obj = cmp_fields(&ctx, &rule.goal.props)?, 542 | set = ctx.get(&VarId::Set(rule.goal.name.clone())).unwrap(), 543 | new = ctx.get(&VarId::New(rule.goal.name.clone())).unwrap(), 544 | ); 545 | 546 | let mut code = String::from("{\n"); 547 | for clause in &clauses { 548 | code += clause; 549 | code += "\n"; 550 | } 551 | code += goal.trim(); 552 | code += &"\n}".repeat(clauses.len() + 1); 553 | Ok(code) 554 
| } 555 | 556 | fn cmp_clause( 557 | ctx: &mut Context, 558 | clause: &Clause, 559 | only_update: bool, 560 | is_subquery: bool, 561 | ) -> Result { 562 | match clause { 563 | Clause::Fact(fact) => { 564 | if is_subquery && ctx.results.contains(&fact.name) { 565 | return Err(Error::CircularReference(fact.name.clone())); 566 | } 567 | 568 | let mut bound_fields = BTreeMap::new(); 569 | let mut setters = Vec::new(); 570 | for (key, value) in &fact.props { 571 | if ctx.is_bound(value) { 572 | bound_fields.insert(key.clone(), value.clone()); 573 | } else { 574 | match value { 575 | Value::Id(id) => { 576 | // Use the same name for the variable in JavaScript. 577 | let name = id.clone(); 578 | setters.push(format!("const {} = {}.get('{}');", name, VAR_OBJ, key)); 579 | *ctx = ctx.add(VarId::Var(id.clone()), name); 580 | } 581 | Value::Literal(_) | Value::Expr(_) | Value::Aggregate(_) => { 582 | unreachable!("literal and expression values are always bound") 583 | } 584 | } 585 | } 586 | } 587 | 588 | if bound_fields.is_empty() { 589 | // No bound fields, just iterate over the set. 590 | let name = fact.name.clone(); 591 | let set = ctx.get(&if !only_update { 592 | VarId::Set(name) 593 | } else { 594 | VarId::Update(name) 595 | })?; 596 | 597 | let code = format!( 598 | " 599 | for (const {obj} of {set}) {{ 600 | {setters} 601 | ", 602 | obj = VAR_OBJ, 603 | set = set, 604 | setters = setters.join("\n"), 605 | ); 606 | Ok(code.trim().into()) 607 | } else { 608 | // At least one field is bound, so we use an index instead. 609 | let index = Index { 610 | name: fact.name.clone(), 611 | bound: bound_fields.keys().cloned().collect(), 612 | }; 613 | let index = ctx.get(&if !only_update { 614 | VarId::Index(index) 615 | } else { 616 | VarId::IndexUpdate(index) 617 | })?; 618 | 619 | let code = format!( 620 | " 621 | for (const {obj} of {index}.get({imm}.Map({bindings})) ?? 
[]) {{ 622 | {setters} 623 | ", 624 | obj = VAR_OBJ, 625 | imm = VAR_IMMUTABLE, 626 | index = index, 627 | bindings = cmp_fields(ctx, &bound_fields)?, 628 | setters = setters.join("\n"), 629 | ); 630 | Ok(code.trim().into()) 631 | } 632 | } 633 | 634 | Clause::Expr(expr) => { 635 | assert!(!only_update); 636 | Ok(format!("if ({}) {{", expr)) 637 | } 638 | 639 | Clause::Binding(name, value) => { 640 | assert!(!only_update); 641 | let key = VarId::Var(name.clone()); 642 | if ctx.map.contains_key(&key) { 643 | return Err(Error::DuplicateVariable(name.clone())); 644 | } 645 | *ctx = ctx.add(VarId::Var(name.clone()), name.clone()); 646 | Ok(format!("{{\nconst {} = {};", name, cmp_value(ctx, value)?)) 647 | } 648 | } 649 | } 650 | 651 | fn cmp_fields(ctx: &Context, props: &BTreeMap) -> Result { 652 | cmp_object(props.keys(), |key| { 653 | let value = props.get(key).unwrap(); 654 | cmp_value(ctx, value) 655 | }) 656 | } 657 | 658 | fn cmp_value(ctx: &Context, value: &Value) -> Result { 659 | Ok(match value { 660 | Value::Id(id) => ctx.get(&VarId::Var(id.clone()))?, 661 | Value::Literal(Literal::Number(n)) => n.clone(), 662 | Value::Literal(Literal::String(s)) => format!("\"{}\"", s), 663 | Value::Literal(Literal::Boolean(b)) => b.to_string(), 664 | Value::Expr(e) => format!("({})", e), 665 | Value::Aggregate(aggregate) => cmp_aggregate(ctx, aggregate)?, 666 | }) 667 | } 668 | 669 | fn cmp_aggregate(ctx: &Context, aggregate: &Aggregate) -> Result { 670 | if !OPERATORS.contains(&&aggregate.operator[..]) { 671 | return Err(Error::UnknownAggregate(aggregate.operator.clone())); 672 | } 673 | let mut ctx = ctx.clone(); // Create a new context for this aggregate. 
674 | let results_var = ctx.gensym("results"); 675 | 676 | let subquery_loop = { 677 | let mut clauses = Vec::new(); 678 | for clause in &aggregate.subquery { 679 | clauses.push(cmp_clause(&mut ctx, clause, false, true)?); 680 | } 681 | 682 | let goal = format!( 683 | "{results}.push({value});", 684 | results = results_var, 685 | value = cmp_value(&ctx, &aggregate.value)?, 686 | ); 687 | 688 | let mut code = String::new(); 689 | for clause in &clauses { 690 | code += clause; 691 | code += "\n"; 692 | } 693 | code += &goal; 694 | code += &"\n}".repeat(clauses.len()); 695 | code 696 | }; 697 | 698 | let code = format!( 699 | "{agg}.{op}((() => {{ 700 | const {results} = []; 701 | {subquery_loop} 702 | return {results}; 703 | }})())", 704 | agg = VAR_AGGREGATES, 705 | op = aggregate.operator, 706 | results = results_var, 707 | subquery_loop = subquery_loop, 708 | ); 709 | 710 | Ok(code) 711 | } 712 | 713 | fn cmp_set_update_to_new(ctx: &Context) -> Result { 714 | let setters = ctx 715 | .results 716 | .iter() 717 | .map(|name| { 718 | Ok(format!( 719 | "{} = {}.asImmutable();", 720 | ctx.get(&VarId::Update(name.clone()))?, 721 | ctx.get(&VarId::New(name.clone()))?, 722 | )) 723 | }) 724 | .collect::>>()?; 725 | Ok(setters.join("\n")) 726 | } 727 | 728 | fn cmp_output(ctx: &Context) -> Result { 729 | let outputs: BTreeSet = ctx 730 | .results 731 | .iter() 732 | .chain(ctx.imports.iter()) 733 | .cloned() 734 | .collect(); 735 | let obj = cmp_object(&outputs, |name| { 736 | Ok(format!("{}.toJS()", ctx.get(&VarId::Set(name.clone()))?)) 737 | })?; 738 | Ok(format!("return {};", obj)) 739 | } 740 | 741 | fn cmp_object( 742 | fields: impl IntoIterator, 743 | value_fn: impl Fn(T) -> Result, 744 | ) -> Result { 745 | let fields = fields 746 | .into_iter() 747 | .map(|field| value_fn(field).map(|value| format!("{}: {}", field, value))) 748 | .collect::>>()?; 749 | Ok(format!("{{{}}}", fields.join(", "))) 750 | } 751 | 
--------------------------------------------------------------------------------