├── v1 ├── Makefile ├── Readme.md ├── rerp-tests.rkt ├── derp-tests.rkt ├── lazy-structs.rkt ├── rerp-core-alt.rkt ├── rerp-core.rkt ├── herp-core.rkt ├── memoization.rkt ├── derp-core.rkt ├── fixed-points.rkt ├── derp-optimize.rkt ├── compaction.rkt ├── derp-sugar-tests.rkt └── derp-sugar.rkt ├── v3 └── Readme.md ├── .vscode ├── settings.json └── launch.json ├── docs ├── Posts lattice for language equations.png ├── POMS.md ├── Conjunctive grammar with right context.md ├── Context free grammar.md ├── Regex.md ├── MOG.md ├── Conjunctive grammar.md ├── Boolean grammar.md ├── PEG.md ├── Regular expressions.md ├── Regular expressions with lookahead.md ├── Operations concept map.md └── Operations concept map.svg ├── v2 └── Readme.md ├── .devcontainer ├── Dockerfile └── devcontainer.json └── README.md /v1/Makefile: -------------------------------------------------------------------------------- 1 | 2 | clean: 3 | cleanup 4 | rm -rf compiled 5 | -------------------------------------------------------------------------------- /v1/Readme.md: -------------------------------------------------------------------------------- 1 | Code copied from http://www.ucombinator.org/projects/parsing/ 2 | -------------------------------------------------------------------------------- /v3/Readme.md: -------------------------------------------------------------------------------- 1 | Code is here: https://bitbucket.org/jbclements/derp-3/src/master/ -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "magic-racket.general.racketPath": "xvfb-run racket" 3 | } -------------------------------------------------------------------------------- /docs/Posts lattice for language equations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stereobooster/derp/main/docs/Posts lattice for 
language equations.png -------------------------------------------------------------------------------- /v2/Readme.md: -------------------------------------------------------------------------------- 1 | Code is here: 2 | 3 | - https://github.com/mattmight/derp2 4 | - https://matt.might.net/teaching/compilers/spring-2013/derp.html 5 | -------------------------------------------------------------------------------- /v1/rerp-tests.rkt: -------------------------------------------------------------------------------- 1 | #lang racket 2 | 3 | (require "rerp-core.rkt") 4 | 5 | ; Examples 6 | 7 | (define ab* (★ (∪ (token 'a) (token 'b)))) 8 | 9 | 10 | (recognizes? '(a b a b a) ab*) 11 | 12 | (recognizes? '(a b a c) ab*) 13 | 14 | -------------------------------------------------------------------------------- /docs/POMS.md: -------------------------------------------------------------------------------- 1 | # PARTIALLY ORDERED MULTISET CONTEXT-FREE GRAMMARS AND FREE-WORD-ORDER PARSING 2 | 3 | - [PARTIALLY ORDERED MULTISET CONTEXT-FREE GRAMMARS AND FREE-WORD-ORDER PARSING](https://www.eecs.harvard.edu/~shieber/Biblio/Papers/pomset-cfg.pdf) -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM racket/racket 2 | 3 | RUN apt-get update \ 4 | && apt-get -y install --no-install-recommends curl xvfb git build-essential xauth \ 5 | && apt-get autoremove -y \ 6 | && apt-get clean -y 7 | 8 | RUN raco pkg install --auto racket-langserver 9 | 10 | RUN echo 'export FONTCONFIG_PATH=/etc/fonts' >>~/.bashrc 11 | -------------------------------------------------------------------------------- /v1/derp-tests.rkt: -------------------------------------------------------------------------------- 1 | #lang racket 2 | 3 | (require "derp-core.rkt") 4 | 5 | ; derp-core's `token` wraps a predicate over input tokens, not a literal; 6 | ; `lit` builds the parser that accepts exactly the token x. 7 | (define (lit x) (token (λ (t) (eqv? t x)))) 8 | 9 | ; Examples 10 | 11 | ; Left-recursive definition of (a ∪ b)★: 12 | (define ab* (∪ (∘ ab* (∪ (lit 'a) (lit 'b))) 13 | (ε (set 
'())))) 14 | 15 | ; Right-recursive definition of (a ∪ b)★: 16 | (define ab*2 (∪ (∘ (∪ (lit 'a) (lit 'b)) 17 | ab*2) 18 | (ε (set '())))) 19 | 20 | 21 | (parse '(a b b a) ab*) 22 | 23 | (parse '(a b b a) ab*2) 24 | 25 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.1.0", 6 | "configurations": [ 7 | { 8 | "name": "Racket: Current File", 9 | "type": "rkt", 10 | "request": "launch", 11 | "program": "${file}", 12 | "console": "integratedTerminal" 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Racket", 3 | "build": { 4 | "context": "..", 5 | "dockerfile": "Dockerfile", 6 | }, 7 | // Set custom container specific settings.json values on container create 8 | "settings": { 9 | "terminal.integrated.defaultProfile.linux": "bash", 10 | "terminal.integrated.inheritEnv": false, 11 | "magicRacket.languageServer.command": "xvfb-run", 12 | "magicRacket.languageServer.arguments": [ 13 | "--auto-servernum", 14 | "racket", 15 | "--lib", 16 | "racket-langserver" 17 | ], 18 | }, 19 | // Add the IDs of extensions you want installed when the container is created 20 | "extensions": [ 21 | "evzen-wybitul.magic-racket", 22 | "andes.racket-repl" 23 | ], 24 | } -------------------------------------------------------------------------------- /docs/Conjunctive grammar with right context.md: -------------------------------------------------------------------------------- 1 | # Conjunctive grammar with context 2 | 3 | Barash and Okhotin proposed idea of contexts in context free grammars in 2012. 
See [Defining Contexts in Context-Free Grammars](https://www.semanticscholar.org/paper/Defining-Contexts-in-Context-Free-Grammars-Barash-Okhotin/f914cf1b9b4c879cd7becd1f490176e2b4a1583e). 4 | 5 | ## Brzozowski derivatives of conjunctive grammar with right context 6 | 7 | First of all, let's define "syntax sugar" for the right context $L_1 \cap \triangleright L_2$: 8 | 9 | $$ 10 | \begin{align} 11 | & L_1 \cap \triangleright L_2 = \\ 12 | & L_1 \cdot (\epsilon \cap \triangleright L_2) = \\ 13 | & L_1 \cdot \\& L_2 14 | \end{align} 15 | $$ 16 | 17 | $\\&$ - is positive lookahead from [REwLA](Regular%20expressions%20with%20lookahead.md). 18 | -------------------------------------------------------------------------------- /v1/lazy-structs.rkt: -------------------------------------------------------------------------------- 1 | (module lazy-structs 2 | racket/base 3 | 4 | (require (for-syntax racket/base)) 5 | (require racket/match) 6 | (require (for-syntax racket/match)) 7 | (require racket/promise) 8 | 9 | (provide define-lazy-struct) 10 | 11 | (define-syntax (define-lazy-struct stx) 12 | (syntax-case stx () 13 | [(_ name {field ...}) 14 | (with-syntax ([$name (datum->syntax #'name (gensym (syntax->datum #'name)))]) 15 | #'(begin 16 | 17 | (define-struct $name {field ...}) 18 | 19 | (define-match-expander name 20 | (syntax-rules () [(_ field ...) 21 | ; => 22 | ($name (app force field) ...)]) 23 | 24 | (syntax-rules () [(_ field ...) 25 | ; => 26 | ($name (delay field) ...)]))))]))) 27 | -------------------------------------------------------------------------------- /docs/Context free grammar.md: -------------------------------------------------------------------------------- 1 | # Context free grammar 2 | 3 | Idea of extending Kleene algebra with general recursion (Kleene star is iteration) was investigated by Dexter Kozen and Hans Leiß in 1990-s. See [Towards Kleene Algebra with Recursion](https://www.cis.uni-muenchen.de/download/publikationen/kar_csl-91.pdf). 
4 | 5 | They also noticed that regular expressions with general recursion correspond to context free languages. 6 | 7 | ## Brzozowski derivatives of CFG 8 | 9 | Matthew Might et al. in 2011 showed how to implement an algorithm to parse CFGs with Brzozowski derivatives. See [Parsing with Derivatives](https://matt.might.net/papers/might2011derivatives.pdf). 10 | 11 | It uses the same definition of Brzozowski derivative as in [Regular expressions](./Regular%20expressions.md), except they don't use intersection (REE6) and complement (REE7). 12 | 13 | ## Notation 14 | 15 | - concatenation $L_1 \cdot L_2$. Might uses $L_1 \circ L_2$ 16 | - Kleene star $^*$. Might uses $^\star$ 17 | -------------------------------------------------------------------------------- /docs/Regex.md: -------------------------------------------------------------------------------- 1 | # Regex 2 | 3 | Regex is [regular expressions](Regular%20expressions.md) with backreferences. 4 | 5 | Backreferences are used to reference a captured value at matching time. Backreferences have the syntax `\n`, where `n` is the capture identifier. The backreference `\1` in pattern `([a-z])\1` is replaced with the substring matched by the capture `([a-z])`. 6 | 7 | Backreferences extend regular expressions by allowing the recognition of more than regular languages. As an example, the expression `(a+)(b+)\1\2` defines the language $\set{a^ib^ja^ib^j \mid i, j > 0 }$ which is not context-free. 8 | 9 | Backreferences can result in an exponential running time of the matching algorithm. This problem is unavoidable because the 3-SAT problem can be reduced to regexes with backreferences. Therefore, any matcher thus constructed is NP-Complete. 10 | 11 | From: [Converting regexes to Parsing Expression Grammars](https://www.inf.puc-rio.br/~roberto/docs/ry10-01.pdf). 
12 | 13 | ## Related 14 | 15 | - https://github.com/goodmami/pe 16 | - https://rosie-lang.org/about/ 17 | - [Converting Regex to Parsing Expression Grammar with Captures](https://repository.lib.ncsu.edu/bitstream/handle/1840.20/38685/etd.pdf?sequence=1) 18 | - [Regular Expressions with Backreferences Re-examined]() 19 | - TODO: rename Regex to REwB 20 | -------------------------------------------------------------------------------- /docs/MOG.md: -------------------------------------------------------------------------------- 1 | # Multi-Ordered Grammars 2 | 3 | - [Parsing Multi-Ordered Grammars with the Gray Algorithm](https://npapoylias.gitlab.io/lands-project/Multi-Ordered-Grammars-Gray-Algorithm-Papoulias-PeerJ-PrePrint.pdf) 4 | - `MOG = PEG + Unordered + RecScopedOrdered` 5 | - `MOG = CFG + LAhead + Ordered + RecScopedOrdered` 6 | - [For and Against PEGs: Why we need Multi-Ordered Grammars](https://peerj.com/preprints/27358.pdf) 7 | 8 | ## Notation 9 | 10 | | Operator | Appears in | Description | 11 | | -------- | ------------------ | ------------------------------------------------ | 12 | | `AB` | MOG, EBNF, PEG, RE | Composition operator aka concatenation, sequence | 13 | | `?` | MOG, EBNF, PEG, RE | Optional operator | 14 | | `*` | MOG, EBNF, PEG, RE | Zero-or-more operator aka Kleene star | 15 | | `+` | MOG, EBNF, PEG, RE | One-or-more operator aka Kleene plus | 16 | | `()` | MOG, EBNF, PEG, RE | Grouping operator | 17 | | \| | MOG, EBNF, PEG, RE | Non-deterministic (unordered) choice operator | 18 | | `&` | MOG, PEG | Syntactic positive predicate | 19 | | `!` | MOG, PEG | Syntactic negative predicate | 20 | | \|\| | MOG (scoped), PEG | Deterministic (ordered) choice operator | 21 | | `/` | MOG | Self-recursive (ordered) choice operator | 22 | | `\` | MOG | Recursive (ordered) choice operator | 23 | -------------------------------------------------------------------------------- /v1/rerp-core-alt.rkt: 
-------------------------------------------------------------------------------- 1 | (module rerp-core 2 | racket 3 | 4 | ; Rerp Core implements the minimal core 5 | ; of a regular language recognizer. 6 | 7 | (provide (all-defined-out)) 8 | 9 | ; Atomic languages: 10 | (define-struct ∅ {}) ; empty set 11 | (define-struct ε {}) ; empty string 12 | (define-struct token {value}) ; exact terminal 13 | 14 | ; Compound languages: 15 | (define-struct ∪ {this that}) ; union 16 | (define-struct ∘ {left right}) ; concatenation 17 | (define-struct ★ {lang}) ; repetition 18 | 19 | ; Derivative: 20 | (define (D c L) 21 | (match L 22 | [(∅) (∅)] 23 | [(ε) (∅)] 24 | [(token a) (cond [(eqv? a c) (ε)] 25 | [else (∅)])] 26 | 27 | [(∪ L1 L2) (∪ (D c L1) 28 | (D c L2))] 29 | [(★ L1) (∘ (D c L1) L)] 30 | [(∘ L1 L2) (∪ (∘ (δ L1) (D c L2)) 31 | (∘ (D c L1) L2))])) 32 | 33 | ; Nullability: 34 | (define (nullable? L) 35 | (match L 36 | [(∅) #f] 37 | [(ε) #t] 38 | [(token _) #f] 39 | [(★ _) #t] 40 | [(∪ L1 L2) (or (nullable? L1) (nullable? L2))] 41 | [(∘ L1 L2) (and (nullable? L1) (nullable? L2))])) 42 | 43 | (define (δ L) 44 | (cond 45 | [(nullable? L) (ε)] 46 | [else (∅)])) 47 | 48 | ; Parse a list of tokens: 49 | (define (recognizes? w L) 50 | (if (null? w) 51 | (nullable? L) 52 | (recognizes? (cdr w) (D (car w) L))))) 53 | -------------------------------------------------------------------------------- /v1/rerp-core.rkt: -------------------------------------------------------------------------------- 1 | (module rerp-core 2 | racket 3 | 4 | ; Rerp Core implements the minimal core 5 | ; of a regular language recognizer. 
6 | 7 | (provide (all-defined-out)) 8 | 9 | ; Atomic languages: 10 | (define-struct ∅ {}) ; empty set 11 | (define-struct ε {}) ; empty string 12 | (define-struct token {value}) ; exact terminal 13 | 14 | ; Compound languages: 15 | (define-struct ∪ {this that}) ; union 16 | (define-struct ∘ {left right}) ; concatenation 17 | (define-struct ★ {lang}) ; repetition 18 | (define-struct δ {lang}) ; nullability 19 | 20 | ; Derivative: 21 | (define (D c L) 22 | (match L 23 | [(∅) (∅)] 24 | [(ε) (∅)] 25 | [(δ _) (∅)] 26 | [(token a) (cond [(eqv? a c) (ε)] 27 | [else (∅)])] 28 | 29 | [(∪ L1 L2) (∪ (D c L1) 30 | (D c L2))] 31 | [(★ L1) (∘ (D c L1) L)] 32 | [(∘ L1 L2) (∪ (∘ (δ L1) (D c L2)) 33 | (∘ (D c L1) L2))])) 34 | 35 | ; Nullability: 36 | (define (nullable? L) 37 | (match L 38 | [(∅) #f] 39 | [(ε) #t] 40 | [(token _) #f] 41 | [(★ _) #t] 42 | [(δ L1) (nullable? L1)] 43 | [(∪ L1 L2) (or (nullable? L1) (nullable? L2))] 44 | [(∘ L1 L2) (and (nullable? L1) (nullable? L2))])) 45 | 46 | 47 | ; Parse a list of tokens: 48 | (define (recognizes? w L) 49 | (if (null? w) 50 | (nullable? L) 51 | (recognizes? (cdr w) (D (car w) L))))) 52 | -------------------------------------------------------------------------------- /docs/Conjunctive grammar.md: -------------------------------------------------------------------------------- 1 | # Conjunctive grammar 2 | 3 | Idea of Conjunctive grammar was proposed by Okhotin in 2001 as extension of CFG. See [conjunctive grammars](https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=b1b58648d9b644352116197be20b58f84e769c04). 4 | 5 | Conjunctive grammar can define non-context-free languages. For example, $\set{a^nb^nc^n \mid n \geq 0}$. The same example that [Ford uses for PEG](https://bford.info/pub/lang/peg.pdf) (see 3.4 Language Properties). 
6 | 7 | $$ 8 | \begin{align} 9 | S &\rightarrow A\cdot B \cap D\cdot C \\ 10 | A &\rightarrow a\cdot A \cup \epsilon \\ 11 | B &\rightarrow b\cdot B\cdot c \cup \epsilon \\ 12 | C &\rightarrow c\cdot C \cup \epsilon \\ 13 | D &\rightarrow a\cdot D\cdot b \cup \epsilon \\ 14 | \end{align} 15 | $$ 16 | 17 | ## Brzozowski derivatives of conjunctive grammar 18 | 19 | The derivative can be defined the same way as for [Context free grammar](./Context%20free%20grammar.md), except we need to add a rule for intersection (REE6) from [Regular expressions](./Regular%20expressions.md). 20 | 21 | It is possible to use the same definition because language equations with intersection also have a least solution (fixed point). See **Corollary 5.19 (CG - Least solution)** in [Extending context-free grammars with conjunction and negation, Astrid van der Jagt, 2021](https://www.cs.ru.nl/bachelors-theses/2021/Astrid_van_der_Jagt___4571037___Extending_context-free_grammars_with_conjunction_and_negation.pdf) 22 | 23 | ## Notation 24 | 25 | - concatenation $L_1 \cdot L_2$. Okhotin uses $L_1L_2$ 26 | - union (aka unordered choice) $\cup$. Okhotin uses $|$ 27 | - intersection $\cap$. Okhotin uses $\\&$ 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Moved to https://parsing.stereobooster.com/ 2 | 3 | # Parsing with derivatives 4 | 5 | [![](docs/Operations%20concept%20map.svg)](docs/Operations%20concept%20map.md) 6 | 7 | ## Innovation 8 | 9 | - Parsing with derivatives can easily support [Conjunctive grammar](docs/Conjunctive%20grammar.md), which can describe languages that are not context-free, for example $\set{a^nb^nc^n \mid n \geq 0}$. This is not something innovative per se, but I haven't seen it mentioned anywhere in reviewed publications. 
10 | - Parsing with derivatives can support [Conjunctive grammar with right context](docs/Conjunctive%20grammar%20with%20right%20context.md), if we add recursion to [REwLA](docs/Regular%20expressions%20with%20lookahead.md) 11 | - I showed how to express [PEG](docs/PEG.md) in terms of REwLA with recursion. Which allows to define derivative formula for PEG. 12 | - This allows to create variation of PEG with unordered choice. Related work: 13 | - [Parsing Expression Grammars with Unordered Choices](https://www.jstage.jst.go.jp/article/ipsjjip/25/0/25_975/_pdf) 14 | - By the nature of parsing with derivatives it doesn't have issues with left recursion. Related works: 15 | - [Left recursion in Parsing Expression Grammars](https://www.sciencedirect.com/science/article/pii/S0167642314000288) 16 | - [Packrat Parsers Can Support Left Recursion](https://web.cs.ucla.edu/~todd/research/pepm08.pdf) 17 | - [Direct Left-Recursive Parsing Expression Grammars](https://tratt.net/laurie/research/pubs/html/tratt__direct_left_recursive_parsing_expression_grammars/) 18 | - [More about left recursion in PEG](https://ceur-ws.org/Vol-2240/paper9.pdf) 19 | - It can open door to understand expressive power of PEG (it is not contained within context-free, but also can express some context-sensitive languages) 20 | -------------------------------------------------------------------------------- /v1/herp-core.rkt: -------------------------------------------------------------------------------- 1 | (module herp-core 2 | racket 3 | 4 | ; Herp Core implements the minimal core 5 | ; of a context-free language recognizer. 
6 | 7 | (require "memoization.rkt") 8 | (require "fixed-points.rkt") 9 | (require "lazy-structs.rkt") 10 | 11 | (provide (all-defined-out)) 12 | 13 | ; Atomic languages: 14 | (define-struct ∅ {}) ; empty set 15 | (define-struct ε {}) ; empty string 16 | (define-struct token {value}) ; exact terminal 17 | 18 | ; Compound languages: 19 | (define-lazy-struct δ {lang}) ; nullability 20 | (define-lazy-struct ∪ {this that}) ; union 21 | (define-lazy-struct ∘ {left right}) ; concatenation 22 | (define-lazy-struct ★ {lang}) ; repetition 23 | 24 | ; Derivative: 25 | (define/memoize (D c L) 26 | #:order ([L #:eq] [c #:equal]) 27 | (match L 28 | [(∅) (∅)] 29 | [(ε) (∅)] 30 | [(δ _) (∅)] 31 | [(token a) (cond [(eqv? a c) (ε)] 32 | [else (∅)])] 33 | 34 | [(∪ L1 L2) (∪ (D c L1) 35 | (D c L2))] 36 | [(★ L1) (∘ (D c L1) L)] 37 | [(∘ L1 L2) (∪ (∘ (δ L1) (D c L2)) 38 | (∘ (D c L1) L2))])) 39 | 40 | ; Nullability: 41 | (define/fix (nullable? L) 42 | #:bottom #f 43 | (match L 44 | [(∅) #f] 45 | [(ε) #t] 46 | [(token _) #f] 47 | [(★ _) #t] 48 | [(δ L1) (nullable? L1)] 49 | [(∪ L1 L2) (or (nullable? L1) (nullable? L2))] 50 | [(∘ L1 L2) (and (nullable? L1) (nullable? L2))])) 51 | 52 | 53 | ; Parse a list of tokens: 54 | (define (recognizes? w L) 55 | (if (null? w) 56 | (nullable? L) 57 | (recognizes? (cdr w) (D (car w) L))))) 58 | -------------------------------------------------------------------------------- /v1/memoization.rkt: -------------------------------------------------------------------------------- 1 | (module memoization 2 | racket/base 3 | 4 | (provide define/memoize) 5 | 6 | (define-syntax make-weak-hash-trie 7 | (syntax-rules () 8 | [(_ #:eq eq ...) (make-weak-hasheq)] 9 | [(_ #:eqv eq ...) (make-weak-hasheqv)] 10 | [(_ #:equal eq ...) (make-weak-hash)])) 11 | 12 | (define-syntax weak-hash-trie-get! 13 | (syntax-rules () 14 | [(_ t [eq] [x] lazy-val) 15 | ; => 16 | (let ([$t t] 17 | [$x x]) 18 | (if (hash-has-key? 
$t $x) 19 | (hash-ref $t $x) 20 | (let ([val lazy-val]) 21 | (hash-set! $t $x val) 22 | val)))] 23 | 24 | [(_ t [eq1 eq2 eq3 ...] [x1 x2 x3 ...] lazy-val) 25 | ; => 26 | (let ([$t t]) 27 | (if (hash-has-key? t x1) 28 | (let ([t2 (hash-ref t x1)]) 29 | (weak-hash-trie-get! t2 [eq2 eq3 ...] [x2 x3 ...] lazy-val)) 30 | (let ([t2 (make-weak-hash-trie eq2 eq3 ...)]) 31 | (hash-set! t x1 t2) 32 | (weak-hash-trie-get! t2 [eq2 eq3 ...] [x2 x3 ...] lazy-val))))])) 33 | 34 | 35 | ; Define a function that is memoized by default: 36 | (define-syntax define/memoize 37 | (syntax-rules () 38 | [(_ (f [v eq] ...) body ...) 39 | ; => 40 | (define/memoize (f v ...) #:order ([v eq] ...) body ...)] 41 | 42 | [(_ (f v ...) #:order ([v* eq] ...) body ...) 43 | ; => 44 | (define f (let ((cache (make-weak-hash-trie eq ...)) 45 | ($f (lambda (v ...) (let ([v* v] ...) body ...)))) 46 | (lambda (v ...) 47 | (let ([v* v] ...) 48 | (weak-hash-trie-get! cache [eq ...] [v ...] ($f v ...))))))] 49 | 50 | [(_ (f v ...) body ...) 51 | ; => 52 | (define/memoize (f [v #:equal] ...) body ...)]))) 53 | -------------------------------------------------------------------------------- /docs/Boolean grammar.md: -------------------------------------------------------------------------------- 1 | # Boolean grammar 2 | 3 | Idea of boolean grammar was proposed by Okhotin in 2004 as extension of CFG and conjunctive grammar. See [boolean grammar](https://www.sciencedirect.com/science/article/pii/S0890540104001075). 4 | 5 | ## Brzozowski derivatives of boolean grammar 6 | 7 | It tempting to define derivative the same way as for [Context free grammar](./Context%20free%20grammar.md), except we need to add rules for intersection (REE6) and complement (REE7) from [Regular expressions](./Regular%20expressions.md). **But this won't work in general case**, because language equations with complement doesn't have least solution. 
See: **Lemma 6.7 (Complement operation is not Scott continuous)** in [Extending context-free grammars with conjunction and negation, Astrid van der Jagt, 2021](https://www.cs.ru.nl/bachelors-theses/2021/Astrid_van_der_Jagt___4571037___Extending_context-free_grammars_with_conjunction_and_negation.pdf) 8 | 9 | Okhotin also proposes "well-founded boolean grammar", which suppose to have unique solution. But this variation also has issues see Vassilis Kountouriotis, Christos Nomikos, and Panos Rondogiannis, "Well-founded semantics for Boolean grammars", 2009. 10 | 11 | ## Notation 12 | 13 | - concatenation $L_1 \cdot L_2$. Okhotin uses $L_1L_2$ 14 | - union (aka unordered choice) $\cup$. Okhotin uses $|$ 15 | - intersection $\cap$. Okhotin uses $\\&$ 16 | - complement $^c$. Okhotin uses $\lnot$ 17 | 18 | ## Related 19 | 20 | - [sbp: A Scannerless Boolean Parser](http://www.megacz.com/berkeley/research/papers/megacz,adam-sbp.a.scannerless.boolean.parser.pdf) 21 | - [Partial Derivatives of an Extended Regular Expression](https://www.researchgate.net/publication/220836274_Partial_Derivatives_of_an_Extended_Regular_Expression) 22 | - [Efficient Parsing with Derivatives and Zippers](https://infoscience.epfl.ch/record/287059?ln=en) 23 | - [Well-founded semantics for Boolean grammars](https://www.sciencedirect.com/science/article/pii/S0890540109001473) 24 | -------------------------------------------------------------------------------- /v1/derp-core.rkt: -------------------------------------------------------------------------------- 1 | (module derp-core 2 | racket 3 | 4 | (require "memoization.rkt") 5 | (require "fixed-points.rkt") 6 | (require "lazy-structs.rkt") 7 | 8 | (provide (all-defined-out)) 9 | 10 | ; Atomic parsers: 11 | (define-struct ∅ {}) ; empty set 12 | (define-struct ε {tree-set}) ; empty string 13 | (define-struct token {value?}) ; token class 14 | 15 | ; Compound parsers: 16 | (define-lazy-struct δ {lang}) 17 | (define-lazy-struct ∪ {this that}) 18 | 
(define-lazy-struct ∘ {left right}) 19 | (define-lazy-struct ★ {lang}) 20 | (define-lazy-struct → {lang reduce}) 21 | 22 | ; Derivative: 23 | (define/memoize (D c p) 24 | #:order ([p #:eq] [c #:equal]) 25 | (match p 26 | [(∅) (∅)] 27 | [(ε _) (∅)] 28 | [(δ _) (∅)] 29 | [(token p?) (cond 30 | [(p? c) (ε (set c))] 31 | [else (∅)])] 32 | 33 | [(∪ p1 p2) (∪ (D c p1) 34 | (D c p2))] 35 | [(★ p1) (∘ (D c p1) p)] 36 | [(→ p1 f) (→ (D c p1) f)] 37 | [(∘ p1 p2) (∪ (∘ (δ p1) (D c p2)) 38 | (∘ (D c p1) p2))])) 39 | 40 | ; Parsing null: 41 | (define/fix (parse-null p) 42 | #:bottom (set) 43 | (match p 44 | [(ε S) S] 45 | [(∅) (set)] 46 | [(δ p) (parse-null p)] 47 | [(token _) (set)] 48 | 49 | [(★ _) (set '())] 50 | [(∪ p1 p2) (set-union (parse-null p1) (parse-null p2))] 51 | [(∘ p1 p2) (for*/set ([t1 (parse-null p1)] 52 | [t2 (parse-null p2)]) 53 | (cons t1 t2))] 54 | [(→ p1 f) (for/set ([t (parse-null p1)]) 55 | (f t))])) 56 | 57 | ; Parse a list of tokens: 58 | (define (parse w p) 59 | (if (null? w) 60 | (parse-null p) 61 | (parse (cdr w) (D (car w) p))))) 62 | -------------------------------------------------------------------------------- /docs/PEG.md: -------------------------------------------------------------------------------- 1 | # Parsing Expression Grammar 2 | 3 | PEG was proposed by Ford in 2004. See [Parsing Expression Grammars: A Recognition-Based Syntactic Foundation](https://bford.info/pub/lang/peg.pdf). 
4 | 5 | ## Brzozowski derivatives of PEG 6 | 7 | There were previous attempts to define a derivative for PEG: 8 | 9 | - [Derivatives of Parsing Expression Grammars](https://arxiv.org/pdf/1405.4841.pdf), Aaron Moss, 2014 10 | - [Simplified Parsing Expression Derivatives](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7206630/), Aaron Moss, 2020 11 | - [Recognising and Generating Terms using Derivatives of Parsing Expression Grammars](https://www.semanticscholar.org/paper/Recognising-and-Generating-Terms-using-Derivatives-Garnock-Jones-Eslamimehr/b415bd943c4fd3458c60672dd9b277e4755cc6bc), Tony Garnock-Jones, Mahdi Eslamimehr, Alessandro Warth, 2018 12 | 13 | But the problem is that those definitions are incompatible with the original Brzozowski definition. So I want to give a definition of PEG in terms of extensions of context free grammars. 14 | 15 | - (PEG1) positive syntactic predicate: $\\&_{PEG}(L_1) \cdot L_2 = \\&(L_1 \cdot \Sigma^*) \cdot L_2$ 16 | - $\\&(L) = \\&_{PEG}(L \cdot \\&_{PEG}(\epsilon))$ 17 | - (PEG2) negative syntactic predicate: $!_{PEG}(L_1) \cdot L_2 = !(L_1 \cdot \Sigma^*) \cdot L_2$ 18 | - $!(L) = !_{PEG}(L \cdot !_{PEG}(\Sigma))$ 19 | - $!_{PEG}(!_{PEG}(L)) = \\&_{PEG}(L)$ 20 | - (PEG3) prioritized choice: $L_1 / L_2 = L_1 \cup !_{PEG}(L_1) \cdot L_2 = L_1 \cup !(L_1 \cdot \Sigma^*) \cdot L_2$ 21 | 22 | Where `!` and `&` are operators from [REwLA](Regular%20expressions%20with%20lookahead.md). 23 | 24 | ## Notation 25 | 26 | - Any character: $\Sigma$. Ford uses `.` 27 | - Optional: $L \cup \epsilon$. Ford uses $L?$ 28 | - Concatenation (sequence): $L_1 \cdot L_2$. 
Ford uses $L_1L_2$ 29 | 30 | ## Related 31 | 32 | - [Parsing expression grammars, constructing a linear-time parser](https://www.cs.ru.nl/bachelors-theses/2017/Jan_Martens___s348435___Parsing-expression-grammars-constructing-a-linear-time-parser.pdf) 33 | - [The computational power of parsing expression grammars](https://arxiv.org/abs/1902.08272) 34 | - [Packrat Parsers Can Handle Practical Grammars in Mostly Constant Space](https://kmizu.github.io/papers/paste513-mizushima.pdf) 35 | -------------------------------------------------------------------------------- /v1/fixed-points.rkt: -------------------------------------------------------------------------------- 1 | (module fixed-points 2 | racket/base 3 | 4 | (provide define/fix) 5 | 6 | ; Generic tools: 7 | (define-syntax while 8 | (syntax-rules () 9 | [(_ cond body ...) 10 | ; => 11 | (letrec ((lp (λ () (when cond body ... (lp))))) 12 | (lp))])) 13 | 14 | ; Define a recursive (yet monotonic) function over 15 | ; a mutually recursive graph by computing its fixed 16 | ; point: 17 | (define-syntax define/fix 18 | (syntax-rules () 19 | [(_ (f x) #:bottom bottom body ...) 20 | ; => 21 | (define f (let ((cache (make-weak-hasheq)) 22 | (changed? (make-parameter 'error-changed)) 23 | (running? (make-parameter #f)) 24 | (visited (make-parameter 'error-visited))) 25 | (λ (x) 26 | (let ((cached? (hash-has-key? cache x)) 27 | (cached (hash-ref cache x (lambda () bottom))) 28 | (run? (running?))) 29 | (cond 30 | [(and cached? (not run?)) 31 | ; => 32 | cached] 33 | 34 | [(and run? (hash-has-key? (unbox (visited)) x)) 35 | ; => 36 | (if cached? cached bottom)] 37 | 38 | [run? 39 | ; => 40 | (hash-set! (unbox (visited)) x #t) 41 | (let ((new-val (begin body ...))) 42 | (when (not (equal? new-val cached)) 43 | (set-box! (changed?) #t) 44 | (hash-set! cache x new-val)) 45 | new-val)] 46 | 47 | [(and (not cached?) (not run?)) 48 | ; => 49 | (parameterize ([changed? (box #t)] 50 | [running? 
#t] 51 | [visited (box (make-weak-hasheq))]) 52 | (let ([v bottom]) 53 | (while (unbox (changed?)) 54 | (set-box! (changed?) #f) 55 | (set-box! (visited) (make-weak-hasheq)) 56 | (set! v (f x))) 57 | v))])))))]))) 58 | 59 | -------------------------------------------------------------------------------- /v1/derp-optimize.rkt: -------------------------------------------------------------------------------- 1 | (module derp-optimize 2 | racket 3 | 4 | (provide (all-defined-out)) 5 | 6 | (require (except-in "derp-core.rkt" ∅? ε?)) 7 | (require "memoization.rkt") 8 | (require "fixed-points.rkt") 9 | 10 | (define/fix (ε? L) 11 | #:bottom #t 12 | (match L 13 | [(∅) #f] 14 | [(ε _) #t] 15 | [(token _) #f] 16 | [(∪ L1 L2) (and (ε? L1) (ε? L2))] 17 | [(∘ L1 L2) (and (ε? L1) (ε? L2))] 18 | [(★ L1) (or (ε? L1) (∅? L1))] 19 | [(→ L1 _) (ε? L1)])) 20 | 21 | ; Pick an element of a set (#f if the set is empty): 22 | (define (set-choose s) 23 | (define el #f) 24 | (for ([el* s]) 25 | (set! el el*)) 26 | el) 27 | 28 | ; Matches a language if it is *exactly* the empty string: 29 | (define-match-expander nullp 30 | (syntax-rules () 31 | [(_) (? ε?)] 32 | [(_ el) (and (? ε?) 33 | (app parse-null (and (app set-count 1) 34 | (app set-choose el))))])) 35 | 36 | ; Checks whether a language is the empty set: 37 | (define/fix (∅? L) 38 | #:bottom #f 39 | (match L 40 | [(∅) #t] 41 | [(ε _) #f] 42 | [(token _) #f] 43 | [(★ L1) #f] 44 | [(∪ L1 L2) (and (∅? L1) (∅? L2))] 45 | [(∘ L1 L2) (or (∅? L1) (∅? L2))] 46 | [(→ L1 _) (∅? L1)])) 47 | 48 | ; Optimizing compaction. 49 | ; (K L) is an equivalent, compacted version of L. 50 | (define/memoize (K [L #:eq]) 51 | (match L 52 | [(∅) L] 53 | [(ε _) L] 54 | [(? ∅?) (∅)] 55 | [(? ε?) (ε (parse-null L))] 56 | [(token _) L] 57 | 58 | [(★ (? ∅?)) (ε (set '()))] 59 | [(★ L) (★ (K L))] 60 | 61 | [(∪ (? ∅?) L2) (K L2)] 62 | [(∪ L1 (? 
∅?)) (K L1)] 63 | 64 | [(∘ (nullp t) L2) (→ (K L2) (λ (w2) (cons t w2)))] 65 | [(∘ L1 (nullp t)) (→ (K L1) (λ (w1) (cons w1 t)))] 66 | 67 | [(∪ L1 L2) (∪ (K L1) (K L2))] 68 | [(∘ L1 L2) (∘ (K L1) (K L2))] 69 | 70 | [(→ (and e (? ε?)) f) 71 | (ε (for/set ([t (parse-null e)]) (f t)))] 72 | 73 | [(→ (∘ (nullp t) L2) f) (→ (K L2) (λ (w2) (f (cons t w2))))] 74 | [(→ (→ L f) g) (→ (K L) (compose g f))] 75 | [(→ L f) (→ (K L) f)])) 76 | ; Parse the token list w with L: derive by each token, compact the result, and recur with the same compactor. 77 | (define (parse/compact w L #:compactor [compact K]) 78 | (if (null? w) 79 | (parse-null L) 80 | (parse/compact (cdr w) (compact (D (car w) L)) #:compactor compact)))) 81 | 82 | 83 | -------------------------------------------------------------------------------- /docs/Regular expressions.md: -------------------------------------------------------------------------------- 1 | # Regular expressions 2 | 3 | Regular expressions correspond to regular languages in Chomsky hierarchy and DFA in automata theory. 4 | 5 | Regular expressions are based on work Kleene, Stephen Cole - ["Representation of Events in Nerve Nets and Finite Automata", 1951](https://www.rand.org/content/dam/rand/pubs/research_memoranda/2008/RM704.pdf). 6 | 7 | ## Brzozowski derivatives of regular expressions 8 | 9 | Janusz A. Brzozowski in 1964, proposed idea of derivative: ["Derivatives of Regular Expressions, doi:10.1145/321239.321249"](https://dl.acm.org/doi/pdf/10.1145/321239.321249). 
10 | 11 | Let's define the "nullability" function as: 12 | 13 | - $\delta(L) = \epsilon, \text{ if } \epsilon \in L$ 14 | - $\delta(L) = \emptyset, \text{ if } \epsilon \notin L$ 15 | 16 | Let's define it recursively: 17 | 18 | - RE1 $\delta(\epsilon) = \epsilon$ 19 | - RE2 $\delta(\emptyset) = \emptyset, \delta(x) = \emptyset, x \in \Sigma$ 20 | - RE3 $\delta(L_1 \cup L_2) = \delta(L_1) \cup \delta(L_2)$ 21 | - RE4 $\delta(L_1 \cdot L_2) = \delta(L_1) \cdot \delta(L_2)$ 22 | - RE5 $\delta(L^*) = \epsilon$ 23 | - REE6 $\delta(L_1 \cap L_2) = \delta(L_1) \cap \delta(L_2)$ 24 | - REE7 $\delta(L^c) = \epsilon, \text{ if } \delta(L) = \emptyset$ and $\delta(L^c) = \emptyset, \text{ if } \delta(L) = \epsilon$ 25 | 26 | Let's define the derivative: 27 | 28 | - RE1 $D_a(a) = \epsilon$ 29 | - RE2 $D_a(b) = \emptyset, \text{ if } b = \emptyset, b = \epsilon, b \in \Sigma \text{ and } b \neq a$ 30 | - RE3 $D_a(L_1 \cup L_2) = D_a(L_1) \cup D_a(L_2)$ 31 | - RE4 $D_a(L_1 \cdot L_2) = D_a(L_1) \cdot L_2 \cup \delta(L_1) \cdot D_a(L_2)$ 32 | - RE5 $D_a(L^*) = D_a(L) \cdot L^*$ 33 | - REE6 $D_a(L_1 \cap L_2) = D_a(L_1) \cap D_a(L_2)$ 34 | - REE7 $D_a(L^c) = D_a(L)^c$ 35 | 36 | ## Notation 37 | 38 | - empty language: $\emptyset$. Brzozowski uses $\phi$ 39 | - null language: $\epsilon$. Brzozowski uses $\lambda$. In other literature sometimes they use $\varepsilon$ 40 | - concatenation: $L_1 \cdot L_2$. Brzozowski uses $L_1L_2$ 41 | - union (aka unordered choice): $\cup$. Brzozowski uses $+$. In other literature sometimes they use $|$ and $\lor$ 42 | - intersection: $\cap$. Brzozowski uses $\\&$. In other literature sometimes they use $\land$ 43 | - complement: $^c$. Brzozowski uses $'$. In other literature sometimes they use $^\complement$ or $^C$ or $\lnot$ 44 | - Any character: $\Sigma$. In Regex they use `.` 45 | - Kleene star: $^*$. In Regex, PEG, MOG they use `*` 46 | - "Kleene plus" $L^+ = L \cdot L^*$. 
In Regex, PEG, MOG they use `+` 47 | 48 | ## Related 49 | 50 | - https://github.com/awalterschulze/regex-reexamined-coq/ 51 | -------------------------------------------------------------------------------- /docs/Regular expressions with lookahead.md: -------------------------------------------------------------------------------- 1 | # Regular Expressions with Lookahead 2 | 3 | - [Regular Expressions with Lookahead](https://www.researchgate.net/publication/351177928_Regular_Expressions_with_Lookahead) 4 | 5 | ## Brzozowski derivative 6 | 7 | [Derivatives of Regular Expressions with Lookahead](https://www.jstage.jst.go.jp/article/ipsjjip/27/0/27_422/_pdf) 8 | 9 | - RE1 $D_a(a) = \epsilon$ 10 | - RE2 $D_a(b) = \emptyset, \text{ if } b = \emptyset, b = \epsilon, b \in \Sigma \text{ and } b \neq a$ 11 | - RE3 $D_a(L_1 \cup L_2) = D_a(L_1) \cup D_a(L_2)$ 12 | - REL4 $D_{a}(L_{1} \cdot L_{2}) = D_{a}(L_{1}) \cdot L_{2} \cup D_{a}^\sim(L_{1}) \cdot D_{a}(L_{2})$ 13 | - RE5 $D_a(L^*) = D_a(L) \cdot L^*$ 14 | - REE6 $D_a(L_1 \cap L_2) = D_a(L_1) \cap D_a(L_2)$ 15 | - REE7 $D_a(L^c) = D_a(L)^c$ 16 | - REL8 $D_{a}(!L) = \emptyset$ 17 | - REL9 $D_{a}(\\&L) = \emptyset$ 18 | 19 | They don't use the nullability function ($\delta$); instead they use $D^\sim$: 20 | 21 | | | REwLA | | delta | 22 | | ---- | --------------------------------------------------------------------------- | ---- | ------------------------------------------------------- | 23 | | REL1 | $D_{a}^\sim(\epsilon) = \epsilon$ | RE1 | $\delta(\epsilon) = \epsilon$ | 24 | | REL2 | $D_{a}^\sim(\emptyset) = \emptyset$ | RE2 | $\delta(\emptyset) = \emptyset$ | 25 | | REL2 | $D_{a}^\sim(a) = \emptyset$ | RE2 | $\delta(a) = \emptyset$ | 26 | | REL3 | $D_{a}^\sim(L_{1} \cup L_{2}) = D_{a}^\sim(L_{1}) \cup D_{a}^\sim(L_{2})$ | RE3 | $\delta(L_1 \cup L_2) = \delta(L_1) \cup \delta(L_2)$ | 27 | | REL4 | $D_{a}^\sim(L_{1} \cdot L_{2}) = D_{a}^\sim(L_{1}) \cdot D_{a}^\sim(L_{2})$ | RE4 | $\delta(L_1 \cdot L_2) = \delta(L_1) \cdot 
\delta(L_2)$ | 28 | | REL5 | $D_{a}^\sim(L^*) = \epsilon$ | RE5 | $\delta(L^*) = \epsilon$ | 29 | | | | REE6 | $\delta(L_1 \cap L_2) = \delta(L_1) \cap \delta(L_2)$ | 30 | | | | REE7 | $\delta(L') =$ ... | 31 | | REL8 | $D_{a}^\sim(!L) = !(D_{a}(L) \cup D_{a}^\sim(L))$ | | | 32 | | REL9 | $D_{a}^\sim(\\&L) = \\&(D_{a}(L) \cup D_{a}^\sim(L))$ | | | 33 | -------------------------------------------------------------------------------- /v1/compaction.rkt: -------------------------------------------------------------------------------- 1 | ;; Compaction 2 | 3 | 4 | ; Nullability: 5 | (define/fix (nullable? l) 6 | #:bottom #f 7 | (match l 8 | [(∅) #f] 9 | [(ε _) #t] 10 | [(token _) #f] 11 | [(★ _) #t] 12 | [(δ p) (nullable? p)] 13 | [(∪ l1 l2) (or (nullable? l1) (nullable? l2))] 14 | [(∘ l1 l2) (and (nullable? l1) (nullable? l2))] 15 | [(→ l1 _) (nullable? l1)])) 16 | 17 | ; Compute the size of a set: 18 | (define (set-size s) 19 | (define size 0) 20 | (for ([_ s]) 21 | (set! size (+ size 1))) 22 | size) 23 | 24 | (define (singleton? s) 25 | (eqv? (set-size s) 1)) 26 | 27 | (define (set-choose s) 28 | (define el #f) 29 | (for ([el* s]) 30 | (set! el el*)) 31 | el) 32 | 33 | ; Checks whether a language is the empty string: 34 | (define/fix (is-null? l) 35 | #:bottom #t 36 | (match l 37 | [(∅) #f] 38 | [(ε _) #t] 39 | [(token _) #f] 40 | [(∪ l1 l2) (and (is-null? l1) (is-null? l2))] 41 | [(∘ l1 l2) (and (is-null? l1) (is-null? l2))] 42 | [(★ l1) (or (is-null? l1) (is-empty? l1))] 43 | [(→ l1 _) (is-null? l1)])) 44 | 45 | ; Matches a language if it is *exactly* the empty string: 46 | (define-match-expander nullp 47 | (syntax-rules () 48 | [(_) (app is-null? #t)] 49 | [(_ el) (and (app is-null? #t) (app parse-null (and (? singleton?) (app set-choose el))))])) 50 | 51 | ; Checks whether a language is the empty set: 52 | (define/fix (is-empty? l) 53 | #:bottom #t 54 | (match l 55 | [(∅) #t] 56 | [(ε _) #f] 57 | [(token _) #f] 58 | [(★ l1) #f] 59 | [(∪ l1 l2) (and (is-empty? 
l1) (is-empty? l2))] 60 | [(∘ l1 l2) (or (is-empty? l1) (is-empty? l2))] 61 | [(→ l1 _) (is-empty? l1)])) 62 | 63 | (define-match-expander emptyp 64 | (syntax-rules () 65 | [(_) (app is-empty? #t)])) 66 | 67 | 68 | 69 | ;;;; Optimizations for the grammar: 70 | 71 | (define/memoize (compact [l #:eq]) 72 | (match l 73 | [(∅) (∅)] 74 | [(ε S) (ε S)] 75 | [(emptyp) (∅)] 76 | [(nullp) (ε (parse-null l))] 77 | [(token _) l] 78 | 79 | [(★ (emptyp)) (ε (set '()))] 80 | [(★ l) (★ (compact l))] 81 | 82 | [(∪ (emptyp) l2) (compact l2)] 83 | [(∪ l1 (emptyp)) (compact l1)] 84 | 85 | [(∘ (nullp t) l2) (→ (compact l2) (lambda (w2) (cons t w2)))] 86 | [(∘ l1 (nullp t)) (→ (compact l1) (lambda (w1) (cons w1 t)))] 87 | 88 | [(∪ l1 l2) (∪ (compact l1) (compact l2))] 89 | [(∘ l1 l2) (∘ (compact l1) (compact l2))] 90 | 91 | [(→ (and e (nullp)) f) 92 | ; => 93 | (ε (for/set ([t (parse-null e)]) (f t)))] 94 | 95 | [(→ (∘ (nullp t) l2) f) 96 | ; => 97 | (→ (compact l2) (lambda (w2) (f (cons t w2))))] 98 | 99 | [(→ (→ l f) g) 100 | ; => 101 | (→ (compact l) (compose g f))] 102 | 103 | [(→ l f) (→ (compact l) f)])) 104 | 105 | -------------------------------------------------------------------------------- /docs/Operations concept map.md: -------------------------------------------------------------------------------- 1 | # Operations concept map 2 | 3 | Properly this diagram is called concept lattice (it comes from Formal Concept Analysis). It is very similar to Hasse diagram. 4 | 5 | ![](Operations%20concept%20map.svg) 6 | 7 | ## As table 8 | 9 | | | · | ∪ | \* | ∩ | ' | ⟲ | & | ! 
| / | 10 | | -------- | --- | --- | ----- | ----- | ----- | --- | --- | --- | ----- | 11 | | RE | x | x | x | | | | | | | 12 | | REE | x | x | x | x | x | | | | | 13 | | REwLA | x | x | x | x (4) | x (5) | | x | x | x (2) | 14 | | PEG | x | (6) | x (1) | (4) | (5) | x | x | x | x | 15 | | CFG | x | x | x (1) | | | x | | | | 16 | | Conj | x | x | x (1) | x | | x | | | | 17 | | Bool | x | x | x (1) | x | x | x | | | | 18 | | ConjCont | x | x | x (1) | x | | x | x | | | 19 | | ~MOG (3) | x | x | x (1) | (4) | (5) | x | x | x | x | 20 | 21 | - (1) Kleene star (`A*`) can be simulated with `S -> "" | S · A` 22 | - (2) Prioritised choice (`A / B`) can be simulated with `A ∪ !(A) · B` 23 | - (3) MOG has a concept of "tainted" and scoped operators; this diagram doesn't take them into account 24 | 25 | **Note**: not taken into account in the diagram: 26 | 27 | - (4) Intersection (`A ∩ B`) can be simulated with `&(A) · &(B) · (Σ* · &(ϵ))`. Is this correct for **PEG, MOG**? 28 | - (5) Complement (`A'`) can be simulated with `!(A) · (Σ* · !(Σ))`. Is this correct for **PEG, MOG**? 29 | - (6) Union `A ∪ B = (A' ∩ B')'`. Is this correct for **PEG**? 
30 | - Negative lookahead (`!A`) can be simulated with positive lookahead and complement `&A'` 31 | - Positive lookahead (`&A`) can be simulated with negative lookahead `!!A` 32 | 33 | ## Abbreviations 34 | 35 | - [RE](Regular%20expressions.md) - Regular Expressions 36 | - [REE](Regular%20expressions.md) - Regular Expressions Extended 37 | - [REwLA](Regular%20expressions%20with%20lookahead.md) - Regular Expressions with Look Ahead 38 | - [PEG](PEG.md) - Parsing Expression Grammar 39 | - [CFG](Context%20free%20grammar.md) - Context-Free Grammar 40 | - [Conj](Conjunctive%20grammar.md) - Conjunctive grammar 41 | - [Bool](Boolean%20grammar.md) - Boolean grammar 42 | - [ConjCont](Conjunctive%20grammar%20with%20right%20context.md) - Conjunctive grammar with Context (right) 43 | - [MOG](MOG.md) - Multi Ordered Grammar 44 | 45 | ### Not in diagram 46 | 47 | - [Regex](Regex.md) - Regular Expressions with backreferences 48 | - TAG - Tree Adjoining Grammar 49 | - MG - Minimalist Grammar 50 | 51 | ## Operators 52 | 53 | - `·` - concatenation or sequence. Often omitted in notation, e.g. `AB` instead of `A · B` 54 | - `∪` - unordered or non-deterministic choice or union. Chomsky uses `|`. Brzozowski uses `+` 55 | - `*` - Kleene star or iteration 56 | - `∩` - intersection. Okhotin and Brzozowski use `&` 57 | - `'` - complement. Okhotin uses $\lnot$ 58 | - `⟲` - recursion. It is not an explicit operator, but rather "permission" to form recursion a la `S -> S` 59 | - `&` - positive lookahead or right context or positive syntactic predicate. Barash and Okhotin use $\triangleright$ 60 | - `!` - negative lookahead or negative syntactic predicate 61 | - `/` - ordered or deterministic or prioritized choice. 
In MOG they use `||` 62 | 63 | ### Not in diagram 64 | 65 | - `↑` - cut operator (from extension of [PEG](PEG.md)) 66 | - `||` - interleave operator (from [POMS](POMS.md)) 67 | - $\triangleleft$ - left context (from Conjuctive grammar with Context) 68 | - `\x` - backreference (from Regex) 69 | 70 | ## Related 71 | 72 | ![Post's lattice for language equations](Posts%20lattice%20for%20language%20equations.png) 73 | 74 | From [On language equations with concatenation and various sets of boolean operations](http://www.numdam.org/item/10.1051/ita/2015006.pdf) 75 | 76 | **TODO**: diagram of expressive power of grammars 77 | -------------------------------------------------------------------------------- /v1/derp-sugar-tests.rkt: -------------------------------------------------------------------------------- 1 | #lang racket 2 | 3 | (require rackunit) 4 | 5 | (require "derp-core.rkt") 6 | (require "derp-sugar.rkt") 7 | 8 | (check-pred 9 | (λ (L) 10 | (match L 11 | [(ε S) (equal? S (set 'a))] 12 | [else #f])) 13 | (D 'a (token (λ (t) (eqv? t 'a)))) (ε (set 'a))) 14 | 15 | (check-equal? 16 | (parse '(a) (token (λ (t) (eqv? t 'a)))) 17 | (set 'a)) 18 | 19 | (check-equal? 20 | (parse '(a) (lang 'a)) 21 | (set 'a)) 22 | 23 | (define ab* (lang (∪ (∘ (∪ 'a 'b) ab*) 24 | (ε '())))) 25 | 26 | (check-equal? 27 | (parse '(a b b) ab*) 28 | (set '(a b b))) 29 | 30 | (define ab*-rev (lang (∪ (@--> (list ab*-rev (∪ 'a 'b)) 31 | (λ (lst hd) (cons hd lst))) 32 | (ε '())))) 33 | 34 | (check-equal? 35 | (parse '(b a a) ab*-rev) 36 | (set '(a a b))) 37 | 38 | (define tag (lang `(< ,'tag-name >))) 39 | 40 | (check-equal? 41 | (parse '(< tag-name >) tag) 42 | (set '(< tag-name >))) 43 | 44 | 45 | (check-equal? 46 | (parse '(< id >) (lang (list! '< ,'id '>))) 47 | (set '(id))) 48 | 49 | (let ([literal->language 50 | (λ (lit) (token (λ (tok) (equal? tok 3))))]) 51 | (check-equal? 
52 | (parse '(3 3 3 3) (lang (★ 400))) 53 | (set '(3 3 3 3)))) 54 | 55 | (define (numeric-literals lit) 56 | (match lit 57 | ['NUM (token number?)] 58 | [else (literal->language lit)])) 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | (define left-recursive-grammar 68 | (grammar 69 | #:start L 70 | #:literals numeric-literals 71 | [L (∪ (∘ L 'NUM) 72 | (ε))])) 73 | 74 | 75 | ;(parse '(1 2 3) left-recursive-grammar) 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | (define right-recursive-grammar 84 | (grammar 85 | #:start L 86 | #:literals numeric-literals 87 | [L (∪ (∘ 'NUM L) 88 | (ε))])) 89 | 90 | ;(parse '(1 2 3) right-recursive-grammar) 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | (define hidden-left-recursive-grammar 102 | (grammar 103 | #:start A 104 | #:literals numeric-literals 105 | [A (∪ (∘ B 'NUM) 106 | (ε))] 107 | [B A])) 108 | 109 | ;(parse '(1 2 3) hidden-left-recursive-grammar) 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | (define hidden-right-recursive-grammar 120 | (grammar 121 | #:start A 122 | #:literals numeric-literals 123 | [A (∪ (∘ 'NUM B) 124 | (ε))] 125 | [B A])) 126 | 127 | ;(parse '(1 2 3) hidden-right-recursive-grammar) 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | (define more-hidden-left-recursive-grammar 137 | (grammar 138 | #:start A 139 | #:literals numeric-literals 140 | [A (∪ (∘ B 'NUM) 141 | (ε))] 142 | [B (∘ A 'NUM)])) 143 | 144 | ;(parse '(1 2 3 4) more-hidden-left-recursive-grammar) 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | (define infinite-recursive-grammar 153 | (grammar 154 | #:start A 155 | [A A])) 156 | 157 | ;(parse '(1 2 3) infinite-recursive-grammar) 158 | ;(parse '() infinite-recursive-grammar) 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | (define hidden-infinite-recursive-grammar 169 | (grammar 170 | #:start A 171 | [A B] 172 | [B A])) 173 | 174 | ;(parse '(1 2 3) hidden-infinite-recursive-grammar) 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | (define 
trick-middle-infinite-recursive-grammar 183 | (grammar 184 | #:start A 185 | [A (∘ 'x A 'x)])) 186 | 187 | ;(parse '(x x x) trick-middle-infinite-recursive-grammar) 188 | 189 | 190 | 191 | 192 | (define parseable-infinite-recursive-grammar 193 | (grammar 194 | #:start C 195 | [A B] 196 | [B A] 197 | [C (∪ A B (★ 'x))])) 198 | 199 | ;(parse '(x x x) parseable-infinite-recursive-grammar) 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | (define amb-exp-grammar 208 | (grammar 209 | #:start E 210 | #:literals numeric-literals 211 | [E (∪ 'NUM 212 | (@--> (list 'L E 'R) (λ (_1 e _2) e)) 213 | (@--> (list E '+ E) (λ (e1 _ e2) `(+ ,e1 ,e2))) 214 | (@--> (list E '* E) (λ (e1 _ e2) `(* ,e1 ,e2))))])) 215 | 216 | ;(parse '(1 + 2) amb-exp-grammar) 217 | ;(parse '(1 + 2 + 3) amb-exp-grammar) 218 | ;(parse '(1 + 2 + 3 + 4) amb-exp-grammar) 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | (define amb-exp-grammar-with-infinite-recursion 228 | (grammar 229 | #:start E 230 | #:literals numeric-literals 231 | [E (∪ 'NUM 232 | E 233 | (@--> (list 'L E 'R) (λ (_1 e _2) e)) 234 | (@--> (list E '+ E) (λ (e1 _ e2) `(+ ,e1 ,e2))) 235 | (@--> (list E '* E) (λ (e1 _ e2) `(* ,e1 ,e2))))])) 236 | 237 | 238 | ;(parse '(1 + 2 + 3 + 4) amb-exp-grammar-with-infinite-recursion) 239 | 240 | 241 | 242 | -------------------------------------------------------------------------------- /v1/derp-sugar.rkt: -------------------------------------------------------------------------------- 1 | (module derp-sugar 2 | racket 3 | 4 | (provide (all-defined-out)) 5 | 6 | (require "derp-core.rkt") 7 | 8 | (define-syntax (lang stx) 9 | (syntax-case stx (∅ ε ε* quote token? 10 | empty eps eps* 11 | ∪ ★ + ? ∘ 12 | or rep rep+ opt seq 13 | list list! 
unquote 14 | quasiquote 15 | → --> $--> @--> >--> car) 16 | [(f L) (with-syntax ([literal->language 17 | (datum->syntax #'L 'literal->language)]) 18 | #'(lang literal->language L))] 19 | [(_ ll (∅)) #'(∅)] 20 | [(_ ll (ε)) #'(ε (set '()))] 21 | [(_ ll (ε v)) #'(ε (set v))] 22 | [(_ ll (ε* S)) #'(ε S)] 23 | [(_ ll (token? pred)) #'(token? pred)] 24 | [(f ll (quote lit)) #'(ll 'lit)] 25 | 26 | [(f ll (empty)) #'(f ll (∅))] 27 | [(f ll (eps v)) #'(f ll (ε v))] 28 | [(f ll (eps* S)) #'(f ll (ε* S))] 29 | 30 | [(_ ll (∪)) #'(∅)] 31 | [(f ll (∪ l1)) #'(f ll l1)] 32 | [(f ll (∪ l1 l2 ...)) #'(∪ (f ll l1) (f ll (∪ l2 ...)))] 33 | [(f ll (or l1 ...)) #'(f ll (∪ l1 ...))] 34 | 35 | [(_ ll (∘)) #'(ε (set #f))] 36 | [(f ll (∘ l1)) #'(f ll l1)] 37 | [(f ll (∘ l1 l2 ...)) #'(∘ (f ll l1) (f ll (∘ l2 ...)))] 38 | [(f ll (seq l1 ...)) #'(f ll (∘ l1 ...))] 39 | 40 | 41 | [(_ ll (list)) #'(ε (set '()))] 42 | [(f ll (list l1)) #'(→ (f ll l1) (λ (w1) (list w1)))] 43 | [(f ll (list l1 l2 ...)) #'(∘ (f ll l1) (f ll (list l2 ...)))] 44 | 45 | [(_ ll (list!)) #'(ε (set '()))] 46 | [(f ll (list! ,l1 l2 ...)) #'(f ll (∘ (f ll l1) (list! l2 ...)))] 47 | [(f ll (list! l1 l2 ...)) #'(f ll (--> (∘ (f ll l1) (list! l2 ...)) cdr))] 48 | 49 | [(f ll `()) #'(ε (set '()))] 50 | [(f ll `(,hd tl ...)) #'(∘ (f ll hd) (f ll `(tl ...)))] 51 | [(f ll `(hd tl ...)) #'(∘ (f ll 'hd) (f ll `(tl ...)))] 52 | 53 | [(f ll (★ l)) #'(★ (f ll l))] 54 | [(f ll (rep l)) #'(f ll (★ l))] 55 | 56 | [(f ll (+ l)) #'(∘ (f ll l) (★ (f ll l)))] 57 | [(f ll (rep+ l)) #'(f ll (+ l))] 58 | 59 | [(f ll (? l)) #'(∪ (f ll l) (ε* (set #f)))] 60 | [(f ll (? l v)) #'(∪ (f ll l) (ε* (set v)))] 61 | [(f ll (opt x ...)) #'(f ll (? 
x ...))] 62 | 63 | [(f ll (car l)) #'(→ (f ll l) car)] 64 | 65 | 66 | [(f ll (→ l g)) #'(→ (f ll l) g)] 67 | [(f ll (--> l g)) #'(→ (f ll l) g)] 68 | [(f ll (@--> l g)) #'(→ (f ll l) (λ (w) (apply g w)))] 69 | [(f ll (>--> l c ...)) #'(→ (f ll l) (λ (w) (match w c ...)))] 70 | [(f ll ($--> l e ...)) (with-syntax ([$ (datum->syntax #'l '$)] 71 | [$$ (datum->syntax #'l '$$)]) 72 | #'(→ (f ll l) 73 | (λ ($$) 74 | (let (($ (λ (n) (list-ref $$ n)))) 75 | e ...))))] 76 | 77 | [(f ll atom) (let ((d (syntax->datum #'atom))) 78 | (cond 79 | [(string? d) #'(ll atom)] 80 | [(number? d) #'(ll atom)] 81 | [(boolean? d) #'(ll atom)] 82 | [else #'atom]))] 83 | 84 | [else (error "syntax error in lang")])) 85 | 86 | ; Specifies the default behavior for literals in the grammar: 87 | (define (default-literal->language lit) 88 | (token (λ (t) (equal? t lit)))) 89 | 90 | (define literal->language default-literal->language) 91 | 92 | ; Set the behavior for literals in the grammar: 93 | (define (set-literal->language! f) 94 | (set! literal->language f)) 95 | 96 | ; Tools for defining grammars: 97 | (define-syntax grammar-rule 98 | (syntax-rules () 99 | [(_ #:literals ll (lhs (rhs ...))) 100 | (define lhs (lang ll (rhs ...)))] 101 | 102 | [(_ #:literals ll (lhs rhs)) 103 | (define lhs (lang ll (--> rhs (λ (x) x))))] 104 | 105 | [(_ (lhs (rhs ...))) 106 | (define lhs (lang (rhs ...)))] 107 | 108 | [(_ (lhs rhs)) 109 | (define lhs (lang (--> rhs (λ (x) x))))])) 110 | 111 | 112 | (define-syntax grammar 113 | (syntax-rules () 114 | [(_) (∅)] 115 | 116 | [(_ #:start body rules ...) 117 | ; => 118 | (grammar rules ... body)] 119 | 120 | [(_ #:literals ll (lhs rhs) ... body) 121 | ; => 122 | (let () 123 | (grammar-rule #:literals ll (lhs rhs)) ... 124 | body)] 125 | 126 | [(_ (lhs rhs) ... body) 127 | ; => 128 | (let () 129 | (grammar-rule (lhs rhs)) ... 
130 | body)]))) 131 | -------------------------------------------------------------------------------- /docs/Operations concept map.svg: -------------------------------------------------------------------------------- 1 | Bool~MOGPEGREEREConjConjContCFGREwLA· *'&!/ --------------------------------------------------------------------------------