├── .gitignore ├── AGENTS.md ├── CHANGELOG.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── ai_optimizer.py ├── apps ├── keyvalue.scm └── minigame.scm ├── assets ├── gauge-needle.png ├── gauge.png ├── htop.png ├── info.html ├── memcp-logo.svg ├── ports.svg ├── shot1.png ├── shot2.png └── webapps.svg ├── docs ├── arithmetic--logic.md ├── associative-lists--dictionaries.md ├── date.md ├── docu.zip ├── index.md ├── io.md ├── lists.md ├── parsers.md ├── scm-builtins.md ├── storage.md ├── streams.md ├── strings.md ├── sync.md └── vectors.md ├── git-pre-commit ├── go.mod ├── lib ├── main.scm ├── psql-parser.scm ├── queryplan.scm ├── rdf-parser.scm ├── rdf.scm ├── sql-builtins.scm ├── sql-metadata.scm ├── sql-parser.scm ├── sql-test.scm ├── sql.scm └── test.scm ├── main.go ├── memcp.singularity.recipe ├── run_sql_tests.py ├── scm ├── alu.go ├── assoc_fast.go ├── compare.go ├── date.go ├── declare.go ├── jit.go ├── jit_amd64.go ├── jit_arm64.go ├── list.go ├── match.go ├── mysql.go ├── network.go ├── optimizer.go ├── packrat.go ├── parser.go ├── printer.go ├── prompt.go ├── scheduler.go ├── scm.go ├── streams.go ├── strings.go ├── sync.go ├── trace.go └── vector.go ├── storage ├── analyzer.go ├── cache.go ├── compute.go ├── csv.go ├── database.go ├── index.go ├── json.go ├── limits.go ├── overlay-blob.go ├── partition.go ├── persistence-files.go ├── persistence.go ├── scan.go ├── scan_helper.go ├── scan_order.go ├── settings.go ├── shard.go ├── shared_resource.go ├── storage-float.go ├── storage-int.go ├── storage-prefix.go ├── storage-scmer.go ├── storage-seq.go ├── storage-sparse.go ├── storage-string.go ├── storage.go └── table.go ├── test_memcp_api.py ├── tests ├── 01_basic_sql.yaml ├── 02_functions.yaml ├── 03_ddl_operations.yaml ├── 04_table_operations.yaml ├── 05_advanced_queries.yaml ├── 06_edge_cases.yaml ├── 07_error_cases.yaml ├── 08_rdf_sparql.yaml ├── 09_joins.yaml ├── 10_nulls.yaml ├── 11_group_having.yaml ├── 12_joins_outer.yaml ├── 13_subselects.yaml ├── 14_order_limit.yaml ├── 15_dml.yaml ├── 16_group_by_sum.yaml ├── 17_strings_like.yaml ├── 18_unnesting.yaml ├── 19_subselect_order.yaml ├── 20_default_values.yaml ├── 21_grant_revoke.yaml ├── 22_delete_qualified.yaml ├── 23_policy_enforcement.yaml ├── 24_mysql_basic_compat.yaml ├── 25_schema_qualified_insert.yaml ├── 26_mysql_datetime_defaults.yaml ├── 27_mysql_keys_indexes.yaml ├── 28_mysql_fk_acceptance.yaml ├── 29_mysql_upsert.yaml ├── 30_trailing_semicolon.yaml ├── 31_length_function.yaml ├── 32_expr_subselects.yaml ├── 33_collations_order.yaml ├── 34_collation_columns.yaml └── 35_memory_engine.yaml └── tools ├── lint_scm.py └── mysqldump-to-json.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.sw* 2 | *.o 3 | tools/Makefile 4 | memcp-logo.png 5 | test.jsonl 6 | memcp 7 | .memcp-history.tmp 8 | data 9 | memcp.sif 10 | trace*.json 11 | *.diff 12 | go.sum 13 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 0.1.4 2 | ===== 3 | 4 | - PASSWORD(str) for password hashes 5 | 6 | 0.1.3 7 | ===== 8 | 9 | - Parsec parsers 10 | - implement SELECT, UPDATE, DELETE with WHERE 11 | 12 | 0.1.2 13 | ===== 14 | 15 | - added Dockerfile 16 | - added function help 17 | - storage function: scan_order 18 | 19 | 0.1.1 20 | ===== 21 | 22 | - IO functions: password 23 | - user table for mysql auth 24 | - mysql and REST API check for username/password 25 | 26 
| 0.1.0 27 | ===== 28 | 29 | - basic scheme functions: quote, eval, if, and, or, match, define/set, lambda, begin, error, symbol, list 30 | - arithmetic scheme functions: +, -, *, /, <=, <, >=, >, equal?, !/not 31 | - scheme string functions: simplify, strlen, concat, toLower, toUpper, split 32 | - scheme list functions: append, cons, car, cdr, merge, has?, filter, map, reduce 33 | - scheme dictionary functions: filter_assoc, map_assoc, reduce_assoc, set_assoc, has_assoc?, merge_assoc 34 | - IO functions: print, import, load, serve, mysql 35 | - storage functions: scan, createdatabase, dropdatabase, createtable, droptable, insert, stat, rebuild, loadCSV, loadJSON 36 | - storage types: SCMER, int, sequence, string, dictionary, float 37 | - SQL: support for SELECT * FROM, CREATE DATABASE, CREATE TABLE, SHOW DATABASES, SHOW TABLES, INSERT INTO 38 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1 2 | 3 | # Build stage 4 | FROM golang:1.22-alpine AS builder 5 | 6 | WORKDIR /build 7 | 8 | # Install git for go modules that might need it 9 | RUN apk add --no-cache git 10 | 11 | # Copy go mod files first for better caching 12 | COPY go.mod go.sum ./ 13 | RUN go mod download 14 | 15 | # Copy source code 16 | COPY . . 17 | 18 | # Build the application 19 | RUN CGO_ENABLED=0 GOOS=linux go get 20 | RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o memcp . 21 | 22 | # Runtime stage 23 | FROM alpine:latest 24 | 25 | # Install ca-certificates for HTTPS requests 26 | RUN apk --no-cache add ca-certificates 27 | 28 | WORKDIR /app 29 | 30 | # Copy the binary from builder stage 31 | COPY --from=builder /build/memcp . 32 | # Copy Scheme library (runtime scripts) 33 | COPY --from=builder /build/lib ./lib 34 | 35 | # Create data directory 36 | RUN mkdir -p /data 37 | 38 | # Set up volumes and expose ports 39 | VOLUME /data 40 | EXPOSE 4332 41 | EXPOSE 3307 42 | 43 | # Set environment variables (overridable via docker-compose) 44 | # ROOT_PASSWORD is only considered in the first run 45 | ENV PARAMS= 46 | ENV ROOT_PASSWORD=admin 47 | ENV APP=lib/main.scm 48 | 49 | # Run the application (load default Scheme entrypoint) 50 | # If ROOT_PASSWORD is set, pass it as --root-password; otherwise rely on default in lib/sql.scm 51 | CMD ./memcp -data /data --root-password="$ROOT_PASSWORD" $PARAMS $APP 52 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | go build 3 | 4 | run: 5 | ./memcp 6 | 7 | perf: 8 | perf record --call-graph fp -- ./memcp 9 | 10 | test: 11 | # run `cp git-pre-commit .git/hooks/pre-commit` to activate the trigger 12 | ./git-pre-commit 13 | 14 | memcp.sif: 15 | sudo singularity build memcp.sif memcp.singularity.recipe 16 | 17 | docs: 18 | ./memcp -write-docu docs 19 | 20 | docker-release: 21 | sudo docker build -t carli2/memcp:latest . 
22 | sudo docker push carli2/memcp:latest 23 | 24 | .PHONY: memcp.sif docs 25 | -------------------------------------------------------------------------------- /apps/keyvalue.scm: -------------------------------------------------------------------------------- 1 | /* microservice democase: a simple key value store with prepared statements */ 2 | (import "../lib/sql-parser.scm") 3 | (import "../lib/queryplan.scm") 4 | 5 | /* usage: 6 | 7 | set a key-value pair: 8 | curl -d "my_value" http://localhost:1266/my_key 9 | 10 | retrieve a key-value pair: 11 | http://localhost:1266/my_key 12 | 13 | */ 14 | 15 | /* initialize database and prepare sql statements */ 16 | (createdatabase "keyvalue" true) 17 | (eval (parse_sql "keyvalue" "CREATE TABLE IF NOT EXISTS kv(key TEXT, value TEXT, UNIQUE KEY PRIMARY(key))")) 18 | 19 | (set item_get (parse_sql "keyvalue" "SELECT value FROM kv WHERE key = @key")) 20 | (set item_set (parse_sql "keyvalue" "INSERT INTO kv(key, value) VALUES (@key, @value) ON DUPLICATE KEY UPDATE value = @value")) 21 | /*(set item_list (parse_sql "keyvalue" "SELECT key, value FROM kv"))*/ 22 | 23 | 24 | (define http_handler (begin 25 | (lambda (req res) (begin 26 | (set session (newsession)) 27 | (session "key" (req "path")) 28 | (if (equal? (req "method") "GET") (begin 29 | /* GET = load */ 30 | (set resultrow (lambda (resultset) ((res "print") (resultset "value")))) 31 | (eval item_get) 32 | ) (begin 33 | /* PUT / POST: store */ 34 | (session "value" ((req "body"))) 35 | (eval item_set) 36 | ((res "print") "ok") 37 | )) 38 | )) 39 | )) 40 | 41 | (set port 1266) 42 | (serve port (lambda (req res) (http_handler req res))) 43 | -------------------------------------------------------------------------------- /apps/minigame.scm: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2023 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | (define minigame_static '( 19 | "" '(200 "text/html" "Have fun, play with it") 20 | "game.js" '(200 "text/javascript" "window.onload = function () { 21 | conn = new WebSocket('ws://' + document.location.host + '/minigame/ws'); 22 | conn.onopen = function () { 23 | conn.send('hi from client'); 24 | } 25 | conn.onmessage = function (msg) { 26 | console.log(msg); 27 | alert(msg.data); 28 | } 29 | }") 30 | '(404 "text/plain" "404 not found") 31 | )) 32 | 33 | (define http_handler (begin 34 | (set old_handler http_handler) 35 | (lambda (req res) (begin 36 | /* hooked our additional paths to it */ 37 | (match (req "path") 38 | (regex "^/minigame/(.*)$" url rest) (begin 39 | (if (equal? 
rest "ws") (begin 40 | (set msg ((res "websocket") (lambda (msg) (print "message: " msg)))) 41 | (msg 1 "Hello World from server") 42 | ) (match (minigame_static rest) '(status type content) (begin 43 | ((res "header") "Content-Type" type) 44 | ((res "status") status) 45 | ((res "print") content) 46 | ))) 47 | ) 48 | /* default */ 49 | (old_handler req res)) 50 | )) 51 | )) 52 | -------------------------------------------------------------------------------- /assets/gauge-needle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/launix-de/memcp/64671ed626acd595cb91a4149925e617254cd5d3/assets/gauge-needle.png -------------------------------------------------------------------------------- /assets/gauge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/launix-de/memcp/64671ed626acd595cb91a4149925e617254cd5d3/assets/gauge.png -------------------------------------------------------------------------------- /assets/htop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/launix-de/memcp/64671ed626acd595cb91a4149925e617254cd5d3/assets/htop.png -------------------------------------------------------------------------------- /assets/info.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | MemCP performance 7 | 44 | 45 | 46 | 47 |

Traditional Database (RAM, HDD, Performance)
  • Storage on Hard Disk
  • Single-Core Performance
  • No Compression
  • High Demand on RAM and HDD bandwidth
  • IO Bottleneck on TCP Connections

In-Memory Database (RAM, HDD, Performance)
  • Storage in RAM, Backup on HDD
  • Multi-Core Performance x12
  • In-Memory Compression Factor 5:1
  • Optimal Usage of HDD and RAM bandwidth
  • REST APIs, AI etc. directly in the address space of the database
  • Ideal for Statistics
96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /assets/shot1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/launix-de/memcp/64671ed626acd595cb91a4149925e617254cd5d3/assets/shot1.png -------------------------------------------------------------------------------- /assets/shot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/launix-de/memcp/64671ed626acd595cb91a4149925e617254cd5d3/assets/shot2.png -------------------------------------------------------------------------------- /docs/arithmetic--logic.md: -------------------------------------------------------------------------------- 1 | # Arithmetic / Logic 2 | 3 | ## int? 4 | 5 | tells if the value is a integer 6 | 7 | **Allowed number of parameters:** 1–1 8 | 9 | ### Parameters 10 | 11 | - **value** (`any`): value 12 | 13 | ### Returns 14 | 15 | `bool` 16 | 17 | ## number? 18 | 19 | tells if the value is a number 20 | 21 | **Allowed number of parameters:** 1–1 22 | 23 | ### Parameters 24 | 25 | - **value** (`any`): value 26 | 27 | ### Returns 28 | 29 | `bool` 30 | 31 | ## + 32 | 33 | adds two or more numbers 34 | 35 | **Allowed number of parameters:** 2–1000 36 | 37 | ### Parameters 38 | 39 | - **value...** (`number`): values to add 40 | 41 | ### Returns 42 | 43 | `number` 44 | 45 | ## - 46 | 47 | subtracts two or more numbers from the first one 48 | 49 | **Allowed number of parameters:** 2–1000 50 | 51 | ### Parameters 52 | 53 | - **value...** (`number`): values 54 | 55 | ### Returns 56 | 57 | `number` 58 | 59 | ## * 60 | 61 | multiplies two or more numbers 62 | 63 | **Allowed number of parameters:** 2–1000 64 | 65 | ### Parameters 66 | 67 | - **value...** (`number`): values 68 | 69 | ### Returns 70 | 71 | `number` 72 | 73 | ## / 74 | 75 | divides two or more numbers from the first one 76 | 77 | **Allowed number of parameters:** 2–1000 78 | 79 | ### Parameters 80 | 81 | - **value...** (`number`): values 82 | 83 | ### Returns 84 | 85 | `number` 86 | 87 | ## <= 88 | 89 | compares two numbers or strings 90 | 91 | **Allowed number of parameters:** 2–2 92 | 93 | ### Parameters 94 | 95 | - **value...** (`any`): values 96 | 97 | ### Returns 98 | 99 | `bool` 100 | 101 | ## < 102 | 103 | compares two numbers or strings 104 | 105 | **Allowed number of parameters:** 2–2 106 | 107 | ### Parameters 108 | 109 | - **value...** (`any`): values 110 | 111 | ### Returns 112 | 113 | `bool` 114 | 115 | ## > 116 | 117 | compares two numbers or strings 118 | 119 | **Allowed number of parameters:** 2–2 120 | 121 | ### Parameters 122 | 123 | - **value...** (`any`): values 124 | 125 | ### Returns 126 | 127 | `bool` 128 | 129 | ## >= 130 | 131 | compares two numbers or strings 132 | 133 | **Allowed number of parameters:** 2–2 134 | 135 | ### Parameters 136 | 137 | - **value...** (`any`): values 138 | 139 | ### Returns 140 | 141 | `bool` 142 | 143 | ## equal? 144 | 145 | compares two values of the same type, (equal? nil nil) is true 146 | 147 | **Allowed number of parameters:** 2–2 148 | 149 | ### Parameters 150 | 151 | - **value...** (`any`): values 152 | 153 | ### Returns 154 | 155 | `bool` 156 | 157 | ## equal?? 158 | 159 | performs a SQL compliant sloppy equality check on primitive values (number, int, string, bool. nil), strings are compared case insensitive, (equal? 
nil nil) is nil 160 | 161 | **Allowed number of parameters:** 2–2 162 | 163 | ### Parameters 164 | 165 | - **value...** (`any`): values 166 | 167 | ### Returns 168 | 169 | `bool` 170 | 171 | ## ! 172 | 173 | negates the boolean value 174 | 175 | **Allowed number of parameters:** 1–1 176 | 177 | ### Parameters 178 | 179 | - **value** (`bool`): value 180 | 181 | ### Returns 182 | 183 | `bool` 184 | 185 | ## not 186 | 187 | negates the boolean value 188 | 189 | **Allowed number of parameters:** 1–1 190 | 191 | ### Parameters 192 | 193 | - **value** (`bool`): value 194 | 195 | ### Returns 196 | 197 | `bool` 198 | 199 | ## nil? 200 | 201 | returns true if value is nil 202 | 203 | **Allowed number of parameters:** 1–1 204 | 205 | ### Parameters 206 | 207 | - **value** (`any`): value 208 | 209 | ### Returns 210 | 211 | `bool` 212 | 213 | ## min 214 | 215 | returns the smallest value 216 | 217 | **Allowed number of parameters:** 1–1000 218 | 219 | ### Parameters 220 | 221 | - **value...** (`number|string`): value 222 | 223 | ### Returns 224 | 225 | `number|string` 226 | 227 | ## max 228 | 229 | returns the highest value 230 | 231 | **Allowed number of parameters:** 1–1000 232 | 233 | ### Parameters 234 | 235 | - **value...** (`number|string`): value 236 | 237 | ### Returns 238 | 239 | `number|string` 240 | 241 | ## floor 242 | 243 | rounds the number down 244 | 245 | **Allowed number of parameters:** 1–1 246 | 247 | ### Parameters 248 | 249 | - **value** (`number`): value 250 | 251 | ### Returns 252 | 253 | `number` 254 | 255 | ## ceil 256 | 257 | rounds the number up 258 | 259 | **Allowed number of parameters:** 1–1 260 | 261 | ### Parameters 262 | 263 | - **value** (`number`): value 264 | 265 | ### Returns 266 | 267 | `number` 268 | 269 | ## round 270 | 271 | rounds the number 272 | 273 | **Allowed number of parameters:** 1–1 274 | 275 | ### Parameters 276 | 277 | - **value** (`number`): value 278 | 279 | ### Returns 280 | 281 | `number` 282 | 283 | -------------------------------------------------------------------------------- /docs/associative-lists--dictionaries.md: -------------------------------------------------------------------------------- 1 | # Associative Lists / Dictionaries 2 | 3 | ## filter_assoc 4 | 5 | returns a filtered dictionary according to a filter function 6 | 7 | **Allowed number of parameters:** 2–2 8 | 9 | ### Parameters 10 | 11 | - **dict** (`list`): dictionary that has to be filtered 12 | - **condition** (`func`): filter function func(string any)->bool where the first parameter is the key, the second is the value 13 | 14 | ### Returns 15 | 16 | `list` 17 | 18 | ## map_assoc 19 | 20 | returns a mapped dictionary according to a map function 21 | Keys will stay the same but values are mapped. 22 | 23 | **Allowed number of parameters:** 2–2 24 | 25 | ### Parameters 26 | 27 | - **dict** (`list`): dictionary that has to be mapped 28 | - **map** (`func`): map function func(string any)->any where the first parameter is the key, the second is the value. It must return the new value. 29 | 30 | ### Returns 31 | 32 | `list` 33 | 34 | ## reduce_assoc 35 | 36 | reduces a dictionary according to a reduce function 37 | 38 | **Allowed number of parameters:** 3–3 39 | 40 | ### Parameters 41 | 42 | - **dict** (`list`): dictionary that has to be reduced 43 | - **reduce** (`func`): reduce function func(any string any)->any where the first parameter is the accumulator, second is key, third is value. It must return the new accumulator. 
44 | - **neutral** (`any`): initial value for the accumulator 45 | 46 | ### Returns 47 | 48 | `any` 49 | 50 | ## has_assoc? 51 | 52 | checks if a dictionary has a key present 53 | 54 | **Allowed number of parameters:** 2–2 55 | 56 | ### Parameters 57 | 58 | - **dict** (`list`): dictionary that has to be checked 59 | - **key** (`string`): key to test 60 | 61 | ### Returns 62 | 63 | `bool` 64 | 65 | ## extract_assoc 66 | 67 | applies a function (key value) on the dictionary and returns the results as a flat list 68 | 69 | **Allowed number of parameters:** 2–2 70 | 71 | ### Parameters 72 | 73 | - **dict** (`list`): dictionary that has to be checked 74 | - **map** (`func`): func(string any)->any that flattens down each element 75 | 76 | ### Returns 77 | 78 | `list` 79 | 80 | ## set_assoc 81 | 82 | returns a dictionary where a single value has been changed. 83 | This function may destroy the input value for the sake of performance. You must not use the input value again. 84 | 85 | **Allowed number of parameters:** 3–4 86 | 87 | ### Parameters 88 | 89 | - **dict** (`list`): input dictionary that has to be changed. You must not use this value again. 90 | - **key** (`string`): key that has to be set 91 | - **value** (`any`): new value to set 92 | - **merge** (`func`): (optional) func(any any)->any that is called when a value is overwritten. The first parameter is the old value, the second is the new value. It must return the merged value that shall be physically stored in the new dictionary. 93 | 94 | ### Returns 95 | 96 | `list` 97 | 98 | ## merge_assoc 99 | 100 | returns a dictionary where all keys from dict1 and all keys from dict2 are present. 101 | If a key is present in both inputs, the second one will be dominant so the first value will be overwritten unless you provide a merge function 102 | 103 | **Allowed number of parameters:** 2–3 104 | 105 | ### Parameters 106 | 107 | - **dict1** (`list`): first input dictionary that has to be changed. You must not use this value again. 108 | - **dict2** (`list`): input dictionary that contains the new values that have to be added 109 | - **merge** (`func`): (optional) func(any any)->any that is called when a value is overwritten. The first parameter is the old value, the second is the new value from dict2. It must return the merged value that shall be physically stored in the new dictionary. 
110 | 111 | ### Returns 112 | 113 | `list` 114 | 115 | -------------------------------------------------------------------------------- /docs/date.md: -------------------------------------------------------------------------------- 1 | # Date 2 | 3 | ## now 4 | 5 | returns the unix timestamp 6 | 7 | **Allowed number of parameters:** 0–0 8 | 9 | ### Parameters 10 | 11 | _This function has no parameters._ 12 | 13 | ### Returns 14 | 15 | `int` 16 | 17 | ## parse_date 18 | 19 | parses unix date from a string 20 | 21 | **Allowed number of parameters:** 1–1 22 | 23 | ### Parameters 24 | 25 | - **value** (`string`): values to parse 26 | 27 | ### Returns 28 | 29 | `int` 30 | 31 | -------------------------------------------------------------------------------- /docs/docu.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/launix-de/memcp/64671ed626acd595cb91a4149925e617254cd5d3/docs/docu.zip -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Documentation 2 | 3 | - [SCM Builtins](scm-builtins.md) 4 | - [Arithmetic / Logic](arithmetic--logic.md) 5 | - [Strings](strings.md) 6 | - [Streams](streams.md) 7 | - [Lists](lists.md) 8 | - [Associative Lists / Dictionaries](associative-lists--dictionaries.md) 9 | - [Date](date.md) 10 | - [Vectors](vectors.md) 11 | - [Parsers](parsers.md) 12 | - [Sync](sync.md) 13 | - [IO](io.md) 14 | - [Storage](storage.md) 15 | -------------------------------------------------------------------------------- /docs/io.md: -------------------------------------------------------------------------------- 1 | # IO 2 | 3 | ## print 4 | 5 | Prints values to stdout (only in IO environment) 6 | 7 | **Allowed number of parameters:** 1–1000 8 | 9 | ### Parameters 10 | 11 | - **value...** (`any`): values to print 12 | 13 | ### Returns 14 | 15 | `bool` 16 | 17 | ## env 18 | 19 | returns the content of a environment variable 20 | 21 | **Allowed number of parameters:** 1–2 22 | 23 | ### Parameters 24 | 25 | - **var** (`string`): envvar 26 | - **default** (`string`): default if the env is not found 27 | 28 | ### Returns 29 | 30 | `string` 31 | 32 | ## help 33 | 34 | Lists all functions or print help for a specific function 35 | 36 | **Allowed number of parameters:** 0–1 37 | 38 | ### Parameters 39 | 40 | - **topic** (`string`): function to print help about 41 | 42 | ### Returns 43 | 44 | `nil` 45 | 46 | ## import 47 | 48 | Imports a file .scm file into current namespace 49 | 50 | **Allowed number of parameters:** 1–1 51 | 52 | ### Parameters 53 | 54 | - **filename** (`string`): filename relative to folder of source file 55 | 56 | ### Returns 57 | 58 | `any` 59 | 60 | ## load 61 | 62 | Loads a file or stream and returns the string or iterates line-wise 63 | 64 | **Allowed number of parameters:** 1–3 65 | 66 | ### Parameters 67 | 68 | - **filenameOrStream** (`string|stream`): filename relative to folder of source file or stream to read from 69 | - **linehandler** (`func`): handler that reads each line; each line may end with delimiter 70 | - **delimiter** (`string`): delimiter to extract; if no delimiter is given, the file is read as whole and returned or passed to linehandler 71 | 72 | ### Returns 73 | 74 | `string|bool` 75 | 76 | ## stream 77 | 78 | Opens a file readonly as stream 79 | 80 | **Allowed number of parameters:** 1–1 81 | 82 | ### Parameters 83 | 84 | - **filename** (`string`): 
filename relative to folder of source file 85 | 86 | ### Returns 87 | 88 | `stream` 89 | 90 | ## watch 91 | 92 | Loads a file and calls the callback. Whenever the file changes on disk, the file is loaded again. 93 | 94 | **Allowed number of parameters:** 2–2 95 | 96 | ### Parameters 97 | 98 | - **filename** (`string`): filename relative to folder of source file 99 | - **updatehandler** (`func`): handler that receives the file content func(content) 100 | 101 | ### Returns 102 | 103 | `bool` 104 | 105 | ## serve 106 | 107 | Opens an HTTP server at a given port 108 | 109 | **Allowed number of parameters:** 2–2 110 | 111 | ### Parameters 112 | 113 | - **port** (`number`): port number for HTTP server 114 | - **handler** (`func`): handler: lambda(req res) that handles the http request (TODO: detailed documentation) 115 | 116 | ### Returns 117 | 118 | `bool` 119 | 120 | ## serveStatic 121 | 122 | creates a static handler for use as a callback in (serve) - returns a handler lambda(req res) 123 | 124 | **Allowed number of parameters:** 1–1 125 | 126 | ### Parameters 127 | 128 | - **directory** (`string`): folder with the files to serve 129 | 130 | ### Returns 131 | 132 | `func` 133 | 134 | ## mysql 135 | 136 | Opens a MySQL protocol server at a given port 137 | 138 | **Allowed number of parameters:** 4–4 139 | 140 | ### Parameters 141 | 142 | - **port** (`number`): port number for MySQL server 143 | - **getPassword** (`func`): lambda(username string) string|nil has to return the password for a user or nil to deny login 144 | - **schemacallback** (`func`): lambda(username schema) bool handler that checks whether the user is allowed to access the schema (string) - you should check access rights here 145 | - **handler** (`func`): lambda(schema sql resultrow session) handler to process sql query (string) in schema (string). 
resultrow is a lambda(list) 146 | 147 | ### Returns 148 | 149 | `bool` 150 | 151 | ## password 152 | 153 | Hashes a password with sha1 (for mysql user authentication) 154 | 155 | **Allowed number of parameters:** 1–1 156 | 157 | ### Parameters 158 | 159 | - **password** (`string`): plain text password to hash 160 | 161 | ### Returns 162 | 163 | `string` 164 | 165 | ## args 166 | 167 | Returns command line arguments 168 | 169 | **Allowed number of parameters:** 0–0 170 | 171 | ### Parameters 172 | 173 | _This function has no parameters._ 174 | 175 | ### Returns 176 | 177 | `list` 178 | 179 | ## arg 180 | 181 | Gets a command line argument value 182 | 183 | **Allowed number of parameters:** 2–3 184 | 185 | ### Parameters 186 | 187 | - **longname** (`string`): long argument name (without --) 188 | - **shortname** (`string`): short argument name (without -) or default value if only 2 args 189 | - **default** (`any`): default value if argument not found 190 | 191 | ### Returns 192 | 193 | `any` 194 | 195 | -------------------------------------------------------------------------------- /docs/lists.md: -------------------------------------------------------------------------------- 1 | # Lists 2 | 3 | ## count 4 | 5 | counts the number of elements in the list 6 | 7 | **Allowed number of parameters:** 1–1 8 | 9 | ### Parameters 10 | 11 | - **list** (`list`): base list 12 | 13 | ### Returns 14 | 15 | `int` 16 | 17 | ## nth 18 | 19 | get the nth item of a list 20 | 21 | **Allowed number of parameters:** 2–2 22 | 23 | ### Parameters 24 | 25 | - **list** (`list`): base list 26 | - **index** (`number`): index beginning from 0 27 | 28 | ### Returns 29 | 30 | `any` 31 | 32 | ## append 33 | 34 | appends items to a list and return the extended list. 35 | The original list stays unharmed. 36 | 37 | **Allowed number of parameters:** 2–1000 38 | 39 | ### Parameters 40 | 41 | - **list** (`list`): base list 42 | - **item...** (`any`): items to add 43 | 44 | ### Returns 45 | 46 | `list` 47 | 48 | ## append_unique 49 | 50 | appends items to a list but only if they are new. 51 | The original list stays unharmed. 52 | 53 | **Allowed number of parameters:** 2–1000 54 | 55 | ### Parameters 56 | 57 | - **list** (`list`): base list 58 | - **item...** (`any`): items to add 59 | 60 | ### Returns 61 | 62 | `list` 63 | 64 | ## cons 65 | 66 | constructs a list from a head and a tail list 67 | 68 | **Allowed number of parameters:** 2–2 69 | 70 | ### Parameters 71 | 72 | - **car** (`any`): new head element 73 | - **cdr** (`list`): tail that is appended after car 74 | 75 | ### Returns 76 | 77 | `list` 78 | 79 | ## car 80 | 81 | extracts the head of a list 82 | 83 | **Allowed number of parameters:** 1–1 84 | 85 | ### Parameters 86 | 87 | - **list** (`list`): list 88 | 89 | ### Returns 90 | 91 | `any` 92 | 93 | ## cdr 94 | 95 | extracts the tail of a list 96 | The tail of a list is a list with all items except the head. 97 | 98 | **Allowed number of parameters:** 1–1 99 | 100 | ### Parameters 101 | 102 | - **list** (`list`): list 103 | 104 | ### Returns 105 | 106 | `any` 107 | 108 | ## zip 109 | 110 | swaps the dimension of a list of lists. If one parameter is given, it is a list of lists that is flattened. 
If multiple parameters are given, they are treated as the components that will be zipped into the sub list 111 | 112 | **Allowed number of parameters:** 1–1000 113 | 114 | ### Parameters 115 | 116 | - **list** (`list`): list of lists of items 117 | 118 | ### Returns 119 | 120 | `list` 121 | 122 | ## merge 123 | 124 | flattens a list of lists into a list containing all the subitems. If one parameter is given, it is a list of lists that is flattened. If multiple parameters are given, they are treated as lists that will be merged into one 125 | 126 | **Allowed number of parameters:** 1–1000 127 | 128 | ### Parameters 129 | 130 | - **list** (`list`): list of lists of items 131 | 132 | ### Returns 133 | 134 | `list` 135 | 136 | ## merge_unique 137 | 138 | flattens a list of lists into a list containing all the subitems. Duplicates are filtered out. 139 | 140 | **Allowed number of parameters:** 1–1000 141 | 142 | ### Parameters 143 | 144 | - **list** (`list`): list of lists of items 145 | 146 | ### Returns 147 | 148 | `list` 149 | 150 | ## has? 151 | 152 | checks if a list has a certain item (equal?) 153 | 154 | **Allowed number of parameters:** 2–2 155 | 156 | ### Parameters 157 | 158 | - **haystack** (`list`): list to search in 159 | - **needle** (`any`): item to search for 160 | 161 | ### Returns 162 | 163 | `bool` 164 | 165 | ## filter 166 | 167 | returns a list that only contains elements that pass the filter function 168 | 169 | **Allowed number of parameters:** 2–2 170 | 171 | ### Parameters 172 | 173 | - **list** (`list`): list that has to be filtered 174 | - **condition** (`func`): filter condition func(any)->bool 175 | 176 | ### Returns 177 | 178 | `list` 179 | 180 | ## map 181 | 182 | returns a list that contains the results of a map function that is applied to the list 183 | 184 | **Allowed number of parameters:** 2–2 185 | 186 | ### Parameters 187 | 188 | - **list** (`list`): list that has to be mapped 189 | - **map** (`func`): map function func(any)->any that is applied to each item 190 | 191 | ### Returns 192 | 193 | `list` 194 | 195 | ## mapIndex 196 | 197 | returns a list that contains the results of a map function that is applied to the list 198 | 199 | **Allowed number of parameters:** 2–2 200 | 201 | ### Parameters 202 | 203 | - **list** (`list`): list that has to be mapped 204 | - **map** (`func`): map function func(i, any)->any that is applied to each item 205 | 206 | ### Returns 207 | 208 | `list` 209 | 210 | ## reduce 211 | 212 | returns a list that contains the result of a map function 213 | 214 | **Allowed number of parameters:** 2–3 215 | 216 | ### Parameters 217 | 218 | - **list** (`list`): list that has to be reduced 219 | - **reduce** (`func`): reduce function func(any any)->any where the first parameter is the accumulator, the second is a list item 220 | - **neutral** (`any`): (optional) initial value of the accumulator, defaults to nil 221 | 222 | ### Returns 223 | 224 | `any` 225 | 226 | ## produce 227 | 228 | returns a list that contains produced items - it works like for(state = startstate, condition(state), state = iterator(state)) {yield state} 229 | 230 | **Allowed number of parameters:** 3–3 231 | 232 | ### Parameters 233 | 234 | - **startstate** (`any`): start state to begin with 235 | - **condition** (`func`): func that returns true whether the state will be inserted into the result or the loop is stopped 236 | - **iterator** (`func`): func that produces the next state 237 | 238 | ### Returns 239 | 240 | `list` 241 | 242 | ## produceN 243 | 244 | returns 
a list with numbers from 0..n-1 245 | 246 | **Allowed number of parameters:** 1–1 247 | 248 | ### Parameters 249 | 250 | - **n** (`number`): number of elements to produce 251 | 252 | ### Returns 253 | 254 | `list` 255 | 256 | ## list? 257 | 258 | checks if a value is a list 259 | 260 | **Allowed number of parameters:** 1–1 261 | 262 | ### Parameters 263 | 264 | - **value** (`any`): value to check 265 | 266 | ### Returns 267 | 268 | `bool` 269 | 270 | ## contains? 271 | 272 | checks if a value is in a list; uses the equal?? operator 273 | 274 | **Allowed number of parameters:** 2–2 275 | 276 | ### Parameters 277 | 278 | - **list** (`list`): list to check 279 | - **value** (`any`): value to check 280 | 281 | ### Returns 282 | 283 | `bool` 284 | 285 | -------------------------------------------------------------------------------- /docs/parsers.md: -------------------------------------------------------------------------------- 1 | # Parsers 2 | 3 | ## parser 4 | 5 | creates a parser 6 | 7 | Scm parsers work this way: 8 | (parser syntax scmerresult) -> func 9 | 10 | syntax can be one of: 11 | (parser syntax scmerresult) will execute scmerresult after parsing syntax 12 | (parser syntax scmerresult "skipper") will add a different whitespace skipper regex to the root parser 13 | (define var syntax) valid inside (parser...), stores the result of syntax into var for use in scmerresult 14 | "str" AtomParser 15 | (atom "str" caseinsensitive skipws) AtomParser 16 | (regex "asdf" caseinsensitive skipws) RegexParser 17 | '(a b c) AndParser 18 | (or a b c) OrParser 19 | (* sub separator) KleeneParser 20 | (+ sub separator) ManyParser 21 | (? xyz) MaybeParser (if >1 AndParser) 22 | (not mainparser parser1 parser2 parser3 ...) a parser that matches mainparser but not parser1... 23 | $ EndParser 24 | empty EmptyParser 25 | symbol -> use other parser defined in env 26 | 27 | for further details on packrat parsers, take a look at https://github.com/launix-de/go-packrat 28 | 29 | 30 | **Allowed number of parameters:** 1–3 31 | 32 | ### Parameters 33 | 34 | - **syntax** (`any`): syntax of the grammar (see docs) 35 | - **generator** (`any`): (optional) expressions to evaluate. All captured variables are available in the scope. 36 | - **skipper** (`string`): (optional) string that defines the skip mechanism for whitespaces as regexp 37 | 38 | ### Returns 39 | 40 | `func` 41 | 42 | -------------------------------------------------------------------------------- /docs/streams.md: -------------------------------------------------------------------------------- 1 | # Streams 2 | 3 | ## streamString 4 | 5 | creates a stream that contains a string 6 | 7 | **Allowed number of parameters:** 1–1 8 | 9 | ### Parameters 10 | 11 | - **content** (`string`): content to put into the stream 12 | 13 | ### Returns 14 | 15 | `stream` 16 | 17 | ## gzip 18 | 19 | compresses a stream with gzip. Create streams with (stream filename) 20 | 21 | **Allowed number of parameters:** 1–1 22 | 23 | ### Parameters 24 | 25 | - **stream** (`stream`): input stream 26 | 27 | ### Returns 28 | 29 | `stream` 30 | 31 | ## xz 32 | 33 | compresses a stream with xz. Create streams with (stream filename) 34 | 35 | **Allowed number of parameters:** 1–1 36 | 37 | ### Parameters 38 | 39 | - **stream** (`stream`): input stream 40 | 41 | ### Returns 42 | 43 | `stream` 44 | 45 | ## zcat 46 | 47 | turns a compressed gzip stream into a stream of uncompressed data. 
Create streams with (stream filename) 48 | 49 | **Allowed number of parameters:** 1–1 50 | 51 | ### Parameters 52 | 53 | - **stream** (`stream`): input stream 54 | 55 | ### Returns 56 | 57 | `stream` 58 | 59 | ## xzcat 60 | 61 | turns a compressed xz stream into a stream of uncompressed data. Create streams with (stream filename) 62 | 63 | **Allowed number of parameters:** 1–1 64 | 65 | ### Parameters 66 | 67 | - **stream** (`stream`): input stream 68 | 69 | ### Returns 70 | 71 | `stream` 72 | 73 | -------------------------------------------------------------------------------- /docs/strings.md: -------------------------------------------------------------------------------- 1 | # Strings 2 | 3 | ## string? 4 | 5 | tells if the value is a string 6 | 7 | **Allowed number of parameters:** 1–1 8 | 9 | ### Parameters 10 | 11 | - **value** (`any`): value 12 | 13 | ### Returns 14 | 15 | `bool` 16 | 17 | ## concat 18 | 19 | concatenates stringable values and returns a string 20 | 21 | **Allowed number of parameters:** 1–1000 22 | 23 | ### Parameters 24 | 25 | - **value...** (`any`): values to concat 26 | 27 | ### Returns 28 | 29 | `string` 30 | 31 | ## substr 32 | 33 | returns a substring 34 | 35 | **Allowed number of parameters:** 2–3 36 | 37 | ### Parameters 38 | 39 | - **value** (`string`): string to cut 40 | - **start** (`number`): first character index 41 | - **len** (`number`): optional length 42 | 43 | ### Returns 44 | 45 | `string` 46 | 47 | ## simplify 48 | 49 | turns a stringable input value in the easiest-most value (e.g. turn strings into numbers if they are numeric 50 | 51 | **Allowed number of parameters:** 1–1 52 | 53 | ### Parameters 54 | 55 | - **value** (`any`): value to simplify 56 | 57 | ### Returns 58 | 59 | `any` 60 | 61 | ## strlen 62 | 63 | returns the length of a string 64 | 65 | **Allowed number of parameters:** 1–1 66 | 67 | ### Parameters 68 | 69 | - **value** (`string`): input string 70 | 71 | ### Returns 72 | 73 | `int` 74 | 75 | ## strlike 76 | 77 | matches the string against a wildcard pattern (SQL compliant) 78 | 79 | **Allowed number of parameters:** 2–3 80 | 81 | ### Parameters 82 | 83 | - **value** (`string`): input string 84 | - **pattern** (`string`): pattern with % and _ in them 85 | - **collation** (`string`): collation in which to compare them 86 | 87 | ### Returns 88 | 89 | `bool` 90 | 91 | ## toLower 92 | 93 | turns a string into lower case 94 | 95 | **Allowed number of parameters:** 1–1 96 | 97 | ### Parameters 98 | 99 | - **value** (`string`): input string 100 | 101 | ### Returns 102 | 103 | `string` 104 | 105 | ## toUpper 106 | 107 | turns a string into upper case 108 | 109 | **Allowed number of parameters:** 1–1 110 | 111 | ### Parameters 112 | 113 | - **value** (`string`): input string 114 | 115 | ### Returns 116 | 117 | `string` 118 | 119 | ## replace 120 | 121 | replaces all occurances in a string with another string 122 | 123 | **Allowed number of parameters:** 3–3 124 | 125 | ### Parameters 126 | 127 | - **s** (`string`): input string 128 | - **find** (`string`): search string 129 | - **replace** (`string`): replace string 130 | 131 | ### Returns 132 | 133 | `string` 134 | 135 | ## split 136 | 137 | splits a string using a separator or space 138 | 139 | **Allowed number of parameters:** 1–2 140 | 141 | ### Parameters 142 | 143 | - **value** (`string`): input string 144 | - **separator** (`string`): (optional) parameter, defaults to " " 145 | 146 | ### Returns 147 | 148 | `list` 149 | 150 | ## collate 151 | 152 | returns the `<` operator for a 
given collation. MemCP allows natural sorting of numeric literals. 153 | 154 | **Allowed number of parameters:** 1–1 155 | 156 | ### Parameters 157 | 158 | - **collation** (`string`): collation string of the form LANG or LANG_cs or LANG_ci where LANG is a BCP 47 code, for compatibility to MySQL, a CHARSET_ prefix is allowed and ignored as well as the aliases bin, danish, general, german1, german2, spanish and swedish are allowed for language codes 159 | - **reverse** (`bool`): whether to reverse the order like in ORDER BY DESC 160 | 161 | ### Returns 162 | 163 | `func` 164 | 165 | ## htmlentities 166 | 167 | escapes the string for use in HTML 168 | 169 | **Allowed number of parameters:** 1–1 170 | 171 | ### Parameters 172 | 173 | - **value** (`string`): input string 174 | 175 | ### Returns 176 | 177 | `string` 178 | 179 | ## urlencode 180 | 181 | encodes a string according to URI coding schema 182 | 183 | **Allowed number of parameters:** 1–1 184 | 185 | ### Parameters 186 | 187 | - **value** (`string`): string to encode 188 | 189 | ### Returns 190 | 191 | `string` 192 | 193 | ## urldecode 194 | 195 | decodes a string according to URI coding schema 196 | 197 | **Allowed number of parameters:** 1–1 198 | 199 | ### Parameters 200 | 201 | - **value** (`string`): string to decode 202 | 203 | ### Returns 204 | 205 | `string` 206 | 207 | ## json_encode 208 | 209 | encodes a value in JSON, treats lists as lists 210 | 211 | **Allowed number of parameters:** 1–1 212 | 213 | ### Parameters 214 | 215 | - **value** (`any`): value to encode 216 | 217 | ### Returns 218 | 219 | `string` 220 | 221 | ## json_encode_assoc 222 | 223 | encodes a value in JSON, treats lists as associative arrays 224 | 225 | **Allowed number of parameters:** 1–1 226 | 227 | ### Parameters 228 | 229 | - **value** (`any`): value to encode 230 | 231 | ### Returns 232 | 233 | `string` 234 | 235 | ## json_decode 236 | 237 | parses JSON into a map 238 | 239 | **Allowed number of parameters:** 1–1 240 | 241 | ### Parameters 242 | 243 | - **value** (`string`): string to decode 244 | 245 | ### Returns 246 | 247 | `any` 248 | 249 | ## base64_encode 250 | 251 | encodes a string as Base64 (standard encoding) 252 | 253 | **Allowed number of parameters:** 1–1 254 | 255 | ### Parameters 256 | 257 | - **value** (`string`): binary string to encode 258 | 259 | ### Returns 260 | 261 | `string` 262 | 263 | ## base64_decode 264 | 265 | decodes a Base64 string (standard encoding) 266 | 267 | **Allowed number of parameters:** 1–1 268 | 269 | ### Parameters 270 | 271 | - **value** (`string`): base64-encoded string 272 | 273 | ### Returns 274 | 275 | `string` 276 | 277 | ## sql_unescape 278 | 279 | unescapes the inner part of a sql string 280 | 281 | **Allowed number of parameters:** 1–1 282 | 283 | ### Parameters 284 | 285 | - **value** (`string`): string to decode 286 | 287 | ### Returns 288 | 289 | `string` 290 | 291 | ## bin2hex 292 | 293 | turns binary data into hex with lowercase letters 294 | 295 | **Allowed number of parameters:** 1–1 296 | 297 | ### Parameters 298 | 299 | - **value** (`string`): string to decode 300 | 301 | ### Returns 302 | 303 | `string` 304 | 305 | ## hex2bin 306 | 307 | decodes a hex string into binary data 308 | 309 | **Allowed number of parameters:** 1–1 310 | 311 | ### Parameters 312 | 313 | - **value** (`string`): hex string (even length) 314 | 315 | ### Returns 316 | 317 | `string` 318 | 319 | ## randomBytes 320 | 321 | returns a string with numBytes cryptographically secure random bytes 322 | 323 | **Allowed number 
of parameters:** 1–1 324 | 325 | ### Parameters 326 | 327 | - **numBytes** (`number`): number of random bytes 328 | 329 | ### Returns 330 | 331 | `string` 332 | 333 | -------------------------------------------------------------------------------- /docs/sync.md: -------------------------------------------------------------------------------- 1 | # Sync 2 | 3 | ## newsession 4 | 5 | Creates a new session which is a threadsafe key-value store represented as a function that can be either called as a getter (session key) or setter (session key value) or list all keys with (session) 6 | 7 | **Allowed number of parameters:** 0–0 8 | 9 | ### Parameters 10 | 11 | _This function has no parameters._ 12 | 13 | ### Returns 14 | 15 | `func` 16 | 17 | ## context 18 | 19 | Context helper function. Each context also contains a session. (context func args) creates a new context and runs func in that context, (context "session") reads the session variable, (context "check") will check the liveliness of the context and otherwise throw an error 20 | 21 | **Allowed number of parameters:** 1–1000 22 | 23 | ### Parameters 24 | 25 | - **args...** (`any`): depends on the usage 26 | 27 | ### Returns 28 | 29 | `any` 30 | 31 | ## sleep 32 | 33 | sleeps the amount of seconds 34 | 35 | **Allowed number of parameters:** 1–1 36 | 37 | ### Parameters 38 | 39 | - **duration** (`number`): number of seconds to sleep 40 | 41 | ### Returns 42 | 43 | `bool` 44 | 45 | ## once 46 | 47 | Creates a function wrapper that you can call multiple times but only gets executed once. The result value is cached and returned on a second call. You can add parameters to that resulting function that will be passed to the first run of the wrapped function. 48 | 49 | **Allowed number of parameters:** 1–1 50 | 51 | ### Parameters 52 | 53 | - **f** (`func`): function that produces the result value 54 | 55 | ### Returns 56 | 57 | `func` 58 | 59 | ## mutex 60 | 61 | Creates a mutex. The return value is a function that takes one parameter which is a parameterless function. The mutex is guaranteed that all calls to that mutex get serialized. 
62 | 63 | **Allowed number of parameters:** 1–1 64 | 65 | ### Parameters 66 | 67 | _This function has no parameters._ 68 | 69 | ### Returns 70 | 71 | `func` 72 | 73 | -------------------------------------------------------------------------------- /docs/vectors.md: -------------------------------------------------------------------------------- 1 | # Vectors 2 | 3 | ## dot 4 | 5 | produced the dot product 6 | 7 | **Allowed number of parameters:** 2–3 8 | 9 | ### Parameters 10 | 11 | - **v1** (`list`): vector1 12 | - **v2** (`list`): vector2 13 | - **mode** (`string`): DOT, COSINE, EUCLIDEAN, default is DOT 14 | 15 | ### Returns 16 | 17 | `number` 18 | 19 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/launix-de/memcp 2 | 3 | go 1.22.0 4 | 5 | require ( 6 | github.com/chzyer/readline v1.5.1 7 | github.com/dc0d/onexit v1.1.0 8 | github.com/docker/go-units v0.5.0 9 | github.com/fsnotify/fsnotify v1.8.0 10 | github.com/google/btree v1.1.3 11 | github.com/google/uuid v1.6.0 12 | github.com/gorilla/websocket v1.5.3 13 | github.com/jtolds/gls v4.20.0+incompatible 14 | github.com/launix-de/NonLockingReadMap v1.0.8 15 | github.com/launix-de/go-mysqlstack v0.0.0-20241101205441-bc39b4e0fb04 16 | github.com/launix-de/go-packrat/v2 v2.1.11 17 | github.com/ulikunitz/xz v0.5.15 18 | golang.org/x/text v0.21.0 19 | ) 20 | 21 | require ( 22 | github.com/gopherjs/gopherjs v1.17.2 // indirect 23 | github.com/shopspring/decimal v1.4.0 // indirect 24 | golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c // indirect 25 | golang.org/x/sys v0.26.0 // indirect 26 | ) 27 | -------------------------------------------------------------------------------- /lib/main.scm: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2023 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | (print "") 19 | (print "Welcome to memcp") 20 | (print "") 21 | (import "test.scm") 22 | 23 | (set static_files (serveStatic "../assets")) 24 | 25 | /* this can be overhooked */ 26 | (define http_handler (lambda (req res) (begin 27 | (print "request " req) 28 | (if (equal? 
(req "path") "/") (begin 29 | ((res "header") "Location" "/info.html") 30 | ((res "status") 301) 31 | ) (static_files req res)) 32 | /* 33 | ((res "header") "Content-Type" "text/plain") 34 | ((res "status") 404) 35 | ((res "println") "404 not found") 36 | */ 37 | ))) 38 | 39 | (import "sql.scm") 40 | (import "rdf.scm") 41 | 42 | /* read ports from command line arguments or environment */ 43 | (if (not (arg "disable-api" false)) (begin 44 | (set port (arg "api-port" (env "PORT" "4321"))) 45 | (serve port (lambda (req res) (http_handler req res))) 46 | (print "listening on http://localhost:" port) 47 | )) 48 | -------------------------------------------------------------------------------- /lib/rdf.scm: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2024 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | (import "rdf-parser.scm") 19 | 20 | /* 21 | this is how rdf works: 22 | - every database may have a table rdf(s text, p text, o text) 23 | - import formats are: xml, ttl 24 | */ 25 | 26 | (define handler_404 (lambda (req res) (begin 27 | /*(print "request " req)*/ 28 | ((res "header") "Content-Type" "text/plain") 29 | ((res "status") 404) 30 | ((res "println") "404 not found") 31 | ))) 32 | 33 | /* http hook for handling SparQL */ 34 | (define http_handler (begin 35 | (set old_handler (coalesce http_handler handler_404)) 36 | (define handle_query (lambda (req res schema query) (begin 37 | /* check for password */ 38 | (set pw (scan "system" "user" '("username") (lambda (username) (equal? username (req "username"))) '("password") (lambda (password) password) (lambda (a b) b) nil)) 39 | (if (and pw (equal? pw (password (req "password")))) (time (begin 40 | ((res "header") "Content-Type" "text/plain") 41 | ((res "status") 200) 42 | /*(print "RDF query: " query)*/ 43 | (define formula (parse_sparql schema query)) 44 | (define resultrow (res "jsonl")) 45 | 46 | (eval formula) 47 | ) query) (begin 48 | ((res "header") "Content-Type" "text/plain") 49 | ((res "header") "WWW-Authenticate" "Basic realm=\"authorization required\"") 50 | ((res "status") 401) 51 | ((res "print") "Unauthorized") 52 | )) 53 | ))) 54 | (define handle_ttl_load (lambda (req res schema ttl_data) (begin 55 | /* check for password */ 56 | (set pw (scan "system" "user" '("username") (lambda (username) (equal? username (req "username"))) '("password") (lambda (password) password) (lambda (a b) b) nil)) 57 | (if (and pw (equal? 
pw (password (req "password")))) (begin 58 | ((res "header") "Content-Type" "text/plain") 59 | ((res "status") 200) 60 | /*(print "Loading TTL data into: " schema)*/ 61 | /* ensure rdf table exists */ 62 | (eval (parse_sql schema "CREATE TABLE IF NOT EXISTS rdf (s TEXT, p TEXT, o TEXT)" (lambda (schema table write) true))) 63 | /* load the TTL data */ 64 | (load_ttl schema ttl_data) 65 | ((res "println") "TTL data loaded successfully") 66 | ) (begin 67 | ((res "header") "Content-Type" "text/plain") 68 | ((res "header") "WWW-Authenticate" "Basic realm=\"authorization required\"") 69 | ((res "status") 401) 70 | ((res "print") "Unauthorized") 71 | )) 72 | ))) 73 | old_handler old_handler /* workaround for optimizer bug */ 74 | (lambda (req res) (begin 75 | /* hooked our additional paths to it */ 76 | (match (req "path") 77 | (regex "^/rdf/([^/]+)$" url schema) (begin 78 | (set query ((req "body"))) 79 | (handle_query req res schema query) 80 | ) 81 | (regex "^/rdf/([^/]+)/(.*)$" url schema query_un) (begin 82 | (set query (urldecode query_un)) 83 | (handle_query req res schema query) 84 | ) 85 | (regex "^/rdf/([^/]+)/load_ttl$" url schema) (begin 86 | (set ttl_data ((req "body"))) 87 | (handle_ttl_load req res schema ttl_data) 88 | ) 89 | /* default */ 90 | (!begin 91 | ((outer old_handler) req res)) 92 | ) 93 | )) 94 | )) 95 | -------------------------------------------------------------------------------- /lib/sql-builtins.scm: -------------------------------------------------------------------------------- 1 | (define sql_builtins (coalesce sql_builtins (newsession))) 2 | 3 | /* all upper case */ 4 | /*(sql_builtins "HELLO" (lambda () "Hello world"))*/ 5 | 6 | /* time */ 7 | (sql_builtins "UNIX_TIMESTAMP" now) 8 | (sql_builtins "UNIX_TIMESTAMP" parse_date) 9 | (sql_builtins "CURRENT_TIMESTAMP" now) 10 | (sql_builtins "NOW" now) 11 | 12 | /* math */ 13 | (sql_builtins "FLOOR" floor) 14 | (sql_builtins "CEIL" ceil) 15 | (sql_builtins "CEILING" ceil) 16 | (sql_builtins "ROUND" round) 17 | 18 | /* strings */ 19 | (sql_builtins "UPPER" toUpper) 20 | (sql_builtins "LOWER" toLower) 21 | (sql_builtins "PASSWORD" password) 22 | /* Base64 helpers */ 23 | (sql_builtins "TO_BASE64" base64_encode) 24 | (sql_builtins "FROM_BASE64" base64_decode) 25 | /* SQL LENGTH(str): NULL-safe wrapper around strlen */ 26 | (sql_builtins "LENGTH" (lambda (x) (if (nil? x) nil (strlen x)))) 27 | 28 | /* vectors */ 29 | (sql_builtins "VECTOR_DISTANCE" dot) 30 | (sql_builtins "STRING_TO_VECTOR" json_decode) 31 | (sql_builtins "VECTOR_TO_STRING" json_encode) 32 | (sql_builtins "VECTOR_DIM" json_encode) 33 | 34 | /* management: use SQL statements instead (REBUILD, SHOW SHARDS, etc.) */ 35 | -------------------------------------------------------------------------------- /lib/sql-metadata.scm: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2023, 2024 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 
13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | /* emulate metadata tables */ 19 | (define get_schema (lambda (schema tbl) (match '(schema tbl) 20 | /* special tables */ 21 | '((ignorecase "information_schema") (ignorecase "schemata")) '( 22 | '("Field" "catalog_name") 23 | '("Field" "schema_name") 24 | '("Field" "default_character_set_name") 25 | '("Field" "default_collation_name") 26 | '("Field" "sql_path") 27 | '("Field" "schema_comment") 28 | ) 29 | 30 | '((ignorecase "information_schema") (ignorecase "tables")) '( 31 | '("Field" "table_catalog") 32 | '("Field" "table_schema") 33 | '("Field" "table_name") 34 | '("Field" "table_type") 35 | ) 36 | '((ignorecase "information_schema") (ignorecase "columns")) '( 37 | '("Field" "table_catalog") 38 | '("Field" "table_schema") 39 | '("Field" "table_name") 40 | '("Field" "column_name") 41 | '("Field" "ordinal_position") 42 | '("Field" "column_default") 43 | '("Field" "is_nullable") 44 | '("Field" "data_type") 45 | /* TODO: CHARACTER_MAXIMUM_LENGTH CHARACTER_OCTET_LENGTH NUMERIC_PRECISION NUMERIC_SCALE DATETIME_PRECISION CHARACTER_SET_NAME COLLATION_NAME */ 46 | '("Field" "column_type") 47 | '("Field" "column_key") 48 | '("Field" "extra") 49 | '("Field" "privileges") 50 | '("Field" "column_comment") 51 | '("Field" "is_generated") 52 | '("Field" "generation_expression") 53 | ) 54 | '((ignorecase "information_schema") (ignorecase "key_column_usage")) '( 55 | '("Field" "constraint_catalog") 56 | '("Field" "constraint_schema") 57 | '("Field" "constraint_name") 58 | '("Field" "table_catalog") 59 | '("Field" "table_schema") 60 | '("Field" "table_name") 61 | '("Field" "column_name") 62 | '("Field" "ordinal_position") 63 | '("Field" "position_in_unique_constraint") 64 | '("Field" "referenced_table_schema") 65 | '("Field" "referenced_table_name") 66 | '("Field" "referenced_column_name") 67 | ) 68 | '((ignorecase "information_schema") (ignorecase "referential_constraints")) '( 69 | '("Field" "constraint_catalog") 70 | '("Field" "constraint_schema") 71 | '("Field" "constraint_name") 72 | '("Field" "unique_constraint_catalog") 73 | '("Field" "unique_constraint_schema") 74 | '("Field" "unique_constraint_name") 75 | '("Field" "match_option") 76 | '("Field" "update_rule") 77 | '("Field" "delete_rule") 78 | '("Field" "table_name") 79 | '("Field" "referenced_table_name") 80 | ) 81 | 82 | /* Minimal compatibility for mysqldump probes */ 83 | '((ignorecase "information_schema") (ignorecase "files")) '( 84 | '("Field" "file_name") 85 | '("Field" "file_type") 86 | '("Field" "tablespace_name") 87 | '("Field" "logfile_group_name") 88 | '("Field" "total_extents") 89 | '("Field" "initial_size") 90 | '("Field" "engine") 91 | '("Field" "extra") 92 | ) 93 | '((ignorecase "information_schema") (ignorecase "partitions")) '( 94 | '("Field" "table_schema") 95 | '("Field" "table_name") 96 | '("Field" "partition_name") 97 | '("Field" "tablespace_name") 98 | ) 99 | 100 | /* Unknown INFORMATION_SCHEMA table → clear SCM-side error */ 101 | '((ignorecase "information_schema") _) 102 | (error (concat "INFORMATION_SCHEMA." 
tbl " is not supported yet")) 103 | (show schema tbl) /* otherwise: fetch from metadata */ 104 | ))) 105 | (define scan_wrapper (lambda args (match args (merge '(scanfn schema tbl) rest) (match '(schema tbl) 106 | '((ignorecase "information_schema") (ignorecase "schemata")) 107 | (merge '(scanfn schema 108 | '('map '('show) '('lambda '('schema) '('list "catalog_name" "def" "schema_name" 'schema "default_character_set_name" "utf8mb4" "default_collation_name" "utf8mb3_general_ci" "sql_path" NULL "schema_comment" ""))) 109 | ) rest) 110 | '((ignorecase "information_schema") (ignorecase "tables")) 111 | (merge '(scanfn schema 112 | '('merge '('map '('show) '('lambda '('schema) '('map '('show 'schema) '('lambda '('tbl) '('list "table_catalog" "def" "table_schema" 'schema "table_name" 'tbl "table_type" "BASE TABLE")))))) 113 | ) rest) 114 | '((ignorecase "information_schema") (ignorecase "columns")) 115 | (merge '(scanfn schema 116 | '((quote merge) '((quote map) '((quote show)) '((quote lambda) '((quote schema)) '((quote merge) '((quote map) '((quote show) (quote schema)) '((quote lambda) '((quote tbl)) '((quote map) '((quote show) (quote schema) (quote tbl)) '((quote lambda) '((quote col)) '((quote list) "table_catalog" "def" "table_schema" (quote schema) "table_name" (quote tbl) "column_name" '((quote col) "Field") "data_type" '((quote col) "RawType") "column_type" '((quote concat) '((quote col) "Type") '((quote col) "Dimensions"))))))))))) 117 | ) rest) 118 | '((ignorecase "information_schema") (ignorecase "key_column_usage")) 119 | (merge '(scanfn schema '(list)) rest) /* TODO: list constraints */ 120 | '((ignorecase "information_schema") (ignorecase "referential_constraints")) 121 | (merge '(scanfn schema '(list)) rest) /* TODO: list constraints */ 122 | '((ignorecase "information_schema") (ignorecase "files")) 123 | (merge '(scanfn schema '(list)) rest) /* empty: MemCP has no tablespaces/undo logs */ 124 | '((ignorecase "information_schema") (ignorecase "partitions")) 125 | (merge '(scanfn schema '(list)) rest) /* empty: no MySQL partitions */ 126 | '(schema tbl) /* normal case */ 127 | (merge '(scanfn schema tbl) rest) 128 | )))) 129 | -------------------------------------------------------------------------------- /lib/sql-test.scm: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2025 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | 18 | /* SQL Engine Test Suite - contained in its own environment */ 19 | ((lambda () (begin 20 | (print "performing SQL engine tests ...") 21 | 22 | (set teststat (newsession)) 23 | (teststat "count" 0) 24 | (teststat "success" 0) 25 | (define assert (lambda (val1 val2 errormsg) (begin 26 | (teststat "count" (+ (teststat "count") 1)) 27 | (if (equal? 
val1 val2) (teststat "success" (+ (teststat "success") 1)) (print "failed test "(teststat "count")": " errormsg)) 28 | ))) 29 | 30 | /* Clean up any existing test database and create fresh one */ 31 | (try (lambda () (dropdatabase "memcp-tests")) (lambda (e) nil)) 32 | (createdatabase "memcp-tests" true) 33 | 34 | /* Helper function to execute SQL and return result rows */ 35 | (define sql-test-exec (lambda (query) (begin 36 | (set query-results (newsession)) 37 | (query-results "rows" '()) 38 | (define resultrow (lambda (row) (begin 39 | (query-results "rows" (append (query-results "rows") (list row))) 40 | ))) 41 | (eval (parse_sql "memcp-tests" query)) 42 | (query-results "rows") 43 | ))) 44 | 45 | /* Create test tables and run simple tests */ 46 | (sql-test-exec "CREATE TABLE test_users (id INT PRIMARY KEY, name VARCHAR(50))") 47 | (sql-test-exec "INSERT INTO test_users (id, name) VALUES (1, 'Alice')") 48 | (sql-test-exec "INSERT INTO test_users (id, name) VALUES (2, 'Bob')") 49 | 50 | (define result1 (sql-test-exec "SELECT * FROM test_users")) 51 | (assert (equal? (count result1) 2) true "SELECT should return 2 rows") 52 | 53 | (define result2 (sql-test-exec "SELECT COUNT(*) FROM test_users")) 54 | (assert (equal? (count result2) 1) true "SELECT COUNT(*) should return 1 row") 55 | 56 | /* Basic parsing tests - just verify the SQL parses correctly without executing */ 57 | (define allow (lambda (schema table write) true)) 58 | (assert (list? (parse_sql "system" "SELECT 1" allow)) true "Simple SELECT should parse") 59 | (assert (list? (parse_sql "system" "SELECT * FROM user" allow)) true "SELECT * should parse") 60 | (assert (list? (parse_sql "system" "INSERT INTO user VALUES (1, 'test', 'pass')" allow)) true "INSERT should parse") 61 | (assert (list? (parse_sql "system" "UPDATE user SET username = 'newname' WHERE id = 1" allow)) true "UPDATE should parse") 62 | (assert (list? (parse_sql "system" "DELETE FROM user WHERE id = 1" allow)) true "DELETE should parse") 63 | 64 | (print "SQL parsing and execution tests completed successfully") 65 | 66 | /* Clean up test database */ 67 | (dropdatabase "memcp-tests") 68 | 69 | (print "finished SQL engine tests") 70 | (print "test result: " (teststat "success") "/" (teststat "count")) 71 | (if (< (teststat "success") (teststat "count")) (begin 72 | (print "") 73 | (print "---- !!! some SQL test cases have failed !!! 
----") 74 | (print "") 75 | (print " SQL engine may have issues") 76 | (error "SQL tests failed") 77 | ) (print "all SQL tests succeeded.")) 78 | (print "") 79 | ))) 80 | -------------------------------------------------------------------------------- /memcp.singularity.recipe: -------------------------------------------------------------------------------- 1 | Bootstrap: docker 2 | From: ubuntu:22.04 3 | 4 | # build with: sudo singularity build memcp.sif memcp.singularity.recipe 5 | # run with: singularity run --bind `pwd`:/data memcp.sif 6 | 7 | %post 8 | apt-get -y update 9 | apt-get -y install git software-properties-common 10 | add-apt-repository -y ppa:longsleep/golang-backports 11 | apt-get -y install golang 12 | 13 | git clone https://github.com/launix-de/memcp /memcp 14 | cd /memcp 15 | go get 16 | go build 17 | 18 | %environment 19 | export PATH="/memcp:$PATH" 20 | 21 | %runscript 22 | cd /memcp && ./memcp -data /data 23 | -------------------------------------------------------------------------------- /scm/assoc_fast.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2025 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | package scm 18 | 19 | import ( 20 | "encoding/binary" 21 | "hash/maphash" 22 | "math" 23 | "reflect" 24 | ) 25 | 26 | // Stable seed for hashing to ensure consistent indices across Set/Get calls. 27 | var fastDictSeed maphash.Seed 28 | 29 | func init() { 30 | fastDictSeed = maphash.MakeSeed() 31 | } 32 | 33 | // FastDict: shard-local assoc optimized for frequent set/merge operations. 34 | // Implementation uses a flat pairs array plus a lightweight hash index 35 | // to avoid O(N^2) behavior as it grows. 36 | type FastDict struct { 37 | Pairs []Scmer // [k0, v0, k1, v1, ...] 38 | index map[uint64][]int // hash -> positions (indices into Pairs, even only) 39 | } 40 | 41 | func NewFastDict(capacityPairs int) *FastDict { 42 | if capacityPairs < 0 { 43 | capacityPairs = 0 44 | } 45 | return &FastDict{Pairs: make([]Scmer, 0, capacityPairs*2), index: make(map[uint64][]int)} 46 | } 47 | 48 | func (d *FastDict) Iterate(fn func(k, v Scmer) bool) { 49 | for i := 0; i < len(d.Pairs); i += 2 { 50 | if !fn(d.Pairs[i], d.Pairs[i+1]) { 51 | return 52 | } 53 | } 54 | } 55 | 56 | // HashKey computes a stable hash for a Scheme value. 57 | // It avoids allocating intermediate strings by inspecting types and 58 | // feeding bytes directly to a streaming hasher. Lists are hashed by 59 | // recursively hashing their elements with structural markers. 
60 | func HashKey(k Scmer) uint64 { 61 | var h maphash.Hash 62 | h.SetSeed(fastDictSeed) 63 | var writeScmer func(v Scmer) 64 | writeScmer = func(v Scmer) { 65 | switch x := v.(type) { 66 | case nil: 67 | h.WriteByte(0) 68 | case bool: 69 | h.WriteByte(1) 70 | if x { 71 | h.WriteByte(1) 72 | } else { 73 | h.WriteByte(0) 74 | } 75 | case int64: 76 | h.WriteByte(2) 77 | var b [8]byte 78 | binary.LittleEndian.PutUint64(b[:], uint64(x)) 79 | h.Write(b[:]) 80 | case float64: 81 | h.WriteByte(3) 82 | var b [8]byte 83 | binary.LittleEndian.PutUint64(b[:], math.Float64bits(x)) 84 | h.Write(b[:]) 85 | case string: 86 | h.WriteByte(4) 87 | h.WriteString(x) 88 | case Symbol: 89 | h.WriteByte(5) 90 | h.WriteString(string(x)) 91 | case []Scmer: 92 | h.WriteByte(6) 93 | // write length to reduce collisions for different list sizes 94 | var b [8]byte 95 | binary.LittleEndian.PutUint64(b[:], uint64(len(x))) 96 | h.Write(b[:]) 97 | for _, el := range x { 98 | writeScmer(el) 99 | } 100 | case *FastDict: 101 | // Hash as list of pairs to match []Scmer assoc representation 102 | h.WriteByte(6) 103 | var b [8]byte 104 | binary.LittleEndian.PutUint64(b[:], uint64(len(x.Pairs))) 105 | h.Write(b[:]) 106 | for i := 0; i < len(x.Pairs); i += 2 { 107 | writeScmer(x.Pairs[i]) 108 | writeScmer(x.Pairs[i+1]) 109 | } 110 | default: 111 | // Fallback on type name to avoid heavy allocations 112 | h.WriteByte(255) 113 | h.WriteString(reflect.TypeOf(v).String()) 114 | } 115 | } 116 | writeScmer(k) 117 | return h.Sum64() 118 | } 119 | 120 | func (d *FastDict) findPos(key Scmer, h uint64) (int, bool) { 121 | if d.index == nil { 122 | return -1, false 123 | } 124 | if bucket, ok := d.index[h]; ok { 125 | for _, pos := range bucket { 126 | if Equal(d.Pairs[pos], key) { 127 | return pos, true 128 | } 129 | } 130 | } 131 | return -1, false 132 | } 133 | 134 | func (d *FastDict) Get(key Scmer) (Scmer, bool) { 135 | h := HashKey(key) 136 | if pos, ok := d.findPos(key, h); ok { 137 | return d.Pairs[pos+1], true 138 | } 139 | return nil, false 140 | } 141 | 142 | // Set sets or merges a value for key. If merge is nil, it overwrites. 143 | func (d *FastDict) Set(key, value Scmer, merge func(oldV, newV Scmer) Scmer) { 144 | if d.index == nil { 145 | d.index = make(map[uint64][]int) 146 | } 147 | h := HashKey(key) 148 | if pos, ok := d.findPos(key, h); ok { 149 | if merge != nil { 150 | d.Pairs[pos+1] = merge(d.Pairs[pos+1], value) 151 | } else { 152 | d.Pairs[pos+1] = value 153 | } 154 | return 155 | } 156 | // append new 157 | pos := len(d.Pairs) 158 | d.Pairs = append(d.Pairs, key, value) 159 | d.index[h] = append(d.index[h], pos) 160 | } 161 | 162 | func (d *FastDict) ToList() []Scmer { return d.Pairs } 163 | -------------------------------------------------------------------------------- /scm/date.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2024 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 
13 | 
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see .
16 | */
17 | package scm
18 | 
19 | import "time"
20 | 
21 | func init_date() {
22 | // date functions
23 | DeclareTitle("Date")
24 | allowed_formats := []string{
25 | "2006-01-02 15:04:05.000000",
26 | "2006-01-02 15:04:05",
27 | "2006-01-02 15:04",
28 | "2006-01-02",
29 | "06-01-02 15:04:05.000000",
30 | "06-01-02 15:04:05",
31 | "06-01-02 15:04",
32 | "06-01-02",
33 | }
34 | 
35 | Declare(&Globalenv, &Declaration{
36 | "now", "returns the unix timestamp",
37 | 0, 0,
38 | []DeclarationParameter{}, "int",
39 | func(a ...Scmer) (result Scmer) {
40 | return int64(time.Now().Unix())
41 | },
42 | false,
43 | })
44 | Declare(&Globalenv, &Declaration{
45 | "parse_date", "parses a unix timestamp from a date string",
46 | 1, 1,
47 | []DeclarationParameter{
48 | DeclarationParameter{"value", "string", "value to parse"},
49 | }, "int",
50 | func(a ...Scmer) Scmer {
51 | for _, format := range allowed_formats { // try all formats in order
52 | if t, err := time.Parse(format, String(a[0])); err == nil { // only return on a successful parse
53 | return int64(t.Unix())
54 | }
55 | }
56 | return nil
57 | },
58 | true,
59 | })
60 | }
61 | 
-------------------------------------------------------------------------------- /scm/jit_arm64.go: --------------------------------------------------------------------------------
1 | //go:build arm64
2 | 
3 | /*
4 | Copyright (C) 2024 Carl-Philip Hänsch
5 | 
6 | This program is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published by
8 | the Free Software Foundation, either version 3 of the License, or
9 | (at your option) any later version.
10 | 
11 | This program is distributed in the hope that it will be useful,
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | GNU General Public License for more details.
15 | 
16 | You should have received a copy of the GNU General Public License
17 | along with this program. If not, see .
18 | */ 19 | package scm 20 | 21 | import "unsafe" 22 | 23 | // TODO: create this file for other architectures, too 24 | 25 | // all code snippets fill rax+rbx with the return value 26 | func jitReturnLiteral(value Scmer) []byte { 27 | code := []byte{ 28 | // TODO 29 | } 30 | // insert the literal into the immediate values 31 | *(*unsafe.Pointer)(unsafe.Pointer(&code[2])) = *(*unsafe.Pointer)(unsafe.Pointer(&value)) 32 | *(*unsafe.Pointer)(unsafe.Pointer(&code[12])) = *((*unsafe.Pointer)(unsafe.Add(unsafe.Pointer(&value), 8))) 33 | return code 34 | } 35 | 36 | func jitNthArgument(idx int) []byte { // up to 16 params 37 | // TODO: corner case 0, corner case >=16 38 | code := []byte{ 39 | // TODO 40 | } 41 | return code 42 | } 43 | 44 | func jitStackFrame(size uint8) []byte { 45 | return []byte{ 46 | // TODO 47 | } 48 | } 49 | 50 | /* TODO: peephole optimizer: 51 | - remove argument checks (test rbx,rbx 48 85 db 76 xx) 52 | - shorten immediate values 53 | - constant-fold operations 54 | - inline functions 55 | - jump to other functions 56 | */ 57 | -------------------------------------------------------------------------------- /scm/prompt.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2023 Carl-Philip Hänsch 3 | Copyright (C) 2013 Pieter Kelchtermans (originally licensed unter WTFPL 2.0) 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 
17 | */ 18 | 19 | package scm 20 | 21 | import ( 22 | "bytes" 23 | "fmt" 24 | "github.com/chzyer/readline" 25 | "io" 26 | "log" 27 | "os" 28 | "regexp" 29 | "runtime/debug" 30 | "strings" 31 | ) 32 | 33 | const newprompt = "\033[32m>\033[0m " 34 | const contprompt = "\033[32m.\033[0m " 35 | const resultprompt = "\033[31m=\033[0m " 36 | 37 | var lambdaExpr *regexp.Regexp = regexp.MustCompile("\\(lambda\\s*\\(([^)]+)\\)") 38 | 39 | /* implements interface readline.AutoCompleter */ 40 | func (en *Env) Do(line []rune, pos int) (newLine [][]rune, offset int) { 41 | start := pos 42 | for start >= 1 && line[start-1] != '(' && line[start-1] != ')' && line[start-1] != ' ' { 43 | start-- 44 | } 45 | pfx := string(line[start:pos]) 46 | offset = len(pfx) 47 | // iterate documentation 48 | for _, d := range declarations { 49 | if strings.HasPrefix(d.Name, pfx) && en.FindRead(Symbol(d.Name)) != nil { 50 | if d.Name == "lambda" { 51 | newLine = append(newLine, []rune("lambda ("[offset:])) 52 | } else { 53 | newLine = append(newLine, []rune(d.Name[offset:])) 54 | } 55 | } 56 | } 57 | // iterate variables 58 | for en != nil { 59 | for s, _ := range en.Vars { 60 | if strings.HasPrefix(string(s), pfx) { 61 | newLine = append(newLine, []rune(s[offset:])) 62 | } 63 | } 64 | en = en.Outer // iterate over parent scope 65 | } 66 | // find lambda variables in the line 67 | for _, m := range lambdaExpr.FindAllStringSubmatch(string(line), -1) { 68 | // each declared parameter of the lambda is also completed 69 | for _, s := range strings.Split(m[1], " ") { 70 | if strings.HasPrefix(s, pfx) { 71 | newLine = append(newLine, []rune(s[offset:])) 72 | } 73 | } 74 | } 75 | return 76 | } 77 | 78 | var ReplInstance *readline.Instance 79 | 80 | func Repl(en *Env) { 81 | l, err := readline.NewEx(&readline.Config{ 82 | Prompt: newprompt, 83 | HistoryFile: ".memcp-history.tmp", 84 | AutoComplete: en, 85 | InterruptPrompt: "^C", 86 | EOFPrompt: "exit", 87 | HistorySearchFold: true, 88 | }) 89 | if err != nil { 90 | panic(err) 91 | } 92 | ReplInstance = l 93 | defer l.Close() 94 | l.CaptureExitSignal() 95 | 96 | oldline := "" 97 | for { 98 | line, err := l.Readline() 99 | line = oldline + line 100 | if err == readline.ErrInterrupt { 101 | if len(line) == 0 { 102 | break 103 | } else { 104 | continue 105 | } 106 | } else if err == io.EOF { 107 | break 108 | } else if err != nil { 109 | panic(err) 110 | } 111 | if line == "" { 112 | continue 113 | } 114 | 115 | // anti-panic func 116 | func() { 117 | defer func() { 118 | if r := recover(); r != nil { 119 | rs := fmt.Sprint(r) 120 | if strings.Contains(rs, "expecting matching )") { 121 | // keep oldline 122 | oldline = line + "\n" 123 | l.SetPrompt(contprompt) 124 | return 125 | } 126 | PrintError(r) 127 | oldline = "" 128 | l.SetPrompt(newprompt) 129 | } 130 | }() 131 | var b bytes.Buffer 132 | code := Read("user prompt", line) 133 | Validate(code, "any") 134 | code = Optimize(code, en) 135 | result := Eval(code, en) 136 | Serialize(&b, result, en) 137 | fmt.Print(resultprompt) 138 | fmt.Println(b.String()) 139 | oldline = "" 140 | l.SetPrompt(newprompt) 141 | }() 142 | } 143 | ReplInstance = nil 144 | } 145 | 146 | var errorlog *log.Logger 147 | 148 | func init() { 149 | errorlog = log.New(os.Stderr, "", 0) 150 | } 151 | func PrintError(r any) { 152 | s := fmt.Sprint(r) 153 | numlines := strings.Count(s, "\nin ")*4 + 9 // skip those stack trace lines that peel out of the error message 154 | trace := string(debug.Stack()) 155 | for numlines > 0 { 156 | if trace == "" { 157 | break 
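// stack text exhausted before all of the error-message header lines could be skipped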
158 | } 159 | if trace[0] == '\n' { 160 | numlines-- 161 | } 162 | trace = trace[1:] 163 | } 164 | errorlog.Println(r, ": \n", trace) 165 | } 166 | -------------------------------------------------------------------------------- /scm/scheduler.go: -------------------------------------------------------------------------------- 1 | package scm 2 | 3 | import ( 4 | "container/heap" 5 | "context" 6 | "fmt" 7 | "runtime/debug" 8 | "sync" 9 | "time" 10 | ) 11 | 12 | type Task func() 13 | 14 | type task struct { 15 | runAt time.Time 16 | fn Task 17 | id uint64 18 | } 19 | 20 | type taskHeap []task 21 | 22 | func (h taskHeap) Len() int { return len(h) } 23 | 24 | func (h taskHeap) Less(i, j int) bool { 25 | if h[i].runAt.Equal(h[j].runAt) { 26 | return h[i].id < h[j].id 27 | } 28 | return h[i].runAt.Before(h[j].runAt) 29 | } 30 | 31 | func (h taskHeap) Swap(i, j int) { 32 | h[i], h[j] = h[j], h[i] 33 | } 34 | 35 | func (h *taskHeap) Push(x any) { 36 | *h = append(*h, x.(task)) 37 | } 38 | 39 | func (h *taskHeap) Pop() any { 40 | old := *h 41 | n := len(old) 42 | item := old[n-1] 43 | *h = old[:n-1] 44 | return item 45 | } 46 | 47 | type Scheduler struct { 48 | mu sync.Mutex 49 | tasks taskHeap 50 | wakeCh chan struct{} 51 | stopCh chan struct{} 52 | cancel map[uint64]struct{} 53 | active map[uint64]struct{} 54 | stopped bool 55 | nextID uint64 56 | initOnce sync.Once 57 | wg sync.WaitGroup 58 | } 59 | 60 | var DefaultScheduler Scheduler 61 | 62 | func init() { 63 | DefaultScheduler.init() 64 | } 65 | 66 | func (s *Scheduler) init() { 67 | s.initOnce.Do(func() { 68 | s.wakeCh = make(chan struct{}, 1) 69 | s.stopCh = make(chan struct{}) 70 | s.cancel = make(map[uint64]struct{}) 71 | s.active = make(map[uint64]struct{}) 72 | heap.Init(&s.tasks) 73 | s.wg.Add(1) 74 | go s.run() 75 | }) 76 | } 77 | 78 | func (s *Scheduler) ScheduleAt(t time.Time, fn Task) (uint64, bool) { 79 | if fn == nil { 80 | return 0, false 81 | } 82 | s.init() 83 | s.mu.Lock() 84 | defer s.mu.Unlock() 85 | if s.stopped { 86 | return 0, false 87 | } 88 | s.nextID++ 89 | id := s.nextID 90 | newTask := task{runAt: t, fn: fn, id: id} 91 | heap.Push(&s.tasks, newTask) 92 | s.active[id] = struct{}{} 93 | delete(s.cancel, id) 94 | shouldWake := len(s.tasks) > 0 && s.tasks[0].id == id 95 | if shouldWake { 96 | s.signalLocked() 97 | } 98 | return id, true 99 | } 100 | 101 | func (s *Scheduler) ScheduleAfter(delay time.Duration, fn Task) (uint64, bool) { 102 | if delay < 0 { 103 | delay = 0 104 | } 105 | return s.ScheduleAt(time.Now().Add(delay), fn) 106 | } 107 | 108 | func (s *Scheduler) Clear(id uint64) bool { 109 | s.init() 110 | s.mu.Lock() 111 | defer s.mu.Unlock() 112 | if s.stopped { 113 | return false 114 | } 115 | if _, ok := s.active[id]; !ok { 116 | return false 117 | } 118 | s.cancel[id] = struct{}{} 119 | delete(s.active, id) 120 | s.signalLocked() 121 | return true 122 | } 123 | 124 | func (s *Scheduler) Stop() { 125 | s.init() 126 | s.mu.Lock() 127 | if s.stopped { 128 | s.mu.Unlock() 129 | s.wg.Wait() 130 | return 131 | } 132 | s.stopped = true 133 | close(s.stopCh) 134 | s.mu.Unlock() 135 | s.signal() 136 | s.wg.Wait() 137 | } 138 | 139 | func (s *Scheduler) signalLocked() { 140 | select { 141 | case s.wakeCh <- struct{}{}: 142 | default: 143 | } 144 | } 145 | 146 | func (s *Scheduler) signal() { 147 | signalC := s.wakeCh 148 | if signalC == nil { 149 | return 150 | } 151 | select { 152 | case signalC <- struct{}{}: 153 | default: 154 | } 155 | } 156 | 157 | func (s *Scheduler) runTask(fn Task) { 158 | defer func() { 159 
| if r := recover(); r != nil { 160 | fmt.Printf("scheduler: task panic: %v\n", r) 161 | debug.PrintStack() 162 | } 163 | }() 164 | fn() 165 | } 166 | 167 | func (s *Scheduler) drainTimer(timer *time.Timer) { 168 | if timer != nil && !timer.Stop() { 169 | select { 170 | case <-timer.C: 171 | default: 172 | } 173 | } 174 | } 175 | 176 | func (s *Scheduler) run() { 177 | defer s.wg.Done() 178 | var timer *time.Timer 179 | for { 180 | s.mu.Lock() 181 | if len(s.tasks) == 0 { 182 | if s.stopped { 183 | s.mu.Unlock() 184 | return 185 | } 186 | s.mu.Unlock() 187 | select { 188 | case <-s.stopCh: 189 | return 190 | case <-s.wakeCh: 191 | } 192 | continue 193 | } 194 | next := s.tasks[0] 195 | if _, cancelled := s.cancel[next.id]; cancelled { 196 | heap.Pop(&s.tasks) 197 | delete(s.cancel, next.id) 198 | delete(s.active, next.id) 199 | s.mu.Unlock() 200 | continue 201 | } 202 | wait := time.Until(next.runAt) 203 | if wait <= 0 { 204 | heap.Pop(&s.tasks) 205 | delete(s.active, next.id) 206 | delete(s.cancel, next.id) 207 | s.mu.Unlock() 208 | go s.runTask(next.fn) 209 | continue 210 | } 211 | if timer == nil { 212 | timer = time.NewTimer(wait) 213 | } else { 214 | timer.Reset(wait) 215 | } 216 | s.mu.Unlock() 217 | select { 218 | case <-timer.C: 219 | case <-s.wakeCh: 220 | s.drainTimer(timer) 221 | case <-s.stopCh: 222 | s.drainTimer(timer) 223 | return 224 | } 225 | } 226 | } 227 | 228 | func init_scheduler() { 229 | DeclareTitle("Scheduler") 230 | Declare(&Globalenv, &Declaration{ 231 | "setTimeout", "Schedules a callback to run after the given delay in milliseconds (fractional values allowed for sub-millisecond precision).", 232 | 2, 1000, 233 | []DeclarationParameter{ 234 | {"callback", "func", "function to execute once the timeout expires"}, 235 | {"milliseconds", "number", "milliseconds until execution"}, 236 | {"args...", "any", "optional arguments forwarded to the callback"}, 237 | }, "int", 238 | setTimeout, false, 239 | }) 240 | Declare(&Globalenv, &Declaration{ 241 | "clearTimeout", "Cancels a timeout created with setTimeout.", 242 | 1, 1, 243 | []DeclarationParameter{ 244 | {"id", "number", "identifier returned by setTimeout"}, 245 | }, "bool", 246 | clearTimeout, false, 247 | }) 248 | } 249 | 250 | func setTimeout(a ...Scmer) Scmer { 251 | if len(a) < 2 { 252 | panic("setTimeout expects at least a callback and delay") 253 | } 254 | 255 | callback := a[0] 256 | millis := ToFloat(a[1]) 257 | if millis < 0 { 258 | millis = 0 259 | } 260 | 261 | duration := time.Duration(millis * float64(time.Millisecond)) 262 | callbackArgs := append([]Scmer(nil), a[2:]...) 263 | id, ok := DefaultScheduler.ScheduleAfter(duration, func() { 264 | NewContext(context.TODO(), func() { 265 | Apply(callback, callbackArgs...) 266 | }) 267 | }) 268 | if !ok { 269 | return false 270 | } 271 | return int64(id) 272 | } 273 | 274 | func clearTimeout(a ...Scmer) Scmer { 275 | if len(a) != 1 { 276 | panic("clearTimeout expects one argument") 277 | } 278 | id := uint64(ToInt(a[0])) 279 | return DefaultScheduler.Clear(id) 280 | } 281 | -------------------------------------------------------------------------------- /scm/streams.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2024 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 
8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | package scm 18 | 19 | import "io" 20 | import "bufio" 21 | import "compress/gzip" 22 | import "github.com/ulikunitz/xz" 23 | 24 | func init_streams() { 25 | // string functions 26 | DeclareTitle("Streams") 27 | 28 | Declare(&Globalenv, &Declaration{ 29 | "streamString", "creates a stream that contains a string", 30 | 1, 1, 31 | []DeclarationParameter{ 32 | DeclarationParameter{"content", "string", "content to put into the stream"}, 33 | }, "stream", 34 | func(a ...Scmer) (result Scmer) { 35 | reader, writer := io.Pipe() 36 | go func() { 37 | io.WriteString(writer, String(a[0])) 38 | writer.Close() 39 | }() 40 | return io.Reader(reader) 41 | }, false, 42 | }) 43 | Declare(&Globalenv, &Declaration{ 44 | "gzip", "compresses a stream with gzip. Create streams with (stream filename)", 45 | 1, 1, 46 | []DeclarationParameter{ 47 | DeclarationParameter{"stream", "stream", "input stream"}, 48 | }, "stream", 49 | func(a ...Scmer) (result Scmer) { 50 | stream := a[0].(io.Reader) 51 | reader, writer := io.Pipe() 52 | bwriter := bufio.NewWriterSize(writer, 16*1024) 53 | zip := gzip.NewWriter(bwriter) 54 | go func() { 55 | io.Copy(zip, stream) 56 | zip.Close() 57 | bwriter.Flush() 58 | writer.Close() 59 | }() 60 | return (io.Reader)(reader) 61 | }, false, 62 | }) 63 | Declare(&Globalenv, &Declaration{ 64 | "xz", "compresses a stream with xz. Create streams with (stream filename)", 65 | 1, 1, 66 | []DeclarationParameter{ 67 | DeclarationParameter{"stream", "stream", "input stream"}, 68 | }, "stream", 69 | func(a ...Scmer) (result Scmer) { 70 | stream := a[0].(io.Reader) 71 | reader, writer := io.Pipe() 72 | bwriter := bufio.NewWriterSize(writer, 16*1024) 73 | zip, err := xz.NewWriter(bwriter) 74 | go func() { 75 | io.Copy(zip, stream) 76 | zip.Close() 77 | bwriter.Flush() 78 | writer.Close() 79 | }() 80 | if err != nil { 81 | panic(err) 82 | } 83 | return (io.Reader)(reader) 84 | }, false, 85 | }) 86 | Declare(&Globalenv, &Declaration{ 87 | "zcat", "turns a compressed gzip stream into a stream of uncompressed data. Create streams with (stream filename)", 88 | 1, 1, 89 | []DeclarationParameter{ 90 | DeclarationParameter{"stream", "stream", "input stream"}, 91 | }, "stream", 92 | func(a ...Scmer) (result Scmer) { 93 | stream := a[0].(io.Reader) 94 | result, err := gzip.NewReader(stream) 95 | if err != nil { 96 | panic(err) 97 | } 98 | return result 99 | }, false, 100 | }) 101 | Declare(&Globalenv, &Declaration{ 102 | "xzcat", "turns a compressed xz stream into a stream of uncompressed data. 
Create streams with (stream filename)", 103 | 1, 1, 104 | []DeclarationParameter{ 105 | DeclarationParameter{"stream", "stream", "input stream"}, 106 | }, "stream", 107 | func(a ...Scmer) (result Scmer) { 108 | stream := a[0].(io.Reader) 109 | result, err := xz.NewReader(stream) 110 | if err != nil { 111 | panic(err) 112 | } 113 | return result 114 | }, false, 115 | }) 116 | } 117 | -------------------------------------------------------------------------------- /scm/trace.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2024 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | package scm 18 | 19 | import "io" 20 | import "os" 21 | import "fmt" 22 | import "sync" 23 | import "time" 24 | import "encoding/json" 25 | 26 | type Tracefile struct { 27 | isFirst bool 28 | file io.WriteCloser 29 | m sync.Mutex 30 | } 31 | 32 | var Trace *Tracefile // default trace: set to not nil if you want to trace 33 | var TracePrint bool // whether to print traces to stdout 34 | 35 | func SetTrace(on bool) { // sets Trace to nil or a value 36 | if Trace != nil { 37 | Trace.Close() 38 | Trace = nil 39 | } 40 | if on { 41 | // TODO: tracefolder 42 | f, err := os.Create(os.Getenv("MEMCP_TRACEDIR") + "trace_" + fmt.Sprint(time.Now().Unix()) + ".json") 43 | if err != nil { 44 | panic(err) 45 | } 46 | Trace = NewTrace(f) 47 | } 48 | } 49 | 50 | func NewTrace(file io.WriteCloser) *Tracefile { 51 | file.Write([]byte("[")) 52 | result := new(Tracefile) 53 | result.file = file 54 | result.isFirst = true 55 | return result 56 | } 57 | 58 | func (t *Tracefile) Close() { 59 | t.file.Write([]byte("]")) 60 | t.file.Close() 61 | } 62 | 63 | func (t *Tracefile) Duration(name string, cat string, f func()) { 64 | t.EventHalf(name, cat, "B", 0, 0) 65 | defer t.EventHalf(name, cat, "E", 0, 0) 66 | f() 67 | } 68 | 69 | func (t *Tracefile) Event(name string, cat string, typ string) { 70 | t.EventHalf(name, cat, typ, 0, 0) 71 | } 72 | 73 | func (t *Tracefile) EventHalf(name string, cat string, typ string, tid int, pid int) { 74 | ts := time.Since(start).Microseconds() 75 | t.EventFull(name, cat, typ, ts, tid, pid) 76 | } 77 | 78 | /* 79 | * 80 | 81 | @name string function 82 | @cat string comma separated categories (for filtering) 83 | @typ B/E for begin/end, X for events 84 | @ts timestamp in microseconds 85 | @pid process id 86 | @tid thread id 87 | @args ?? 
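
Example of a single emitted record (illustrative name/cat values; field order matches EventFull below):
{"name": "scan", "cat": "storage", "ph": "B", "ts": 1234, "pid": 0, "tid": 0, "s": "g"}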
88 | */
89 | func (t *Tracefile) EventFull(name string, cat string, typ string, ts int64, tid int, pid int) {
90 | t.m.Lock()
91 | if t.isFirst {
92 | t.isFirst = false
93 | } else {
94 | t.file.Write([]byte(",\n"))
95 | }
96 | t.file.Write([]byte("{\"name\": "))
97 | b, _ := json.Marshal(name) // name
98 | t.file.Write(b)
99 | t.file.Write([]byte(", \"cat\": "))
100 | b, _ = json.Marshal(cat) // cat
101 | t.file.Write(b)
102 | t.file.Write([]byte(", \"ph\": \""))
103 | t.file.Write([]byte(typ))
104 | t.file.Write([]byte("\", \"ts\": "))
105 | b, _ = json.Marshal(ts) // ts
106 | t.file.Write(b)
107 | t.file.Write([]byte(", \"pid\": "))
108 | b, _ = json.Marshal(pid) // pid
109 | t.file.Write(b)
110 | t.file.Write([]byte(", \"tid\": "))
111 | b, _ = json.Marshal(tid) // tid
112 | t.file.Write(b)
113 | t.file.Write([]byte(", \"s\": \"g\"}"))
114 | t.m.Unlock()
115 | }
116 | 
117 | var start time.Time = time.Now()
118 | 
-------------------------------------------------------------------------------- /scm/vector.go: --------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2025 Carl-Philip Hänsch
3 | 
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 | 
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 | 
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see .
16 | */
17 | package scm
18 | 
19 | import "math"
20 | 
21 | func init_vector() {
22 | // vector functions
23 | DeclareTitle("Vectors")
24 | 
25 | Declare(&Globalenv, &Declaration{
26 | "dot", "produces the dot product (or the cosine similarity / euclidean distance, depending on mode)",
27 | 2, 3,
28 | []DeclarationParameter{
29 | DeclarationParameter{"v1", "list", "vector1"},
30 | DeclarationParameter{"v2", "list", "vector2"},
31 | DeclarationParameter{"mode", "string", "DOT, COSINE, EUCLIDEAN, default is DOT"},
32 | }, "number",
33 | func(a ...Scmer) Scmer {
34 | var result float64 = 0
35 | v1 := a[0].([]Scmer)
36 | v2 := a[1].([]Scmer)
37 | if len(a) > 2 && a[2] == "COSINE" {
38 | // COSINE
39 | var lena float64 = 0
40 | var lenb float64 = 0
41 | for i := 0; i < len(v1) && i < len(v2); i++ {
42 | w1 := ToFloat(v1[i])
43 | w2 := ToFloat(v2[i])
44 | lena += w1 * w1
45 | lenb += w2 * w2
46 | result += w1 * w2
47 | }
48 | result = result / math.Sqrt(lena*lenb)
49 | } else if len(a) > 2 && a[2] == "EUCLIDEAN" {
50 | // EUCLIDEAN: square root of the sum of squared coordinate differences
51 | for i := 0; i < len(v1) && i < len(v2); i++ {
52 | d := ToFloat(v1[i]) - ToFloat(v2[i])
53 | result += d * d
54 | }
55 | result = math.Sqrt(result)
56 | } else {
57 | // DOT
58 | for i := 0; i < len(v1) && i < len(v2); i++ {
59 | result += ToFloat(v1[i]) * ToFloat(v2[i])
60 | }
61 | }
62 | return result
63 | }, true,
64 | })
65 | }
66 | 
-------------------------------------------------------------------------------- /storage/analyzer.go: --------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2023 Carl-Philip Hänsch
3 | 
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | package storage 18 | 19 | import "sort" 20 | import "github.com/launix-de/memcp/scm" 21 | 22 | type columnboundaries struct { 23 | col string 24 | lower scm.Scmer 25 | lowerInclusive bool 26 | upper scm.Scmer 27 | upperInclusive bool 28 | } 29 | 30 | type boundaries []columnboundaries 31 | 32 | // analyzes a lambda expression for value boundaries, so the best index can be found 33 | func extractBoundaries(conditionCols []string, condition scm.Scmer) boundaries { 34 | p := condition.(scm.Proc) 35 | symbolmapping := make(map[scm.Symbol]string) 36 | for i, sym := range p.Params.([]scm.Scmer) { 37 | symbolmapping[sym.(scm.Symbol)] = conditionCols[i] 38 | } 39 | cols := make([]columnboundaries, 0, 4) 40 | addConstraint := func(in []columnboundaries, b2 columnboundaries) []columnboundaries { 41 | for i, b := range in { 42 | if b.col == b2.col { 43 | // column match -> merge value range 44 | if b.lower == nil || b2.lower != nil && scm.Less(b.lower, b2.lower) { 45 | // both values are ANDed, so take the higher value as lower bound 46 | in[i].lower = b2.lower 47 | } 48 | in[i].lowerInclusive = b.lowerInclusive || b2.lowerInclusive // TODO: check correctness 49 | if b.upper == nil || b2.upper != nil && scm.Less(b2.upper, b.upper) { 50 | // the lower of both upper values will be the new upper bound 51 | in[i].upper = b2.upper 52 | } 53 | in[i].upperInclusive = b.upperInclusive || b2.upperInclusive // TODO: check correctness 54 | return in 55 | } 56 | } 57 | // else: append 58 | return append(in, b2) 59 | } 60 | // analyze condition for AND clauses, equal? < > <= >= BETWEEN 61 | extractConstant := func(v scm.Scmer) (scm.Scmer, bool) { 62 | switch val := v.(type) { 63 | case int64, float64, string: 64 | // equals column vs. constant 65 | return val, true 66 | case scm.Symbol: 67 | if val2, ok := condition.(scm.Proc).En.Vars[val]; ok { 68 | switch val3 := val2.(type) { 69 | // bound constant 70 | case int64, float64, string: 71 | // equals column vs. constant 72 | return val3, true 73 | } 74 | } 75 | case []scm.Scmer: 76 | if val[0] == scm.Symbol("outer") { 77 | if sym, ok := val[1].(scm.Symbol); ok { 78 | if val2, ok := condition.(scm.Proc).En.Vars[sym]; ok { 79 | switch val3 := val2.(type) { 80 | // bound constant 81 | case int64, float64, string: 82 | // equals column vs. constant 83 | return val3, true 84 | } 85 | } 86 | } 87 | } 88 | } 89 | return nil, false 90 | } 91 | var traverseCondition func(scm.Scmer) 92 | traverseCondition = func(node scm.Scmer) { 93 | switch v := node.(type) { 94 | case []scm.Scmer: 95 | if v[0] == scm.Symbol("equal?") || v[0] == scm.Symbol("equal??") { 96 | // equi 97 | switch v1 := v[1].(type) { 98 | case scm.Symbol: 99 | if col, ok := symbolmapping[v1]; ok { // left is a column 100 | if v2, ok := extractConstant(v[2]); ok { // right is a constant 101 | // ?equal var const 102 | cols = addConstraint(cols, columnboundaries{col, v2, true, v2, true}) 103 | } 104 | } 105 | // TODO: equals constant vs. 
column 106 | } 107 | } else if v[0] == scm.Symbol("<") || v[0] == scm.Symbol("<=") { 108 | // compare 109 | switch v1 := v[1].(type) { 110 | case scm.Symbol: 111 | if col, ok := symbolmapping[v1]; ok { // left is a column 112 | if v2, ok := extractConstant(v[2]); ok { // right is a constant 113 | // ?equal var const 114 | cols = addConstraint(cols, columnboundaries{col, nil, false, v2, v[0] == scm.Symbol("<=")}) 115 | } 116 | } 117 | // TODO: constant vs. column 118 | } 119 | } else if v[0] == scm.Symbol(">") || v[0] == scm.Symbol(">=") { 120 | // compare 121 | switch v1 := v[1].(type) { 122 | case scm.Symbol: 123 | if col, ok := symbolmapping[v1]; ok { // left is a column 124 | if v2, ok := extractConstant(v[2]); ok { // right is a constant 125 | // ?equal var const 126 | cols = addConstraint(cols, columnboundaries{col, v2, v[0] == scm.Symbol(">="), nil, false}) 127 | } 128 | } 129 | // TODO: constant vs. column 130 | } 131 | } else if v[0] == scm.Symbol("and") { 132 | // AND -> recursive traverse 133 | for i := 1; i < len(v); i++ { 134 | traverseCondition(v[i]) 135 | } 136 | } 137 | // TODO: <, >, <=, >= 138 | // TODO: OR -> merge multiple 139 | // TODO: variable expressions that can be expanded 140 | } 141 | } 142 | traverseCondition(p.Body) // recursive analysis over condition 143 | 144 | // sort columns -> at first, the lower==upper alphabetically; then one lower!=upper according to best selectivity; discard the rest 145 | sort.Slice(cols, func(i, j int) bool { 146 | if cols[i].lower == cols[i].upper && cols[j].lower != cols[j].upper { 147 | return true // put equal?-conditions leftmost 148 | } 149 | return cols[i].col < cols[j].col // otherwise: alphabetically 150 | }) 151 | 152 | return cols 153 | } 154 | 155 | func indexFromBoundaries(cols boundaries) (lower []scm.Scmer, upperLast scm.Scmer) { 156 | if len(cols) > 0 { 157 | //fmt.Println("conditions:", cols) 158 | // build up lower and upper bounds of index 159 | for { 160 | if len(cols) >= 2 && cols[len(cols)-2].lower != cols[len(cols)-2].upper { 161 | // remove last col -> we cant have two ranged cols 162 | cols = cols[:len(cols)-1] 163 | } else { 164 | break // finished -> pure index 165 | } 166 | } 167 | // find out boundaries 168 | lower = make([]scm.Scmer, len(cols)) 169 | for i, v := range cols { 170 | lower[i] = v.lower 171 | } 172 | upperLast = cols[len(cols)-1].upper 173 | //fmt.Println(cols, lower, upperLast) // debug output if we found the right boundaries 174 | } 175 | return 176 | } 177 | -------------------------------------------------------------------------------- /storage/cache.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2025 MemCP Contributors 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */ 17 | package storage 18 | 19 | import ( 20 | "sort" 21 | "time" 22 | ) 23 | 24 | type softItem struct { 25 | pointer any 26 | size int64 27 | priorityFactor int 28 | cleanup func(pointer any) 29 | getLastUsed func(pointer any) time.Time 30 | effectiveTime time.Time 31 | } 32 | 33 | // CacheManager manages memory-limited soft references. 34 | type CacheManager struct { 35 | memoryBudget int64 36 | currentMemory int64 37 | 38 | items []softItem 39 | indexMap map[any]int // pointer -> index in items slice 40 | 41 | opChan chan cacheOp 42 | } 43 | 44 | type cacheOp struct { 45 | add *softItem 46 | del any 47 | done chan struct{} 48 | } 49 | 50 | // NewCacheManager creates a new CacheManager with given memory budget. 51 | func NewCacheManager(memoryBudget int64) *CacheManager { 52 | cm := &CacheManager{ 53 | memoryBudget: memoryBudget, 54 | items: make([]softItem, 0), 55 | indexMap: make(map[any]int), 56 | opChan: make(chan cacheOp, 1024), 57 | } 58 | go cm.run() 59 | return cm 60 | } 61 | 62 | // AddItem inserts a new item into the cache. Cleanup is called if over budget. 63 | func (cm *CacheManager) AddItem( 64 | pointer any, 65 | size int64, 66 | priorityFactor int, 67 | cleanup func(pointer any), 68 | getLastUsed func(pointer any) time.Time, 69 | ) { 70 | item := &softItem{ 71 | pointer: pointer, 72 | size: size, 73 | priorityFactor: priorityFactor, 74 | cleanup: cleanup, 75 | getLastUsed: getLastUsed, 76 | effectiveTime: time.Now(), // always now for new items 77 | } 78 | done := make(chan struct{}) 79 | cm.opChan <- cacheOp{add: item, done: done} 80 | <-done 81 | } 82 | 83 | // Delete removes an item from the cache immediately. 84 | func (cm *CacheManager) Delete(pointer any) { 85 | done := make(chan struct{}) 86 | cm.opChan <- cacheOp{del: pointer, done: done} 87 | <-done 88 | } 89 | 90 | // run is the single-threaded goroutine handling all operations and cleanup. 91 | func (cm *CacheManager) run() { 92 | for op := range cm.opChan { 93 | if op.add != nil { 94 | cm.add(op.add) 95 | } else if op.del != nil { 96 | cm.delete(op.del) 97 | } 98 | if op.done != nil { 99 | close(op.done) 100 | } 101 | } 102 | } 103 | 104 | // add inserts a new softItem and triggers cleanup if over budget. 105 | func (cm *CacheManager) add(item *softItem) { 106 | idx := len(cm.items) 107 | cm.items = append(cm.items, *item) 108 | cm.indexMap[item.pointer] = idx 109 | cm.currentMemory += item.size 110 | 111 | if cm.currentMemory > cm.memoryBudget { 112 | cm.cleanup() 113 | } 114 | } 115 | 116 | // delete removes a softItem immediately. 117 | func (cm *CacheManager) delete(pointer any) { 118 | idx, ok := cm.indexMap[pointer] 119 | if !ok { 120 | return 121 | } 122 | item := cm.items[idx] 123 | item.cleanup(item.pointer) 124 | cm.currentMemory -= item.size 125 | 126 | lastIdx := len(cm.items) - 1 127 | if idx != lastIdx { 128 | cm.items[idx] = cm.items[lastIdx] 129 | cm.indexMap[cm.items[idx].pointer] = idx 130 | } 131 | cm.items = cm.items[:lastIdx] 132 | delete(cm.indexMap, pointer) 133 | } 134 | 135 | // cleanup frees memory to respect the memory budget (simple-stupid approach). 
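//
// Worked example (illustrative numbers, not a real configuration): with
// memoryBudget = 100 MiB, targetMemory below becomes 75 MiB
// (memoryBudget * 75 / 100), so one pass evicts items, oldest effectiveTime
// first, until currentMemory drops below 75 MiB rather than stopping right at
// the budget; a burst of AddItem calls therefore does not trigger a cleanup on
// every single insert.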
136 | func (cm *CacheManager) cleanup() { 137 | if cm.currentMemory <= cm.memoryBudget { 138 | return 139 | } 140 | 141 | targetMemory := cm.memoryBudget * 75 / 100 // free until 75% of budget 142 | 143 | // Step 1: recompute effectiveTime for all items 144 | for i := range cm.items { 145 | cm.items[i].effectiveTime = cm.items[i].getLastUsed(cm.items[i].pointer) 146 | } 147 | 148 | // Step 2: sort items by effectiveTime (oldest first) 149 | sort.Slice(cm.items, func(i, j int) bool { 150 | return cm.items[i].effectiveTime.Before(cm.items[j].effectiveTime) 151 | }) 152 | 153 | // Step 3: evict oldest items until memory is under target 154 | i := 0 155 | for cm.currentMemory > targetMemory && i < len(cm.items) { 156 | item := cm.items[i] 157 | item.cleanup(item.pointer) 158 | cm.currentMemory -= item.size 159 | delete(cm.indexMap, item.pointer) 160 | i++ 161 | } 162 | 163 | // Step 4: compact the slice 164 | cm.items = cm.items[i:] 165 | for idx, item := range cm.items { 166 | cm.indexMap[item.pointer] = idx 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /storage/compute.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2024 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | package storage 18 | 19 | import "fmt" 20 | import "sync" 21 | import "runtime" 22 | import "runtime/debug" 23 | import "github.com/jtolds/gls" 24 | import "github.com/launix-de/memcp/scm" 25 | 26 | func (t *table) ComputeColumn(name string, inputCols []string, computor scm.Scmer) { 27 | for i, c := range t.Columns { 28 | if c.Name == name { 29 | // found the column 30 | t.Columns[i].Computor = computor // set formula so delta storages and rebuild algo know how to recompute 31 | done := make(chan error, 6) 32 | shardlist := t.Shards 33 | if shardlist == nil { 34 | shardlist = t.PShards 35 | } 36 | for i, s := range shardlist { 37 | gls.Go(func(i int, s *storageShard) func() { 38 | return func() { 39 | defer func() { 40 | if r := recover(); r != nil { 41 | //fmt.Println("panic during compute:", r, string(debug.Stack())) 42 | done <- scanError{r, string(debug.Stack())} 43 | } 44 | }() 45 | for !s.ComputeColumn(name, inputCols, computor, len(shardlist) == 1) { 46 | // couldn't compute column because delta is still active 47 | t.mu.Lock() 48 | s = s.rebuild(false) 49 | shardlist[i] = s 50 | t.mu.Unlock() 51 | // persist new shard UUID after publishing 52 | t.schema.save() 53 | } 54 | done <- nil 55 | } 56 | }(i, s)) 57 | } 58 | for range shardlist { 59 | err := <-done // collect finish signal before return 60 | if err != nil { 61 | panic(err) 62 | } 63 | } 64 | return 65 | } 66 | } 67 | panic("column " + t.Name + "." 
+ name + " does not exist") 68 | } 69 | 70 | func (s *storageShard) ComputeColumn(name string, inputCols []string, computor scm.Scmer, parallel bool) bool { 71 | fmt.Println("start compute on", s.t.Name, "parallel", parallel) 72 | if s.deletions.Count() > 0 || len(s.inserts) > 0 { 73 | return false // can't compute in shards with delta storage 74 | } 75 | // We are going to mutate this shard's columns: mark shard as WRITE (not COLD) 76 | s.srState = WRITE 77 | // Ensure main_count and input storages are initialized before compute 78 | s.ensureMainCount(false) 79 | cols := make([]ColumnStorage, len(inputCols)) 80 | for i, col := range inputCols { 81 | cols[i] = s.getColumnStorageOrPanic(col) 82 | } 83 | vals := make([]scm.Scmer, s.main_count) // build the stretchy value array 84 | if parallel { 85 | var done sync.WaitGroup 86 | done.Add(int(s.main_count)) 87 | progress := make(chan uint, runtime.NumCPU()/2) // don't go all at once, we don't have enough RAM 88 | for i := 0; i < runtime.NumCPU()/2; i++ { 89 | gls.Go(func() { // threadpool with half of the cores 90 | // allocate a private parameter buffer per worker to avoid data races 91 | colvalues := make([]scm.Scmer, len(cols)) 92 | for i := range progress { 93 | for j, col := range cols { 94 | colvalues[j] = col.GetValue(i) // read values from main storage into lambda params 95 | } 96 | vals[i] = scm.Apply(computor, colvalues...) // execute computor kernel (but the onoptimized version for non-serial use) 97 | done.Done() 98 | } 99 | }) 100 | } 101 | // add all items to the queue 102 | for i := uint(0); i < s.main_count; i++ { 103 | progress <- i 104 | } 105 | close(progress) // signal workers to exit 106 | done.Wait() 107 | } else { 108 | // allocate a common param buffer to save allocations 109 | colvalues := make([]scm.Scmer, len(cols)) 110 | fn := scm.OptimizeProcToSerialFunction(computor) // optimize for serial application 111 | for i := uint(0); i < s.main_count; i++ { 112 | for j, col := range cols { 113 | colvalues[j] = col.GetValue(i) // read values from main storage into lambda params 114 | } 115 | vals[i] = fn(colvalues...) // execute computor kernel 116 | } 117 | } 118 | 119 | s.mu.Lock() // don't defer because we unlock inbetween 120 | store := new(StorageSCMER) 121 | store.values = vals 122 | s.columns[name] = store 123 | s.mu.Unlock() 124 | // TODO: decide whether to rebuild optimized store 125 | return true 126 | } 127 | -------------------------------------------------------------------------------- /storage/csv.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2023 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */
17 | package storage
18 | 
19 | import "io"
20 | import "bufio"
21 | import "strings"
22 | import "github.com/launix-de/memcp/scm"
23 | 
24 | func LoadCSV(schema, table string, f io.Reader, delimiter string, firstLine bool) {
25 | scanner := bufio.NewScanner(f)
26 | scanner.Split(bufio.ScanLines)
27 | 
28 | lines := make(chan string, 512)
29 | 
30 | go func() {
31 | for scanner.Scan() {
32 | lines <- scanner.Text()
33 | }
34 | close(lines)
35 | }()
36 | 
37 | db := GetDatabase(schema)
38 | if db == nil {
39 | panic("database " + schema + " does not exist")
40 | }
41 | t := db.GetTable(table)
42 | if t == nil {
43 | panic("table " + table + " does not exist")
44 | }
45 | var cols []string
46 | if firstLine {
47 | // the reader goroutine above owns the scanner, so take the header from the channel
48 | header, ok := <-lines
49 | if !ok { panic("CSV does not contain header line") }
50 | cols = strings.Split(header, delimiter) // read headerline
51 | } else {
52 | // otherwise use the table's column order
53 | cols = make([]string, len(t.Columns))
54 | for i, col := range t.Columns {
55 | cols[i] = col.Name
56 | }
57 | }
58 | buffer := make([][]scm.Scmer, 0, 4096)
59 | for s := range lines {
60 | if s == "" {
61 | // ignore
62 | } else {
63 | arr := strings.Split(s, delimiter)
64 | x := make([]scm.Scmer, len(t.Columns))
65 | for i, _ := range t.Columns {
66 | if i < len(arr) {
67 | x[i] = scm.Simplify(arr[i])
68 | }
69 | }
70 | buffer = append(buffer, x)
71 | if len(buffer) >= 4096 {
72 | t.Insert(cols, buffer, nil, nil, false, nil)
73 | buffer = buffer[:0]
74 | }
75 | }
76 | }
77 | if len(buffer) > 0 {
78 | t.Insert(cols, buffer, nil, nil, false, nil)
79 | }
80 | }
81 | 
-------------------------------------------------------------------------------- /storage/json.go: --------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2023 Carl-Philip Hänsch
3 | 
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 | 
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 | 
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see .
16 | */ 17 | package storage 18 | 19 | /* 20 | 21 | JSON storage on disk for persistence: 22 | - each node has its own data folder 23 | - each db/table.jsonl is a jsonl file 24 | - the first line is #table so it can be read by a simple .jsonl 25 | - a line can also say #delete 26 | - a line can also say #update json 27 | - on rewrite, db/_table.jsonl is rebuild and replaced (maybe once a week) 28 | 29 | */ 30 | 31 | import "io" 32 | import "bufio" 33 | import "encoding/json" 34 | import "github.com/launix-de/memcp/scm" 35 | 36 | func LoadJSON(schema string, f io.Reader) { 37 | scanner := bufio.NewScanner(f) 38 | scanner.Split(bufio.ScanLines) 39 | 40 | lines := make(chan string, 512) 41 | 42 | go func() { 43 | for scanner.Scan() { 44 | lines <- scanner.Text() 45 | } 46 | close(lines) 47 | }() 48 | 49 | var t *table 50 | for s := range lines { 51 | if s == "" { 52 | // ignore 53 | } else if s[0:7] == "#table " { 54 | // new table (or find the existing one) 55 | t, _ = CreateTable(schema, s[7:], Safe, true) 56 | } else if s[0] == '#' { 57 | // comment 58 | } else { 59 | if t == nil { 60 | panic("no table set") 61 | } else { 62 | if len(t.Columns) == 0 { 63 | // JSON with an unknown table format -> create dummy cols 64 | var x map[string]scm.Scmer 65 | json.Unmarshal([]byte(s), &x) // parse JSON 66 | for c, _ := range x { 67 | // create column with dummy storage for next rebuild 68 | t.CreateColumn(c, "ANY", []int{}, []scm.Scmer{"comment", "json import"}) 69 | } 70 | } 71 | func(t *table, s string) { 72 | var y map[string]scm.Scmer 73 | json.Unmarshal([]byte(s), &y) // parse JSON 74 | cols := make([]string, len(y)) 75 | x := make([]scm.Scmer, len(y)) 76 | i := 0 77 | for k, v := range y { 78 | cols[i] = k 79 | x[i] = v 80 | i++ 81 | } 82 | t.Insert(cols, [][]scm.Scmer{x}, nil, nil, false, nil) // put into table 83 | }(t, s) 84 | } 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /storage/limits.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2025 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | package storage 18 | 19 | import "runtime" 20 | 21 | // global semaphore to limit concurrent disk-backed load operations 22 | var loadSemaphore chan struct{} 23 | 24 | func init() { 25 | workers := runtime.NumCPU() 26 | if workers < 1 { 27 | workers = 1 28 | } 29 | loadSemaphore = make(chan struct{}, workers) 30 | // prefill with tokens 31 | for i := 0; i < workers; i++ { 32 | loadSemaphore <- struct{}{} 33 | } 34 | } 35 | 36 | // acquireLoadSlot blocks until a load slot is available and returns a release func. 
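//
// Usage sketch (illustrative; the load body is a placeholder):
//
//	release := acquireLoadSlot()
//	defer release()
//	// ... perform one disk-backed load while holding the slot ...
//
// The semaphore starts full with runtime.NumCPU() tokens, so at most that many
// loads run concurrently.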
37 | func acquireLoadSlot() func() { 38 | <-loadSemaphore 39 | return func() { loadSemaphore <- struct{}{} } 40 | } 41 | -------------------------------------------------------------------------------- /storage/overlay-blob.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2024 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | package storage 18 | 19 | import "io" 20 | import "fmt" 21 | import "unsafe" 22 | import "reflect" 23 | import "strings" 24 | import "compress/gzip" 25 | import "crypto/sha256" 26 | import "encoding/binary" 27 | import "github.com/launix-de/memcp/scm" 28 | 29 | type OverlayBlob struct { 30 | // every overlay has a base 31 | Base ColumnStorage 32 | // values 33 | values map[[32]byte]string // gzipped contents content addressable 34 | size uint 35 | } 36 | 37 | func (s *OverlayBlob) ComputeSize() uint { 38 | var sz uint = 48 + 48*uint(len(s.values)) + s.size + s.Base.ComputeSize() 39 | for _, v := range s.values { 40 | sz += 24 + 16 + (uint(len(v)-1)/8+1)*8 + 32 // some overhead + content 41 | } 42 | return sz 43 | } 44 | 45 | func (s *OverlayBlob) String() string { 46 | return fmt.Sprintf("overlay[%dx zip-blob %d]+%s", len(s.values), s.size, s.Base.String()) 47 | } 48 | 49 | func (s *OverlayBlob) Serialize(f io.Writer) { 50 | binary.Write(f, binary.LittleEndian, uint8(31)) // 31 = OverlayBlob 51 | io.WriteString(f, "1234567") // dummy 52 | var size uint64 = uint64(len(s.values)) 53 | binary.Write(f, binary.LittleEndian, size) // write number of overlay items 54 | for k, v := range s.values { 55 | f.Write(k[:]) 56 | binary.Write(f, binary.LittleEndian, uint64(len(v))) // write length 57 | io.WriteString(f, v) // write content 58 | } 59 | s.Base.Serialize(f) // serialize base 60 | } 61 | 62 | func (s *OverlayBlob) Deserialize(f io.Reader) uint { 63 | var dummy [7]byte 64 | f.Read(dummy[:]) // read padding 65 | 66 | var size uint64 67 | binary.Read(f, binary.LittleEndian, &size) // read size 68 | s.values = make(map[[32]byte]string) 69 | 70 | for i := uint64(0); i < size; i++ { 71 | var key [32]byte 72 | f.Read(key[:]) 73 | var l uint64 74 | binary.Read(f, binary.LittleEndian, &l) 75 | value := make([]byte, l) 76 | f.Read(value) 77 | s.size += uint(l) // statistics 78 | s.values[key] = string(value) 79 | } 80 | var basetype uint8 81 | f.Read(unsafe.Slice(&basetype, 1)) 82 | s.Base = reflect.New(storages[basetype]).Interface().(ColumnStorage) 83 | l := s.Base.Deserialize(f) // read base 84 | return l 85 | } 86 | 87 | func (s *OverlayBlob) GetValue(i uint) scm.Scmer { 88 | v := s.Base.GetValue(i) 89 | switch v_ := v.(type) { 90 | case string: 91 | if v_ != "" && v_[0] == '!' { 92 | if v_[1] == '!' 
{ 93 | return v_[1:] // escaped string 94 | } else { 95 | // unpack from storage 96 | if v, ok := s.values[*(*[32]byte)(unsafe.Pointer(unsafe.StringData(v_[1:])))]; ok { 97 | var b strings.Builder 98 | reader, err := gzip.NewReader(strings.NewReader(v)) 99 | if err != nil { 100 | panic(err) 101 | } 102 | io.Copy(&b, reader) 103 | reader.Close() 104 | return b.String() 105 | } 106 | return nil // value was lost (this should not happen) 107 | } 108 | } else { 109 | return v 110 | } 111 | default: 112 | return v 113 | } 114 | } 115 | 116 | func (s *OverlayBlob) prepare() { 117 | // set up scan 118 | s.Base.prepare() 119 | } 120 | func (s *OverlayBlob) scan(i uint, value scm.Scmer) { 121 | switch v_ := value.(type) { 122 | case scm.LazyString: 123 | if v_.Hash != "" { 124 | s.Base.scan(i, "!"+v_.Hash) 125 | } else { 126 | s.Base.scan(i, v_.GetValue()) 127 | } 128 | case string: 129 | if len(v_) > 255 { 130 | h := sha256.New() 131 | io.WriteString(h, v_) 132 | s.Base.scan(i, fmt.Sprintf("!%s", h.Sum(nil))) 133 | } else { 134 | if v_ != "" && v_[0] == '!' { 135 | s.Base.scan(i, "!"+v_) // escape strings that start with ! 136 | } else { 137 | s.Base.scan(i, value) 138 | } 139 | } 140 | default: 141 | s.Base.scan(i, value) 142 | } 143 | } 144 | func (s *OverlayBlob) init(i uint) { 145 | s.values = make(map[[32]byte]string) 146 | s.size = 0 147 | s.Base.init(i) 148 | } 149 | func (s *OverlayBlob) build(i uint, value scm.Scmer) { 150 | switch v_ := value.(type) { 151 | case string: 152 | if len(v_) > 255 { 153 | h := sha256.New() 154 | io.WriteString(h, v_) 155 | hashsum := h.Sum(nil) 156 | s.Base.build(i, fmt.Sprintf("!%s", hashsum)) 157 | var b strings.Builder 158 | z := gzip.NewWriter(&b) 159 | io.Copy(z, strings.NewReader(v_)) 160 | z.Close() 161 | s.size += uint(b.Len()) 162 | s.values[*(*[32]byte)(unsafe.Pointer(&hashsum[0]))] = b.String() 163 | } else { 164 | if v_ != "" && v_[0] == '!' { 165 | s.Base.build(i, "!"+v_) // escape strings that start with ! 166 | } else { 167 | s.Base.build(i, value) 168 | } 169 | } 170 | default: 171 | s.Base.build(i, value) 172 | } 173 | } 174 | func (s *OverlayBlob) finish() { 175 | s.Base.finish() 176 | } 177 | func (s *OverlayBlob) proposeCompression(i uint) ColumnStorage { 178 | // dont't propose another pass 179 | return nil 180 | } 181 | -------------------------------------------------------------------------------- /storage/persistence-files.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2024 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */ 17 | package storage 18 | 19 | import "io" 20 | import "os" 21 | import "fmt" 22 | import "bufio" 23 | import "bytes" 24 | import "strings" 25 | import "crypto/sha256" 26 | import "encoding/json" 27 | import "github.com/launix-de/memcp/scm" 28 | 29 | type FileStorage struct { 30 | path string 31 | } 32 | 33 | type FileFactory struct { 34 | Basepath string 35 | } 36 | 37 | // helper for long column names 38 | func ProcessColumnName(col string) string { 39 | if len(col) < 64 { 40 | return col 41 | } else { 42 | hashsum := sha256.Sum256([]byte(col)) 43 | return fmt.Sprintf("%x", hashsum[:8]) 44 | } 45 | } 46 | 47 | func (f *FileFactory) CreateDatabase(schema string) PersistenceEngine { 48 | return &FileStorage{f.Basepath + "/" + schema + "/"} 49 | } 50 | 51 | func (f *FileStorage) ReadSchema() []byte { 52 | jsonbytes, _ := os.ReadFile(f.path + "schema.json") 53 | if len(jsonbytes) == 0 { 54 | // try to load backup (in case of failure while save) 55 | jsonbytes, _ = os.ReadFile(f.path + "schema.json.old") 56 | } 57 | return jsonbytes 58 | } 59 | 60 | func (s *FileStorage) WriteSchema(jsonbytes []byte) { 61 | os.MkdirAll(s.path, 0750) 62 | if stat, err := os.Stat(s.path + "schema.json"); err == nil && stat.Size() > 0 { 63 | // rescue a copy of schema.json in case the schema is not serializable 64 | os.Rename(s.path+"schema.json", s.path+"schema.json.old") 65 | } 66 | f, err := os.Create(s.path + "schema.json") 67 | if err != nil { 68 | panic(err) 69 | } 70 | defer f.Close() 71 | f.Write(jsonbytes) 72 | } 73 | 74 | func (s *FileStorage) ReadColumn(shard string, column string) io.ReadCloser { 75 | //f, err := os.C 76 | f, err := os.Open(s.path + shard + "-" + ProcessColumnName(column)) 77 | if err != nil { 78 | // file does not exist -> no data available 79 | return ErrorReader{err} 80 | } 81 | return f 82 | } 83 | 84 | func (s *FileStorage) WriteColumn(shard string, column string) io.WriteCloser { 85 | os.MkdirAll(s.path, 0750) 86 | f, err := os.Create(s.path + shard + "-" + ProcessColumnName(column)) 87 | if err != nil { 88 | panic(err) 89 | } 90 | return f 91 | } 92 | 93 | func (s *FileStorage) RemoveColumn(shard string, column string) { 94 | os.Remove(s.path + shard + "-" + ProcessColumnName(column)) 95 | } 96 | 97 | func (s *FileStorage) OpenLog(shard string) PersistenceLogfile { 98 | os.MkdirAll(s.path, 0750) 99 | f, err := os.OpenFile(s.path+shard+".log", os.O_RDWR|os.O_CREATE, 0750) 100 | if err != nil { 101 | panic(err) 102 | } 103 | return FileLogfile{f} 104 | } 105 | 106 | func (s *FileStorage) ReplayLog(shard string) (chan interface{}, PersistenceLogfile) { 107 | os.MkdirAll(s.path, 0750) 108 | f, err := os.OpenFile(s.path+shard+".log", os.O_RDWR|os.O_CREATE, 0750) 109 | if err != nil { 110 | panic(err) 111 | } 112 | replay := make(chan interface{}, 8) 113 | fi, _ := f.Stat() 114 | if fi.Size() > 0 { 115 | scanner := bufio.NewScanner(f) 116 | for scanner.Scan() { 117 | b := scanner.Bytes() 118 | if string(b) == "" { 119 | // nop 120 | } else if string(b[0:7]) == "delete " { 121 | var idx uint 122 | json.Unmarshal(b[7:], &idx) 123 | replay <- LogEntryDelete{idx} 124 | } else if string(b[0:7]) == "insert " { 125 | body := string(b[7:]) 126 | if pos := strings.Index(body, "]["); pos >= 0 { 127 | // new format: columns ][ values 128 | var cols []string 129 | var values [][]scm.Scmer 130 | json.Unmarshal([]byte(body[:pos+1]), &cols) 131 | json.Unmarshal([]byte(body[pos+1:]), &values) 132 | for i := 0; i < len(values); i++ { 133 | for j := 0; j < len(values[i]); j++ { 134 | 
values[i][j] = scm.TransformFromJSON(values[i][j]) 135 | } 136 | } 137 | replay <- LogEntryInsert{cols, values} 138 | } else { 139 | // fallback/old format: flat array of alternating key/value pairs -> single row 140 | var flat []interface{} 141 | if err := json.Unmarshal([]byte(body), &flat); err != nil { 142 | panic("unknown log sequence: " + string(b)) 143 | } 144 | if len(flat)%2 != 0 { 145 | panic("corrupt insert log (odd items): " + string(b)) 146 | } 147 | cols := make([]string, 0, len(flat)/2) 148 | row := make([]scm.Scmer, 0, len(flat)/2) 149 | for i := 0; i < len(flat); i += 2 { 150 | cols = append(cols, flat[i].(string)) 151 | row = append(row, scm.TransformFromJSON(flat[i+1])) 152 | } 153 | replay <- LogEntryInsert{cols, [][]scm.Scmer{row}} 154 | } 155 | } else { 156 | panic("unknown log sequence: " + string(b)) 157 | } 158 | } 159 | close(replay) 160 | } else { 161 | close(replay) 162 | } 163 | return replay, FileLogfile{f} 164 | } 165 | 166 | func (s *FileStorage) RemoveLog(shard string) { 167 | os.Remove(s.path + shard + ".log") 168 | } 169 | 170 | type FileLogfile struct { 171 | w *os.File 172 | } 173 | 174 | func (w FileLogfile) Write(logentry interface{}) { 175 | switch l := logentry.(type) { 176 | case LogEntryDelete: 177 | var b bytes.Buffer 178 | b.WriteString("delete ") 179 | tmp, _ := json.Marshal(l.idx) 180 | b.Write(tmp) 181 | b.WriteString("\n") 182 | w.w.Write(b.Bytes()) 183 | case LogEntryInsert: 184 | var b bytes.Buffer 185 | b.WriteString("insert ") 186 | tmp, _ := json.Marshal(l.cols) 187 | b.Write(tmp) 188 | tmp, _ = json.Marshal(l.values) 189 | b.Write(tmp) 190 | b.WriteString("\n") 191 | w.w.Write(b.Bytes()) 192 | } 193 | } 194 | func (w FileLogfile) Sync() { 195 | w.w.Sync() 196 | } 197 | func (w FileLogfile) Close() { 198 | w.w.Close() 199 | } 200 | 201 | func (s *FileStorage) Remove() { 202 | os.RemoveAll(s.path) 203 | } 204 | -------------------------------------------------------------------------------- /storage/persistence.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2024 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */ 17 | package storage 18 | 19 | import "io" 20 | import "github.com/launix-de/memcp/scm" 21 | 22 | /* 23 | 24 | persistence interface 25 | 26 | MemCP allows multiple persistence interfaces for storage devices: 27 | - file system: in data/[dbname] 28 | - all other: in data/[dbname.json] 29 | 30 | A storage interface must implement the following operations: 31 | - load schema.json 32 | - load a column 33 | - load all log entries 34 | - remove a shard (columns and log) 35 | - persist schema.json 36 | - persist a column (shard id, column name) 37 | - persist a log entry 38 | 39 | */ 40 | 41 | type PersistenceEngine interface { 42 | ReadSchema() []byte 43 | WriteSchema(schema []byte) 44 | ReadColumn(shard string, column string) io.ReadCloser 45 | WriteColumn(shard string, column string) io.WriteCloser 46 | RemoveColumn(shard string, column string) 47 | OpenLog(shard string) PersistenceLogfile // open for writing 48 | ReplayLog(shard string) (chan interface{}, PersistenceLogfile) // replay existing log 49 | RemoveLog(shard string) 50 | Remove() // delete from storage 51 | } 52 | 53 | type PersistenceLogfile interface { 54 | Write(logentry interface{}) 55 | Sync() 56 | Close() 57 | } 58 | type LogEntryDelete struct { 59 | idx uint 60 | } 61 | type LogEntryInsert struct { 62 | cols []string 63 | values [][]scm.Scmer 64 | } 65 | 66 | // for CREATE TABLE 67 | type PersistenceFactory interface { 68 | CreateDatabase(schema string) PersistenceEngine 69 | } 70 | 71 | // Helper function to move databases between storages 72 | func MoveDatabase(src PersistenceEngine, dst PersistenceEngine) { 73 | // TODO: read schema.json 74 | // TODO: for each shard: read columns, read log, transfer to dst 75 | } 76 | 77 | // ErrorReader implements io.ReadCloser 78 | type ErrorReader struct { 79 | e error 80 | } 81 | 82 | func (e ErrorReader) Read([]byte) (int, error) { 83 | // reflects the error (e.g. file not found) 84 | return 0, e.e 85 | } 86 | func (e ErrorReader) Close() error { 87 | // closes without problem 88 | return nil 89 | } 90 | -------------------------------------------------------------------------------- /storage/settings.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2024 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */ 17 | package storage 18 | 19 | import "github.com/dc0d/onexit" 20 | import "github.com/launix-de/memcp/scm" 21 | 22 | type SettingsT struct { 23 | Backtrace bool 24 | Trace bool 25 | TracePrint bool 26 | PartitionMaxDimensions int 27 | DefaultEngine string 28 | ShardSize uint 29 | AnalyzeMinItems int 30 | AIEstimator bool 31 | } 32 | 33 | var Settings SettingsT = SettingsT{false, false, false, 10, "safe", 60000, 50, false} 34 | 35 | // call this after you filled Settings 36 | func InitSettings() { 37 | scm.SettingsHaveGoodBacktraces = Settings.Backtrace 38 | scm.SetTrace(Settings.Trace) 39 | scm.TracePrint = Settings.TracePrint 40 | onexit.Register(func() { scm.SetTrace(false) }) // close trace file on exit 41 | } 42 | 43 | func ChangeSettings(a ...scm.Scmer) scm.Scmer { 44 | // schema, filename 45 | if len(a) == 0 { 46 | return []scm.Scmer{ 47 | "Backtrace", Settings.Backtrace, 48 | "Trace", Settings.Trace, 49 | "TracePrint", Settings.TracePrint, 50 | "PartitionMaxDimensions", int64(Settings.PartitionMaxDimensions), 51 | "DefaultEngine", Settings.DefaultEngine, 52 | "ShardSize", int64(Settings.ShardSize), 53 | "AnalyzeMinItems", int64(Settings.AnalyzeMinItems), 54 | "AIEstimator", Settings.AIEstimator, 55 | } 56 | } else if len(a) == 1 { 57 | switch scm.String(a[0]) { 58 | case "Backtrace": 59 | return Settings.Backtrace 60 | case "Trace": 61 | return Settings.Trace 62 | case "TracePrint": 63 | return Settings.TracePrint 64 | case "PartitionMaxDimensions": 65 | return int64(Settings.PartitionMaxDimensions) 66 | case "DefaultEngine": 67 | return Settings.DefaultEngine 68 | case "ShardSize": 69 | return int64(Settings.ShardSize) 70 | case "AnalyzeMinItems": 71 | return int64(Settings.AnalyzeMinItems) 72 | case "AIEstimator": 73 | return Settings.AIEstimator 74 | default: 75 | panic("unknown setting: " + scm.String(a[0])) 76 | } 77 | } else { 78 | switch scm.String(a[0]) { 79 | case "Backtrace": 80 | scm.SettingsHaveGoodBacktraces = Settings.Backtrace 81 | Settings.Backtrace = scm.ToBool(a[1]) 82 | case "Trace": 83 | Settings.Trace = scm.ToBool(a[1]) 84 | scm.SetTrace(Settings.Trace) 85 | case "TracePrint": 86 | Settings.TracePrint = scm.ToBool(a[1]) 87 | scm.TracePrint = Settings.TracePrint 88 | case "PartitionMaxDimensions": 89 | Settings.PartitionMaxDimensions = scm.ToInt(a[1]) 90 | case "DefaultEngine": 91 | Settings.DefaultEngine = scm.String(a[1]) 92 | case "ShardSize": 93 | Settings.ShardSize = uint(scm.ToInt(a[1])) 94 | case "AnalyzeMinItems": 95 | Settings.AnalyzeMinItems = scm.ToInt(a[1]) 96 | case "AIEstimator": 97 | prev := Settings.AIEstimator 98 | Settings.AIEstimator = scm.ToBool(a[1]) 99 | if prev != Settings.AIEstimator { 100 | // start/stop estimator on change 101 | if Settings.AIEstimator { 102 | StartGlobalEstimator() 103 | } else { 104 | StopGlobalEstimator() 105 | } 106 | } else if Settings.AIEstimator { 107 | // Setting already true; if estimator not running, try to (re)start 108 | globalEstimatorMu.Lock() 109 | est := globalEstimator 110 | globalEstimatorMu.Unlock() 111 | if est == nil { 112 | StartGlobalEstimator() 113 | } 114 | } 115 | default: 116 | panic("unknown setting: " + scm.String(a[0])) 117 | } 118 | return true 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /storage/shared_resource.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2025 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | 
it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | package storage 18 | 19 | // Shared resource state used for lazy loaded objects. 20 | // COLD: not loaded yet; SHARED: loaded for read; WRITE: loaded and exclusively writable. 21 | type SharedState uint8 22 | 23 | const ( 24 | COLD SharedState = 0 25 | SHARED SharedState = 1 26 | WRITE SharedState = 2 27 | ) 28 | 29 | // SharedResource marks a lazily loaded resource controllable by a process monitor. 30 | // In the current single-process implementation, these methods primarily coordinate 31 | // lazy load/unload. The returned release() functions are placeholders and can 32 | // evolve into reference counting once a multi-node monitor is added. 33 | type SharedResource interface { 34 | GetState() SharedState 35 | GetRead() func() // acquire read access; returns release() 36 | GetExclusive() func() // acquire exclusive access; returns release() 37 | } 38 | -------------------------------------------------------------------------------- /storage/storage-float.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2023 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */ 17 | package storage 18 | 19 | import "io" 20 | import "math" 21 | import "unsafe" 22 | import "encoding/binary" 23 | import "github.com/launix-de/memcp/scm" 24 | 25 | // main type for storage: can store any value, is inefficient but does type analysis how to optimize 26 | type StorageFloat struct { 27 | values []float64 28 | } 29 | 30 | func (s *StorageFloat) ComputeSize() uint { 31 | return 16 + 8*uint(len(s.values)) + 24 /* a slice */ 32 | } 33 | 34 | func (s *StorageFloat) String() string { 35 | return "float64" 36 | } 37 | 38 | func (s *StorageFloat) Serialize(f io.Writer) { 39 | binary.Write(f, binary.LittleEndian, uint8(12)) // 12 = StorageFloat 40 | io.WriteString(f, "1234567") // fill up to 64 bit alignment 41 | binary.Write(f, binary.LittleEndian, uint64(len(s.values))) 42 | // now at offset 16 begin data 43 | rawdata := unsafe.Slice((*byte)(unsafe.Pointer(&s.values[0])), 8*len(s.values)) 44 | f.Write(rawdata) 45 | // free allocated memory and mmap 46 | /* TODO: runtime.SetFinalizer(s, func(s *StorageSCMER) {f.Close()}) 47 | newrawdata = mmap.Map(f, RDWR, 0) 48 | s.values = unsafe.Slice((*float64)&newrawdata[16], len(s.values)) 49 | */ 50 | } 51 | func (s *StorageFloat) Deserialize(f io.Reader) uint { 52 | var dummy [7]byte 53 | f.Read(dummy[:]) 54 | var l uint64 55 | binary.Read(f, binary.LittleEndian, &l) 56 | /* TODO: runtime.SetFinalizer(s, func(s *StorageSCMER) { f.Close() }) 57 | rawdata := mmap.Map(f, RDWR, 0) 58 | */ 59 | rawdata := make([]byte, 8*l) 60 | f.Read(rawdata) 61 | s.values = unsafe.Slice((*float64)(unsafe.Pointer(&rawdata[0])), l) 62 | return uint(l) 63 | } 64 | 65 | func (s *StorageFloat) GetValue(i uint) scm.Scmer { 66 | // NULL is encoded as NaN in SQL 67 | if math.IsNaN(s.values[i]) { 68 | return nil 69 | } else { 70 | return s.values[i] 71 | } 72 | } 73 | 74 | func (s *StorageFloat) scan(i uint, value scm.Scmer) { 75 | } 76 | func (s *StorageFloat) prepare() { 77 | } 78 | func (s *StorageFloat) init(i uint) { 79 | // allocate 80 | s.values = make([]float64, i) 81 | } 82 | func (s *StorageFloat) build(i uint, value scm.Scmer) { 83 | // store 84 | if value == nil { 85 | s.values[i] = math.NaN() 86 | } else { 87 | s.values[i] = value.(float64) 88 | } 89 | } 90 | func (s *StorageFloat) finish() { 91 | } 92 | 93 | func (s *StorageFloat) proposeCompression(i uint) ColumnStorage { 94 | // dont't propose another pass 95 | return nil 96 | } 97 | -------------------------------------------------------------------------------- /storage/storage-int.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2023 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */ 17 | package storage 18 | 19 | import "io" 20 | import "fmt" 21 | import "unsafe" 22 | import "math/bits" 23 | import "encoding/binary" 24 | import "github.com/launix-de/memcp/scm" 25 | 26 | type StorageInt struct { 27 | chunk []uint64 28 | bitsize uint8 29 | offset int64 30 | max int64 // only of statistic use 31 | count uint64 // only stored for serialization purposes 32 | hasNull bool 33 | null uint64 // which value is null 34 | } 35 | 36 | func (s *StorageInt) Serialize(f io.Writer) { 37 | var hasNull uint8 38 | if s.hasNull { 39 | hasNull = 1 40 | } 41 | binary.Write(f, binary.LittleEndian, uint8(10)) // 10 = StorageInt 42 | binary.Write(f, binary.LittleEndian, uint8(s.bitsize)) // len=2 43 | binary.Write(f, binary.LittleEndian, uint8(hasNull)) // len=3 44 | binary.Write(f, binary.LittleEndian, uint8(0)) // len=4 45 | binary.Write(f, binary.LittleEndian, uint32(0)) // len=8 46 | binary.Write(f, binary.LittleEndian, uint64(len(s.chunk))) // chunk size so we know how many data is left 47 | binary.Write(f, binary.LittleEndian, uint64(s.count)) 48 | binary.Write(f, binary.LittleEndian, uint64(s.offset)) 49 | binary.Write(f, binary.LittleEndian, uint64(s.null)) 50 | if len(s.chunk) > 0 { 51 | f.Write(unsafe.Slice((*byte)(unsafe.Pointer(&s.chunk[0])), 8*len(s.chunk))) 52 | } 53 | } 54 | func (s *StorageInt) Deserialize(f io.Reader) uint { 55 | return s.DeserializeEx(f, false) 56 | } 57 | 58 | func (s *StorageInt) DeserializeEx(f io.Reader, readMagicbyte bool) uint { 59 | var dummy8 uint8 60 | var dummy32 uint32 61 | if readMagicbyte { 62 | binary.Read(f, binary.LittleEndian, &dummy8) 63 | if dummy8 != 10 { 64 | panic(fmt.Sprintf("Tried to deserialize StorageInt(10) from file but found %d", dummy8)) 65 | } 66 | } 67 | binary.Read(f, binary.LittleEndian, &s.bitsize) 68 | var hasNull uint8 69 | binary.Read(f, binary.LittleEndian, &hasNull) 70 | s.hasNull = hasNull != 0 71 | binary.Read(f, binary.LittleEndian, &dummy8) 72 | binary.Read(f, binary.LittleEndian, &dummy32) 73 | var chunkcount uint64 74 | binary.Read(f, binary.LittleEndian, &chunkcount) 75 | binary.Read(f, binary.LittleEndian, &s.count) 76 | binary.Read(f, binary.LittleEndian, &s.offset) 77 | binary.Read(f, binary.LittleEndian, &s.null) 78 | if chunkcount > 0 { 79 | rawdata := make([]byte, chunkcount*8) 80 | f.Read(rawdata) 81 | s.chunk = unsafe.Slice((*uint64)(unsafe.Pointer(&rawdata[0])), chunkcount) 82 | } 83 | return uint(s.count) 84 | } 85 | 86 | func toInt(x scm.Scmer) int64 { 87 | switch v := x.(type) { 88 | case float64: 89 | return int64(v) 90 | case int: 91 | return int64(v) 92 | case uint: 93 | return int64(v) 94 | case uint64: 95 | return int64(v) 96 | case int64: 97 | return v 98 | // TODO: 8 bit, 16 bit, 32 bit 99 | default: 100 | return 0 101 | } 102 | } 103 | 104 | func (s *StorageInt) ComputeSize() uint { 105 | return 8*uint(len(s.chunk)) + 64 // management overhead 106 | } 107 | 108 | func (s *StorageInt) String() string { 109 | if s.hasNull { 110 | return fmt.Sprintf("int[%d]NULL", s.bitsize) 111 | } else { 112 | return fmt.Sprintf("int[%d]", s.bitsize) 113 | } 114 | } 115 | 116 | func (s *StorageInt) GetValue(i uint) scm.Scmer { 117 | v := s.GetValueUInt(i) 118 | if s.hasNull && v == s.null { 119 | return nil 120 | } 121 | return int64(v) + s.offset 122 | } 123 | 124 | func (s *StorageInt) GetValueUInt(i uint) uint64 { 125 | bitpos := i * uint(s.bitsize) 126 | 127 | v := s.chunk[bitpos/64] << (bitpos % 64) // align to leftmost position 128 | if bitpos%64+uint(s.bitsize) > 64 { 129 | v = v | 
s.chunk[bitpos/64+1]>>(64-bitpos%64) 130 | } 131 | 132 | return uint64(v) >> (64 - uint(s.bitsize)) // shift right without sign 133 | } 134 | 135 | func (s *StorageInt) prepare() { 136 | // set up scan 137 | s.bitsize = 0 138 | s.offset = int64(1<<63 - 1) 139 | s.max = -s.offset - 1 140 | s.hasNull = false 141 | } 142 | func (s *StorageInt) scan(i uint, value scm.Scmer) { 143 | // storage is so simple, dont need scan 144 | if value == nil { 145 | s.hasNull = true 146 | return 147 | } 148 | v := toInt(value) 149 | if v < s.offset { 150 | s.offset = v 151 | } 152 | if v > s.max { 153 | s.max = v 154 | } 155 | } 156 | func (s *StorageInt) init(i uint) { 157 | v := s.max - s.offset 158 | if s.hasNull { 159 | // store the value 160 | v = v + 1 161 | s.null = uint64(v) 162 | } 163 | if v == -1 { 164 | // no values at all 165 | v = 0 166 | s.offset = 0 167 | s.null = 0 168 | } 169 | s.bitsize = uint8(bits.Len64(uint64(v))) 170 | if s.bitsize == 0 { 171 | s.bitsize = 1 172 | } 173 | // allocate 174 | s.chunk = make([]uint64, ((i-1)*uint(s.bitsize)+65)/64+1) 175 | s.count = uint64(i) 176 | // fmt.Println("storing bitsize", s.bitsize,"null",s.null,"offset",s.offset) 177 | } 178 | func (s *StorageInt) build(i uint, value scm.Scmer) { 179 | if i >= uint(s.count) { 180 | panic("tried to build StorageInt outside of range") 181 | } 182 | // store 183 | vi := toInt(value) 184 | if value == nil { 185 | // null value 186 | vi = int64(s.null) 187 | } else { 188 | vi = vi - s.offset 189 | } 190 | bitpos := i * uint(s.bitsize) 191 | v := uint64(vi) << (64 - uint(s.bitsize)) // shift value to the leftmost position of 64bit int 192 | s.chunk[bitpos/64] = s.chunk[bitpos/64] | (v >> (bitpos % 64)) // first chunk 193 | if bitpos%64+uint(s.bitsize) > 64 { 194 | s.chunk[bitpos/64+1] = s.chunk[bitpos/64+1] | v<<(64-bitpos%64) // second chunk 195 | } 196 | } 197 | func (s *StorageInt) finish() { 198 | } 199 | func (s *StorageInt) proposeCompression(i uint) ColumnStorage { 200 | // dont't propose another pass 201 | return nil 202 | } 203 | -------------------------------------------------------------------------------- /storage/storage-prefix.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2023 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | */ 17 | package storage 18 | 19 | import "fmt" 20 | import "strings" 21 | import "github.com/launix-de/memcp/scm" 22 | 23 | type StoragePrefix struct { 24 | // prefix compression 25 | prefixes StorageInt 26 | prefixdictionary []string // pref 27 | values StorageString // only one depth (but can be cascaded!) 
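// Editor's note with an assumed example (the dictionary contents below are illustrative,
// not taken from the repository): with prefixdictionary = {"", "https://"}, the value
// "https://example.org" is stored as prefix id 1 in `prefixes` plus the residual string
// "example.org" in `values`; GetValue reassembles prefixdictionary[id] + residual.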
28 | } 29 | 30 | func (s *StoragePrefix) ComputeSize() uint { 31 | return s.prefixes.ComputeSize() + 24 + s.values.ComputeSize() 32 | } 33 | 34 | func (s *StoragePrefix) String() string { 35 | return fmt.Sprintf("prefix[%s]-%s", s.prefixdictionary[1], s.values.String()) 36 | } 37 | 38 | func (s *StoragePrefix) GetValue(i uint) scm.Scmer { 39 | innerval := s.values.GetValue(i) 40 | switch v := innerval.(type) { 41 | case string: 42 | return s.prefixdictionary[int64(s.prefixes.GetValueUInt(i))+s.prefixes.offset] + v // append prefix 43 | case nil: 44 | return nil 45 | default: 46 | panic("invalid value in prefix storage") 47 | } 48 | } 49 | 50 | func (s *StoragePrefix) prepare() { 51 | // set up scan 52 | s.prefixes.prepare() 53 | s.values.prepare() 54 | } 55 | func (s *StoragePrefix) scan(i uint, value scm.Scmer) { 56 | var v string 57 | switch v_ := value.(type) { 58 | case string: 59 | v = v_ 60 | default: 61 | // NULL 62 | s.values.scan(i, nil) 63 | return 64 | } 65 | 66 | for pfid := len(s.prefixdictionary) - 1; pfid >= 0; pfid-- { 67 | if strings.HasPrefix(v, s.prefixdictionary[pfid]) { 68 | // learn the string stripped from its prefix 69 | s.prefixes.scan(i, pfid) 70 | s.values.scan(i, v[len(s.prefixdictionary[pfid]):]) 71 | return 72 | } 73 | } 74 | } 75 | func (s *StoragePrefix) init(i uint) { 76 | s.prefixes.init(i) 77 | s.values.init(i) 78 | } 79 | func (s *StoragePrefix) build(i uint, value scm.Scmer) { 80 | // store 81 | var v string 82 | switch v_ := value.(type) { 83 | case string: 84 | v = v_ 85 | default: 86 | // NULL = 1 1 87 | s.values.build(i, nil) 88 | return 89 | } 90 | 91 | for pfid := len(s.prefixdictionary) - 1; pfid >= 0; pfid-- { 92 | if strings.HasPrefix(v, s.prefixdictionary[pfid]) { 93 | // learn the string stripped from its prefix 94 | s.prefixes.build(i, pfid) 95 | s.values.build(i, v[len(s.prefixdictionary[pfid]):]) 96 | return 97 | } 98 | } 99 | } 100 | func (s *StoragePrefix) finish() { 101 | s.prefixes.finish() 102 | s.values.finish() 103 | } 104 | func (s *StoragePrefix) proposeCompression(i uint) ColumnStorage { 105 | // dont't propose another pass 106 | // TODO: if s.values proposes a StoragePrefix, build it into our cascade?? 107 | return nil 108 | } 109 | -------------------------------------------------------------------------------- /storage/storage-scmer.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2023 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */ 17 | package storage 18 | 19 | import "io" 20 | import "math" 21 | import "bufio" 22 | import "encoding/json" 23 | import "encoding/binary" 24 | import "github.com/launix-de/memcp/scm" 25 | 26 | // main type for storage: can store any value, is inefficient but does type analysis how to optimize 27 | type StorageSCMER struct { 28 | values []scm.Scmer 29 | onlyInt bool 30 | onlyFloat bool 31 | hasString bool 32 | longStrings int 33 | null uint // amount of NULL values (sparse map!) 34 | numSeq uint // sequence statistics 35 | last1, last2 int64 // sequence statistics 36 | } 37 | 38 | func (s *StorageSCMER) ComputeSize() uint { 39 | // ! size of Scmer values is not considered 40 | var sz uint = 80 + 24 41 | for _, v := range s.values { 42 | sz += scm.ComputeSize(v) 43 | } 44 | return sz 45 | } 46 | 47 | func (s *StorageSCMER) String() string { 48 | return "SCMER" 49 | } 50 | 51 | func (s *StorageSCMER) Serialize(f io.Writer) { 52 | binary.Write(f, binary.LittleEndian, uint8(1)) // 1 = StorageSCMER 53 | binary.Write(f, binary.LittleEndian, uint64(len(s.values))) 54 | for i := 0; i < len(s.values); i++ { 55 | v, err := json.Marshal(s.values[i]) 56 | if err != nil { 57 | panic(err) 58 | } 59 | f.Write(v) 60 | f.Write([]byte("\n")) // endline so the serialized file becomes a jsonl file beginning at byte 9 61 | } 62 | } 63 | func (s *StorageSCMER) Deserialize(f io.Reader) uint { 64 | var l uint64 65 | binary.Read(f, binary.LittleEndian, &l) 66 | s.values = make([]scm.Scmer, l) 67 | scanner := bufio.NewScanner(f) 68 | for i := uint64(0); i < l; i++ { 69 | if scanner.Scan() { 70 | var v any 71 | json.Unmarshal(scanner.Bytes(), &v) 72 | s.values[i] = scm.TransformFromJSON(v) 73 | } 74 | } 75 | return uint(l) 76 | } 77 | 78 | func (s *StorageSCMER) GetValue(i uint) scm.Scmer { 79 | return s.values[i] 80 | } 81 | 82 | func (s *StorageSCMER) scan(i uint, value scm.Scmer) { 83 | switch v := value.(type) { 84 | case int64: 85 | v2 := toInt(value) 86 | // analyze whether there is a sequence 87 | if v2-s.last1 == s.last1-s.last2 { 88 | s.numSeq = s.numSeq + 1 // count as sequencable 89 | } 90 | // push sequence detector 91 | s.last2 = s.last1 92 | s.last1 = v2 93 | case float64: 94 | if _, f := math.Modf(v); f != 0.0 { 95 | s.onlyInt = false 96 | } else { 97 | v := toInt(value) 98 | // analyze whether there is a sequence 99 | if v-s.last1 == s.last1-s.last2 { 100 | s.numSeq = s.numSeq + 1 // count as sequencable 101 | } 102 | // push sequence detector 103 | s.last2 = s.last1 104 | s.last1 = v 105 | } 106 | case scm.LazyString: 107 | s.onlyInt = false 108 | s.onlyFloat = false 109 | s.hasString = true 110 | s.longStrings++ 111 | case string: 112 | s.onlyInt = false 113 | s.onlyFloat = false 114 | s.hasString = true 115 | if len(v) > 255 { 116 | s.longStrings++ 117 | } 118 | case nil: 119 | s.null = s.null + 1 // count NULL 120 | // storageInt can also handle null 121 | default: 122 | s.onlyInt = false 123 | s.onlyFloat = false 124 | } 125 | } 126 | func (s *StorageSCMER) prepare() { 127 | s.onlyInt = true 128 | s.onlyFloat = true 129 | s.hasString = false 130 | } 131 | func (s *StorageSCMER) init(i uint) { 132 | // allocate 133 | s.values = make([]scm.Scmer, i) 134 | } 135 | func (s *StorageSCMER) build(i uint, value scm.Scmer) { 136 | // store 137 | s.values[i] = value 138 | } 139 | func (s *StorageSCMER) finish() { 140 | } 141 | 142 | // soley to StorageSCMER 143 | func (s *StorageSCMER) proposeCompression(i uint) ColumnStorage { 144 | if s.null*100 > i*13 { 145 | // sparse payoff against 
bitcompressed is at ~13% 146 | if s.longStrings > 2 { 147 | b := new(OverlayBlob) 148 | b.Base = new(StorageSparse) 149 | return b 150 | } 151 | return new(StorageSparse) 152 | } 153 | if s.hasString { 154 | if s.longStrings > 2 { 155 | b := new(OverlayBlob) 156 | b.Base = new(StorageString) 157 | return b 158 | } 159 | return new(StorageString) 160 | } 161 | if s.onlyInt { // TODO: OverlaySCMER? 162 | // propose sequence compression in the form (recordid, startvalue, length, stride) using binary search on recordid for reading 163 | if i > 5 && 2*(i-s.numSeq) < i { 164 | return new(StorageSeq) 165 | } 166 | return new(StorageInt) 167 | } 168 | if s.onlyFloat { 169 | // tight float packing 170 | return new(StorageFloat) 171 | } 172 | if s.null*2 > i { 173 | // sparse payoff against StorageSCMER is at 2.1 174 | return new(StorageSparse) 175 | } 176 | // dont't propose another pass 177 | return nil 178 | } 179 | -------------------------------------------------------------------------------- /storage/storage-sparse.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2023 Carl-Philip Hänsch 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | */ 17 | package storage 18 | 19 | import "io" 20 | import "bufio" 21 | import "encoding/json" 22 | import "encoding/binary" 23 | import "github.com/launix-de/memcp/scm" 24 | 25 | type StorageSparse struct { 26 | i, count uint64 27 | recids StorageInt 28 | values []scm.Scmer // TODO: embed other formats as values (ColumnStorage with a proposeCompression loop) 29 | } 30 | 31 | func (s *StorageSparse) ComputeSize() uint { 32 | var sz uint = 16 + 8 + 24 + s.recids.ComputeSize() + 8*uint(len(s.values)) 33 | for _, v := range s.values { 34 | sz += scm.ComputeSize(v) 35 | } 36 | return sz 37 | } 38 | 39 | func (s *StorageSparse) String() string { 40 | return "SCMER-sparse" 41 | } 42 | func (s *StorageSparse) Serialize(f io.Writer) { 43 | binary.Write(f, binary.LittleEndian, uint8(2)) // 2 = StorageSparse 44 | binary.Write(f, binary.LittleEndian, uint64(s.count)) 45 | binary.Write(f, binary.LittleEndian, uint64(len(s.values))) 46 | for k, v := range s.values { 47 | vbytes, err := json.Marshal(uint64(s.recids.GetValueUInt(uint(k)) + uint64(s.recids.offset))) 48 | if err != nil { 49 | panic(err) 50 | } 51 | f.Write(vbytes) 52 | f.Write([]byte("\n")) // endline so the serialized file becomes a jsonl file beginning at byte 9 53 | vbytes, err = json.Marshal(v) 54 | if err != nil { 55 | panic(err) 56 | } 57 | f.Write(vbytes) 58 | f.Write([]byte("\n")) // endline so the serialized file becomes a jsonl file beginning at byte 9 59 | } 60 | } 61 | func (s *StorageSparse) Deserialize(f io.Reader) uint { 62 | var l uint64 63 | binary.Read(f, binary.LittleEndian, &l) 64 | s.count = l 65 | var l2 uint64 66 | binary.Read(f, binary.LittleEndian, &l2) 67 | s.values = make([]scm.Scmer, l2) 68 | s.i = l2 69 | scanner := bufio.NewScanner(f) 70 | s.recids.prepare() 71 | s.recids.scan(0, 0) 72 | s.recids.scan(uint(l2-1), l-1) 73 | s.recids.init(uint(l2)) 74 | i := 0 75 | for { 76 | var k uint64 77 | if !scanner.Scan() { 78 | break 79 | } 80 | json.Unmarshal(scanner.Bytes(), &k) 81 | if !scanner.Scan() { 82 | break 83 | } 84 | var v any 85 | json.Unmarshal(scanner.Bytes(), &v) 86 | s.recids.build(uint(i), k) 87 | s.values[i] = scm.TransformFromJSON(v) 88 | i++ 89 | } 90 | s.recids.finish() 91 | return uint(l) 92 | } 93 | 94 | func (s *StorageSparse) GetValue(i uint) scm.Scmer { 95 | var lower uint = 0 96 | var upper uint = uint(s.i) 97 | for { 98 | if lower == upper { 99 | return nil // sparse value 100 | } 101 | pivot := uint((lower + upper) / 2) 102 | recid := s.recids.GetValueUInt(pivot) + uint64(s.recids.offset) 103 | if recid == uint64(i) { 104 | return s.values[pivot] // found the value 105 | } 106 | if recid < uint64(i) { 107 | lower = pivot + 1 108 | } else { 109 | upper = pivot 110 | } 111 | 112 | } 113 | } 114 | 115 | func (s *StorageSparse) scan(i uint, value scm.Scmer) { 116 | if value != nil { 117 | s.recids.scan(uint(s.i), i) 118 | s.i++ 119 | } 120 | } 121 | func (s *StorageSparse) prepare() { 122 | s.i = 0 123 | } 124 | func (s *StorageSparse) init(i uint) { 125 | s.values = make([]scm.Scmer, s.i) 126 | s.count = uint64(i) 127 | s.recids.init(uint(s.i)) 128 | s.i = 0 129 | } 130 | func (s *StorageSparse) build(i uint, value scm.Scmer) { 131 | // store 132 | if value != nil { 133 | s.recids.build(uint(s.i), i) 134 | s.values[s.i] = value 135 | s.i++ 136 | } 137 | } 138 | func (s *StorageSparse) finish() { 139 | s.recids.finish() 140 | } 141 | 142 | // soley to StorageSparse 143 | func (s *StorageSparse) proposeCompression(i uint) ColumnStorage { 144 | return nil 145 | } 146 | 
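The storage-*.go implementations above all follow the same two-pass ColumnStorage protocol: prepare/scan to gather statistics, proposeCompression to pick a tighter representation, then init/build/finish to materialize it. The following sketch is an editor's illustration built only from the method signatures shown here — the sample values and the rebuild loop are assumptions, not code copied from the repository:

	values := []scm.Scmer{int64(1), int64(2), nil, int64(4)} // assumed sample column
	var stor ColumnStorage = new(StorageSCMER)               // start generic and let it analyze the data
	for {
		stor.prepare() // pass 1: reset statistics
		for i, v := range values {
			stor.scan(uint(i), v) // gather min/max, NULL count, string stats, ...
		}
		next := stor.proposeCompression(uint(len(values)))
		if next == nil {
			break // the current storage is the final proposal
		}
		stor = next // e.g. StorageSCMER -> StorageInt, which is then re-scanned
	}
	stor.init(uint(len(values))) // allocate for the known row count
	for i, v := range values {
		stor.build(uint(i), v) // pass 2: materialize the values
	}
	stor.finish()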
-------------------------------------------------------------------------------- /tests/01_basic_sql.yaml: -------------------------------------------------------------------------------- 1 | # Basic SQL Operations Test Suite 2 | # Fundamental SQL functionality: arithmetic, comparisons, literals, basic expressions 3 | 4 | metadata: 5 | version: "1.0" 6 | description: "Basic SQL operations and expressions" 7 | 8 | setup: [] 9 | 10 | test_cases: 11 | 12 | # === ARITHMETIC EXPRESSIONS === 13 | - name: "Basic addition" 14 | sql: "SELECT 5 + 3 AS result" 15 | expect: 16 | rows: 1 17 | data: 18 | - result: 8 19 | 20 | - name: "Basic subtraction" 21 | sql: "SELECT 10 - 4 AS result" 22 | expect: 23 | rows: 1 24 | data: 25 | - result: 6 26 | 27 | - name: "Basic multiplication" 28 | sql: "SELECT 6 * 7 AS result" 29 | expect: 30 | rows: 1 31 | data: 32 | - result: 42 33 | 34 | - name: "Basic division" 35 | sql: "SELECT 15 / 3 AS result" 36 | expect: 37 | rows: 1 38 | data: 39 | - result: 5 40 | 41 | - name: "Complex arithmetic with parentheses" 42 | sql: "SELECT (10 + 5) * 2 - 8 / 4 AS result" 43 | expect: 44 | rows: 1 45 | data: 46 | - result: 28 47 | 48 | - name: "Division with decimal result" 49 | sql: "SELECT 1 / 0.1 AS result" 50 | expect: 51 | rows: 1 52 | data: 53 | - result: 10 54 | 55 | - name: "Large number arithmetic" 56 | sql: "SELECT 999999 + 1 AS result" 57 | expect: 58 | rows: 1 59 | data: 60 | - result: 1000000 61 | 62 | - name: "Negative number operations" 63 | sql: "SELECT -5 * -3 AS result" 64 | expect: 65 | rows: 1 66 | data: 67 | - result: 15 68 | 69 | # === COMPARISON OPERATORS === 70 | - name: "Greater than comparison" 71 | sql: "SELECT 10 > 5 AS result" 72 | expect: 73 | rows: 1 74 | data: 75 | - result: true 76 | 77 | - name: "Less than comparison" 78 | sql: "SELECT 3 < 8 AS result" 79 | expect: 80 | rows: 1 81 | data: 82 | - result: true 83 | 84 | - name: "Greater than or equal" 85 | sql: "SELECT 5 >= 5 AS result" 86 | expect: 87 | rows: 1 88 | data: 89 | - result: true 90 | 91 | - name: "Less than or equal" 92 | sql: "SELECT 4 <= 7 AS result" 93 | expect: 94 | rows: 1 95 | data: 96 | - result: true 97 | 98 | - name: "Equality comparison" 99 | sql: "SELECT 5 = 5 AS result" 100 | expect: 101 | rows: 1 102 | data: 103 | - result: true 104 | 105 | - name: "Inequality comparison" 106 | sql: "SELECT 5 != 3 AS result" 107 | expect: 108 | rows: 1 109 | data: 110 | - result: true 111 | 112 | - name: "String comparison" 113 | sql: "SELECT 'apple' < 'banana' AS result" 114 | expect: 115 | rows: 1 116 | data: 117 | - result: true 118 | 119 | - name: "Comparison with arithmetic" 120 | sql: "SELECT (10 + 5) > (3 * 4) AS result" 121 | expect: 122 | rows: 1 123 | data: 124 | - result: true 125 | 126 | # === LITERALS AND CONSTANTS === 127 | - name: "Integer literal" 128 | sql: "SELECT 42 AS number" 129 | expect: 130 | rows: 1 131 | data: 132 | - number: 42 133 | 134 | - name: "Negative integer literal" 135 | sql: "SELECT -17 AS number" 136 | expect: 137 | rows: 1 138 | data: 139 | - number: -17 140 | 141 | - name: "Float literal" 142 | sql: "SELECT 3.14159 AS pi" 143 | expect: 144 | rows: 1 145 | data: 146 | - pi: 3.14159 147 | 148 | - name: "String literal with single quotes" 149 | sql: "SELECT 'Hello World' AS message" 150 | expect: 151 | rows: 1 152 | data: 153 | - message: "Hello World" 154 | 155 | - name: "String literal with double quotes" 156 | sql: "SELECT \"Hello SQL\" AS message" 157 | expect: 158 | rows: 1 159 | data: 160 | - message: "Hello SQL" 161 | 162 | - name: "Boolean true 
literal" 163 | sql: "SELECT true AS bool_val" 164 | expect: 165 | rows: 1 166 | data: 167 | - bool_val: true 168 | 169 | - name: "Boolean false literal" 170 | sql: "SELECT false AS bool_val" 171 | expect: 172 | rows: 1 173 | data: 174 | - bool_val: false 175 | 176 | # === MULTIPLE COLUMNS === 177 | - name: "Multiple column expressions" 178 | sql: "SELECT 1 + 2 AS sum, 3 * 4 AS product, 'test' AS text" 179 | expect: 180 | rows: 1 181 | data: 182 | - sum: 3 183 | product: 12 184 | text: "test" 185 | 186 | - name: "Mixed data types in columns" 187 | sql: "SELECT 42 AS number, 'text' AS string, true AS boolean, 3.14 AS decimal" 188 | expect: 189 | rows: 1 190 | data: 191 | - number: 42 192 | string: "text" 193 | boolean: true 194 | decimal: 3.14 195 | 196 | cleanup: [] -------------------------------------------------------------------------------- /tests/02_functions.yaml: -------------------------------------------------------------------------------- 1 | # SQL Functions Test Suite 2 | # Built-in functions: math, string, time, and other utility functions 3 | 4 | metadata: 5 | version: "1.0" 6 | description: "SQL built-in functions and expressions" 7 | 8 | setup: [] 9 | 10 | test_cases: 11 | 12 | # === MATHEMATICAL FUNCTIONS === 13 | - name: "FLOOR function" 14 | sql: "SELECT FLOOR(4.7) AS result" 15 | expect: 16 | rows: 1 17 | data: 18 | - result: 4 19 | 20 | - name: "FLOOR with negative number" 21 | sql: "SELECT FLOOR(-4.7) AS result" 22 | expect: 23 | rows: 1 24 | data: 25 | - result: -5 26 | 27 | - name: "CEIL function" 28 | sql: "SELECT CEIL(4.3) AS result" 29 | expect: 30 | rows: 1 31 | data: 32 | - result: 5 33 | 34 | - name: "CEILING function (alias)" 35 | sql: "SELECT CEILING(4.3) AS result" 36 | expect: 37 | rows: 1 38 | data: 39 | - result: 5 40 | 41 | - name: "ROUND function" 42 | sql: "SELECT ROUND(4.6) AS result" 43 | expect: 44 | rows: 1 45 | data: 46 | - result: 5 47 | 48 | - name: "ROUND with .5 value" 49 | sql: "SELECT ROUND(4.5) AS result" 50 | expect: 51 | rows: 1 52 | data: 53 | - result: 5 54 | 55 | - name: "Nested math functions" 56 | sql: "SELECT FLOOR(CEIL(4.3) * 2 + ROUND(1.6)) AS result" 57 | expect: 58 | rows: 1 59 | data: 60 | - result: 12 61 | 62 | # === STRING FUNCTIONS === 63 | - name: "UPPER function" 64 | sql: "SELECT UPPER('hello world') AS result" 65 | expect: 66 | rows: 1 67 | data: 68 | - result: "HELLO WORLD" 69 | 70 | - name: "LOWER function" 71 | sql: "SELECT LOWER('HELLO WORLD') AS result" 72 | expect: 73 | rows: 1 74 | data: 75 | - result: "hello world" 76 | 77 | - name: "Nested string functions" 78 | sql: "SELECT UPPER(LOWER('HELLO world')) AS result" 79 | expect: 80 | rows: 1 81 | data: 82 | - result: "HELLO WORLD" 83 | 84 | - name: "UPPER with empty string" 85 | sql: "SELECT UPPER('') AS result" 86 | expect: 87 | rows: 1 88 | data: 89 | - result: "" 90 | 91 | - name: "LOWER with empty string" 92 | sql: "SELECT LOWER('') AS result" 93 | expect: 94 | rows: 1 95 | data: 96 | - result: "" 97 | 98 | - name: "Multiple string operations" 99 | sql: "SELECT UPPER('hello') AS upper_case, LOWER('WORLD') AS lower_case" 100 | expect: 101 | rows: 1 102 | data: 103 | - upper_case: "HELLO" 104 | lower_case: "world" 105 | 106 | # === BASE64 FUNCTIONS === 107 | - name: "TO_BASE64 encodes" 108 | sql: "SELECT TO_BASE64('foo') AS b64" 109 | expect: 110 | rows: 1 111 | data: 112 | - b64: "Zm9v" 113 | 114 | - name: "FROM_BASE64 decodes" 115 | sql: "SELECT FROM_BASE64('Zm9v') AS plain" 116 | expect: 117 | rows: 1 118 | data: 119 | - plain: "foo" 120 | 121 | - name: 
"Base64 roundtrip" 122 | sql: "SELECT FROM_BASE64(TO_BASE64('Hello, world!')) AS plain" 123 | expect: 124 | rows: 1 125 | data: 126 | - plain: "Hello, world!" 127 | 128 | # === TIME FUNCTIONS === 129 | - name: "UNIX_TIMESTAMP function exists" 130 | sql: "SELECT UNIX_TIMESTAMP() > 1600000000 AS recent_timestamp" 131 | expect: 132 | rows: 1 133 | data: 134 | - recent_timestamp: true 135 | 136 | - name: "CURRENT_TIMESTAMP function exists" 137 | sql: "SELECT CURRENT_TIMESTAMP() > 1600000000 AS recent_timestamp" 138 | expect: 139 | rows: 1 140 | data: 141 | - recent_timestamp: true 142 | 143 | - name: "Time functions return numbers" 144 | sql: "SELECT UNIX_TIMESTAMP() > 0 AS positive_time" 145 | expect: 146 | rows: 1 147 | data: 148 | - positive_time: true 149 | 150 | # === VECTOR FUNCTIONS === 151 | - name: "STRING_TO_VECTOR function" 152 | sql: "SELECT STRING_TO_VECTOR('[1,2,3]') AS vector" 153 | expect: 154 | rows: 1 155 | 156 | - name: "VECTOR_TO_STRING function" 157 | sql: "SELECT VECTOR_TO_STRING('[1,2,3]') AS vector_str" 158 | expect: 159 | rows: 1 160 | 161 | # TODO: VECTOR_DISTANCE has type conversion issues 162 | # - name: "VECTOR_DISTANCE function" 163 | # sql: "SELECT VECTOR_DISTANCE('[1,2,3]', '[4,5,6]') AS distance" 164 | # expect: 165 | # error: true 166 | # error_type: "type_conversion" 167 | 168 | # === CASE EXPRESSIONS === 169 | - name: "Simple CASE expression" 170 | sql: "SELECT CASE WHEN 5 > 3 THEN 'greater' ELSE 'lesser' END AS result" 171 | expect: 172 | rows: 1 173 | data: 174 | - result: "greater" 175 | 176 | - name: "CASE with multiple WHEN clauses" 177 | sql: "SELECT CASE WHEN 2 > 5 THEN 'big' WHEN 2 > 1 THEN 'medium' ELSE 'small' END AS result" 178 | expect: 179 | rows: 1 180 | data: 181 | - result: "medium" 182 | 183 | - name: "Nested CASE expressions" 184 | sql: "SELECT CASE WHEN 5 > 3 THEN CASE WHEN 2 > 1 THEN 'both_true' ELSE 'first_only' END ELSE 'neither' END AS result" 185 | expect: 186 | rows: 1 187 | data: 188 | - result: "both_true" 189 | 190 | - name: "CASE with arithmetic in conditions" 191 | sql: "SELECT CASE WHEN (5 + 3) > (2 * 3) THEN 'math_works' ELSE 'math_broken' END AS result" 192 | expect: 193 | rows: 1 194 | data: 195 | - result: "math_works" 196 | 197 | - name: "CASE with string comparisons" 198 | sql: "SELECT CASE WHEN 'apple' < 'banana' THEN 'alphabetical' ELSE 'reversed' END AS result" 199 | expect: 200 | rows: 1 201 | data: 202 | - result: "alphabetical" 203 | 204 | # === FUNCTION COMBINATIONS === 205 | - name: "Functions in arithmetic" 206 | sql: "SELECT FLOOR(4.7) + CEIL(4.3) AS result" 207 | expect: 208 | rows: 1 209 | data: 210 | - result: 9 211 | 212 | - name: "Functions with CASE" 213 | sql: "SELECT CASE WHEN FLOOR(4.7) > 3 THEN UPPER('yes') ELSE LOWER('NO') END AS result" 214 | expect: 215 | rows: 1 216 | data: 217 | - result: "YES" 218 | 219 | - name: "Multiple function types" 220 | sql: "SELECT FLOOR(3.7) AS math, UPPER('test') AS string, UNIX_TIMESTAMP() > 0 AS time" 221 | expect: 222 | rows: 1 223 | data: 224 | - math: 3 225 | string: "TEST" 226 | time: true 227 | 228 | cleanup: [] 229 | -------------------------------------------------------------------------------- /tests/03_ddl_operations.yaml: -------------------------------------------------------------------------------- 1 | # DDL Operations Test Suite 2 | # Data Definition Language: CREATE, DROP, ALTER operations with affected_rows 3 | 4 | metadata: 5 | version: "1.0" 6 | description: "DDL operations (CREATE, DROP, ALTER) with affected_rows validation" 7 | 8 | setup: 9 | # 
Ensure user rows don't pre-exist from earlier runs 10 | - sql: "DELETE FROM system.user WHERE username = 'testuser'" 11 | - sql: "DELETE FROM system.user WHERE username = 'testuser2'" 12 | 13 | test_cases: 14 | 15 | # === CREATE TABLE OPERATIONS === 16 | - name: "CREATE TABLE basic structure" 17 | sql: "CREATE TABLE users (id INT, name VARCHAR(50))" 18 | expect: 19 | affected_rows: 1 20 | 21 | - name: "CREATE TABLE with multiple columns" 22 | sql: "CREATE TABLE products (id INT, name VARCHAR(100), price DECIMAL(10,2), description TEXT)" 23 | expect: 24 | affected_rows: 1 25 | 26 | - name: "CREATE TABLE with constraints" 27 | sql: "CREATE TABLE orders (id INT PRIMARY KEY, user_id INT, total DECIMAL(10,2), status VARCHAR(20))" 28 | expect: 29 | affected_rows: 1 30 | 31 | - name: "CREATE TABLE with IF NOT EXISTS" 32 | sql: "CREATE TABLE IF NOT EXISTS customers (id INT, name VARCHAR(100))" 33 | expect: 34 | affected_rows: 1 35 | 36 | - name: "CREATE TABLE with IF NOT EXISTS (already exists)" 37 | sql: "CREATE TABLE IF NOT EXISTS users (id INT, email VARCHAR(200))" 38 | expect: 39 | affected_rows: 1 40 | 41 | - name: "CREATE TABLE with various data types" 42 | sql: | 43 | CREATE TABLE test_types ( 44 | id INT, 45 | name VARCHAR(100), 46 | price DECIMAL(10,2), 47 | active BOOLEAN, 48 | created_at TIMESTAMP, 49 | notes TEXT 50 | ) 51 | expect: 52 | affected_rows: 1 53 | 54 | - name: "CREATE TABLE with ENGINE specification" 55 | sql: "CREATE TABLE memory_table (id INT, data VARCHAR(100)) ENGINE=MEMORY" 56 | expect: 57 | affected_rows: 1 58 | 59 | # === SHOW OPERATIONS === 60 | - name: "SHOW DATABASES" 61 | sql: "SHOW DATABASES" 62 | expect: {} # Environment dependent, just check it runs 63 | 64 | - name: "SHOW TABLES" 65 | sql: "SHOW TABLES" 66 | expect: {} # Environment dependent, just check it runs 67 | 68 | - name: "SHOW TABLES FROM specific database" 69 | sql: "SHOW TABLES FROM ddl_test" 70 | expect: {} # Environment dependent, just check it runs 71 | 72 | - name: "DESCRIBE table structure" 73 | sql: "DESCRIBE users" 74 | expect: {} # Just check it runs successfully 75 | 76 | - name: "SHOW FULL COLUMNS" 77 | sql: "SHOW FULL COLUMNS FROM users" 78 | expect: {} # Just check it runs successfully 79 | 80 | - name: "SHOW TABLE STATUS" 81 | sql: "SHOW TABLE STATUS" 82 | expect: {} # Just check it runs successfully 83 | 84 | - name: "SHOW VARIABLES" 85 | sql: "SHOW VARIABLES" 86 | expect: {} # Environment dependent, just check it runs 87 | 88 | # === SESSION VARIABLES === 89 | - name: "SET session variable" 90 | sql: "SET @test_var = 'hello'" 91 | expect: {} # Session variables return assigned value, not affected_rows 92 | 93 | - name: "SET session variable with SESSION keyword" 94 | sql: "SET SESSION @another_var = 42" 95 | expect: {} # Session variables return assigned value, not affected_rows 96 | 97 | - name: "SET session variable with expression" 98 | sql: "SET @calculated = 5 + 3 * 2" 99 | expect: {} # Session variables return assigned value 100 | 101 | - name: "SET multiple session variables" 102 | sql: "SET @var1 = 'test', @var2 = 123" 103 | expect: {} # Multiple session variables 104 | 105 | # === GLOBAL VARIABLES === 106 | - name: "SET complex expression" 107 | sql: "SET @complex = 5 + 3 * 2" 108 | expect: {} # Just check it runs successfully 109 | 110 | - name: "SET string concatenation" 111 | sql: "SET @text = 'hello'" 112 | expect: {} # Just check it runs successfully 113 | 114 | # TODO: DROP TABLE operations (when DROP functionality is stable) 115 | # - name: "DROP TABLE with IF EXISTS" 
116 | # sql: "DROP TABLE IF EXISTS test_drop" 117 | # expect: 118 | # affected_rows: 0 # Table doesn't exist 119 | 120 | # - name: "DROP existing TABLE" 121 | # sql: "DROP TABLE memory_table" 122 | # expect: 123 | # affected_rows: 1 124 | 125 | # TODO: ALTER TABLE operations (when ALTER functionality is stable) 126 | # - name: "ALTER TABLE add column" 127 | # sql: "ALTER TABLE users ADD COLUMN email VARCHAR(200)" 128 | # expect: 129 | # affected_rows: 1 130 | 131 | # - name: "ALTER TABLE modify column" 132 | # sql: "ALTER TABLE users MODIFY COLUMN name VARCHAR(200)" 133 | # expect: 134 | # affected_rows: 1 135 | 136 | # TODO: CREATE INDEX operations 137 | # - name: "CREATE INDEX" 138 | # sql: "CREATE INDEX idx_name ON users (name)" 139 | # expect: 140 | # affected_rows: 1 141 | 142 | # === CREATE USER OPERATIONS === 143 | - name: "CREATE USER basic" 144 | sql: "CREATE USER testuser" 145 | expect: 146 | affected_rows: 1 147 | 148 | - name: "CREATE USER with password" 149 | sql: "CREATE USER testuser2 IDENTIFIED BY 'password123'" 150 | expect: 151 | affected_rows: 1 152 | 153 | # TODO: ALTER USER operations 154 | # - name: "ALTER USER password" 155 | # sql: "ALTER USER testuser IDENTIFIED BY 'newpassword'" 156 | # expect: 157 | # affected_rows: 1 158 | 159 | cleanup: 160 | # Clean up created users to keep suite idempotent 161 | - sql: "DELETE FROM system.user WHERE username = 'testuser'" 162 | - sql: "DELETE FROM system.user WHERE username = 'testuser2'" 163 | -------------------------------------------------------------------------------- /tests/04_table_operations.yaml: -------------------------------------------------------------------------------- 1 | # Minimal Table Operations Test 2 | # Testing just the most basic operations to isolate database corruption issue 3 | 4 | metadata: 5 | version: "1.0" 6 | description: "Minimal table operations test for debugging" 7 | 8 | cleanup: 9 | - action: "Clean up test table" 10 | sql: "DROP TABLE IF EXISTS simple_test" 11 | 12 | setup: 13 | - action: "Drop pre-existing table" 14 | sql: "DROP TABLE IF EXISTS simple_test" 15 | - action: "CREATE TABLE simple_test" 16 | sql: "CREATE TABLE simple_test (id INT, name VARCHAR(50))" 17 | 18 | test_cases: 19 | - name: "Simple INSERT test" 20 | sql: "INSERT INTO simple_test (id, name) VALUES (1, 'test')" 21 | expect: 22 | affected_rows: 1 23 | 24 | - name: "Simple SELECT test" 25 | sql: "SELECT * FROM simple_test" 26 | expect: 27 | rows: 1 28 | 29 | - name: "Schema-qualified INSERT" 30 | sql: "INSERT INTO `memcp-tests`.simple_test (id, name) VALUES (2, 'q')" 31 | expect: 32 | affected_rows: 1 33 | 34 | - name: "Row count after qualified INSERT" 35 | sql: "SELECT COUNT(*) AS c FROM simple_test" 36 | expect: 37 | rows: 1 38 | data: 39 | - c: 2 40 | 41 | - name: "INSERT with trailing semicolon" 42 | sql: "INSERT INTO simple_test (id, name) VALUES (3, 'semi');" 43 | expect: 44 | affected_rows: 1 45 | 46 | - name: "Row count after trailing-semicolon INSERT" 47 | sql: "SELECT COUNT(*) AS c FROM simple_test" 48 | expect: 49 | rows: 1 50 | data: 51 | - c: 3 52 | -------------------------------------------------------------------------------- /tests/05_advanced_queries.yaml: -------------------------------------------------------------------------------- 1 | # Advanced Query Test Suite 2 | # Complex queries, JOINs, subqueries, GROUP BY, aggregates (many marked TODO until supported) 3 | 4 | metadata: 5 | version: "1.0" 6 | description: "Advanced SQL queries and aggregation features" 7 | 8 | setup: 9 | - action: "CREATE 
TABLE customers" 10 | sql: | 11 | CREATE TABLE customers ( 12 | id INT, 13 | name VARCHAR(100), 14 | city VARCHAR(50), 15 | country VARCHAR(50) 16 | ) 17 | - action: "CREATE TABLE orders" 18 | sql: | 19 | CREATE TABLE orders ( 20 | id INT, 21 | customer_id INT, 22 | amount DECIMAL(10,2), 23 | order_date VARCHAR(20), 24 | status VARCHAR(20) 25 | ) 26 | - action: "CREATE TABLE products" 27 | sql: | 28 | CREATE TABLE products ( 29 | id INT, 30 | name VARCHAR(100), 31 | category VARCHAR(50), 32 | price DECIMAL(10,2) 33 | ) 34 | 35 | test_cases: 36 | 37 | # === SETUP TEST DATA === 38 | - name: "Insert customer test data" 39 | sql: | 40 | INSERT INTO customers (id, name, city, country) VALUES 41 | (1, 'John Doe', 'New York', 'USA'), 42 | (2, 'Jane Smith', 'London', 'UK'), 43 | (3, 'Hans Mueller', 'Berlin', 'Germany'), 44 | (4, 'Marie Dupont', 'Paris', 'France'), 45 | (5, 'Carlos Rodriguez', 'Madrid', 'Spain') 46 | expect: 47 | affected_rows: 5 48 | 49 | - name: "Insert order test data" 50 | sql: | 51 | INSERT INTO orders (id, customer_id, amount, order_date, status) VALUES 52 | (1, 1, 299.99, '2024-01-15', 'completed'), 53 | (2, 2, 150.00, '2024-02-01', 'completed'), 54 | (3, 3, 500.00, '2024-01-20', 'pending'), 55 | (4, 4, 75.50, '2024-02-10', 'completed'), 56 | (5, 5, 1200.00, '2024-02-15', 'shipped') 57 | expect: 58 | affected_rows: 5 59 | 60 | - name: "Insert product test data" 61 | sql: | 62 | INSERT INTO products (id, name, category, price) VALUES 63 | (1, 'Laptop', 'Electronics', 999.99), 64 | (2, 'Mouse', 'Electronics', 25.99), 65 | (3, 'Keyboard', 'Electronics', 79.99), 66 | (4, 'Chair', 'Furniture', 199.99), 67 | (5, 'Desk', 'Furniture', 299.99) 68 | expect: 69 | affected_rows: 5 70 | 71 | # === BASIC VERIFICATION === 72 | - name: "Simple COUNT verification" 73 | sql: "SELECT * FROM customers" 74 | expect: {} 75 | 76 | # === WORKING FEATURES VERIFICATION === 77 | - name: "Verify all customer data inserted correctly" 78 | sql: "SELECT * FROM customers WHERE id = 1" 79 | expect: {} 80 | 81 | cleanup: 82 | - action: "Clean up customers table" 83 | sql: "DROP TABLE IF EXISTS customers" 84 | - action: "Clean up orders table" 85 | sql: "DROP TABLE IF EXISTS orders" 86 | - action: "Clean up products table" 87 | sql: "DROP TABLE IF EXISTS products" -------------------------------------------------------------------------------- /tests/07_error_cases.yaml: -------------------------------------------------------------------------------- 1 | metadata: 2 | version: "1.0" 3 | description: "Error cases that should fail" 4 | 5 | setup: [] 6 | 7 | test_cases: 8 | - name: "Syntax error - column id is not defined" 9 | sql: "SELECT * WHERE id = 1" 10 | expect: 11 | error: true 12 | 13 | - name: "Syntax error - invalid SQL statement" 14 | sql: "SELCT * FROM users" 15 | expect: 16 | error: true 17 | 18 | - name: "Non-existing table" 19 | sql: "SELECT * FROM non_existing_table" 20 | expect: 21 | error: true 22 | 23 | - name: "Non-existing column" 24 | sql: "SELECT non_existing_column FROM users" 25 | expect: 26 | error: true 27 | 28 | - name: "Invalid column reference in WHERE" 29 | sql: "SELECT * FROM users WHERE invalid_column = 1" 30 | expect: 31 | error: true 32 | 33 | - name: "Invalid function name" 34 | sql: "SELECT INVALID_FUNCTION()" 35 | expect: 36 | error: true 37 | 38 | - name: "Duplicate table creation (MemCP allows duplicate CREATE TABLE)" 39 | sql: "CREATE TABLE users (id INTEGER)" 40 | expect: {} 41 | 42 | - name: "Drop non-existing table" 43 | sql: "DROP TABLE non_existing_table" 44 | 
expect: 45 | error: true 46 | 47 | - name: "Missing closing parenthesis" 48 | sql: "SELECT * FROM (SELECT * FROM users" 49 | expect: 50 | error: true 51 | 52 | cleanup: [] 53 | -------------------------------------------------------------------------------- /tests/08_rdf_sparql.yaml: -------------------------------------------------------------------------------- 1 | metadata: 2 | version: "1.0" 3 | description: "RDF and SPARQL query testing with TTL data" 4 | 5 | setup: [] 6 | 7 | test_cases: 8 | # Error cases for SPARQL - parser should reject these 9 | - name: "Invalid SPARQL syntax - missing braces" 10 | sparql: | 11 | SELECT ?name WHERE 12 | ?person ?name . 13 | expect: 14 | error: true 15 | 16 | - name: "Invalid SPARQL syntax - malformed SELECT keyword" 17 | sparql: | 18 | SELCT ?name WHERE { 19 | ?person ?name . 20 | } 21 | expect: 22 | error: true 23 | 24 | - name: "Invalid SPARQL syntax - missing WHERE clause" 25 | sparql: | 26 | SELECT ?name { 27 | ?person ?name . 28 | } 29 | expect: 30 | error: true 31 | 32 | cleanup: [] -------------------------------------------------------------------------------- /tests/09_joins.yaml: -------------------------------------------------------------------------------- 1 | # Minimal Table Operations Test - JOIN Version 2 | # Testing just the most basic operations to isolate database corruption issue 3 | # Includes JOIN operations 4 | 5 | metadata: 6 | version: "1.0" 7 | description: "Minimal table operations test for debugging, including JOINs" 8 | 9 | setup: 10 | - name: "Clean up test tables" 11 | sql: "DROP TABLE IF EXISTS simple_test" 12 | - name: "Clean up test tables" 13 | sql: "DROP TABLE IF EXISTS related_data" 14 | - name: "init 1" 15 | sql: "CREATE TABLE simple_test (id INT, name VARCHAR(50))" 16 | - name: "init2" 17 | sql: "CREATE TABLE related_data (id INT, simple_test_id INT, description VARCHAR(100))" 18 | - name: "init3" 19 | sql: "INSERT INTO simple_test (id, name) VALUES (1, 'test'), (2, 'another')" 20 | 21 | test_cases: 22 | - name: "Simple INSERT" 23 | sql: "INSERT INTO related_data (id, simple_test_id, description) VALUES (101, 1, 'Description for test'), (102, 2, 'Another description')" 24 | expect: 25 | affected_rows: 2 #Should be two inserts 26 | 27 | - name: "SELECT simple test" 28 | sql: "SELECT * FROM related_data WHERE simple_test_id = 1" 29 | expect: 30 | rows: 1 # Should return the row where simple_test_id is 1 31 | 32 | - name: "SELECT JOIN test with multiple records" 33 | sql: "SELECT s.name, r.description FROM simple_test s JOIN related_data r ON s.id = r.simple_test_id WHERE s.id = 1" 34 | expect: 35 | rows: 1 36 | expected_results: 37 | - name: 'test' 38 | description: 'Description for test' 39 | 40 | 41 | # - name: "Complex SELECT JOIN test with filtering" 42 | # sql: "SELECT s.name, r.description FROM simple_test s JOIN related_data r ON s.id = r.simple_test_id WHERE s.id IN (1, 2) AND r.description LIKE '%description%'" 43 | # expect: 44 | # rows: 2 # Should return rows where id is 1 or 2 AND description contains "description" 45 | # expected_results: 46 | # - name: 'test' 47 | # description: 'Description for test' 48 | # - name: 'another' 49 | # description: 'Another description' 50 | # 51 | # - name: "SELECT JOIN with COUNT" 52 | # sql: "SELECT COUNT(*) FROM simple_test s JOIN related_data r ON s.id = r.simple_test_id" 53 | # expect: 54 | # result: 2 #Should return the number of records related to simple_test 55 | # 56 | # - name: "SELECT JOIN with DISTINCT" 57 | # sql: "SELECT DISTINCT r.description FROM 
simple_test s JOIN related_data r ON s.id = r.simple_test_id" 58 | # expect: 59 | # rows: 2 60 | # expected_results: 61 | # - description: "Description for test" 62 | # - description: "Another description" 63 | # 64 | -------------------------------------------------------------------------------- /tests/10_nulls.yaml: -------------------------------------------------------------------------------- 1 | # NULLs and COALESCE Test Suite 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "NULL literal handling and COALESCE; COUNT with NULLs" 6 | 7 | setup: 8 | - sql: "DROP TABLE IF EXISTS null_test" 9 | - sql: "CREATE TABLE null_test (id INT, name VARCHAR(50))" 10 | - sql: "INSERT INTO null_test (id, name) VALUES (1, 'alpha'), (2, NULL), (3, NULL)" 11 | 12 | test_cases: 13 | - name: "NULL literal" 14 | sql: "SELECT NULL AS v" 15 | expect: 16 | rows: 1 17 | 18 | - name: "COALESCE picks first non-null" 19 | sql: "SELECT COALESCE(NULL, 'x', 'y') AS v" 20 | expect: 21 | rows: 1 22 | data: 23 | - v: "x" 24 | 25 | - name: "COUNT ignores NULL values" 26 | sql: "SELECT COUNT(name) AS c FROM null_test" 27 | expect: 28 | rows: 1 29 | data: 30 | - c: 1 31 | 32 | - name: "COUNT(*) counts all rows" 33 | sql: "SELECT COUNT(*) AS c FROM null_test" 34 | expect: 35 | rows: 1 36 | data: 37 | - c: 3 38 | 39 | cleanup: 40 | - sql: "DROP TABLE IF EXISTS null_test" 41 | -------------------------------------------------------------------------------- /tests/11_group_having.yaml: -------------------------------------------------------------------------------- 1 | # GROUP BY and HAVING Test Suite 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "Aggregation with GROUP BY and HAVING" 6 | 7 | setup: 8 | - sql: "DROP TABLE IF EXISTS sales" 9 | - sql: "CREATE TABLE sales (dept VARCHAR(20), amount INT)" 10 | - sql: | 11 | INSERT INTO sales (dept, amount) VALUES 12 | ('A', 10), ('A', 20), ('B', 5) 13 | 14 | test_cases: 15 | - name: "Group and count with HAVING" 16 | sql: "SELECT dept, COUNT(*) AS c FROM sales GROUP BY dept HAVING COUNT(*) > 1 ORDER BY dept" 17 | expect: 18 | rows: 1 19 | data: 20 | - dept: "A" 21 | c: 2 22 | 23 | - name: "Group with SUM and ORDER" 24 | sql: "SELECT dept, SUM(amount) AS s FROM sales GROUP BY dept ORDER BY SUM(amount) DESC" 25 | expect: 26 | rows: 2 27 | 28 | cleanup: 29 | - sql: "DROP TABLE IF EXISTS sales" 30 | -------------------------------------------------------------------------------- /tests/12_joins_outer.yaml: -------------------------------------------------------------------------------- 1 | # Outer Join Semantics Test Suite 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "LEFT/RIGHT JOIN with missing matches and row multiplicity" 6 | 7 | setup: 8 | - sql: "DROP TABLE IF EXISTS parent" 9 | - sql: "DROP TABLE IF EXISTS child" 10 | - sql: "CREATE TABLE parent (id INT, label VARCHAR(20))" 11 | - sql: "CREATE TABLE child (pid INT, note VARCHAR(20))" 12 | - sql: "INSERT INTO parent (id, label) VALUES (1, 'p1'), (2, 'p2')" 13 | - sql: "INSERT INTO child (pid, note) VALUES (2, 'c2')" 14 | 15 | test_cases: 16 | # TODO(memcp): LEFT JOIN with unmatched row returns repeated NULLs (note: [null,null]).
17 | # - name: "LEFT JOIN preserves left rows" 18 | # sql: "SELECT p.id, c.note AS note FROM parent p LEFT JOIN child c ON p.id = c.pid ORDER BY p.id" 19 | # expect: 20 | # rows: 2 21 | # data: 22 | # - id: 1 23 | # note: null 24 | # - id: 2 25 | # note: "c2" 26 | 27 | - name: "RIGHT JOIN mirrors behavior" 28 | sql: "SELECT p.id, c.note AS note FROM parent p RIGHT JOIN child c ON p.id = c.pid ORDER BY p.id" 29 | expect: 30 | rows: 1 31 | data: 32 | - id: 2 33 | note: "c2" 34 | 35 | cleanup: 36 | - sql: "DROP TABLE IF EXISTS child" 37 | - sql: "DROP TABLE IF EXISTS parent" 38 | -------------------------------------------------------------------------------- /tests/13_subselects.yaml: -------------------------------------------------------------------------------- 1 | # Subselects in FROM (Derived Tables) 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "Use SELECT in FROM (basic)" 6 | 7 | setup: 8 | - sql: "DROP TABLE IF EXISTS customers" 9 | - sql: "DROP TABLE IF EXISTS orders" 10 | - sql: "CREATE TABLE customers (id INT, name VARCHAR(50))" 11 | - sql: "CREATE TABLE orders (id INT, customer_id INT)" 12 | - sql: "INSERT INTO customers (id, name) VALUES (1, 'Alice'), (2, 'Bob')" 13 | - sql: "INSERT INTO orders (id, customer_id) VALUES (1,1), (2,1), (3,2)" 14 | 15 | test_cases: 16 | - name: "Derived table with filter" 17 | sql: "SELECT t.a FROM (SELECT 1 AS a) t WHERE t.a = 1" 18 | expect: 19 | rows: 1 20 | data: 21 | - a: 1 22 | 23 | # TODO(memcp): Subselect-in-FROM (derived tables) fails with "Unknown function: ". 24 | # - name: "Join with aggregated subselect" 25 | # sql: | 26 | # SELECT c.name, o.cnt 27 | # FROM customers c 28 | # JOIN (SELECT customer_id, COUNT(*) AS cnt FROM orders GROUP BY customer_id) o 29 | # ON c.id = o.customer_id 30 | # ORDER BY o.cnt DESC 31 | # LIMIT 1 32 | # expect: 33 | # rows: 1 34 | # data: 35 | # - name: "Alice" 36 | # cnt: 2 37 | 38 | cleanup: 39 | - sql: "DROP TABLE IF EXISTS orders" 40 | - sql: "DROP TABLE IF EXISTS customers" 41 | -------------------------------------------------------------------------------- /tests/14_order_limit.yaml: -------------------------------------------------------------------------------- 1 | # ORDER BY and LIMIT/OFFSET 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "Ordering by multiple keys with pagination" 6 | 7 | setup: 8 | - sql: "DROP TABLE IF EXISTS ord_t" 9 | - sql: "CREATE TABLE ord_t (a INT, b INT)" 10 | - sql: | 11 | INSERT INTO ord_t (a,b) VALUES 12 | (1,2),(1,1),(2,2),(2,0),(3,1) 13 | 14 | test_cases: 15 | - name: "Top-1 by a DESC" 16 | sql: "SELECT a, b FROM ord_t ORDER BY a DESC LIMIT 1" 17 | expect: 18 | rows: 1 19 | data: 20 | - a: 3 21 | b: 1 22 | 23 | - name: "Top-1 by a ASC" 24 | sql: "SELECT a FROM ord_t ORDER BY a ASC LIMIT 1" 25 | expect: 26 | rows: 1 27 | data: 28 | - a: 1 29 | 30 | - name: "Top-2 by a DESC (no offset)" 31 | sql: "SELECT a FROM ord_t ORDER BY a DESC LIMIT 2" 32 | expect: 33 | rows: 2 34 | data: 35 | - a: 3 36 | 37 | - name: "Multi-key order with limit/offset" 38 | sql: "SELECT a, b FROM ord_t ORDER BY a DESC, b ASC LIMIT 2 OFFSET 1" 39 | expect: 40 | rows: 2 41 | data: 42 | - a: 2 43 | b: 0 44 | - a: 2 45 | b: 2 46 | 47 | cleanup: 48 | - sql: "DROP TABLE IF EXISTS ord_t" 49 | -------------------------------------------------------------------------------- /tests/15_dml.yaml: -------------------------------------------------------------------------------- 1 | # DML: UPDATE and DELETE 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "Verify affected_rows and row changes 
for UPDATE/DELETE" 6 | 7 | setup: 8 | - sql: "DROP TABLE IF EXISTS dml_t" 9 | - sql: "CREATE TABLE dml_t (id INT, x INT, flag INT)" 10 | - sql: "INSERT INTO dml_t (id, x, flag) VALUES (1, 10, 1), (2, 20, 0), (3, 30, 0)" 11 | 12 | test_cases: 13 | - name: "UPDATE with predicate" 14 | sql: "UPDATE dml_t SET x = x + 1 WHERE id < 3" 15 | expect: 16 | affected_rows: 2 17 | 18 | - name: "DELETE with predicate" 19 | sql: "DELETE FROM dml_t WHERE flag = 0" 20 | expect: 21 | affected_rows: 2 22 | 23 | - name: "Remaining row count" 24 | sql: "SELECT COUNT(*) AS c FROM dml_t" 25 | expect: 26 | rows: 1 27 | data: 28 | - c: 1 29 | 30 | cleanup: 31 | - sql: "DROP TABLE IF EXISTS dml_t" 32 | 33 | -------------------------------------------------------------------------------- /tests/16_group_by_sum.yaml: -------------------------------------------------------------------------------- 1 | # GROUP BY SUM Test Suite 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "Validate SELECT a, SUM(b) FROM tbl GROUP BY a" 6 | 7 | setup: 8 | - sql: "DROP TABLE IF EXISTS tbl" 9 | - sql: "CREATE TABLE tbl (a INT, b INT)" 10 | - sql: | 11 | INSERT INTO tbl (a, b) VALUES 12 | (1, 10), (1, 20), (2, 5), (3, 7) 13 | 14 | test_cases: 15 | - name: "Group sum by a (exact query)" 16 | sql: "SELECT a, SUM(b) FROM tbl GROUP BY a" 17 | expect: 18 | rows: 3 19 | 20 | - name: "Group sum by a with ORDER" 21 | sql: "SELECT a, SUM(b) AS s FROM tbl GROUP BY a ORDER BY a" 22 | expect: 23 | rows: 3 24 | data: 25 | - a: 1 26 | s: 30 27 | - a: 2 28 | s: 5 29 | - a: 3 30 | s: 7 31 | 32 | cleanup: 33 | - sql: "DROP TABLE IF EXISTS tbl" 34 | -------------------------------------------------------------------------------- /tests/17_strings_like.yaml: -------------------------------------------------------------------------------- 1 | # LIKE operator and patterns 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "Pattern matching with % and _" 6 | 7 | setup: [] 8 | 9 | test_cases: 10 | - name: "Prefix match" 11 | sql: "SELECT 'alpha' LIKE 'a%' AS r" 12 | expect: 13 | rows: 1 14 | data: 15 | - r: true 16 | 17 | - name: "Single-char match" 18 | sql: "SELECT 'a1' LIKE 'a_' AS r" 19 | expect: 20 | rows: 1 21 | data: 22 | - r: true 23 | 24 | - name: "No match" 25 | sql: "SELECT 'beta' LIKE 'a%' AS r" 26 | expect: 27 | rows: 1 28 | data: 29 | - r: false 30 | 31 | -------------------------------------------------------------------------------- /tests/18_unnesting.yaml: -------------------------------------------------------------------------------- 1 | # Unnesting: Derived Tables (Basic) 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "Unnesting arbitrary queries: basic derived table" 6 | 7 | setup: 8 | - sql: "DROP TABLE IF EXISTS ord_t" 9 | - sql: "CREATE TABLE ord_t (a INT)" 10 | - sql: "INSERT INTO ord_t (a) VALUES (1),(3),(2),(5),(4)" 11 | 12 | test_cases: 13 | - name: "Select from simple derived table" 14 | sql: "SELECT * FROM (SELECT 1 AS a) t" 15 | expect: 16 | rows: 1 17 | data: 18 | - a: 1 19 | 20 | - name: "ORDER ignored without LIMIT in derived table" 21 | sql: "SELECT * FROM (SELECT a FROM ord_t ORDER BY a DESC) t" 22 | expect: 23 | rows: 5 24 | 25 | # TODO: add correlated subselect in FROM (LATERAL-style) 26 | # - name: "Derived table with correlation" 27 | # sql: | 28 | # SELECT x, y 29 | # FROM (SELECT 1 AS x) d 30 | # JOIN (SELECT x+1 AS y) t 31 | # expect: 32 | # rows: 1 33 | 34 | # TODO: ORDER BY in subquery ignored without LIMIT 35 | # - name: "ORDER ignored without LIMIT in derived table" 36 | # sql: "SELECT * FROM 
(SELECT 2 AS a UNION ALL SELECT 1 AS a ORDER BY a DESC) t" 37 | # expect: {} 38 | 39 | # TODO: ORDER BY with LIMIT in derived table affects rows 40 | # - name: "ORDER with LIMIT in derived table" 41 | # sql: "SELECT * FROM (SELECT 2 AS a UNION ALL SELECT 1 AS a ORDER BY a DESC LIMIT 1) t" 42 | # expect: 43 | # rows: 1 44 | # data: 45 | # - a: 2 46 | 47 | cleanup: [] 48 | -------------------------------------------------------------------------------- /tests/19_subselect_order.yaml: -------------------------------------------------------------------------------- 1 | # ORDER BY on derived-table alias 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "ORDER BY t.col where t is a subselect alias" 6 | 7 | setup: 8 | - sql: "DROP TABLE IF EXISTS temperature" 9 | - sql: "DROP TABLE IF EXISTS sensor" 10 | - sql: "CREATE TABLE temperature (date INT, sensor INT, temp INT)" 11 | - sql: "CREATE TABLE sensor (ID INT, name VARCHAR(64))" 12 | - sql: | 13 | INSERT INTO temperature (date, sensor, temp) VALUES 14 | (10, 1, 21), 15 | (20, 1, 22), 16 | (15, 2, 23) 17 | - sql: | 18 | INSERT INTO sensor (ID, name) VALUES 19 | (1, 'S1'), 20 | (2, 'S2') 21 | 22 | test_cases: 23 | - name: "Order by alias-qualified column" 24 | sql: | 25 | SELECT `t`.* 26 | FROM (SELECT `date` AS `date` FROM `temperature`) AS `t` 27 | ORDER BY `t`.`date` DESC 28 | LIMIT 2 OFFSET 0 29 | expect: 30 | rows: 2 31 | data: 32 | - date: 20 33 | - date: 15 34 | 35 | - name: "Derived alias WHERE on base column" 36 | sql: | 37 | SELECT t.* 38 | FROM ( 39 | SELECT `date` AS `date`, `sensor` AS `sensor` 40 | FROM `temperature` 41 | ) AS t 42 | WHERE t.sensor = 1 43 | ORDER BY t.date DESC 44 | LIMIT 2 45 | expect: 46 | rows: 2 47 | data: 48 | - date: 20 49 | sensor: 1 50 | - date: 10 51 | sensor: 1 52 | 53 | - name: "Derived alias WHERE inside subselect (qualified table)" 54 | sql: | 55 | SELECT t.* 56 | FROM ( 57 | SELECT `date` AS `date`, `sensor` AS `sensor` 58 | FROM `temperature` 59 | WHERE temperature.`sensor` = 1 60 | ) AS t 61 | ORDER BY t.date DESC 62 | LIMIT 2 63 | expect: 64 | rows: 2 65 | data: 66 | - date: 20 67 | sensor: 1 68 | - date: 10 69 | sensor: 1 70 | 71 | - name: "Derived alias WHERE inside subselect (unqualified table)" 72 | sql: | 73 | SELECT t.* 74 | FROM ( 75 | SELECT `date` AS `date`, `sensor` AS `sensor` 76 | FROM `temperature` 77 | WHERE `sensor` = 1 78 | ) AS t 79 | ORDER BY t.date DESC 80 | LIMIT 2 81 | expect: 82 | rows: 2 83 | data: 84 | - date: 20 85 | sensor: 1 86 | - date: 10 87 | sensor: 1 88 | 89 | - name: "Derived alias simple star without ORDER" 90 | sql: | 91 | SELECT t.* 92 | FROM ( 93 | SELECT `date` AS `date` 94 | FROM `temperature` 95 | ) AS t 96 | LIMIT 3 97 | expect: 98 | rows: 3 99 | 100 | cleanup: 101 | - sql: "DROP TABLE IF EXISTS temperature" 102 | - sql: "DROP TABLE IF EXISTS sensor" 103 | -------------------------------------------------------------------------------- /tests/20_default_values.yaml: -------------------------------------------------------------------------------- 1 | # Default Values Test Suite 2 | # Validate that DEFAULT values are applied on INSERT when a column is omitted 3 | 4 | metadata: 5 | version: "1.0" 6 | description: "Default column values (boolean) on INSERT" 7 | 8 | setup: 9 | - { sql: "CREATE TABLE default_test (id INT, flag BOOLEAN DEFAULT FALSE) ENGINE=MEMORY" } 10 | 11 | test_cases: 12 | - name: "Insert without defaulted column" 13 | sql: "INSERT INTO default_test (id) VALUES (1)" 14 | expect: 15 | affected_rows: 1 16 | 17 | - name: "Select row with default 
applied" 18 | sql: "SELECT id, flag FROM default_test ORDER BY id" 19 | expect: 20 | rows: 1 21 | data: 22 | - { id: 1, flag: false } 23 | 24 | - name: "Insert with explicit TRUE" 25 | sql: "INSERT INTO default_test (id, flag) VALUES (2, TRUE)" 26 | expect: 27 | affected_rows: 1 28 | 29 | - name: "Verify both rows and flags" 30 | sql: "SELECT id, flag FROM default_test ORDER BY id" 31 | expect: 32 | rows: 2 33 | data: 34 | - { id: 1, flag: false } 35 | - { id: 2, flag: true } 36 | 37 | - name: "Alter default to TRUE via ALTER COLUMN" 38 | sql: "ALTER TABLE default_test ALTER COLUMN flag SET DEFAULT TRUE" 39 | expect: {} 40 | 41 | - name: "Insert after default changed to TRUE" 42 | sql: "INSERT INTO default_test (id) VALUES (3)" 43 | expect: 44 | affected_rows: 1 45 | 46 | - name: "Verify defaults after change to TRUE" 47 | sql: "SELECT id, flag FROM default_test ORDER BY id" 48 | expect: 49 | rows: 3 50 | data: 51 | - { id: 1, flag: false } 52 | - { id: 2, flag: true } 53 | - { id: 3, flag: true } 54 | 55 | - name: "Alter default to FALSE via ALTER COLUMN" 56 | sql: "ALTER TABLE default_test ALTER COLUMN flag SET DEFAULT FALSE" 57 | expect: {} 58 | 59 | - name: "Insert after default changed to FALSE" 60 | sql: "INSERT INTO default_test (id) VALUES (4)" 61 | expect: 62 | affected_rows: 1 63 | 64 | - name: "Verify defaults after change to FALSE" 65 | sql: "SELECT id, flag FROM default_test ORDER BY id" 66 | expect: 67 | rows: 4 68 | data: 69 | - { id: 1, flag: false } 70 | - { id: 2, flag: true } 71 | - { id: 3, flag: true } 72 | - { id: 4, flag: false } 73 | 74 | cleanup: 75 | - { sql: "DROP TABLE IF EXISTS default_test" } 76 | -------------------------------------------------------------------------------- /tests/21_grant_revoke.yaml: -------------------------------------------------------------------------------- 1 | # GRANT/REVOKE Test Suite 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "User privileges: GRANT and REVOKE" 6 | 7 | setup: 8 | - sql: "DELETE FROM `system`.`access` WHERE username='alice'" 9 | expect: {} 10 | - sql: "DELETE FROM `system`.`user` WHERE username='alice'" 11 | expect: {} 12 | 13 | test_cases: 14 | - name: "CREATE USER alice" 15 | sql: "CREATE USER alice IDENTIFIED BY 'pw'" 16 | expect: 17 | affected_rows: 1 18 | 19 | - name: "GRANT ALL ON *.* sets admin" 20 | sql: "GRANT ALL ON *.* TO alice" 21 | expect: {} 22 | 23 | - name: "admin flag is true after grant" 24 | sql: "SELECT admin FROM system.user WHERE username = 'alice'" 25 | expect: 26 | rows: 1 27 | data: 28 | - { admin: true } 29 | 30 | - name: "REVOKE ALL ON *.* clears admin" 31 | sql: "REVOKE ALL ON *.* FROM alice" 32 | expect: {} 33 | 34 | - name: "admin flag is false after revoke" 35 | sql: "SELECT admin FROM system.user WHERE username = 'alice'" 36 | expect: 37 | rows: 1 38 | data: 39 | - { admin: false } 40 | 41 | - name: "GRANT db access creates system.access entry" 42 | sql: "GRANT ALL PRIVILEGES ON `memcp-tests`.* TO alice" 43 | expect: {} 44 | 45 | - name: "Verify access entry exists" 46 | sql: "SELECT database FROM system.access WHERE username='alice' AND database='memcp-tests'" 47 | expect: 48 | rows: 1 49 | data: 50 | - { database: "memcp-tests" } 51 | 52 | - name: "REVOKE db access removes entry" 53 | sql: "REVOKE ALL PRIVILEGES ON `memcp-tests`.* FROM alice" 54 | expect: 55 | affected_rows: 1 56 | 57 | - name: "Verify access entry removed" 58 | sql: "SELECT database FROM system.access WHERE username='alice' AND database='memcp-tests'" 59 | expect: 60 | rows: 0 61 | 62 | # Policy 
enforcement tests (per-user auth) 63 | - name: "prepare table t as root (create)" 64 | sql: "CREATE TABLE t(id INT, name TEXT)" 65 | expect: {} 66 | 67 | - name: "prepare table t as root (insert)" 68 | sql: "INSERT INTO t(id, name) VALUES (1, 'x')" 69 | expect: { affected_rows: 1 } 70 | 71 | - name: "alice cannot read without grant" 72 | username: "alice" 73 | password: "pw" 74 | sql: "SELECT * FROM t" 75 | expect: 76 | error: true 77 | 78 | - name: "grant db access to alice" 79 | sql: "GRANT SELECT ON `memcp-tests`.* TO alice" 80 | expect: {} 81 | 82 | - name: "alice can read after grant" 83 | username: "alice" 84 | password: "pw" 85 | sql: "SELECT id, name FROM t" 86 | expect: 87 | rows: 1 88 | data: 89 | - { id: 1, name: "x" } 90 | 91 | - name: "revoke db access from alice" 92 | sql: "REVOKE SELECT ON `memcp-tests`.* FROM alice" 93 | expect: { affected_rows: 1 } 94 | 95 | cleanup: [] 96 | -------------------------------------------------------------------------------- /tests/22_delete_qualified.yaml: -------------------------------------------------------------------------------- 1 | # Schema-qualified DELETE and DROP DATABASE IF EXISTS 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "Ensure DELETE FROM schema.tbl works and DROP DATABASE IF EXISTS is accepted" 6 | 7 | setup: 8 | - sql: "DROP TABLE IF EXISTS dmlq" 9 | - sql: "CREATE TABLE dmlq (id INT, v INT)" 10 | - sql: "INSERT INTO dmlq (id, v) VALUES (1, 10), (2, 20), (3, 30)" 11 | 12 | test_cases: 13 | - name: "DELETE using schema-qualified table" 14 | sql: "DELETE FROM `memcp-tests`.dmlq WHERE v >= 20" 15 | expect: 16 | affected_rows: 2 17 | 18 | - name: "Remaining rows after schema-qualified delete" 19 | sql: "SELECT COUNT(*) AS c FROM dmlq" 20 | expect: 21 | rows: 1 22 | data: 23 | - c: 1 24 | 25 | - name: "DROP DATABASE IF EXISTS on non-existent db" 26 | sql: "DROP DATABASE IF EXISTS `surely_nonexistent_db_xyz`" 27 | expect: {} 28 | 29 | cleanup: 30 | - sql: "DROP TABLE IF EXISTS dmlq" 31 | -------------------------------------------------------------------------------- /tests/23_policy_enforcement.yaml: -------------------------------------------------------------------------------- 1 | # Policy enforcement across SELECT/INSERT/UPDATE/DELETE 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "Policy checks for all CRUD operations" 6 | 7 | setup: 8 | - sql: "DELETE FROM `system`.`access` WHERE username='alice'" 9 | - sql: "DELETE FROM `system`.`user` WHERE username='alice'" 10 | - sql: "CREATE USER alice IDENTIFIED BY 'pw'" 11 | - sql: "DROP TABLE IF EXISTS t" 12 | - sql: "CREATE TABLE t(id INT, name TEXT)" 13 | 14 | test_cases: 15 | - name: "alice cannot CREATE DATABASE" 16 | username: "alice" 17 | password: "pw" 18 | sql: "CREATE DATABASE `policytestdb_alice`" 19 | expect: { error: true } 20 | 21 | - name: "alice cannot CREATE USER" 22 | username: "alice" 23 | password: "pw" 24 | sql: "CREATE USER charlie IDENTIFIED BY 'pw'" 25 | expect: { error: true } 26 | 27 | - name: "alice cannot GRANT on db" 28 | username: "alice" 29 | password: "pw" 30 | sql: "GRANT SELECT ON `memcp-tests`.* TO alice" 31 | expect: { error: true } 32 | 33 | - name: "alice cannot REVOKE on db" 34 | username: "alice" 35 | password: "pw" 36 | sql: "REVOKE SELECT ON `memcp-tests`.* FROM alice" 37 | expect: { error: true } 38 | 39 | - name: "alice cannot DROP DATABASE" 40 | username: "alice" 41 | password: "pw" 42 | sql: "DROP DATABASE `memcp-tests`" 43 | expect: { error: true } 44 | 45 | - name: "alice cannot SHUTDOWN" 46 | username: "alice" 47 | 
password: "pw" 48 | sql: "SHUTDOWN" 49 | expect: { error: true } 50 | 51 | - name: "alice cannot SELECT without grant" 52 | username: "alice" 53 | password: "pw" 54 | sql: "SELECT * FROM t" 55 | expect: { error: true } 56 | 57 | - name: "alice cannot INSERT without grant" 58 | username: "alice" 59 | password: "pw" 60 | sql: "INSERT INTO t(id, name) VALUES (1, 'x')" 61 | expect: { error: true } 62 | 63 | - name: "grant ALL on db to alice" 64 | sql: "GRANT ALL ON `memcp-tests`.* TO alice" 65 | expect: {} 66 | 67 | - name: "alice INSERT allowed after grant" 68 | username: "alice" 69 | password: "pw" 70 | sql: "INSERT INTO t(id, name) VALUES (2, 'y')" 71 | expect: { affected_rows: 1 } 72 | 73 | - name: "alice UPDATE allowed after grant" 74 | username: "alice" 75 | password: "pw" 76 | sql: "UPDATE t SET name='z' WHERE id=2" 77 | expect: { affected_rows: 1 } 78 | 79 | - name: "alice DELETE allowed after grant" 80 | username: "alice" 81 | password: "pw" 82 | sql: "DELETE FROM t WHERE id=2" 83 | expect: { affected_rows: 1 } 84 | 85 | - name: "revoke ALL on db from alice" 86 | sql: "REVOKE ALL ON `memcp-tests`.* FROM alice" 87 | expect: {} 88 | 89 | - name: "alice cannot SELECT after revoke" 90 | username: "alice" 91 | password: "pw" 92 | sql: "SELECT * FROM t" 93 | expect: { error: true } 94 | 95 | - name: "alice cannot DELETE after revoke" 96 | username: "alice" 97 | password: "pw" 98 | sql: "DELETE FROM t" 99 | expect: { error: true } 100 | 101 | cleanup: 102 | - sql: "DROP TABLE IF EXISTS t" 103 | 104 | -------------------------------------------------------------------------------- /tests/24_mysql_basic_compat.yaml: -------------------------------------------------------------------------------- 1 | # MySQL Compatibility v1 — Basics 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "MySQL v1: identifiers, casts, NULL/booleans, escapes" 6 | # Suite is now expected to pass fully 7 | 8 | setup: 9 | - sql: DROP TABLE IF EXISTS `users` 10 | 11 | test_cases: 12 | - name: "CREATE TABLE with backticked identifiers" 13 | sql: | 14 | CREATE TABLE `users` ( 15 | `id` INT PRIMARY KEY, 16 | `name` VARCHAR(100), 17 | `active` BOOLEAN 18 | ) 19 | expect: 20 | affected_rows: 1 21 | 22 | - name: "INSERT respects backticks" 23 | sql: | 24 | INSERT INTO `users` (`id`, `name`, `active`) VALUES (1, 'Ada', true) 25 | expect: 26 | affected_rows: 1 27 | 28 | - name: "SELECT with backticks" 29 | sql: | 30 | SELECT `id`, `name`, `active` FROM `users` 31 | expect: 32 | rows: 1 33 | data: 34 | - id: 1 35 | name: "Ada" 36 | active: true 37 | 38 | - name: "Implicit numeric cast: string to int comparison" 39 | sql: | 40 | SELECT '1' = 1 AS eq 41 | expect: 42 | rows: 1 43 | data: 44 | - eq: true 45 | 46 | - name: "Implicit boolean truthiness" 47 | sql: | 48 | SELECT IF(1, 't', 'f') AS v 49 | expect: 50 | rows: 1 51 | data: 52 | - v: "t" 53 | 54 | - name: "SELECT WHERE without FROM" 55 | # Avoids aggregate over derived table; checks filter semantics directly 56 | sql: | 57 | SELECT 1 WHERE 1 58 | expect: 59 | rows: 1 60 | 61 | - name: "SELECT WHERE without FROM and false" 62 | # Avoids aggregate over derived table; checks filter semantics directly 63 | sql: | 64 | SELECT 1 WHERE 0 65 | expect: 66 | rows: 0 67 | 68 | - name: "NULL truthiness (WHERE filters out)" 69 | # Avoids aggregate over derived table; checks filter semantics directly 70 | sql: | 71 | SELECT 1 WHERE NULL 72 | expect: 73 | rows: 0 74 | 75 | - name: "String escape single quote" 76 | sql: | 77 | SELECT 'can\'t' AS s 78 | expect: 79 | rows: 1 80 | data: 81 
| - s: "can't" 82 | 83 | - name: "Backslash literal (no escape)" 84 | noncritical: true 85 | sql: | 86 | SELECT 'line\\nbreak' AS s 87 | expect: 88 | rows: 1 89 | data: 90 | - s: "line\\nbreak" 91 | 92 | cleanup: [] 93 | -------------------------------------------------------------------------------- /tests/25_schema_qualified_insert.yaml: -------------------------------------------------------------------------------- 1 | # Schema-qualified INSERT 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "Support INSERT INTO schema.tbl(...) in memcp-tests" 6 | 7 | setup: 8 | - sql: "DROP TABLE IF EXISTS `memcp-tests`.base_models" 9 | - sql: | 10 | CREATE TABLE `memcp-tests`.base_models ( 11 | id INT PRIMARY KEY, 12 | model VARCHAR(100) 13 | ) 14 | 15 | test_cases: 16 | - name: "INSERT into schema-qualified table" 17 | sql: "INSERT INTO `memcp-tests`.base_models(id, model) VALUES (1, 'gpt')" 18 | expect: 19 | affected_rows: 1 20 | 21 | - name: "Verify insert via schema-qualified name" 22 | sql: "SELECT id, model FROM `memcp-tests`.base_models WHERE id = 1" 23 | expect: 24 | rows: 1 25 | data: 26 | - id: 1 27 | model: "gpt" 28 | 29 | cleanup: 30 | - sql: "DROP TABLE IF EXISTS `memcp-tests`.base_models" 31 | -------------------------------------------------------------------------------- /tests/26_mysql_datetime_defaults.yaml: -------------------------------------------------------------------------------- 1 | # MySQL Compatibility v1 — Date/Time semantics 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "MySQL v1: CURRENT_TIMESTAMP defaults, NOW(), comparisons" 6 | 7 | setup: 8 | - sql: "DROP TABLE IF EXISTS tstamp" 9 | 10 | test_cases: 11 | - name: "CREATE TABLE with default CURRENT_TIMESTAMP" 12 | sql: "CREATE TABLE tstamp (id INT PRIMARY KEY, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)" 13 | expect: 14 | affected_rows: 1 15 | 16 | - name: "INSERT row without timestamp uses default" 17 | sql: "INSERT INTO tstamp (id) VALUES (1)" 18 | expect: 19 | affected_rows: 1 20 | 21 | - name: "NOW() returns a value" 22 | sql: "SELECT NOW() AS nowv" 23 | expect: {} 24 | 25 | - name: "Default timestamp is not null" 26 | sql: "SELECT created_at IS NOT NULL AS ok FROM tstamp WHERE id = 1" 27 | expect: 28 | rows: 1 29 | data: 30 | - ok: true 31 | 32 | cleanup: 33 | - sql: "DROP TABLE IF EXISTS tstamp" 34 | -------------------------------------------------------------------------------- /tests/27_mysql_keys_indexes.yaml: -------------------------------------------------------------------------------- 1 | # MySQL Compatibility v1 — Keys and Indexes (acceptance) 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "MySQL v1: PK/UK creation and basic behavior" 6 | 7 | setup: 8 | - sql: "DROP TABLE IF EXISTS k" 9 | 10 | test_cases: 11 | - name: "CREATE TABLE with PK and UNIQUE" 12 | sql: "CREATE TABLE k (id INT PRIMARY KEY, email VARCHAR(100) UNIQUE)" 13 | expect: 14 | affected_rows: 1 15 | 16 | - name: "CREATE INDEX accepted" 17 | sql: "CREATE INDEX idx_email ON k (email)" 18 | expect: {} 19 | 20 | - name: "Insert row" 21 | sql: "INSERT INTO k (id, email) VALUES (1, 'a@example.com')" 22 | expect: 23 | affected_rows: 1 24 | 25 | - name: "Duplicate PK should error" 26 | sql: "INSERT INTO k (id, email) VALUES (1, 'b@example.com')" 27 | expect: 28 | error: true 29 | 30 | - name: "Duplicate UNIQUE should error" 31 | sql: "INSERT INTO k (id, email) VALUES (2, 'a@example.com')" 32 | expect: 33 | error: true 34 | 35 | cleanup: 36 | - sql: "DROP TABLE IF EXISTS k" 37 | 
-------------------------------------------------------------------------------- /tests/28_mysql_fk_acceptance.yaml: -------------------------------------------------------------------------------- 1 | # MySQL Compatibility v1 — Foreign Key DDL acceptance 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "MySQL v1: accept FK DDL (metadata/no-op)" 6 | 7 | setup: 8 | - sql: "DROP TABLE IF EXISTS child" 9 | - sql: "DROP TABLE IF EXISTS parent" 10 | 11 | test_cases: 12 | - name: "CREATE parent/child with FK" 13 | sql: | 14 | CREATE TABLE parent (id INT PRIMARY KEY, name VARCHAR(50)) 15 | expect: 16 | affected_rows: 1 17 | 18 | - name: "CREATE child with FK references" 19 | sql: | 20 | CREATE TABLE child ( 21 | id INT PRIMARY KEY, 22 | parent_id INT, 23 | CONSTRAINT fk_p FOREIGN KEY (parent_id) REFERENCES parent(id) 24 | ) 25 | expect: {} 26 | 27 | - name: "INSERT matching FK" 28 | sql: | 29 | INSERT INTO parent (id, name) VALUES (1, 'p1') 30 | expect: 31 | affected_rows: 1 32 | 33 | - name: "Insert child referencing parent" 34 | sql: "INSERT INTO child (id, parent_id) VALUES (10, 1)" 35 | expect: {} 36 | 37 | cleanup: 38 | - sql: "DROP TABLE IF EXISTS child" 39 | - sql: "DROP TABLE IF EXISTS parent" 40 | -------------------------------------------------------------------------------- /tests/29_mysql_upsert.yaml: -------------------------------------------------------------------------------- 1 | # MySQL Compatibility v1 — INSERT ... ON DUPLICATE KEY UPDATE 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "MySQL v1: upsert semantics for PK/UNIQUE keys" 6 | 7 | setup: 8 | - sql: "DROP TABLE IF EXISTS upsert_t" 9 | - sql: | 10 | CREATE TABLE upsert_t ( 11 | id INT PRIMARY KEY, 12 | x INT, 13 | y INT, 14 | email VARCHAR(100) UNIQUE 15 | ) 16 | 17 | test_cases: 18 | - name: "Insert initial row" 19 | sql: "INSERT INTO upsert_t (id, x, y, email) VALUES (1, 10, 5, 'a@example.com')" 20 | expect: 21 | affected_rows: 1 22 | 23 | - name: "Select initial state" 24 | sql: "SELECT id, x, y, email FROM upsert_t ORDER BY id" 25 | expect: 26 | rows: 1 27 | data: 28 | - id: 1 29 | x: 10 30 | y: 5 31 | email: "a@example.com" 32 | 33 | - name: "Simple overwrite: x = VALUES(x) on PK conflict" 34 | sql: | 35 | INSERT INTO upsert_t (id, x, y, email) 36 | VALUES (1, 42, 99, 'a@example.com') 37 | ON DUPLICATE KEY UPDATE x = VALUES(x) 38 | expect: {} 39 | 40 | - name: "Verify overwrite changed only x (not y/email)" 41 | sql: "SELECT id, x, y, email FROM upsert_t WHERE id = 1" 42 | expect: 43 | rows: 1 44 | data: 45 | - id: 1 46 | x: 42 47 | y: 5 48 | email: "a@example.com" 49 | 50 | - name: "Increment: x = x + 1 on PK conflict" 51 | sql: | 52 | INSERT INTO upsert_t (id, x, y, email) 53 | VALUES (1, 0, 0, 'a@example.com') 54 | ON DUPLICATE KEY UPDATE x = x + 1 55 | expect: {} 56 | 57 | - name: "Verify increment" 58 | sql: "SELECT id, x FROM upsert_t WHERE id = 1" 59 | expect: 60 | rows: 1 61 | data: 62 | - id: 1 63 | x: 43 64 | 65 | - name: "Add incoming: x = x + VALUES(x)" 66 | sql: | 67 | INSERT INTO upsert_t (id, x, y, email) 68 | VALUES (1, 7, 0, 'a@example.com') 69 | ON DUPLICATE KEY UPDATE x = x + VALUES(x) 70 | expect: {} 71 | 72 | - name: "Verify add incoming" 73 | sql: "SELECT id, x FROM upsert_t WHERE id = 1" 74 | expect: 75 | rows: 1 76 | data: 77 | - id: 1 78 | x: 50 79 | 80 | - name: "Use VALUES(y) in expression: x = x + VALUES(y)" 81 | sql: | 82 | INSERT INTO upsert_t (id, x, y, email) 83 | VALUES (1, 123, 4, 'a@example.com') 84 | ON DUPLICATE KEY UPDATE x = x + VALUES(y) 85 | expect: {} 86 | 87 | 
- name: "Verify VALUES(y) usage" 88 | sql: "SELECT id, x FROM upsert_t WHERE id = 1" 89 | expect: 90 | rows: 1 91 | data: 92 | - id: 1 93 | x: 54 94 | 95 | - name: "Seed second row" 96 | sql: "INSERT INTO upsert_t (id, x, y, email) VALUES (2, 7, 0, 'b@example.com')" 97 | expect: 98 | affected_rows: 1 99 | 100 | - name: "Upsert via UNIQUE(email) conflict; update x = VALUES(x)" 101 | sql: | 102 | INSERT INTO upsert_t (id, x, y, email) 103 | VALUES (3, 8, 9, 'b@example.com') 104 | ON DUPLICATE KEY UPDATE x = VALUES(x) 105 | expect: {} 106 | 107 | - name: "Verify UNIQUE conflict updated row with email b@example.com" 108 | sql: "SELECT id, x, y, email FROM upsert_t WHERE email = 'b@example.com'" 109 | expect: 110 | rows: 1 111 | data: 112 | - id: 2 113 | x: 8 114 | y: 0 115 | email: "b@example.com" 116 | 117 | - name: "Row count remains 2" 118 | sql: "SELECT COUNT(*) AS c FROM upsert_t" 119 | expect: 120 | rows: 1 121 | data: 122 | - c: 2 123 | 124 | - name: "No-op overwrite: x = VALUES(x) with same value" 125 | sql: | 126 | INSERT INTO upsert_t (id, x, y, email) 127 | VALUES (2, 8, 111, 'b@example.com') 128 | ON DUPLICATE KEY UPDATE x = VALUES(x) 129 | expect: 130 | affected_rows: 1 131 | 132 | - name: "Verify no-op did not change y" 133 | sql: "SELECT id, x, y FROM upsert_t WHERE id = 2" 134 | expect: 135 | rows: 1 136 | data: 137 | - id: 2 138 | x: 8 139 | y: 0 140 | 141 | - name: "Upsert changed returns affected_rows=2 (MySQL semantics)" 142 | sql: | 143 | INSERT INTO upsert_t (id, x, y, email) 144 | VALUES (2, 9, 0, 'b@example.com') 145 | ON DUPLICATE KEY UPDATE x = x + 1 146 | expect: 147 | affected_rows: 2 148 | 149 | - name: "Delete row id=1" 150 | sql: "DELETE FROM upsert_t WHERE id = 1" 151 | expect: 152 | affected_rows: 1 153 | 154 | - name: "Reinsert same PK+email after delete" 155 | sql: "INSERT INTO upsert_t (id, x, y, email) VALUES (1, 100, 0, 'a@example.com')" 156 | expect: 157 | affected_rows: 1 158 | 159 | - name: "Row count back to 2" 160 | sql: "SELECT COUNT(*) AS c FROM upsert_t" 161 | expect: 162 | rows: 1 163 | data: 164 | - c: 2 165 | 166 | - name: "Verify reinserted row" 167 | sql: "SELECT id, x, y, email FROM upsert_t WHERE id = 1" 168 | expect: 169 | rows: 1 170 | data: 171 | - id: 1 172 | x: 100 173 | y: 0 174 | email: "a@example.com" 175 | 176 | - name: "Illegal duplicate PK insert errors" 177 | sql: "INSERT INTO upsert_t (id, x, y, email) VALUES (2, 9, 9, 'new@example.com')" 178 | expect: 179 | error: true 180 | 181 | - name: "Illegal duplicate UNIQUE(email) insert errors" 182 | sql: "INSERT INTO upsert_t (id, x, y, email) VALUES (3, 1, 1, 'b@example.com')" 183 | expect: 184 | error: true 185 | 186 | cleanup: 187 | - sql: "DROP TABLE IF EXISTS upsert_t" 188 | -------------------------------------------------------------------------------- /tests/30_trailing_semicolon.yaml: -------------------------------------------------------------------------------- 1 | # Trailing semicolon acceptance 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "Accept a trailing ';' at end of SQL statement" 6 | 7 | setup: 8 | - sql: "DROP TABLE IF EXISTS ts;" 9 | 10 | test_cases: 11 | - name: "CREATE TABLE with trailing semicolon" 12 | sql: | 13 | CREATE TABLE ts ( 14 | id INT PRIMARY KEY, 15 | v INT 16 | ); 17 | expect: 18 | affected_rows: 1 19 | 20 | - name: "INSERT with trailing semicolon" 21 | sql: "INSERT INTO ts (id, v) VALUES (1, 10);" 22 | expect: 23 | affected_rows: 1 24 | 25 | - name: "SELECT with trailing semicolon" 26 | sql: "SELECT v FROM ts WHERE id = 1;" 27 | expect: 28 
| rows: 1 29 | data: 30 | - v: 10 31 | 32 | cleanup: 33 | - sql: "DROP TABLE IF EXISTS ts;" 34 | 35 | -------------------------------------------------------------------------------- /tests/31_length_function.yaml: -------------------------------------------------------------------------------- 1 | # LENGTH(str) function 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "Implement LENGTH(str) with NULL handling" 6 | 7 | setup: [] 8 | 9 | test_cases: 10 | - name: "LENGTH of literal" 11 | sql: "SELECT LENGTH('abc') AS n" 12 | expect: 13 | rows: 1 14 | data: 15 | - n: 3 16 | 17 | - name: "LENGTH of empty string" 18 | sql: "SELECT LENGTH('') AS n" 19 | expect: 20 | rows: 1 21 | data: 22 | - n: 0 23 | 24 | - name: "LENGTH of NULL is NULL" 25 | sql: "SELECT LENGTH(NULL) AS n" 26 | expect: 27 | rows: 1 28 | data: 29 | - n: null 30 | 31 | cleanup: [] 32 | 33 | -------------------------------------------------------------------------------- /tests/32_expr_subselects.yaml: -------------------------------------------------------------------------------- 1 | # Subselects in expressions (IN/EXISTS/scalar) 2 | 3 | metadata: 4 | version: "1.0" 5 | description: "Parse IN (SELECT ...), EXISTS (SELECT ...), scalar (SELECT ...) but reject in planner" 6 | 7 | setup: [] 8 | 9 | # TODO: turn error: true into what we expect according to SQL standard 10 | # TODO: also add complex cases with (SELECT FROM ) 11 | 12 | test_cases: 13 | - name: "IN subselect simple" 14 | sql: "SELECT 1 WHERE 1 IN (SELECT 1)" 15 | expect: 16 | error: true 17 | 18 | - name: "NOT IN subselect simple" 19 | sql: "SELECT 1 WHERE 1 NOT IN (SELECT 1)" 20 | expect: 21 | error: true 22 | 23 | - name: "EXISTS subselect" 24 | sql: "SELECT 1 WHERE EXISTS (SELECT 1)" 25 | expect: 26 | error: true 27 | 28 | - name: "Scalar subselect in SELECT list" 29 | sql: "SELECT (SELECT 1) AS x" 30 | expect: 31 | error: true 32 | 33 | - name: "IN constant list still supported" 34 | sql: "SELECT 1 WHERE 1 IN (1,2,3)" 35 | expect: 36 | rows: 1 37 | 38 | cleanup: [] 39 | 40 | -------------------------------------------------------------------------------- /tests/33_collations_order.yaml: -------------------------------------------------------------------------------- 1 | metadata: 2 | version: "1.0" 3 | description: "ORDER BY with COLLATE comparators" 4 | 5 | setup: 6 | - sql: "DROP TABLE IF EXISTS coll_t" 7 | - sql: | 8 | CREATE TABLE coll_t ( 9 | id INT, 10 | name VARCHAR(50) 11 | ) 12 | - sql: "INSERT INTO coll_t (id, name) VALUES (1, 'a'), (2, 'B'), (3, 'aa'), (4, 'Ä')" 13 | 14 | test_cases: 15 | - name: "ORDER BY binary default (uppercase before lowercase)" 16 | sql: "SELECT name FROM coll_t ORDER BY name LIMIT 4" 17 | expect: 18 | rows: 4 19 | data: 20 | - name: "B" 21 | - name: "a" 22 | - name: "aa" 23 | - name: "Ä" 24 | 25 | - name: "ORDER BY COLLATE general_ci (case-insensitive, a before B)" 26 | sql: "SELECT name FROM coll_t ORDER BY name COLLATE utf8mb4_general_ci LIMIT 2" 27 | expect: 28 | rows: 2 29 | data: 30 | - name: "a" 31 | - name: "B" 32 | 33 | - name: "ORDER BY COLLATE general_ci DESC (reverse order)" 34 | sql: "SELECT name FROM coll_t ORDER BY name COLLATE utf8mb4_general_ci DESC LIMIT 2" 35 | expect: 36 | rows: 2 37 | data: 38 | - name: "B" 39 | - name: "a" 40 | 41 | - name: "ORDER BY explicit COLLATE bin DESC" 42 | sql: "SELECT name FROM coll_t ORDER BY name COLLATE bin DESC LIMIT 2" 43 | expect: 44 | rows: 2 45 | data: 46 | - name: "Ä" 47 | - name: "aa" 48 | 49 | cleanup: 50 | - sql: "DROP TABLE IF EXISTS coll_t" 51 | 52 | 
-------------------------------------------------------------------------------- /tests/34_collation_columns.yaml: -------------------------------------------------------------------------------- 1 | metadata: 2 | version: "1.0" 3 | description: "Column-level COLLATE in CREATE/ALTER TABLE; verify persistence via SHOW" 4 | 5 | setup: 6 | - sql: "DROP TABLE IF EXISTS coll_cols" 7 | - sql: | 8 | CREATE TABLE coll_cols ( 9 | id INT, 10 | txt TEXT COLLATE utf8mb4_general_ci 11 | ) 12 | - sql: "INSERT INTO coll_cols (id, txt) VALUES (1, 'a'), (2, 'B')" 13 | 14 | test_cases: 15 | - name: "SHOW FULL COLUMNS exposes column collation" 16 | sql: "SHOW FULL COLUMNS FROM coll_cols" 17 | expect: 18 | rows: 2 19 | data: 20 | - Field: "id" 21 | - Field: "txt" 22 | Collation: "utf8mb4_general_ci" 23 | 24 | - name: "ORDER BY uses column COLLATE (ASC, general_ci)" 25 | sql: "SELECT txt FROM coll_cols ORDER BY txt LIMIT 2" 26 | expect: 27 | rows: 2 28 | data: 29 | - txt: "a" 30 | - txt: "B" 31 | 32 | - name: "SHUTDOWN memcp to verify persisted column collation" 33 | sql: "SHUTDOWN" 34 | expect: 35 | rows: 0 36 | 37 | - name: "ORDER BY uses column COLLATE (ASC, general_ci) after restart" 38 | sql: "SELECT txt FROM coll_cols ORDER BY txt LIMIT 2" 39 | expect: 40 | rows: 2 41 | data: 42 | - txt: "a" 43 | - txt: "B" 44 | 45 | - name: "ALTER COLUMN COLLATE to bin" 46 | sql: "ALTER TABLE coll_cols ALTER COLUMN txt COLLATE bin" 47 | expect: 48 | rows: 0 49 | 50 | - name: "SHOW FULL COLUMNS after ALTER shows new collation" 51 | sql: "SHOW FULL COLUMNS FROM coll_cols" 52 | expect: 53 | rows: 2 54 | data: 55 | - Field: "id" 56 | - Field: "txt" 57 | Collation: "bin" 58 | 59 | - name: "ORDER BY with column COLLATE bin (DESC)" 60 | sql: "SELECT txt FROM coll_cols ORDER BY txt DESC LIMIT 2" 61 | expect: 62 | rows: 2 63 | data: 64 | - txt: "a" 65 | - txt: "B" 66 | 67 | - name: "SHUTDOWN memcp after ALTER to verify bin persisted" 68 | sql: "SHUTDOWN" 69 | expect: 70 | rows: 0 71 | 72 | - name: "ORDER BY with column COLLATE bin (DESC) after restart" 73 | sql: "SELECT txt FROM coll_cols ORDER BY txt DESC LIMIT 2" 74 | expect: 75 | rows: 2 76 | data: 77 | - txt: "a" 78 | - txt: "B" 79 | 80 | cleanup: 81 | - sql: "DROP TABLE IF EXISTS coll_cols" 82 | -------------------------------------------------------------------------------- /tests/35_memory_engine.yaml: -------------------------------------------------------------------------------- 1 | metadata: 2 | version: "1.0" 3 | description: "ENGINE=MEMORY table retains schema but not data across restart" 4 | 5 | setup: 6 | - sql: "DROP TABLE IF EXISTS mem_t" 7 | - sql: "CREATE TABLE mem_t (id INT, v TEXT) ENGINE=MEMORY" 8 | 9 | test_cases: 10 | - name: "Insert data into MEMORY table" 11 | sql: "INSERT INTO mem_t (id, v) VALUES (1, 'x'), (2, 'y')" 12 | expect: 13 | affected_rows: 2 14 | 15 | - name: "Select returns inserted rows" 16 | sql: "SELECT id, v FROM mem_t ORDER BY id" 17 | expect: 18 | rows: 2 19 | data: 20 | - id: 1 21 | v: "x" 22 | - id: 2 23 | v: "y" 24 | 25 | - name: "SHUTDOWN memcp" 26 | sql: "SHUTDOWN" 27 | expect: 28 | rows: 0 29 | 30 | - name: "Table exists after restart" 31 | sql: "SHOW FULL COLUMNS FROM mem_t" 32 | expect: 33 | rows: 2 34 | 35 | - name: "Select after restart returns no rows" 36 | sql: "SELECT id, v FROM mem_t ORDER BY id" 37 | expect: 38 | rows: 0 39 | 40 | cleanup: 41 | - sql: "DROP TABLE IF EXISTS mem_t" 42 | -------------------------------------------------------------------------------- /tools/mysqldump-to-json.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Dump every table of a MySQL database to stdout: a '#table <name>' line, a '#columns' line with the column names, then one JSON object per row (tables are separated by a blank line). 3 | import mysql.connector 4 | import simplejson as json 5 | import argparse 6 | # Usage: mysqldump-to-json.py -u USER -p PASSWORD [-H HOST] DATABASE 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument('-H', '--host', default='localhost', help='hostname') 9 | parser.add_argument('-u', '--user', required=True, help='user') 10 | parser.add_argument('-p', '--password', required=True, help='password') 11 | parser.add_argument('database', help='database') 12 | args = parser.parse_args() 13 | # Connection parameters taken from the command line 14 | hostname = args.host 15 | user = args.user 16 | password = args.password 17 | database = args.database or user  # fall back to the user name if the value is empty 18 | 19 | # Connect to the source database 20 | mydb = mysql.connector.connect( 21 | host=hostname, 22 | user=user, 23 | password=password, 24 | database=database 25 | ) 26 | # Collect all table names first; the same cursor is reused for the per-table SELECTs below 27 | mycursor = mydb.cursor() 28 | mycursor.execute("SHOW TABLES") 29 | 30 | tables = [] 31 | for x in mycursor: 32 | tables.append(x[0]) 33 | # Dump each table: '#table' and '#columns' header lines, then one JSON document per row 34 | for t in tables: 35 | print('#table ' + t) 36 | mycursor.execute("SELECT * FROM `"+t.replace("`", "``")+"`")  # backticks in the table name are escaped by doubling 37 | print('#columns ', mycursor.column_names) 38 | for row in mycursor: 39 | print(json.dumps(dict(zip(mycursor.column_names, row)))) 40 | print('')  # blank line between tables 41 | --------------------------------------------------------------------------------
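Not part of the repository: a minimal reader sketch showing how the '#table' / '#columns' / JSON-per-row stream emitted by tools/mysqldump-to-json.py could be parsed back into per-table row dictionaries. The file name dump.jsonl is only an illustrative assumption; the script writes to stdout, so the stream would first have to be redirected into such a file. Each JSON line is produced by json.dumps on a dict of column name to value, so json.loads recovers a row directly.

#!/usr/bin/env python3
# Reader sketch (assumption, not part of memcp): load the output of tools/mysqldump-to-json.py
# after it has been redirected to a file, e.g. dump.jsonl (hypothetical name).
import json
import sys

def read_dump(path):
    tables = {}   # table name -> list of row dicts
    current = None
    with open(path, encoding="utf-8") as f:
        for raw in f:
            line = raw.strip()
            if not line:
                continue  # a blank line only separates tables
            if line.startswith("#table "):
                current = line[len("#table "):]
                tables[current] = []
            elif line.startswith("#columns"):
                continue  # column names are already repeated as keys in every JSON row
            else:
                tables[current].append(json.loads(line))
    return tables

if __name__ == "__main__":
    for name, rows in read_dump(sys.argv[1]).items():
        print(name, "->", len(rows), "rows")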