├── .gitignore
├── AGENTS.md
├── CHANGELOG.md
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── ai_optimizer.py
├── apps
│   ├── keyvalue.scm
│   └── minigame.scm
├── assets
│   ├── gauge-needle.png
│   ├── gauge.png
│   ├── htop.png
│   ├── info.html
│   ├── memcp-logo.svg
│   ├── ports.svg
│   ├── shot1.png
│   ├── shot2.png
│   └── webapps.svg
├── docs
│   ├── arithmetic--logic.md
│   ├── associative-lists--dictionaries.md
│   ├── date.md
│   ├── docu.zip
│   ├── index.md
│   ├── io.md
│   ├── lists.md
│   ├── parsers.md
│   ├── scm-builtins.md
│   ├── storage.md
│   ├── streams.md
│   ├── strings.md
│   ├── sync.md
│   └── vectors.md
├── git-pre-commit
├── go.mod
├── lib
│   ├── main.scm
│   ├── psql-parser.scm
│   ├── queryplan.scm
│   ├── rdf-parser.scm
│   ├── rdf.scm
│   ├── sql-builtins.scm
│   ├── sql-metadata.scm
│   ├── sql-parser.scm
│   ├── sql-test.scm
│   ├── sql.scm
│   └── test.scm
├── main.go
├── memcp.singularity.recipe
├── run_sql_tests.py
├── scm
│   ├── alu.go
│   ├── assoc_fast.go
│   ├── compare.go
│   ├── date.go
│   ├── declare.go
│   ├── jit.go
│   ├── jit_amd64.go
│   ├── jit_arm64.go
│   ├── list.go
│   ├── match.go
│   ├── mysql.go
│   ├── network.go
│   ├── optimizer.go
│   ├── packrat.go
│   ├── parser.go
│   ├── printer.go
│   ├── prompt.go
│   ├── scheduler.go
│   ├── scm.go
│   ├── streams.go
│   ├── strings.go
│   ├── sync.go
│   ├── trace.go
│   └── vector.go
├── storage
│   ├── analyzer.go
│   ├── cache.go
│   ├── compute.go
│   ├── csv.go
│   ├── database.go
│   ├── index.go
│   ├── json.go
│   ├── limits.go
│   ├── overlay-blob.go
│   ├── partition.go
│   ├── persistence-files.go
│   ├── persistence.go
│   ├── scan.go
│   ├── scan_helper.go
│   ├── scan_order.go
│   ├── settings.go
│   ├── shard.go
│   ├── shared_resource.go
│   ├── storage-float.go
│   ├── storage-int.go
│   ├── storage-prefix.go
│   ├── storage-scmer.go
│   ├── storage-seq.go
│   ├── storage-sparse.go
│   ├── storage-string.go
│   ├── storage.go
│   └── table.go
├── test_memcp_api.py
├── tests
│   ├── 01_basic_sql.yaml
│   ├── 02_functions.yaml
│   ├── 03_ddl_operations.yaml
│   ├── 04_table_operations.yaml
│   ├── 05_advanced_queries.yaml
│   ├── 06_edge_cases.yaml
│   ├── 07_error_cases.yaml
│   ├── 08_rdf_sparql.yaml
│   ├── 09_joins.yaml
│   ├── 10_nulls.yaml
│   ├── 11_group_having.yaml
│   ├── 12_joins_outer.yaml
│   ├── 13_subselects.yaml
│   ├── 14_order_limit.yaml
│   ├── 15_dml.yaml
│   ├── 16_group_by_sum.yaml
│   ├── 17_strings_like.yaml
│   ├── 18_unnesting.yaml
│   ├── 19_subselect_order.yaml
│   ├── 20_default_values.yaml
│   ├── 21_grant_revoke.yaml
│   ├── 22_delete_qualified.yaml
│   ├── 23_policy_enforcement.yaml
│   ├── 24_mysql_basic_compat.yaml
│   ├── 25_schema_qualified_insert.yaml
│   ├── 26_mysql_datetime_defaults.yaml
│   ├── 27_mysql_keys_indexes.yaml
│   ├── 28_mysql_fk_acceptance.yaml
│   ├── 29_mysql_upsert.yaml
│   ├── 30_trailing_semicolon.yaml
│   ├── 31_length_function.yaml
│   ├── 32_expr_subselects.yaml
│   ├── 33_collations_order.yaml
│   ├── 34_collation_columns.yaml
│   └── 35_memory_engine.yaml
└── tools
    ├── lint_scm.py
    └── mysqldump-to-json.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.sw*
2 | *.o
3 | tools/Makefile
4 | memcp-logo.png
5 | test.jsonl
6 | memcp
7 | .memcp-history.tmp
8 | data
9 | memcp.sif
10 | trace*.json
11 | *.diff
12 | go.sum
13 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | 0.1.4
2 | =====
3 |
4 | - PASSWORD(str) for password hashes
5 |
6 | 0.1.3
7 | =====
8 |
9 | - Parsec parsers
10 | - implement SELECT, UPDATE, DELETE with WHERE
11 |
12 | 0.1.2
13 | =====
14 |
15 | - added Dockerfile
16 | - added function help
17 | - storage function: scan_order
18 |
19 | 0.1.1
20 | =====
21 |
22 | - IO functions: password
23 | - user table for mysql auth
24 | - mysql and REST API check for username/password
25 |
26 | 0.1.0
27 | =====
28 |
29 | - basic scheme functions: quote, eval, if, and, or, match, define/set, lambda, begin, error, symbol, list
30 | - arithmetic scheme functions: +, -, *, /, <=, <, >=, >, equal?, !/not
31 | - scheme string functions: simplify, strlen, concat, toLower, toUpper, split
32 | - scheme list functions: append, cons, car, cdr, merge, has?, filter, map, reduce
33 | - scheme dictionary functions: filter_assoc, map_assoc, reduce_assoc, set_assoc, has_assoc?, merge_assoc
34 | - IO functions: print, import, load, serve, mysql
35 | - storage functions: scan, createdatabase, dropdatabase, createtable, droptable, insert, stat, rebuild, loadCSV, loadJSON
36 | - storage types: SCMER, int, sequence, string, dictionary, float
37 | - SQL: support for SELECT * FROM, CREATE DATABASE, CREATE TABLE, SHOW DATABASES, SHOW TABLES, INSERT INTO
38 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # syntax=docker/dockerfile:1
2 |
3 | # Build stage
4 | FROM golang:1.22-alpine AS builder
5 |
6 | WORKDIR /build
7 |
8 | # Install git for go modules that might need it
9 | RUN apk add --no-cache git
10 |
11 | # Copy go mod files first for better caching
12 | COPY go.mod go.sum ./
13 | RUN go mod download
14 |
15 | # Copy source code
16 | COPY . .
17 |
18 | # Build the application
19 | RUN CGO_ENABLED=0 GOOS=linux go get
20 | RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o memcp .
21 |
22 | # Runtime stage
23 | FROM alpine:latest
24 |
25 | # Install ca-certificates for HTTPS requests
26 | RUN apk --no-cache add ca-certificates
27 |
28 | WORKDIR /app
29 |
30 | # Copy the binary from builder stage
31 | COPY --from=builder /build/memcp .
32 | # Copy Scheme library (runtime scripts)
33 | COPY --from=builder /build/lib ./lib
34 |
35 | # Create data directory
36 | RUN mkdir -p /data
37 |
38 | # Set up volumes and expose ports
39 | VOLUME /data
40 | EXPOSE 4332
41 | EXPOSE 3307
42 |
43 | # Set environment variables (overridable via docker-compose)
44 | # ROOT_PASSWORD is only considered in the first run
45 | ENV PARAMS=
46 | ENV ROOT_PASSWORD=admin
47 | ENV APP=lib/main.scm
48 |
49 | # Run the application (load default Scheme entrypoint)
50 | # If ROOT_PASSWORD is set, pass it as --root-password; otherwise rely on default in lib/sql.scm
51 | CMD ./memcp -data /data --root-password="$ROOT_PASSWORD" $PARAMS $APP
52 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | go build
3 |
4 | run:
5 | ./memcp
6 |
7 | perf:
8 | perf record --call-graph fp -- ./memcp
9 |
10 | test:
11 | # run `cp git-pre-commit .git/hooks/pre-commit` to activate the trigger
12 | ./git-pre-commit
13 |
14 | memcp.sif:
15 | sudo singularity build memcp.sif memcp.singularity.recipe
16 |
17 | docs:
18 | ./memcp -write-docu docs
19 |
20 | docker-release:
21 | sudo docker build -t carli2/memcp:latest .
22 | sudo docker push carli2/memcp:latest
23 |
24 | .PHONY: memcp.sif docs
25 |
--------------------------------------------------------------------------------
/apps/keyvalue.scm:
--------------------------------------------------------------------------------
1 | /* microservice democase: a simple key value store with prepared statements */
2 | (import "../lib/sql-parser.scm")
3 | (import "../lib/queryplan.scm")
4 |
5 | /* usage:
6 |
7 | set a key-value pair:
8 | curl -d "my_value" http://localhost:1266/my_key
9 |
10 | retrieve a key-value pair:
11 | http://localhost:1266/my_key
12 |
13 | */
14 |
15 | /* initialize database and prepare sql statements */
16 | (createdatabase "keyvalue" true)
17 | (eval (parse_sql "keyvalue" "CREATE TABLE IF NOT EXISTS kv(key TEXT, value TEXT, UNIQUE KEY PRIMARY(key))"))
18 |
19 | (set item_get (parse_sql "keyvalue" "SELECT value FROM kv WHERE key = @key"))
20 | (set item_set (parse_sql "keyvalue" "INSERT INTO kv(key, value) VALUES (@key, @value) ON DUPLICATE KEY UPDATE value = @value"))
21 | /*(set item_list (parse_sql "keyvalue" "SELECT key, value FROM kv"))*/
22 |
23 |
24 | (define http_handler (begin
25 | (lambda (req res) (begin
26 | (set session (newsession))
27 | (session "key" (req "path"))
28 | (if (equal? (req "method") "GET") (begin
29 | /* GET = load */
30 | (set resultrow (lambda (resultset) ((res "print") (resultset "value"))))
31 | (eval item_get)
32 | ) (begin
33 | /* PUT / POST: store */
34 | (session "value" ((req "body")))
35 | (eval item_set)
36 | ((res "print") "ok")
37 | ))
38 | ))
39 | ))
40 |
41 | (set port 1266)
42 | (serve port (lambda (req res) (http_handler req res)))
43 |
--------------------------------------------------------------------------------
/apps/minigame.scm:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2023 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | (define minigame_static '(
19 | "" '(200 "text/html" "Have fun, play with it")
20 | "game.js" '(200 "text/javascript" "window.onload = function () {
21 | conn = new WebSocket('ws://' + document.location.host + '/minigame/ws');
22 | conn.onopen = function () {
23 | conn.send('hi from client');
24 | }
25 | conn.onmessage = function (msg) {
26 | console.log(msg);
27 | alert(msg.data);
28 | }
29 | }")
30 | '(404 "text/plain" "404 not found")
31 | ))
32 |
33 | (define http_handler (begin
34 | (set old_handler http_handler)
35 | (lambda (req res) (begin
36 | /* hook our additional paths into it */
37 | (match (req "path")
38 | (regex "^/minigame/(.*)$" url rest) (begin
39 | (if (equal? rest "ws") (begin
40 | (set msg ((res "websocket") (lambda (msg) (print "message: " msg))))
41 | (msg 1 "Hello World from server")
42 | ) (match (minigame_static rest) '(status type content) (begin
43 | ((res "header") "Content-Type" type)
44 | ((res "status") status)
45 | ((res "print") content)
46 | )))
47 | )
48 | /* default */
49 | (old_handler req res))
50 | ))
51 | ))
52 |
--------------------------------------------------------------------------------
/assets/gauge-needle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/launix-de/memcp/64671ed626acd595cb91a4149925e617254cd5d3/assets/gauge-needle.png
--------------------------------------------------------------------------------
/assets/gauge.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/launix-de/memcp/64671ed626acd595cb91a4149925e617254cd5d3/assets/gauge.png
--------------------------------------------------------------------------------
/assets/htop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/launix-de/memcp/64671ed626acd595cb91a4149925e617254cd5d3/assets/htop.png
--------------------------------------------------------------------------------
/assets/info.html:
--------------------------------------------------------------------------------
[HTML markup lost in extraction; the recoverable page text follows]

MemCP performance

Traditional Database (Performance gauge)
- Storage on Hard Disk
- Single-Core Performance
- No Compression
- High Demand on RAM and HDD bandwidth
- IO Bottleneck on TCP Connections

In-Memory Database (Performance gauge)
- Storage in RAM, Backup on HDD
- Multi-Core Performance x12
- In-Memory Compression Factor 5:1
- Optimal Usage of HDD and RAM bandwidth
- REST APIs, AI etc. directly in the address space of the database
- Ideal for Statistics
--------------------------------------------------------------------------------
/assets/shot1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/launix-de/memcp/64671ed626acd595cb91a4149925e617254cd5d3/assets/shot1.png
--------------------------------------------------------------------------------
/assets/shot2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/launix-de/memcp/64671ed626acd595cb91a4149925e617254cd5d3/assets/shot2.png
--------------------------------------------------------------------------------
/docs/arithmetic--logic.md:
--------------------------------------------------------------------------------
1 | # Arithmetic / Logic
2 |
3 | ## int?
4 |
5 | tells if the value is an integer
6 |
7 | **Allowed number of parameters:** 1–1
8 |
9 | ### Parameters
10 |
11 | - **value** (`any`): value
12 |
13 | ### Returns
14 |
15 | `bool`
16 |
17 | ## number?
18 |
19 | tells if the value is a number
20 |
21 | **Allowed number of parameters:** 1–1
22 |
23 | ### Parameters
24 |
25 | - **value** (`any`): value
26 |
27 | ### Returns
28 |
29 | `bool`
30 |
31 | ## +
32 |
33 | adds two or more numbers
34 |
35 | **Allowed number of parameters:** 2–1000
36 |
37 | ### Parameters
38 |
39 | - **value...** (`number`): values to add
40 |
41 | ### Returns
42 |
43 | `number`
44 |
45 | ## -
46 |
47 | subtracts the following numbers from the first one
48 |
49 | **Allowed number of parameters:** 2–1000
50 |
51 | ### Parameters
52 |
53 | - **value...** (`number`): values
54 |
55 | ### Returns
56 |
57 | `number`
58 |
59 | ## *
60 |
61 | multiplies two or more numbers
62 |
63 | **Allowed number of parameters:** 2–1000
64 |
65 | ### Parameters
66 |
67 | - **value...** (`number`): values
68 |
69 | ### Returns
70 |
71 | `number`
72 |
73 | ## /
74 |
75 | divides the first number by the following numbers
76 |
77 | **Allowed number of parameters:** 2–1000
78 |
79 | ### Parameters
80 |
81 | - **value...** (`number`): values
82 |
83 | ### Returns
84 |
85 | `number`
86 |
87 | ## <=
88 |
89 | compares two numbers or strings
90 |
91 | **Allowed number of parameters:** 2–2
92 |
93 | ### Parameters
94 |
95 | - **value...** (`any`): values
96 |
97 | ### Returns
98 |
99 | `bool`
100 |
101 | ## <
102 |
103 | compares two numbers or strings
104 |
105 | **Allowed number of parameters:** 2–2
106 |
107 | ### Parameters
108 |
109 | - **value...** (`any`): values
110 |
111 | ### Returns
112 |
113 | `bool`
114 |
115 | ## >
116 |
117 | compares two numbers or strings
118 |
119 | **Allowed number of parameters:** 2–2
120 |
121 | ### Parameters
122 |
123 | - **value...** (`any`): values
124 |
125 | ### Returns
126 |
127 | `bool`
128 |
129 | ## >=
130 |
131 | compares two numbers or strings
132 |
133 | **Allowed number of parameters:** 2–2
134 |
135 | ### Parameters
136 |
137 | - **value...** (`any`): values
138 |
139 | ### Returns
140 |
141 | `bool`
142 |
143 | ## equal?
144 |
145 | compares two values of the same type, (equal? nil nil) is true
146 |
147 | **Allowed number of parameters:** 2–2
148 |
149 | ### Parameters
150 |
151 | - **value...** (`any`): values
152 |
153 | ### Returns
154 |
155 | `bool`
156 |
157 | ## equal??
158 |
159 | performs a SQL-compliant sloppy equality check on primitive values (number, int, string, bool, nil); strings are compared case-insensitively, (equal?? nil nil) is nil
160 |
161 | **Allowed number of parameters:** 2–2
162 |
163 | ### Parameters
164 |
165 | - **value...** (`any`): values
166 |
167 | ### Returns
168 |
169 | `bool`
170 |
171 | ## !
172 |
173 | negates the boolean value
174 |
175 | **Allowed number of parameters:** 1–1
176 |
177 | ### Parameters
178 |
179 | - **value** (`bool`): value
180 |
181 | ### Returns
182 |
183 | `bool`
184 |
185 | ## not
186 |
187 | negates the boolean value
188 |
189 | **Allowed number of parameters:** 1–1
190 |
191 | ### Parameters
192 |
193 | - **value** (`bool`): value
194 |
195 | ### Returns
196 |
197 | `bool`
198 |
199 | ## nil?
200 |
201 | returns true if value is nil
202 |
203 | **Allowed number of parameters:** 1–1
204 |
205 | ### Parameters
206 |
207 | - **value** (`any`): value
208 |
209 | ### Returns
210 |
211 | `bool`
212 |
213 | ## min
214 |
215 | returns the smallest value
216 |
217 | **Allowed number of parameters:** 1–1000
218 |
219 | ### Parameters
220 |
221 | - **value...** (`number|string`): value
222 |
223 | ### Returns
224 |
225 | `number|string`
226 |
227 | ## max
228 |
229 | returns the highest value
230 |
231 | **Allowed number of parameters:** 1–1000
232 |
233 | ### Parameters
234 |
235 | - **value...** (`number|string`): value
236 |
237 | ### Returns
238 |
239 | `number|string`
240 |
241 | ## floor
242 |
243 | rounds the number down
244 |
245 | **Allowed number of parameters:** 1–1
246 |
247 | ### Parameters
248 |
249 | - **value** (`number`): value
250 |
251 | ### Returns
252 |
253 | `number`
254 |
255 | ## ceil
256 |
257 | rounds the number up
258 |
259 | **Allowed number of parameters:** 1–1
260 |
261 | ### Parameters
262 |
263 | - **value** (`number`): value
264 |
265 | ### Returns
266 |
267 | `number`
268 |
269 | ## round
270 |
271 | rounds the number
272 |
273 | **Allowed number of parameters:** 1–1
274 |
275 | ### Parameters
276 |
277 | - **value** (`number`): value
278 |
279 | ### Returns
280 |
281 | `number`
282 |
283 |
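284 | ## Examples
285 | 
286 | A minimal, illustrative sketch based on the descriptions above; the expected results are shown as comments:
287 | 
288 | ```scheme
289 | (print (+ 1 2 3))          /* 6 */
290 | (print (/ 100 5 2))        /* 10 */
291 | (print (floor 2.7))        /* 2 */
292 | (print (equal? "A" "A"))   /* true: strict comparison of same-typed values */
293 | (print (equal?? "A" "a"))  /* true: SQL-style sloppy, case-insensitive comparison */
294 | (print (equal?? nil nil))  /* nil: SQL semantics for NULL = NULL */
295 | ```
296 | 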
--------------------------------------------------------------------------------
/docs/associative-lists--dictionaries.md:
--------------------------------------------------------------------------------
1 | # Associative Lists / Dictionaries
2 |
3 | ## filter_assoc
4 |
5 | returns a filtered dictionary according to a filter function
6 |
7 | **Allowed number of parameters:** 2–2
8 |
9 | ### Parameters
10 |
11 | - **dict** (`list`): dictionary that has to be filtered
12 | - **condition** (`func`): filter function func(string any)->bool where the first parameter is the key, the second is the value
13 |
14 | ### Returns
15 |
16 | `list`
17 |
18 | ## map_assoc
19 |
20 | returns a mapped dictionary according to a map function
21 | Keys will stay the same but values are mapped.
22 |
23 | **Allowed number of parameters:** 2–2
24 |
25 | ### Parameters
26 |
27 | - **dict** (`list`): dictionary that has to be mapped
28 | - **map** (`func`): map function func(string any)->any where the first parameter is the key, the second is the value. It must return the new value.
29 |
30 | ### Returns
31 |
32 | `list`
33 |
34 | ## reduce_assoc
35 |
36 | reduces a dictionary according to a reduce function
37 |
38 | **Allowed number of parameters:** 3–3
39 |
40 | ### Parameters
41 |
42 | - **dict** (`list`): dictionary that has to be reduced
43 | - **reduce** (`func`): reduce function func(any string any)->any where the first parameter is the accumulator, second is key, third is value. It must return the new accumulator.
44 | - **neutral** (`any`): initial value for the accumulator
45 |
46 | ### Returns
47 |
48 | `any`
49 |
50 | ## has_assoc?
51 |
52 | checks if a dictionary has a key present
53 |
54 | **Allowed number of parameters:** 2–2
55 |
56 | ### Parameters
57 |
58 | - **dict** (`list`): dictionary that has to be checked
59 | - **key** (`string`): key to test
60 |
61 | ### Returns
62 |
63 | `bool`
64 |
65 | ## extract_assoc
66 |
67 | applies a function (key value) on the dictionary and returns the results as a flat list
68 |
69 | **Allowed number of parameters:** 2–2
70 |
71 | ### Parameters
72 |
73 | - **dict** (`list`): dictionary that has to be checked
74 | - **map** (`func`): func(string any)->any that flattens down each element
75 |
76 | ### Returns
77 |
78 | `list`
79 |
80 | ## set_assoc
81 |
82 | returns a dictionary where a single value has been changed.
83 | This function may destroy the input value for the sake of performance. You must not use the input value again.
84 |
85 | **Allowed number of parameters:** 3–4
86 |
87 | ### Parameters
88 |
89 | - **dict** (`list`): input dictionary that has to be changed. You must not use this value again.
90 | - **key** (`string`): key that has to be set
91 | - **value** (`any`): new value to set
92 | - **merge** (`func`): (optional) func(any any)->any that is called when a value is overwritten. The first parameter is the old value, the second is the new value. It must return the merged value that shall be physically stored in the new dictionary.
93 |
94 | ### Returns
95 |
96 | `list`
97 |
98 | ## merge_assoc
99 |
100 | returns a dictionary where all keys from dict1 and all keys from dict2 are present.
101 | If a key is present in both inputs, the second one is dominant: the first value will be overwritten unless you provide a merge function
102 |
103 | **Allowed number of parameters:** 2–3
104 |
105 | ### Parameters
106 |
107 | - **dict1** (`list`): first input dictionary that has to be changed. You must not use this value again.
108 | - **dict2** (`list`): input dictionary that contains the new values that have to be added
109 | - **merge** (`func`): (optional) func(any any)->any that is called when a value is overwritten. The first parameter is the old value, the second is the new value from dict2. It must return the merged value that shall be physically stored in the new dictionary.
110 |
111 | ### Returns
112 |
113 | `list`
114 |
115 |
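116 | ## Examples
117 | 
118 | A minimal, illustrative sketch based on the descriptions above. It assumes the flat key/value list representation used elsewhere in this repository:
119 | 
120 | ```scheme
121 | (set d '("host" "localhost" "port" 4321))
122 | (set d (set_assoc d "port" 4332))                         /* replace a value; the old d must not be reused */
123 | (print (has_assoc? d "host"))                             /* true */
124 | (print (reduce_assoc d (lambda (acc k v) (+ acc 1)) 0))   /* 2: counts the key/value pairs */
125 | ```
126 | 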
--------------------------------------------------------------------------------
/docs/date.md:
--------------------------------------------------------------------------------
1 | # Date
2 |
3 | ## now
4 |
5 | returns the unix timestamp
6 |
7 | **Allowed number of parameters:** 0–0
8 |
9 | ### Parameters
10 |
11 | _This function has no parameters._
12 |
13 | ### Returns
14 |
15 | `int`
16 |
17 | ## parse_date
18 |
19 | parses a date from a string and returns the unix timestamp
20 |
21 | **Allowed number of parameters:** 1–1
22 |
23 | ### Parameters
24 |
25 | - **value** (`string`): values to parse
26 |
27 | ### Returns
28 |
29 | `int`
30 |
31 |
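32 | ## Examples
33 | 
34 | A minimal, illustrative sketch; the accepted date format for `parse_date` is an assumption, not documented above:
35 | 
36 | ```scheme
37 | (print (now))                      /* current unix timestamp, e.g. 1700000000 */
38 | (print (parse_date "2024-01-01"))  /* unix timestamp of that date (input format assumed) */
39 | ```
40 | 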
--------------------------------------------------------------------------------
/docs/docu.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/launix-de/memcp/64671ed626acd595cb91a4149925e617254cd5d3/docs/docu.zip
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # Documentation
2 |
3 | - [SCM Builtins](scm-builtins.md)
4 | - [Arithmetic / Logic](arithmetic--logic.md)
5 | - [Strings](strings.md)
6 | - [Streams](streams.md)
7 | - [Lists](lists.md)
8 | - [Associative Lists / Dictionaries](associative-lists--dictionaries.md)
9 | - [Date](date.md)
10 | - [Vectors](vectors.md)
11 | - [Parsers](parsers.md)
12 | - [Sync](sync.md)
13 | - [IO](io.md)
14 | - [Storage](storage.md)
15 |
--------------------------------------------------------------------------------
/docs/io.md:
--------------------------------------------------------------------------------
1 | # IO
2 |
3 | ## print
4 |
5 | Prints values to stdout (only in IO environment)
6 |
7 | **Allowed number of parameters:** 1–1000
8 |
9 | ### Parameters
10 |
11 | - **value...** (`any`): values to print
12 |
13 | ### Returns
14 |
15 | `bool`
16 |
17 | ## env
18 |
19 | returns the content of an environment variable
20 |
21 | **Allowed number of parameters:** 1–2
22 |
23 | ### Parameters
24 |
25 | - **var** (`string`): envvar
26 | - **default** (`string`): default if the env is not found
27 |
28 | ### Returns
29 |
30 | `string`
31 |
32 | ## help
33 |
34 | Lists all functions or print help for a specific function
35 |
36 | **Allowed number of parameters:** 0–1
37 |
38 | ### Parameters
39 |
40 | - **topic** (`string`): function to print help about
41 |
42 | ### Returns
43 |
44 | `nil`
45 |
46 | ## import
47 |
48 | Imports a .scm file into the current namespace
49 |
50 | **Allowed number of parameters:** 1–1
51 |
52 | ### Parameters
53 |
54 | - **filename** (`string`): filename relative to folder of source file
55 |
56 | ### Returns
57 |
58 | `any`
59 |
60 | ## load
61 |
62 | Loads a file or stream and returns the string or iterates line-wise
63 |
64 | **Allowed number of parameters:** 1–3
65 |
66 | ### Parameters
67 |
68 | - **filenameOrStream** (`string|stream`): filename relative to folder of source file or stream to read from
69 | - **linehandler** (`func`): handler that reads each line; each line may end with delimiter
70 | - **delimiter** (`string`): delimiter to extract; if no delimiter is given, the file is read as whole and returned or passed to linehandler
71 |
72 | ### Returns
73 |
74 | `string|bool`
75 |
76 | ## stream
77 |
78 | Opens a file readonly as stream
79 |
80 | **Allowed number of parameters:** 1–1
81 |
82 | ### Parameters
83 |
84 | - **filename** (`string`): filename relative to folder of source file
85 |
86 | ### Returns
87 |
88 | `stream`
89 |
90 | ## watch
91 |
92 | Loads a file and calls the callback. Whenever the file changes on disk, the file is loaded again.
93 |
94 | **Allowed number of parameters:** 2–2
95 |
96 | ### Parameters
97 |
98 | - **filename** (`string`): filename relative to folder of source file
99 | - **updatehandler** (`func`): handler that receives the file content func(content)
100 |
101 | ### Returns
102 |
103 | `bool`
104 |
105 | ## serve
106 |
107 | Opens a HTTP server at a given port
108 |
109 | **Allowed number of parameters:** 2–2
110 |
111 | ### Parameters
112 |
113 | - **port** (`number`): port number for HTTP server
114 | - **handler** (`func`): handler: lambda(req res) that handles the http request (TODO: detailed documentation)
115 |
116 | ### Returns
117 |
118 | `bool`
119 |
120 | ## serveStatic
121 |
122 | creates a static handler for use as a callback in (serve) - returns a handler lambda(req res)
123 |
124 | **Allowed number of parameters:** 1–1
125 |
126 | ### Parameters
127 |
128 | - **directory** (`string`): folder with the files to serve
129 |
130 | ### Returns
131 |
132 | `func`
133 |
134 | ## mysql
135 |
136 | Opens a MySQL protocol server at a given port
137 |
138 | **Allowed number of parameters:** 4–4
139 |
140 | ### Parameters
141 |
142 | - **port** (`number`): port number for MySQL server
143 | - **getPassword** (`func`): lambda(username string) string|nil has to return the password for a user or nil to deny login
144 | - **schemacallback** (`func`): lambda(username schema) bool handler that checks whether the user is allowed to access the schema (string) - you should check access rights here
145 | - **handler** (`func`): lambda(schema sql resultrow session) handler to process sql query (string) in schema (string). resultrow is a lambda(list)
146 |
147 | ### Returns
148 |
149 | `bool`
150 |
151 | ## password
152 |
153 | Hashes a password with sha1 (for mysql user authentication)
154 |
155 | **Allowed number of parameters:** 1–1
156 |
157 | ### Parameters
158 |
159 | - **password** (`string`): plain text password to hash
160 |
161 | ### Returns
162 |
163 | `string`
164 |
165 | ## args
166 |
167 | Returns command line arguments
168 |
169 | **Allowed number of parameters:** 0–0
170 |
171 | ### Parameters
172 |
173 | _This function has no parameters._
174 |
175 | ### Returns
176 |
177 | `list`
178 |
179 | ## arg
180 |
181 | Gets a command line argument value
182 |
183 | **Allowed number of parameters:** 2–3
184 |
185 | ### Parameters
186 |
187 | - **longname** (`string`): long argument name (without --)
188 | - **shortname** (`string`): short argument name (without -) or default value if only 2 args
189 | - **default** (`any`): default value if argument not found
190 |
191 | ### Returns
192 |
193 | `any`
194 |
195 |
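196 | ## Examples
197 | 
198 | A minimal, illustrative sketch of an HTTP handler, following the pattern used in `lib/main.scm` and `apps/keyvalue.scm` (`req` and `res` are the accessor functions passed to the handler):
199 | 
200 | ```scheme
201 | (serve 4321 (lambda (req res) (begin
202 | 	((res "header") "Content-Type" "text/plain")
203 | 	((res "status") 200)
204 | 	((res "print") (concat "you requested " (req "path")))
205 | )))
206 | ```
207 | 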
--------------------------------------------------------------------------------
/docs/lists.md:
--------------------------------------------------------------------------------
1 | # Lists
2 |
3 | ## count
4 |
5 | counts the number of elements in the list
6 |
7 | **Allowed number of parameters:** 1–1
8 |
9 | ### Parameters
10 |
11 | - **list** (`list`): base list
12 |
13 | ### Returns
14 |
15 | `int`
16 |
17 | ## nth
18 |
19 | get the nth item of a list
20 |
21 | **Allowed number of parameters:** 2–2
22 |
23 | ### Parameters
24 |
25 | - **list** (`list`): base list
26 | - **index** (`number`): index beginning from 0
27 |
28 | ### Returns
29 |
30 | `any`
31 |
32 | ## append
33 |
34 | appends items to a list and returns the extended list.
35 | The original list stays unharmed.
36 |
37 | **Allowed number of parameters:** 2–1000
38 |
39 | ### Parameters
40 |
41 | - **list** (`list`): base list
42 | - **item...** (`any`): items to add
43 |
44 | ### Returns
45 |
46 | `list`
47 |
48 | ## append_unique
49 |
50 | appends items to a list but only if they are new.
51 | The original list stays unharmed.
52 |
53 | **Allowed number of parameters:** 2–1000
54 |
55 | ### Parameters
56 |
57 | - **list** (`list`): base list
58 | - **item...** (`any`): items to add
59 |
60 | ### Returns
61 |
62 | `list`
63 |
64 | ## cons
65 |
66 | constructs a list from a head and a tail list
67 |
68 | **Allowed number of parameters:** 2–2
69 |
70 | ### Parameters
71 |
72 | - **car** (`any`): new head element
73 | - **cdr** (`list`): tail that is appended after car
74 |
75 | ### Returns
76 |
77 | `list`
78 |
79 | ## car
80 |
81 | extracts the head of a list
82 |
83 | **Allowed number of parameters:** 1–1
84 |
85 | ### Parameters
86 |
87 | - **list** (`list`): list
88 |
89 | ### Returns
90 |
91 | `any`
92 |
93 | ## cdr
94 |
95 | extracts the tail of a list
96 | The tail of a list is a list with all items except the head.
97 |
98 | **Allowed number of parameters:** 1–1
99 |
100 | ### Parameters
101 |
102 | - **list** (`list`): list
103 |
104 | ### Returns
105 |
106 | `any`
107 |
108 | ## zip
109 |
110 | swaps the dimensions of a list of lists. If one parameter is given, it is a list of lists that is transposed. If multiple parameters are given, they are treated as the components that will be zipped into the sub lists
111 |
112 | **Allowed number of parameters:** 1–1000
113 |
114 | ### Parameters
115 |
116 | - **list** (`list`): list of lists of items
117 |
118 | ### Returns
119 |
120 | `list`
121 |
122 | ## merge
123 |
124 | flattens a list of lists into a list containing all the subitems. If one parameter is given, it is a list of lists that is flattened. If multiple parameters are given, they are treated as lists that will be merged into one
125 |
126 | **Allowed number of parameters:** 1–1000
127 |
128 | ### Parameters
129 |
130 | - **list** (`list`): list of lists of items
131 |
132 | ### Returns
133 |
134 | `list`
135 |
136 | ## merge_unique
137 |
138 | flattens a list of lists into a list containing all the subitems. Duplicates are filtered out.
139 |
140 | **Allowed number of parameters:** 1–1000
141 |
142 | ### Parameters
143 |
144 | - **list** (`list`): list of lists of items
145 |
146 | ### Returns
147 |
148 | `list`
149 |
150 | ## has?
151 |
152 | checks if a list has a certain item (equal?)
153 |
154 | **Allowed number of parameters:** 2–2
155 |
156 | ### Parameters
157 |
158 | - **haystack** (`list`): list to search in
159 | - **needle** (`any`): item to search for
160 |
161 | ### Returns
162 |
163 | `bool`
164 |
165 | ## filter
166 |
167 | returns a list that only contains elements that pass the filter function
168 |
169 | **Allowed number of parameters:** 2–2
170 |
171 | ### Parameters
172 |
173 | - **list** (`list`): list that has to be filtered
174 | - **condition** (`func`): filter condition func(any)->bool
175 |
176 | ### Returns
177 |
178 | `list`
179 |
180 | ## map
181 |
182 | returns a list that contains the results of a map function that is applied to the list
183 |
184 | **Allowed number of parameters:** 2–2
185 |
186 | ### Parameters
187 |
188 | - **list** (`list`): list that has to be mapped
189 | - **map** (`func`): map function func(any)->any that is applied to each item
190 |
191 | ### Returns
192 |
193 | `list`
194 |
195 | ## mapIndex
196 |
197 | returns a list that contains the results of a map function that is applied to each item together with its index
198 |
199 | **Allowed number of parameters:** 2–2
200 |
201 | ### Parameters
202 |
203 | - **list** (`list`): list that has to be mapped
204 | - **map** (`func`): map function func(i, any)->any that is applied to each item
205 |
206 | ### Returns
207 |
208 | `list`
209 |
210 | ## reduce
211 |
212 | reduces the list to a single value by successively applying a reduce function
213 |
214 | **Allowed number of parameters:** 2–3
215 |
216 | ### Parameters
217 |
218 | - **list** (`list`): list that has to be reduced
219 | - **reduce** (`func`): reduce function func(any any)->any where the first parameter is the accumulator, the second is a list item
220 | - **neutral** (`any`): (optional) initial value of the accumulator, defaults to nil
221 |
222 | ### Returns
223 |
224 | `any`
225 |
226 | ## produce
227 |
228 | returns a list that contains produced items - it works like for(state = startstate; condition(state); state = iterator(state)) { yield state }
229 |
230 | **Allowed number of parameters:** 3–3
231 |
232 | ### Parameters
233 |
234 | - **startstate** (`any`): start state to begin with
235 | - **condition** (`func`): func that returns true if the state should be inserted into the result; once it returns false, the loop stops
236 | - **iterator** (`func`): func that produces the next state
237 |
238 | ### Returns
239 |
240 | `list`
241 |
242 | ## produceN
243 |
244 | returns a list with numbers from 0..n-1
245 |
246 | **Allowed number of parameters:** 1–1
247 |
248 | ### Parameters
249 |
250 | - **n** (`number`): number of elements to produce
251 |
252 | ### Returns
253 |
254 | `list`
255 |
256 | ## list?
257 |
258 | checks if a value is a list
259 |
260 | **Allowed number of parameters:** 1–1
261 |
262 | ### Parameters
263 |
264 | - **value** (`any`): value to check
265 |
266 | ### Returns
267 |
268 | `bool`
269 |
270 | ## contains?
271 |
272 | checks if a value is in a list; uses the equal?? operator
273 |
274 | **Allowed number of parameters:** 2–2
275 |
276 | ### Parameters
277 |
278 | - **list** (`list`): list to check
279 | - **value** (`any`): value to check
280 |
281 | ### Returns
282 |
283 | `bool`
284 |
285 |
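286 | ## Examples
287 | 
288 | A minimal, illustrative sketch based on the descriptions above; the expected results are shown as comments:
289 | 
290 | ```scheme
291 | (set xs (produceN 5))                            /* (0 1 2 3 4) */
292 | (print (map xs (lambda (x) (* x x))))            /* (0 1 4 9 16) */
293 | (print (filter xs (lambda (x) (< x 3))))         /* (0 1 2) */
294 | (print (reduce xs (lambda (acc x) (+ acc x)) 0)) /* 10 */
295 | (print (has? xs 3))                              /* true */
296 | ```
297 | 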
--------------------------------------------------------------------------------
/docs/parsers.md:
--------------------------------------------------------------------------------
1 | # Parsers
2 |
3 | ## parser
4 |
5 | creates a parser
6 |
7 | Scm parsers work this way:
8 | (parser syntax scmerresult) -> func
9 |
10 | syntax can be one of:
11 | (parser syntax scmerresult) will execute scmerresult after parsing syntax
12 | (parser syntax scmerresult "skipper") will add a different whitespace skipper regex to the root parser
13 | (define var syntax) valid inside (parser...), stores the result of syntax into var for use in scmerresult
14 | "str" AtomParser
15 | (atom "str" caseinsensitive skipws) AtomParser
16 | (regex "asdf" caseinsensitive skipws) RegexParser
17 | '(a b c) AndParser
18 | (or a b c) OrParser
19 | (* sub separator) KleeneParser
20 | (+ sub separator) ManyParser
21 | (? xyz) MaybeParser (if >1 AndParser)
22 | (not mainparser parser1 parser2 parser3 ...) a parser that matches mainparser but not parser1...
23 | $ EndParser
24 | empty EmptyParser
25 | symbol -> use other parser defined in env
26 |
27 | for further details on packrat parsers, take a look at https://github.com/launix-de/go-packrat
28 |
29 |
30 | **Allowed number of parameters:** 1–3
31 |
32 | ### Parameters
33 |
34 | - **syntax** (`any`): syntax of the grammar (see docs)
35 | - **generator** (`any`): (optional) expressions to evaluate. All captured variables are available in the scope.
36 | - **skipper** (`string`): (optional) string that defines the skip mechanism for whitespaces as regexp
37 |
38 | ### Returns
39 |
40 | `func`
41 |
42 |
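43 | ## Examples
44 | 
45 | A hypothetical sketch of how the combinators listed above might compose; it is not taken from `lib/sql-parser.scm`, and the exact evaluation of the generator expression is an assumption:
46 | 
47 | ```scheme
48 | /* grammar: a lowercase key, an "=", and a number; the generator rebuilds "key=value" */
49 | (define kv (parser
50 | 	'((define key (regex "[a-z]+" true true)) "=" (define value (regex "[0-9]+" true true)))
51 | 	(concat key "=" value)))
52 | ```
53 | 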
--------------------------------------------------------------------------------
/docs/streams.md:
--------------------------------------------------------------------------------
1 | # Streams
2 |
3 | ## streamString
4 |
5 | creates a stream that contains a string
6 |
7 | **Allowed number of parameters:** 1–1
8 |
9 | ### Parameters
10 |
11 | - **content** (`string`): content to put into the stream
12 |
13 | ### Returns
14 |
15 | `stream`
16 |
17 | ## gzip
18 |
19 | compresses a stream with gzip. Create streams with (stream filename)
20 |
21 | **Allowed number of parameters:** 1–1
22 |
23 | ### Parameters
24 |
25 | - **stream** (`stream`): input stream
26 |
27 | ### Returns
28 |
29 | `stream`
30 |
31 | ## xz
32 |
33 | compresses a stream with xz. Create streams with (stream filename)
34 |
35 | **Allowed number of parameters:** 1–1
36 |
37 | ### Parameters
38 |
39 | - **stream** (`stream`): input stream
40 |
41 | ### Returns
42 |
43 | `stream`
44 |
45 | ## zcat
46 |
47 | turns a compressed gzip stream into a stream of uncompressed data. Create streams with (stream filename)
48 |
49 | **Allowed number of parameters:** 1–1
50 |
51 | ### Parameters
52 |
53 | - **stream** (`stream`): input stream
54 |
55 | ### Returns
56 |
57 | `stream`
58 |
59 | ## xzcat
60 |
61 | turns a compressed xz stream into a stream of uncompressed data. Create streams with (stream filename)
62 |
63 | **Allowed number of parameters:** 1–1
64 |
65 | ### Parameters
66 |
67 | - **stream** (`stream`): input stream
68 |
69 | ### Returns
70 |
71 | `stream`
72 |
73 |
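74 | ## Examples
75 | 
76 | A minimal, illustrative sketch: open a gzip-compressed file as a stream, decompress it, and read it as a whole string (`dump.sql.gz` is a placeholder filename):
77 | 
78 | ```scheme
79 | (set s (zcat (stream "dump.sql.gz")))  /* uncompressed stream */
80 | (print (load s))                       /* read the whole content as a string */
81 | ```
82 | 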
--------------------------------------------------------------------------------
/docs/strings.md:
--------------------------------------------------------------------------------
1 | # Strings
2 |
3 | ## string?
4 |
5 | tells if the value is a string
6 |
7 | **Allowed number of parameters:** 1–1
8 |
9 | ### Parameters
10 |
11 | - **value** (`any`): value
12 |
13 | ### Returns
14 |
15 | `bool`
16 |
17 | ## concat
18 |
19 | concatenates stringable values and returns a string
20 |
21 | **Allowed number of parameters:** 1–1000
22 |
23 | ### Parameters
24 |
25 | - **value...** (`any`): values to concat
26 |
27 | ### Returns
28 |
29 | `string`
30 |
31 | ## substr
32 |
33 | returns a substring
34 |
35 | **Allowed number of parameters:** 2–3
36 |
37 | ### Parameters
38 |
39 | - **value** (`string`): string to cut
40 | - **start** (`number`): first character index
41 | - **len** (`number`): optional length
42 |
43 | ### Returns
44 |
45 | `string`
46 |
47 | ## simplify
48 |
49 | turns a stringable input value into its simplest representation (e.g. turns strings into numbers if they are numeric)
50 |
51 | **Allowed number of parameters:** 1–1
52 |
53 | ### Parameters
54 |
55 | - **value** (`any`): value to simplify
56 |
57 | ### Returns
58 |
59 | `any`
60 |
61 | ## strlen
62 |
63 | returns the length of a string
64 |
65 | **Allowed number of parameters:** 1–1
66 |
67 | ### Parameters
68 |
69 | - **value** (`string`): input string
70 |
71 | ### Returns
72 |
73 | `int`
74 |
75 | ## strlike
76 |
77 | matches the string against a wildcard pattern (SQL compliant)
78 |
79 | **Allowed number of parameters:** 2–3
80 |
81 | ### Parameters
82 |
83 | - **value** (`string`): input string
84 | - **pattern** (`string`): pattern with % and _ in them
85 | - **collation** (`string`): collation in which to compare them
86 |
87 | ### Returns
88 |
89 | `bool`
90 |
91 | ## toLower
92 |
93 | turns a string into lower case
94 |
95 | **Allowed number of parameters:** 1–1
96 |
97 | ### Parameters
98 |
99 | - **value** (`string`): input string
100 |
101 | ### Returns
102 |
103 | `string`
104 |
105 | ## toUpper
106 |
107 | turns a string into upper case
108 |
109 | **Allowed number of parameters:** 1–1
110 |
111 | ### Parameters
112 |
113 | - **value** (`string`): input string
114 |
115 | ### Returns
116 |
117 | `string`
118 |
119 | ## replace
120 |
121 | replaces all occurrences of a search string within a string with another string
122 |
123 | **Allowed number of parameters:** 3–3
124 |
125 | ### Parameters
126 |
127 | - **s** (`string`): input string
128 | - **find** (`string`): search string
129 | - **replace** (`string`): replace string
130 |
131 | ### Returns
132 |
133 | `string`
134 |
135 | ## split
136 |
137 | splits a string using a separator or space
138 |
139 | **Allowed number of parameters:** 1–2
140 |
141 | ### Parameters
142 |
143 | - **value** (`string`): input string
144 | - **separator** (`string`): (optional) parameter, defaults to " "
145 |
146 | ### Returns
147 |
148 | `list`
149 |
150 | ## collate
151 |
152 | returns the `<` operator for a given collation. MemCP allows natural sorting of numeric literals.
153 |
154 | **Allowed number of parameters:** 1–1
155 |
156 | ### Parameters
157 |
158 | - **collation** (`string`): collation string of the form LANG, LANG_cs or LANG_ci where LANG is a BCP 47 code; for compatibility with MySQL, a CHARSET_ prefix is allowed and ignored, and the aliases bin, danish, general, german1, german2, spanish and swedish are accepted as language codes
159 | - **reverse** (`bool`): whether to reverse the order like in ORDER BY DESC
160 |
161 | ### Returns
162 |
163 | `func`
164 |
165 | ## htmlentities
166 |
167 | escapes the string for use in HTML
168 |
169 | **Allowed number of parameters:** 1–1
170 |
171 | ### Parameters
172 |
173 | - **value** (`string`): input string
174 |
175 | ### Returns
176 |
177 | `string`
178 |
179 | ## urlencode
180 |
181 | encodes a string according to URI coding schema
182 |
183 | **Allowed number of parameters:** 1–1
184 |
185 | ### Parameters
186 |
187 | - **value** (`string`): string to encode
188 |
189 | ### Returns
190 |
191 | `string`
192 |
193 | ## urldecode
194 |
195 | decodes a string according to URI coding schema
196 |
197 | **Allowed number of parameters:** 1–1
198 |
199 | ### Parameters
200 |
201 | - **value** (`string`): string to decode
202 |
203 | ### Returns
204 |
205 | `string`
206 |
207 | ## json_encode
208 |
209 | encodes a value in JSON, treats lists as lists
210 |
211 | **Allowed number of parameters:** 1–1
212 |
213 | ### Parameters
214 |
215 | - **value** (`any`): value to encode
216 |
217 | ### Returns
218 |
219 | `string`
220 |
221 | ## json_encode_assoc
222 |
223 | encodes a value in JSON, treats lists as associative arrays
224 |
225 | **Allowed number of parameters:** 1–1
226 |
227 | ### Parameters
228 |
229 | - **value** (`any`): value to encode
230 |
231 | ### Returns
232 |
233 | `string`
234 |
235 | ## json_decode
236 |
237 | parses JSON into a map
238 |
239 | **Allowed number of parameters:** 1–1
240 |
241 | ### Parameters
242 |
243 | - **value** (`string`): string to decode
244 |
245 | ### Returns
246 |
247 | `any`
248 |
249 | ## base64_encode
250 |
251 | encodes a string as Base64 (standard encoding)
252 |
253 | **Allowed number of parameters:** 1–1
254 |
255 | ### Parameters
256 |
257 | - **value** (`string`): binary string to encode
258 |
259 | ### Returns
260 |
261 | `string`
262 |
263 | ## base64_decode
264 |
265 | decodes a Base64 string (standard encoding)
266 |
267 | **Allowed number of parameters:** 1–1
268 |
269 | ### Parameters
270 |
271 | - **value** (`string`): base64-encoded string
272 |
273 | ### Returns
274 |
275 | `string`
276 |
277 | ## sql_unescape
278 |
279 | unescapes the inner part of a sql string
280 |
281 | **Allowed number of parameters:** 1–1
282 |
283 | ### Parameters
284 |
285 | - **value** (`string`): string to decode
286 |
287 | ### Returns
288 |
289 | `string`
290 |
291 | ## bin2hex
292 |
293 | turns binary data into hex with lowercase letters
294 |
295 | **Allowed number of parameters:** 1–1
296 |
297 | ### Parameters
298 |
299 | - **value** (`string`): string to decode
300 |
301 | ### Returns
302 |
303 | `string`
304 |
305 | ## hex2bin
306 |
307 | decodes a hex string into binary data
308 |
309 | **Allowed number of parameters:** 1–1
310 |
311 | ### Parameters
312 |
313 | - **value** (`string`): hex string (even length)
314 |
315 | ### Returns
316 |
317 | `string`
318 |
319 | ## randomBytes
320 |
321 | returns a string with numBytes cryptographically secure random bytes
322 |
323 | **Allowed number of parameters:** 1–1
324 |
325 | ### Parameters
326 |
327 | - **numBytes** (`number`): number of random bytes
328 |
329 | ### Returns
330 |
331 | `string`
332 |
333 |
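334 | ## Examples
335 | 
336 | A minimal, illustrative sketch based on the descriptions above (the 0-based `substr` start index is an assumption):
337 | 
338 | ```scheme
339 | (print (concat "mem" "cp"))                /* "memcp" */
340 | (print (toUpper (substr "database" 0 4)))  /* "DATA" */
341 | (print (replace "a,b,c" "," ";"))          /* "a;b;c" */
342 | (print (json_encode '(1 2 3)))             /* "[1,2,3]" */
343 | (print (bin2hex (hex2bin "deadbeef")))     /* "deadbeef" */
344 | ```
345 | 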
--------------------------------------------------------------------------------
/docs/sync.md:
--------------------------------------------------------------------------------
1 | # Sync
2 |
3 | ## newsession
4 |
5 | Creates a new session, which is a thread-safe key-value store represented as a function: call it as a getter (session key), as a setter (session key value), or with no arguments (session) to list all keys
6 |
7 | **Allowed number of parameters:** 0–0
8 |
9 | ### Parameters
10 |
11 | _This function has no parameters._
12 |
13 | ### Returns
14 |
15 | `func`
16 |
17 | ## context
18 |
19 | Context helper function. Each context also contains a session. (context func args) creates a new context and runs func in that context, (context "session") reads the session variable, (context "check") checks the liveness of the context and throws an error otherwise
20 |
21 | **Allowed number of parameters:** 1–1000
22 |
23 | ### Parameters
24 |
25 | - **args...** (`any`): depends on the usage
26 |
27 | ### Returns
28 |
29 | `any`
30 |
31 | ## sleep
32 |
33 | sleeps the amount of seconds
34 |
35 | **Allowed number of parameters:** 1–1
36 |
37 | ### Parameters
38 |
39 | - **duration** (`number`): number of seconds to sleep
40 |
41 | ### Returns
42 |
43 | `bool`
44 |
45 | ## once
46 |
47 | Creates a function wrapper that you can call multiple times but only gets executed once. The result value is cached and returned on a second call. You can add parameters to that resulting function that will be passed to the first run of the wrapped function.
48 |
49 | **Allowed number of parameters:** 1–1
50 |
51 | ### Parameters
52 |
53 | - **f** (`func`): function that produces the result value
54 |
55 | ### Returns
56 |
57 | `func`
58 |
59 | ## mutex
60 |
61 | Creates a mutex. The return value is a function that takes one parameter, which must be a parameterless function. The mutex guarantees that all calls through it are serialized.
62 |
63 | **Allowed number of parameters:** 1–1
64 |
65 | ### Parameters
66 |
67 | _This function has no parameters._
68 |
69 | ### Returns
70 |
71 | `func`
72 |
73 |
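74 | ## Examples
75 | 
76 | A minimal, illustrative sketch of `newsession` and `once`, based on the descriptions above:
77 | 
78 | ```scheme
79 | (set session (newsession))
80 | (session "user" "root")        /* setter */
81 | (print (session "user"))       /* getter -> "root" */
82 | (print (session))              /* lists all keys */
83 | 
84 | (set init (once (lambda () (begin (print "runs only once") 42))))
85 | (print (init))                 /* 42, prints the message */
86 | (print (init))                 /* 42, cached; the wrapped function is not run again */
87 | ```
88 | 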
--------------------------------------------------------------------------------
/docs/vectors.md:
--------------------------------------------------------------------------------
1 | # Vectors
2 |
3 | ## dot
4 |
5 | computes the dot product of two vectors
6 |
7 | **Allowed number of parameters:** 2–3
8 |
9 | ### Parameters
10 |
11 | - **v1** (`list`): vector1
12 | - **v2** (`list`): vector2
13 | - **mode** (`string`): DOT, COSINE, EUCLIDEAN, default is DOT
14 |
15 | ### Returns
16 |
17 | `number`
18 |
19 |
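20 | ## Examples
21 | 
22 | A minimal, illustrative sketch based on the description above:
23 | 
24 | ```scheme
25 | (print (dot '(1 2 3) '(4 5 6)))       /* 32  (plain dot product) */
26 | (print (dot '(1 0) '(0 1) "COSINE"))  /* 0   (orthogonal vectors) */
27 | ```
28 | 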
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/launix-de/memcp
2 |
3 | go 1.22.0
4 |
5 | require (
6 | github.com/chzyer/readline v1.5.1
7 | github.com/dc0d/onexit v1.1.0
8 | github.com/docker/go-units v0.5.0
9 | github.com/fsnotify/fsnotify v1.8.0
10 | github.com/google/btree v1.1.3
11 | github.com/google/uuid v1.6.0
12 | github.com/gorilla/websocket v1.5.3
13 | github.com/jtolds/gls v4.20.0+incompatible
14 | github.com/launix-de/NonLockingReadMap v1.0.8
15 | github.com/launix-de/go-mysqlstack v0.0.0-20241101205441-bc39b4e0fb04
16 | github.com/launix-de/go-packrat/v2 v2.1.11
17 | github.com/ulikunitz/xz v0.5.15
18 | golang.org/x/text v0.21.0
19 | )
20 |
21 | require (
22 | github.com/gopherjs/gopherjs v1.17.2 // indirect
23 | github.com/shopspring/decimal v1.4.0 // indirect
24 | golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c // indirect
25 | golang.org/x/sys v0.26.0 // indirect
26 | )
27 |
--------------------------------------------------------------------------------
/lib/main.scm:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2023 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | (print "")
19 | (print "Welcome to memcp")
20 | (print "")
21 | (import "test.scm")
22 |
23 | (set static_files (serveStatic "../assets"))
24 |
25 | /* this can be overhooked */
26 | (define http_handler (lambda (req res) (begin
27 | (print "request " req)
28 | (if (equal? (req "path") "/") (begin
29 | ((res "header") "Location" "/info.html")
30 | ((res "status") 301)
31 | ) (static_files req res))
32 | /*
33 | ((res "header") "Content-Type" "text/plain")
34 | ((res "status") 404)
35 | ((res "println") "404 not found")
36 | */
37 | )))
38 |
39 | (import "sql.scm")
40 | (import "rdf.scm")
41 |
42 | /* read ports from command line arguments or environment */
43 | (if (not (arg "disable-api" false)) (begin
44 | (set port (arg "api-port" (env "PORT" "4321")))
45 | (serve port (lambda (req res) (http_handler req res)))
46 | (print "listening on http://localhost:" port)
47 | ))
48 |
--------------------------------------------------------------------------------
/lib/rdf.scm:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2024 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | (import "rdf-parser.scm")
19 |
20 | /*
21 | this is how rdf works:
22 | - every database may have a table rdf(s text, p text, o text)
23 | - import formats are: xml, ttl
24 | */
25 |
26 | (define handler_404 (lambda (req res) (begin
27 | /*(print "request " req)*/
28 | ((res "header") "Content-Type" "text/plain")
29 | ((res "status") 404)
30 | ((res "println") "404 not found")
31 | )))
32 |
33 | /* http hook for handling SparQL */
34 | (define http_handler (begin
35 | (set old_handler (coalesce http_handler handler_404))
36 | (define handle_query (lambda (req res schema query) (begin
37 | /* check for password */
38 | (set pw (scan "system" "user" '("username") (lambda (username) (equal? username (req "username"))) '("password") (lambda (password) password) (lambda (a b) b) nil))
39 | (if (and pw (equal? pw (password (req "password")))) (time (begin
40 | ((res "header") "Content-Type" "text/plain")
41 | ((res "status") 200)
42 | /*(print "RDF query: " query)*/
43 | (define formula (parse_sparql schema query))
44 | (define resultrow (res "jsonl"))
45 |
46 | (eval formula)
47 | ) query) (begin
48 | ((res "header") "Content-Type" "text/plain")
49 | ((res "header") "WWW-Authenticate" "Basic realm=\"authorization required\"")
50 | ((res "status") 401)
51 | ((res "print") "Unauthorized")
52 | ))
53 | )))
54 | (define handle_ttl_load (lambda (req res schema ttl_data) (begin
55 | /* check for password */
56 | (set pw (scan "system" "user" '("username") (lambda (username) (equal? username (req "username"))) '("password") (lambda (password) password) (lambda (a b) b) nil))
57 | (if (and pw (equal? pw (password (req "password")))) (begin
58 | ((res "header") "Content-Type" "text/plain")
59 | ((res "status") 200)
60 | /*(print "Loading TTL data into: " schema)*/
61 | /* ensure rdf table exists */
62 | (eval (parse_sql schema "CREATE TABLE IF NOT EXISTS rdf (s TEXT, p TEXT, o TEXT)" (lambda (schema table write) true)))
63 | /* load the TTL data */
64 | (load_ttl schema ttl_data)
65 | ((res "println") "TTL data loaded successfully")
66 | ) (begin
67 | ((res "header") "Content-Type" "text/plain")
68 | ((res "header") "WWW-Authenticate" "Basic realm=\"authorization required\"")
69 | ((res "status") 401)
70 | ((res "print") "Unauthorized")
71 | ))
72 | )))
73 | old_handler old_handler /* workaround for optimizer bug */
74 | (lambda (req res) (begin
75 | /* hook our additional paths into it */
76 | (match (req "path")
77 | (regex "^/rdf/([^/]+)$" url schema) (begin
78 | (set query ((req "body")))
79 | (handle_query req res schema query)
80 | )
81 | (regex "^/rdf/([^/]+)/(.*)$" url schema query_un) (begin
82 | (set query (urldecode query_un))
83 | (handle_query req res schema query)
84 | )
85 | (regex "^/rdf/([^/]+)/load_ttl$" url schema) (begin
86 | (set ttl_data ((req "body")))
87 | (handle_ttl_load req res schema ttl_data)
88 | )
89 | /* default */
90 | (!begin
91 | ((outer old_handler) req res))
92 | )
93 | ))
94 | ))
95 |
--------------------------------------------------------------------------------
/lib/sql-builtins.scm:
--------------------------------------------------------------------------------
1 | (define sql_builtins (coalesce sql_builtins (newsession)))
2 |
3 | /* all upper case */
4 | /*(sql_builtins "HELLO" (lambda () "Hello world"))*/
5 |
6 | /* time */
7 | (sql_builtins "UNIX_TIMESTAMP" now)
8 | (sql_builtins "UNIX_TIMESTAMP" parse_date)
9 | (sql_builtins "CURRENT_TIMESTAMP" now)
10 | (sql_builtins "NOW" now)
11 |
12 | /* math */
13 | (sql_builtins "FLOOR" floor)
14 | (sql_builtins "CEIL" ceil)
15 | (sql_builtins "CEILING" ceil)
16 | (sql_builtins "ROUND" round)
17 |
18 | /* strings */
19 | (sql_builtins "UPPER" toUpper)
20 | (sql_builtins "LOWER" toLower)
21 | (sql_builtins "PASSWORD" password)
22 | /* Base64 helpers */
23 | (sql_builtins "TO_BASE64" base64_encode)
24 | (sql_builtins "FROM_BASE64" base64_decode)
25 | /* SQL LENGTH(str): NULL-safe wrapper around strlen */
26 | (sql_builtins "LENGTH" (lambda (x) (if (nil? x) nil (strlen x))))
27 |
28 | /* vectors */
29 | (sql_builtins "VECTOR_DISTANCE" dot)
30 | (sql_builtins "STRING_TO_VECTOR" json_decode)
31 | (sql_builtins "VECTOR_TO_STRING" json_encode)
32 | (sql_builtins "VECTOR_DIM" json_encode)
33 |
34 | /* management: use SQL statements instead (REBUILD, SHOW SHARDS, etc.) */
35 |
--------------------------------------------------------------------------------
/lib/sql-metadata.scm:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2023, 2024 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 |
18 | /* emulate metadata tables */
19 | (define get_schema (lambda (schema tbl) (match '(schema tbl)
20 | /* special tables */
21 | '((ignorecase "information_schema") (ignorecase "schemata")) '(
22 | '("Field" "catalog_name")
23 | '("Field" "schema_name")
24 | '("Field" "default_character_set_name")
25 | '("Field" "default_collation_name")
26 | '("Field" "sql_path")
27 | '("Field" "schema_comment")
28 | )
29 |
30 | '((ignorecase "information_schema") (ignorecase "tables")) '(
31 | '("Field" "table_catalog")
32 | '("Field" "table_schema")
33 | '("Field" "table_name")
34 | '("Field" "table_type")
35 | )
36 | '((ignorecase "information_schema") (ignorecase "columns")) '(
37 | '("Field" "table_catalog")
38 | '("Field" "table_schema")
39 | '("Field" "table_name")
40 | '("Field" "column_name")
41 | '("Field" "ordinal_position")
42 | '("Field" "column_default")
43 | '("Field" "is_nullable")
44 | '("Field" "data_type")
45 | /* TODO: CHARACTER_MAXIMUM_LENGTH CHARACTER_OCTET_LENGTH NUMERIC_PRECISION NUMERIC_SCALE DATETIME_PRECISION CHARACTER_SET_NAME COLLATION_NAME */
46 | '("Field" "column_type")
47 | '("Field" "column_key")
48 | '("Field" "extra")
49 | '("Field" "privileges")
50 | '("Field" "column_comment")
51 | '("Field" "is_generated")
52 | '("Field" "generation_expression")
53 | )
54 | '((ignorecase "information_schema") (ignorecase "key_column_usage")) '(
55 | '("Field" "constraint_catalog")
56 | '("Field" "constraint_schema")
57 | '("Field" "constraint_name")
58 | '("Field" "table_catalog")
59 | '("Field" "table_schema")
60 | '("Field" "table_name")
61 | '("Field" "column_name")
62 | '("Field" "ordinal_position")
63 | '("Field" "position_in_unique_constraint")
64 | '("Field" "referenced_table_schema")
65 | '("Field" "referenced_table_name")
66 | '("Field" "referenced_column_name")
67 | )
68 | '((ignorecase "information_schema") (ignorecase "referential_constraints")) '(
69 | '("Field" "constraint_catalog")
70 | '("Field" "constraint_schema")
71 | '("Field" "constraint_name")
72 | '("Field" "unique_constraint_catalog")
73 | '("Field" "unique_constraint_schema")
74 | '("Field" "unique_constraint_name")
75 | '("Field" "match_option")
76 | '("Field" "update_rule")
77 | '("Field" "delete_rule")
78 | '("Field" "table_name")
79 | '("Field" "referenced_table_name")
80 | )
81 |
82 | /* Minimal compatibility for mysqldump probes */
83 | '((ignorecase "information_schema") (ignorecase "files")) '(
84 | '("Field" "file_name")
85 | '("Field" "file_type")
86 | '("Field" "tablespace_name")
87 | '("Field" "logfile_group_name")
88 | '("Field" "total_extents")
89 | '("Field" "initial_size")
90 | '("Field" "engine")
91 | '("Field" "extra")
92 | )
93 | '((ignorecase "information_schema") (ignorecase "partitions")) '(
94 | '("Field" "table_schema")
95 | '("Field" "table_name")
96 | '("Field" "partition_name")
97 | '("Field" "tablespace_name")
98 | )
99 |
100 | /* Unknown INFORMATION_SCHEMA table → clear SCM-side error */
101 | '((ignorecase "information_schema") _)
102 | (error (concat "INFORMATION_SCHEMA." tbl " is not supported yet"))
103 | (show schema tbl) /* otherwise: fetch from metadata */
104 | )))
105 | (define scan_wrapper (lambda args (match args (merge '(scanfn schema tbl) rest) (match '(schema tbl)
106 | '((ignorecase "information_schema") (ignorecase "schemata"))
107 | (merge '(scanfn schema
108 | '('map '('show) '('lambda '('schema) '('list "catalog_name" "def" "schema_name" 'schema "default_character_set_name" "utf8mb4" "default_collation_name" "utf8mb3_general_ci" "sql_path" NULL "schema_comment" "")))
109 | ) rest)
110 | '((ignorecase "information_schema") (ignorecase "tables"))
111 | (merge '(scanfn schema
112 | '('merge '('map '('show) '('lambda '('schema) '('map '('show 'schema) '('lambda '('tbl) '('list "table_catalog" "def" "table_schema" 'schema "table_name" 'tbl "table_type" "BASE TABLE"))))))
113 | ) rest)
114 | '((ignorecase "information_schema") (ignorecase "columns"))
115 | (merge '(scanfn schema
116 | 			'('merge '('map '('show) '('lambda '('schema) '('merge '('map '('show 'schema) '('lambda '('tbl) '('map '('show 'schema 'tbl) '('lambda '('col) '('list "table_catalog" "def" "table_schema" 'schema "table_name" 'tbl "column_name" '('col "Field") "data_type" '('col "RawType") "column_type" '('concat '('col "Type") '('col "Dimensions")))))))))))
117 | ) rest)
118 | '((ignorecase "information_schema") (ignorecase "key_column_usage"))
119 | (merge '(scanfn schema '(list)) rest) /* TODO: list constraints */
120 | '((ignorecase "information_schema") (ignorecase "referential_constraints"))
121 | (merge '(scanfn schema '(list)) rest) /* TODO: list constraints */
122 | '((ignorecase "information_schema") (ignorecase "files"))
123 | (merge '(scanfn schema '(list)) rest) /* empty: MemCP has no tablespaces/undo logs */
124 | '((ignorecase "information_schema") (ignorecase "partitions"))
125 | (merge '(scanfn schema '(list)) rest) /* empty: no MySQL partitions */
126 | '(schema tbl) /* normal case */
127 | (merge '(scanfn schema tbl) rest)
128 | ))))
129 |
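For orientation, a hypothetical call against the emulation above; get_schema returns the column descriptions declared in the matching branch:

    (get_schema "information_schema" "schemata")
    /* => '('("Field" "catalog_name") '("Field" "schema_name") ...) */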
--------------------------------------------------------------------------------
/lib/sql-test.scm:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2025 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see https://www.gnu.org/licenses/.
16 | */
17 |
18 | /* SQL Engine Test Suite - contained in its own environment */
19 | ((lambda () (begin
20 | (print "performing SQL engine tests ...")
21 |
22 | (set teststat (newsession))
23 | (teststat "count" 0)
24 | (teststat "success" 0)
25 | (define assert (lambda (val1 val2 errormsg) (begin
26 | (teststat "count" (+ (teststat "count") 1))
27 | 		(if (equal? val1 val2) (teststat "success" (+ (teststat "success") 1)) (print "failed test " (teststat "count") ": " errormsg))
28 | )))
29 |
30 | /* Clean up any existing test database and create fresh one */
31 | (try (lambda () (dropdatabase "memcp-tests")) (lambda (e) nil))
32 | (createdatabase "memcp-tests" true)
33 |
34 | /* Helper function to execute SQL and return result rows */
35 | (define sql-test-exec (lambda (query) (begin
36 | (set query-results (newsession))
37 | (query-results "rows" '())
38 | (define resultrow (lambda (row) (begin
39 | (query-results "rows" (append (query-results "rows") (list row)))
40 | )))
41 | (eval (parse_sql "memcp-tests" query))
42 | (query-results "rows")
43 | )))
44 |
45 | /* Create test tables and run simple tests */
46 | (sql-test-exec "CREATE TABLE test_users (id INT PRIMARY KEY, name VARCHAR(50))")
47 | (sql-test-exec "INSERT INTO test_users (id, name) VALUES (1, 'Alice')")
48 | (sql-test-exec "INSERT INTO test_users (id, name) VALUES (2, 'Bob')")
49 |
50 | (define result1 (sql-test-exec "SELECT * FROM test_users"))
51 | (assert (equal? (count result1) 2) true "SELECT should return 2 rows")
52 |
53 | (define result2 (sql-test-exec "SELECT COUNT(*) FROM test_users"))
54 | (assert (equal? (count result2) 1) true "SELECT COUNT(*) should return 1 row")
55 |
56 | /* Basic parsing tests - just verify the SQL parses correctly without executing */
57 | (define allow (lambda (schema table write) true))
58 | (assert (list? (parse_sql "system" "SELECT 1" allow)) true "Simple SELECT should parse")
59 | (assert (list? (parse_sql "system" "SELECT * FROM user" allow)) true "SELECT * should parse")
60 | (assert (list? (parse_sql "system" "INSERT INTO user VALUES (1, 'test', 'pass')" allow)) true "INSERT should parse")
61 | (assert (list? (parse_sql "system" "UPDATE user SET username = 'newname' WHERE id = 1" allow)) true "UPDATE should parse")
62 | (assert (list? (parse_sql "system" "DELETE FROM user WHERE id = 1" allow)) true "DELETE should parse")
63 |
64 | (print "SQL parsing and execution tests completed successfully")
65 |
66 | /* Clean up test database */
67 | (dropdatabase "memcp-tests")
68 |
69 | (print "finished SQL engine tests")
70 | (print "test result: " (teststat "success") "/" (teststat "count"))
71 | (if (< (teststat "success") (teststat "count")) (begin
72 | (print "")
73 | (print "---- !!! some SQL test cases have failed !!! ----")
74 | (print "")
75 | (print " SQL engine may have issues")
76 | (error "SQL tests failed")
77 | ) (print "all SQL tests succeeded."))
78 | (print "")
79 | )))
80 |
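Further checks would reuse the same helpers; a hypothetical extra assertion (query and message invented for illustration) would look like:

    (assert (equal? (count (sql-test-exec "SELECT * FROM test_users WHERE id = 1")) 1) true "WHERE id = 1 should return 1 row")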
--------------------------------------------------------------------------------
/memcp.singularity.recipe:
--------------------------------------------------------------------------------
1 | Bootstrap: docker
2 | From: ubuntu:22.04
3 |
4 | # build with: sudo singularity build memcp.sif memcp.singularity.recipe
5 | # run with: singularity run --bind `pwd`:/data memcp.sif
6 |
7 | %post
8 | apt-get -y update
9 | apt-get -y install git software-properties-common
10 | add-apt-repository -y ppa:longsleep/golang-backports
11 | apt-get -y install golang
12 |
13 | git clone https://github.com/launix-de/memcp /memcp
14 | cd /memcp
15 | go get
16 | go build
17 |
18 | %environment
19 | export PATH="/memcp:$PATH"
20 |
21 | %runscript
22 | cd /memcp && ./memcp -data /data
23 |
--------------------------------------------------------------------------------
/scm/assoc_fast.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2025 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see https://www.gnu.org/licenses/.
16 | */
17 | package scm
18 |
19 | import (
20 | "encoding/binary"
21 | "hash/maphash"
22 | "math"
23 | "reflect"
24 | )
25 |
26 | // Stable seed for hashing to ensure consistent indices across Set/Get calls.
27 | var fastDictSeed maphash.Seed
28 |
29 | func init() {
30 | fastDictSeed = maphash.MakeSeed()
31 | }
32 |
33 | // FastDict: shard-local assoc optimized for frequent set/merge operations.
34 | // Implementation uses a flat pairs array plus a lightweight hash index
35 | // to avoid O(N^2) behavior as it grows.
36 | type FastDict struct {
37 | Pairs []Scmer // [k0, v0, k1, v1, ...]
38 | index map[uint64][]int // hash -> positions (indices into Pairs, even only)
39 | }
40 |
41 | func NewFastDict(capacityPairs int) *FastDict {
42 | if capacityPairs < 0 {
43 | capacityPairs = 0
44 | }
45 | return &FastDict{Pairs: make([]Scmer, 0, capacityPairs*2), index: make(map[uint64][]int)}
46 | }
47 |
48 | func (d *FastDict) Iterate(fn func(k, v Scmer) bool) {
49 | for i := 0; i < len(d.Pairs); i += 2 {
50 | if !fn(d.Pairs[i], d.Pairs[i+1]) {
51 | return
52 | }
53 | }
54 | }
55 |
56 | // HashKey computes a stable hash for a Scheme value.
57 | // It avoids allocating intermediate strings by inspecting types and
58 | // feeding bytes directly to a streaming hasher. Lists are hashed by
59 | // recursively hashing their elements with structural markers.
60 | func HashKey(k Scmer) uint64 {
61 | var h maphash.Hash
62 | h.SetSeed(fastDictSeed)
63 | var writeScmer func(v Scmer)
64 | writeScmer = func(v Scmer) {
65 | switch x := v.(type) {
66 | case nil:
67 | h.WriteByte(0)
68 | case bool:
69 | h.WriteByte(1)
70 | if x {
71 | h.WriteByte(1)
72 | } else {
73 | h.WriteByte(0)
74 | }
75 | case int64:
76 | h.WriteByte(2)
77 | var b [8]byte
78 | binary.LittleEndian.PutUint64(b[:], uint64(x))
79 | h.Write(b[:])
80 | case float64:
81 | h.WriteByte(3)
82 | var b [8]byte
83 | binary.LittleEndian.PutUint64(b[:], math.Float64bits(x))
84 | h.Write(b[:])
85 | case string:
86 | h.WriteByte(4)
87 | h.WriteString(x)
88 | case Symbol:
89 | h.WriteByte(5)
90 | h.WriteString(string(x))
91 | case []Scmer:
92 | h.WriteByte(6)
93 | // write length to reduce collisions for different list sizes
94 | var b [8]byte
95 | binary.LittleEndian.PutUint64(b[:], uint64(len(x)))
96 | h.Write(b[:])
97 | for _, el := range x {
98 | writeScmer(el)
99 | }
100 | case *FastDict:
101 | // Hash as list of pairs to match []Scmer assoc representation
102 | h.WriteByte(6)
103 | var b [8]byte
104 | binary.LittleEndian.PutUint64(b[:], uint64(len(x.Pairs)))
105 | h.Write(b[:])
106 | for i := 0; i < len(x.Pairs); i += 2 {
107 | writeScmer(x.Pairs[i])
108 | writeScmer(x.Pairs[i+1])
109 | }
110 | default:
111 | // Fallback on type name to avoid heavy allocations
112 | h.WriteByte(255)
113 | h.WriteString(reflect.TypeOf(v).String())
114 | }
115 | }
116 | writeScmer(k)
117 | return h.Sum64()
118 | }
119 |
120 | func (d *FastDict) findPos(key Scmer, h uint64) (int, bool) {
121 | if d.index == nil {
122 | return -1, false
123 | }
124 | if bucket, ok := d.index[h]; ok {
125 | for _, pos := range bucket {
126 | if Equal(d.Pairs[pos], key) {
127 | return pos, true
128 | }
129 | }
130 | }
131 | return -1, false
132 | }
133 |
134 | func (d *FastDict) Get(key Scmer) (Scmer, bool) {
135 | h := HashKey(key)
136 | if pos, ok := d.findPos(key, h); ok {
137 | return d.Pairs[pos+1], true
138 | }
139 | return nil, false
140 | }
141 |
142 | // Set sets or merges a value for key. If merge is nil, it overwrites.
143 | func (d *FastDict) Set(key, value Scmer, merge func(oldV, newV Scmer) Scmer) {
144 | if d.index == nil {
145 | d.index = make(map[uint64][]int)
146 | }
147 | h := HashKey(key)
148 | if pos, ok := d.findPos(key, h); ok {
149 | if merge != nil {
150 | d.Pairs[pos+1] = merge(d.Pairs[pos+1], value)
151 | } else {
152 | d.Pairs[pos+1] = value
153 | }
154 | return
155 | }
156 | // append new
157 | pos := len(d.Pairs)
158 | d.Pairs = append(d.Pairs, key, value)
159 | d.index[h] = append(d.index[h], pos)
160 | }
161 |
162 | func (d *FastDict) ToList() []Scmer { return d.Pairs }
163 |
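A minimal usage sketch of the FastDict API above; the function name, key and values are hypothetical and only meant to show plain overwrite versus merge:

    package scm

    import "fmt"

    // counting with FastDict: a nil merge callback overwrites, a merge callback accumulates
    func exampleFastDictUsage() {
    	d := NewFastDict(4)
    	d.Set("hits", int64(1), nil) // plain insert / overwrite
    	d.Set("hits", int64(2), func(oldV, newV Scmer) Scmer {
    		return oldV.(int64) + newV.(int64) // merge: add the counters
    	})
    	if v, ok := d.Get("hits"); ok {
    		fmt.Println(v) // 3
    	}
    	d.Iterate(func(k, v Scmer) bool { fmt.Println(k, v); return true })
    }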
--------------------------------------------------------------------------------
/scm/date.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2024 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see https://www.gnu.org/licenses/.
16 | */
17 | package scm
18 |
19 | import "time"
20 |
21 | func init_date() {
22 | // string functions
23 | DeclareTitle("Date")
24 | allowed_formats := []string{
25 | "2006-01-02 15:04:05.000000",
26 | "2006-01-02 15:04:05",
27 | "2006-01-02 15:04",
28 | "2006-01-02",
29 | "06-01-02 15:04:05.000000",
30 | "06-01-02 15:04:05",
31 | "06-01-02 15:04",
32 | "06-01-02",
33 | }
34 |
35 | Declare(&Globalenv, &Declaration{
36 | "now", "returns the unix timestamp",
37 | 0, 0,
38 | []DeclarationParameter{}, "int",
39 | func(a ...Scmer) (result Scmer) {
40 | return int64(time.Now().Unix())
41 | },
42 | false,
43 | })
44 | Declare(&Globalenv, &Declaration{
45 | 		"parse_date", "parses a date string and returns its unix timestamp",
46 | 1, 1,
47 | []DeclarationParameter{
48 | DeclarationParameter{"value", "string", "values to parse"},
49 | }, "int",
50 | func(a ...Scmer) Scmer {
51 | for _, format := range allowed_formats { // try through all formats
52 | 				if t, err := time.Parse(format, String(a[0])); err == nil {
53 | return int64(t.Unix())
54 | }
55 | }
56 | return nil
57 | },
58 | true,
59 | })
60 | }
61 |
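At the Scheme level the two builtins declared above would be used like this (the values are examples only; parse_date relies on the format list at the top of init_date):

    (now)                              /* current unix timestamp */
    (parse_date "2024-01-02 15:04:05") /* unix timestamp of that date */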
--------------------------------------------------------------------------------
/scm/jit_arm64.go:
--------------------------------------------------------------------------------
1 | //go:build arm64
2 |
3 | /*
4 | Copyright (C) 2024 Carl-Philip Hänsch
5 |
6 | This program is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published by
8 | the Free Software Foundation, either version 3 of the License, or
9 | (at your option) any later version.
10 |
11 | This program is distributed in the hope that it will be useful,
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | GNU General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with this program. If not, see https://www.gnu.org/licenses/.
18 | */
19 | package scm
20 |
21 | import "unsafe"
22 |
23 | // TODO: the arm64 code generation below is still a stub (see jit_amd64.go for the amd64 variant)
24 | 
25 | // all code snippets fill the two return-value registers (rax+rbx on amd64; the arm64 encodings are still TODO)
26 | func jitReturnLiteral(value Scmer) []byte {
27 | code := []byte{
28 | // TODO
29 | }
30 | // insert the literal into the immediate values
31 | *(*unsafe.Pointer)(unsafe.Pointer(&code[2])) = *(*unsafe.Pointer)(unsafe.Pointer(&value))
32 | *(*unsafe.Pointer)(unsafe.Pointer(&code[12])) = *((*unsafe.Pointer)(unsafe.Add(unsafe.Pointer(&value), 8)))
33 | return code
34 | }
35 |
36 | func jitNthArgument(idx int) []byte { // up to 16 params
37 | // TODO: corner case 0, corner case >=16
38 | code := []byte{
39 | // TODO
40 | }
41 | return code
42 | }
43 |
44 | func jitStackFrame(size uint8) []byte {
45 | return []byte{
46 | // TODO
47 | }
48 | }
49 |
50 | /* TODO: peephole optimizer:
51 | - remove argument checks (test rbx,rbx 48 85 db 76 xx)
52 | - shorten immediate values
53 | - constant-fold operations
54 | - inline functions
55 | - jump to other functions
56 | */
57 |
--------------------------------------------------------------------------------
/scm/prompt.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2023 Carl-Philip Hänsch
3 | Copyright (C) 2013 Pieter Kelchtermans (originally licensed under WTFPL 2.0)
4 |
5 | This program is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | This program is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with this program. If not, see https://www.gnu.org/licenses/.
17 | */
18 |
19 | package scm
20 |
21 | import (
22 | "bytes"
23 | "fmt"
24 | "github.com/chzyer/readline"
25 | "io"
26 | "log"
27 | "os"
28 | "regexp"
29 | "runtime/debug"
30 | "strings"
31 | )
32 |
33 | const newprompt = "\033[32m>\033[0m "
34 | const contprompt = "\033[32m.\033[0m "
35 | const resultprompt = "\033[31m=\033[0m "
36 |
37 | var lambdaExpr *regexp.Regexp = regexp.MustCompile("\\(lambda\\s*\\(([^)]+)\\)")
38 |
39 | /* implements interface readline.AutoCompleter */
40 | func (en *Env) Do(line []rune, pos int) (newLine [][]rune, offset int) {
41 | start := pos
42 | for start >= 1 && line[start-1] != '(' && line[start-1] != ')' && line[start-1] != ' ' {
43 | start--
44 | }
45 | pfx := string(line[start:pos])
46 | offset = len(pfx)
47 | // iterate documentation
48 | for _, d := range declarations {
49 | if strings.HasPrefix(d.Name, pfx) && en.FindRead(Symbol(d.Name)) != nil {
50 | if d.Name == "lambda" {
51 | newLine = append(newLine, []rune("lambda ("[offset:]))
52 | } else {
53 | newLine = append(newLine, []rune(d.Name[offset:]))
54 | }
55 | }
56 | }
57 | // iterate variables
58 | for en != nil {
59 | 		for s := range en.Vars {
60 | if strings.HasPrefix(string(s), pfx) {
61 | newLine = append(newLine, []rune(s[offset:]))
62 | }
63 | }
64 | en = en.Outer // iterate over parent scope
65 | }
66 | // find lambda variables in the line
67 | for _, m := range lambdaExpr.FindAllStringSubmatch(string(line), -1) {
68 | // each declared parameter of the lambda is also completed
69 | for _, s := range strings.Split(m[1], " ") {
70 | if strings.HasPrefix(s, pfx) {
71 | newLine = append(newLine, []rune(s[offset:]))
72 | }
73 | }
74 | }
75 | return
76 | }
77 |
78 | var ReplInstance *readline.Instance
79 |
80 | func Repl(en *Env) {
81 | l, err := readline.NewEx(&readline.Config{
82 | Prompt: newprompt,
83 | HistoryFile: ".memcp-history.tmp",
84 | AutoComplete: en,
85 | InterruptPrompt: "^C",
86 | EOFPrompt: "exit",
87 | HistorySearchFold: true,
88 | })
89 | if err != nil {
90 | panic(err)
91 | }
92 | ReplInstance = l
93 | defer l.Close()
94 | l.CaptureExitSignal()
95 |
96 | oldline := ""
97 | for {
98 | line, err := l.Readline()
99 | line = oldline + line
100 | if err == readline.ErrInterrupt {
101 | if len(line) == 0 {
102 | break
103 | } else {
104 | continue
105 | }
106 | } else if err == io.EOF {
107 | break
108 | } else if err != nil {
109 | panic(err)
110 | }
111 | if line == "" {
112 | continue
113 | }
114 |
115 | // anti-panic func
116 | func() {
117 | defer func() {
118 | if r := recover(); r != nil {
119 | rs := fmt.Sprint(r)
120 | if strings.Contains(rs, "expecting matching )") {
121 | // keep oldline
122 | oldline = line + "\n"
123 | l.SetPrompt(contprompt)
124 | return
125 | }
126 | PrintError(r)
127 | oldline = ""
128 | l.SetPrompt(newprompt)
129 | }
130 | }()
131 | var b bytes.Buffer
132 | code := Read("user prompt", line)
133 | Validate(code, "any")
134 | code = Optimize(code, en)
135 | result := Eval(code, en)
136 | Serialize(&b, result, en)
137 | fmt.Print(resultprompt)
138 | fmt.Println(b.String())
139 | oldline = ""
140 | l.SetPrompt(newprompt)
141 | }()
142 | }
143 | ReplInstance = nil
144 | }
145 |
146 | var errorlog *log.Logger
147 |
148 | func init() {
149 | errorlog = log.New(os.Stderr, "", 0)
150 | }
151 | func PrintError(r any) {
152 | s := fmt.Sprint(r)
153 | numlines := strings.Count(s, "\nin ")*4 + 9 // skip those stack trace lines that peel out of the error message
154 | trace := string(debug.Stack())
155 | for numlines > 0 {
156 | if trace == "" {
157 | break
158 | }
159 | if trace[0] == '\n' {
160 | numlines--
161 | }
162 | trace = trace[1:]
163 | }
164 | errorlog.Println(r, ": \n", trace)
165 | }
166 |
--------------------------------------------------------------------------------
/scm/scheduler.go:
--------------------------------------------------------------------------------
1 | package scm
2 |
3 | import (
4 | "container/heap"
5 | "context"
6 | "fmt"
7 | "runtime/debug"
8 | "sync"
9 | "time"
10 | )
11 |
12 | type Task func()
13 |
14 | type task struct {
15 | runAt time.Time
16 | fn Task
17 | id uint64
18 | }
19 |
20 | type taskHeap []task
21 |
22 | func (h taskHeap) Len() int { return len(h) }
23 |
24 | func (h taskHeap) Less(i, j int) bool {
25 | if h[i].runAt.Equal(h[j].runAt) {
26 | return h[i].id < h[j].id
27 | }
28 | return h[i].runAt.Before(h[j].runAt)
29 | }
30 |
31 | func (h taskHeap) Swap(i, j int) {
32 | h[i], h[j] = h[j], h[i]
33 | }
34 |
35 | func (h *taskHeap) Push(x any) {
36 | *h = append(*h, x.(task))
37 | }
38 |
39 | func (h *taskHeap) Pop() any {
40 | old := *h
41 | n := len(old)
42 | item := old[n-1]
43 | *h = old[:n-1]
44 | return item
45 | }
46 |
47 | type Scheduler struct {
48 | mu sync.Mutex
49 | tasks taskHeap
50 | wakeCh chan struct{}
51 | stopCh chan struct{}
52 | cancel map[uint64]struct{}
53 | active map[uint64]struct{}
54 | stopped bool
55 | nextID uint64
56 | initOnce sync.Once
57 | wg sync.WaitGroup
58 | }
59 |
60 | var DefaultScheduler Scheduler
61 |
62 | func init() {
63 | DefaultScheduler.init()
64 | }
65 |
66 | func (s *Scheduler) init() {
67 | s.initOnce.Do(func() {
68 | s.wakeCh = make(chan struct{}, 1)
69 | s.stopCh = make(chan struct{})
70 | s.cancel = make(map[uint64]struct{})
71 | s.active = make(map[uint64]struct{})
72 | heap.Init(&s.tasks)
73 | s.wg.Add(1)
74 | go s.run()
75 | })
76 | }
77 |
78 | func (s *Scheduler) ScheduleAt(t time.Time, fn Task) (uint64, bool) {
79 | if fn == nil {
80 | return 0, false
81 | }
82 | s.init()
83 | s.mu.Lock()
84 | defer s.mu.Unlock()
85 | if s.stopped {
86 | return 0, false
87 | }
88 | s.nextID++
89 | id := s.nextID
90 | newTask := task{runAt: t, fn: fn, id: id}
91 | heap.Push(&s.tasks, newTask)
92 | s.active[id] = struct{}{}
93 | delete(s.cancel, id)
94 | shouldWake := len(s.tasks) > 0 && s.tasks[0].id == id
95 | if shouldWake {
96 | s.signalLocked()
97 | }
98 | return id, true
99 | }
100 |
101 | func (s *Scheduler) ScheduleAfter(delay time.Duration, fn Task) (uint64, bool) {
102 | if delay < 0 {
103 | delay = 0
104 | }
105 | return s.ScheduleAt(time.Now().Add(delay), fn)
106 | }
107 |
108 | func (s *Scheduler) Clear(id uint64) bool {
109 | s.init()
110 | s.mu.Lock()
111 | defer s.mu.Unlock()
112 | if s.stopped {
113 | return false
114 | }
115 | if _, ok := s.active[id]; !ok {
116 | return false
117 | }
118 | s.cancel[id] = struct{}{}
119 | delete(s.active, id)
120 | s.signalLocked()
121 | return true
122 | }
123 |
124 | func (s *Scheduler) Stop() {
125 | s.init()
126 | s.mu.Lock()
127 | if s.stopped {
128 | s.mu.Unlock()
129 | s.wg.Wait()
130 | return
131 | }
132 | s.stopped = true
133 | close(s.stopCh)
134 | s.mu.Unlock()
135 | s.signal()
136 | s.wg.Wait()
137 | }
138 |
139 | func (s *Scheduler) signalLocked() {
140 | select {
141 | case s.wakeCh <- struct{}{}:
142 | default:
143 | }
144 | }
145 |
146 | func (s *Scheduler) signal() {
147 | signalC := s.wakeCh
148 | if signalC == nil {
149 | return
150 | }
151 | select {
152 | case signalC <- struct{}{}:
153 | default:
154 | }
155 | }
156 |
157 | func (s *Scheduler) runTask(fn Task) {
158 | defer func() {
159 | if r := recover(); r != nil {
160 | fmt.Printf("scheduler: task panic: %v\n", r)
161 | debug.PrintStack()
162 | }
163 | }()
164 | fn()
165 | }
166 |
167 | func (s *Scheduler) drainTimer(timer *time.Timer) {
168 | if timer != nil && !timer.Stop() {
169 | select {
170 | case <-timer.C:
171 | default:
172 | }
173 | }
174 | }
175 |
176 | func (s *Scheduler) run() {
177 | defer s.wg.Done()
178 | var timer *time.Timer
179 | for {
180 | s.mu.Lock()
181 | if len(s.tasks) == 0 {
182 | if s.stopped {
183 | s.mu.Unlock()
184 | return
185 | }
186 | s.mu.Unlock()
187 | select {
188 | case <-s.stopCh:
189 | return
190 | case <-s.wakeCh:
191 | }
192 | continue
193 | }
194 | next := s.tasks[0]
195 | if _, cancelled := s.cancel[next.id]; cancelled {
196 | heap.Pop(&s.tasks)
197 | delete(s.cancel, next.id)
198 | delete(s.active, next.id)
199 | s.mu.Unlock()
200 | continue
201 | }
202 | wait := time.Until(next.runAt)
203 | if wait <= 0 {
204 | heap.Pop(&s.tasks)
205 | delete(s.active, next.id)
206 | delete(s.cancel, next.id)
207 | s.mu.Unlock()
208 | go s.runTask(next.fn)
209 | continue
210 | }
211 | if timer == nil {
212 | timer = time.NewTimer(wait)
213 | } else {
214 | timer.Reset(wait)
215 | }
216 | s.mu.Unlock()
217 | select {
218 | case <-timer.C:
219 | case <-s.wakeCh:
220 | s.drainTimer(timer)
221 | case <-s.stopCh:
222 | s.drainTimer(timer)
223 | return
224 | }
225 | }
226 | }
227 |
228 | func init_scheduler() {
229 | DeclareTitle("Scheduler")
230 | Declare(&Globalenv, &Declaration{
231 | "setTimeout", "Schedules a callback to run after the given delay in milliseconds (fractional values allowed for sub-millisecond precision).",
232 | 2, 1000,
233 | []DeclarationParameter{
234 | {"callback", "func", "function to execute once the timeout expires"},
235 | {"milliseconds", "number", "milliseconds until execution"},
236 | {"args...", "any", "optional arguments forwarded to the callback"},
237 | }, "int",
238 | setTimeout, false,
239 | })
240 | Declare(&Globalenv, &Declaration{
241 | "clearTimeout", "Cancels a timeout created with setTimeout.",
242 | 1, 1,
243 | []DeclarationParameter{
244 | {"id", "number", "identifier returned by setTimeout"},
245 | }, "bool",
246 | clearTimeout, false,
247 | })
248 | }
249 |
250 | func setTimeout(a ...Scmer) Scmer {
251 | if len(a) < 2 {
252 | panic("setTimeout expects at least a callback and delay")
253 | }
254 |
255 | callback := a[0]
256 | millis := ToFloat(a[1])
257 | if millis < 0 {
258 | millis = 0
259 | }
260 |
261 | duration := time.Duration(millis * float64(time.Millisecond))
262 | callbackArgs := append([]Scmer(nil), a[2:]...)
263 | id, ok := DefaultScheduler.ScheduleAfter(duration, func() {
264 | NewContext(context.TODO(), func() {
265 | Apply(callback, callbackArgs...)
266 | })
267 | })
268 | if !ok {
269 | return false
270 | }
271 | return int64(id)
272 | }
273 |
274 | func clearTimeout(a ...Scmer) Scmer {
275 | if len(a) != 1 {
276 | panic("clearTimeout expects one argument")
277 | }
278 | id := uint64(ToInt(a[0]))
279 | return DefaultScheduler.Clear(id)
280 | }
281 |
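A usage sketch of the two builtins declared above, as they would be called from Scheme; the callback, delay and argument are arbitrary examples:

    (define id (setTimeout (lambda (msg) (print msg)) 250 "tick"))
    (clearTimeout id) /* true if the timeout was still pending */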
--------------------------------------------------------------------------------
/scm/streams.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2024 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see https://www.gnu.org/licenses/.
16 | */
17 | package scm
18 |
19 | import "io"
20 | import "bufio"
21 | import "compress/gzip"
22 | import "github.com/ulikunitz/xz"
23 |
24 | func init_streams() {
25 | // string functions
26 | DeclareTitle("Streams")
27 |
28 | Declare(&Globalenv, &Declaration{
29 | "streamString", "creates a stream that contains a string",
30 | 1, 1,
31 | []DeclarationParameter{
32 | DeclarationParameter{"content", "string", "content to put into the stream"},
33 | }, "stream",
34 | func(a ...Scmer) (result Scmer) {
35 | reader, writer := io.Pipe()
36 | go func() {
37 | io.WriteString(writer, String(a[0]))
38 | writer.Close()
39 | }()
40 | return io.Reader(reader)
41 | }, false,
42 | })
43 | Declare(&Globalenv, &Declaration{
44 | "gzip", "compresses a stream with gzip. Create streams with (stream filename)",
45 | 1, 1,
46 | []DeclarationParameter{
47 | DeclarationParameter{"stream", "stream", "input stream"},
48 | }, "stream",
49 | func(a ...Scmer) (result Scmer) {
50 | stream := a[0].(io.Reader)
51 | reader, writer := io.Pipe()
52 | bwriter := bufio.NewWriterSize(writer, 16*1024)
53 | zip := gzip.NewWriter(bwriter)
54 | go func() {
55 | io.Copy(zip, stream)
56 | zip.Close()
57 | bwriter.Flush()
58 | writer.Close()
59 | }()
60 | return (io.Reader)(reader)
61 | }, false,
62 | })
63 | Declare(&Globalenv, &Declaration{
64 | "xz", "compresses a stream with xz. Create streams with (stream filename)",
65 | 1, 1,
66 | []DeclarationParameter{
67 | DeclarationParameter{"stream", "stream", "input stream"},
68 | }, "stream",
69 | func(a ...Scmer) (result Scmer) {
70 | stream := a[0].(io.Reader)
71 | reader, writer := io.Pipe()
72 | bwriter := bufio.NewWriterSize(writer, 16*1024)
73 | zip, err := xz.NewWriter(bwriter)
74 | 			if err != nil {
75 | 				panic(err) // check before the goroutine starts so it never copies into a nil writer
76 | 			}
77 | 			go func() {
78 | 				io.Copy(zip, stream)
79 | 				zip.Close()
80 | 				bwriter.Flush()
81 | 				writer.Close()
82 | 			}()
83 | return (io.Reader)(reader)
84 | }, false,
85 | })
86 | Declare(&Globalenv, &Declaration{
87 | "zcat", "turns a compressed gzip stream into a stream of uncompressed data. Create streams with (stream filename)",
88 | 1, 1,
89 | []DeclarationParameter{
90 | DeclarationParameter{"stream", "stream", "input stream"},
91 | }, "stream",
92 | func(a ...Scmer) (result Scmer) {
93 | stream := a[0].(io.Reader)
94 | result, err := gzip.NewReader(stream)
95 | if err != nil {
96 | panic(err)
97 | }
98 | return result
99 | }, false,
100 | })
101 | Declare(&Globalenv, &Declaration{
102 | "xzcat", "turns a compressed xz stream into a stream of uncompressed data. Create streams with (stream filename)",
103 | 1, 1,
104 | []DeclarationParameter{
105 | DeclarationParameter{"stream", "stream", "input stream"},
106 | }, "stream",
107 | func(a ...Scmer) (result Scmer) {
108 | stream := a[0].(io.Reader)
109 | result, err := xz.NewReader(stream)
110 | if err != nil {
111 | panic(err)
112 | }
113 | return result
114 | }, false,
115 | })
116 | }
117 |
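The stream builtins declared above compose into pipelines; a sketch with an arbitrary payload (gzip compresses, zcat decompresses, so the result streams the original bytes):

    (zcat (gzip (streamString "hello world")))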
--------------------------------------------------------------------------------
/scm/trace.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2024 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see https://www.gnu.org/licenses/.
16 | */
17 | package scm
18 |
19 | import "io"
20 | import "os"
21 | import "fmt"
22 | import "sync"
23 | import "time"
24 | import "encoding/json"
25 |
26 | type Tracefile struct {
27 | isFirst bool
28 | file io.WriteCloser
29 | m sync.Mutex
30 | }
31 |
32 | var Trace *Tracefile // default trace: set to not nil if you want to trace
33 | var TracePrint bool // whether to print traces to stdout
34 |
35 | func SetTrace(on bool) { // sets Trace to nil or a value
36 | if Trace != nil {
37 | Trace.Close()
38 | Trace = nil
39 | }
40 | if on {
41 | // TODO: tracefolder
42 | f, err := os.Create(os.Getenv("MEMCP_TRACEDIR") + "trace_" + fmt.Sprint(time.Now().Unix()) + ".json")
43 | if err != nil {
44 | panic(err)
45 | }
46 | Trace = NewTrace(f)
47 | }
48 | }
49 |
50 | func NewTrace(file io.WriteCloser) *Tracefile {
51 | file.Write([]byte("["))
52 | result := new(Tracefile)
53 | result.file = file
54 | result.isFirst = true
55 | return result
56 | }
57 |
58 | func (t *Tracefile) Close() {
59 | t.file.Write([]byte("]"))
60 | t.file.Close()
61 | }
62 |
63 | func (t *Tracefile) Duration(name string, cat string, f func()) {
64 | t.EventHalf(name, cat, "B", 0, 0)
65 | defer t.EventHalf(name, cat, "E", 0, 0)
66 | f()
67 | }
68 |
69 | func (t *Tracefile) Event(name string, cat string, typ string) {
70 | t.EventHalf(name, cat, typ, 0, 0)
71 | }
72 |
73 | func (t *Tracefile) EventHalf(name string, cat string, typ string, tid int, pid int) {
74 | ts := time.Since(start).Microseconds()
75 | t.EventFull(name, cat, typ, ts, tid, pid)
76 | }
77 |
78 | /*
79 | EventFull writes a single trace event record (Chrome trace event JSON format).
80 | 
81 | @name string function name
82 | @cat string comma separated categories (for filtering)
83 | @typ B/E for begin/end, X for events
84 | @ts timestamp in microseconds
85 | @pid process id
86 | @tid thread id
87 | @args ??
88 | */
89 | func (t *Tracefile) EventFull(name string, cat string, typ string, ts int64, tid int, pid int) {
90 | t.m.Lock()
91 | if t.isFirst {
92 | t.isFirst = false
93 | } else {
94 | t.file.Write([]byte(",\n"))
95 | }
96 | t.file.Write([]byte("{\"name\": "))
97 | b, _ := json.Marshal(name) // name
98 | t.file.Write(b)
99 | t.file.Write([]byte(", \"cat\": "))
100 | b, _ = json.Marshal(cat) // cat
101 | t.file.Write(b)
102 | t.file.Write([]byte(", \"ph\": \""))
103 | t.file.Write([]byte(typ))
104 | t.file.Write([]byte("\", \"ts\": "))
105 | b, _ = json.Marshal(ts) // ts
106 | t.file.Write(b)
107 | t.file.Write([]byte(", \"pid\": "))
108 | b, _ = json.Marshal(pid) // pid
109 | t.file.Write(b)
110 | t.file.Write([]byte(", \"tid\": "))
111 | b, _ = json.Marshal(tid) // tid
112 | t.file.Write(b)
113 | t.file.Write([]byte(", \"s\": \"g\"}"))
114 | t.m.Unlock()
115 | }
116 |
117 | var start time.Time = time.Now()
118 |
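A sketch of how the tracing API above is driven from Go; the event name and category are arbitrary examples:

    package scm

    // wraps a block in a begin/end event pair and writes it to the current trace file
    func exampleTraceUsage() {
    	SetTrace(true) // creates trace_<unixtime>.json under the MEMCP_TRACEDIR prefix
    	if Trace != nil {
    		Trace.Duration("rebuild", "storage", func() {
    			// ... traced work ...
    		})
    	}
    }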
--------------------------------------------------------------------------------
/scm/vector.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2025 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see .
16 | */
17 | package scm
18 |
19 | import "math"
20 |
21 | func init_vector() {
22 | // string functions
23 | DeclareTitle("Vectors")
24 |
25 | Declare(&Globalenv, &Declaration{
26 | 		"dot", "computes the dot product, cosine similarity or euclidean distance of two vectors",
27 | 2, 3,
28 | []DeclarationParameter{
29 | DeclarationParameter{"v1", "list", "vector1"},
30 | DeclarationParameter{"v2", "list", "vector2"},
31 | DeclarationParameter{"mode", "string", "DOT, COSINE, EUCLIDEAN, default is DOT"},
32 | }, "number",
33 | func(a ...Scmer) Scmer {
34 | var result float64 = 0
35 | v1 := a[0].([]Scmer)
36 | v2 := a[1].([]Scmer)
37 | if len(a) > 2 && a[2] == "COSINE" {
38 | // COSINE
39 | var lena float64 = 0
40 | var lenb float64 = 0
41 | for i := 0; i < len(v1) && i < len(v2); i++ {
42 | w1 := ToFloat(v1[i])
43 | w2 := ToFloat(v2[i])
44 | lena += w1 * w1
45 | lenb += w2 * w2
46 | result += w1 * w2
47 | }
48 | result = result / math.Sqrt(lena*lenb)
49 | 			} else if len(a) > 2 && a[2] == "EUCLIDEAN" {
50 | 				// EUCLIDEAN: square root of the summed squared differences
51 | 				for i := 0; i < len(v1) && i < len(v2); i++ {
52 | 					d := ToFloat(v1[i]) - ToFloat(v2[i]); result += d * d
53 | 				}
54 | 				result = math.Sqrt(result)
55 | 			} else { // DOT
56 | 				for i := 0; i < len(v1) && i < len(v2); i++ { result += ToFloat(v1[i]) * ToFloat(v2[i]) }
57 | 			}
58 | return result
59 | }, true,
60 | })
61 | }
62 |
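The dot builtin declared above is called from Scheme with two list arguments and an optional mode string; the numbers are arbitrary examples:

    (dot '(1 2 3) '(4 5 6))          /* => 32 */
    (dot '(1 2 3) '(4 5 6) "COSINE") /* => cosine similarity */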
--------------------------------------------------------------------------------
/storage/analyzer.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2023 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see https://www.gnu.org/licenses/.
16 | */
17 | package storage
18 |
19 | import "sort"
20 | import "github.com/launix-de/memcp/scm"
21 |
22 | type columnboundaries struct {
23 | col string
24 | lower scm.Scmer
25 | lowerInclusive bool
26 | upper scm.Scmer
27 | upperInclusive bool
28 | }
29 |
30 | type boundaries []columnboundaries
31 |
32 | // analyzes a lambda expression for value boundaries, so the best index can be found
33 | func extractBoundaries(conditionCols []string, condition scm.Scmer) boundaries {
34 | p := condition.(scm.Proc)
35 | symbolmapping := make(map[scm.Symbol]string)
36 | for i, sym := range p.Params.([]scm.Scmer) {
37 | symbolmapping[sym.(scm.Symbol)] = conditionCols[i]
38 | }
39 | cols := make([]columnboundaries, 0, 4)
40 | addConstraint := func(in []columnboundaries, b2 columnboundaries) []columnboundaries {
41 | for i, b := range in {
42 | if b.col == b2.col {
43 | // column match -> merge value range
44 | if b.lower == nil || b2.lower != nil && scm.Less(b.lower, b2.lower) {
45 | // both values are ANDed, so take the higher value as lower bound
46 | in[i].lower = b2.lower
47 | }
48 | in[i].lowerInclusive = b.lowerInclusive || b2.lowerInclusive // TODO: check correctness
49 | if b.upper == nil || b2.upper != nil && scm.Less(b2.upper, b.upper) {
50 | // the lower of both upper values will be the new upper bound
51 | in[i].upper = b2.upper
52 | }
53 | in[i].upperInclusive = b.upperInclusive || b2.upperInclusive // TODO: check correctness
54 | return in
55 | }
56 | }
57 | // else: append
58 | return append(in, b2)
59 | }
60 | // analyze condition for AND clauses, equal? < > <= >= BETWEEN
61 | extractConstant := func(v scm.Scmer) (scm.Scmer, bool) {
62 | switch val := v.(type) {
63 | case int64, float64, string:
64 | // equals column vs. constant
65 | return val, true
66 | case scm.Symbol:
67 | if val2, ok := condition.(scm.Proc).En.Vars[val]; ok {
68 | switch val3 := val2.(type) {
69 | // bound constant
70 | case int64, float64, string:
71 | // equals column vs. constant
72 | return val3, true
73 | }
74 | }
75 | case []scm.Scmer:
76 | if val[0] == scm.Symbol("outer") {
77 | if sym, ok := val[1].(scm.Symbol); ok {
78 | if val2, ok := condition.(scm.Proc).En.Vars[sym]; ok {
79 | switch val3 := val2.(type) {
80 | // bound constant
81 | case int64, float64, string:
82 | // equals column vs. constant
83 | return val3, true
84 | }
85 | }
86 | }
87 | }
88 | }
89 | return nil, false
90 | }
91 | var traverseCondition func(scm.Scmer)
92 | traverseCondition = func(node scm.Scmer) {
93 | switch v := node.(type) {
94 | case []scm.Scmer:
95 | if v[0] == scm.Symbol("equal?") || v[0] == scm.Symbol("equal??") {
96 | // equi
97 | switch v1 := v[1].(type) {
98 | case scm.Symbol:
99 | if col, ok := symbolmapping[v1]; ok { // left is a column
100 | if v2, ok := extractConstant(v[2]); ok { // right is a constant
101 | // ?equal var const
102 | cols = addConstraint(cols, columnboundaries{col, v2, true, v2, true})
103 | }
104 | }
105 | // TODO: equals constant vs. column
106 | }
107 | } else if v[0] == scm.Symbol("<") || v[0] == scm.Symbol("<=") {
108 | // compare
109 | switch v1 := v[1].(type) {
110 | case scm.Symbol:
111 | if col, ok := symbolmapping[v1]; ok { // left is a column
112 | if v2, ok := extractConstant(v[2]); ok { // right is a constant
113 | // ?equal var const
114 | cols = addConstraint(cols, columnboundaries{col, nil, false, v2, v[0] == scm.Symbol("<=")})
115 | }
116 | }
117 | // TODO: constant vs. column
118 | }
119 | } else if v[0] == scm.Symbol(">") || v[0] == scm.Symbol(">=") {
120 | // compare
121 | switch v1 := v[1].(type) {
122 | case scm.Symbol:
123 | if col, ok := symbolmapping[v1]; ok { // left is a column
124 | if v2, ok := extractConstant(v[2]); ok { // right is a constant
125 | // ?equal var const
126 | cols = addConstraint(cols, columnboundaries{col, v2, v[0] == scm.Symbol(">="), nil, false})
127 | }
128 | }
129 | // TODO: constant vs. column
130 | }
131 | } else if v[0] == scm.Symbol("and") {
132 | // AND -> recursive traverse
133 | for i := 1; i < len(v); i++ {
134 | traverseCondition(v[i])
135 | }
136 | }
137 | // TODO: <, >, <=, >=
138 | // TODO: OR -> merge multiple
139 | // TODO: variable expressions that can be expanded
140 | }
141 | }
142 | traverseCondition(p.Body) // recursive analysis over condition
143 |
144 | 	// sort columns: equality constraints (lower==upper) come first, alphabetically; then one ranged column (lower!=upper) chosen by selectivity; the rest is discarded
145 | sort.Slice(cols, func(i, j int) bool {
146 | if cols[i].lower == cols[i].upper && cols[j].lower != cols[j].upper {
147 | return true // put equal?-conditions leftmost
148 | }
149 | return cols[i].col < cols[j].col // otherwise: alphabetically
150 | })
151 |
152 | return cols
153 | }
154 |
155 | func indexFromBoundaries(cols boundaries) (lower []scm.Scmer, upperLast scm.Scmer) {
156 | if len(cols) > 0 {
157 | //fmt.Println("conditions:", cols)
158 | // build up lower and upper bounds of index
159 | for {
160 | if len(cols) >= 2 && cols[len(cols)-2].lower != cols[len(cols)-2].upper {
161 | 				// remove last col -> we can't have two ranged cols
162 | cols = cols[:len(cols)-1]
163 | } else {
164 | break // finished -> pure index
165 | }
166 | }
167 | // find out boundaries
168 | lower = make([]scm.Scmer, len(cols))
169 | for i, v := range cols {
170 | lower[i] = v.lower
171 | }
172 | upperLast = cols[len(cols)-1].upper
173 | //fmt.Println(cols, lower, upperLast) // debug output if we found the right boundaries
174 | }
175 | return
176 | }
177 |
--------------------------------------------------------------------------------
/storage/cache.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2025 MemCP Contributors
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see https://www.gnu.org/licenses/.
16 | */
17 | package storage
18 |
19 | import (
20 | "sort"
21 | "time"
22 | )
23 |
24 | type softItem struct {
25 | pointer any
26 | size int64
27 | priorityFactor int
28 | cleanup func(pointer any)
29 | getLastUsed func(pointer any) time.Time
30 | effectiveTime time.Time
31 | }
32 |
33 | // CacheManager manages memory-limited soft references.
34 | type CacheManager struct {
35 | memoryBudget int64
36 | currentMemory int64
37 |
38 | items []softItem
39 | indexMap map[any]int // pointer -> index in items slice
40 |
41 | opChan chan cacheOp
42 | }
43 |
44 | type cacheOp struct {
45 | add *softItem
46 | del any
47 | done chan struct{}
48 | }
49 |
50 | // NewCacheManager creates a new CacheManager with given memory budget.
51 | func NewCacheManager(memoryBudget int64) *CacheManager {
52 | cm := &CacheManager{
53 | memoryBudget: memoryBudget,
54 | items: make([]softItem, 0),
55 | indexMap: make(map[any]int),
56 | opChan: make(chan cacheOp, 1024),
57 | }
58 | go cm.run()
59 | return cm
60 | }
61 |
62 | // AddItem inserts a new item into the cache. Cleanup is called if over budget.
63 | func (cm *CacheManager) AddItem(
64 | pointer any,
65 | size int64,
66 | priorityFactor int,
67 | cleanup func(pointer any),
68 | getLastUsed func(pointer any) time.Time,
69 | ) {
70 | item := &softItem{
71 | pointer: pointer,
72 | size: size,
73 | priorityFactor: priorityFactor,
74 | cleanup: cleanup,
75 | getLastUsed: getLastUsed,
76 | effectiveTime: time.Now(), // always now for new items
77 | }
78 | done := make(chan struct{})
79 | cm.opChan <- cacheOp{add: item, done: done}
80 | <-done
81 | }
82 |
83 | // Delete removes an item from the cache immediately.
84 | func (cm *CacheManager) Delete(pointer any) {
85 | done := make(chan struct{})
86 | cm.opChan <- cacheOp{del: pointer, done: done}
87 | <-done
88 | }
89 |
90 | // run is the single-threaded goroutine handling all operations and cleanup.
91 | func (cm *CacheManager) run() {
92 | for op := range cm.opChan {
93 | if op.add != nil {
94 | cm.add(op.add)
95 | } else if op.del != nil {
96 | cm.delete(op.del)
97 | }
98 | if op.done != nil {
99 | close(op.done)
100 | }
101 | }
102 | }
103 |
104 | // add inserts a new softItem and triggers cleanup if over budget.
105 | func (cm *CacheManager) add(item *softItem) {
106 | idx := len(cm.items)
107 | cm.items = append(cm.items, *item)
108 | cm.indexMap[item.pointer] = idx
109 | cm.currentMemory += item.size
110 |
111 | if cm.currentMemory > cm.memoryBudget {
112 | cm.cleanup()
113 | }
114 | }
115 |
116 | // delete removes a softItem immediately.
117 | func (cm *CacheManager) delete(pointer any) {
118 | idx, ok := cm.indexMap[pointer]
119 | if !ok {
120 | return
121 | }
122 | item := cm.items[idx]
123 | item.cleanup(item.pointer)
124 | cm.currentMemory -= item.size
125 |
126 | lastIdx := len(cm.items) - 1
127 | if idx != lastIdx {
128 | cm.items[idx] = cm.items[lastIdx]
129 | cm.indexMap[cm.items[idx].pointer] = idx
130 | }
131 | cm.items = cm.items[:lastIdx]
132 | delete(cm.indexMap, pointer)
133 | }
134 |
135 | // cleanup frees memory to respect the memory budget (simple-stupid approach).
136 | func (cm *CacheManager) cleanup() {
137 | if cm.currentMemory <= cm.memoryBudget {
138 | return
139 | }
140 |
141 | targetMemory := cm.memoryBudget * 75 / 100 // free until 75% of budget
142 |
143 | // Step 1: recompute effectiveTime for all items
144 | for i := range cm.items {
145 | cm.items[i].effectiveTime = cm.items[i].getLastUsed(cm.items[i].pointer)
146 | }
147 |
148 | // Step 2: sort items by effectiveTime (oldest first)
149 | sort.Slice(cm.items, func(i, j int) bool {
150 | return cm.items[i].effectiveTime.Before(cm.items[j].effectiveTime)
151 | })
152 |
153 | // Step 3: evict oldest items until memory is under target
154 | i := 0
155 | for cm.currentMemory > targetMemory && i < len(cm.items) {
156 | item := cm.items[i]
157 | item.cleanup(item.pointer)
158 | cm.currentMemory -= item.size
159 | delete(cm.indexMap, item.pointer)
160 | i++
161 | }
162 |
163 | // Step 4: compact the slice
164 | cm.items = cm.items[i:]
165 | for idx, item := range cm.items {
166 | cm.indexMap[item.pointer] = idx
167 | }
168 | }
169 |
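A usage sketch of the CacheManager API above; the budget, payload and callbacks are hypothetical placeholders:

    package storage

    import "time"

    // register an item with the cache manager; cleanup runs once the budget is exceeded
    func exampleCacheUsage() {
    	cm := NewCacheManager(64 << 20) // 64 MiB budget
    	blob := make([]byte, 1<<20)
    	lastUsed := time.Now()
    	cm.AddItem(&blob, int64(len(blob)), 1,
    		func(pointer any) { /* release or spill the data here */ },
    		func(pointer any) time.Time { return lastUsed })
    	cm.Delete(&blob) // or remove it explicitly
    }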
--------------------------------------------------------------------------------
/storage/compute.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2024 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see https://www.gnu.org/licenses/.
16 | */
17 | package storage
18 |
19 | import "fmt"
20 | import "sync"
21 | import "runtime"
22 | import "runtime/debug"
23 | import "github.com/jtolds/gls"
24 | import "github.com/launix-de/memcp/scm"
25 |
26 | func (t *table) ComputeColumn(name string, inputCols []string, computor scm.Scmer) {
27 | for i, c := range t.Columns {
28 | if c.Name == name {
29 | // found the column
30 | t.Columns[i].Computor = computor // set formula so delta storages and rebuild algo know how to recompute
31 | done := make(chan error, 6)
32 | shardlist := t.Shards
33 | if shardlist == nil {
34 | shardlist = t.PShards
35 | }
36 | for i, s := range shardlist {
37 | gls.Go(func(i int, s *storageShard) func() {
38 | return func() {
39 | defer func() {
40 | if r := recover(); r != nil {
41 | //fmt.Println("panic during compute:", r, string(debug.Stack()))
42 | done <- scanError{r, string(debug.Stack())}
43 | }
44 | }()
45 | for !s.ComputeColumn(name, inputCols, computor, len(shardlist) == 1) {
46 | // couldn't compute column because delta is still active
47 | t.mu.Lock()
48 | s = s.rebuild(false)
49 | shardlist[i] = s
50 | t.mu.Unlock()
51 | // persist new shard UUID after publishing
52 | t.schema.save()
53 | }
54 | done <- nil
55 | }
56 | }(i, s))
57 | }
58 | for range shardlist {
59 | err := <-done // collect finish signal before return
60 | if err != nil {
61 | panic(err)
62 | }
63 | }
64 | return
65 | }
66 | }
67 | panic("column " + t.Name + "." + name + " does not exist")
68 | }
69 |
70 | func (s *storageShard) ComputeColumn(name string, inputCols []string, computor scm.Scmer, parallel bool) bool {
71 | fmt.Println("start compute on", s.t.Name, "parallel", parallel)
72 | if s.deletions.Count() > 0 || len(s.inserts) > 0 {
73 | return false // can't compute in shards with delta storage
74 | }
75 | // We are going to mutate this shard's columns: mark shard as WRITE (not COLD)
76 | s.srState = WRITE
77 | // Ensure main_count and input storages are initialized before compute
78 | s.ensureMainCount(false)
79 | cols := make([]ColumnStorage, len(inputCols))
80 | for i, col := range inputCols {
81 | cols[i] = s.getColumnStorageOrPanic(col)
82 | }
83 | vals := make([]scm.Scmer, s.main_count) // build the stretchy value array
84 | if parallel {
85 | var done sync.WaitGroup
86 | done.Add(int(s.main_count))
87 | progress := make(chan uint, runtime.NumCPU()/2) // don't go all at once, we don't have enough RAM
88 | for i := 0; i < runtime.NumCPU()/2; i++ {
89 | gls.Go(func() { // threadpool with half of the cores
90 | // allocate a private parameter buffer per worker to avoid data races
91 | colvalues := make([]scm.Scmer, len(cols))
92 | for i := range progress {
93 | for j, col := range cols {
94 | colvalues[j] = col.GetValue(i) // read values from main storage into lambda params
95 | }
96 | 				vals[i] = scm.Apply(computor, colvalues...) // execute computor kernel (the unoptimized variant, since the serial optimization is not safe for parallel use)
97 | done.Done()
98 | }
99 | })
100 | }
101 | // add all items to the queue
102 | for i := uint(0); i < s.main_count; i++ {
103 | progress <- i
104 | }
105 | close(progress) // signal workers to exit
106 | done.Wait()
107 | } else {
108 | // allocate a common param buffer to save allocations
109 | colvalues := make([]scm.Scmer, len(cols))
110 | fn := scm.OptimizeProcToSerialFunction(computor) // optimize for serial application
111 | for i := uint(0); i < s.main_count; i++ {
112 | for j, col := range cols {
113 | colvalues[j] = col.GetValue(i) // read values from main storage into lambda params
114 | }
115 | vals[i] = fn(colvalues...) // execute computor kernel
116 | }
117 | }
118 |
119 | s.mu.Lock() // don't defer because we unlock inbetween
120 | store := new(StorageSCMER)
121 | store.values = vals
122 | s.columns[name] = store
123 | s.mu.Unlock()
124 | // TODO: decide whether to rebuild optimized store
125 | return true
126 | }
127 |
--------------------------------------------------------------------------------
/storage/csv.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2023 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see https://www.gnu.org/licenses/.
16 | */
17 | package storage
18 |
19 | import "io"
20 | import "bufio"
21 | import "strings"
22 | import "github.com/launix-de/memcp/scm"
23 |
24 | func LoadCSV(schema, table string, f io.Reader, delimiter string, firstLine bool) {
25 | scanner := bufio.NewScanner(f)
26 | scanner.Split(bufio.ScanLines)
27 |
28 | lines := make(chan string, 512)
29 |
30 | go func() {
31 | for scanner.Scan() {
32 | lines <- scanner.Text()
33 | }
34 | close(lines)
35 | }()
36 |
37 | db := GetDatabase(schema)
38 | if db == nil {
39 | panic("database " + schema + " does not exist")
40 | }
41 | t := db.GetTable(table)
42 | if t == nil {
43 | panic("table " + table + " does not exist")
44 | }
45 | var cols []string
46 | 	if firstLine {
47 | 		// the reader goroutine owns the scanner, so take the header line from the channel
48 | 		header, ok := <-lines
49 | 		if !ok { panic("CSV does not contain header line") }
50 | 		cols = strings.Split(header, delimiter) // read header line
51 | } else {
52 | // otherwise use the table's column order
53 | cols = make([]string, len(t.Columns))
54 | for i, col := range t.Columns {
55 | cols[i] = col.Name
56 | }
57 | }
58 | buffer := make([][]scm.Scmer, 0, 4096)
59 | for s := range lines {
60 | if s == "" {
61 | // ignore
62 | } else {
63 | arr := strings.Split(s, delimiter)
64 | x := make([]scm.Scmer, len(t.Columns))
65 | 			for i := range t.Columns {
66 | if i < len(arr) {
67 | x[i] = scm.Simplify(arr[i])
68 | }
69 | }
70 | buffer = append(buffer, x)
71 | if len(buffer) >= 4096 {
72 | t.Insert(cols, buffer, nil, nil, false, nil)
73 | buffer = buffer[:0]
74 | }
75 | }
76 | }
77 | if len(buffer) > 0 {
78 | t.Insert(cols, buffer, nil, nil, false, nil)
79 | }
80 | }
81 |
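A call sketch for LoadCSV, assuming a database "test" with a table "users" already exists (both names are hypothetical):

    package storage

    import "strings"

    // loads two rows from an in-memory CSV with a header line and ';' delimiter
    func exampleLoadCSV() {
    	LoadCSV("test", "users", strings.NewReader("id;name\n1;Alice\n2;Bob\n"), ";", true)
    }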
--------------------------------------------------------------------------------
/storage/json.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2023 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 | package storage
18 |
19 | /*
20 |
21 | JSON storage on disk for persistence:
22 | - each node has its own data folder
23 | - each db/table.jsonl is a jsonl file
24 | - the first line is #table so it can be read by a simple .jsonl reader
25 | - a line can also say #delete
26 | - a line can also say #update json
27 | - on rewrite, db/_table.jsonl is rebuilt and replaced (maybe once a week)
28 |
29 | */
30 |
31 | import "io"
32 | import "bufio"
33 | import "encoding/json"
34 | import "github.com/launix-de/memcp/scm"
35 |
36 | func LoadJSON(schema string, f io.Reader) {
37 | scanner := bufio.NewScanner(f)
38 | scanner.Split(bufio.ScanLines)
39 |
40 | lines := make(chan string, 512)
41 |
42 | go func() {
43 | for scanner.Scan() {
44 | lines <- scanner.Text()
45 | }
46 | close(lines)
47 | }()
48 |
49 | var t *table
50 | for s := range lines {
51 | if s == "" {
52 | // ignore
53 | 		} else if len(s) >= 7 && s[0:7] == "#table " {
54 | // new table (or find the existing one)
55 | t, _ = CreateTable(schema, s[7:], Safe, true)
56 | } else if s[0] == '#' {
57 | // comment
58 | } else {
59 | if t == nil {
60 | panic("no table set")
61 | } else {
62 | if len(t.Columns) == 0 {
63 | // JSON with an unknown table format -> create dummy cols
64 | var x map[string]scm.Scmer
65 | json.Unmarshal([]byte(s), &x) // parse JSON
66 | 					for c := range x {
67 | // create column with dummy storage for next rebuild
68 | t.CreateColumn(c, "ANY", []int{}, []scm.Scmer{"comment", "json import"})
69 | }
70 | }
71 | func(t *table, s string) {
72 | var y map[string]scm.Scmer
73 | json.Unmarshal([]byte(s), &y) // parse JSON
74 | cols := make([]string, len(y))
75 | x := make([]scm.Scmer, len(y))
76 | i := 0
77 | for k, v := range y {
78 | cols[i] = k
79 | x[i] = v
80 | i++
81 | }
82 | t.Insert(cols, [][]scm.Scmer{x}, nil, nil, false, nil) // put into table
83 | }(t, s)
84 | }
85 | }
86 | }
87 | }
88 |
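A minimal sketch of the format LoadJSON consumes (the schema name is hypothetical; the target database must already exist, while the table is created on the fly):

    package main

    import (
        "strings"

        "github.com/launix-de/memcp/storage"
    )

    func main() {
        // #table switches the target table; every following line is one JSON row
        data := "#table users\n" +
            "{\"id\": 1, \"name\": \"Alice\"}\n" +
            "{\"id\": 2, \"name\": \"Bob\"}\n"
        storage.LoadJSON("demo", strings.NewReader(data))
    }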
--------------------------------------------------------------------------------
/storage/limits.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2025 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 | package storage
18 |
19 | import "runtime"
20 |
21 | // global semaphore to limit concurrent disk-backed load operations
22 | var loadSemaphore chan struct{}
23 |
24 | func init() {
25 | workers := runtime.NumCPU()
26 | if workers < 1 {
27 | workers = 1
28 | }
29 | loadSemaphore = make(chan struct{}, workers)
30 | // prefill with tokens
31 | for i := 0; i < workers; i++ {
32 | loadSemaphore <- struct{}{}
33 | }
34 | }
35 |
36 | // acquireLoadSlot blocks until a load slot is available and returns a release func.
37 | func acquireLoadSlot() func() {
38 | <-loadSemaphore
39 | return func() { loadSemaphore <- struct{}{} }
40 | }
41 |
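A minimal sketch of how the semaphore is meant to be used from inside the storage package (the surrounding load routine is hypothetical):

    // inside package storage
    func loadShardColumnsLimited(load func()) {
        release := acquireLoadSlot() // blocks until one of the NumCPU tokens is free
        defer release()              // return the token even if load() panics
        load()                       // perform the disk-backed load while holding the slot
    }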
--------------------------------------------------------------------------------
/storage/overlay-blob.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2024 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 | package storage
18 |
19 | import "io"
20 | import "fmt"
21 | import "unsafe"
22 | import "reflect"
23 | import "strings"
24 | import "compress/gzip"
25 | import "crypto/sha256"
26 | import "encoding/binary"
27 | import "github.com/launix-de/memcp/scm"
28 |
29 | type OverlayBlob struct {
30 | // every overlay has a base
31 | Base ColumnStorage
32 | // values
33 | values map[[32]byte]string // gzipped contents content addressable
34 | size uint
35 | }
36 |
37 | func (s *OverlayBlob) ComputeSize() uint {
38 | var sz uint = 48 + 48*uint(len(s.values)) + s.size + s.Base.ComputeSize()
39 | for _, v := range s.values {
40 | sz += 24 + 16 + (uint(len(v)-1)/8+1)*8 + 32 // some overhead + content
41 | }
42 | return sz
43 | }
44 |
45 | func (s *OverlayBlob) String() string {
46 | return fmt.Sprintf("overlay[%dx zip-blob %d]+%s", len(s.values), s.size, s.Base.String())
47 | }
48 |
49 | func (s *OverlayBlob) Serialize(f io.Writer) {
50 | binary.Write(f, binary.LittleEndian, uint8(31)) // 31 = OverlayBlob
51 | io.WriteString(f, "1234567") // dummy
52 | var size uint64 = uint64(len(s.values))
53 | binary.Write(f, binary.LittleEndian, size) // write number of overlay items
54 | for k, v := range s.values {
55 | f.Write(k[:])
56 | binary.Write(f, binary.LittleEndian, uint64(len(v))) // write length
57 | io.WriteString(f, v) // write content
58 | }
59 | s.Base.Serialize(f) // serialize base
60 | }
61 |
62 | func (s *OverlayBlob) Deserialize(f io.Reader) uint {
63 | var dummy [7]byte
64 | f.Read(dummy[:]) // read padding
65 |
66 | var size uint64
67 | binary.Read(f, binary.LittleEndian, &size) // read size
68 | s.values = make(map[[32]byte]string)
69 |
70 | for i := uint64(0); i < size; i++ {
71 | var key [32]byte
72 | f.Read(key[:])
73 | var l uint64
74 | binary.Read(f, binary.LittleEndian, &l)
75 | value := make([]byte, l)
76 | f.Read(value)
77 | s.size += uint(l) // statistics
78 | s.values[key] = string(value)
79 | }
80 | var basetype uint8
81 | f.Read(unsafe.Slice(&basetype, 1))
82 | s.Base = reflect.New(storages[basetype]).Interface().(ColumnStorage)
83 | l := s.Base.Deserialize(f) // read base
84 | return l
85 | }
86 |
87 | func (s *OverlayBlob) GetValue(i uint) scm.Scmer {
88 | v := s.Base.GetValue(i)
89 | switch v_ := v.(type) {
90 | case string:
91 | if v_ != "" && v_[0] == '!' {
92 | if v_[1] == '!' {
93 | return v_[1:] // escaped string
94 | } else {
95 | // unpack from storage
96 | if v, ok := s.values[*(*[32]byte)(unsafe.Pointer(unsafe.StringData(v_[1:])))]; ok {
97 | var b strings.Builder
98 | reader, err := gzip.NewReader(strings.NewReader(v))
99 | if err != nil {
100 | panic(err)
101 | }
102 | io.Copy(&b, reader)
103 | reader.Close()
104 | return b.String()
105 | }
106 | return nil // value was lost (this should not happen)
107 | }
108 | } else {
109 | return v
110 | }
111 | default:
112 | return v
113 | }
114 | }
115 |
116 | func (s *OverlayBlob) prepare() {
117 | // set up scan
118 | s.Base.prepare()
119 | }
120 | func (s *OverlayBlob) scan(i uint, value scm.Scmer) {
121 | switch v_ := value.(type) {
122 | case scm.LazyString:
123 | if v_.Hash != "" {
124 | s.Base.scan(i, "!"+v_.Hash)
125 | } else {
126 | s.Base.scan(i, v_.GetValue())
127 | }
128 | case string:
129 | if len(v_) > 255 {
130 | h := sha256.New()
131 | io.WriteString(h, v_)
132 | s.Base.scan(i, fmt.Sprintf("!%s", h.Sum(nil)))
133 | } else {
134 | if v_ != "" && v_[0] == '!' {
135 | s.Base.scan(i, "!"+v_) // escape strings that start with !
136 | } else {
137 | s.Base.scan(i, value)
138 | }
139 | }
140 | default:
141 | s.Base.scan(i, value)
142 | }
143 | }
144 | func (s *OverlayBlob) init(i uint) {
145 | s.values = make(map[[32]byte]string)
146 | s.size = 0
147 | s.Base.init(i)
148 | }
149 | func (s *OverlayBlob) build(i uint, value scm.Scmer) {
150 | switch v_ := value.(type) {
151 | case string:
152 | if len(v_) > 255 {
153 | h := sha256.New()
154 | io.WriteString(h, v_)
155 | hashsum := h.Sum(nil)
156 | s.Base.build(i, fmt.Sprintf("!%s", hashsum))
157 | var b strings.Builder
158 | z := gzip.NewWriter(&b)
159 | io.Copy(z, strings.NewReader(v_))
160 | z.Close()
161 | s.size += uint(b.Len())
162 | s.values[*(*[32]byte)(unsafe.Pointer(&hashsum[0]))] = b.String()
163 | } else {
164 | if v_ != "" && v_[0] == '!' {
165 | s.Base.build(i, "!"+v_) // escape strings that start with !
166 | } else {
167 | s.Base.build(i, value)
168 | }
169 | }
170 | default:
171 | s.Base.build(i, value)
172 | }
173 | }
174 | func (s *OverlayBlob) finish() {
175 | s.Base.finish()
176 | }
177 | func (s *OverlayBlob) proposeCompression(i uint) ColumnStorage {
178 | 	// don't propose another pass
179 | return nil
180 | }
181 |
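A minimal round-trip sketch (inside package storage) of what the overlay does with long strings; the lifecycle calls follow the usual prepare/scan/init/build/finish protocol of ColumnStorage:

    // inside package storage
    func exampleOverlayBlob() scm.Scmer {
        long := strings.Repeat("x", 1000) // > 255 bytes, so it is moved into the blob map
        s := new(OverlayBlob)
        s.Base = new(StorageSCMER)
        s.prepare()
        s.scan(0, long)  // the base learns "!"+sha256 instead of the long string
        s.init(1)        // allocates the base and resets the content-addressed map
        s.build(0, long) // the base stores the hash key, the gzipped payload goes into s.values
        s.finish()
        return s.GetValue(0) // looks up the hash, gunzips and returns the original string
    }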
--------------------------------------------------------------------------------
/storage/persistence-files.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2024 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 | package storage
18 |
19 | import "io"
20 | import "os"
21 | import "fmt"
22 | import "bufio"
23 | import "bytes"
24 | import "strings"
25 | import "crypto/sha256"
26 | import "encoding/json"
27 | import "github.com/launix-de/memcp/scm"
28 |
29 | type FileStorage struct {
30 | path string
31 | }
32 |
33 | type FileFactory struct {
34 | Basepath string
35 | }
36 |
37 | // helper for long column names
38 | func ProcessColumnName(col string) string {
39 | if len(col) < 64 {
40 | return col
41 | } else {
42 | hashsum := sha256.Sum256([]byte(col))
43 | return fmt.Sprintf("%x", hashsum[:8])
44 | }
45 | }
46 |
47 | func (f *FileFactory) CreateDatabase(schema string) PersistenceEngine {
48 | return &FileStorage{f.Basepath + "/" + schema + "/"}
49 | }
50 |
51 | func (f *FileStorage) ReadSchema() []byte {
52 | jsonbytes, _ := os.ReadFile(f.path + "schema.json")
53 | if len(jsonbytes) == 0 {
54 | // try to load backup (in case of failure while save)
55 | jsonbytes, _ = os.ReadFile(f.path + "schema.json.old")
56 | }
57 | return jsonbytes
58 | }
59 |
60 | func (s *FileStorage) WriteSchema(jsonbytes []byte) {
61 | os.MkdirAll(s.path, 0750)
62 | if stat, err := os.Stat(s.path + "schema.json"); err == nil && stat.Size() > 0 {
63 | // rescue a copy of schema.json in case the schema is not serializable
64 | os.Rename(s.path+"schema.json", s.path+"schema.json.old")
65 | }
66 | f, err := os.Create(s.path + "schema.json")
67 | if err != nil {
68 | panic(err)
69 | }
70 | defer f.Close()
71 | f.Write(jsonbytes)
72 | }
73 |
74 | func (s *FileStorage) ReadColumn(shard string, column string) io.ReadCloser {
75 | 	// open the column file; a missing file just means no data has been persisted yet
76 | f, err := os.Open(s.path + shard + "-" + ProcessColumnName(column))
77 | if err != nil {
78 | // file does not exist -> no data available
79 | return ErrorReader{err}
80 | }
81 | return f
82 | }
83 |
84 | func (s *FileStorage) WriteColumn(shard string, column string) io.WriteCloser {
85 | os.MkdirAll(s.path, 0750)
86 | f, err := os.Create(s.path + shard + "-" + ProcessColumnName(column))
87 | if err != nil {
88 | panic(err)
89 | }
90 | return f
91 | }
92 |
93 | func (s *FileStorage) RemoveColumn(shard string, column string) {
94 | os.Remove(s.path + shard + "-" + ProcessColumnName(column))
95 | }
96 |
97 | func (s *FileStorage) OpenLog(shard string) PersistenceLogfile {
98 | os.MkdirAll(s.path, 0750)
99 | f, err := os.OpenFile(s.path+shard+".log", os.O_RDWR|os.O_CREATE, 0750)
100 | if err != nil {
101 | panic(err)
102 | }
103 | return FileLogfile{f}
104 | }
105 |
106 | func (s *FileStorage) ReplayLog(shard string) (chan interface{}, PersistenceLogfile) {
107 | os.MkdirAll(s.path, 0750)
108 | f, err := os.OpenFile(s.path+shard+".log", os.O_RDWR|os.O_CREATE, 0750)
109 | if err != nil {
110 | panic(err)
111 | }
112 | replay := make(chan interface{}, 8)
113 | fi, _ := f.Stat()
114 | if fi.Size() > 0 {
115 | scanner := bufio.NewScanner(f)
116 | for scanner.Scan() {
117 | b := scanner.Bytes()
118 | if string(b) == "" {
119 | // nop
120 | 			} else if bytes.HasPrefix(b, []byte("delete ")) {
121 | var idx uint
122 | json.Unmarshal(b[7:], &idx)
123 | replay <- LogEntryDelete{idx}
124 | 			} else if bytes.HasPrefix(b, []byte("insert ")) {
125 | body := string(b[7:])
126 | if pos := strings.Index(body, "]["); pos >= 0 {
127 | // new format: columns ][ values
128 | var cols []string
129 | var values [][]scm.Scmer
130 | json.Unmarshal([]byte(body[:pos+1]), &cols)
131 | json.Unmarshal([]byte(body[pos+1:]), &values)
132 | for i := 0; i < len(values); i++ {
133 | for j := 0; j < len(values[i]); j++ {
134 | values[i][j] = scm.TransformFromJSON(values[i][j])
135 | }
136 | }
137 | replay <- LogEntryInsert{cols, values}
138 | } else {
139 | // fallback/old format: flat array of alternating key/value pairs -> single row
140 | var flat []interface{}
141 | if err := json.Unmarshal([]byte(body), &flat); err != nil {
142 | panic("unknown log sequence: " + string(b))
143 | }
144 | if len(flat)%2 != 0 {
145 | panic("corrupt insert log (odd items): " + string(b))
146 | }
147 | cols := make([]string, 0, len(flat)/2)
148 | row := make([]scm.Scmer, 0, len(flat)/2)
149 | for i := 0; i < len(flat); i += 2 {
150 | cols = append(cols, flat[i].(string))
151 | row = append(row, scm.TransformFromJSON(flat[i+1]))
152 | }
153 | replay <- LogEntryInsert{cols, [][]scm.Scmer{row}}
154 | }
155 | } else {
156 | panic("unknown log sequence: " + string(b))
157 | }
158 | }
159 | close(replay)
160 | } else {
161 | close(replay)
162 | }
163 | return replay, FileLogfile{f}
164 | }
165 |
166 | func (s *FileStorage) RemoveLog(shard string) {
167 | os.Remove(s.path + shard + ".log")
168 | }
169 |
170 | type FileLogfile struct {
171 | w *os.File
172 | }
173 |
174 | func (w FileLogfile) Write(logentry interface{}) {
175 | switch l := logentry.(type) {
176 | case LogEntryDelete:
177 | var b bytes.Buffer
178 | b.WriteString("delete ")
179 | tmp, _ := json.Marshal(l.idx)
180 | b.Write(tmp)
181 | b.WriteString("\n")
182 | w.w.Write(b.Bytes())
183 | case LogEntryInsert:
184 | var b bytes.Buffer
185 | b.WriteString("insert ")
186 | tmp, _ := json.Marshal(l.cols)
187 | b.Write(tmp)
188 | tmp, _ = json.Marshal(l.values)
189 | b.Write(tmp)
190 | b.WriteString("\n")
191 | w.w.Write(b.Bytes())
192 | }
193 | }
194 | func (w FileLogfile) Sync() {
195 | w.w.Sync()
196 | }
197 | func (w FileLogfile) Close() {
198 | w.w.Close()
199 | }
200 |
201 | func (s *FileStorage) Remove() {
202 | os.RemoveAll(s.path)
203 | }
204 |
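For reference, a sketch (inside package storage) of what FileLogfile.Write appends to a shard log, which is exactly the format ReplayLog parses back:

    // inside package storage
    func exampleLog(log PersistenceLogfile) {
        // appends: insert ["id","name"][[1,"Alice"],[2,"Bob"]]
        log.Write(LogEntryInsert{[]string{"id", "name"}, [][]scm.Scmer{{int64(1), "Alice"}, {int64(2), "Bob"}}})
        // appends: delete 4
        log.Write(LogEntryDelete{4})
        log.Sync() // flush to disk so the entries survive a crash
    }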
--------------------------------------------------------------------------------
/storage/persistence.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2024 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 | package storage
18 |
19 | import "io"
20 | import "github.com/launix-de/memcp/scm"
21 |
22 | /*
23 |
24 | persistence interface
25 |
26 | MemCP allows multiple persistence interfaces for storage devices:
27 | - file system: in data/[dbname]
28 | - all others: in data/[dbname].json
29 |
30 | A storage interface must implement the following operations:
31 | - load schema.json
32 | - load a column
33 | - load all log entries
34 | - remove a shard (columns and log)
35 | - persist schema.json
36 | - persist a column (shard id, column name)
37 | - persist a log entry
38 |
39 | */
40 |
41 | type PersistenceEngine interface {
42 | ReadSchema() []byte
43 | WriteSchema(schema []byte)
44 | ReadColumn(shard string, column string) io.ReadCloser
45 | WriteColumn(shard string, column string) io.WriteCloser
46 | RemoveColumn(shard string, column string)
47 | OpenLog(shard string) PersistenceLogfile // open for writing
48 | ReplayLog(shard string) (chan interface{}, PersistenceLogfile) // replay existing log
49 | RemoveLog(shard string)
50 | Remove() // delete from storage
51 | }
52 |
53 | type PersistenceLogfile interface {
54 | Write(logentry interface{})
55 | Sync()
56 | Close()
57 | }
58 | type LogEntryDelete struct {
59 | idx uint
60 | }
61 | type LogEntryInsert struct {
62 | cols []string
63 | values [][]scm.Scmer
64 | }
65 |
66 | // for CREATE TABLE
67 | type PersistenceFactory interface {
68 | CreateDatabase(schema string) PersistenceEngine
69 | }
70 |
71 | // Helper function to move databases between storages
72 | func MoveDatabase(src PersistenceEngine, dst PersistenceEngine) {
73 | // TODO: read schema.json
74 | // TODO: for each shard: read columns, read log, transfer to dst
75 | }
76 |
77 | // ErrorReader implements io.ReadCloser
78 | type ErrorReader struct {
79 | e error
80 | }
81 |
82 | func (e ErrorReader) Read([]byte) (int, error) {
83 | // reflects the error (e.g. file not found)
84 | return 0, e.e
85 | }
86 | func (e ErrorReader) Close() error {
87 | // closes without problem
88 | return nil
89 | }
90 |
--------------------------------------------------------------------------------
/storage/settings.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2024 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see .
16 | */
17 | package storage
18 |
19 | import "github.com/dc0d/onexit"
20 | import "github.com/launix-de/memcp/scm"
21 |
22 | type SettingsT struct {
23 | Backtrace bool
24 | Trace bool
25 | TracePrint bool
26 | PartitionMaxDimensions int
27 | DefaultEngine string
28 | ShardSize uint
29 | AnalyzeMinItems int
30 | AIEstimator bool
31 | }
32 |
33 | var Settings SettingsT = SettingsT{false, false, false, 10, "safe", 60000, 50, false}
34 |
35 | // call this after you filled Settings
36 | func InitSettings() {
37 | scm.SettingsHaveGoodBacktraces = Settings.Backtrace
38 | scm.SetTrace(Settings.Trace)
39 | scm.TracePrint = Settings.TracePrint
40 | onexit.Register(func() { scm.SetTrace(false) }) // close trace file on exit
41 | }
42 |
43 | func ChangeSettings(a ...scm.Scmer) scm.Scmer {
44 | 	// no args: list all settings; one arg: read a setting; two args: write a setting
45 | if len(a) == 0 {
46 | return []scm.Scmer{
47 | "Backtrace", Settings.Backtrace,
48 | "Trace", Settings.Trace,
49 | "TracePrint", Settings.TracePrint,
50 | "PartitionMaxDimensions", int64(Settings.PartitionMaxDimensions),
51 | "DefaultEngine", Settings.DefaultEngine,
52 | "ShardSize", int64(Settings.ShardSize),
53 | "AnalyzeMinItems", int64(Settings.AnalyzeMinItems),
54 | "AIEstimator", Settings.AIEstimator,
55 | }
56 | } else if len(a) == 1 {
57 | switch scm.String(a[0]) {
58 | case "Backtrace":
59 | return Settings.Backtrace
60 | case "Trace":
61 | return Settings.Trace
62 | case "TracePrint":
63 | return Settings.TracePrint
64 | case "PartitionMaxDimensions":
65 | return int64(Settings.PartitionMaxDimensions)
66 | case "DefaultEngine":
67 | return Settings.DefaultEngine
68 | case "ShardSize":
69 | return int64(Settings.ShardSize)
70 | case "AnalyzeMinItems":
71 | return int64(Settings.AnalyzeMinItems)
72 | case "AIEstimator":
73 | return Settings.AIEstimator
74 | default:
75 | panic("unknown setting: " + scm.String(a[0]))
76 | }
77 | } else {
78 | switch scm.String(a[0]) {
79 | case "Backtrace":
80 | 			Settings.Backtrace = scm.ToBool(a[1])
81 | 			scm.SettingsHaveGoodBacktraces = Settings.Backtrace
82 | case "Trace":
83 | Settings.Trace = scm.ToBool(a[1])
84 | scm.SetTrace(Settings.Trace)
85 | case "TracePrint":
86 | Settings.TracePrint = scm.ToBool(a[1])
87 | scm.TracePrint = Settings.TracePrint
88 | case "PartitionMaxDimensions":
89 | Settings.PartitionMaxDimensions = scm.ToInt(a[1])
90 | case "DefaultEngine":
91 | Settings.DefaultEngine = scm.String(a[1])
92 | case "ShardSize":
93 | Settings.ShardSize = uint(scm.ToInt(a[1]))
94 | case "AnalyzeMinItems":
95 | Settings.AnalyzeMinItems = scm.ToInt(a[1])
96 | case "AIEstimator":
97 | prev := Settings.AIEstimator
98 | Settings.AIEstimator = scm.ToBool(a[1])
99 | if prev != Settings.AIEstimator {
100 | // start/stop estimator on change
101 | if Settings.AIEstimator {
102 | StartGlobalEstimator()
103 | } else {
104 | StopGlobalEstimator()
105 | }
106 | } else if Settings.AIEstimator {
107 | // Setting already true; if estimator not running, try to (re)start
108 | globalEstimatorMu.Lock()
109 | est := globalEstimator
110 | globalEstimatorMu.Unlock()
111 | if est == nil {
112 | StartGlobalEstimator()
113 | }
114 | }
115 | default:
116 | panic("unknown setting: " + scm.String(a[0]))
117 | }
118 | return true
119 | }
120 | }
121 |
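A minimal sketch of calling ChangeSettings directly from Go (it is normally exposed to the Scheme layer); this assumes plain Go strings and int64 values are accepted as scm.Scmer values, as they are elsewhere in the storage package:

    package main

    import "github.com/launix-de/memcp/storage"

    func main() {
        _ = storage.ChangeSettings()                       // no args: assoc list of all settings
        _ = storage.ChangeSettings("ShardSize")            // one arg: read a single setting
        storage.ChangeSettings("ShardSize", int64(100000)) // two args: write a setting
    }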
--------------------------------------------------------------------------------
/storage/shared_resource.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2025 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 | package storage
18 |
19 | // Shared resource state used for lazily loaded objects.
20 | // COLD: not loaded yet; SHARED: loaded for read; WRITE: loaded and exclusively writable.
21 | type SharedState uint8
22 |
23 | const (
24 | COLD SharedState = 0
25 | SHARED SharedState = 1
26 | WRITE SharedState = 2
27 | )
28 |
29 | // SharedResource marks a lazily loaded resource controllable by a process monitor.
30 | // In the current single-process implementation, these methods primarily coordinate
31 | // lazy load/unload. The returned release() functions are placeholders and can
32 | // evolve into reference counting once a multi-node monitor is added.
33 | type SharedResource interface {
34 | GetState() SharedState
35 | GetRead() func() // acquire read access; returns release()
36 | GetExclusive() func() // acquire exclusive access; returns release()
37 | }
38 |
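A minimal usage sketch for the interface (the resource value is hypothetical):

    func readFrom(r SharedResource) {
        release := r.GetRead() // loads the resource if it is still COLD
        defer release()        // a placeholder today, reference counting later
        // ... read from the resource while holding read access ...
    }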
--------------------------------------------------------------------------------
/storage/storage-float.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2023 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 | package storage
18 |
19 | import "io"
20 | import "math"
21 | import "unsafe"
22 | import "encoding/binary"
23 | import "github.com/launix-de/memcp/scm"
24 |
25 | // StorageFloat stores the column as a dense []float64; NULL is encoded as NaN
26 | type StorageFloat struct {
27 | values []float64
28 | }
29 |
30 | func (s *StorageFloat) ComputeSize() uint {
31 | return 16 + 8*uint(len(s.values)) + 24 /* a slice */
32 | }
33 |
34 | func (s *StorageFloat) String() string {
35 | return "float64"
36 | }
37 |
38 | func (s *StorageFloat) Serialize(f io.Writer) {
39 | binary.Write(f, binary.LittleEndian, uint8(12)) // 12 = StorageFloat
40 | io.WriteString(f, "1234567") // fill up to 64 bit alignment
41 | binary.Write(f, binary.LittleEndian, uint64(len(s.values)))
42 | // now at offset 16 begin data
43 | rawdata := unsafe.Slice((*byte)(unsafe.Pointer(&s.values[0])), 8*len(s.values))
44 | f.Write(rawdata)
45 | // free allocated memory and mmap
46 | /* TODO: runtime.SetFinalizer(s, func(s *StorageSCMER) {f.Close()})
47 | newrawdata = mmap.Map(f, RDWR, 0)
48 | s.values = unsafe.Slice((*float64)&newrawdata[16], len(s.values))
49 | */
50 | }
51 | func (s *StorageFloat) Deserialize(f io.Reader) uint {
52 | var dummy [7]byte
53 | f.Read(dummy[:])
54 | var l uint64
55 | binary.Read(f, binary.LittleEndian, &l)
56 | /* TODO: runtime.SetFinalizer(s, func(s *StorageSCMER) { f.Close() })
57 | rawdata := mmap.Map(f, RDWR, 0)
58 | */
59 | rawdata := make([]byte, 8*l)
60 | f.Read(rawdata)
61 | s.values = unsafe.Slice((*float64)(unsafe.Pointer(&rawdata[0])), l)
62 | return uint(l)
63 | }
64 |
65 | func (s *StorageFloat) GetValue(i uint) scm.Scmer {
66 | // NULL is encoded as NaN in SQL
67 | if math.IsNaN(s.values[i]) {
68 | return nil
69 | } else {
70 | return s.values[i]
71 | }
72 | }
73 |
74 | func (s *StorageFloat) scan(i uint, value scm.Scmer) {
75 | }
76 | func (s *StorageFloat) prepare() {
77 | }
78 | func (s *StorageFloat) init(i uint) {
79 | // allocate
80 | s.values = make([]float64, i)
81 | }
82 | func (s *StorageFloat) build(i uint, value scm.Scmer) {
83 | // store
84 | if value == nil {
85 | s.values[i] = math.NaN()
86 | } else {
87 | s.values[i] = value.(float64)
88 | }
89 | }
90 | func (s *StorageFloat) finish() {
91 | }
92 |
93 | func (s *StorageFloat) proposeCompression(i uint) ColumnStorage {
94 | 	// don't propose another pass
95 | return nil
96 | }
97 |
--------------------------------------------------------------------------------
/storage/storage-int.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2023 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 | package storage
18 |
19 | import "io"
20 | import "fmt"
21 | import "unsafe"
22 | import "math/bits"
23 | import "encoding/binary"
24 | import "github.com/launix-de/memcp/scm"
25 |
26 | type StorageInt struct {
27 | chunk []uint64
28 | bitsize uint8
29 | offset int64
30 | 	max int64 // only for statistical use
31 | count uint64 // only stored for serialization purposes
32 | hasNull bool
33 | null uint64 // which value is null
34 | }
35 |
36 | func (s *StorageInt) Serialize(f io.Writer) {
37 | var hasNull uint8
38 | if s.hasNull {
39 | hasNull = 1
40 | }
41 | binary.Write(f, binary.LittleEndian, uint8(10)) // 10 = StorageInt
42 | binary.Write(f, binary.LittleEndian, uint8(s.bitsize)) // len=2
43 | binary.Write(f, binary.LittleEndian, uint8(hasNull)) // len=3
44 | binary.Write(f, binary.LittleEndian, uint8(0)) // len=4
45 | binary.Write(f, binary.LittleEndian, uint32(0)) // len=8
46 | 	binary.Write(f, binary.LittleEndian, uint64(len(s.chunk))) // chunk size so we know how much data is left
47 | binary.Write(f, binary.LittleEndian, uint64(s.count))
48 | binary.Write(f, binary.LittleEndian, uint64(s.offset))
49 | binary.Write(f, binary.LittleEndian, uint64(s.null))
50 | if len(s.chunk) > 0 {
51 | f.Write(unsafe.Slice((*byte)(unsafe.Pointer(&s.chunk[0])), 8*len(s.chunk)))
52 | }
53 | }
54 | func (s *StorageInt) Deserialize(f io.Reader) uint {
55 | return s.DeserializeEx(f, false)
56 | }
57 |
58 | func (s *StorageInt) DeserializeEx(f io.Reader, readMagicbyte bool) uint {
59 | var dummy8 uint8
60 | var dummy32 uint32
61 | if readMagicbyte {
62 | binary.Read(f, binary.LittleEndian, &dummy8)
63 | if dummy8 != 10 {
64 | panic(fmt.Sprintf("Tried to deserialize StorageInt(10) from file but found %d", dummy8))
65 | }
66 | }
67 | binary.Read(f, binary.LittleEndian, &s.bitsize)
68 | var hasNull uint8
69 | binary.Read(f, binary.LittleEndian, &hasNull)
70 | s.hasNull = hasNull != 0
71 | binary.Read(f, binary.LittleEndian, &dummy8)
72 | binary.Read(f, binary.LittleEndian, &dummy32)
73 | var chunkcount uint64
74 | binary.Read(f, binary.LittleEndian, &chunkcount)
75 | binary.Read(f, binary.LittleEndian, &s.count)
76 | binary.Read(f, binary.LittleEndian, &s.offset)
77 | binary.Read(f, binary.LittleEndian, &s.null)
78 | if chunkcount > 0 {
79 | rawdata := make([]byte, chunkcount*8)
80 | f.Read(rawdata)
81 | s.chunk = unsafe.Slice((*uint64)(unsafe.Pointer(&rawdata[0])), chunkcount)
82 | }
83 | return uint(s.count)
84 | }
85 |
86 | func toInt(x scm.Scmer) int64 {
87 | switch v := x.(type) {
88 | case float64:
89 | return int64(v)
90 | case int:
91 | return int64(v)
92 | case uint:
93 | return int64(v)
94 | case uint64:
95 | return int64(v)
96 | case int64:
97 | return v
98 | // TODO: 8 bit, 16 bit, 32 bit
99 | default:
100 | return 0
101 | }
102 | }
103 |
104 | func (s *StorageInt) ComputeSize() uint {
105 | return 8*uint(len(s.chunk)) + 64 // management overhead
106 | }
107 |
108 | func (s *StorageInt) String() string {
109 | if s.hasNull {
110 | return fmt.Sprintf("int[%d]NULL", s.bitsize)
111 | } else {
112 | return fmt.Sprintf("int[%d]", s.bitsize)
113 | }
114 | }
115 |
116 | func (s *StorageInt) GetValue(i uint) scm.Scmer {
117 | v := s.GetValueUInt(i)
118 | if s.hasNull && v == s.null {
119 | return nil
120 | }
121 | return int64(v) + s.offset
122 | }
123 |
124 | func (s *StorageInt) GetValueUInt(i uint) uint64 {
125 | bitpos := i * uint(s.bitsize)
126 |
127 | v := s.chunk[bitpos/64] << (bitpos % 64) // align to leftmost position
128 | if bitpos%64+uint(s.bitsize) > 64 {
129 | v = v | s.chunk[bitpos/64+1]>>(64-bitpos%64)
130 | }
131 |
132 | return uint64(v) >> (64 - uint(s.bitsize)) // shift right without sign
133 | }
134 |
135 | func (s *StorageInt) prepare() {
136 | // set up scan
137 | s.bitsize = 0
138 | s.offset = int64(1<<63 - 1)
139 | s.max = -s.offset - 1
140 | s.hasNull = false
141 | }
142 | func (s *StorageInt) scan(i uint, value scm.Scmer) {
143 | // storage is so simple, dont need scan
144 | if value == nil {
145 | s.hasNull = true
146 | return
147 | }
148 | v := toInt(value)
149 | if v < s.offset {
150 | s.offset = v
151 | }
152 | if v > s.max {
153 | s.max = v
154 | }
155 | }
156 | func (s *StorageInt) init(i uint) {
157 | v := s.max - s.offset
158 | if s.hasNull {
159 | // store the value
160 | v = v + 1
161 | s.null = uint64(v)
162 | }
163 | if v == -1 {
164 | // no values at all
165 | v = 0
166 | s.offset = 0
167 | s.null = 0
168 | }
169 | s.bitsize = uint8(bits.Len64(uint64(v)))
170 | if s.bitsize == 0 {
171 | s.bitsize = 1
172 | }
173 | // allocate
174 | s.chunk = make([]uint64, ((i-1)*uint(s.bitsize)+65)/64+1)
175 | s.count = uint64(i)
176 | // fmt.Println("storing bitsize", s.bitsize,"null",s.null,"offset",s.offset)
177 | }
178 | func (s *StorageInt) build(i uint, value scm.Scmer) {
179 | if i >= uint(s.count) {
180 | panic("tried to build StorageInt outside of range")
181 | }
182 | // store
183 | vi := toInt(value)
184 | if value == nil {
185 | // null value
186 | vi = int64(s.null)
187 | } else {
188 | vi = vi - s.offset
189 | }
190 | bitpos := i * uint(s.bitsize)
191 | v := uint64(vi) << (64 - uint(s.bitsize)) // shift value to the leftmost position of 64bit int
192 | s.chunk[bitpos/64] = s.chunk[bitpos/64] | (v >> (bitpos % 64)) // first chunk
193 | if bitpos%64+uint(s.bitsize) > 64 {
194 | s.chunk[bitpos/64+1] = s.chunk[bitpos/64+1] | v<<(64-bitpos%64) // second chunk
195 | }
196 | }
197 | func (s *StorageInt) finish() {
198 | }
199 | func (s *StorageInt) proposeCompression(i uint) ColumnStorage {
200 | 	// don't propose another pass
201 | return nil
202 | }
203 |
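A worked sketch (inside package storage) of the two-pass protocol: scan learns offset 100 and max 107, so init picks a bitsize of 3 and build packs each value into 3 bits:

    // inside package storage
    func exampleStorageInt() scm.Scmer {
        s := new(StorageInt)
        s.prepare()
        for i := uint(0); i < 8; i++ {
            s.scan(i, int64(100+i)) // learns offset=100, max=107
        }
        s.init(8) // bitsize = bits.Len64(107-100) = 3
        for i := uint(0); i < 8; i++ {
            s.build(i, int64(100+i)) // stores value-offset in 3 bits
        }
        s.finish()
        return s.GetValue(3) // int64(103)
    }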
--------------------------------------------------------------------------------
/storage/storage-prefix.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2023 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 | package storage
18 |
19 | import "fmt"
20 | import "strings"
21 | import "github.com/launix-de/memcp/scm"
22 |
23 | type StoragePrefix struct {
24 | // prefix compression
25 | prefixes StorageInt
26 | 	prefixdictionary []string // prefix dictionary
27 | values StorageString // only one depth (but can be cascaded!)
28 | }
29 |
30 | func (s *StoragePrefix) ComputeSize() uint {
31 | return s.prefixes.ComputeSize() + 24 + s.values.ComputeSize()
32 | }
33 |
34 | func (s *StoragePrefix) String() string {
35 | return fmt.Sprintf("prefix[%s]-%s", s.prefixdictionary[1], s.values.String())
36 | }
37 |
38 | func (s *StoragePrefix) GetValue(i uint) scm.Scmer {
39 | innerval := s.values.GetValue(i)
40 | switch v := innerval.(type) {
41 | case string:
42 | return s.prefixdictionary[int64(s.prefixes.GetValueUInt(i))+s.prefixes.offset] + v // append prefix
43 | case nil:
44 | return nil
45 | default:
46 | panic("invalid value in prefix storage")
47 | }
48 | }
49 |
50 | func (s *StoragePrefix) prepare() {
51 | // set up scan
52 | s.prefixes.prepare()
53 | s.values.prepare()
54 | }
55 | func (s *StoragePrefix) scan(i uint, value scm.Scmer) {
56 | var v string
57 | switch v_ := value.(type) {
58 | case string:
59 | v = v_
60 | default:
61 | // NULL
62 | s.values.scan(i, nil)
63 | return
64 | }
65 |
66 | for pfid := len(s.prefixdictionary) - 1; pfid >= 0; pfid-- {
67 | if strings.HasPrefix(v, s.prefixdictionary[pfid]) {
68 | // learn the string stripped from its prefix
69 | s.prefixes.scan(i, pfid)
70 | s.values.scan(i, v[len(s.prefixdictionary[pfid]):])
71 | return
72 | }
73 | }
74 | }
75 | func (s *StoragePrefix) init(i uint) {
76 | s.prefixes.init(i)
77 | s.values.init(i)
78 | }
79 | func (s *StoragePrefix) build(i uint, value scm.Scmer) {
80 | // store
81 | var v string
82 | switch v_ := value.(type) {
83 | case string:
84 | v = v_
85 | default:
86 | // NULL = 1 1
87 | s.values.build(i, nil)
88 | return
89 | }
90 |
91 | for pfid := len(s.prefixdictionary) - 1; pfid >= 0; pfid-- {
92 | if strings.HasPrefix(v, s.prefixdictionary[pfid]) {
93 | // learn the string stripped from its prefix
94 | s.prefixes.build(i, pfid)
95 | s.values.build(i, v[len(s.prefixdictionary[pfid]):])
96 | return
97 | }
98 | }
99 | }
100 | func (s *StoragePrefix) finish() {
101 | s.prefixes.finish()
102 | s.values.finish()
103 | }
104 | func (s *StoragePrefix) proposeCompression(i uint) ColumnStorage {
105 | 	// don't propose another pass
106 | // TODO: if s.values proposes a StoragePrefix, build it into our cascade??
107 | return nil
108 | }
109 |
--------------------------------------------------------------------------------
/storage/storage-scmer.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2023 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 | package storage
18 |
19 | import "io"
20 | import "math"
21 | import "bufio"
22 | import "encoding/json"
23 | import "encoding/binary"
24 | import "github.com/launix-de/memcp/scm"
25 |
26 | // main type for storage: can store any value, is inefficient but does type analysis how to optimize
27 | type StorageSCMER struct {
28 | values []scm.Scmer
29 | onlyInt bool
30 | onlyFloat bool
31 | hasString bool
32 | longStrings int
33 | null uint // amount of NULL values (sparse map!)
34 | numSeq uint // sequence statistics
35 | last1, last2 int64 // sequence statistics
36 | }
37 |
38 | func (s *StorageSCMER) ComputeSize() uint {
39 | // ! size of Scmer values is not considered
40 | var sz uint = 80 + 24
41 | for _, v := range s.values {
42 | sz += scm.ComputeSize(v)
43 | }
44 | return sz
45 | }
46 |
47 | func (s *StorageSCMER) String() string {
48 | return "SCMER"
49 | }
50 |
51 | func (s *StorageSCMER) Serialize(f io.Writer) {
52 | binary.Write(f, binary.LittleEndian, uint8(1)) // 1 = StorageSCMER
53 | binary.Write(f, binary.LittleEndian, uint64(len(s.values)))
54 | for i := 0; i < len(s.values); i++ {
55 | v, err := json.Marshal(s.values[i])
56 | if err != nil {
57 | panic(err)
58 | }
59 | f.Write(v)
60 | f.Write([]byte("\n")) // endline so the serialized file becomes a jsonl file beginning at byte 9
61 | }
62 | }
63 | func (s *StorageSCMER) Deserialize(f io.Reader) uint {
64 | var l uint64
65 | binary.Read(f, binary.LittleEndian, &l)
66 | s.values = make([]scm.Scmer, l)
67 | scanner := bufio.NewScanner(f)
68 | for i := uint64(0); i < l; i++ {
69 | if scanner.Scan() {
70 | var v any
71 | json.Unmarshal(scanner.Bytes(), &v)
72 | s.values[i] = scm.TransformFromJSON(v)
73 | }
74 | }
75 | return uint(l)
76 | }
77 |
78 | func (s *StorageSCMER) GetValue(i uint) scm.Scmer {
79 | return s.values[i]
80 | }
81 |
82 | func (s *StorageSCMER) scan(i uint, value scm.Scmer) {
83 | switch v := value.(type) {
84 | case int64:
85 | v2 := toInt(value)
86 | // analyze whether there is a sequence
87 | if v2-s.last1 == s.last1-s.last2 {
88 | s.numSeq = s.numSeq + 1 // count as sequencable
89 | }
90 | // push sequence detector
91 | s.last2 = s.last1
92 | s.last1 = v2
93 | case float64:
94 | if _, f := math.Modf(v); f != 0.0 {
95 | s.onlyInt = false
96 | } else {
97 | v := toInt(value)
98 | // analyze whether there is a sequence
99 | if v-s.last1 == s.last1-s.last2 {
100 | s.numSeq = s.numSeq + 1 // count as sequencable
101 | }
102 | // push sequence detector
103 | s.last2 = s.last1
104 | s.last1 = v
105 | }
106 | case scm.LazyString:
107 | s.onlyInt = false
108 | s.onlyFloat = false
109 | s.hasString = true
110 | s.longStrings++
111 | case string:
112 | s.onlyInt = false
113 | s.onlyFloat = false
114 | s.hasString = true
115 | if len(v) > 255 {
116 | s.longStrings++
117 | }
118 | case nil:
119 | s.null = s.null + 1 // count NULL
120 | // storageInt can also handle null
121 | default:
122 | s.onlyInt = false
123 | s.onlyFloat = false
124 | }
125 | }
126 | func (s *StorageSCMER) prepare() {
127 | s.onlyInt = true
128 | s.onlyFloat = true
129 | s.hasString = false
130 | }
131 | func (s *StorageSCMER) init(i uint) {
132 | // allocate
133 | s.values = make([]scm.Scmer, i)
134 | }
135 | func (s *StorageSCMER) build(i uint, value scm.Scmer) {
136 | // store
137 | s.values[i] = value
138 | }
139 | func (s *StorageSCMER) finish() {
140 | }
141 |
142 | // applies solely to StorageSCMER
143 | func (s *StorageSCMER) proposeCompression(i uint) ColumnStorage {
144 | if s.null*100 > i*13 {
145 | // sparse payoff against bitcompressed is at ~13%
146 | if s.longStrings > 2 {
147 | b := new(OverlayBlob)
148 | b.Base = new(StorageSparse)
149 | return b
150 | }
151 | return new(StorageSparse)
152 | }
153 | if s.hasString {
154 | if s.longStrings > 2 {
155 | b := new(OverlayBlob)
156 | b.Base = new(StorageString)
157 | return b
158 | }
159 | return new(StorageString)
160 | }
161 | if s.onlyInt { // TODO: OverlaySCMER?
162 | // propose sequence compression in the form (recordid, startvalue, length, stride) using binary search on recordid for reading
163 | if i > 5 && 2*(i-s.numSeq) < i {
164 | return new(StorageSeq)
165 | }
166 | return new(StorageInt)
167 | }
168 | if s.onlyFloat {
169 | // tight float packing
170 | return new(StorageFloat)
171 | }
172 | if s.null*2 > i {
173 | // sparse payoff against StorageSCMER is at 2.1
174 | return new(StorageSparse)
175 | }
176 | 	// don't propose another pass
177 | return nil
178 | }
179 |
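A minimal sketch (inside package storage) of the analysis pass that proposeCompression serves: a purely sequential integer column is proposed as StorageSeq, plain integers would fall back to StorageInt:

    // inside package storage
    func exampleProposeCompression() ColumnStorage {
        s := new(StorageSCMER)
        s.prepare()
        for i := uint(0); i < 1000; i++ {
            s.scan(i, int64(i)) // only integers, no NULLs, stride 1
        }
        return s.proposeCompression(1000) // returns a *StorageSeq here
    }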
--------------------------------------------------------------------------------
/storage/storage-sparse.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2023 Carl-Philip Hänsch
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 |
9 | This program is distributed in the hope that it will be useful,
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | GNU General Public License for more details.
13 |
14 | You should have received a copy of the GNU General Public License
15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | */
17 | package storage
18 |
19 | import "io"
20 | import "bufio"
21 | import "encoding/json"
22 | import "encoding/binary"
23 | import "github.com/launix-de/memcp/scm"
24 |
25 | type StorageSparse struct {
26 | i, count uint64
27 | recids StorageInt
28 | values []scm.Scmer // TODO: embed other formats as values (ColumnStorage with a proposeCompression loop)
29 | }
30 |
31 | func (s *StorageSparse) ComputeSize() uint {
32 | var sz uint = 16 + 8 + 24 + s.recids.ComputeSize() + 8*uint(len(s.values))
33 | for _, v := range s.values {
34 | sz += scm.ComputeSize(v)
35 | }
36 | return sz
37 | }
38 |
39 | func (s *StorageSparse) String() string {
40 | return "SCMER-sparse"
41 | }
42 | func (s *StorageSparse) Serialize(f io.Writer) {
43 | binary.Write(f, binary.LittleEndian, uint8(2)) // 2 = StorageSparse
44 | binary.Write(f, binary.LittleEndian, uint64(s.count))
45 | binary.Write(f, binary.LittleEndian, uint64(len(s.values)))
46 | for k, v := range s.values {
47 | vbytes, err := json.Marshal(uint64(s.recids.GetValueUInt(uint(k)) + uint64(s.recids.offset)))
48 | if err != nil {
49 | panic(err)
50 | }
51 | f.Write(vbytes)
52 | f.Write([]byte("\n")) // endline so the serialized file becomes a jsonl file beginning at byte 9
53 | vbytes, err = json.Marshal(v)
54 | if err != nil {
55 | panic(err)
56 | }
57 | f.Write(vbytes)
58 | f.Write([]byte("\n")) // endline so the serialized file becomes a jsonl file beginning at byte 9
59 | }
60 | }
61 | func (s *StorageSparse) Deserialize(f io.Reader) uint {
62 | var l uint64
63 | binary.Read(f, binary.LittleEndian, &l)
64 | s.count = l
65 | var l2 uint64
66 | binary.Read(f, binary.LittleEndian, &l2)
67 | s.values = make([]scm.Scmer, l2)
68 | s.i = l2
69 | scanner := bufio.NewScanner(f)
70 | s.recids.prepare()
71 | s.recids.scan(0, 0)
72 | s.recids.scan(uint(l2-1), l-1)
73 | s.recids.init(uint(l2))
74 | i := 0
75 | for {
76 | var k uint64
77 | if !scanner.Scan() {
78 | break
79 | }
80 | json.Unmarshal(scanner.Bytes(), &k)
81 | if !scanner.Scan() {
82 | break
83 | }
84 | var v any
85 | json.Unmarshal(scanner.Bytes(), &v)
86 | s.recids.build(uint(i), k)
87 | s.values[i] = scm.TransformFromJSON(v)
88 | i++
89 | }
90 | s.recids.finish()
91 | return uint(l)
92 | }
93 |
94 | func (s *StorageSparse) GetValue(i uint) scm.Scmer {
95 | var lower uint = 0
96 | var upper uint = uint(s.i)
97 | for {
98 | if lower == upper {
99 | return nil // sparse value
100 | }
101 | pivot := uint((lower + upper) / 2)
102 | recid := s.recids.GetValueUInt(pivot) + uint64(s.recids.offset)
103 | if recid == uint64(i) {
104 | return s.values[pivot] // found the value
105 | }
106 | if recid < uint64(i) {
107 | lower = pivot + 1
108 | } else {
109 | upper = pivot
110 | }
111 |
112 | }
113 | }
114 |
115 | func (s *StorageSparse) scan(i uint, value scm.Scmer) {
116 | if value != nil {
117 | s.recids.scan(uint(s.i), i)
118 | s.i++
119 | }
120 | }
121 | func (s *StorageSparse) prepare() {
122 | s.i = 0
123 | }
124 | func (s *StorageSparse) init(i uint) {
125 | s.values = make([]scm.Scmer, s.i)
126 | s.count = uint64(i)
127 | s.recids.init(uint(s.i))
128 | s.i = 0
129 | }
130 | func (s *StorageSparse) build(i uint, value scm.Scmer) {
131 | // store
132 | if value != nil {
133 | s.recids.build(uint(s.i), i)
134 | s.values[s.i] = value
135 | s.i++
136 | }
137 | }
138 | func (s *StorageSparse) finish() {
139 | s.recids.finish()
140 | }
141 |
142 | // applies solely to StorageSparse
143 | func (s *StorageSparse) proposeCompression(i uint) ColumnStorage {
144 | return nil
145 | }
146 |
--------------------------------------------------------------------------------
/tests/01_basic_sql.yaml:
--------------------------------------------------------------------------------
1 | # Basic SQL Operations Test Suite
2 | # Fundamental SQL functionality: arithmetic, comparisons, literals, basic expressions
3 |
4 | metadata:
5 | version: "1.0"
6 | description: "Basic SQL operations and expressions"
7 |
8 | setup: []
9 |
10 | test_cases:
11 |
12 | # === ARITHMETIC EXPRESSIONS ===
13 | - name: "Basic addition"
14 | sql: "SELECT 5 + 3 AS result"
15 | expect:
16 | rows: 1
17 | data:
18 | - result: 8
19 |
20 | - name: "Basic subtraction"
21 | sql: "SELECT 10 - 4 AS result"
22 | expect:
23 | rows: 1
24 | data:
25 | - result: 6
26 |
27 | - name: "Basic multiplication"
28 | sql: "SELECT 6 * 7 AS result"
29 | expect:
30 | rows: 1
31 | data:
32 | - result: 42
33 |
34 | - name: "Basic division"
35 | sql: "SELECT 15 / 3 AS result"
36 | expect:
37 | rows: 1
38 | data:
39 | - result: 5
40 |
41 | - name: "Complex arithmetic with parentheses"
42 | sql: "SELECT (10 + 5) * 2 - 8 / 4 AS result"
43 | expect:
44 | rows: 1
45 | data:
46 | - result: 28
47 |
48 | - name: "Division with decimal result"
49 | sql: "SELECT 1 / 0.1 AS result"
50 | expect:
51 | rows: 1
52 | data:
53 | - result: 10
54 |
55 | - name: "Large number arithmetic"
56 | sql: "SELECT 999999 + 1 AS result"
57 | expect:
58 | rows: 1
59 | data:
60 | - result: 1000000
61 |
62 | - name: "Negative number operations"
63 | sql: "SELECT -5 * -3 AS result"
64 | expect:
65 | rows: 1
66 | data:
67 | - result: 15
68 |
69 | # === COMPARISON OPERATORS ===
70 | - name: "Greater than comparison"
71 | sql: "SELECT 10 > 5 AS result"
72 | expect:
73 | rows: 1
74 | data:
75 | - result: true
76 |
77 | - name: "Less than comparison"
78 | sql: "SELECT 3 < 8 AS result"
79 | expect:
80 | rows: 1
81 | data:
82 | - result: true
83 |
84 | - name: "Greater than or equal"
85 | sql: "SELECT 5 >= 5 AS result"
86 | expect:
87 | rows: 1
88 | data:
89 | - result: true
90 |
91 | - name: "Less than or equal"
92 | sql: "SELECT 4 <= 7 AS result"
93 | expect:
94 | rows: 1
95 | data:
96 | - result: true
97 |
98 | - name: "Equality comparison"
99 | sql: "SELECT 5 = 5 AS result"
100 | expect:
101 | rows: 1
102 | data:
103 | - result: true
104 |
105 | - name: "Inequality comparison"
106 | sql: "SELECT 5 != 3 AS result"
107 | expect:
108 | rows: 1
109 | data:
110 | - result: true
111 |
112 | - name: "String comparison"
113 | sql: "SELECT 'apple' < 'banana' AS result"
114 | expect:
115 | rows: 1
116 | data:
117 | - result: true
118 |
119 | - name: "Comparison with arithmetic"
120 | sql: "SELECT (10 + 5) > (3 * 4) AS result"
121 | expect:
122 | rows: 1
123 | data:
124 | - result: true
125 |
126 | # === LITERALS AND CONSTANTS ===
127 | - name: "Integer literal"
128 | sql: "SELECT 42 AS number"
129 | expect:
130 | rows: 1
131 | data:
132 | - number: 42
133 |
134 | - name: "Negative integer literal"
135 | sql: "SELECT -17 AS number"
136 | expect:
137 | rows: 1
138 | data:
139 | - number: -17
140 |
141 | - name: "Float literal"
142 | sql: "SELECT 3.14159 AS pi"
143 | expect:
144 | rows: 1
145 | data:
146 | - pi: 3.14159
147 |
148 | - name: "String literal with single quotes"
149 | sql: "SELECT 'Hello World' AS message"
150 | expect:
151 | rows: 1
152 | data:
153 | - message: "Hello World"
154 |
155 | - name: "String literal with double quotes"
156 | sql: "SELECT \"Hello SQL\" AS message"
157 | expect:
158 | rows: 1
159 | data:
160 | - message: "Hello SQL"
161 |
162 | - name: "Boolean true literal"
163 | sql: "SELECT true AS bool_val"
164 | expect:
165 | rows: 1
166 | data:
167 | - bool_val: true
168 |
169 | - name: "Boolean false literal"
170 | sql: "SELECT false AS bool_val"
171 | expect:
172 | rows: 1
173 | data:
174 | - bool_val: false
175 |
176 | # === MULTIPLE COLUMNS ===
177 | - name: "Multiple column expressions"
178 | sql: "SELECT 1 + 2 AS sum, 3 * 4 AS product, 'test' AS text"
179 | expect:
180 | rows: 1
181 | data:
182 | - sum: 3
183 | product: 12
184 | text: "test"
185 |
186 | - name: "Mixed data types in columns"
187 | sql: "SELECT 42 AS number, 'text' AS string, true AS boolean, 3.14 AS decimal"
188 | expect:
189 | rows: 1
190 | data:
191 | - number: 42
192 | string: "text"
193 | boolean: true
194 | decimal: 3.14
195 |
196 | cleanup: []
--------------------------------------------------------------------------------
/tests/02_functions.yaml:
--------------------------------------------------------------------------------
1 | # SQL Functions Test Suite
2 | # Built-in functions: math, string, time, and other utility functions
3 |
4 | metadata:
5 | version: "1.0"
6 | description: "SQL built-in functions and expressions"
7 |
8 | setup: []
9 |
10 | test_cases:
11 |
12 | # === MATHEMATICAL FUNCTIONS ===
13 | - name: "FLOOR function"
14 | sql: "SELECT FLOOR(4.7) AS result"
15 | expect:
16 | rows: 1
17 | data:
18 | - result: 4
19 |
20 | - name: "FLOOR with negative number"
21 | sql: "SELECT FLOOR(-4.7) AS result"
22 | expect:
23 | rows: 1
24 | data:
25 | - result: -5
26 |
27 | - name: "CEIL function"
28 | sql: "SELECT CEIL(4.3) AS result"
29 | expect:
30 | rows: 1
31 | data:
32 | - result: 5
33 |
34 | - name: "CEILING function (alias)"
35 | sql: "SELECT CEILING(4.3) AS result"
36 | expect:
37 | rows: 1
38 | data:
39 | - result: 5
40 |
41 | - name: "ROUND function"
42 | sql: "SELECT ROUND(4.6) AS result"
43 | expect:
44 | rows: 1
45 | data:
46 | - result: 5
47 |
48 | - name: "ROUND with .5 value"
49 | sql: "SELECT ROUND(4.5) AS result"
50 | expect:
51 | rows: 1
52 | data:
53 | - result: 5
54 |
55 | - name: "Nested math functions"
56 | sql: "SELECT FLOOR(CEIL(4.3) * 2 + ROUND(1.6)) AS result"
57 | expect:
58 | rows: 1
59 | data:
60 | - result: 12
61 |
62 | # === STRING FUNCTIONS ===
63 | - name: "UPPER function"
64 | sql: "SELECT UPPER('hello world') AS result"
65 | expect:
66 | rows: 1
67 | data:
68 | - result: "HELLO WORLD"
69 |
70 | - name: "LOWER function"
71 | sql: "SELECT LOWER('HELLO WORLD') AS result"
72 | expect:
73 | rows: 1
74 | data:
75 | - result: "hello world"
76 |
77 | - name: "Nested string functions"
78 | sql: "SELECT UPPER(LOWER('HELLO world')) AS result"
79 | expect:
80 | rows: 1
81 | data:
82 | - result: "HELLO WORLD"
83 |
84 | - name: "UPPER with empty string"
85 | sql: "SELECT UPPER('') AS result"
86 | expect:
87 | rows: 1
88 | data:
89 | - result: ""
90 |
91 | - name: "LOWER with empty string"
92 | sql: "SELECT LOWER('') AS result"
93 | expect:
94 | rows: 1
95 | data:
96 | - result: ""
97 |
98 | - name: "Multiple string operations"
99 | sql: "SELECT UPPER('hello') AS upper_case, LOWER('WORLD') AS lower_case"
100 | expect:
101 | rows: 1
102 | data:
103 | - upper_case: "HELLO"
104 | lower_case: "world"
105 |
106 | # === BASE64 FUNCTIONS ===
107 | - name: "TO_BASE64 encodes"
108 | sql: "SELECT TO_BASE64('foo') AS b64"
109 | expect:
110 | rows: 1
111 | data:
112 | - b64: "Zm9v"
113 |
114 | - name: "FROM_BASE64 decodes"
115 | sql: "SELECT FROM_BASE64('Zm9v') AS plain"
116 | expect:
117 | rows: 1
118 | data:
119 | - plain: "foo"
120 |
121 | - name: "Base64 roundtrip"
122 | sql: "SELECT FROM_BASE64(TO_BASE64('Hello, world!')) AS plain"
123 | expect:
124 | rows: 1
125 | data:
126 | - plain: "Hello, world!"
127 |
128 | # === TIME FUNCTIONS ===
129 | - name: "UNIX_TIMESTAMP function exists"
130 | sql: "SELECT UNIX_TIMESTAMP() > 1600000000 AS recent_timestamp"
131 | expect:
132 | rows: 1
133 | data:
134 | - recent_timestamp: true
135 |
136 | - name: "CURRENT_TIMESTAMP function exists"
137 | sql: "SELECT CURRENT_TIMESTAMP() > 1600000000 AS recent_timestamp"
138 | expect:
139 | rows: 1
140 | data:
141 | - recent_timestamp: true
142 |
143 | - name: "Time functions return numbers"
144 | sql: "SELECT UNIX_TIMESTAMP() > 0 AS positive_time"
145 | expect:
146 | rows: 1
147 | data:
148 | - positive_time: true
149 |
150 | # === VECTOR FUNCTIONS ===
151 | - name: "STRING_TO_VECTOR function"
152 | sql: "SELECT STRING_TO_VECTOR('[1,2,3]') AS vector"
153 | expect:
154 | rows: 1
155 |
156 | - name: "VECTOR_TO_STRING function"
157 | sql: "SELECT VECTOR_TO_STRING('[1,2,3]') AS vector_str"
158 | expect:
159 | rows: 1
160 |
161 | # TODO: VECTOR_DISTANCE has type conversion issues
162 | # - name: "VECTOR_DISTANCE function"
163 | # sql: "SELECT VECTOR_DISTANCE('[1,2,3]', '[4,5,6]') AS distance"
164 | # expect:
165 | # error: true
166 | # error_type: "type_conversion"
167 |
168 | # === CASE EXPRESSIONS ===
169 | - name: "Simple CASE expression"
170 | sql: "SELECT CASE WHEN 5 > 3 THEN 'greater' ELSE 'lesser' END AS result"
171 | expect:
172 | rows: 1
173 | data:
174 | - result: "greater"
175 |
176 | - name: "CASE with multiple WHEN clauses"
177 | sql: "SELECT CASE WHEN 2 > 5 THEN 'big' WHEN 2 > 1 THEN 'medium' ELSE 'small' END AS result"
178 | expect:
179 | rows: 1
180 | data:
181 | - result: "medium"
182 |
183 | - name: "Nested CASE expressions"
184 | sql: "SELECT CASE WHEN 5 > 3 THEN CASE WHEN 2 > 1 THEN 'both_true' ELSE 'first_only' END ELSE 'neither' END AS result"
185 | expect:
186 | rows: 1
187 | data:
188 | - result: "both_true"
189 |
190 | - name: "CASE with arithmetic in conditions"
191 | sql: "SELECT CASE WHEN (5 + 3) > (2 * 3) THEN 'math_works' ELSE 'math_broken' END AS result"
192 | expect:
193 | rows: 1
194 | data:
195 | - result: "math_works"
196 |
197 | - name: "CASE with string comparisons"
198 | sql: "SELECT CASE WHEN 'apple' < 'banana' THEN 'alphabetical' ELSE 'reversed' END AS result"
199 | expect:
200 | rows: 1
201 | data:
202 | - result: "alphabetical"
203 |
204 | # === FUNCTION COMBINATIONS ===
205 | - name: "Functions in arithmetic"
206 | sql: "SELECT FLOOR(4.7) + CEIL(4.3) AS result"
207 | expect:
208 | rows: 1
209 | data:
210 | - result: 9
211 |
212 | - name: "Functions with CASE"
213 | sql: "SELECT CASE WHEN FLOOR(4.7) > 3 THEN UPPER('yes') ELSE LOWER('NO') END AS result"
214 | expect:
215 | rows: 1
216 | data:
217 | - result: "YES"
218 |
219 | - name: "Multiple function types"
220 | sql: "SELECT FLOOR(3.7) AS math, UPPER('test') AS string, UNIX_TIMESTAMP() > 0 AS time"
221 | expect:
222 | rows: 1
223 | data:
224 | - math: 3
225 | string: "TEST"
226 | time: true
227 |
228 | cleanup: []
229 |
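As a sanity check on the literal expectations in this suite, a minimal Python sketch (standard library only, independent of memcp) reproduces the base64 and rounding values used above:

```python
import base64
import math

# TO_BASE64('foo') is expected to yield 'Zm9v'; FROM_BASE64 reverses it.
assert base64.b64encode(b"foo").decode() == "Zm9v"
assert base64.b64decode("Zm9v") == b"foo"

# "Functions in arithmetic": FLOOR(4.7) + CEIL(4.3) = 4 + 5 = 9.
assert math.floor(4.7) + math.ceil(4.3) == 9
```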
--------------------------------------------------------------------------------
/tests/03_ddl_operations.yaml:
--------------------------------------------------------------------------------
1 | # DDL Operations Test Suite
2 | # Data Definition Language: CREATE, DROP, ALTER operations with affected_rows
3 |
4 | metadata:
5 | version: "1.0"
6 | description: "DDL operations (CREATE, DROP, ALTER) with affected_rows validation"
7 |
8 | setup:
9 | # Ensure user rows don't pre-exist from earlier runs
10 | - sql: "DELETE FROM system.user WHERE username = 'testuser'"
11 | - sql: "DELETE FROM system.user WHERE username = 'testuser2'"
12 |
13 | test_cases:
14 |
15 | # === CREATE TABLE OPERATIONS ===
16 | - name: "CREATE TABLE basic structure"
17 | sql: "CREATE TABLE users (id INT, name VARCHAR(50))"
18 | expect:
19 | affected_rows: 1
20 |
21 | - name: "CREATE TABLE with multiple columns"
22 | sql: "CREATE TABLE products (id INT, name VARCHAR(100), price DECIMAL(10,2), description TEXT)"
23 | expect:
24 | affected_rows: 1
25 |
26 | - name: "CREATE TABLE with constraints"
27 | sql: "CREATE TABLE orders (id INT PRIMARY KEY, user_id INT, total DECIMAL(10,2), status VARCHAR(20))"
28 | expect:
29 | affected_rows: 1
30 |
31 | - name: "CREATE TABLE with IF NOT EXISTS"
32 | sql: "CREATE TABLE IF NOT EXISTS customers (id INT, name VARCHAR(100))"
33 | expect:
34 | affected_rows: 1
35 |
36 | - name: "CREATE TABLE with IF NOT EXISTS (already exists)"
37 | sql: "CREATE TABLE IF NOT EXISTS users (id INT, email VARCHAR(200))"
38 | expect:
39 | affected_rows: 1
40 |
41 | - name: "CREATE TABLE with various data types"
42 | sql: |
43 | CREATE TABLE test_types (
44 | id INT,
45 | name VARCHAR(100),
46 | price DECIMAL(10,2),
47 | active BOOLEAN,
48 | created_at TIMESTAMP,
49 | notes TEXT
50 | )
51 | expect:
52 | affected_rows: 1
53 |
54 | - name: "CREATE TABLE with ENGINE specification"
55 | sql: "CREATE TABLE memory_table (id INT, data VARCHAR(100)) ENGINE=MEMORY"
56 | expect:
57 | affected_rows: 1
58 |
59 | # === SHOW OPERATIONS ===
60 | - name: "SHOW DATABASES"
61 | sql: "SHOW DATABASES"
62 | expect: {} # Environment dependent, just check it runs
63 |
64 | - name: "SHOW TABLES"
65 | sql: "SHOW TABLES"
66 | expect: {} # Environment dependent, just check it runs
67 |
68 | - name: "SHOW TABLES FROM specific database"
69 | sql: "SHOW TABLES FROM ddl_test"
70 | expect: {} # Environment dependent, just check it runs
71 |
72 | - name: "DESCRIBE table structure"
73 | sql: "DESCRIBE users"
74 | expect: {} # Just check it runs successfully
75 |
76 | - name: "SHOW FULL COLUMNS"
77 | sql: "SHOW FULL COLUMNS FROM users"
78 | expect: {} # Just check it runs successfully
79 |
80 | - name: "SHOW TABLE STATUS"
81 | sql: "SHOW TABLE STATUS"
82 | expect: {} # Just check it runs successfully
83 |
84 | - name: "SHOW VARIABLES"
85 | sql: "SHOW VARIABLES"
86 | expect: {} # Environment dependent, just check it runs
87 |
88 | # === SESSION VARIABLES ===
89 | - name: "SET session variable"
90 | sql: "SET @test_var = 'hello'"
91 | expect: {} # Session variables return assigned value, not affected_rows
92 |
93 | - name: "SET session variable with SESSION keyword"
94 | sql: "SET SESSION @another_var = 42"
95 | expect: {} # Session variables return assigned value, not affected_rows
96 |
97 | - name: "SET session variable with expression"
98 | sql: "SET @calculated = 5 + 3 * 2"
99 | expect: {} # Session variables return assigned value
100 |
101 | - name: "SET multiple session variables"
102 | sql: "SET @var1 = 'test', @var2 = 123"
103 | expect: {} # Multiple session variables
104 |
105 |   # === ADDITIONAL SET EXPRESSIONS ===
106 | - name: "SET complex expression"
107 | sql: "SET @complex = 5 + 3 * 2"
108 | expect: {} # Just check it runs successfully
109 |
110 |   - name: "SET string value"
111 | sql: "SET @text = 'hello'"
112 | expect: {} # Just check it runs successfully
113 |
114 | # TODO: DROP TABLE operations (when DROP functionality is stable)
115 | # - name: "DROP TABLE with IF EXISTS"
116 | # sql: "DROP TABLE IF EXISTS test_drop"
117 | # expect:
118 | # affected_rows: 0 # Table doesn't exist
119 |
120 | # - name: "DROP existing TABLE"
121 | # sql: "DROP TABLE memory_table"
122 | # expect:
123 | # affected_rows: 1
124 |
125 | # TODO: ALTER TABLE operations (when ALTER functionality is stable)
126 | # - name: "ALTER TABLE add column"
127 | # sql: "ALTER TABLE users ADD COLUMN email VARCHAR(200)"
128 | # expect:
129 | # affected_rows: 1
130 |
131 | # - name: "ALTER TABLE modify column"
132 | # sql: "ALTER TABLE users MODIFY COLUMN name VARCHAR(200)"
133 | # expect:
134 | # affected_rows: 1
135 |
136 | # TODO: CREATE INDEX operations
137 | # - name: "CREATE INDEX"
138 | # sql: "CREATE INDEX idx_name ON users (name)"
139 | # expect:
140 | # affected_rows: 1
141 |
142 | # === CREATE USER OPERATIONS ===
143 | - name: "CREATE USER basic"
144 | sql: "CREATE USER testuser"
145 | expect:
146 | affected_rows: 1
147 |
148 | - name: "CREATE USER with password"
149 | sql: "CREATE USER testuser2 IDENTIFIED BY 'password123'"
150 | expect:
151 | affected_rows: 1
152 |
153 | # TODO: ALTER USER operations
154 | # - name: "ALTER USER password"
155 | # sql: "ALTER USER testuser IDENTIFIED BY 'newpassword'"
156 | # expect:
157 | # affected_rows: 1
158 |
159 | cleanup:
160 | # Clean up created users to keep suite idempotent
161 | - sql: "DELETE FROM system.user WHERE username = 'testuser'"
162 | - sql: "DELETE FROM system.user WHERE username = 'testuser2'"
163 |
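The suites in this directory share one shape: an optional `setup` list, the `test_cases` list, then an optional `cleanup` list, where most entries carry `sql` (a few use `sparql` or per-test `username`/`password`) and an `expect` object with `affected_rows`, `rows`, `data`, or `error`. The real runner is run_sql_tests.py, whose interface may differ; the sketch below is only a rough, non-authoritative illustration of how such a file could be driven, with `execute_sql` as a placeholder callback:

```python
import yaml

def run_suite(path, execute_sql):
    """Hypothetical driver; execute_sql(sql) -> {"affected_rows": int, "rows": [row dicts]}."""
    with open(path) as fh:
        suite = yaml.safe_load(fh)
    for phase in ("setup", "test_cases", "cleanup"):
        for case in suite.get(phase) or []:
            if "sql" not in case:          # e.g. sparql cases would be handled elsewhere
                continue
            expect = case.get("expect", {})
            try:
                result = execute_sql(case["sql"])
            except Exception:
                assert expect.get("error"), case.get("name", case["sql"])
                continue
            if "affected_rows" in expect:
                assert result["affected_rows"] == expect["affected_rows"], case.get("name")
            if "rows" in expect:
                assert len(result["rows"]) == expect["rows"], case.get("name")
```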
--------------------------------------------------------------------------------
/tests/04_table_operations.yaml:
--------------------------------------------------------------------------------
1 | # Minimal Table Operations Test
2 | # Testing just the most basic operations to isolate a database corruption issue
3 |
4 | metadata:
5 | version: "1.0"
6 | description: "Minimal table operations test for debugging"
7 |
8 | cleanup:
9 | - action: "Clean up test table"
10 | sql: "DROP TABLE IF EXISTS simple_test"
11 |
12 | setup:
13 | - action: "Drop pre-existing table"
14 | sql: "DROP TABLE IF EXISTS simple_test"
15 | - action: "CREATE TABLE simple_test"
16 | sql: "CREATE TABLE simple_test (id INT, name VARCHAR(50))"
17 |
18 | test_cases:
19 | - name: "Simple INSERT test"
20 | sql: "INSERT INTO simple_test (id, name) VALUES (1, 'test')"
21 | expect:
22 | affected_rows: 1
23 |
24 | - name: "Simple SELECT test"
25 | sql: "SELECT * FROM simple_test"
26 | expect:
27 | rows: 1
28 |
29 | - name: "Schema-qualified INSERT"
30 | sql: "INSERT INTO `memcp-tests`.simple_test (id, name) VALUES (2, 'q')"
31 | expect:
32 | affected_rows: 1
33 |
34 | - name: "Row count after qualified INSERT"
35 | sql: "SELECT COUNT(*) AS c FROM simple_test"
36 | expect:
37 | rows: 1
38 | data:
39 | - c: 2
40 |
41 | - name: "INSERT with trailing semicolon"
42 | sql: "INSERT INTO simple_test (id, name) VALUES (3, 'semi');"
43 | expect:
44 | affected_rows: 1
45 |
46 | - name: "Row count after trailing-semicolon INSERT"
47 | sql: "SELECT COUNT(*) AS c FROM simple_test"
48 | expect:
49 | rows: 1
50 | data:
51 | - c: 3
52 |
--------------------------------------------------------------------------------
/tests/05_advanced_queries.yaml:
--------------------------------------------------------------------------------
1 | # Advanced Query Test Suite
2 | # Complex queries, JOINs, subqueries, GROUP BY, aggregates (many marked TODO until supported)
3 |
4 | metadata:
5 | version: "1.0"
6 | description: "Advanced SQL queries and aggregation features"
7 |
8 | setup:
9 | - action: "CREATE TABLE customers"
10 | sql: |
11 | CREATE TABLE customers (
12 | id INT,
13 | name VARCHAR(100),
14 | city VARCHAR(50),
15 | country VARCHAR(50)
16 | )
17 | - action: "CREATE TABLE orders"
18 | sql: |
19 | CREATE TABLE orders (
20 | id INT,
21 | customer_id INT,
22 | amount DECIMAL(10,2),
23 | order_date VARCHAR(20),
24 | status VARCHAR(20)
25 | )
26 | - action: "CREATE TABLE products"
27 | sql: |
28 | CREATE TABLE products (
29 | id INT,
30 | name VARCHAR(100),
31 | category VARCHAR(50),
32 | price DECIMAL(10,2)
33 | )
34 |
35 | test_cases:
36 |
37 | # === SETUP TEST DATA ===
38 | - name: "Insert customer test data"
39 | sql: |
40 | INSERT INTO customers (id, name, city, country) VALUES
41 | (1, 'John Doe', 'New York', 'USA'),
42 | (2, 'Jane Smith', 'London', 'UK'),
43 | (3, 'Hans Mueller', 'Berlin', 'Germany'),
44 | (4, 'Marie Dupont', 'Paris', 'France'),
45 | (5, 'Carlos Rodriguez', 'Madrid', 'Spain')
46 | expect:
47 | affected_rows: 5
48 |
49 | - name: "Insert order test data"
50 | sql: |
51 | INSERT INTO orders (id, customer_id, amount, order_date, status) VALUES
52 | (1, 1, 299.99, '2024-01-15', 'completed'),
53 | (2, 2, 150.00, '2024-02-01', 'completed'),
54 | (3, 3, 500.00, '2024-01-20', 'pending'),
55 | (4, 4, 75.50, '2024-02-10', 'completed'),
56 | (5, 5, 1200.00, '2024-02-15', 'shipped')
57 | expect:
58 | affected_rows: 5
59 |
60 | - name: "Insert product test data"
61 | sql: |
62 | INSERT INTO products (id, name, category, price) VALUES
63 | (1, 'Laptop', 'Electronics', 999.99),
64 | (2, 'Mouse', 'Electronics', 25.99),
65 | (3, 'Keyboard', 'Electronics', 79.99),
66 | (4, 'Chair', 'Furniture', 199.99),
67 | (5, 'Desk', 'Furniture', 299.99)
68 | expect:
69 | affected_rows: 5
70 |
71 | # === BASIC VERIFICATION ===
72 |   - name: "Simple SELECT verification"
73 | sql: "SELECT * FROM customers"
74 | expect: {}
75 |
76 | # === WORKING FEATURES VERIFICATION ===
77 |   - name: "Verify customer row with id = 1 is readable"
78 | sql: "SELECT * FROM customers WHERE id = 1"
79 | expect: {}
80 |
81 | cleanup:
82 | - action: "Clean up customers table"
83 | sql: "DROP TABLE IF EXISTS customers"
84 | - action: "Clean up orders table"
85 | sql: "DROP TABLE IF EXISTS orders"
86 | - action: "Clean up products table"
87 | sql: "DROP TABLE IF EXISTS products"
--------------------------------------------------------------------------------
/tests/07_error_cases.yaml:
--------------------------------------------------------------------------------
1 | metadata:
2 | version: "1.0"
3 | description: "Error cases that should fail"
4 |
5 | setup: []
6 |
7 | test_cases:
8 | - name: "Syntax error - column id is not defined"
9 | sql: "SELECT * WHERE id = 1"
10 | expect:
11 | error: true
12 |
13 | - name: "Syntax error - invalid SQL statement"
14 | sql: "SELCT * FROM users"
15 | expect:
16 | error: true
17 |
18 | - name: "Non-existing table"
19 | sql: "SELECT * FROM non_existing_table"
20 | expect:
21 | error: true
22 |
23 | - name: "Non-existing column"
24 | sql: "SELECT non_existing_column FROM users"
25 | expect:
26 | error: true
27 |
28 | - name: "Invalid column reference in WHERE"
29 | sql: "SELECT * FROM users WHERE invalid_column = 1"
30 | expect:
31 | error: true
32 |
33 | - name: "Invalid function name"
34 | sql: "SELECT INVALID_FUNCTION()"
35 | expect:
36 | error: true
37 |
38 | - name: "Duplicate table creation (MemCP allows duplicate CREATE TABLE)"
39 | sql: "CREATE TABLE users (id INTEGER)"
40 | expect: {}
41 |
42 | - name: "Drop non-existing table"
43 | sql: "DROP TABLE non_existing_table"
44 | expect:
45 | error: true
46 |
47 | - name: "Missing closing parenthesis"
48 | sql: "SELECT * FROM (SELECT * FROM users"
49 | expect:
50 | error: true
51 |
52 | cleanup: []
53 |
--------------------------------------------------------------------------------
/tests/08_rdf_sparql.yaml:
--------------------------------------------------------------------------------
1 | metadata:
2 | version: "1.0"
3 | description: "RDF and SPARQL query testing with TTL data"
4 |
5 | setup: []
6 |
7 | test_cases:
8 | # Error cases for SPARQL - parser should reject these
9 | - name: "Invalid SPARQL syntax - missing braces"
10 | sparql: |
11 | SELECT ?name WHERE
12 | ?person ?name .
13 | expect:
14 | error: true
15 |
16 | - name: "Invalid SPARQL syntax - malformed SELECT keyword"
17 | sparql: |
18 | SELCT ?name WHERE {
19 | ?person ?name .
20 | }
21 | expect:
22 | error: true
23 |
24 | - name: "Invalid SPARQL syntax - missing WHERE clause"
25 | sparql: |
26 | SELECT ?name {
27 | ?person ?name .
28 | }
29 | expect:
30 | error: true
31 |
32 | cleanup: []
--------------------------------------------------------------------------------
/tests/09_joins.yaml:
--------------------------------------------------------------------------------
1 | # Minimal Table Operations Test - JOIN Version
2 | # Testing just the most basic operations to isolate a database corruption issue
3 | # Includes JOIN operations
4 |
5 | metadata:
6 | version: "1.0"
7 | description: "Minimal table operations test for debugging, including JOINs"
8 |
9 | setup:
10 |   - name: "Clean up simple_test"
11 |     sql: "DROP TABLE IF EXISTS simple_test"
12 |   - name: "Clean up related_data"
13 |     sql: "DROP TABLE IF EXISTS related_data"
14 |   - name: "Create simple_test"
15 |     sql: "CREATE TABLE simple_test (id INT, name VARCHAR(50))"
16 |   - name: "Create related_data"
17 |     sql: "CREATE TABLE related_data (id INT, simple_test_id INT, description VARCHAR(100))"
18 |   - name: "Seed simple_test rows"
19 | sql: "INSERT INTO simple_test (id, name) VALUES (1, 'test'), (2, 'another')"
20 |
21 | test_cases:
22 | - name: "Simple INSERT"
23 | sql: "INSERT INTO related_data (id, simple_test_id, description) VALUES (101, 1, 'Description for test'), (102, 2, 'Another description')"
24 | expect:
25 |       affected_rows: 2  # should be two inserts
26 |
27 | - name: "SELECT simple test"
28 | sql: "SELECT * FROM related_data WHERE simple_test_id = 1"
29 | expect:
30 | rows: 1 # Should return the row where simple_test_id is 1
31 |
32 | - name: "SELECT JOIN test with multiple records"
33 | sql: "SELECT s.name, r.description FROM simple_test s JOIN related_data r ON s.id = r.simple_test_id WHERE s.id = 1"
34 | expect:
35 | rows: 1
36 | expected_results:
37 | - name: 'test'
38 | description: 'Description for test'
39 |
40 |
41 | # - name: "Complex SELECT JOIN test with filtering"
42 | # sql: "SELECT s.name, r.description FROM simple_test s JOIN related_data r ON s.id = r.simple_test_id WHERE s.id IN (1, 2) AND r.description LIKE '%description%'"
43 | # expect:
44 | # rows: 2 # Should return rows where id is 1 or 2 AND description contains "description"
45 | # expected_results:
46 | # - name: 'test'
47 | # description: 'Description for test'
48 | # - name: 'another'
49 | # description: 'Another description'
50 | #
51 | # - name: "SELECT JOIN with COUNT"
52 | # sql: "SELECT COUNT(*) FROM simple_test s JOIN related_data r ON s.id = r.simple_test_id"
53 | # expect:
54 | # result: 2 #Should return the number of records related to simple_test
55 | #
56 | # - name: "SELECT JOIN with DISTINCT"
57 | # sql: "SELECT DISTINCT r.description FROM simple_test s JOIN related_data r ON s.id = r.simple_test_id"
58 | # expect:
59 | # rows: 2
60 | #      expected_results:
61 | # - description: "Description for test"
62 | # - description: "Another description"
63 | #
64 |
--------------------------------------------------------------------------------
/tests/10_nulls.yaml:
--------------------------------------------------------------------------------
1 | # NULLs and COALESCE Test Suite
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "NULL literal handling and COALESCE; COUNT with NULLs"
6 |
7 | setup:
8 | - sql: "DROP TABLE IF EXISTS null_test"
9 | - sql: "CREATE TABLE null_test (id INT, name VARCHAR(50))"
10 | - sql: "INSERT INTO null_test (id, name) VALUES (1, 'alpha'), (2, NULL), (3, NULL)"
11 |
12 | test_cases:
13 | - name: "NULL literal"
14 | sql: "SELECT NULL AS v"
15 | expect:
16 | rows: 1
17 |
18 | - name: "COALESCE picks first non-null"
19 | sql: "SELECT COALESCE(NULL, 'x', 'y') AS v"
20 | expect:
21 | rows: 1
22 | data:
23 | - v: "x"
24 |
25 | - name: "COUNT ignores NULL values"
26 | sql: "SELECT COUNT(name) AS c FROM null_test"
27 | expect:
28 | rows: 1
29 | data:
30 | - c: 1
31 |
32 | - name: "COUNT(*) counts all rows"
33 | sql: "SELECT COUNT(*) AS c FROM null_test"
34 | expect:
35 | rows: 1
36 | data:
37 | - c: 3
38 |
39 | cleanup:
40 | - sql: "DROP TABLE IF EXISTS null_test"
41 |
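The two COUNT expectations above follow directly from the seeded rows; in plain Python terms (an illustration, not memcp code):

```python
rows = [(1, "alpha"), (2, None), (3, None)]
assert sum(1 for _, name in rows if name is not None) == 1   # COUNT(name) skips NULLs
assert len(rows) == 3                                        # COUNT(*) counts every row
```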
--------------------------------------------------------------------------------
/tests/11_group_having.yaml:
--------------------------------------------------------------------------------
1 | # GROUP BY and HAVING Test Suite
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "Aggregation with GROUP BY and HAVING"
6 |
7 | setup:
8 | - sql: "DROP TABLE IF EXISTS sales"
9 | - sql: "CREATE TABLE sales (dept VARCHAR(20), amount INT)"
10 | - sql: |
11 | INSERT INTO sales (dept, amount) VALUES
12 | ('A', 10), ('A', 20), ('B', 5)
13 |
14 | test_cases:
15 | - name: "Group and count with HAVING"
16 | sql: "SELECT dept, COUNT(*) AS c FROM sales GROUP BY dept HAVING COUNT(*) > 1 ORDER BY dept"
17 | expect:
18 | rows: 1
19 | data:
20 | - dept: "A"
21 | c: 2
22 |
23 | - name: "Group with SUM and ORDER"
24 | sql: "SELECT dept, SUM(amount) AS s FROM sales GROUP BY dept ORDER BY SUM(amount) DESC"
25 | expect:
26 | rows: 2
27 |
28 | cleanup:
29 | - sql: "DROP TABLE IF EXISTS sales"
30 |
--------------------------------------------------------------------------------
/tests/12_joins_outer.yaml:
--------------------------------------------------------------------------------
1 | # Outer Join Semantics Test Suite
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "LEFT/RIGHT JOIN with missing matches and row multiplicity"
6 |
7 | setup:
8 | - sql: "DROP TABLE IF EXISTS parent"
9 | - sql: "DROP TABLE IF EXISTS child"
10 | - sql: "CREATE TABLE parent (id INT, label VARCHAR(20))"
11 | - sql: "CREATE TABLE child (pid INT, note VARCHAR(20))"
12 | - sql: "INSERT INTO parent (id, label) VALUES (1, 'p1'), (2, 'p2')"
13 | - sql: "INSERT INTO child (pid, note) VALUES (2, 'c2')"
14 |
15 | test_cases:
16 | # TODO(memcp): LEFT JOIN with unmatched row returns repeated NULLs (note: [null,null]).
17 | # - name: "LEFT JOIN preserves left rows"
18 | # sql: "SELECT p.id, c.note AS note FROM parent p LEFT JOIN child c ON p.id = c.pid ORDER BY p.id"
19 | # expect:
20 | # rows: 2
21 | # data:
22 | # - id: 1
23 | # note: null
24 | # - id: 2
25 | # note: "c2"
26 |
27 | - name: "RIGHT JOIN mirrors behavior"
28 | sql: "SELECT p.id, c.note AS note FROM parent p RIGHT JOIN child c ON p.id = c.pid ORDER BY p.id"
29 | expect:
30 | rows: 1
31 | data:
32 | - id: 2
33 | note: "c2"
34 |
35 | cleanup:
36 | - sql: "DROP TABLE IF EXISTS child"
37 | - sql: "DROP TABLE IF EXISTS parent"
38 |
--------------------------------------------------------------------------------
/tests/13_subselects.yaml:
--------------------------------------------------------------------------------
1 | # Subselects in FROM (Derived Tables)
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "Use SELECT in FROM (basic)"
6 |
7 | setup:
8 | - sql: "DROP TABLE IF EXISTS customers"
9 | - sql: "DROP TABLE IF EXISTS orders"
10 | - sql: "CREATE TABLE customers (id INT, name VARCHAR(50))"
11 | - sql: "CREATE TABLE orders (id INT, customer_id INT)"
12 | - sql: "INSERT INTO customers (id, name) VALUES (1, 'Alice'), (2, 'Bob')"
13 | - sql: "INSERT INTO orders (id, customer_id) VALUES (1,1), (2,1), (3,2)"
14 |
15 | test_cases:
16 | - name: "Derived table with filter"
17 | sql: "SELECT t.a FROM (SELECT 1 AS a) t WHERE t.a = 1"
18 | expect:
19 | rows: 1
20 | data:
21 | - a: 1
22 |
23 | # TODO(memcp): Subselect-in-FROM (derived tables) fails with "Unknown function: ".
24 | # - name: "Join with aggregated subselect"
25 | # sql: |
26 | # SELECT c.name, o.cnt
27 | # FROM customers c
28 | # JOIN (SELECT customer_id, COUNT(*) AS cnt FROM orders GROUP BY customer_id) o
29 | # ON c.id = o.customer_id
30 | # ORDER BY o.cnt DESC
31 | # LIMIT 1
32 | # expect:
33 | # rows: 1
34 | # data:
35 | # - name: "Alice"
36 | # cnt: 2
37 |
38 | cleanup:
39 | - sql: "DROP TABLE IF EXISTS orders"
40 | - sql: "DROP TABLE IF EXISTS customers"
41 |
--------------------------------------------------------------------------------
/tests/14_order_limit.yaml:
--------------------------------------------------------------------------------
1 | # ORDER BY and LIMIT/OFFSET
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "Ordering by multiple keys with pagination"
6 |
7 | setup:
8 | - sql: "DROP TABLE IF EXISTS ord_t"
9 | - sql: "CREATE TABLE ord_t (a INT, b INT)"
10 | - sql: |
11 | INSERT INTO ord_t (a,b) VALUES
12 | (1,2),(1,1),(2,2),(2,0),(3,1)
13 |
14 | test_cases:
15 | - name: "Top-1 by a DESC"
16 | sql: "SELECT a, b FROM ord_t ORDER BY a DESC LIMIT 1"
17 | expect:
18 | rows: 1
19 | data:
20 | - a: 3
21 | b: 1
22 |
23 | - name: "Top-1 by a ASC"
24 | sql: "SELECT a FROM ord_t ORDER BY a ASC LIMIT 1"
25 | expect:
26 | rows: 1
27 | data:
28 | - a: 1
29 |
30 | - name: "Top-2 by a DESC (no offset)"
31 | sql: "SELECT a FROM ord_t ORDER BY a DESC LIMIT 2"
32 | expect:
33 | rows: 2
34 | data:
35 | - a: 3
36 |
37 | - name: "Multi-key order with limit/offset"
38 | sql: "SELECT a, b FROM ord_t ORDER BY a DESC, b ASC LIMIT 2 OFFSET 1"
39 | expect:
40 | rows: 2
41 | data:
42 | - a: 2
43 | b: 0
44 | - a: 2
45 | b: 2
46 |
47 | cleanup:
48 | - sql: "DROP TABLE IF EXISTS ord_t"
49 |
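The multi-key case above can be double-checked with ordinary Python sorting over the seeded rows (a DESC, then b ASC, then OFFSET 1 / LIMIT 2); this is only a reference computation, not how memcp evaluates it:

```python
rows = [(1, 2), (1, 1), (2, 2), (2, 0), (3, 1)]
ordered = sorted(rows, key=lambda r: (-r[0], r[1]))   # ORDER BY a DESC, b ASC
assert ordered[1:3] == [(2, 0), (2, 2)]               # LIMIT 2 OFFSET 1
```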
--------------------------------------------------------------------------------
/tests/15_dml.yaml:
--------------------------------------------------------------------------------
1 | # DML: UPDATE and DELETE
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "Verify affected_rows and row changes for UPDATE/DELETE"
6 |
7 | setup:
8 | - sql: "DROP TABLE IF EXISTS dml_t"
9 | - sql: "CREATE TABLE dml_t (id INT, x INT, flag INT)"
10 | - sql: "INSERT INTO dml_t (id, x, flag) VALUES (1, 10, 1), (2, 20, 0), (3, 30, 0)"
11 |
12 | test_cases:
13 | - name: "UPDATE with predicate"
14 | sql: "UPDATE dml_t SET x = x + 1 WHERE id < 3"
15 | expect:
16 | affected_rows: 2
17 |
18 | - name: "DELETE with predicate"
19 | sql: "DELETE FROM dml_t WHERE flag = 0"
20 | expect:
21 | affected_rows: 2
22 |
23 | - name: "Remaining row count"
24 | sql: "SELECT COUNT(*) AS c FROM dml_t"
25 | expect:
26 | rows: 1
27 | data:
28 | - c: 1
29 |
30 | cleanup:
31 | - sql: "DROP TABLE IF EXISTS dml_t"
32 |
33 |
--------------------------------------------------------------------------------
/tests/16_group_by_sum.yaml:
--------------------------------------------------------------------------------
1 | # GROUP BY SUM Test Suite
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "Validate SELECT a, SUM(b) FROM tbl GROUP BY a"
6 |
7 | setup:
8 | - sql: "DROP TABLE IF EXISTS tbl"
9 | - sql: "CREATE TABLE tbl (a INT, b INT)"
10 | - sql: |
11 | INSERT INTO tbl (a, b) VALUES
12 | (1, 10), (1, 20), (2, 5), (3, 7)
13 |
14 | test_cases:
15 | - name: "Group sum by a (exact query)"
16 | sql: "SELECT a, SUM(b) FROM tbl GROUP BY a"
17 | expect:
18 | rows: 3
19 |
20 | - name: "Group sum by a with ORDER"
21 | sql: "SELECT a, SUM(b) AS s FROM tbl GROUP BY a ORDER BY a"
22 | expect:
23 | rows: 3
24 | data:
25 | - a: 1
26 | s: 30
27 | - a: 2
28 | s: 5
29 | - a: 3
30 | s: 7
31 |
32 | cleanup:
33 | - sql: "DROP TABLE IF EXISTS tbl"
34 |
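The expected group sums come straight from the seeded values; a quick reference computation in Python:

```python
from collections import defaultdict

rows = [(1, 10), (1, 20), (2, 5), (3, 7)]
sums = defaultdict(int)
for a, b in rows:
    sums[a] += b                       # SUM(b) ... GROUP BY a
assert dict(sums) == {1: 30, 2: 5, 3: 7}
```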
--------------------------------------------------------------------------------
/tests/17_strings_like.yaml:
--------------------------------------------------------------------------------
1 | # LIKE operator and patterns
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "Pattern matching with % and _"
6 |
7 | setup: []
8 |
9 | test_cases:
10 | - name: "Prefix match"
11 | sql: "SELECT 'alpha' LIKE 'a%' AS r"
12 | expect:
13 | rows: 1
14 | data:
15 | - r: true
16 |
17 | - name: "Single-char match"
18 | sql: "SELECT 'a1' LIKE 'a_' AS r"
19 | expect:
20 | rows: 1
21 | data:
22 | - r: true
23 |
24 | - name: "No match"
25 | sql: "SELECT 'beta' LIKE 'a%' AS r"
26 | expect:
27 | rows: 1
28 | data:
29 | - r: false
30 |
31 |
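A common way to model the two LIKE wildcards is a translation to a regular expression ('%' matches any run of characters, '_' exactly one). The sketch below only illustrates the expected truth values above; it is not memcp's matcher:

```python
import re

def like(value: str, pattern: str) -> bool:
    parts = []
    for ch in pattern:
        if ch == "%":
            parts.append(".*")      # any run of characters, including empty
        elif ch == "_":
            parts.append(".")       # exactly one character
        else:
            parts.append(re.escape(ch))
    return re.fullmatch("".join(parts), value) is not None

assert like("alpha", "a%")          # "Prefix match"
assert like("a1", "a_")             # "Single-char match"
assert not like("beta", "a%")       # "No match"
```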
--------------------------------------------------------------------------------
/tests/18_unnesting.yaml:
--------------------------------------------------------------------------------
1 | # Unnesting: Derived Tables (Basic)
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "Unnesting arbitrary queries: basic derived table"
6 |
7 | setup:
8 | - sql: "DROP TABLE IF EXISTS ord_t"
9 | - sql: "CREATE TABLE ord_t (a INT)"
10 | - sql: "INSERT INTO ord_t (a) VALUES (1),(3),(2),(5),(4)"
11 |
12 | test_cases:
13 | - name: "Select from simple derived table"
14 | sql: "SELECT * FROM (SELECT 1 AS a) t"
15 | expect:
16 | rows: 1
17 | data:
18 | - a: 1
19 |
20 | - name: "ORDER ignored without LIMIT in derived table"
21 | sql: "SELECT * FROM (SELECT a FROM ord_t ORDER BY a DESC) t"
22 | expect:
23 | rows: 5
24 |
25 | # TODO: add correlated subselect in FROM (LATERAL-style)
26 | # - name: "Derived table with correlation"
27 | # sql: |
28 | # SELECT x, y
29 | # FROM (SELECT 1 AS x) d
30 | # JOIN (SELECT x+1 AS y) t
31 | # expect:
32 | # rows: 1
33 |
34 | # TODO: ORDER BY in subquery ignored without LIMIT
35 | # - name: "ORDER ignored without LIMIT in derived table"
36 | # sql: "SELECT * FROM (SELECT 2 AS a UNION ALL SELECT 1 AS a ORDER BY a DESC) t"
37 | # expect: {}
38 |
39 | # TODO: ORDER BY with LIMIT in derived table affects rows
40 | # - name: "ORDER with LIMIT in derived table"
41 | # sql: "SELECT * FROM (SELECT 2 AS a UNION ALL SELECT 1 AS a ORDER BY a DESC LIMIT 1) t"
42 | # expect:
43 | # rows: 1
44 | # data:
45 | # - a: 2
46 |
47 | cleanup: []
48 |
--------------------------------------------------------------------------------
/tests/19_subselect_order.yaml:
--------------------------------------------------------------------------------
1 | # ORDER BY on derived-table alias
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "ORDER BY t.col where t is a subselect alias"
6 |
7 | setup:
8 | - sql: "DROP TABLE IF EXISTS temperature"
9 | - sql: "DROP TABLE IF EXISTS sensor"
10 | - sql: "CREATE TABLE temperature (date INT, sensor INT, temp INT)"
11 | - sql: "CREATE TABLE sensor (ID INT, name VARCHAR(64))"
12 | - sql: |
13 | INSERT INTO temperature (date, sensor, temp) VALUES
14 | (10, 1, 21),
15 | (20, 1, 22),
16 | (15, 2, 23)
17 | - sql: |
18 | INSERT INTO sensor (ID, name) VALUES
19 | (1, 'S1'),
20 | (2, 'S2')
21 |
22 | test_cases:
23 | - name: "Order by alias-qualified column"
24 | sql: |
25 | SELECT `t`.*
26 | FROM (SELECT `date` AS `date` FROM `temperature`) AS `t`
27 | ORDER BY `t`.`date` DESC
28 | LIMIT 2 OFFSET 0
29 | expect:
30 | rows: 2
31 | data:
32 | - date: 20
33 | - date: 15
34 |
35 | - name: "Derived alias WHERE on base column"
36 | sql: |
37 | SELECT t.*
38 | FROM (
39 | SELECT `date` AS `date`, `sensor` AS `sensor`
40 | FROM `temperature`
41 | ) AS t
42 | WHERE t.sensor = 1
43 | ORDER BY t.date DESC
44 | LIMIT 2
45 | expect:
46 | rows: 2
47 | data:
48 | - date: 20
49 | sensor: 1
50 | - date: 10
51 | sensor: 1
52 |
53 | - name: "Derived alias WHERE inside subselect (qualified table)"
54 | sql: |
55 | SELECT t.*
56 | FROM (
57 | SELECT `date` AS `date`, `sensor` AS `sensor`
58 | FROM `temperature`
59 | WHERE temperature.`sensor` = 1
60 | ) AS t
61 | ORDER BY t.date DESC
62 | LIMIT 2
63 | expect:
64 | rows: 2
65 | data:
66 | - date: 20
67 | sensor: 1
68 | - date: 10
69 | sensor: 1
70 |
71 | - name: "Derived alias WHERE inside subselect (unqualified table)"
72 | sql: |
73 | SELECT t.*
74 | FROM (
75 | SELECT `date` AS `date`, `sensor` AS `sensor`
76 | FROM `temperature`
77 | WHERE `sensor` = 1
78 | ) AS t
79 | ORDER BY t.date DESC
80 | LIMIT 2
81 | expect:
82 | rows: 2
83 | data:
84 | - date: 20
85 | sensor: 1
86 | - date: 10
87 | sensor: 1
88 |
89 | - name: "Derived alias simple star without ORDER"
90 | sql: |
91 | SELECT t.*
92 | FROM (
93 | SELECT `date` AS `date`
94 | FROM `temperature`
95 | ) AS t
96 | LIMIT 3
97 | expect:
98 | rows: 3
99 |
100 | cleanup:
101 | - sql: "DROP TABLE IF EXISTS temperature"
102 | - sql: "DROP TABLE IF EXISTS sensor"
103 |
--------------------------------------------------------------------------------
/tests/20_default_values.yaml:
--------------------------------------------------------------------------------
1 | # Default Values Test Suite
2 | # Validate that DEFAULT values are applied on INSERT when a column is omitted
3 |
4 | metadata:
5 | version: "1.0"
6 | description: "Default column values (boolean) on INSERT"
7 |
8 | setup:
9 | - { sql: "CREATE TABLE default_test (id INT, flag BOOLEAN DEFAULT FALSE) ENGINE=MEMORY" }
10 |
11 | test_cases:
12 | - name: "Insert without defaulted column"
13 | sql: "INSERT INTO default_test (id) VALUES (1)"
14 | expect:
15 | affected_rows: 1
16 |
17 | - name: "Select row with default applied"
18 | sql: "SELECT id, flag FROM default_test ORDER BY id"
19 | expect:
20 | rows: 1
21 | data:
22 | - { id: 1, flag: false }
23 |
24 | - name: "Insert with explicit TRUE"
25 | sql: "INSERT INTO default_test (id, flag) VALUES (2, TRUE)"
26 | expect:
27 | affected_rows: 1
28 |
29 | - name: "Verify both rows and flags"
30 | sql: "SELECT id, flag FROM default_test ORDER BY id"
31 | expect:
32 | rows: 2
33 | data:
34 | - { id: 1, flag: false }
35 | - { id: 2, flag: true }
36 |
37 | - name: "Alter default to TRUE via ALTER COLUMN"
38 | sql: "ALTER TABLE default_test ALTER COLUMN flag SET DEFAULT TRUE"
39 | expect: {}
40 |
41 | - name: "Insert after default changed to TRUE"
42 | sql: "INSERT INTO default_test (id) VALUES (3)"
43 | expect:
44 | affected_rows: 1
45 |
46 | - name: "Verify defaults after change to TRUE"
47 | sql: "SELECT id, flag FROM default_test ORDER BY id"
48 | expect:
49 | rows: 3
50 | data:
51 | - { id: 1, flag: false }
52 | - { id: 2, flag: true }
53 | - { id: 3, flag: true }
54 |
55 | - name: "Alter default to FALSE via ALTER COLUMN"
56 | sql: "ALTER TABLE default_test ALTER COLUMN flag SET DEFAULT FALSE"
57 | expect: {}
58 |
59 | - name: "Insert after default changed to FALSE"
60 | sql: "INSERT INTO default_test (id) VALUES (4)"
61 | expect:
62 | affected_rows: 1
63 |
64 | - name: "Verify defaults after change to FALSE"
65 | sql: "SELECT id, flag FROM default_test ORDER BY id"
66 | expect:
67 | rows: 4
68 | data:
69 | - { id: 1, flag: false }
70 | - { id: 2, flag: true }
71 | - { id: 3, flag: true }
72 | - { id: 4, flag: false }
73 |
74 | cleanup:
75 | - { sql: "DROP TABLE IF EXISTS default_test" }
76 |
--------------------------------------------------------------------------------
/tests/21_grant_revoke.yaml:
--------------------------------------------------------------------------------
1 | # GRANT/REVOKE Test Suite
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "User privileges: GRANT and REVOKE"
6 |
7 | setup:
8 | - sql: "DELETE FROM `system`.`access` WHERE username='alice'"
9 | expect: {}
10 | - sql: "DELETE FROM `system`.`user` WHERE username='alice'"
11 | expect: {}
12 |
13 | test_cases:
14 | - name: "CREATE USER alice"
15 | sql: "CREATE USER alice IDENTIFIED BY 'pw'"
16 | expect:
17 | affected_rows: 1
18 |
19 | - name: "GRANT ALL ON *.* sets admin"
20 | sql: "GRANT ALL ON *.* TO alice"
21 | expect: {}
22 |
23 | - name: "admin flag is true after grant"
24 | sql: "SELECT admin FROM system.user WHERE username = 'alice'"
25 | expect:
26 | rows: 1
27 | data:
28 | - { admin: true }
29 |
30 | - name: "REVOKE ALL ON *.* clears admin"
31 | sql: "REVOKE ALL ON *.* FROM alice"
32 | expect: {}
33 |
34 | - name: "admin flag is false after revoke"
35 | sql: "SELECT admin FROM system.user WHERE username = 'alice'"
36 | expect:
37 | rows: 1
38 | data:
39 | - { admin: false }
40 |
41 | - name: "GRANT db access creates system.access entry"
42 | sql: "GRANT ALL PRIVILEGES ON `memcp-tests`.* TO alice"
43 | expect: {}
44 |
45 | - name: "Verify access entry exists"
46 | sql: "SELECT database FROM system.access WHERE username='alice' AND database='memcp-tests'"
47 | expect:
48 | rows: 1
49 | data:
50 | - { database: "memcp-tests" }
51 |
52 | - name: "REVOKE db access removes entry"
53 | sql: "REVOKE ALL PRIVILEGES ON `memcp-tests`.* FROM alice"
54 | expect:
55 | affected_rows: 1
56 |
57 | - name: "Verify access entry removed"
58 | sql: "SELECT database FROM system.access WHERE username='alice' AND database='memcp-tests'"
59 | expect:
60 | rows: 0
61 |
62 | # Policy enforcement tests (per-user auth)
63 | - name: "prepare table t as root (create)"
64 | sql: "CREATE TABLE t(id INT, name TEXT)"
65 | expect: {}
66 |
67 | - name: "prepare table t as root (insert)"
68 | sql: "INSERT INTO t(id, name) VALUES (1, 'x')"
69 | expect: { affected_rows: 1 }
70 |
71 | - name: "alice cannot read without grant"
72 | username: "alice"
73 | password: "pw"
74 | sql: "SELECT * FROM t"
75 | expect:
76 | error: true
77 |
78 | - name: "grant db access to alice"
79 | sql: "GRANT SELECT ON `memcp-tests`.* TO alice"
80 | expect: {}
81 |
82 | - name: "alice can read after grant"
83 | username: "alice"
84 | password: "pw"
85 | sql: "SELECT id, name FROM t"
86 | expect:
87 | rows: 1
88 | data:
89 | - { id: 1, name: "x" }
90 |
91 | - name: "revoke db access from alice"
92 | sql: "REVOKE SELECT ON `memcp-tests`.* FROM alice"
93 | expect: { affected_rows: 1 }
94 |
95 | cleanup: []
96 |
--------------------------------------------------------------------------------
/tests/22_delete_qualified.yaml:
--------------------------------------------------------------------------------
1 | # Schema-qualified DELETE and DROP DATABASE IF EXISTS
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "Ensure DELETE FROM schema.tbl works and DROP DATABASE IF EXISTS is accepted"
6 |
7 | setup:
8 | - sql: "DROP TABLE IF EXISTS dmlq"
9 | - sql: "CREATE TABLE dmlq (id INT, v INT)"
10 | - sql: "INSERT INTO dmlq (id, v) VALUES (1, 10), (2, 20), (3, 30)"
11 |
12 | test_cases:
13 | - name: "DELETE using schema-qualified table"
14 | sql: "DELETE FROM `memcp-tests`.dmlq WHERE v >= 20"
15 | expect:
16 | affected_rows: 2
17 |
18 | - name: "Remaining rows after schema-qualified delete"
19 | sql: "SELECT COUNT(*) AS c FROM dmlq"
20 | expect:
21 | rows: 1
22 | data:
23 | - c: 1
24 |
25 | - name: "DROP DATABASE IF EXISTS on non-existent db"
26 | sql: "DROP DATABASE IF EXISTS `surely_nonexistent_db_xyz`"
27 | expect: {}
28 |
29 | cleanup:
30 | - sql: "DROP TABLE IF EXISTS dmlq"
31 |
--------------------------------------------------------------------------------
/tests/23_policy_enforcement.yaml:
--------------------------------------------------------------------------------
1 | # Policy enforcement across SELECT/INSERT/UPDATE/DELETE
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "Policy checks for all CRUD operations"
6 |
7 | setup:
8 | - sql: "DELETE FROM `system`.`access` WHERE username='alice'"
9 | - sql: "DELETE FROM `system`.`user` WHERE username='alice'"
10 | - sql: "CREATE USER alice IDENTIFIED BY 'pw'"
11 | - sql: "DROP TABLE IF EXISTS t"
12 | - sql: "CREATE TABLE t(id INT, name TEXT)"
13 |
14 | test_cases:
15 | - name: "alice cannot CREATE DATABASE"
16 | username: "alice"
17 | password: "pw"
18 | sql: "CREATE DATABASE `policytestdb_alice`"
19 | expect: { error: true }
20 |
21 | - name: "alice cannot CREATE USER"
22 | username: "alice"
23 | password: "pw"
24 | sql: "CREATE USER charlie IDENTIFIED BY 'pw'"
25 | expect: { error: true }
26 |
27 | - name: "alice cannot GRANT on db"
28 | username: "alice"
29 | password: "pw"
30 | sql: "GRANT SELECT ON `memcp-tests`.* TO alice"
31 | expect: { error: true }
32 |
33 | - name: "alice cannot REVOKE on db"
34 | username: "alice"
35 | password: "pw"
36 | sql: "REVOKE SELECT ON `memcp-tests`.* FROM alice"
37 | expect: { error: true }
38 |
39 | - name: "alice cannot DROP DATABASE"
40 | username: "alice"
41 | password: "pw"
42 | sql: "DROP DATABASE `memcp-tests`"
43 | expect: { error: true }
44 |
45 | - name: "alice cannot SHUTDOWN"
46 | username: "alice"
47 | password: "pw"
48 | sql: "SHUTDOWN"
49 | expect: { error: true }
50 |
51 | - name: "alice cannot SELECT without grant"
52 | username: "alice"
53 | password: "pw"
54 | sql: "SELECT * FROM t"
55 | expect: { error: true }
56 |
57 | - name: "alice cannot INSERT without grant"
58 | username: "alice"
59 | password: "pw"
60 | sql: "INSERT INTO t(id, name) VALUES (1, 'x')"
61 | expect: { error: true }
62 |
63 | - name: "grant ALL on db to alice"
64 | sql: "GRANT ALL ON `memcp-tests`.* TO alice"
65 | expect: {}
66 |
67 | - name: "alice INSERT allowed after grant"
68 | username: "alice"
69 | password: "pw"
70 | sql: "INSERT INTO t(id, name) VALUES (2, 'y')"
71 | expect: { affected_rows: 1 }
72 |
73 | - name: "alice UPDATE allowed after grant"
74 | username: "alice"
75 | password: "pw"
76 | sql: "UPDATE t SET name='z' WHERE id=2"
77 | expect: { affected_rows: 1 }
78 |
79 | - name: "alice DELETE allowed after grant"
80 | username: "alice"
81 | password: "pw"
82 | sql: "DELETE FROM t WHERE id=2"
83 | expect: { affected_rows: 1 }
84 |
85 | - name: "revoke ALL on db from alice"
86 | sql: "REVOKE ALL ON `memcp-tests`.* FROM alice"
87 | expect: {}
88 |
89 | - name: "alice cannot SELECT after revoke"
90 | username: "alice"
91 | password: "pw"
92 | sql: "SELECT * FROM t"
93 | expect: { error: true }
94 |
95 | - name: "alice cannot DELETE after revoke"
96 | username: "alice"
97 | password: "pw"
98 | sql: "DELETE FROM t"
99 | expect: { error: true }
100 |
101 | cleanup:
102 | - sql: "DROP TABLE IF EXISTS t"
103 |
104 |
--------------------------------------------------------------------------------
/tests/24_mysql_basic_compat.yaml:
--------------------------------------------------------------------------------
1 | # MySQL Compatibility v1 — Basics
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "MySQL v1: identifiers, casts, NULL/booleans, escapes"
6 | # Suite is now expected to pass fully
7 |
8 | setup:
9 | - sql: DROP TABLE IF EXISTS `users`
10 |
11 | test_cases:
12 | - name: "CREATE TABLE with backticked identifiers"
13 | sql: |
14 | CREATE TABLE `users` (
15 | `id` INT PRIMARY KEY,
16 | `name` VARCHAR(100),
17 | `active` BOOLEAN
18 | )
19 | expect:
20 | affected_rows: 1
21 |
22 | - name: "INSERT respects backticks"
23 | sql: |
24 | INSERT INTO `users` (`id`, `name`, `active`) VALUES (1, 'Ada', true)
25 | expect:
26 | affected_rows: 1
27 |
28 | - name: "SELECT with backticks"
29 | sql: |
30 | SELECT `id`, `name`, `active` FROM `users`
31 | expect:
32 | rows: 1
33 | data:
34 | - id: 1
35 | name: "Ada"
36 | active: true
37 |
38 | - name: "Implicit numeric cast: string to int comparison"
39 | sql: |
40 | SELECT '1' = 1 AS eq
41 | expect:
42 | rows: 1
43 | data:
44 | - eq: true
45 |
46 | - name: "Implicit boolean truthiness"
47 | sql: |
48 | SELECT IF(1, 't', 'f') AS v
49 | expect:
50 | rows: 1
51 | data:
52 | - v: "t"
53 |
54 | - name: "SELECT WHERE without FROM"
55 | # Avoids aggregate over derived table; checks filter semantics directly
56 | sql: |
57 | SELECT 1 WHERE 1
58 | expect:
59 | rows: 1
60 |
61 | - name: "SELECT WHERE without FROM and false"
62 | # Avoids aggregate over derived table; checks filter semantics directly
63 | sql: |
64 | SELECT 1 WHERE 0
65 | expect:
66 | rows: 0
67 |
68 | - name: "NULL truthiness (WHERE filters out)"
69 | # Avoids aggregate over derived table; checks filter semantics directly
70 | sql: |
71 | SELECT 1 WHERE NULL
72 | expect:
73 | rows: 0
74 |
75 | - name: "String escape single quote"
76 | sql: |
77 | SELECT 'can\'t' AS s
78 | expect:
79 | rows: 1
80 | data:
81 | - s: "can't"
82 |
83 | - name: "Backslash literal (no escape)"
84 | noncritical: true
85 | sql: |
86 | SELECT 'line\\nbreak' AS s
87 | expect:
88 | rows: 1
89 | data:
90 | - s: "line\\nbreak"
91 |
92 | cleanup: []
93 |
--------------------------------------------------------------------------------
/tests/25_schema_qualified_insert.yaml:
--------------------------------------------------------------------------------
1 | # Schema-qualified INSERT
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "Support INSERT INTO schema.tbl(...) in memcp-tests"
6 |
7 | setup:
8 | - sql: "DROP TABLE IF EXISTS `memcp-tests`.base_models"
9 | - sql: |
10 | CREATE TABLE `memcp-tests`.base_models (
11 | id INT PRIMARY KEY,
12 | model VARCHAR(100)
13 | )
14 |
15 | test_cases:
16 | - name: "INSERT into schema-qualified table"
17 | sql: "INSERT INTO `memcp-tests`.base_models(id, model) VALUES (1, 'gpt')"
18 | expect:
19 | affected_rows: 1
20 |
21 | - name: "Verify insert via schema-qualified name"
22 | sql: "SELECT id, model FROM `memcp-tests`.base_models WHERE id = 1"
23 | expect:
24 | rows: 1
25 | data:
26 | - id: 1
27 | model: "gpt"
28 |
29 | cleanup:
30 | - sql: "DROP TABLE IF EXISTS `memcp-tests`.base_models"
31 |
--------------------------------------------------------------------------------
/tests/26_mysql_datetime_defaults.yaml:
--------------------------------------------------------------------------------
1 | # MySQL Compatibility v1 — Date/Time semantics
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "MySQL v1: CURRENT_TIMESTAMP defaults, NOW(), comparisons"
6 |
7 | setup:
8 | - sql: "DROP TABLE IF EXISTS tstamp"
9 |
10 | test_cases:
11 | - name: "CREATE TABLE with default CURRENT_TIMESTAMP"
12 | sql: "CREATE TABLE tstamp (id INT PRIMARY KEY, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)"
13 | expect:
14 | affected_rows: 1
15 |
16 | - name: "INSERT row without timestamp uses default"
17 | sql: "INSERT INTO tstamp (id) VALUES (1)"
18 | expect:
19 | affected_rows: 1
20 |
21 | - name: "NOW() returns a value"
22 | sql: "SELECT NOW() AS nowv"
23 | expect: {}
24 |
25 | - name: "Default timestamp is not null"
26 | sql: "SELECT created_at IS NOT NULL AS ok FROM tstamp WHERE id = 1"
27 | expect:
28 | rows: 1
29 | data:
30 | - ok: true
31 |
32 | cleanup:
33 | - sql: "DROP TABLE IF EXISTS tstamp"
34 |
--------------------------------------------------------------------------------
/tests/27_mysql_keys_indexes.yaml:
--------------------------------------------------------------------------------
1 | # MySQL Compatibility v1 — Keys and Indexes (acceptance)
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "MySQL v1: PK/UK creation and basic behavior"
6 |
7 | setup:
8 | - sql: "DROP TABLE IF EXISTS k"
9 |
10 | test_cases:
11 | - name: "CREATE TABLE with PK and UNIQUE"
12 | sql: "CREATE TABLE k (id INT PRIMARY KEY, email VARCHAR(100) UNIQUE)"
13 | expect:
14 | affected_rows: 1
15 |
16 | - name: "CREATE INDEX accepted"
17 | sql: "CREATE INDEX idx_email ON k (email)"
18 | expect: {}
19 |
20 | - name: "Insert row"
21 | sql: "INSERT INTO k (id, email) VALUES (1, 'a@example.com')"
22 | expect:
23 | affected_rows: 1
24 |
25 | - name: "Duplicate PK should error"
26 | sql: "INSERT INTO k (id, email) VALUES (1, 'b@example.com')"
27 | expect:
28 | error: true
29 |
30 | - name: "Duplicate UNIQUE should error"
31 | sql: "INSERT INTO k (id, email) VALUES (2, 'a@example.com')"
32 | expect:
33 | error: true
34 |
35 | cleanup:
36 | - sql: "DROP TABLE IF EXISTS k"
37 |
--------------------------------------------------------------------------------
/tests/28_mysql_fk_acceptance.yaml:
--------------------------------------------------------------------------------
1 | # MySQL Compatibility v1 — Foreign Key DDL acceptance
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "MySQL v1: accept FK DDL (metadata/no-op)"
6 |
7 | setup:
8 | - sql: "DROP TABLE IF EXISTS child"
9 | - sql: "DROP TABLE IF EXISTS parent"
10 |
11 | test_cases:
12 | - name: "CREATE parent/child with FK"
13 | sql: |
14 | CREATE TABLE parent (id INT PRIMARY KEY, name VARCHAR(50))
15 | expect:
16 | affected_rows: 1
17 |
18 | - name: "CREATE child with FK references"
19 | sql: |
20 | CREATE TABLE child (
21 | id INT PRIMARY KEY,
22 | parent_id INT,
23 | CONSTRAINT fk_p FOREIGN KEY (parent_id) REFERENCES parent(id)
24 | )
25 | expect: {}
26 |
27 | - name: "INSERT matching FK"
28 | sql: |
29 | INSERT INTO parent (id, name) VALUES (1, 'p1')
30 | expect:
31 | affected_rows: 1
32 |
33 | - name: "Insert child referencing parent"
34 | sql: "INSERT INTO child (id, parent_id) VALUES (10, 1)"
35 | expect: {}
36 |
37 | cleanup:
38 | - sql: "DROP TABLE IF EXISTS child"
39 | - sql: "DROP TABLE IF EXISTS parent"
40 |
--------------------------------------------------------------------------------
/tests/29_mysql_upsert.yaml:
--------------------------------------------------------------------------------
1 | # MySQL Compatibility v1 — INSERT ... ON DUPLICATE KEY UPDATE
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "MySQL v1: upsert semantics for PK/UNIQUE keys"
6 |
7 | setup:
8 | - sql: "DROP TABLE IF EXISTS upsert_t"
9 | - sql: |
10 | CREATE TABLE upsert_t (
11 | id INT PRIMARY KEY,
12 | x INT,
13 | y INT,
14 | email VARCHAR(100) UNIQUE
15 | )
16 |
17 | test_cases:
18 | - name: "Insert initial row"
19 | sql: "INSERT INTO upsert_t (id, x, y, email) VALUES (1, 10, 5, 'a@example.com')"
20 | expect:
21 | affected_rows: 1
22 |
23 | - name: "Select initial state"
24 | sql: "SELECT id, x, y, email FROM upsert_t ORDER BY id"
25 | expect:
26 | rows: 1
27 | data:
28 | - id: 1
29 | x: 10
30 | y: 5
31 | email: "a@example.com"
32 |
33 | - name: "Simple overwrite: x = VALUES(x) on PK conflict"
34 | sql: |
35 | INSERT INTO upsert_t (id, x, y, email)
36 | VALUES (1, 42, 99, 'a@example.com')
37 | ON DUPLICATE KEY UPDATE x = VALUES(x)
38 | expect: {}
39 |
40 | - name: "Verify overwrite changed only x (not y/email)"
41 | sql: "SELECT id, x, y, email FROM upsert_t WHERE id = 1"
42 | expect:
43 | rows: 1
44 | data:
45 | - id: 1
46 | x: 42
47 | y: 5
48 | email: "a@example.com"
49 |
50 | - name: "Increment: x = x + 1 on PK conflict"
51 | sql: |
52 | INSERT INTO upsert_t (id, x, y, email)
53 | VALUES (1, 0, 0, 'a@example.com')
54 | ON DUPLICATE KEY UPDATE x = x + 1
55 | expect: {}
56 |
57 | - name: "Verify increment"
58 | sql: "SELECT id, x FROM upsert_t WHERE id = 1"
59 | expect:
60 | rows: 1
61 | data:
62 | - id: 1
63 | x: 43
64 |
65 | - name: "Add incoming: x = x + VALUES(x)"
66 | sql: |
67 | INSERT INTO upsert_t (id, x, y, email)
68 | VALUES (1, 7, 0, 'a@example.com')
69 | ON DUPLICATE KEY UPDATE x = x + VALUES(x)
70 | expect: {}
71 |
72 | - name: "Verify add incoming"
73 | sql: "SELECT id, x FROM upsert_t WHERE id = 1"
74 | expect:
75 | rows: 1
76 | data:
77 | - id: 1
78 | x: 50
79 |
80 | - name: "Use VALUES(y) in expression: x = x + VALUES(y)"
81 | sql: |
82 | INSERT INTO upsert_t (id, x, y, email)
83 | VALUES (1, 123, 4, 'a@example.com')
84 | ON DUPLICATE KEY UPDATE x = x + VALUES(y)
85 | expect: {}
86 |
87 | - name: "Verify VALUES(y) usage"
88 | sql: "SELECT id, x FROM upsert_t WHERE id = 1"
89 | expect:
90 | rows: 1
91 | data:
92 | - id: 1
93 | x: 54
94 |
95 | - name: "Seed second row"
96 | sql: "INSERT INTO upsert_t (id, x, y, email) VALUES (2, 7, 0, 'b@example.com')"
97 | expect:
98 | affected_rows: 1
99 |
100 | - name: "Upsert via UNIQUE(email) conflict; update x = VALUES(x)"
101 | sql: |
102 | INSERT INTO upsert_t (id, x, y, email)
103 | VALUES (3, 8, 9, 'b@example.com')
104 | ON DUPLICATE KEY UPDATE x = VALUES(x)
105 | expect: {}
106 |
107 | - name: "Verify UNIQUE conflict updated row with email b@example.com"
108 | sql: "SELECT id, x, y, email FROM upsert_t WHERE email = 'b@example.com'"
109 | expect:
110 | rows: 1
111 | data:
112 | - id: 2
113 | x: 8
114 | y: 0
115 | email: "b@example.com"
116 |
117 | - name: "Row count remains 2"
118 | sql: "SELECT COUNT(*) AS c FROM upsert_t"
119 | expect:
120 | rows: 1
121 | data:
122 | - c: 2
123 |
124 | - name: "No-op overwrite: x = VALUES(x) with same value"
125 | sql: |
126 | INSERT INTO upsert_t (id, x, y, email)
127 | VALUES (2, 8, 111, 'b@example.com')
128 | ON DUPLICATE KEY UPDATE x = VALUES(x)
129 | expect:
130 | affected_rows: 1
131 |
132 | - name: "Verify no-op did not change y"
133 | sql: "SELECT id, x, y FROM upsert_t WHERE id = 2"
134 | expect:
135 | rows: 1
136 | data:
137 | - id: 2
138 | x: 8
139 | y: 0
140 |
141 | - name: "Upsert changed returns affected_rows=2 (MySQL semantics)"
142 | sql: |
143 | INSERT INTO upsert_t (id, x, y, email)
144 | VALUES (2, 9, 0, 'b@example.com')
145 | ON DUPLICATE KEY UPDATE x = x + 1
146 | expect:
147 | affected_rows: 2
148 |
149 | - name: "Delete row id=1"
150 | sql: "DELETE FROM upsert_t WHERE id = 1"
151 | expect:
152 | affected_rows: 1
153 |
154 | - name: "Reinsert same PK+email after delete"
155 | sql: "INSERT INTO upsert_t (id, x, y, email) VALUES (1, 100, 0, 'a@example.com')"
156 | expect:
157 | affected_rows: 1
158 |
159 | - name: "Row count back to 2"
160 | sql: "SELECT COUNT(*) AS c FROM upsert_t"
161 | expect:
162 | rows: 1
163 | data:
164 | - c: 2
165 |
166 | - name: "Verify reinserted row"
167 | sql: "SELECT id, x, y, email FROM upsert_t WHERE id = 1"
168 | expect:
169 | rows: 1
170 | data:
171 | - id: 1
172 | x: 100
173 | y: 0
174 | email: "a@example.com"
175 |
176 | - name: "Illegal duplicate PK insert errors"
177 | sql: "INSERT INTO upsert_t (id, x, y, email) VALUES (2, 9, 9, 'new@example.com')"
178 | expect:
179 | error: true
180 |
181 | - name: "Illegal duplicate UNIQUE(email) insert errors"
182 | sql: "INSERT INTO upsert_t (id, x, y, email) VALUES (3, 1, 1, 'b@example.com')"
183 | expect:
184 | error: true
185 |
186 | cleanup:
187 | - sql: "DROP TABLE IF EXISTS upsert_t"
188 |
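The running value of x for id = 1 can be traced by hand through the upsert cases above; each ON DUPLICATE KEY UPDATE expression uses either the stored x or the incoming VALUES(...) as noted:

```python
x = 10          # initial INSERT
x = 42          # x = VALUES(x)       with incoming x = 42
x = x + 1       # x = x + 1                                  -> 43
x = x + 7       # x = x + VALUES(x)   with incoming x = 7    -> 50
x = x + 4       # x = x + VALUES(y)   with incoming y = 4    -> 54
assert x == 54  # matches "Verify VALUES(y) usage"
```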
--------------------------------------------------------------------------------
/tests/30_trailing_semicolon.yaml:
--------------------------------------------------------------------------------
1 | # Trailing semicolon acceptance
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "Accept a trailing ';' at end of SQL statement"
6 |
7 | setup:
8 | - sql: "DROP TABLE IF EXISTS ts;"
9 |
10 | test_cases:
11 | - name: "CREATE TABLE with trailing semicolon"
12 | sql: |
13 | CREATE TABLE ts (
14 | id INT PRIMARY KEY,
15 | v INT
16 | );
17 | expect:
18 | affected_rows: 1
19 |
20 | - name: "INSERT with trailing semicolon"
21 | sql: "INSERT INTO ts (id, v) VALUES (1, 10);"
22 | expect:
23 | affected_rows: 1
24 |
25 | - name: "SELECT with trailing semicolon"
26 | sql: "SELECT v FROM ts WHERE id = 1;"
27 | expect:
28 | rows: 1
29 | data:
30 | - v: 10
31 |
32 | cleanup:
33 | - sql: "DROP TABLE IF EXISTS ts;"
34 |
35 |
--------------------------------------------------------------------------------
/tests/31_length_function.yaml:
--------------------------------------------------------------------------------
1 | # LENGTH(str) function
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "Implement LENGTH(str) with NULL handling"
6 |
7 | setup: []
8 |
9 | test_cases:
10 | - name: "LENGTH of literal"
11 | sql: "SELECT LENGTH('abc') AS n"
12 | expect:
13 | rows: 1
14 | data:
15 | - n: 3
16 |
17 | - name: "LENGTH of empty string"
18 | sql: "SELECT LENGTH('') AS n"
19 | expect:
20 | rows: 1
21 | data:
22 | - n: 0
23 |
24 | - name: "LENGTH of NULL is NULL"
25 | sql: "SELECT LENGTH(NULL) AS n"
26 | expect:
27 | rows: 1
28 | data:
29 | - n: null
30 |
31 | cleanup: []
32 |
33 |
--------------------------------------------------------------------------------
/tests/32_expr_subselects.yaml:
--------------------------------------------------------------------------------
1 | # Subselects in expressions (IN/EXISTS/scalar)
2 |
3 | metadata:
4 | version: "1.0"
5 | description: "Parse IN (SELECT ...), EXISTS (SELECT ...), scalar (SELECT ...) but reject in planner"
6 |
7 | setup: []
8 |
9 | # TODO: turn error: true into what we expect according to SQL standard
10 | # TODO: also add complex cases with (SELECT FROM )
11 |
12 | test_cases:
13 | - name: "IN subselect simple"
14 | sql: "SELECT 1 WHERE 1 IN (SELECT 1)"
15 | expect:
16 | error: true
17 |
18 | - name: "NOT IN subselect simple"
19 | sql: "SELECT 1 WHERE 1 NOT IN (SELECT 1)"
20 | expect:
21 | error: true
22 |
23 | - name: "EXISTS subselect"
24 | sql: "SELECT 1 WHERE EXISTS (SELECT 1)"
25 | expect:
26 | error: true
27 |
28 | - name: "Scalar subselect in SELECT list"
29 | sql: "SELECT (SELECT 1) AS x"
30 | expect:
31 | error: true
32 |
33 | - name: "IN constant list still supported"
34 | sql: "SELECT 1 WHERE 1 IN (1,2,3)"
35 | expect:
36 | rows: 1
37 |
38 | cleanup: []
39 |
40 |
--------------------------------------------------------------------------------
/tests/33_collations_order.yaml:
--------------------------------------------------------------------------------
1 | metadata:
2 | version: "1.0"
3 | description: "ORDER BY with COLLATE comparators"
4 |
5 | setup:
6 | - sql: "DROP TABLE IF EXISTS coll_t"
7 | - sql: |
8 | CREATE TABLE coll_t (
9 | id INT,
10 | name VARCHAR(50)
11 | )
12 | - sql: "INSERT INTO coll_t (id, name) VALUES (1, 'a'), (2, 'B'), (3, 'aa'), (4, 'Ä')"
13 |
14 | test_cases:
15 | - name: "ORDER BY binary default (uppercase before lowercase)"
16 | sql: "SELECT name FROM coll_t ORDER BY name LIMIT 4"
17 | expect:
18 | rows: 4
19 | data:
20 | - name: "B"
21 | - name: "a"
22 | - name: "aa"
23 | - name: "Ä"
24 |
25 | - name: "ORDER BY COLLATE general_ci (case-insensitive, a before B)"
26 | sql: "SELECT name FROM coll_t ORDER BY name COLLATE utf8mb4_general_ci LIMIT 2"
27 | expect:
28 | rows: 2
29 | data:
30 | - name: "a"
31 | - name: "B"
32 |
33 | - name: "ORDER BY COLLATE general_ci DESC (reverse order)"
34 | sql: "SELECT name FROM coll_t ORDER BY name COLLATE utf8mb4_general_ci DESC LIMIT 2"
35 | expect:
36 | rows: 2
37 | data:
38 | - name: "B"
39 | - name: "a"
40 |
41 | - name: "ORDER BY explicit COLLATE bin DESC"
42 | sql: "SELECT name FROM coll_t ORDER BY name COLLATE bin DESC LIMIT 2"
43 | expect:
44 | rows: 2
45 | data:
46 | - name: "Ä"
47 | - name: "aa"
48 |
49 | cleanup:
50 | - sql: "DROP TABLE IF EXISTS coll_t"
51 |
52 |
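The binary ('bin') orderings asserted above match plain code-point comparison; a reference check in Python (the *_ci cases depend on the engine's collation tables and are not reproduced here):

```python
names = ["a", "B", "aa", "Ä"]
assert sorted(names) == ["B", "a", "aa", "Ä"]          # default/binary ASC
assert sorted(names, reverse=True)[:2] == ["Ä", "aa"]  # COLLATE bin DESC LIMIT 2
```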
--------------------------------------------------------------------------------
/tests/34_collation_columns.yaml:
--------------------------------------------------------------------------------
1 | metadata:
2 | version: "1.0"
3 | description: "Column-level COLLATE in CREATE/ALTER TABLE; verify persistence via SHOW"
4 |
5 | setup:
6 | - sql: "DROP TABLE IF EXISTS coll_cols"
7 | - sql: |
8 | CREATE TABLE coll_cols (
9 | id INT,
10 | txt TEXT COLLATE utf8mb4_general_ci
11 | )
12 | - sql: "INSERT INTO coll_cols (id, txt) VALUES (1, 'a'), (2, 'B')"
13 |
14 | test_cases:
15 | - name: "SHOW FULL COLUMNS exposes column collation"
16 | sql: "SHOW FULL COLUMNS FROM coll_cols"
17 | expect:
18 | rows: 2
19 | data:
20 | - Field: "id"
21 | - Field: "txt"
22 | Collation: "utf8mb4_general_ci"
23 |
24 | - name: "ORDER BY uses column COLLATE (ASC, general_ci)"
25 | sql: "SELECT txt FROM coll_cols ORDER BY txt LIMIT 2"
26 | expect:
27 | rows: 2
28 | data:
29 | - txt: "a"
30 | - txt: "B"
31 |
32 | - name: "SHUTDOWN memcp to verify persisted column collation"
33 | sql: "SHUTDOWN"
34 | expect:
35 | rows: 0
36 |
37 | - name: "ORDER BY uses column COLLATE (ASC, general_ci) after restart"
38 | sql: "SELECT txt FROM coll_cols ORDER BY txt LIMIT 2"
39 | expect:
40 | rows: 2
41 | data:
42 | - txt: "a"
43 | - txt: "B"
44 |
45 | - name: "ALTER COLUMN COLLATE to bin"
46 | sql: "ALTER TABLE coll_cols ALTER COLUMN txt COLLATE bin"
47 | expect:
48 | rows: 0
49 |
50 | - name: "SHOW FULL COLUMNS after ALTER shows new collation"
51 | sql: "SHOW FULL COLUMNS FROM coll_cols"
52 | expect:
53 | rows: 2
54 | data:
55 | - Field: "id"
56 | - Field: "txt"
57 | Collation: "bin"
58 |
59 | - name: "ORDER BY with column COLLATE bin (DESC)"
60 | sql: "SELECT txt FROM coll_cols ORDER BY txt DESC LIMIT 2"
61 | expect:
62 | rows: 2
63 | data:
64 | - txt: "a"
65 | - txt: "B"
66 |
67 | - name: "SHUTDOWN memcp after ALTER to verify bin persisted"
68 | sql: "SHUTDOWN"
69 | expect:
70 | rows: 0
71 |
72 | - name: "ORDER BY with column COLLATE bin (DESC) after restart"
73 | sql: "SELECT txt FROM coll_cols ORDER BY txt DESC LIMIT 2"
74 | expect:
75 | rows: 2
76 | data:
77 | - txt: "a"
78 | - txt: "B"
79 |
80 | cleanup:
81 | - sql: "DROP TABLE IF EXISTS coll_cols"
82 |
--------------------------------------------------------------------------------
/tests/35_memory_engine.yaml:
--------------------------------------------------------------------------------
1 | metadata:
2 | version: "1.0"
3 | description: "ENGINE=MEMORY table retains schema but not data across restart"
4 |
5 | setup:
6 | - sql: "DROP TABLE IF EXISTS mem_t"
7 | - sql: "CREATE TABLE mem_t (id INT, v TEXT) ENGINE=MEMORY"
8 |
9 | test_cases:
10 | - name: "Insert data into MEMORY table"
11 | sql: "INSERT INTO mem_t (id, v) VALUES (1, 'x'), (2, 'y')"
12 | expect:
13 | affected_rows: 2
14 |
15 | - name: "Select returns inserted rows"
16 | sql: "SELECT id, v FROM mem_t ORDER BY id"
17 | expect:
18 | rows: 2
19 | data:
20 | - id: 1
21 | v: "x"
22 | - id: 2
23 | v: "y"
24 |
25 | - name: "SHUTDOWN memcp"
26 | sql: "SHUTDOWN"
27 | expect:
28 | rows: 0
29 |
30 | - name: "Table exists after restart"
31 | sql: "SHOW FULL COLUMNS FROM mem_t"
32 | expect:
33 | rows: 2
34 |
35 | - name: "Select after restart returns no rows"
36 | sql: "SELECT id, v FROM mem_t ORDER BY id"
37 | expect:
38 | rows: 0
39 |
40 | cleanup:
41 | - sql: "DROP TABLE IF EXISTS mem_t"
42 |
--------------------------------------------------------------------------------
/tools/mysqldump-to-json.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import mysql.connector
4 | import simplejson as json  # simplejson serializes Decimal values (DECIMAL columns), unlike the stdlib json module
5 | import argparse
6 |
7 | parser = argparse.ArgumentParser()
8 | parser.add_argument('-H', '--host', default='localhost', help='hostname')
9 | parser.add_argument('-u', '--user', required=True, help='user')
10 | parser.add_argument('-p', '--password', required=True, help='password')
11 | parser.add_argument('database', help='database')
12 | args = parser.parse_args()
13 |
14 | hostname = args.host
15 | user = args.user
16 | password = args.password
17 | database = args.database  # positional argument, always provided
18 |
19 |
20 | mydb = mysql.connector.connect(
21 | host=hostname,
22 | user=user,
23 | password=password,
24 | database=database
25 | )
26 |
27 | mycursor = mydb.cursor()
28 | mycursor.execute("SHOW TABLES")  # enumerate all tables in the selected database
29 |
30 | tables = []
31 | for x in mycursor:
32 | tables.append(x[0])
33 |
34 | for t in tables:
35 |     print('#table ' + t)  # header line naming the table
36 |     mycursor.execute("SELECT * FROM `"+t.replace("`", "``")+"`")  # escape backticks in the identifier
37 |     print('#columns ', mycursor.column_names)  # column names for the rows that follow
38 |     for row in mycursor:
39 |         print(json.dumps(dict(zip(mycursor.column_names, row))))  # one JSON object per row
40 |     print('')  # blank line separates tables
41 |
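For a hypothetical two-column table `t`, the stream printed by this script looks roughly like the following: a `#table` line, a `#columns` line (a printed Python tuple), one JSON object per row, and a blank line after each table:

```
#table t
#columns  ('id', 'name')
{"id": 1, "name": "Ada"}
{"id": 2, "name": "Grace"}

```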
--------------------------------------------------------------------------------