├── .editorconfig
├── .prettierrc.json
├── LICENSE
├── README.md
├── docs
├── README.md
├── conventions.md
├── specs.md
└── style-guide.md
├── example.koy
└── parsers
└── lua-parser
├── README.md
├── deps
└── inspect.lua
├── samples
├── s1.koy
├── s2.koy
├── s3.koy
└── s4.koy
├── src
├── lib.lua
└── utils.lua
└── test.lua
/.editorconfig:
--------------------------------------------------------------------------------
1 | # EditorConfig helps developers define and maintain consistent
2 | # coding styles between different editors and IDEs
3 | # EditorConfig is awesome: https://EditorConfig.org
4 |
5 | root = true
6 |
7 |
8 | [*]
9 | charset = utf-8
10 | indent_size = 4
11 | indent_style = tab
12 | end_of_line = lf
13 | insert_final_newline = true
14 | trim_trailing_whitespace = true
15 |
16 | [*.{diff,md}]
17 | trim_trailing_whitespace = false
18 |
19 |
--------------------------------------------------------------------------------
/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "trailingComma": "es5",
3 | "tabWidth": 4,
4 | "semi": false,
5 | "singleQuote": true
6 | }
7 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Pocco81
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
🎏 Koy
3 |
4 |
5 |
6 | Experimental human-friendly data serialization language
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 | Koy is a new flexible and feature-rich data serialization language; easy for you, your dog and your average 5 year-old. Its design focuses on being visually unobtrusive while keeping an overall sense of verbosity, allowing easy-to-write parsers (in multiple languages) to effortlessly map the data to hash tables.
22 |
23 |
24 |
25 |
26 | ### 🐣 Example
27 |
28 | ```koy
29 | /*
30 | Hello world! this is a simple Koy document
31 | and you are reading a multi-line comment :^)
32 | */
33 |
34 | // let's import some settings, shall we?
35 | import "./settings/screen.koy"
36 |
37 | title: "Koy Example"
38 |
39 | person: {
40 | name: "Michael Theodor Mouse",
41 | age: 34
42 | }
43 |
44 | laptop: {
45 | owner: ${person} << {
46 | username: "mickey1234"
47 | },
48 | temp:int 203.04,
49 | married: true,
50 | document: ${title}
51 | }
52 |
53 | ports: [ 8001, 8002, 8003 ]
54 | ```
55 |
56 | > As shown here, Koy isn't only good for deeply nested data, but also for avoiding ambiguous use of it, managing its data types and modularizing it.
57 |
58 |
59 |
60 | ### 📋 Characteristics
61 |
62 | - **Friendly Syntax with Obvious Semantics**: everything in a Koy file works on a `key` -> `value` basis, therefore you can nest data as much as you want and no matter what, it's easy to comprehend at a glance.
63 | - **Standard Errors**: Koy defines a list of semantic errors throwable for when the parser screams _"oh crap! what is this?"_. This way developers get an implementation-agnostic definition that helps them debug their program's config faster.
64 | - **Unambiguous**: Koy has one, and only _ONE_ way to define each thing, because doing the opposite would increase the overall complexity of the language.
65 | - **Feature Rich:** Koy supports:
66 | - comments
67 | - variables
68 | - type casting & coercion
69 | - data overwriting
70 | - importing other koy files
71 | - native data-types:
72 | - Integer (`int`)
73 | - String (`str`)
74 | - Null (`null`)
75 | - Array (`arr`)
76 | - Boolean (`bool`)
77 | - Float (`flt`)
78 | - Object (`obj`)
79 |
80 |
81 |
82 | ### 🪴 Index
83 |
84 | + [Library Implementations](#-library-implementations)
85 | + [IDE/DE Support](#%EF%B8%8F-idede-support)
86 | + [Documentation](#-documentation)
87 | + [ToDo List](#-todo-list)
88 | + [FAQ](#-faq)
89 | + [License](#-license)
90 |
91 |
92 |
93 |
94 |
95 | ### 📚 Library Implementations
96 |
97 | The following is a list of library implementations for Koy:
98 |
99 | - 🌙 Lua: [`parser`](https://github.com/Pocco81/koy-lang/tree/main/parsers/lua-parser)
100 |
101 |
102 |
103 | ### 🖼️ IDE/DE Support
104 |
105 | The following is a list of IDE/DE plugins available for Koy:
106 |
107 | > 👷🛑 Under dev
108 |
109 |
110 |
111 | ### 🎁 Documentation
112 |
113 | You can read Koy's docs [here](https://github.com/Pocco81/koy-lang/tree/main/docs)
114 |
115 |
116 |
117 | ### 🧻 ToDo List
118 |
119 | Check out the list [here](https://github.com/Pocco81/koy-lang/projects/1).
120 |
121 |
122 |
123 | ### 🙋 FAQ
124 |
125 | - _**Why?**_
126 |
127 | **Disclaimers**:
128 |
129 | - these are just my thoughts on config/data serialization languages. Feel free to disagree (and to open an issue, I'm open to discussions.)
130 | - all this is for the sake of creating a _better_ language, however I still haven't decided if I'll end up making this. Initially this repo was just meant to be a rant, but I don't know, maybe something good will come out of here!
131 |
132 | With that said, let's continue...
133 |
134 | I like how XML is useful for porting data across platforms, however I dislike the fact that it "repeats itself" too much (opening and closing tags), so oftentimes it feels visually jammed. JSON, however, is vastly nicer on the eyes due to its notable hierarchical structure. But now, looking more at its syntax I can't help but feel like it's _too strict_(?) (numbers and long strings are a nightmare); apart from that, given that JSON is a data-only-type-of-config-language, nice stuff such as comments are not baked into it. On the other end of the spectrum, TOML, which focuses on being easy to read due to obvious semantics, sucks for deeply nested _data_. The `.` convention simply doesn't cut it for me. Furthermore, something that I wish any of these included by default is native support for variable placeholders. I know, YAML has aliases and anchors which sorta do the job; however, these simply don't work as such because they can't be inserted arbitrarily throughout a YAML file, which sucks too. Speaking about YAML, it is unnecessarily complex. Like, c'mon? 4 ways to define a simple boolean?
135 |
136 | **Koy**, in a sense, is just a proof of concept for what I envision my _ideal_ data serialization language looking like.
137 |
138 |
139 |
140 | ### 📜 License
141 |
142 | Koy is released under the MIT license, which grants the following permissions:
143 |
144 | - Commercial use
145 | - Distribution
146 | - Modification
147 | - Private use
148 |
149 | For more convoluted language, see the [LICENSE](https://github.com/koy-lang/koy-lang/blob/main/LICENSE).
150 |
151 |
152 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 |
2 |
📄 Docs
3 |
4 |
5 |
14 |
15 |
16 | Documentation for Koy, the human-friendly data serialization language
17 |
18 |
19 |
20 |
21 | ### 🎏 Koy Lang
22 |
23 | Koy is designed to be a minimal, simple and intuitive data serialization language; easy for you, your dog and your average 5 year-old. It has a small set of symbols, not too many rules and only one singular statement. Its lack of complexity makes it the perfect language for your project! easy to implement, debug and modularize.
24 |
25 | Go ahead and check out the [Conventions](https://github.com/Pocco81/koy-lang/blob/main/docs/conventions.md) to learn about the generalities of the language! if you end up liking it, consider checking out the [Specs sheet](https://github.com/Pocco81/koy-lang/blob/main/docs/specs.md) and the [Style Guide](https://github.com/Pocco81/koy-lang/blob/main/docs/style-guide.md).
26 |
27 |
28 |
29 | ### 📚 Cheat sheet
30 |
31 |
32 | Cheat sheet for symbols
33 |
34 |
35 |
36 |
37 | Symbols | Function | Example |
38 |
39 |
40 |
41 | // |
42 | Single-line comment |
43 |
44 |
45 | ```
46 | // hello world!
47 | ```
48 |
49 | |
50 |
51 |
52 |
53 | /**/ |
54 | Multi-line comment |
55 |
56 |
57 | ```
58 | /*
59 | This is a multi-line comment and
60 | you are watching Disney channel!
61 | */
62 | ```
63 |
64 | |
65 |
66 |
67 |
68 |
69 | : |
70 | Set a key, followed by its data type (optional) and then the value. To define a literal key put it between single quotes ('' ) |
71 |
72 |
73 | ```
74 | hello: "world!"
75 |
76 | // specifying data type
77 | temperature:int 12.23
78 | ```
79 |
80 | |
81 |
82 |
83 |
84 |
85 | ${} |
86 | Call a variable |
87 |
88 |
89 | ```
90 | // simple usage
91 | name: "Michael Theodor Mouse"
92 | hello: "Good evening ${name}"
93 |
94 | // with objects (using the `.` notation)
95 | user: {
96 | name: "Michael",
97 | surnames: "Theodor Mouse"
98 | }
99 | hi: "Good morning ${user.name}"
100 | ```
101 |
102 | |
103 |
104 |
105 |
106 |
107 | "" |
108 | Define a normal string |
109 |
110 |
111 | ```
112 | hello: "world"
113 | ```
114 |
115 | |
116 |
117 |
118 |
119 |
120 | """ """ |
121 | Define a multi-line string |
122 |
123 |
124 | ```
125 | hello: """My name is
126 | Michael Theodor Mouse, but
127 | you can call me Peter.
128 | """
129 | ```
130 |
131 | |
132 |
133 |
134 |
135 |
136 | '' |
137 | Define a literal value |
138 |
139 |
140 | ```
141 | weird_path: 'pc/\fds!fd/\&24324%!@'
142 | ```
143 |
144 | |
145 |
146 |
147 |
148 |
149 | [] |
150 | Define an array |
151 |
152 |
153 | ```
154 | hosts: [ "omega", "alpha", "gama" ]
155 | ```
156 |
157 | |
158 |
159 |
160 |
161 |
162 | {} |
163 | Define an object |
164 |
165 |
166 | ```
167 | user: {
168 | name: "Michael Theodor Mouse",
169 | age: 92
170 | }
171 | ```
172 |
173 | |
174 |
175 |
176 |
177 |
178 | import |
179 | Import other .koy files |
180 |
181 |
182 | ```
183 | // single import
184 | import "./directory/settings.koy"
185 |
186 | // multiple imports
187 | import {
188 | "./directory/user0.koy",
189 | "./directory/user1.koy",
190 | "./directory/user2.koy"
191 | }
192 | ```
193 |
194 | |
195 |
196 |
197 |
198 |
199 | << |
200 | Overwrite values from objects |
201 |
202 |
203 | ```
204 | user: {
205 | name: "Michael Theodor Mouse",
206 | age: 93
207 | }
208 |
209 | laptop: {
210 | name: "Lenovo Thinkpad",
211 | owner: ${user} << {
212 | name: "Dominic Toretto"
213 | }
214 | }
215 | ```
216 |
217 | |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 | Cheat sheet for rules
227 |
228 |
229 |
230 |
231 |
232 |
233 | Example .koy
file using every feature
234 |
235 |
236 |
237 |
238 |
239 |
240 |
--------------------------------------------------------------------------------
/docs/conventions.md:
--------------------------------------------------------------------------------
1 |
2 |
📐 Conventions
3 |
4 |
5 |
6 | Standard utilization of the language
7 |
8 |
9 |
10 |
11 | ### Filename
12 |
13 | + **Extension:** Koy files must use the `.koy` extension.
14 | + **Naming Convention:** try to stick with the _snake_case_ naming convention.
15 |
16 | ### MIME Type
17 |
18 | The appropriate MIME type is `application/koy`
19 |
20 |
21 |
--------------------------------------------------------------------------------
/docs/specs.md:
--------------------------------------------------------------------------------
1 |
2 |
✨ Specs
3 |
4 |
5 |
6 | Syntactic rules
7 |
8 |
9 |
10 |
11 | - [TL;DR](#tldr)
12 | - [General Structure](#general-structure)
13 | - [Comments](#comments)
14 | - [Data Types](#data-types)
15 | - [Specifying/Converting Data Types](#specifyingconverting-data-types)
16 | - [Integer](#integer-int)
17 | - [String](#string-str)
18 | - [Null](#null-null)
19 | - [Array](#array-arr)
20 | - [Boolean](#boolean-bool)
21 | - [Float](#float-float)
22 | - [Object](#object-bool)
23 | - [Imports](#imports)
24 | - [Variables](#variables)
25 |
26 |
27 |
28 | ### 😴 TL;DR
29 |
30 | Everything in a Koy file is a variable specified on a `key -> value` basis, for example: `hello: "world"`. If it makes sense to your data model, you may group them on objects (`{}`), which you can later put inside of arrays, because these operate on a `index -> value` basis. Furthermore, the data type of these values can be specified using `!!type` to enhance verbosity. These variables can be accessed using `${variable}`; on objects and arrays, however, you can use either the `${dot.convention}` or the `${brackets['convention']}`.
31 |
32 |
33 |
34 | ### General Structure
35 |
36 | Like JSON, `key -> values` are the foundation of everything. The general structure is the following one:
37 |
38 | ```
39 | key: value
40 | ```
41 |
42 | > notice the space between the `:` and the `value`.
43 |
44 | #### Keys
45 |
46 |
47 | Keys can only contain ASCII letters, digits and underscores (A-Za-z0-9_)
48 |
49 | ```
50 | key: "value"
51 | key_1: "value"
52 | 2001: "value"
53 |
54 | // invalid keys
55 | my-key: "value"
56 | இந்தியா: "value"
57 | ```
58 |
59 |
60 |
61 |
62 | Literal keys must be specified using backticks
63 |
64 | ```
65 | `mainland!tv.קום`: "value"
66 | ```
67 |
68 |
69 |
70 |
71 | Keys cannot be empty
72 |
73 | ```
74 | : "value" //invalid
75 | ```
76 |
77 |
78 |
79 |
80 | Keys cannot be duplicated
81 |
82 | ```
83 | hello: "world"
84 | hello: "momma!"
85 | ```
86 |
87 |
88 |
89 | #### Values
90 |
91 | Values can have any of the following data types:
92 |
93 | - Integer (`int`)
94 | - String (`str`)
95 | - Null (`null`)
96 | - Array (`arr`)
97 | - Boolean (`bool`)
98 | - Float (`flt`)
99 | - Object (`obj`)
100 |
101 |
102 |
103 | ### Comments
104 |
105 | Comments are an integral part of any program, that's why you've got two ways to use them:
106 |
107 | ```
108 | // single-line comment here!
109 |
110 | /*
111 | multi-line comment
112 | */
113 | ```
114 |
115 |
116 |
117 | ### Data Types
118 |
119 | A data type, in programming, is a classification that specifies which type of value a variable has and what type of mathematical, relational or logical operations can be applied to it without causing an error. On Koy files they are inferred, however they can still be specified and values can be converted according to your needs.
120 |
121 | #### Specifying/Converting Data Types
122 |
123 | Data types can be specified using two exclamation marks (`!!`) and then the desired type before the `value`. It will convert the incoming value if needed:
124 |
125 | ```
126 | num1: !!int 1.0 // converted to 1
127 | num2: !!float 100 // converted to 100.0
128 | num3: !!str 150 // converted to "150"
129 | ```
130 |
131 | #### Integer `(int)`
132 |
133 | Integers contain whole numbers:
134 |
135 | - Integers can simply be whole numbers
136 | - Positive numbers may be prefixed with a `+`
137 | - Negative numbers are prefixed with a `-`
138 | - Large numbers can be separated using underscores (`_`) to enhance readability. The only condition is that each underscore must be surrounded by at least one digit on each side
139 |
140 | ```
141 | i1: -35
142 | i2: 0
143 | i3: +104
144 | i4: 201
145 | i5: 2_005
146 | ```
147 |
148 | #### String `(str)`
149 |
150 | Strings contain alphanumeric characters:
151 |
152 | - Any Unicode character may be escaped with the \uXXXX or \UXXXXXXXX forms. The escape codes must be valid Unicode scalar values.
153 | - Basic strings are surrounded by quotation marks ("). Any Unicode character may be used except those that must be escaped: quotation mark, backslash, and the control characters other than tab (U+0000 to U+0008, U+000A to U+001F, U+007F).
154 |
155 | ```
156 | hello: "world!\nYou may call me \"Jeff\""
157 | ```
158 |
159 | For convenience, some popular characters have a compact escape sequence.
160 |
161 | ```
162 | \b - backspace (U+0008)
163 | \t - tab (U+0009)
164 | \n - linefeed (U+000A)
165 | \f - form feed (U+000C)
166 | \r - carriage return (U+000D)
167 | \" - quote (U+0022)
168 | \\ - backslash (U+005C)
169 | \uXXXX - unicode (U+XXXX)
170 | \UXXXXXXXX - unicode (U+XXXXXXXX)
171 | ```
172 |
173 | - Multi-line basic strings are surrounded by three quotation marks on each side and allow newlines. A newline immediately following the opening delimiter will be trimmed. All other whitespace and newline characters remain intact.
174 |
175 | ```
176 | str1: """
177 | Roses are red
178 | Violets are blue"""
179 | ```
180 |
181 | - For writing long strings without introducing extraneous whitespace, use a "line ending backslash". When the last non-whitespace character on a line is an unescaped \, it will be trimmed along with all whitespace (including newlines) up to the next non-whitespace character or closing delimiter. All of the escape sequences that are valid for basic strings are also valid for multi-line basic strings.
182 |
183 | ```
184 | // The following strings are byte-for-byte equivalent
185 | str1: "The quick brown fox jumps over the lazy dog"
186 |
187 | str2: """
188 | The quick brown \
189 |
190 |
191 | fox jumps over \
192 | the lazy dog"""
193 |
194 | str3: """\
195 | The quick brown \
196 | fox jumps over \
197 | the lazy dog.\
198 | """
199 | ```
200 |
201 | - You can write a quotation mark, or two adjacent quotation marks, anywhere inside a multi-line basic string. They can also be written just inside the delimiters.
202 |
203 | ```
204 | str4: """Here are two quotation marks: "". Simple enough."""
205 | str5: """Here are three quotation marks: """.""" // invalid
206 | str5: """Here are three quotation marks: ""\"."""
207 | str6: """Here are fifteen quotation marks: ""\"""\"""\"""\"""\"."""
208 | ```
209 |
210 | #### Null `(null)`
211 |
212 | - Use the keyword `null`
213 |
214 | ```
215 | key: null
216 | ```
217 |
218 | #### Array `(arr)`
219 |
220 | - Arrays are declared using brackets (`{}`)
221 | - Elements are separated by commas (`,`), except for the last one
222 | - Empty arrays can be declared by leaving the brackets empty
223 | - Arrays can contain values of different types
224 | - They can be nested
225 |
226 | ```
227 | environment: {
228 | term: "linux",
229 | histsize: 5000,
230 | theme: "light",
231 | package_manage: "pacman"
232 | }
233 |
234 | nested_array: {
235 | { 1, 0, 1 },
236 | { "h", "a", "l" , "l", "o" }
237 | }
238 |
239 | friends: {}
240 | ```
241 |
242 | #### Boolean `(bool)`
243 |
244 | - must be either `true` or `false`
245 |
246 | ```
247 | has_children: true
248 | likes_pineapple: false
249 | ```
250 |
251 | #### Float `(float)`
252 |
253 | Floats contain numbers with a decimal point:
254 |
255 | - They must be implemented as IEEE 754 binary64 values
256 | - The decimal point, if used, must be surrounded by at least one digit on each side.
257 | - A float consists of an integer part followed by a fractional part
258 | - Positive floats may be prefixed with a `+`
259 | - Negative floats are prefixed with a `-`
260 |
261 | ```
262 | f0: -0.00001
263 | f1: +34.0
264 | f2: 2.71828
265 | ```
266 |
267 | #### Object `(bool)`
268 |
269 | Objects are used to represent “things” with characteristics (AKA properties):
270 |
271 | - Objects are declared using square brackets (`[]`)
272 | - Empty objects can be declared leaving the brackets empty
273 |
274 | ```
275 | user: [
276 | name: "Michael Theodor Mouse",
277 | age: 34,
278 | married: false
279 | ]
280 |
281 | spouse: []
282 | ```
283 |
284 |
285 |
286 | ### Imports
287 |
288 | Imports are statements used to "bring in" other `.koy` files:
289 |
290 | - The effect of importing a file is the same as replacing the import by the file's contents. Therefore, all the keys and variables defined on them will be available in the file which is importing
291 | - Single imports are done using only the `import` keyword
292 | - Multiple imports can be placed inside of curly braces (`{}`) next to an `import` statement
293 |
294 | ```
295 | // single import example
296 | import "./directory/my_config.koy"
297 |
298 | // multiple imports
299 | import {
300 | "./directory/user0.koy",
301 | "./directory/user1.koy",
302 | "./directory/user2.koy"
303 | }
304 | ```
305 |
306 |
307 |
308 | ### Variables
309 |
310 | Everything in a `.koy` file is a variable:
311 |
312 | - They can be accessed using `${}`
313 |
314 | ```
315 | name: "Michael Theodor Mouse"
316 | hello: "Good evening ${name}"
317 | ```
318 |
319 | - Variables within an array/object can be accessed using the `.` notation
320 |
321 | ```
322 | user: [
323 | name: "Michael",
324 | surnames: "Theodor Mouse"
325 | ]
326 | hi: "Good morning ${user.name}"
327 | ```
328 |
329 | - The value of a variable can be overwritten using `<<`
330 |
331 | ```
332 | hello: "world"
333 | another_hello: ${hello} << "momma!" // notice that hello is still equal to "world"
334 |
335 | mouse: "Mickey" << "Minnie"
336 | ```
337 |
338 | - When using `<<` on an array or an object, it will replace its fields
339 |
340 | ```
341 | // object
342 | user: [
343 | name: "Michael Theodor Mouse",
344 | age: 93
345 | ]
346 |
347 | // array
348 | ports: { 8000, 8001, 8002 }
349 |
350 | laptop: {
351 | name: "Lenovo Thinkpad",
352 | owner: ${user} << [
353 | name: "Dominic Toretto"
354 | ],
355 | ports: ${ports} << { 2: 8007 }
356 | }
357 | ```
358 |
359 | > Note: you are not overwriting the variable you are calling, you are replacing the value of the incoming variable or the one you are assigning itself (third example, fourth line). On arrays/objects you are simply overwriting fields, not the whole array/object.
360 |
361 |
362 |
--------------------------------------------------------------------------------
/docs/style-guide.md:
--------------------------------------------------------------------------------
1 |
2 |
🖌️ Style Guide
3 |
4 |
5 |
6 |
7 | A guide on how to properly style and implement the language
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/example.koy:
--------------------------------------------------------------------------------
1 | /*
2 | Hello world! this is a simple Koy document
3 | and you are reading a multi-line comment :^)
4 | */
5 |
6 | // let's import some settings, shall we?
7 | import "./settings/screen.koy"
8 |
9 | title: "Koy Example"
10 |
11 | user: {
12 | name: "Michael Theodor Mouse",
13 | age: 23,
14 | married: true
15 | }
16 |
17 | ports: [ 8001, 8002, 8003 ]
18 |
--------------------------------------------------------------------------------
/parsers/lua-parser/README.md:
--------------------------------------------------------------------------------
1 |
2 |
🌙 lua-parser
3 |
4 |
5 |
6 | Koy library for Lua
7 |
8 |
9 |
10 |
11 | ```lua
12 | local koy_parser = require("koy")
13 | local sample = [[
14 | person: {
15 | name: "Michael Theodor Mouse",
16 | age: 38
17 | }
18 | ]]
19 |
20 | local koy_table = koy_parser.decode(sample)
21 |
22 | for k,v in pairs(koy_table) do
23 | print(k .. ": " .. tostring(v))
24 | end
25 | ```
26 |
27 |
28 |
29 | ### ✨ Features
30 |
31 | + `decode()`: receives a Koy string and transforms it into a Lua table (object)
32 | + `encode()`: transforms Lua table (object) into valid Koy
33 |
34 |
35 |
36 | ### 🍣 Caveats
37 |
38 | + `null` values are stored as `"nil"` (in a string) because there is no way to represent them with their actual keyword (`nil`) in a Lua table. See [this](https://stackoverflow.com/questions/40441508/how-to-represent-nil-in-a-table) Stack Overflow question.
39 | + data types such as `obj`, `arr` and `nil` are not valid candidates for casting
40 |
41 |
42 |
43 | ### 🔧 Testing
44 |
45 | There is currently just one very simple test that you can execute like so:
46 |
47 | ```bash
48 | lua test.lua
49 | ```
50 |
--------------------------------------------------------------------------------
/parsers/lua-parser/deps/inspect.lua:
--------------------------------------------------------------------------------
-- Compatibility shim: on Lua versions below 5.3, try to load 'compat53.module'
-- (via pcall, so a missing module is tolerated) and, when available, use its
-- math/string/table libraries instead of the built-in ones.
-- NOTE(review): the `_tl_compat` name suggests this line is generated by the Teal compiler — confirm before hand-editing.
local _tl_compat; if (tonumber((_VERSION or ''):match('[%d.]*$')) or 0) < 5.3 then local p, m = pcall(require, 'compat53.module'); if p then _tl_compat = m end end; local math = _tl_compat and _tl_compat.math or math; local string = _tl_compat and _tl_compat.string or string; local table = _tl_compat and _tl_compat.table or table
-- Module table; starts with an empty `Options` sub-table.
local inspect = {Options = {}, }
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | inspect._VERSION = 'inspect.lua 3.1.0'
21 | inspect._URL = 'http://github.com/kikito/inspect.lua'
22 | inspect._DESCRIPTION = 'human-readable representations of tables'
23 | inspect._LICENSE = [[
24 | MIT LICENSE
25 |
26 | Copyright (c) 2022 Enrique García Cota
27 |
28 | Permission is hereby granted, free of charge, to any person obtaining a
29 | copy of this software and associated documentation files (the
30 | "Software"), to deal in the Software without restriction, including
31 | without limitation the rights to use, copy, modify, merge, publish,
32 | distribute, sublicense, and/or sell copies of the Software, and to
33 | permit persons to whom the Software is furnished to do so, subject to
34 | the following conditions:
35 |
36 | The above copyright notice and this permission notice shall be included
37 | in all copies or substantial portions of the Software.
38 |
39 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
40 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
41 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
42 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
43 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
44 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
45 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
46 | ]]
-- Sentinel tables appended to paths by processRecursive: KEY marks that the
-- path points at a table key, METATABLE that it points at a metatable.
-- Their __tostring metamethods make them print as their own names.
inspect.KEY = setmetatable({}, { __tostring = function() return 'inspect.KEY' end })
inspect.METATABLE = setmetatable({}, { __tostring = function() return 'inspect.METATABLE' end })
49 |
-- Cache the global `tostring` in a local (used by the Inspector methods below).
local tostring = tostring
51 |
--- Build a raw iteration triplet for a table.
-- Hands `next` directly to the generic `for`, so a `__pairs` metamethod on
-- `t` is never consulted.
-- @param t table to traverse
-- @return the iterator function (`next`), the table itself, and a nil control value
local function rawpairs(t)
   local iterator, state, control = next, t, nil
   return iterator, state, control
end
55 |
56 |
57 |
--- Wrap a string in quotes, choosing the delimiter that needs the least escaping.
-- A string that contains double quotes but no single quotes is wrapped in
-- single quotes as-is; every other string is wrapped in double quotes with
-- embedded double quotes backslash-escaped.
-- @param str string to quote
-- @return the quoted string
local function smartQuote(str)
   local hasDouble = str:match('"') ~= nil
   local hasSingle = str:match("'") ~= nil
   if hasDouble and not hasSingle then
      return "'" .. str .. "'"
   end
   -- single assignment keeps only gsub's first result (drops the match count)
   local escaped = str:gsub('"', '\\"')
   return '"' .. escaped .. '"'
end
64 |
65 |
-- Control characters mapped to their escaped spelling. Seeded with the named
-- escapes plus DEL; the loop below adds "\<code>" for every other byte 0-31.
local shortControlCharEscapes = {
   ["\a"] = "\\a", ["\b"] = "\\b", ["\f"] = "\\f", ["\n"] = "\\n",
   ["\r"] = "\\r", ["\t"] = "\\t", ["\v"] = "\\v", ["\127"] = "\\127",
}
-- Zero-padded three-digit escapes, used when the control character is directly
-- followed by a digit so that digit is not read as part of the escape.
-- DEL maps to itself here and is handled by the short table afterwards.
local longControlCharEscapes = { ["\127"] = "\127" }
for code = 0, 31 do
   local ch = string.char(code)
   if not shortControlCharEscapes[ch] then
      shortControlCharEscapes[ch] = "\\" .. code
      longControlCharEscapes[ch] = string.format("\\%03d", code)
   end
end

--- Escape backslashes and control characters in a string.
-- @param str string to escape
-- @return the escaped string (a single value; gsub's match counts are dropped)
local function escape(str)
   -- 1. double every backslash
   local escaped = str:gsub("\\", "\\\\")
   -- 2. control characters immediately followed by a digit get the padded form
   escaped = escaped:gsub("(%c)%f[0-9]", longControlCharEscapes)
   -- 3. every remaining control character gets the short form
   escaped = escaped:gsub("%c", shortControlCharEscapes)
   return escaped
end
84 |
-- Lua reserved words: they match the identifier pattern but cannot be used as
-- bare names in a table constructor (`{ end = 1 }` is a syntax error).
local luaKeywords = {
   ['and'] = true, ['break'] = true, ['do'] = true, ['else'] = true,
   ['elseif'] = true, ['end'] = true, ['false'] = true, ['for'] = true,
   ['function'] = true, ['goto'] = true, ['if'] = true, ['in'] = true,
   ['local'] = true, ['nil'] = true, ['not'] = true, ['or'] = true,
   ['repeat'] = true, ['return'] = true, ['then'] = true, ['true'] = true,
   ['until'] = true, ['while'] = true,
}

--- Tell whether a value can be written as a bare identifier.
-- True only for strings made of a letter or underscore followed by letters,
-- digits or underscores, and which are not Lua reserved words.
-- @param str any value
-- @return boolean
local function isIdentifier(str)
   if type(str) ~= "string" then return false end
   if not str:match("^[_%a][_%a%d]*$") then return false end
   return not luaKeywords[str]
end
88 |
local flr = math.floor

--- Tell whether `k` is an index inside the array part of a table.
-- @param k any key
-- @param sequenceLength length of the sequence (see getSequenceLength)
-- @return true when `k` is a whole number in the range 1..sequenceLength
local function isSequenceKey(k, sequenceLength)
   if type(k) ~= "number" then return false end
   if flr(k) ~= k then return false end
   return 1 <= k and k <= sequenceLength
end
96 |
-- Relative ordering of keys of different types; lower ranks sort first.
local defaultTypeOrders = {
   ['number'] = 1, ['boolean'] = 2, ['string'] = 3, ['table'] = 4,
   ['function'] = 5, ['userdata'] = 6, ['thread'] = 7,
}

--- Comparator for table.sort that orders table keys.
-- Two numbers or two strings compare by value. Otherwise keys are ordered by
-- the rank of their type in defaultTypeOrders; ranked types come before
-- unranked ones, and two unranked types compare by type name.
-- @param a first key
-- @param b second key
-- @return true when `a` must sort before `b`
local function sortKeys(a, b)
   local typeA, typeB = type(a), type(b)

   -- same comparable type: use the natural order
   if typeA == typeB and (typeA == 'string' or typeA == 'number') then
      return a < b
   end

   local rankA, rankB = defaultTypeOrders[typeA], defaultTypeOrders[typeB]
   if rankA and rankB then
      return rankA < rankB
   end
   if rankA then return true end
   if rankB then return false end

   -- neither type is ranked: fall back to comparing the type names
   return typeA < typeB
end
120 |
121 |
122 |
--- Count the consecutive non-nil entries of `t` starting at index 1.
-- Uses rawget, so `__index` and `__len` metamethods are ignored.
-- @param t table to measure
-- @return number of entries before the first nil (0 for an empty sequence)
local function getSequenceLength(t)
   local count = 0
   while rawget(t, count + 1) ~= nil do
      count = count + 1
   end
   return count
end
132 |
--- Collect the keys of `t` that are not part of its array sequence.
-- @param t table to examine
-- @return keys array of non-sequence keys, sorted with sortKeys
-- @return keysLength number of entries in `keys`
-- @return sequenceLength length of the array part of `t`
local function getNonSequentialKeys(t)
   local sequenceLength = getSequenceLength(t)
   local keys, keysLength = {}, 0

   for key in rawpairs(t) do
      if not isSequenceKey(key, sequenceLength) then
         keysLength = keysLength + 1
         keys[keysLength] = key
      end
   end

   table.sort(keys, sortKeys)
   return keys, keysLength, sequenceLength
end
145 |
--- Count how many times each table is reachable from `t`.
-- Walks keys, values and metatables recursively. A table's contents are
-- traversed only the first time it is met; later encounters only bump its count.
-- @param t value to scan (non-tables are ignored)
-- @param tableAppearances optional accumulator mapping table -> count
-- @return the accumulator
local function countTableAppearances(t, tableAppearances)
   tableAppearances = tableAppearances or {}

   if type(t) ~= "table" then
      return tableAppearances
   end

   local seen = tableAppearances[t]
   if seen then
      tableAppearances[t] = seen + 1
      return tableAppearances
   end

   tableAppearances[t] = 1
   -- raw traversal: iterate with `next` directly
   for key, value in next, t, nil do
      countTableAppearances(key, tableAppearances)
      countTableAppearances(value, tableAppearances)
   end
   countTableAppearances(getmetatable(t), tableAppearances)

   return tableAppearances
end
164 |
--- Return a copy of `path` with up to two extra components appended.
-- The input array is left untouched.
-- @param path array of path components
-- @param a first component to append
-- @param b second component to append (optional)
-- @return a new array
local function makePath(path, a, b)
   local size = #path
   local extended = {}
   for index = 1, size do
      extended[index] = path[index]
   end
   extended[size + 1], extended[size + 2] = a, b
   return extended
end
175 |
176 |
--- Apply `process` to `item` and, recursively, to everything inside it.
-- When `process` returns a table, a fresh copy is built whose keys, values and
-- metatable are themselves processed. Keys whose processed form is nil are
-- dropped together with their values.
-- @param process function(item, path) returning the replacement for `item`
-- @param item value to process
-- @param path array of components leading to `item`
-- @param visited map from already-processed items to their copies
-- @return the processed value (nil when `item` is nil)
local function processRecursive(process,
   item,
   path,
   visited)
   if item == nil then return nil end

   local known = visited[item]
   if known then return known end

   local result = process(item, path)
   if type(result) ~= "table" then
      return result
   end

   local copy = {}
   -- register the copy before descending so cyclic references resolve to it
   visited[item] = copy

   for key, value in rawpairs(result) do
      local newKey = processRecursive(process, key, makePath(path, key, inspect.KEY), visited)
      if newKey ~= nil then
         copy[newKey] = processRecursive(process, value, makePath(path, newKey), visited)
      end
   end

   local mt = processRecursive(process, getmetatable(result), makePath(path, inspect.METATABLE), visited)
   if type(mt) ~= 'table' then mt = nil end
   setmetatable(copy, mt)

   return copy
end
204 |
205 |
206 |
207 |
208 |
-- Method table for inspector objects; the `Inspector:*` functions below are defined on it.
local Inspector = {}
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
-- Metatable that redirects missing-field lookups to Inspector.
-- NOTE(review): presumably attached to each inspector instance where it is created — not visible in this part of the file.
local Inspector_mt = { __index = Inspector }
221 |
--- Append up to five pieces to the end of the output buffer.
-- nil arguments store nothing in their slot.
-- @param a,b,c,d,e pieces to append, in order
function Inspector:puts(a, b, c, d, e)
   local out = self.buffer
   local last = #out
   out[last + 1], out[last + 2], out[last + 3], out[last + 4], out[last + 5] = a, b, c, d, e
end
231 |
--- Run `f` one nesting level deeper.
-- Increments `self.level` before the call and decrements it afterwards.
-- @param f function called with no arguments
function Inspector:down(f)
   local deeper = self.level + 1
   self.level = deeper
   f()
   -- re-read the field instead of reusing `deeper`, as the original does
   self.level = self.level - 1
end
237 |
--- Start a new output line, indented for the current nesting level.
-- Emits `self.newline` followed by `self.indent` repeated `self.level` times.
function Inspector:tabify()
   local lineBreak = self.newline
   local padding = string.rep(self.indent, self.level)
   self:puts(lineBreak, padding)
end
242 |
--- Tell whether `v` already has an id assigned in `self.ids`.
-- @param v value to look up
-- @return boolean
function Inspector:alreadyVisited(v)
   local assigned = self.ids[v]
   return assigned ~= nil
end
246 |
--- Return the id of `v` as a string, assigning one on first use.
-- Ids are numbered per Lua type: `self.maxIds[type]` holds the last number
-- handed out for that type and `self.ids[v]` remembers the id of each value.
-- @param v value to identify
-- @return the id converted with tostring
function Inspector:getId(v)
   local ids = self.ids
   local id = ids[v]
   if id then
      return tostring(id)
   end

   local kind = type(v)
   local maxIds = self.maxIds
   id = (maxIds[kind] or 0) + 1
   maxIds[kind] = id
   ids[v] = id
   return tostring(id)
end
257 |
258 |
-- Placeholder so that putKey and putTable can refer to putValue before its
-- real body is defined further down the file, where it replaces this stub.
function Inspector:putValue(_)
end

--- Write a table key: bare when it is a valid identifier, otherwise wrapped
-- in square brackets and rendered like any other value.
function Inspector:putKey(k)
	if not isIdentifier(k) then
		self:puts("[")
		self:putValue(k)
		self:puts("]")
		return
	end
	self:puts(k)
end
271 |
--- Write the textual representation of table `t` into the buffer.
--
-- * the KEY / METATABLE path markers are written via tostring
-- * a table that already received an id is written as a `<table N>`
--   back-reference instead of being expanded again
-- * tables at or below the configured depth are written as `{...}`
-- * otherwise the sequence part is written inline, the remaining keys one per
--   line, and the metatable (if any) under the `<metatable>` pseudo-key
function Inspector:putTable(t)
	if t == inspect.KEY or t == inspect.METATABLE then
		self:puts(tostring(t))
	elseif self:alreadyVisited(t) then
		-- back-reference; previously an empty string was written here, which
		-- made repeated tables vanish from the output
		self:puts('<table ', self:getId(t), '>')
	elseif self.level >= self.depth then
		self:puts('{...}')
	else
		-- tables referenced more than once get an `<N>` tag on first print
		if self.tableAppearances[t] > 1 then self:puts('<', self:getId(t), '>') end

		local nonSequentialKeys, nonSequentialKeysLength, sequenceLength = getNonSequentialKeys(t)
		local mt = getmetatable(t)

		self:puts('{')
		self:down(function()
			local count = 0
			for i = 1, sequenceLength do
				if count > 0 then self:puts(',') end
				self:puts(' ')
				self:putValue(t[i])
				count = count + 1
			end

			for i = 1, nonSequentialKeysLength do
				local k = nonSequentialKeys[i]
				if count > 0 then self:puts(',') end
				self:tabify()
				self:putKey(k)
				self:puts(' = ')
				self:putValue(t[k])
				count = count + 1
			end

			if type(mt) == 'table' then
				if count > 0 then self:puts(',') end
				self:tabify()
				-- the metatable is shown under an explicit pseudo-key; it was
				-- previously written with no key at all
				self:puts('<metatable> = ')
				self:putValue(mt)
			end
		end)

		if nonSequentialKeysLength > 0 or type(mt) == 'table' then
			-- closing brace goes on its own line after keyed entries
			self:tabify()
		elseif sequenceLength > 0 then
			self:puts(' ')
		end

		self:puts('}')
	end
end
322 |
--- Write any Lua value into the buffer, dispatching on its type.
-- Strings are escaped and quoted, tables go through putTable, plain scalars
-- use tostring, and everything else is written as `<type id>`.
function Inspector:putValue(v)
	local kind = type(v)

	if kind == 'table' then
		self:putTable(v)
		return
	end

	if kind == 'string' then
		self:puts(smartQuote(escape(v)))
		return
	end

	local plain = kind == 'number' or kind == 'boolean' or kind == 'nil'
		or kind == 'cdata' or kind == 'ctype'
	if plain then
		self:puts(tostring(v))
	else
		self:puts('<', kind, ' ', self:getId(v), '>')
	end
end
336 |
337 |
338 |
339 |
--- Render `root` as a human-readable string.
-- @param root     any Lua value
-- @param options  optional table:
--                 depth   - maximum nesting level to expand (default math.huge)
--                 newline - line separator (default '\n')
--                 indent  - one level of indentation (default ' ')
--                 process - function(item, path) applied to every item first
-- @return string
function inspect.inspect(root, options)
	options = options or {}

	local maxDepth = options.depth or (math.huge)
	local lineBreak = options.newline or '\n'
	local indentUnit = options.indent or ' '
	local process = options.process

	if process then
		root = processRecursive(process, root, {}, {})
	end

	local state = {
		depth = maxDepth,
		level = 0,
		buffer = {},
		ids = {},
		maxIds = {},
		newline = lineBreak,
		indent = indentUnit,
		tableAppearances = countTableAppearances(root),
	}
	local inspector = setmetatable(state, Inspector_mt)

	inspector:putValue(root)

	return table.concat(inspector.buffer)
end
367 |
-- Make the module table callable: inspect(value, options) forwards to
-- inspect.inspect(value, options).
local function callInspect(_, root, options)
	return inspect.inspect(root, options)
end

setmetatable(inspect, { __call = callInspect })
373 |
374 | return inspect
375 |
--------------------------------------------------------------------------------
/parsers/lua-parser/samples/s1.koy:
--------------------------------------------------------------------------------
1 | /*
2 | Hello world! this is a simple Koy document
3 | and you are reading a multi-line comment :^)
4 | */
5 |
6 | // let's import some settings, shall we?
7 | import "./samples/s2.koy"
8 |
9 | title: "Koy Example"
10 |
11 | user: {
12 | name: "Michael Theodor Mouse",
13 | age: 23,
14 | married: true
15 | }
16 |
17 | ports: [ 8001, 8002, 8003 ]
18 |
--------------------------------------------------------------------------------
/parsers/lua-parser/samples/s2.koy:
--------------------------------------------------------------------------------
1 | env: {
2 | user: "pocco81",
3 | editor: "neovim"
4 | }
5 |
--------------------------------------------------------------------------------
/parsers/lua-parser/samples/s3.koy:
--------------------------------------------------------------------------------
1 | // this is s3.koy
2 | creation_date: 1892
3 |
--------------------------------------------------------------------------------
/parsers/lua-parser/samples/s4.koy:
--------------------------------------------------------------------------------
1 | // this is s4
2 | is_foss: true
3 |
--------------------------------------------------------------------------------
/parsers/lua-parser/src/lib.lua:
--------------------------------------------------------------------------------
-- Module table returned to callers of require().
local KOY = {}

-- Default strictness of the parser. With strictness turned off, no error is
-- thrown for:
--   * tables having mixed keys
--   * redefining a table
--   * redefining a key within a table
KOY.strict = true
9 |
-- Maps the character following a backslash in a double-quoted string to the
-- character it stands for.
local escape_chars = {
	["b"] = "\b",
	["t"] = "\t",
	["n"] = "\n",
	["f"] = "\f",
	["r"] = "\r",
	['"'] = '"',
	["\\"] = "\\",
}
19 |
--- Return `str1` with `str2` inserted after its first `pos` characters.
-- NOTE(review): this adds a field to the shared standard `string` table.
function string.insert(str1, str2, pos)
	local head = str1:sub(1, pos)
	local tail = str1:sub(pos + 1)
	return head .. str2 .. tail
end
23 |
-- taken from: https://stackoverflow.com/questions/640642/how-do-you-copy-a-lua-table-by-value
--- Return a one-level copy of `t`: a new table holding the same keys and
-- values (nested tables are shared, not duplicated). Non-table arguments are
-- returned unchanged.
function table.shallow_copy(t)
	if type(t) ~= "table" then
		return t
	end

	local duplicate = {}
	for key, value in pairs(t) do
		duplicate[key] = value
	end
	return duplicate
end
36 |
--- Read the whole file at path `file` (binary mode) and return its contents.
-- Raises an error if the file cannot be opened.
local function dump_file(file)
	local handle = assert(io.open(file, "rb"))
	local data = handle:read("*all")
	handle:close()
	return data
end
43 |
44 | -- converts KOY data into a lua table
45 | function KOY.decode(koy, options)
options = options or {}

-- Per-call strictness; falls back to the module default only when the caller
-- did not pass one. (`a ~= nil and a or b` must not be used here: it would
-- silently turn an explicit `strict = false` back into the default.)
local strict = options.strict
if strict == nil then
	strict = KOY.strict
end

if type(koy) ~= "string" then
	error("Error: expected argument of type 'string', got " .. type(koy))
elseif koy == "" then
	return {}
end

-- the official KOY definition of whitespace (tab or space), as a Lua pattern
local ws = "[\009\032]"

-- the official KOY definition of newline, as a Lua pattern: a character
-- class matching LF or CR, so a CRLF pair is consumed one character at a time
local nl = "[\10\13]"

local buffer = "" -- accumulates the text of the key currently being read
local cursor = 1 -- the current location within the string to parse
local out = {} -- the output table
-- the current table to write to; kept in a global because variable
-- substitution evaluates `obj.<path>` through load(), which only sees globals
_G.obj = out
70 |
71 | -- ========= Standard Utilities
72 |
--- Raise a parsing error that includes the line number of the cursor.
-- @param message     text describing the problem
-- @param strictOnly  when truthy, the error is raised only in strict mode and
--                    the call is a no-op otherwise
-- The line number refers to the document as currently held in `koy`, i.e.
-- after any imported files have been spliced in.
local function err(message, strictOnly)
	if strictOnly and not strict then
		return
	end

	-- Count the line feeds in front of the cursor. This works for LF and CRLF
	-- documents alike; the previous content-length bookkeeping ignored the
	-- newline characters themselves and drifted further with every line.
	local _, newlines = koy:sub(1, cursor - 1):gsub("\n", "")
	error("KOY: " .. message .. " on line " .. (newlines + 1) .. ".", 4)
end
89 |
--- Return the single character `n` positions after the cursor (default 0,
-- i.e. the character under the cursor). Yields "" past the end of the input.
local function char(n)
	local pos = cursor + (n or 0)
	return koy:sub(pos, pos)
end
95 |
--- Move the cursor forward by `n` characters (default: 1).
local function step(n)
	local distance = n or 1
	cursor = cursor + distance
end
101 |
--- Remove the (Lua) whitespace at the beginning and end of a string.
-- Returns exactly one value: the extra parentheses discard the substitution
-- count that gsub returns as a second result, so the function is safe to use
-- as the last argument of another call.
local function trim(str)
	return (str:gsub("^%s*(.-)%s*$", "%1"))
end
106 |
--- True while the cursor still points at a character of the document.
-- Loops use this to stop at the end of the input instead of spinning.
local function bounds()
	local last = koy:len()
	return cursor <= last
end
112 |
113 | -- ========= PARSERS for unreal elements
114 | --- NOTE: Any parse_<.> func will parse the current char and any followign one that matches it
115 |
--- Advance past a run of newline characters starting at the cursor.
local function parse_newline()
	while true do
		if not char():match(nl) then
			return
		end
		step()
	end
end
121 |
--- Advance past a run of tab characters starting at the cursor.
local function parse_tab()
	local current = char()
	while current:match("[\t]") do
		step()
		current = char()
	end
end
127 |
--- Advance past a run of KOY whitespace (see `ws`) starting at the cursor.
local function parse_whitespace()
	local current = char()
	while current:match(ws) do
		step()
		current = char()
	end
end
133 |
--- Skip a `//` comment: advance the cursor to the newline that ends it, or
-- to the end of the document when the comment is on the last line.
-- Does nothing when the cursor is not on `//`.
local function parse_sl_comment()
	if char() == "/" and char(1) == "/" then
		-- bounds() matters: past the end char() is "", which never matches a
		-- newline, so without it a trailing comment would loop forever
		while bounds() and not char():match(nl) do
			step()
		end
	end
end
141 |
--- Skip a `/* ... */` comment, leaving the cursor just after the closing
-- `*/`. Does nothing when the cursor is not on `/*`.
-- Raises a parse error when the comment is never closed.
local function parse_ml_comment()
	if char() == "/" and char(1) == "*" then
		-- plain search (4th argument) so `*` is not read as a pattern item
		local _, close_end = koy:find("*/", cursor + 2, true)
		if not close_end then
			err("Unterminated multi-line comment")
		end
		step(close_end + 1 - cursor)
	end
end
148 |
-- track whether the current key was quoted or not
local quoted_key = false

-- Elements that can be ignored, keyed by what get_current_element() returns
-- for them; each value is the function that skips that element.
local space_elements = {}
space_elements["\n"] = parse_newline
space_elements["\t"] = parse_tab
space_elements[" "] = parse_whitespace
-- NOTE(review): this entry is keyed by the text of the `nl` pattern itself,
-- not by a character it matches, so a lone "\r" has no entry here.
space_elements[nl] = parse_newline
space_elements["//"] = parse_sl_comment
space_elements["/*"] = parse_ml_comment
161 |
--- Return the lookup key for the element under the cursor: normally the
-- current character, but for a forward slash the slash plus the character
-- after it, so that `//` and `/*` can be told apart.
local function get_current_element()
	local current = char()
	if current ~= "/" then
		return current
	end
	return "/" .. char(1)
end
170 |
-- holds the skipper chosen by the most recent successful not_real_element()
local unreal_element_parser_f

--- Check whether the cursor is on an ignorable element (anything listed in
-- `space_elements`). On success the matching skipper is stored in
-- `unreal_element_parser_f` and true is returned; otherwise nothing is returned.
local function not_real_element()
	local skipper = space_elements[get_current_element()]
	if not skipper then
		return
	end
	unreal_element_parser_f = skipper
	return true
end
181 |
--- Skip every consecutive ignorable element (see `space_elements`) so the
-- cursor ends up on the next real character.
local function skip_unreal_elements()
	local ignorable = not_real_element()
	while ignorable do
		unreal_element_parser_f()
		ignorable = not_real_element()
	end
end
189 |
190 | -- ========= PARSERS for real elements
191 |
-- declared up front because these parsers call one another
local parse_string, parse_number, parse_array, parse_object, parse_constant, parse_variable, parse_datatype
local cast_datatype, get_value

-- var_match_placeholder: result of the last successful var_avaiable() call,
-- as { [1] = index of the character just before `$` (0 when `$` is the first
-- character), [2] = index of the closing `}`, [3] = text between the braces }
local vmp = {}

--- Look for the first substitutable variable in `str`.
-- By definition a variable in Koy looks like `${var_name}` and can be escaped
-- by putting a backslash in front of the `$`.
-- Escaped and empty (`${}`) occurrences are skipped. A variable at the very
-- start of the string is recognised too; the earlier pattern
-- `[^\\]%$%{(.-)%}` required some character before the `$` and missed it.
-- @return true and fills `vmp` when a variable was found, false otherwise
local function var_avaiable(str)
	local init = 1
	while true do
		-- a literal `${`, then the shortest run of text up to the first `}`
		local dollar, ending, name = str:find("%$%{(.-)%}", init)
		if dollar == nil then
			return false
		end

		local escaped = dollar > 1 and str:sub(dollar - 1, dollar - 1) == "\\"
		if not escaped and name ~= "" then
			vmp = { [1] = dollar - 1, [2] = ending, [3] = name }
			return true
		end

		-- keep looking after this `$`
		init = dollar + 1
	end
end
213 |
--- Evaluate the variable path `path` against the global `obj`.
-- Returns nil when the path does not resolve to a value.
-- NOTE(security): the path is taken verbatim from the document and compiled
-- as Lua source, so a crafted document can run arbitrary code. Do not feed
-- untrusted documents to the parser until this is replaced by a plain lookup.
local function resolve_variable(path)
	local chunk = load("return obj." .. path)
	if not chunk then
		err("bad substitution. '" .. path .. "' is not a valid variable name")
	end
	-- indexing through an undefined parent raises; treat that as "not defined"
	local ok, value = pcall(chunk)
	if not ok then
		return nil
	end
	return value
end

--- Substitute variables.
-- With `str`: replace every `${path}` inside the string and return the new
-- string. Without arguments: parse the `${path}` under the cursor as the value
-- of the previous key (e.g. `key: ${this}`) and return `{ value, type }`; a
-- table value may be followed on the same line by `<< { ... }` whose entries
-- overwrite fields of the copy.
function parse_variable(str)
	-- in-string variables
	if str ~= nil then
		local parsed_str = str
		while var_avaiable(parsed_str) do
			local substitution = tostring(resolve_variable(vmp[3]))
			if substitution == "nil" then
				err("bad substitution. Variable '" .. vmp[3] .. "' is not defined")
			end
			parsed_str = parsed_str:sub(1, vmp[1]) .. substitution .. parsed_str:sub(vmp[2] + 1, parsed_str:len())
		end

		return parsed_str
	end

	-- variable as value of a key
	if char(1) ~= "{" then
		err("Invalid variable: expected '{' after '$'")
	end
	step(2) -- steps over $ and {

	local literal_var = ""
	while bounds() do
		if char():match(nl) then
			err("Variables cannot have line breaks")
		elseif char() == "}" then
			step()
			break
		else
			literal_var = literal_var .. char()
			step()
		end
	end

	-- The looked-up value is the very table stored in `obj`, so it is copied:
	-- the overwrites applied below must not leak back into the original.
	-- shallow_copy() returns non-table values unchanged.
	local substitution = table.shallow_copy(resolve_variable(literal_var))
	if substitution == nil then
		err("bad substitution. Variable '" .. literal_var .. "' is not defined")
	end

	local substitution_type = type(substitution)

	if substitution_type == "table" then
		-- Look for `<< { ... }` on the rest of the line. When there is none,
		-- the cursor is put back so the caller sees whatever follows the
		-- variable (a comma, a comment, a closing bracket, ...).
		local resume_at = cursor
		local overwrites
		while bounds() and not char():match(nl) do
			if char() == "<" and char(1) == "<" then
				step(2) -- <<
				skip_unreal_elements()
				if char() == "{" then
					overwrites = parse_object()
					break
				end
			end
			step()
		end

		if overwrites then
			for k, v in pairs(overwrites.value) do
				substitution[k] = v
			end
		else
			cursor = resume_at
		end
	end

	return { value = substitution, type = substitution_type }
end
281 |
--- Parse the quoted string under the cursor.
-- Handles single- and double-quoted strings and their triple-quoted
-- multi-line forms. Backslash escapes are interpreted in double-quoted strings
-- only; variables (`${...}`) are substituted in the finished text.
-- @return { value = <string>, type = "string" }
function parse_string()
	local quoteType = char() -- should be single or double quote

	-- this is a multiline string if the next 2 characters match
	local multiline = (char(1) == char(2) and char(1) == char())

	-- buffer to hold the string
	local str = ""

	-- Decode the `\uXXXX` / `\UXXXXXXXX` escape under the cursor (cursor on
	-- the backslash), step past it and return the code point encoded as UTF-8.
	local function unicode_escape(digits)
		local hex = koy:sub(cursor + 2, cursor + 1 + digits)
		if #hex ~= digits or not hex:match("^%x+$") then
			err("Invalid Unicode escape")
		end
		local code = tonumber(hex, 16)
		-- a Unicode scalar is any code point up to U+10FFFF except the
		-- surrogate range U+D800..U+DFFF
		if code > 0x10ffff or (code >= 0xd800 and code <= 0xdfff) then
			err("Unicode escape is not a Unicode scalar")
		end
		step(2 + digits) -- backslash, u/U and the digits
		return utf8.char(code)
	end

	-- skip the quotes
	step(multiline and 3 or 1)

	while bounds() do
		if multiline and char():match(nl) and str == "" then
			step() -- skip line break at the beginning of a multiline string
		end

		-- keep going until we encounter the quote character again
		if char() == quoteType then
			if multiline then
				if char(1) == char(2) and char(1) == quoteType then
					step(3)
					break
				end
			else
				step()
				break
			end
		end

		if char():match(nl) and not multiline then
			err("Single-line string cannot contain line break")
		end

		-- if we're in a double-quoted string, watch for escape characters!
		if quoteType == '"' and char() == "\\" then
			if multiline and char(1):match(nl) then
				-- line continuation: skip until first non-whitespace character
				step(1) -- go past the backslash
				while bounds() do
					if not char():match(ws) and not char():match(nl) then
						break
					end
					step()
				end
			elseif escape_chars[char(1)] then
				-- normal escape
				str = str .. escape_chars[char(1)]
				step(2) -- go past backslash and the character
			elseif char(1) == "u" then
				str = str .. unicode_escape(4)
			elseif char(1) == "U" then
				str = str .. unicode_escape(8)
			else
				err("Invalid escape")
			end
		else
			-- ordinary character: append it raw and keep going
			str = str .. char()
			step()
		end
	end

	return { value = parse_variable(str), type = "string" }
end
391 |
--- Parse the number (or date) under the cursor.
-- Digits may be grouped with `_`; an `e`/`E` introduces a decimal exponent.
-- A `T` or `Z` turns the token into a date, which is returned as a string
-- since Lua has no date type. The cursor is left on the delimiter that ended
-- the token.
-- @return { value, type } with type "int", "float" or "date"
function parse_number()
	local num = ""
	local exp
	local date = false

	while bounds() do
		local c = char()
		if c:match("[%+%-%.eE_0-9]") then
			if not exp then
				if c:lower() == "e" then
					-- as soon as we reach e or E, start appending to the
					-- exponent buffer instead of the number buffer
					exp = ""
				elseif c ~= "_" then
					num = num .. c
				end
			elseif c:match("[%+%-0-9]") then
				exp = exp .. c
			else
				err("Invalid exponent")
			end
			step()
		elseif c:match(ws) or c == "#" or c:match(nl) or c == "," or c == "]" or c == "}" then
			break
		elseif c == "T" or c == "Z" then
			-- parse the date (as a string, since lua has no date object)
			date = true
			while bounds() do
				local d = char()
				if d == "," or d == "]" or d == "}" or d == "#" or d:match(nl) or d:match(ws) then
					break
				end
				num = num .. d
				step()
			end
			-- stop here: the cursor is on the delimiter, which belongs to the
			-- caller and must not be stepped over
			break
		else
			err("Invalid number")
		end
	end

	if date then
		return { value = num, type = "date" }
	end

	local float = num:match("%.") ~= nil

	local mantissa = tonumber(num)
	if mantissa == nil then
		-- e.g. "1.2.3" or a lone sign
		err("Invalid number")
	end
	local exponent = exp and tonumber(exp) or 0

	if not float then
		if exponent == 0 then
			-- no scaling needed; avoids a round trip through floating point
			return { value = math.floor(mantissa), type = "int" }
		end
		return {
			-- lua will automatically convert the result
			-- of a power operation to a float, so we have
			-- to convert it back to an int with math.floor
			value = math.floor(mantissa * 10 ^ exponent),
			type = "int",
		}
	end

	return { value = mantissa * 10 ^ exponent, type = "float" }
end
460 |
--- Parse the `[ ... ]` array under the cursor.
-- Elements are separated by commas; whitespace, line breaks and comments
-- (`//`, `/* */`, and the older `#` form) may appear between them, including
-- between a value and its comma. In strict mode all elements must share one type.
-- @return { value = <list>, type = "array" }
function parse_array()
	step() -- skip [

	-- advance over everything between elements that carries no meaning
	local function skip_filler()
		while bounds() do
			local c = char()
			if c:match(ws) or c:match(nl) then
				step()
			elseif c == "#" or (c == "/" and char(1) == "/") then
				while bounds() and not char():match(nl) do
					step()
				end
			elseif c == "/" and char(1) == "*" then
				parse_ml_comment()
			else
				return
			end
		end
	end

	local arrayType
	local array = {}

	while true do
		skip_filler()
		if not bounds() or char() == "]" then
			break
		end

		-- get the next object in the array
		local v = get_value()
		if not v then
			break
		end

		-- set the type if it hasn't been set before
		if arrayType == nil then
			arrayType = v.type
		elseif arrayType ~= v.type then
			err("Mixed types in array", true)
		end

		array[#array + 1] = v.value

		skip_filler()
		if char() == "," then
			step()
		end
	end
	step() -- skip ]

	return { value = array, type = "array" }
end
506 |
--- Parse the `{ key: value, ... }` object under the cursor.
-- Keys may be bare (surrounding whitespace is trimmed) or quoted. A datatype
-- keyword directly after the colon (see parse_datatype) casts the value.
-- @return { value = <table>, type = "array" }
-- NOTE(review): objects report the same type tag as arrays, so the
-- mixed-type check in parse_array cannot tell them apart - confirm intended.
function parse_object()
	step() -- skip opening brace

	local buffer = ""
	local quoted = false
	local tbl = {}

	while bounds() do
		if char() == "}" then
			break
		elseif char() == "'" or char() == '"' then
			buffer = parse_string().value
			quoted = true
		elseif char() == ":" then
			if not quoted then
				buffer = trim(buffer)
			end

			step() -- skip :
			local data_type = parse_datatype()
			parse_whitespace()
			parse_ml_comment()
			parse_whitespace()

			local value = get_value().value

			-- compare against nil explicitly: `false` is a legitimate value
			-- and must not be dropped
			if value ~= nil then
				if data_type ~= nil then
					local casted_type = cast_datatype(value, data_type)
					if casted_type ~= nil then
						value = casted_type
					end
				end

				tbl[buffer] = value
			end

			parse_whitespace()

			if char() == "," then
				step()
			end

			quoted = false
			buffer = ""
		else
			-- part of a bare key (or filler that trim() removes later)
			buffer = buffer .. char()
			step()
		end
	end
	step() -- skip closing brace

	return { value = tbl, type = "array" }
end
568 |
--- Parse one of the constants `true`, `false` or `null` under the cursor,
-- then skip trailing whitespace and a `#` comment if one follows.
-- Raises "Invalid constant" for anything else.
-- @return { value, type }; null is represented by the string "nil" because a
--         table cannot store nil
--         (https://stackoverflow.com/questions/40441508/how-to-represent-nil-in-a-table)
function parse_constant()
	local v
	local word = koy:sub(cursor, cursor + 3)
	if word == "null" then
		step(4)
		v = { value = "nil", type = "nil" }
	elseif word == "true" then
		step(4)
		v = { value = true, type = "boolean" }
	elseif koy:sub(cursor, cursor + 4) == "false" then
		step(5)
		v = { value = false, type = "boolean" }
	else
		err("Invalid constant")
	end

	parse_whitespace()
	if char() == "#" then
		-- bounds() keeps a comment on the last line from looping forever
		while bounds() and not char():match(nl) do
			step()
		end
	end

	return v
end
596 |
--- Like type(), but numbers are reported as "integer" or "float".
local function datatype(var)
	local kind = type(var)
	if kind ~= "number" then
		return kind
	end
	return math.type(var)
end
605 |
--- Convert `data` to the type named by `to_type`.
-- @param data     value to convert
-- @param to_type  "string", "integer", "float" or "boolean"
-- @return the converted value, `data` itself when it already has that type,
--         or nil when the conversion is not possible (callers then keep the
--         original value)
function cast_datatype(data, to_type)
	local t = datatype(data) -- actual data type

	if t == to_type then
		return data
	elseif to_type == "string" then
		if t == "table" then
			local function dump(o)
				if type(o) == "table" then
					local s = "{"
					for k, v in pairs(o) do
						if type(k) ~= "number" then
							k = '"' .. k .. '"'
						end
						s = s .. " [" .. k .. "] = " .. dump(v) .. ","
					end
					return s .. " }"
				else
					return tostring(o)
				end
			end
			return dump(data)
		else
			return tostring(data)
		end
	elseif to_type == "integer" then
		-- tonumber() signals failure by returning nil, not by raising, so the
		-- result itself has to be checked before doing arithmetic on it
		local good, retval = pcall(tonumber, data)
		if good and retval ~= nil then
			return math.floor(retval) -- because we might get a float back
		end
	elseif to_type == "boolean" then
		if data == "true" then
			return true
		elseif data == "false" then
			return false
		end
	elseif to_type == "float" then
		local good, retval = pcall(tonumber, data)
		if good and retval ~= nil then
			return retval + 0.0
		end
	end

	return nil -- undefined
end
652 |
--- Consume a datatype keyword at the cursor, if there is one.
-- Accepted keywords: int, flt, str, bool.
-- @return the matching Lua-side type name ("integer", "float", "string",
--         "boolean"), or nil when the cursor is not on a keyword (the cursor
--         is then left untouched)
function parse_datatype()
	-- checked in this order; each entry is { keyword, lua equivalent }
	local keywords = {
		{ "int", "integer" },
		{ "flt", "float" },
		{ "str", "string" },
		{ "bool", "boolean" },
	}

	for _, entry in ipairs(keywords) do
		local word = entry[1]
		if koy:sub(cursor, cursor + #word - 1) == word then
			step(#word)
			return entry[2]
		end
	end

	return nil
end
672 |
--- Figure out the type of the value under the cursor from its first
-- character and parse it with the matching parser. Anything that is not a
-- string, number, variable, array or object is treated as a constant.
-- @return the `{ value, type }` table produced by the chosen parser
function get_value()
	local first = char()

	if first == '"' or first == "'" then
		return parse_string()
	end
	if first:match("[%+%-0-9]") then
		return parse_number()
	end
	if first == "$" then
		return parse_variable()
	end
	if first == "[" then
		return parse_array()
	end
	if first == "{" then
		return parse_object()
	end
	return parse_constant()
	-- date regex (for possible future support):
	-- %d%d%d%d%-[0-1][0-9]%-[0-3][0-9]T[0-2][0-9]%:[0-6][0-9]%:[0-6][0-9][Z%:%+%-%.0-9]*
end
691 |
-- track whether the current key was quoted or not
local quoted_key = false

--- True while there is still input left to read at the cursor.
local function is_iterable()
	return cursor <= koy:len()
end
701 |
-- Main loop: run over the document while there is input left. Characters
-- that do not start a recognised construct are collected in `buffer`, which
-- becomes the key name once a `:` is reached (or the keyword `import`).
while is_iterable() do
	skip_unreal_elements()

	-- false when the branch below already left the cursor on the next
	-- character to examine, so nothing may be appended or stepped over
	local advance = true

	if char() == ":" then -- key: value
		-- expect whitespace with multi-line comments
		step()
		local data_type = parse_datatype()
		parse_whitespace()
		parse_ml_comment()
		parse_whitespace()

		-- trim key name
		buffer = trim(buffer)

		if buffer == "" and not quoted_key then
			err("Empty key name")
		end

		-- purely numeric bare keys become numbers ("+", not "*": an empty
		-- key must not be turned into tonumber("") == nil)
		if not quoted_key and buffer:match("^[0-9]+$") then
			buffer = tonumber(buffer)
		end

		local v = get_value()

		if v then
			-- if the key already exists in the current object, throw an
			-- error (explicit nil test so that `false` counts as existing)
			if obj[buffer] ~= nil then
				err('Cannot redefine key "' .. buffer .. '"', true)
			end

			local value = v.value

			if data_type ~= nil then
				local casted_type = cast_datatype(value, data_type)
				if casted_type ~= nil then
					value = casted_type
				end
			end

			obj[buffer] = value
		end

		-- clear the buffer
		buffer = ""
		quoted_key = false

		-- skip whitespace and comments
		parse_whitespace()
		if char() == "/" and char(1) == "/" then
			while bounds() and not char():match(nl) do
				step()
			end
		end

		-- if there is anything left on this line after parsing a key and
		-- its value, throw an error
		if not char():match(nl) and cursor < koy:len() then
			err("Invalid primitive")
		end
	elseif char() == "[" then -- table header
		buffer = ""
		step()
		local tableArray = false

		-- if there are two brackets in a row, it's a table array!
		if char() == "[" then
			tableArray = true
			step()
		end

		obj = out

		local function processKey(isLast)
			isLast = isLast or false
			buffer = trim(buffer)

			if not quoted_key and buffer == "" then
				err("Empty table name")
			end

			if isLast and obj[buffer] and not tableArray and #obj[buffer] > 0 then
				err("Cannot redefine table", true)
			end

			-- set obj to the appropriate table so we can start
			-- filling it with values!
			if tableArray then
				-- push onto cache
				if obj[buffer] then
					obj = obj[buffer]
					if isLast then
						table.insert(obj, {})
					end
					obj = obj[#obj]
				else
					obj[buffer] = {}
					obj = obj[buffer]
					if isLast then
						table.insert(obj, {})
						obj = obj[1]
					end
				end
			else
				obj[buffer] = obj[buffer] or {}
				obj = obj[buffer]
			end
		end

		while bounds() do
			if char() == "]" then
				if tableArray then
					if char(1) ~= "]" then
						err("Mismatching brackets")
					else
						step() -- skip inside bracket
					end
				end
				step() -- skip outside bracket

				processKey(true)
				buffer = ""
				break
			elseif char() == '"' or char() == "'" then
				buffer = parse_string().value
				quoted_key = true
			elseif char() == "." then
				step() -- skip period
				processKey()
				buffer = ""
			else
				buffer = buffer .. char()
				step()
			end
		end

		buffer = ""
		quoted_key = false
	elseif char() == '"' or char() == "'" then
		-- quoted key; the cursor now sits right after the closing quote, so
		-- the character there (usually the `:`) must be examined by the
		-- next iteration rather than appended to the key and skipped
		buffer = parse_string().value
		quoted_key = true
		advance = false
	end

	if advance then
		buffer = buffer .. (char():match(nl) and "" or char())
		step()
	end

	-- assert keywords
	if buffer == "import" then
		parse_whitespace()
		parse_ml_comment()
		parse_whitespace()

		if char() == '"' then -- single file import
			local file_path = parse_string()["value"]
			-- splice the file's text into the document right after the cursor
			-- https://stackoverflow.com/questions/59561776/how-do-i-insert-a-string-into-another-string-in-lua
			koy = koy:sub(1, cursor) .. "\n" .. dump_file(file_path) .. koy:sub(cursor + 1)
			buffer = ""
		elseif char() == "{" then -- multiple file import
			step() -- skip opening brace
			local file_paths = {}

			while bounds() do
				if char() == "'" or char() == '"' then
					file_paths[#file_paths + 1] = parse_string().value
				elseif char() == "," then
					step()
					skip_unreal_elements()
				elseif char() == "}" then
					break
				else
					skip_unreal_elements()
				end
			end
			step() -- skip closing brace

			local length_until_file = cursor
			for _, file_path in pairs(file_paths) do
				local file = dump_file(file_path)
				koy = koy:sub(1, length_until_file) .. "\n" .. file .. koy:sub(length_until_file + 1)
				-- +1 for the "\n" inserted in front of the file; without it
				-- the next file would land inside the end of this one
				length_until_file = length_until_file + file:len() + 1
			end
			buffer = ""
		end
	end
end

_G.obj = nil -- unset obj so as to not pollute the global scope
return out
892 | end
893 |
--- Serialise a Lua table to text.
-- Scalars are written as `key = value` lines, key/value tables under a
-- `[dotted.path]` header, arrays of tables under repeated `[[dotted.path]]`
-- headers, and other arrays as a bracketed list.
-- The input table is left untouched.
-- NOTE(review): the emitted `key = value` syntax differs from the
-- `key: value` form that KOY.decode reads - confirm the intended format.
-- @param tbl  table to serialise
-- @return string (without a trailing newline)
function KOY.encode(tbl)
	local out = {} -- output fragments, joined once at the end
	local cache = {} -- stack of enclosing table names for headers

	local function emit(text)
		out[#out + 1] = text
	end

	-- Sort the entries of `t` into plain values and nested tables, without
	-- modifying `t`. Also reports whether every key is numeric.
	local function partition(t)
		local scalars, tables = {}, {}
		local numeric_keys = true
		for key, value in pairs(t) do
			if type(key) ~= "number" then
				numeric_keys = false
			end
			if type(value) == "table" then
				tables[key] = value
			else
				scalars[key] = value
			end
		end
		return scalars, tables, numeric_keys
	end

	local function parse(tbl)
		for k, v in pairs(tbl) do
			if type(v) == "boolean" then
				emit(k .. " = " .. tostring(v) .. "\n")
			elseif type(v) == "number" then
				emit(k .. " = " .. tostring(v) .. "\n")
			elseif type(v) == "string" then
				local quote = '"'
				v = v:gsub("\\", "\\\\")

				-- if the string has any line breaks, make it multiline
				if v:match("^\n(.*)$") then
					quote = quote:rep(3)
					v = "\\n" .. v
				elseif v:match("\n") then
					quote = quote:rep(3)
				end

				v = v:gsub("\b", "\\b")
				v = v:gsub("\t", "\\t")
				v = v:gsub("\f", "\\f")
				v = v:gsub("\r", "\\r")
				v = v:gsub('"', '\\"')
				v = v:gsub("/", "\\/")
				emit(k .. " = " .. quote .. v .. quote .. "\n")
			elseif type(v) == "table" then
				local scalars, tables, array = partition(v)
				-- an array whose elements are all tables
				local arrayTable = next(scalars) == nil

				if array then
					if arrayTable then
						-- double bracket syntax go!
						table.insert(cache, k)
						for _, vv in pairs(tables) do
							emit("[[" .. table.concat(cache, ".") .. "]]\n")
							-- fresh split per element, so one element's
							-- values never show up under the next one
							local own_scalars, own_tables = partition(vv)
							parse(own_scalars)
							parse(own_tables)
						end
						table.remove(cache)
					else
						-- plain ol boring array
						emit(k .. " = [\n")
						for _, vv in pairs(scalars) do
							emit(tostring(vv) .. ",\n")
						end
						emit("]\n")
					end
				else
					-- just a key/value table, folks
					table.insert(cache, k)
					emit("[" .. table.concat(cache, ".") .. "]\n")
					parse(scalars)
					parse(tables)
					table.remove(cache)
				end
			end
		end
	end

	parse(tbl)

	-- drop the final newline
	return table.concat(out):sub(1, -2)
end
978 |
979 | return KOY
980 |
--------------------------------------------------------------------------------
/parsers/lua-parser/src/utils.lua:
--------------------------------------------------------------------------------
1 | local M = {}
2 |
-- https://stackoverflow.com/questions/9168058/how-to-dump-a-table-to-console
--- Render `o` as a single-line string. Tables are written as
-- `{ [key] = value,... } ` with non-numeric keys in double quotes; any other
-- value goes through tostring.
function M.dump_table(o)
	if type(o) ~= 'table' then
		return tostring(o)
	end

	local pieces = { '{ ' }
	for k, v in pairs(o) do
		local key = k
		if type(key) ~= 'number' then
			key = '"' .. key .. '"'
		end
		pieces[#pieces + 1] = '[' .. key .. '] = ' .. M.dump_table(v) .. ','
	end
	pieces[#pieces + 1] = '} '

	return table.concat(pieces)
end
16 |
--- Read the whole file at path `file` (binary mode) and return its contents.
-- Raises an error if the file cannot be opened.
function M.dump_file(file)
	local handle = assert(io.open(file, "rb"))
	local data = handle:read("*all")
	handle:close()
	return data
end
23 |
--- Print `tbl` to standard output, one `key: value` line per entry, with
-- nested tables printed below their key at one more level of indentation.
-- @param tbl     table to print
-- @param indent  current indentation level (default 0)
function M.tprint(tbl, indent)
	indent = indent or 0
	for k, v in pairs(tbl) do
		-- local: this used to leak into the global environment
		local formatting = string.rep(" ", indent) .. tostring(k) .. ": "
		if type(v) == "table" then
			print(formatting)
			-- recurse through the module table; there is no global `tprint`
			M.tprint(v, indent + 1)
		else
			-- tostring so that booleans, functions, etc. can be concatenated
			print(formatting .. tostring(v))
		end
	end
end
38 |
39 | return M
40 |
--------------------------------------------------------------------------------
/parsers/lua-parser/test.lua:
--------------------------------------------------------------------------------
local koy_parser = require("src.lib")
local inspect = require("deps.inspect")
local utils = require("src.utils")

-- parse the first sample document
local koy_sample = utils.dump_file("./samples/s1.koy")
local parsed_koy = koy_parser.decode(koy_sample)

-- NOTE: when using inspect, a table that is reachable more than once is
-- printed in full only the first time (tagged with an id); later occurrences
-- are printed as a reference to that id:
--[[ for example:
	{
		person1 = <1>{
			age = 121,
			name = "Michael"
		},
		person2 = <table 1>
	}
--]]

print(inspect(parsed_koy))
19 |
--------------------------------------------------------------------------------