├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── doc ├── functions.org ├── paths.org └── readme.org ├── examples ├── fibonacci.bauge ├── fizzbuzz.bauge ├── hello_world.bauge ├── loops.bauge └── test.bauge ├── ideas.org ├── readme.org └── src ├── frontend ├── coordinates.rs ├── lexer │ ├── function_box.rs │ ├── keyword.rs │ ├── mod.rs │ └── path.rs └── mod.rs ├── lib.rs ├── logging └── mod.rs └── main.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /logs -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 4 4 | 5 | [[package]] 6 | name = "anstream" 7 | version = "0.6.18" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" 10 | dependencies = [ 11 | "anstyle", 12 | "anstyle-parse", 13 | "anstyle-query", 14 | "anstyle-wincon", 15 | "colorchoice", 16 | "is_terminal_polyfill", 17 | "utf8parse", 18 | ] 19 | 20 | [[package]] 21 | name = "anstyle" 22 | version = "1.0.10" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" 25 | 26 | [[package]] 27 | name = "anstyle-parse" 28 | version = "0.2.6" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" 31 | dependencies = [ 32 | "utf8parse", 33 | ] 34 | 35 | [[package]] 36 | name = "anstyle-query" 37 | version = "1.1.2" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" 40 | dependencies = [ 41 | "windows-sys", 42 | ] 43 | 44 | [[package]] 
45 | name = "anstyle-wincon" 46 | version = "3.0.7" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" 49 | dependencies = [ 50 | "anstyle", 51 | "once_cell", 52 | "windows-sys", 53 | ] 54 | 55 | [[package]] 56 | name = "bauge" 57 | version = "0.1.0" 58 | dependencies = [ 59 | "clap", 60 | "textplots", 61 | ] 62 | 63 | [[package]] 64 | name = "bytemuck" 65 | version = "1.21.0" 66 | source = "registry+https://github.com/rust-lang/crates.io-index" 67 | checksum = "ef657dfab802224e671f5818e9a4935f9b1957ed18e58292690cc39e7a4092a3" 68 | 69 | [[package]] 70 | name = "clap" 71 | version = "4.5.30" 72 | source = "registry+https://github.com/rust-lang/crates.io-index" 73 | checksum = "92b7b18d71fad5313a1e320fa9897994228ce274b60faa4d694fe0ea89cd9e6d" 74 | dependencies = [ 75 | "clap_builder", 76 | "clap_derive", 77 | ] 78 | 79 | [[package]] 80 | name = "clap_builder" 81 | version = "4.5.30" 82 | source = "registry+https://github.com/rust-lang/crates.io-index" 83 | checksum = "a35db2071778a7344791a4fb4f95308b5673d219dee3ae348b86642574ecc90c" 84 | dependencies = [ 85 | "anstream", 86 | "anstyle", 87 | "clap_lex", 88 | "strsim", 89 | ] 90 | 91 | [[package]] 92 | name = "clap_derive" 93 | version = "4.5.28" 94 | source = "registry+https://github.com/rust-lang/crates.io-index" 95 | checksum = "bf4ced95c6f4a675af3da73304b9ac4ed991640c36374e4b46795c49e17cf1ed" 96 | dependencies = [ 97 | "heck", 98 | "proc-macro2", 99 | "quote", 100 | "syn", 101 | ] 102 | 103 | [[package]] 104 | name = "clap_lex" 105 | version = "0.7.4" 106 | source = "registry+https://github.com/rust-lang/crates.io-index" 107 | checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" 108 | 109 | [[package]] 110 | name = "colorchoice" 111 | version = "1.0.3" 112 | source = "registry+https://github.com/rust-lang/crates.io-index" 113 | checksum = 
"5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" 114 | 115 | [[package]] 116 | name = "colored" 117 | version = "2.2.0" 118 | source = "registry+https://github.com/rust-lang/crates.io-index" 119 | checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" 120 | dependencies = [ 121 | "lazy_static", 122 | "windows-sys", 123 | ] 124 | 125 | [[package]] 126 | name = "drawille" 127 | version = "0.3.0" 128 | source = "registry+https://github.com/rust-lang/crates.io-index" 129 | checksum = "e64e461c3f1e69d99372620640b3fd5f0309eeda2e26e4af69f6760c0e1df845" 130 | dependencies = [ 131 | "colored", 132 | "fnv", 133 | ] 134 | 135 | [[package]] 136 | name = "fnv" 137 | version = "1.0.7" 138 | source = "registry+https://github.com/rust-lang/crates.io-index" 139 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 140 | 141 | [[package]] 142 | name = "heck" 143 | version = "0.5.0" 144 | source = "registry+https://github.com/rust-lang/crates.io-index" 145 | checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 146 | 147 | [[package]] 148 | name = "is_terminal_polyfill" 149 | version = "1.70.1" 150 | source = "registry+https://github.com/rust-lang/crates.io-index" 151 | checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" 152 | 153 | [[package]] 154 | name = "lazy_static" 155 | version = "1.5.0" 156 | source = "registry+https://github.com/rust-lang/crates.io-index" 157 | checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" 158 | 159 | [[package]] 160 | name = "once_cell" 161 | version = "1.20.3" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" 164 | 165 | [[package]] 166 | name = "proc-macro2" 167 | version = "1.0.93" 168 | source = "registry+https://github.com/rust-lang/crates.io-index" 169 | checksum = 
"60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" 170 | dependencies = [ 171 | "unicode-ident", 172 | ] 173 | 174 | [[package]] 175 | name = "quote" 176 | version = "1.0.38" 177 | source = "registry+https://github.com/rust-lang/crates.io-index" 178 | checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" 179 | dependencies = [ 180 | "proc-macro2", 181 | ] 182 | 183 | [[package]] 184 | name = "rgb" 185 | version = "0.8.50" 186 | source = "registry+https://github.com/rust-lang/crates.io-index" 187 | checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a" 188 | dependencies = [ 189 | "bytemuck", 190 | ] 191 | 192 | [[package]] 193 | name = "strsim" 194 | version = "0.11.1" 195 | source = "registry+https://github.com/rust-lang/crates.io-index" 196 | checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" 197 | 198 | [[package]] 199 | name = "syn" 200 | version = "2.0.98" 201 | source = "registry+https://github.com/rust-lang/crates.io-index" 202 | checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" 203 | dependencies = [ 204 | "proc-macro2", 205 | "quote", 206 | "unicode-ident", 207 | ] 208 | 209 | [[package]] 210 | name = "textplots" 211 | version = "0.8.7" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | checksum = "8f7657a0066c9f9663659db0665319adff8b0943305fc73eddf1010e5a2072b1" 214 | dependencies = [ 215 | "drawille", 216 | "rgb", 217 | ] 218 | 219 | [[package]] 220 | name = "unicode-ident" 221 | version = "1.0.17" 222 | source = "registry+https://github.com/rust-lang/crates.io-index" 223 | checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe" 224 | 225 | [[package]] 226 | name = "utf8parse" 227 | version = "0.2.2" 228 | source = "registry+https://github.com/rust-lang/crates.io-index" 229 | checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 230 | 231 | [[package]] 232 | name = 
"windows-sys" 233 | version = "0.59.0" 234 | source = "registry+https://github.com/rust-lang/crates.io-index" 235 | checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 236 | dependencies = [ 237 | "windows-targets", 238 | ] 239 | 240 | [[package]] 241 | name = "windows-targets" 242 | version = "0.52.6" 243 | source = "registry+https://github.com/rust-lang/crates.io-index" 244 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 245 | dependencies = [ 246 | "windows_aarch64_gnullvm", 247 | "windows_aarch64_msvc", 248 | "windows_i686_gnu", 249 | "windows_i686_gnullvm", 250 | "windows_i686_msvc", 251 | "windows_x86_64_gnu", 252 | "windows_x86_64_gnullvm", 253 | "windows_x86_64_msvc", 254 | ] 255 | 256 | [[package]] 257 | name = "windows_aarch64_gnullvm" 258 | version = "0.52.6" 259 | source = "registry+https://github.com/rust-lang/crates.io-index" 260 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 261 | 262 | [[package]] 263 | name = "windows_aarch64_msvc" 264 | version = "0.52.6" 265 | source = "registry+https://github.com/rust-lang/crates.io-index" 266 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 267 | 268 | [[package]] 269 | name = "windows_i686_gnu" 270 | version = "0.52.6" 271 | source = "registry+https://github.com/rust-lang/crates.io-index" 272 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 273 | 274 | [[package]] 275 | name = "windows_i686_gnullvm" 276 | version = "0.52.6" 277 | source = "registry+https://github.com/rust-lang/crates.io-index" 278 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 279 | 280 | [[package]] 281 | name = "windows_i686_msvc" 282 | version = "0.52.6" 283 | source = "registry+https://github.com/rust-lang/crates.io-index" 284 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 285 | 286 | [[package]] 287 | name = 
"windows_x86_64_gnu" 288 | version = "0.52.6" 289 | source = "registry+https://github.com/rust-lang/crates.io-index" 290 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 291 | 292 | [[package]] 293 | name = "windows_x86_64_gnullvm" 294 | version = "0.52.6" 295 | source = "registry+https://github.com/rust-lang/crates.io-index" 296 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 297 | 298 | [[package]] 299 | name = "windows_x86_64_msvc" 300 | version = "0.52.6" 301 | source = "registry+https://github.com/rust-lang/crates.io-index" 302 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 303 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | edition = "2024" 3 | name = "bauge" 4 | version = "0.1.0" 5 | 6 | [dependencies] 7 | clap = {version = "4.5.29", features = ["derive"]} 8 | textplots = "0.8.7" 9 | -------------------------------------------------------------------------------- /doc/functions.org: -------------------------------------------------------------------------------- 1 | #+Title: Functions 2 | 3 | *Functions* are the main way to work on the data stored on [[./paths.org][Paths]]. They 4 | can be user-defined, and there are some built-in functions in the 5 | standard library. 6 | 7 | * Function application 8 | Functions are applied on Paths by writing the functions and the 9 | potential arguments on them. For example, this path gets applied the 10 | function =foo= to its stored value. 11 | 12 | #+Begin_src bauge 13 | 20 ─────── foo ─────── #(contains foo(20))# 14 | #+End_src 15 | 16 | The same way as the initial values for a path, function calls should 17 | be written on the same line if the path is horizontal; or, if the path 18 | is vertical, the name should have at least one character between the 19 | two ends of the line. 
So, these function calls are valid: 20 | 21 | #+Begin_src bauge 22 | 12 ──────── foo ───────┐ 23 | │ 24 | bar 25 | └────── 26 | #+End_src 27 | 28 | But these are not: 29 | 30 | #+Begin_src bauge 31 | foo 32 | 12 ────────────────────┐ 33 | │ 34 | bar 35 | └────── 36 | #+End_src 37 | 38 | ** Arguments 39 | Functions can take /arguments/. Arguments are mainly supplied to the 40 | functions via the values the Path holds. They are passed to the 41 | function in order: if a Path holds (20, "bar"), then a function taking 42 | two parameters will get 20 as the first parameter, and "bar" as the 43 | second. 44 | 45 | #+Begin_src bauge 46 | 20 ─────┐ 47 | │ 48 | "bar" ──┴─────── foo ─────── 49 | #+End_src 50 | 51 | It is possible for a function to be applied on a path with more values 52 | than the function has arguments. In this case, only the first 53 | arguments that fit the function's definition will be used. For 54 | example, if =foo()= only takes one argument, only the first value on the 55 | Path is used: 56 | 57 | #+Begin_src bauge 58 | 20 ─────┐ 59 | │ 60 | "bar" ──┴─────── foo ─────── #(only 20 is used)# 61 | #+End_src 62 | 63 | It is also possible to provide /immediate arguments/; that is, provide 64 | directly a value to an argument that isn't on the Path. In this case, 65 | the use of the placeholder =_= is needed to signal at which position the 66 | values on the Path should be. Let's look at an example to understand 67 | this better. Let's say we have the function =+=: it takes 2 arguments 68 | and returns the addition of these two arguments. Let's say we want to 69 | have a Bauge program that increments a value. We could do it the 70 | "traditional way": 71 | 72 | #+Begin_src bauge 73 | 1 ────┐ 74 | │ 75 | ╾─────┴──── + ────── #(contains the incremented argument)# 76 | #+End_src 77 | 78 | But getting all the arguments on the Path before adding is 79 | cumbersome. 
Since it is a constant, we can directly use =1= on the 80 | function invocation: 81 | 82 | #+Begin_src bauge 83 | ╾─────── + 1 ────── #(uses an immediate argument)# 84 | #+End_src 85 | 86 | Doing so means that the 1 takes the first argument position: we are 87 | computing =1 + argument=. 88 | 89 | Now let's try to decrement the value. Easy, let's replace the =+= with a 90 | =-=: 91 | 92 | #+Begin_src bauge 93 | ╾─────── - 1 ────── 94 | #+End_src 95 | 96 | Remember what the immediate argument does: it takes precedence over 97 | the variables on the Path! So what we're really computing is... =1 - 98 | argument=. This isn't a decremented value at all! 99 | 100 | As we said before, we need the placeholder =_= character. This indicates 101 | that the particular argument should be taken in priority by values on 102 | the Path. With that, we can write our decrementing program: 103 | 104 | #+Begin_src bauge 105 | ╾─────── - _ 1 ────── #(contains the decremented value!)# 106 | #+End_src 107 | 108 | Note: this way of writing mathematical operations is called the /Polish 109 | Notation/. Another famous way of writing such operations is the /Reverse 110 | Polish Notation/: =1 2 += would mean =1 + 2= in this system. 111 | 112 | It is also possible to call a function without using arguments from 113 | the Path at all if all the arguments are provided as immediate 114 | arguments. 115 | 116 | ** Returned value 117 | A function *completely replaces* the values on the Path with its return 118 | value. Nothing is kept from the time before the function call: a Path 119 | only contains the return value of the function call after it has been 120 | called. 
One must be careful when calling functions; if there is a need 121 | to keep the other values on the Path, a junction out then in does the 122 | trick: 123 | 124 | #+Begin_src bauge 125 | 20 ──────┬──────────┬──── #(now contains 20, 30)# 126 | └── + 10 ──┘ 127 | #+End_src 128 | 129 | * Defining functions 130 | Users can define functions in their programs, in order to use them 131 | later. Functions are basically Bauge code blocks, enclosed in a /box/, 132 | with eventually some documentation. A function has a /signature/, 133 | describing the types of the arguments. 134 | 135 | ** The function box 136 | Functions are defined inside *boxes*. These are written using heavy box 137 | characters, and define the /area/ in which the function is 138 | written. A box is always rectangular. 139 | 140 | | Character | UTF-8 Codepoint | Unicode name | 141 | |-------------+-----------------+--------------------------------------------| 142 | | =┏= | U+250F | BOX DRAWINGS HEAVY DOWN AND RIGHT | 143 | | =┓= | U+2513 | BOX DRAWINGS HEAVY DOWN AND LEFT | 144 | | =┗= | U+2517 | BOX DRAWINGS HEAVY UP AND RIGHT | 145 | | =┛= | U+251B | BOX DRAWINGS HEAVY UP AND LEFT | 146 | | =┃= | U+2503 | BOX DRAWINGS HEAVY VERTICAL | 147 | | =━= | U+2501 | BOX DRAWINGS HEAVY HORIZONTAL | 148 | | =╋= | U+254B | BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL | 149 | | =┣= | U+2523 | BOX DRAWINGS HEAVY VERTICAL AND RIGHT | 150 | | =┫= | U+252B | BOX DRAWINGS HEAVY VERTICAL AND LEFT | 151 | | =┳= | U+2533 | BOX DRAWINGS HEAVY DOWN AND HORIZONTAL | 152 | | =┻= | U+253B | BOX DRAWINGS HEAVY UP AND HORIZONTAL | 153 | 154 | They look like this: 155 | 156 | #+Begin_src bauge 157 | ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ 158 | ┃ ┃ 159 | ┃ #(write your function here)# ┃ 160 | ┃ ┃ 161 | ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ 162 | #+End_src 163 | 164 | This isn't a complete box. A function box needs the function /signature/. 
165 | 166 | ** Function signature 167 | A function's signature is the contract between you and the 168 | function. It tells you the name of the function, how many arguments 169 | the function expects, what types these arguments are, and the type of 170 | the returned value. A signature has the following structure: 171 | 172 | #+Begin_src 173 | function_name: (Arg1Type, Arg2Type, ...) -> ReturnType 174 | #+End_src 175 | 176 | If the function only takes one argument, the parentheses can be omitted: 177 | 178 | #+Begin_src 179 | function_name: Arg1Type -> ReturnType 180 | #+End_src 181 | 182 | The function's signature is placed on the top line of the function, 183 | like this: 184 | 185 | #+Begin_src bauge 186 | ┏━ foo: Int -> String ━━━━━━━━━━━┓ 187 | ┃ ┃ 188 | ┃ #(write your function here)# ┃ 189 | ┃ ┃ 190 | ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ 191 | #+End_src 192 | 193 | The signature shouldn't go over the vertical line on the right; if 194 | need be, make the box larger. -------------------------------------------------------------------------------- /doc/paths.org: -------------------------------------------------------------------------------- 1 | #+Title: Paths 2 | 3 | The *Path* is the fundamental concept of Bauge. A Bauge program follows 4 | one or more paths, sequentially or in parallel, and executes the 5 | instructions that are on the path. 6 | 7 | * How Paths are written 8 | Paths use a wide range of characters to form different directions, 9 | branchings, starts and ends. 
Here are the possible characters used for 10 | paths: 11 | 12 | | Character | UTF-8 Codepoint | Unicode name | 13 | |-------------+-----------------+--------------------------------------------| 14 | | =┌= | U+250C | BOX DRAWINGS LIGHT DOWN AND RIGHT | 15 | | =┐= | U+2510 | BOX DRAWINGS LIGHT DOWN AND LEFT | 16 | | =└= | U+2514 | BOX DRAWINGS LIGHT UP AND RIGHT | 17 | | =┘= | U+2518 | BOX DRAWINGS LIGHT UP AND LEFT | 18 | | =│= | U+2502 | BOX DRAWINGS LIGHT VERTICAL | 19 | | =─= | U+2500 | BOX DRAWINGS LIGHT HORIZONTAL | 20 | | =┼= | U+253C | BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL | 21 | | =├= | U+251C | BOX DRAWINGS LIGHT VERTICAL AND RIGHT | 22 | | =┤= | U+2524 | BOX DRAWINGS LIGHT VERTICAL AND LEFT | 23 | | =┬= | U+252C | BOX DRAWINGS LIGHT DOWN AND HORIZONTAL | 24 | | =┴= | U+2534 | BOX DRAWINGS LIGHT UP AND HORIZONTAL | 25 | 26 | Paths can also take the form of /doubly-lined Paths/, which mean they 27 | are executed in parallel (more on that later). The characters used are: 28 | 29 | | Character | UTF-8 Codepoint | Unicode name | 30 | |-------------+-----------------+---------------------------------------------| 31 | | =╔= | U+2554 | BOX DRAWINGS DOUBLE DOWN AND RIGHT | 32 | | =╗= | U+2557 | BOX DRAWINGS DOUBLE DOWN AND LEFT | 33 | | =╚= | U+255A | BOX DRAWINGS DOUBLE UP AND RIGHT | 34 | | =╝= | U+255D | BOX DRAWINGS DOUBLE UP AND LEFT | 35 | | =║= | U+2551 | BOX DRAWINGS DOUBLE VERTICAL | 36 | | =═= | U+2550 | BOX DRAWINGS DOUBLE HORIZONTAL | 37 | | =╬= | U+256C | BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL | 38 | | =╠= | U+2560 | BOX DRAWINGS DOUBLE VERTICAL AND RIGHT | 39 | | =╣= | U+2563 | BOX DRAWINGS DOUBLE VERTICAL AND LEFT | 40 | | =╦= | U+2566 | BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL | 41 | | =╩= | U+2569 | BOX DRAWINGS DOUBLE UP AND HORIZONTAL | 42 | 43 | Additionally, there are characters that are used to join double Paths 44 | and single Paths: 45 | 46 | | Character | UTF-8 Codepoint | Unicode name | 47 | 
|-------------+-----------------+----------------------------------------------------| 48 | | =╒= | U+2552 | BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE | 49 | | =╓= | U+2553 | BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE | 50 | | =╕= | U+2555 | BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE | 51 | | =╖= | U+2556 | BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE | 52 | | =╘= | U+2558 | BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE | 53 | | =╙= | U+2559 | BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE | 54 | | =╛= | U+255B | BOX DRAWINGS UP SINGLE AND LEFT DOUBLE | 55 | | =╜= | U+255C | BOX DRAWINGS UP DOUBLE AND LEFT SINGLE | 56 | | =╞= | U+255E | BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE | 57 | | =╟= | U+255F | BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE | 58 | | =╡= | U+2561 | BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE | 59 | | =╢= | U+2562 | BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE | 60 | | =╤= | U+2564 | BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE | 61 | | =╥= | U+2565 | BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE | 62 | | =╧= | U+2567 | BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE | 63 | | =╨= | U+2568 | BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE | 64 | | =╪= | U+256A | BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE | 65 | | =╫= | U+256B | BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE | 66 | 67 | There are also characters that are used to draw blocks: 68 | 69 | | Character | UTF-8 Codepoint | Unicode name | 70 | |-----------|-----------------|-------------------------------------------| 71 | | =━= | U+2501 | BOX DRAWINGS HEAVY HORIZONTAL | 72 | | =┃= | U+2503 | BOX DRAWINGS HEAVY VERTICAL | 73 | | =┏= | U+250F | BOX DRAWINGS HEAVY DOWN AND RIGHT | 74 | | =┓= | U+2513 | BOX DRAWINGS HEAVY DOWN AND LEFT | 75 | | =┗= | U+2517 | BOX DRAWINGS HEAVY UP AND RIGHT | 76 | | =┛= | U+251B | BOX DRAWINGS HEAVY UP AND LEFT | 77 | | =┣= | U+2523 | BOX DRAWINGS HEAVY VERTICAL AND RIGHT | 78 | | =┫= | U+252B | BOX DRAWINGS HEAVY VERTICAL AND LEFT | 79 | | =┳= | U+2533 | BOX 
DRAWINGS HEAVY DOWN AND HORIZONTAL | 80 | | =┻= | U+253B | BOX DRAWINGS HEAVY UP AND HORIZONTAL | 81 | | =╋= | U+254B | BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL | 82 | 83 | Finally, there are special characters used in Paths that have a 84 | specific meaning: 85 | 86 | | Characters | UTF-8 Codepoints | Meaning | 87 | |--------------------+------------------+-----------------------------------------| 88 | | =╼=, =╽=, =╾=, =╿= | U+257C to U+257F | Start the Path with a function argument | 89 | | =╳= | U+2573 | End the program with an error | 90 | | =→=, =←=, =↑=, =↓= | U+2190 to U+2193 | Returns with the content of the path | 91 | 92 | Let's put this in action. Here is an example of Bauge Paths, using 93 | both single and double Paths, as well as an error termination, an 94 | argument start and a return (the semantics of branching out and in 95 | are discussed later in the documentation). 96 | 97 | #+Begin_src bauge 98 | ╾──────────┐ 99 | ┌──────────┘ ╔══╗ 100 | │ ┌───╢ ╟───┐ 101 | └──────────────┤ ╚══╝ │ 102 | └───┬──────┴────→ 103 | ╳──────────────────┘ 104 | #+End_src 105 | 106 | * Semantics of the Path 107 | A Path is essentially an *ordered list of values*. Each path can contain 108 | one or more values of any type; value types can be heterogeneous. Paths 109 | can either start with an argument of the current function, or with an 110 | initial, arbitrary value. 111 | 112 | ** Start of a Path 113 | *** Initial value 114 | Paths can start with an initial value. For example, this path starts 115 | with the value =20=. To be more accurate, the Path holds a list of 116 | values containing a single one, with the value of =20=. 117 | 118 | #+Begin_src bauge 119 | 20 ─────────────── 120 | #+End_src 121 | 122 | When starting the Path in this manner in a /horizontal orientation/, 123 | the value must be written on the *same line* as the starting point of 124 | the Path. 
In a /vertical orientation/, the value must have at least one 125 | character (whitespace or not) *above or below the starting point*. For 126 | example, the following paths are valid: 127 | 128 | #+Begin_src bauge 129 | 20 ────────── 130 | 131 | "hello world" 132 | │ 133 | └──── 134 | #+End_src 135 | 136 | But the following paths are not: 137 | 138 | #+Begin_src bauge 139 | 140 | 42 141 | ──────── 142 | ┌─ 143 | │ 144 | "foobar" 145 | #+End_src 146 | 147 | *** Function argument 148 | Paths can also start with the value of an argument of a function, 149 | using the special characters mentioned above. If we position ourselves 150 | in the context of the function =fibonacci(Int) -> Int=, then the path 151 | below holds the value of the first (and only) argument of the 152 | function. 153 | 154 | #+Begin_src bauge 155 | ╾───────────────── 156 | #+End_src 157 | 158 | When a function has multiple arguments, we consider the arguments 159 | starting from the leftmost one to the rightmost one; and we attribute 160 | them to starting paths in the following order: 161 | - Left to right 162 | - Top to bottom 163 | - Right to left 164 | - Bottom to top 165 | 166 | If there are multiple starting points with the same orientation, we 167 | order them in the following fashion: 168 | - Left to right: *top to bottom*, then if there are still multiple on 169 | the same line, *left to right*. 170 | - Top to bottom: *left to right*, then if there are still multiple on 171 | the same row, *top to bottom*. 172 | - Right to left: *top to bottom*, then if there are still multiple on 173 | the same line, *right to left*. 174 | - Bottom to top: *left to right*, then if there are still multiple on 175 | the same line, *bottom to top*. 176 | 177 | In order to illustrate, here are all the possible cases for the 178 | ordering of the arguments. The number next to the start of a Path 179 | indicates which argument it will hold: 1 means the first argument from 180 | the left, 2 the second, ... 
181 | 182 | #+Begin_src bauge 183 | 1 ╾ 2 ╾ 4 ╿ 5 ╿ 184 | ╼ 7 185 | 3 ╾ ╽ 12 6 ╿ 186 | 187 | ╼ 9 ╼ 8 188 | ╽ 10 ╽ 11 189 | #+End_src 190 | 191 | Let's take a more concrete case: we consider the function =square(Int, 192 | Int, Int, Int) -> Square=, which takes the X and Y coordinates of a 193 | square, and its Width and Height, and returns a Square structure. We 194 | can then order our arguments in this way (the text here is indicative 195 | and is not tied to any correct syntax; it only illustrates which 196 | argument is on which path). 197 | 198 | #+Begin_src bauge 199 | ╿ ╿ 200 | Width Height 201 | 202 | ╾── X 203 | 204 | ╾── Y 205 | #+End_src 206 | 207 | **** Multiple arguments on a single path 208 | If there are fewer Path argument starts than there are arguments to the 209 | function, then all the remaining arguments are stored on the last 210 | Path. For example, we can rewrite the start of the previous =square= 211 | function in this manner: 212 | 213 | #+Begin_src bauge 214 | ╾── X, Y, Width, Height 215 | #+End_src 216 | 217 | The Path contains a list of 4 values, being the arguments of the 218 | function in order. 219 | 220 | ** Conditionality of paths 221 | Paths can either be *conditional* or *unconditional*, depending on how 222 | they are generated. 223 | 224 | *Unconditional* Paths are the default Paths in Bauge. Each unconditional 225 | Path is executed at some point, unless the program exits in the 226 | meantime. 227 | 228 | *Conditional* Paths stem from keywords such as =if=, =match=, and so on. The 229 | program can potentially take all of the Paths, but only one will be 230 | chosen in a single iteration. 231 | 232 | In the following example, unconditional Paths are marked with a =U=, and 233 | conditional Paths are marked with a =C=. This syntax is purely for 234 | demonstrative purposes and does not reflect any meaningful keyword or 235 | syntax of Bauge. 
236 | 237 | #+Begin_src bauge 238 | 20 ─────U────── 239 | ┌───C─── 240 | 40 ─────U────── if ──┤ 241 | └───C─── 242 | #+End_src 243 | 244 | In this example, both of the unconditional Paths will be executed 245 | sequentially; but on the second Path, only one of the two conditional 246 | Paths will be taken. 247 | 248 | If an unconditional Path joins another unconditional Path, then the 249 | Paths are merged, and the new Path is considered having two starts 250 | (more on that later). 251 | 252 | ** Path execution 253 | We only consider here single Paths; that is, Paths using the "single" 254 | version of the characters, and not the "double" characters. "Double" 255 | characters denote parallel Paths, which are covered in their own 256 | section. 257 | 258 | *** Returns 259 | In any case, a Bauge code block can have *one and only one 260 | unconditional return*. Unconditional returns are ends of unconditional 261 | Paths. A block can have as many *conditional returns* (returns on 262 | conditional Paths), as long as they *come from the same unconditional 263 | Path*. If there is already an unconditional return, there cannot be any 264 | other return, even conditional. 265 | 266 | For example, the following block is valid: there is only one 267 | unconditional Path, with two starts and a junction. There are two 268 | returns, but they are conditional, and are generated from the same 269 | unconditional Path. 270 | 271 | #+Begin_src bauge 272 | ╾──────────┐ ┌────→ 273 | │ │ 274 | ╾──────────┴──── if ─┴────→ 275 | #+End_src 276 | 277 | While the following is not: there is an unconditional return at the 278 | top, and two conditional returns underneath. While having two 279 | conditional returns is OK, the unconditional return makes the block 280 | invalid. 
281 | 282 | #+Begin_src bauge 283 | ╾─────────────────────────→ 284 | ┌─────────────→ 285 | ╾────── if ─┴─────────────→ 286 | #+End_src 287 | 288 | *** Branching out 289 | A Path can branch out unconditionally (without any =if=, =match=, 290 | ... keywords). This creates a *copy* of the Path, and all of its 291 | values. For example, here we duplicate a Path, and both the new and 292 | old path have the same values (the =#(...)#= notation is a comment; 293 | comments are covered in their own section). 294 | 295 | #+Begin_src bauge 296 | "foo" ────────┬─────── #(contains "foo")# 297 | │ 298 | └─────── #(also contains "foo")# 299 | #+End_src 300 | 301 | The two Paths are *bound unconditionally*. This means that they are, in 302 | context, unconditional, even if the starting Path is conditional. This 303 | enables us to avoid this kind of problem: 304 | 305 | #+Begin_src bauge 306 | ╾──────── if ───┬───────────→ 307 | │ 308 | └──────┬────→ 309 | └────→ 310 | #+End_src 311 | 312 | If the program ends up taking the top conditional Path, there is only 313 | one return, so everything is fine. But the bottom conditional Path 314 | branches out unconditionally, creating two copies of the same 315 | conditional Path. Since both of them, in this context, will be 316 | executed, there is a conflict for the return. This is why they are 317 | bound unconditionally and considered as such. Let's illustrate with 318 | labels to understand better: 319 | 320 | #+Begin_src bauge 321 | ╾───U0─── if ───┬──────────C0────────────→ C0 322 | │ 323 | └─────C0──────┬───C0U1───→ C0U1 324 | └───C0U2───→ C0U2 325 | #+End_src 326 | 327 | The first return comes from the path C0, which is conditional. The 328 | other two also come from C0, but are first and foremost unconditional 329 | themselves, making the block invalid. 330 | 331 | *** Branching in 332 | Paths can join another by /branching in/. The semantics of branching in 333 | have different meanings depending on the type of the Path. 
334 | 335 | **** Unconditional branching in 336 | *Unconditional* branching in, that is, when an unconditional Path merges 337 | into another unconditional Path, *appends or prepends the values of the 338 | incoming Path to the receiving Path*, depending on where the incoming 339 | Path comes from. 340 | 341 | Values are added to the start of the list of values of the receiving 342 | Path if the incoming Path joins in from the left (if the receiving 343 | Path is /vertical/) or from the top (if it is /horizontal/). 344 | 345 | #+Begin_src bauge 346 | 20 ─────────┐ 347 | │ 348 | "foo" ──────┴─────── #(contains 20, "foo")# 349 | #+End_src 350 | 351 | Values are added to the end of the list if the incoming Path comes 352 | from the right for /vertical/ Paths or the bottom for /horizontal/ Paths. 353 | 354 | #+Begin_src bauge 355 | "foo" ───────┬────── #(contains "foo", 20)# 356 | │ 357 | 20 ──────────┘ 358 | #+End_src 359 | 360 | Branching in can be combined with branching out to emulate the 361 | creation of variables in languages such as Python (more on the 362 | application of functions later): 363 | 364 | #+Begin_src bauge 365 | 42 ──────┬─────────────┬────── #(contains 42, 45)# 366 | └──── + 3 ────┘ 367 | #+End_src 368 | 369 | **** Conditional branching in 370 | Two conditional Paths can join, merging together. If the two 371 | conditional Paths "end" the current conditionality scope, that is, if 372 | they are the last of their scope, then the scope is closed and the 373 | Path assumes the conditionality of the scope before it. Let's take an 374 | example to understand: 375 | 376 | #+Begin_src bauge 377 | ╾───U0─── if ───┬───U0C0────┬─────U0───── 378 | │ │ 379 | └───U0C0────┘ 380 | #+End_src 381 | 382 | When we conditionally branch out due to the =if=, the scope is =U0 C0=: 383 | the conditional Path group =C0= that was generated from the 384 | unconditional Path =U0=. 
But then, we join the last two Paths belonging 385 | to that scope: since having a single Path belonging to a conditional 386 | group is useless, the scope is ended, and the Path continues as an 387 | unconditional one. 388 | 389 | This is similar to how an =if...else= scope is ended in Rust: 390 | 391 | #+Begin_src rust 392 | // Here, we are at U0 393 | if condition { 394 | // Top branch, scope is U0 C0 395 | } else { 396 | // Bottom branch, scope is U0 C0 397 | } 398 | // Conditional scope ended, scope is the same as before: U0 399 | #+End_src 400 | 401 | * Controlling data on a Path 402 | There are multiple ways to control and modify the values stored in 403 | a Path. One of them is using [[./functions.org][function calls]], which are discussed in their 404 | own documentation; another is the use of *Path keywords*. These keywords 405 | can be used to retain one or multiple variables on the Path, modify 406 | their order, and so on. These operations rely on the fact that the variables 407 | on Paths take the form of a list. 408 | 409 | ** Retaining variables 410 | This category of keywords enables you to select one or more variables 411 | in the list to retain, while discarding the others. 412 | 413 | *** =head= and =tail=: first and last value 414 | The =head= keyword retains only the first variable on the Path, while 415 | =tail= retains the last. 416 | 417 | #+Begin_src Bauge 418 | 20 ───────┐ 419 | │ 420 | "foo" ────┴───── head ───── #(contains only 20)# 421 | 422 | 423 | 20 ───────┐ 424 | │ 425 | "foo" ────┴───── tail ───── #(contains only "foo")# 426 | #+End_src 427 | 428 | Using either =head= or =tail= when the Path is empty is an error. 
429 | 430 | *** =empty=: retain nothing 431 | If you need to clear the values on a Path, you can use =empty=: 432 | 433 | #+Begin_src bauge 434 | 20 ─────── empty ─────── #(contains nothing)# 435 | #+End_src 436 | 437 | *** =first= and =last=: first n and last n values 438 | If you need to retain more than one value either at the beginning or 439 | the end, you can use either =first n= to retain the first =n= values, or 440 | =last n= for the last =n= values on the Path. The argument to the keyword 441 | can be passed immediately or taken from the first value on the Path (more details 442 | in [[./functions.org][the documentation of function calls]]). 443 | 444 | #+Begin_src bauge 445 | 2 ──────┐ 446 | │ 447 | "foo" ──┴──┬──── last 2 ─── #(contains "foo", "bar")# 448 | │ 449 | "bar" ─────┘ 450 | #+End_src 451 | 452 | #+Begin_src bauge 453 | 2 ──────┐ 454 | │ 455 | "foo" ──┴──┬──── last ─── #(contains "foo", "bar" since the first value on the Path is 2)# 456 | │ 457 | "bar" ─────┘ 458 | #+End_src 459 | 460 | Note: these should not be confused with =head= and =tail=, which only retain one 461 | value either at the start or end of the Path list. 462 | 463 | ** Modifying the order 464 | 465 | ** Aggregating and expanding 466 | -------------------------------------------------------------------------------- /doc/readme.org: -------------------------------------------------------------------------------- 1 | #+Title: Bauge documentation 2 | 3 | This is the index of the Bauge language documentation. This file lists 4 | the different pieces of documentation for the language. 
5 | 6 | - [[./paths.org][Paths]] 7 | - [[./functions.org][Functions]] 8 | -------------------------------------------------------------------------------- /examples/fibonacci.bauge: -------------------------------------------------------------------------------- 1 | ┏━ fibonacci: (Int) -> Int ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ 2 | ┃ ┃ 3 | ┃ This function calculates the Fibonacci number for any ┃ 4 | ┃ unsigned integer. This implementation uses the recursive ┃ 5 | ┃ method without any memoisation; it is done by checking ┃ 6 | ┃ the first argument: ┃ 7 | ┃ - If it's 0 or 1, then we return directly the argument, ┃ 8 | ┃ - If not, we calculate recursively the previous two ┃ 9 | ┃ Fibonacci numbers, and add them to get the current one. ┃ 10 | ┃ ┃ 11 | ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ 12 | ┃ ┃ 13 | ┃ ┌─ 0 ───→ ┃ 14 | ┃ │ ┃ 15 | ┃ ╾──── match ──┼─ 1 ───→ ┃ 16 | ┃ │ ┃ 17 | ┃ └─ _ ────┬─ - _ 1 ─── fibonacci ───┬─ + ───→ ┃ 18 | ┃ └─ - _ 2 ─── fibonacci ───┘ ┃ 19 | ┃ ┃ 20 | ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ 21 | 22 | ┏━ main: () -> () ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ 23 | ┃ ┃ 24 | ┃ ┏━ Int -> () ━━━━━━━━━━━━━━━━━━━┓ ┃ 25 | ┃ ╾──── 0..10 ─── foreach ┃ ╾─ fibonacci ── println ──→ ┃ ─────────────────────────────→ ┃ 26 | ┃ ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ ┃ 27 | ┃ ┃ 28 | ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ -------------------------------------------------------------------------------- /examples/fizzbuzz.bauge: -------------------------------------------------------------------------------- 1 | ┏━ fizzbuzz: (Int) -> () ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ 2 | ┃ ┃ 3 | ┃ - Print the integer passed as a parameter. ┃ 4 | ┃ - If the integer is divisible by 3, print "Fizz" instead. 
┃ 5 | ┃ - If the integer is divisible by 5, print "Buzz" instead. ┃ 6 | ┃ - If the integer is divisible by both 3 and 5, print ┃ 7 | ┃ "FizzBuzz" instead. ┃ 8 | ┃ ┃ 9 | ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ 10 | ┃ ┃ 11 | ┃ ╾──┬─ % _ 3 ─┬── match ─┬─ (0, 0) ── println "FizzBuzz" ─┬─→ ┃ 12 | ┃ └─ % _ 5 ─┘ ├─ (0, _) ── println "Fizz" ─────┤ ┃ 13 | ┃ ├─ (_, 0) ── println "Buzz" ─────┤ ┃ 14 | ┃ ├─ (_, _) ── println ────────────┘ ┃ 15 | ┃ └─ _ ────────╳ ┃ 16 | ┃ ┃ 17 | ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ -------------------------------------------------------------------------------- /examples/hello_world.bauge: -------------------------------------------------------------------------------- 1 | ┏━ hello_world: () -> () #[main] ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ 2 | ┃ ┃ 3 | ┃ Prints "Hello, World!" to the screen and exits. ┃ 4 | ┃ ┃ 5 | ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ 6 | ┃ ┃ 7 | ┃ ╾──────────────── println "Hello, World!" ────────────────→ ┃ 8 | ┃ ┃ 9 | ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ -------------------------------------------------------------------------------- /examples/loops.bauge: -------------------------------------------------------------------------------- 1 | ┏━ loop: () -> ! 
━━┓ 2 | ┃ ┃ 3 | ┃ ╾──┬───────┐ ┃ 4 | ┃ ↑ │ ┃ 5 | ┃ └───────┘ ┃ 6 | ┃ ┃ 7 | ┗━━━━━━━━━━━━━━━━━━┛ 8 | 9 | ┏━ foreach_inline: () -> () ━━━━━━━━━━━━━━━━┓ 10 | ┃ ┃ 11 | ┃ ┏━ Int -> () ━━━┓ ┃ 12 | ┃ ╾── foreach (0..10) ┃ ╾─ println ─→ ┃ ─→ ┃ 13 | ┃ ┗━━━━━━━━━━━━━━━┛ ┃ 14 | ┃ ┃ 15 | ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ 16 | 17 | ┏━ foreach_accumulated: () -> () ━━━━━━┓ 18 | ┃ ┃ 19 | ┃ (0..10) ─────────────┐ ┃ 20 | ┃ │ ┃ 21 | ┃ ┏━ Int -> () ━━━┓ │ ┃ 22 | ┃ ┃ ╾─ println ─→ ┃ ─┐ │ ┃ 23 | ┃ ┗━━━━━━━━━━━━━━━┛ │ │ ┃ 24 | ┃ │ │ ┃ 25 | ┃ ╾──────────────────┴─┴─ foreach ─→ ┃ 26 | ┃ ┃ 27 | ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ 28 | 29 | ┏━ manual_while: () -> () ━━━━━━━━━━━━━━━━━━━━━━━┓ 30 | ┃ ┃ 31 | ┃ ╾─ 0 ─┬─ if < _ 10 ────────────────┬─ else ─→ ┃ 32 | ┃ ↑ then ┃ 33 | ┃ └─ + 1 ───── println ────────┘ ┃ 34 | ┃ ┃ 35 | ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ -------------------------------------------------------------------------------- /examples/test.bauge: -------------------------------------------------------------------------------- 1 | 0x1234_6780 helloworld -------------------------------------------------------------------------------- /ideas.org: -------------------------------------------------------------------------------- 1 | #+Title: Bauge programming language initial ideas and design document 2 | 3 | * What is Bauge 4 | Bauge is an idea for a programming language that uses /box-drawing 5 | characters/ as its basis for execution flow. Box drawing characters can 6 | be use to draw figures using Unicode characters, such as this: 7 | 8 | #+Begin_example 9 | ┌─────────────┐ 10 | │ Box example ├── Text on line ──→ 11 | └─────────────┘ 12 | #+End_example 13 | 14 | Different fonts might render this example differently, depending on 15 | their support of the characters. 16 | 17 | The idea for Bauge came from the [[https://hackage.haskell.org/package/needle][Needle package for Haskell]], which 18 | "ASCII-fied arrow notation". 
I want to take the idea further, and make 19 | a small, working programming language using box drawing characters and 20 | arrows. 21 | 22 | The main aspects I want Bauge to have are: 23 | - Purely/mainly functional programming language 24 | - Parallelism using different arrows 25 | - Turing-complete 26 | 27 | * First ideas of what Bauge should look like 28 | ** Language features 29 | *** Paths 30 | Paths are the defining feature of the language. They describe the 31 | series of transformations and functions applied to one or more 32 | variables of the program. 33 | 34 | A path can begin in two ways: 35 | - With a parameter of the current function, written as "╾", 36 | - With a primitive value, such as the number 1 or the string "hello"; 37 | in which case, the primitive is written, followed by the path: "1 ── 38 | ..." 39 | 40 | A path contains one or more variables and goes in one direction 41 | only. Initially, the path only carries the initial values, but 42 | additional variables can be added by /joining/ two paths. The path that 43 | continues is the one carrying the variables; the variable(s) coming 44 | from the merged path are appended to the list of variables of the 45 | continuing path. 46 | 47 | For example, let's take this simple path (the text between the "#(" and ")#" is a comment): 48 | #+Begin_src bauge 49 | #(this path contains a String "hello")# 50 | "hello" ───────────── 51 | #+End_src 52 | 53 | The path starts from an initial value of a String, "hello". If we want 54 | to add another variable to the path, we need to /merge/ another path 55 | into it: 56 | 57 | #+Begin_src bauge 58 | "hello" ─────────┬─── #(the path now contains ("hello", 42))# 59 | │ 60 | 42 ───┘ 61 | #+End_src 62 | 63 | Another way of doing this is to merge multiple paths into a new 64 | one. 
In this case, the values are ordered from top to bottom, or left 65 | to right: 66 | 67 | #+Begin_src bauge 68 | "hello" ──┐ 69 | │ #(the path contains ("hello", 42, ()) )# 70 | 42 ───────┼────── 71 | │ 72 | () ───────┘ 73 | #+End_src 74 | 75 | Paths can only end in two ways: 76 | - With an arrow (→, ←, ↑, ↓), indicating that the current function 77 | returns the values in the path. 78 | - With a cross ( ╳ ), making the program exit upon reaching it. 79 | 80 | Paths can also /branch out/: this action copies the values of the path 81 | to a new one. This can be done in two ways: 82 | - Using single paths (─, ┌, ...): this means the paths are executed 83 | /sequentially/ from top to bottom or left to right. 84 | - Using double paths (═, ╢, ╧, ...): this means the paths are executed 85 | *in parallel*. 86 | 87 | In any case, paths created this way must be /joined/ to the main path in 88 | order for the program to be correct. For example, this code computes 89 | the value =foo(4) * bar(3)=, by computing the two functions in parallel: 90 | 91 | #+Begin_src bauge 92 | ╔════════ foo ══╗ 93 | 4 ────────╢ ╟─── * ─────── 94 | ╚═ -1 ═══ bar ══╝ 95 | #+End_src 96 | 97 | *** Head and tail 98 | The variables on a path behave like a list; it is therefore possible to 99 | isolate the first, last or nth variable on a path using =head=, =tail= or 100 | =nth=. 101 | 102 | #+Begin_src bauge 103 | 4 ──┐ 104 | ├─── head ── #(the path only has 4 on it)# 105 | 5 ──┘ 106 | #+End_src 107 | 108 | #+Begin_src bauge 109 | 4 ──────┐ 110 | "hi" ───┼─── nth 1 ── #(the path only has "hi" on it)# 111 | 5 ──────┘ 112 | #+End_src 113 | 114 | If paths intersect, it's possible to create /bridges/. 
A bridge on a 115 | single-width path can be formed in this way: 116 | 117 | #+Begin_src bauge 118 | 4 ─────┐ ┌──── #(contains 4)# 119 | │ ╧ 120 | 8 ───╢ │ ╟───────── #(contains 8)# 121 | │ ╤ 122 | └──────┘ 123 | #+End_src 124 | 125 | For double-width path, we use the inverse of the previous bridge 126 | characters: 127 | 128 | #+Begin_src bauge 129 | 4 ═════╗ ╔════ #(contains 4)# 130 | ║ ╨ 131 | 8 ═══╡ ║ ╞═════════ #(contains 8)# 132 | ║ ╥ 133 | ╚══════╝ 134 | #+End_src 135 | 136 | *** Named variables 137 | Up until now, all the variables on the paths were unnamed. But 138 | situations can arise where we would need a way to differentiate 139 | between the different variables; this can be done by assigning a name 140 | (a "label") to variables. This can be done in two ways: 141 | - At the start of the path: ~my_variable := 5 ────~ 142 | - On a path, assuming that there is only one variable on it: =───── 143 | :my_variable ─────=. 144 | 145 | The name of a variable can then be used in a function application, 146 | replacing the traditional order of the arguments: 147 | 148 | #+Begin_src bauge 149 | a := 4 ─────────────┐ 150 | ├── / b a ───────── 151 | b := 12 ────────────┘ 152 | #+End_src 153 | 154 | A note: all arithmetic operations are specified using the /polish 155 | notation/: in the above, =b / a= is computed. 156 | 157 | *** Applying functions 158 | Functions in Bauge take a certain number of arguments, and output one 159 | or more values. A function applied to a path will take the values 160 | stored in a path as arguments, apply the function to them, and replace 161 | the arguments in the path with the result. If there are more variables 162 | in the path than there are arguments to the function, the additional 163 | variables are untouched. 
164 | 165 | For example, here is the code to add two numbers: 166 | 167 | #+Begin_src bauge 168 | 5 ──┐ 169 | 9 ──┴── + ── #(result: 14)# 170 | #+End_src 171 | 172 | For the ease of writing programs, some arguments can be supplied 173 | directly without the need of a variable in the path. 174 | 175 | #+Begin_src bauge 176 | 2 ───── + 1 ────── #(increments 2 to 3)# 177 | #+End_src 178 | 179 | If we need to have an unnamed variable at a certain place, we can use 180 | the placeholder =_=. For example, this code decrements the variable by 181 | one. 182 | 183 | #+Begin_src bauge 184 | 4 ────── - _ 1 ──────── #(decrements 4 to 3)# 185 | #+End_src 186 | 187 | Due to the Polish notation, removing the placeholder would compute 188 | =1 - 4=, which is not the result we're trying to achieve. 189 | 190 | *** Flow control 191 | Controlling the flow of the program, using /conditions/ and /loops/, is 192 | done using special keywords, which affect how branching paths are 193 | interpreted. 194 | 195 | **** Conditional branching (if) 196 | #+Begin_src bauge 197 | 5 ──────┬──────────────────────────┬── if condition ──┐ 198 | └─ > 10 ── :condition ─────┘ │ 199 | │ 200 | ┌───────────────────────────────────┴──────────┐ 201 | │ │ 202 | println "10 is greater than {}" println "10 is smaller than {}" 203 | │ │ 204 | ↓ ↓ 205 | #+End_src 206 | 207 | Let's analyze the code above. We begin with an unnamed integer with a 208 | value of 5. We then branch out sequentially: we keep the value of 5 on 209 | the main path, and we compute =10 > 5= on the second path, transforming 210 | the value 5 to =true=. We name this variable =condition=. We then join 211 | the diverging paths into one, appending our =condition= variable to the 212 | list of variables on the main path: its value is now =(5, 213 | condition: true)=. 214 | 215 | We then use the =if= keyword, using =condition= as the boolean 216 | condition. 
Then, we offer two paths of execution: the leftmost one is 217 | executed if the condition is true, while the rightmost one is executed 218 | if the condition is false. Since the 10 is greater than 5, we print 219 | "10 is greater than 5". The print pattern has one placeholder, and so 220 | uses the first variable on the path, which is our initial variable =5=, 221 | as the value that replaces the placeholder. 222 | 223 | This also showcases that the flow of a program can be from top to 224 | bottom, and not only from left to right. 225 | 226 | **** Loops 227 | There are currently two looping keywords: =while= and =loop=; as well as 228 | the =break= keyword for breaking out of a loop. 229 | 230 | A =loop= is an unconditional loop; it can only be exited using a =break= 231 | or by exiting the program. The path following a loop should also 232 | "loop" on itself, and is read in a clockwise manner. The rest of the 233 | program is written after the =break= keyword. 234 | 235 | #+Begin_src bauge 236 | ┌───── = 0 ─────┐ 237 | 10 ──────────── loop ─┬──┴───────────────┴─ if ──┬── break ──────────── println "loop finished" 238 | │ │ 239 | └── - _ 1 ───────── tail ──┘ 240 | #+End_src 241 | 242 | The above code starts with a value of 10 on the path, then starts a 243 | =loop=. Since loops are read in a clockwise fashion, the straight path 244 | is taken first. We branch out to pre-pend the boolean value of =10 = 0= 245 | to the path variables. We then =if= on it; if it's true, we can =break= 246 | out of the loop, and print the message; but since this is false, we go 247 | down and left. We remove the boolean on the path using =tail=, which 248 | leaves only the original value of 10, and we decrement it by one. We 249 | then go back to the start of the loop, and can start it again, this 250 | time with a value of =9=. 251 | 252 | A =while= loop is a conditional loop, that is, it exits the loop when 253 | the condition is false. 
The syntax of the =while= is a bit different: it 254 | takes as an "argument" a condition expression. The path leading out of 255 | the while has to branch in two ways: 256 | - The top or left branch is the loop, in which the instructions are to 257 | be carried out 258 | - The bottom or right branch is the rest of the code, after the loop 259 | is finished. 260 | 261 | Since we need to check the condition at each loop, the looping path 262 | must be injected back into the =while= keyword. 263 | 264 | We can rewrite the previous loop using a =while=: 265 | 266 | #+Begin_src bauge 267 | ┌──────── - _ 1 ─────┐ 268 | │ │ 269 | 10 ───────────── while != 0 ─────────────┴─────────── println "while finished" 270 | #+End_src 271 | 272 | *** Pattern matching 273 | Pattern matching is carried out using the =match= keyword. Following 274 | this keyword, the path can branch out as many times as necessary for 275 | the different patterns to be analyzed. The pattern matching is very 276 | similar to the one Rust uses. Each of the match paths should first 277 | contain the pattern to match, then the instructions for that match. 278 | 279 | =match= matches the entirety of the path variable if no argument is 280 | passed; if we want to match the first =n= variable, we can specify the 281 | number to match after the keyword: =match 3= will match the first 3 282 | variables. It is also possible to use named variables: =match num= will 283 | match the variable =num= on the path. 284 | 285 | The default pattern is the placeholder =_=. 286 | 287 | #+Begin_src bauge 288 | ┌─ "bar" ─── println "matched bar" 289 | │ 290 | "foo" ──────────── match ─┼─ "baz" ─── println "matched baz" 291 | │ 292 | └─ _ ─────── println "I don't know you" 293 | #+End_src 294 | 295 | *** Types 296 | Sometimes, especially using numbers, the type to use can be 297 | ambiguous. 
We can specify the type of a value using this notation: 298 | 299 | #+Begin_src bauge 300 | #(6 is a SignedInt by type inference)# 301 | my_var: SignedInt := 3 ─── - _ 6 ───── ... 302 | #+End_src 303 | 304 | **** Int 305 | An /Int/ is a 32-bit unsigned integer. The following values are Ints: 306 | - =10= 307 | - =0xA32B= 308 | - =0b110110= 309 | 310 | Ints can overflow to 0 and underflow to 2^32 - 1. There are special 311 | Int values: =Int::Min=, which is equal to 0, and =Int::Max=, which is 312 | equal to 2^32 - 1. 313 | 314 | **** SignedInt 315 | A /SignedInt/ is a 32-bit signed integer. The following values are 316 | SignedInts: 317 | - =-10= 318 | - =0xBB32= 319 | - =0b10111011= 320 | 321 | SignedInts can overflow to -2^31, and underflow to 322 | 2^31 - 1. There are special SignedInt values: =SignedInt::Min=, which is 323 | equal to -2^31, and =SignedInt::Max=, which is equal to 2^31 - 1. 324 | 325 | **** Float 326 | A /Float/ is a 32-bit signed floating-point number. Floats can be written 327 | as =10.341=, or =10= if it's round and the type inference allows it. 328 | 329 | **** Byte 330 | A /Byte/ is an 8-bit unsigned value, going from 0 to 255. The following 331 | values are Bytes: 332 | - =65= 333 | - =0x1B= 334 | - =0b1101= 335 | 336 | Bytes can overflow to 0 and underflow to 255. There are special Byte 337 | values: =Byte::Min=, which is equal to 0, and =Byte::Max=, which is equal 338 | to 255. 339 | 340 | **** Char 341 | A /Char/ is a UTF-8 encoded Unicode codepoint. It is *not* a single ASCII 342 | character as with other languages such as C or Java; this role is 343 | assumed by the Byte in Bauge. A Char is written in single quotes. The 344 | following values are Chars: 345 | - ='a'=, 346 | - ='א'=, 347 | - ='中'=, 348 | - ='🌈'=, 349 | 350 | **** String 351 | A /String/ is one or more /Char/ characters together. 
The following values 352 | are Strings: 353 | - ="hello world"= 354 | - ="大家好"= 355 | - ="✨ salut à tous ✨"= 356 | 357 | **** Range 358 | A /Range/ is a range of unsigned Ints, delimited by two Ints. The range 359 | includes the start and excludes the end. It is written using the =..= 360 | operator. For example, the range =1..4= contains the numbers 1, 2 and 3. 361 | 362 | **** Vector 363 | A /Vec/ (short for /Vectors/) is a collection of elements of the same 364 | type. It can either be initialized empty, using Vec::new, or created 365 | dynamically with some values, using the square bracket notations: =[1, 366 | 2, 3]=. 367 | **** Tuple 368 | A /Tuple/ is a heterogeneous collection of elements; that is, it can 369 | contain elements of different types. It is different from a vector, in 370 | that it is not an iterator: it is merely a simple way to hold values 371 | of different types together, much like a C =struct=. 372 | *** Defining functions 373 | A function is a path that ends correctly, enclosed in a box. The top 374 | of the box contains the /signature/ of the function, acting as the 375 | "title" of the function-box. The function definition is of the form: 376 | =name: (arg_a: TypeA, arg_b: TypeB, ...) -> ReturnType=. 377 | 378 | #+Begin_src bauge 379 | ┌─ my_function: (Int, String) -> Bool ────┐ 380 | │ │ 381 | │ #(The paths are written in here)# │ 382 | │ │ 383 | └─────────────────────────────────────────┘ 384 | #+End_src 385 | 386 | The =main= function box can be omitted, in which case its signature is 387 | =main: () -> ()=. 
388 | 389 | Let's write the Fibonacci function in a recursive manner using what we 390 | have so far: 391 | 392 | #+Begin_src bauge 393 | ┌─ fibonacci: (Int) -> Int ─────────────────────────────────────┐ 394 | │ │ 395 | │ ┌─ 0 ───→ │ 396 | │ │ │ 397 | │ ╾──── match ──┼─ 1 ───→ │ 398 | │ │ │ 399 | │ └─ _ ────┬─ - _ 1 ─── fibonacci ───┬─ + ───→ │ 400 | │ └─ - _ 2 ─── fibonacci ───┘ │ 401 | │ │ 402 | └───────────────────────────────────────────────────────────────┘ 403 | #+End_src 404 | 405 | Let's analyze this. We declare a function, =fibonacci=, which takes a 406 | single Int and returns an Int. We then start our path with the 407 | function's argument, which we =match=. If it is 0 or 1, we return what's 408 | on the path, which is either 0 or 1. If the value is neither, we then 409 | have two branching paths: on one path, we decrement the value by one, 410 | and call recursively the =fibonacci= function. On the other path, we do 411 | the same, except that we decrement by two the value on the path. 412 | 413 | At the junction of the two paths, we have now two Int on the path; we 414 | add them together to get the single Int value we want, and then return 415 | it. 416 | 417 | *** Closures 418 | Paths can also contain /closures/: functions that can be passed as 419 | parameters of other functions. This is done by declaring a function 420 | box inside of the current function, and linking it to a path, much 421 | like having a starting value to a new path. 422 | 423 | #+Begin_src bauge 424 | (0..10) ─────────────────┬────── foreach ───────────────────→ 425 | │ 426 | ┌─ Int -> () ─────┴──────┐ 427 | │ │ 428 | │ ╾─ println "{}" ───→ │ 429 | │ │ 430 | └────────────────────────┘ 431 | #+End_src 432 | 433 | We first have a range of numbers on the path. Then, we declare an 434 | anonymous function, which takes an Int and returns nothing, which 435 | prints the argument. 
Then, we apply the function =foreach= on the path, 436 | which effectively prints each value in the range. 437 | 438 | For ease of use of the language, we can also use functions this 439 | way: 440 | 441 | #+Begin_src bauge 442 | ["foo", "bar", "baz"] ───────────── foreach println ─────────→ 443 | #+End_src 444 | 445 | This works because the function =foreach= has the signature 446 | =(Iterator, (T -> ())) -> ()=: a function which takes an iterator of 447 | values, and a function which takes a single value and returns nothing; 448 | =foreach= itself returns nothing as well. 449 | 450 | Here, we have an iterator of =String=, and a function, =println=, which 451 | takes a string and then an arbitrary number of arguments, and returns 452 | nothing; this fits the type requirement. 453 | 454 | *** Comments and documentation 455 | We've already seen comments that take the form of =#( 456 | comment... )#=. There is also a way to create documentation for a 457 | function: by separating the box in two from top to bottom, with the 458 | top part being the documentation, while the bottom part is the code. 459 | Let's document the =fibonacci= function from before: 460 | 461 | #+Begin_src bauge 462 | ┌─ fibonacci: (Int) -> Int ─────────────────────────────────────┐ 463 | │ │ 464 | │ This function calculates the Fibonacci number for any │ 465 | │ unsigned integer. This implementation uses the recursive │ 466 | │ method without any memoisation; it is done by checking │ 467 | │ the first argument: │ 468 | │ - If it's 0 or 1, then we return directly the argument, │ 469 | │ - If not, we calculate recursively the previous two │ 470 | │ Fibonacci numbers, and add them to get the current one. 
│ 471 | │ │ 472 | ├───────────────────────────────────────────────────────────────┤ 473 | │ │ 474 | │ ┌─ 0 ───→ │ 475 | │ │ │ 476 | │ ╾──── match ──┼─ 1 ───→ │ 477 | │ │ │ 478 | │ └─ _ ────┬─ - _ 1 ─── fibonacci ───┬─ + ───→ │ 479 | │ └─ - _ 2 ─── fibonacci ───┘ │ 480 | │ │ 481 | └───────────────────────────────────────────────────────────────┘ 482 | #+End_src 483 | 484 | ** Standard library 485 | *** Vectors 486 | *** I/O 487 | *** Primitives 488 | *** Math 489 | * Code examples 490 | ** Guessing game 491 | #+Begin_src bauge 492 | 100 0 493 | └──────┴──── .. ──── pick ──┐ 494 | ┌───────────────────────────┘ 495 | └─ loop ──┬─── input "Pick a number:" ──┬────────────────────┬── match cmp ──┬── Equal ─── println "You win!" ─── break ───→ 496 | │ └─ compare ── :cmp ──┘ │ 497 | │ │ 498 | │ ┌─── println "Too high!" ── Greater ─┤ 499 | │ │ │ 500 | └───────────── head ──────────┴─── println "Too low!" ─── Less ────┘ 501 | #+End_src 502 | 503 | We begin by creating a range of 100 numbers, between 0 and 99. We then 504 | =pick= one at random. After that, we enter a =loop=. At the start of the 505 | loop, we ask for the user to pick a number using the =input= 506 | function. Our path now contains the random number, then the guessed 507 | number. Then, we branch out; on the bottom branch, we use =compare=, 508 | which consumes both of the values on the path and outputs a single 509 | "comparison" value, which can be either =Equal=, =Greater= or =Less=. We 510 | then rejoin the main path by the bottom, which gives a path with the 511 | random number, the guessed number, and the comparison value. 512 | 513 | After that, we match on the comparison value: if it's =Equal=, the user 514 | won, and the program returns. If not, we print a message whether the 515 | guess was too high or low. Then, we keep the =head= of our path, which 516 | is the random number, and go back to the beginning of the loop. 
517 | 518 | * Ideas for stuff in the language 519 | - Write the standard library of the program in literate programming 520 | - Generic types 521 | - Vectorize function: takes all variables on path and creates a single 522 | vector of the variables, assuming the variables are all of the same 523 | type 524 | -------------------------------------------------------------------------------- /readme.org: -------------------------------------------------------------------------------- 1 | #+Title: Bauge, a 2D esoteric programming language 2 | 3 | * What is Bauge 4 | Bauge is a terrible idea of a programming language, where programs 5 | follow a graphical /path/, written using box-drawing characters such as 6 | =┌=, =│=, and so on. 7 | 8 | The name Bauge comes from the Bauges mountain range, a very nice place 9 | where my parents live in the northern French alps. 10 | 11 | I was inspired by the [[http://scrambledeggsontoast.github.io/2014/09/28/needle-announce/][Haskell "Needle" package]], which "ASCII-fies" the 12 | arrow notation of that language. I wanted to take that concept up a 13 | notch, and create a programming language based only on such concepts. 14 | 15 | At the time I'm writing this, the idea of the language is still very 16 | rough, and I'm still modifying things and changing stuff around a 17 | lot. For example, here's a piece of Bauge code from my [[./ideas.org][ideas file]]: 18 | 19 | #+Begin_src bauge 20 | ┏━ fibonacci: (Int) -> Int ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ 21 | ┃ ┃ 22 | ┃ This function calculates the Fibonacci number for any ┃ 23 | ┃ unsigned integer. This implementation uses the recursive ┃ 24 | ┃ method without any memoisation; it is done by checking ┃ 25 | ┃ the first argument: ┃ 26 | ┃ - If it's 0 or 1, then we return directly the argument, ┃ 27 | ┃ - If not, we calculate recursively the previous two ┃ 28 | ┃ Fibonacci numbers, and add them to get the current one. 
┃ 29 | ┃ ┃ 30 | ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ 31 | ┃ ┃ 32 | ┃ ┌─ 0 ───→ ┃ 33 | ┃ │ ┃ 34 | ┃ ╾──── match ──┼─ 1 ───→ ┃ 35 | ┃ │ ┃ 36 | ┃ └─ _ ────┬─ - _ 1 ─── fibonacci ───┬─ + ───→ ┃ 37 | ┃ └─ - _ 2 ─── fibonacci ───┘ ┃ 38 | ┃ ┃ 39 | ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ 40 | #+End_src 41 | 42 | * Planned features for Bauge 43 | - Interpreted language akin to Python 44 | - Interpreter written in Rust (because that's the language I'm most 45 | familiar with) 46 | - Easy parallelism using the branching system 47 | - Functional style of programming, taking full advantage of Iterators 48 | and Map/Reduce operations 49 | 50 | * Roadmap 51 | ** STARTED Language specification 52 | For the moment, the entirety of the language specification is written 53 | in the ideas file. I need to create a comprehensive documentation 54 | system. 55 | 56 | ** TODO Interpreter 57 | The only certainty I have is that I'm going to write the interpreter 58 | in Rust. It will need to: 59 | - Parse the code and somehow derive elements from it (paths, boxes, 60 | functions, ...) 61 | - Perform lexical analysis, syntax analysis and semantic analysis 62 | - Find how the interpreter is going to interpret: using a VM, with an 63 | intermediate representation seems the most likely. 64 | 65 | ** IDEA Debugger 66 | Having a debugger interface with a step-by-step, path variable 67 | visualization, ... would be nice. 68 | 69 | ** IDEA Visual editor 70 | This won't come until much, much later in the development, but having 71 | a visual editor would be easier than writing the code by hand (even 72 | though I have neat macros for the box drawing characters at the 73 | moment) 74 | -------------------------------------------------------------------------------- /src/frontend/coordinates.rs: -------------------------------------------------------------------------------- 1 | //! 
//! Coordinates structures and helper functions.
//!
//! The main structure, [`Coords`], is used to index in two-dimensional
//! structures, the same manner as [`usize`] in one-dimensional structures.

// `TryFrom` is imported explicitly so that this module does not rely on the
// edition-2021 prelude.
use std::{convert::TryFrom, error, fmt, ops};

/// Errors possible when using [`Coords`] and [`ICoords`].
#[derive(Debug)]
pub enum Error {
    /// The coordinate used as an index in a two-dimensional structure was out
    /// of bounds.
    OutOfBounds {
        coords: Coords,
        rectangle: Rectangle,
    },
    /// Attempted to create a rectangle with an invalid shape, inverted corners,
    /// or otherwise impossible geometry.
    InvalidRectangle {
        top_left: Coords,
        bottom_right: Coords,
    },
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Error::OutOfBounds { coords, rectangle } => {
                write!(f, "Index {coords} out of bounds for rectangle {rectangle}")
            }
            Error::InvalidRectangle {
                top_left,
                bottom_right,
            } => {
                write!(f, "Cannot construct a rectangle with a top-left of {top_left} and a bottom-right of {bottom_right}")
            }
        }
    }
}

impl error::Error for Error {}

/// A two-dimensional coordinate used to index into two-dimensional structures.
/// By convention, coordinates `(0, 0)` correspond to the top-left corner.
///
/// # Examples
/// You can create [`Coords`] from their [`From`] implementations:
///
/// ```rust
/// # use bauge::frontend::coordinates::Coords;
///
/// let c1: Coords = (123, 456).into();
/// let c2: Coords = [123, 456].into();
/// ```
///
/// Basic operations are supported (with signed operations using [`ICoords`]):
/// ```rust
/// # use bauge::frontend::coordinates::Coords;
///
/// let c1: Coords = (123, 456).into();
/// let c2: Coords = (111, 111).into();
///
/// assert_eq!(c1 + c2, (234, 567).into());
/// ```
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub struct Coords {
    pub x: usize,
    pub y: usize,
}

impl fmt::Display for Coords {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "({}, {})", self.x, self.y)
    }
}

/// Component-wise addition of two unsigned coordinates.
impl ops::Add for Coords {
    type Output = Self;

    fn add(self, rhs: Self) -> Self::Output {
        Self {
            x: self.x + rhs.x,
            y: self.y + rhs.y,
        }
    }
}

/// Offsets an unsigned coordinate by a signed vector.
///
/// # Panics
/// Panics if either resulting component would be negative or overflow
/// [`usize`].
impl ops::Add<ICoords> for Coords {
    type Output = Coords;

    fn add(self, rhs: ICoords) -> Self::Output {
        Self {
            x: self
                .x
                .checked_add_signed(rhs.x)
                .expect("x coordinate out of range after applying signed offset"),
            y: self
                .y
                .checked_add_signed(rhs.y)
                .expect("y coordinate out of range after applying signed offset"),
        }
    }
}

impl ops::AddAssign for Coords {
    fn add_assign(&mut self, rhs: Self) {
        self.x += rhs.x;
        self.y += rhs.y;
    }
}

/// Component-wise multiplication.
impl ops::Mul for Coords {
    type Output = Self;

    fn mul(self, rhs: Self) -> Self::Output {
        Self {
            x: self.x * rhs.x,
            y: self.y * rhs.y,
        }
    }
}

impl ops::MulAssign for Coords {
    fn mul_assign(&mut self, rhs: Self) {
        self.x *= rhs.x;
        self.y *= rhs.y;
    }
}

/// Component-wise subtraction. As with any `usize` subtraction, a component
/// of `rhs` larger than the matching component of `self` underflows.
impl ops::Sub for Coords {
    type Output = Self;

    fn sub(self, rhs: Self) -> Self::Output {
        Self {
            x: self.x - rhs.x,
            y: self.y - rhs.y,
        }
    }
}

impl ops::SubAssign for Coords {
    fn sub_assign(&mut self, rhs: Self) {
        self.x -= rhs.x;
        self.y -= rhs.y;
    }
}

/// Component-wise integer division.
impl ops::Div for Coords {
    type Output = Self;

    fn div(self, rhs: Self) -> Self::Output {
        Self {
            x: self.x / rhs.x,
            y: self.y / rhs.y,
        }
    }
}

impl ops::DivAssign for Coords {
    fn div_assign(&mut self, rhs: Self) {
        self.x /= rhs.x;
        self.y /= rhs.y;
    }
}

impl From<(usize, usize)> for Coords {
    fn from((x, y): (usize, usize)) -> Self {
        Self { x, y }
    }
}

impl From<Coords> for (usize, usize) {
    fn from(Coords { x, y }: Coords) -> Self {
        (x, y)
    }
}

impl From<[usize; 2]> for Coords {
    fn from([x, y]: [usize; 2]) -> Self {
        Self { x, y }
    }
}

impl From<Coords> for [usize; 2] {
    fn from(Coords { x, y }: Coords) -> Self {
        [x, y]
    }
}

/// A signed two-dimensional coordinate, used to perform arithmetic with
/// [`Coords`].
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub struct ICoords {
    pub x: isize,
    pub y: isize,
}

impl fmt::Display for ICoords {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "({:+}, {:+})", self.x, self.y)
    }
}

impl ICoords {
    /// The "up" direction vector: `(0, -1)`.
    pub const UP: Self = Self { x: 0, y: -1 };
    /// The "down" direction vector: `(0, 1)`.
    pub const DOWN: Self = Self { x: 0, y: 1 };
    /// The "left" direction vector: `(-1, 0)`.
    pub const LEFT: Self = Self { x: -1, y: 0 };
    /// The "right" direction vector: `(1, 0)`.
    pub const RIGHT: Self = Self { x: 1, y: 0 };
}

impl ops::Add for ICoords {
    type Output = Self;

    fn add(self, rhs: Self) -> Self::Output {
        Self {
            x: self.x + rhs.x,
            y: self.y + rhs.y,
        }
    }
}

impl ops::AddAssign for ICoords {
    fn add_assign(&mut self, rhs: Self) {
        self.x += rhs.x;
        self.y += rhs.y;
    }
}

impl ops::Mul for ICoords {
    type Output = Self;

    fn mul(self, rhs: Self) -> Self::Output {
        Self {
            x: self.x * rhs.x,
            y: self.y * rhs.y,
        }
    }
}

impl ops::MulAssign for ICoords {
    fn mul_assign(&mut self, rhs: Self) {
        self.x *= rhs.x;
        self.y *= rhs.y;
    }
}

impl ops::Sub for ICoords {
    type Output = Self;

    fn sub(self, rhs: Self) -> Self::Output {
        Self {
            x: self.x - rhs.x,
            y: self.y - rhs.y,
        }
    }
}

impl ops::SubAssign for ICoords {
    fn sub_assign(&mut self, rhs: Self) {
        self.x -= rhs.x;
        self.y -= rhs.y;
    }
}

impl ops::Div for ICoords {
    type Output = Self;

    fn div(self, rhs: Self) -> Self::Output {
        Self {
            x: self.x / rhs.x,
            y: self.y / rhs.y,
        }
    }
}

impl ops::DivAssign for ICoords {
    fn div_assign(&mut self, rhs: Self) {
        self.x /= rhs.x;
        self.y /= rhs.y;
    }
}

impl From<(isize, isize)> for ICoords {
    fn from((x, y): (isize, isize)) -> Self {
        Self { x, y }
    }
}

impl From<ICoords> for (isize, isize) {
    fn from(ICoords { x, y }: ICoords) -> Self {
        (x, y)
    }
}

impl From<[isize; 2]> for ICoords {
    fn from([x, y]: [isize; 2]) -> Self {
        Self { x, y }
    }
}

impl From<ICoords> for [isize; 2] {
    fn from(ICoords { x, y }: ICoords) -> Self {
        [x, y]
    }
}

/// A two-dimensional rectangle, defined by a top-left and a bottom-right
/// [`Coords`].
///
/// The coordinates of the rectangle are **inclusive**: the coordinates equal to
/// its bottom-right are included in the rectangle.
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
pub struct Rectangle {
    pub top_left: Coords,
    pub bottom_right: Coords,
}

impl Rectangle {
    /// Constructs a rectangle from any corners, re-defining a top-left and a
    /// bottom-right corner if the two points passed are top-right and bottom-left.
    pub fn from_any_corners<C1, C2>(c1: C1, c2: C2) -> Self
    where
        C1: Into<Coords>,
        C2: Into<Coords>,
    {
        let c1 = c1.into();
        let c2 = c2.into();

        Self {
            top_left: Coords {
                x: c1.x.min(c2.x),
                y: c1.y.min(c2.y),
            },
            bottom_right: Coords {
                x: c1.x.max(c2.x),
                y: c1.y.max(c2.y),
            },
        }
    }

    /// Returns `true` if the rectangle contains the passed [`Coords`].
    pub fn contains_point<C>(&self, c: C) -> bool
    where
        C: Into<Coords>,
    {
        let c = c.into();
        (self.top_left.x..=self.bottom_right.x).contains(&c.x)
            && (self.top_left.y..=self.bottom_right.y).contains(&c.y)
    }

    /// Returns `true` if the rectangle contains the passed rectangle.
    pub fn contains_rectangle<R>(&self, other: R) -> bool
    where
        R: Into<Rectangle>,
    {
        let other: Rectangle = other.into();

        self.contains_point(other.top_left) && self.contains_point(other.bottom_right)
    }

    /// Returns the width of the rectangle.
    pub fn width(&self) -> usize {
        self.bottom_right.x - self.top_left.x + 1
    }

    /// Returns the height of the rectangle.
    pub fn height(&self) -> usize {
        self.bottom_right.y - self.top_left.y + 1
    }

    /// Returns a normalized rectangle, where the top-left corner is shifted to
    /// (0, 0).
    pub fn normalize(&self) -> Self {
        Self {
            top_left: Coords { x: 0, y: 0 },
            bottom_right: self.bottom_right - self.top_left,
        }
    }

    /// Returns an iterator over the [`Coords`] contained in this rectangle,
    /// going from top to bottom, left to right.
    ///
    /// The outer iterator yields one inner iterator per row.
    pub fn iter(&self) -> impl Iterator<Item = impl Iterator<Item = Coords> + '_> + '_ {
        // Copy the corners out so the closures own what they need; this keeps
        // the method independent of edition-2021 disjoint closure captures.
        let Rectangle {
            top_left,
            bottom_right,
        } = *self;

        (top_left.y..=bottom_right.y)
            .map(move |y| (top_left.x..=bottom_right.x).map(move |x| Coords { x, y }))
    }

    /// Returns rectangles with width of 1 for the left and right side of the
    /// rectangle, and with height of 1 for the top and bottom sides of the
    /// rectangle.
    ///
    /// The borders are returned in this order: top, bottom, left, right.
    ///
    /// # Panics
    /// Panics if `self` has inverted corners (its fields are public, so such a
    /// value can be built by hand).
    pub fn borders(&self) -> [Rectangle; 4] {
        let Rectangle {
            top_left: tl,
            bottom_right: br,
        } = *self;

        let edge = |from: (usize, usize), to: (usize, usize)| {
            Rectangle::try_from((from, to))
                .expect("borders() requires a rectangle with ordered corners")
        };

        [
            edge((tl.x, tl.y), (br.x, tl.y)),
            edge((tl.x, br.y), (br.x, br.y)),
            edge((tl.x, tl.y), (tl.x, br.y)),
            edge((br.x, tl.y), (br.x, br.y)),
        ]
    }
}

/// Translates both corners by an unsigned offset.
impl ops::Add<Coords> for Rectangle {
    type Output = Rectangle;

    fn add(self, rhs: Coords) -> Self::Output {
        Rectangle {
            top_left: self.top_left + rhs,
            bottom_right: self.bottom_right + rhs,
        }
    }
}

/// Translates both corners by a signed offset; panics under the same
/// conditions as `Coords + ICoords`.
impl ops::Add<ICoords> for Rectangle {
    type Output = Rectangle;

    fn add(self, rhs: ICoords) -> Self::Output {
        Rectangle {
            top_left: self.top_left + rhs,
            bottom_right: self.bottom_right + rhs,
        }
    }
}

impl<TL, BR> TryFrom<(TL, BR)> for Rectangle
where
    TL: Into<Coords>,
    BR: Into<Coords>,
{
    type Error = Error;

    // Checks that the two passed coords are indeed top-left and bottom-right
    fn try_from((top_left, bottom_right): (TL, BR)) -> Result<Self, Self::Error> {
        let top_left = top_left.into();
        let bottom_right = bottom_right.into();

        if top_left.x <= bottom_right.x && top_left.y <= bottom_right.y {
            Ok(Self {
                top_left,
                bottom_right,
            })
        } else {
            Err(Error::InvalidRectangle {
                top_left,
                bottom_right,
            })
        }
    }
}

impl fmt::Display for Rectangle {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "<{}, {}>", self.top_left, self.bottom_right)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::convert::TryInto;

    #[test]
    fn coords_from_tuple() {
        assert_eq!(Coords { x: 123, y: 456 }, (123, 456).into())
    }

    #[test]
    fn coords_from_array() {
        assert_eq!(Coords { x: 123, y: 456 }, [123, 456].into())
    }

    #[test]
    fn icoords_from_tuple() {
        assert_eq!(ICoords { x: -123, y: -456 }, (-123, -456).into())
    }

    #[test]
    fn icoords_from_array() {
        assert_eq!(ICoords { x: -123, y: -456 }, [-123, -456].into())
    }

    #[test]
    fn coords_add() {
        assert_eq!(
            Coords::from((1, 2)) + Coords::from((3, 4)),
            Coords::from((4, 6))
        )
    }

    #[test]
    fn coords_icoords_add() {
        assert_eq!(
            Coords::from((4, 4)) + ICoords::from((-1, 1)),
            Coords::from((3, 5))
        )
    }

    #[test]
    fn rectangle_contains_coords() {
        let r: Rectangle = ((0, 0), (10, 10)).try_into().unwrap();

        assert!(r.contains_point((0, 0)));
        assert!(r.contains_point((0, 1)));
        assert!(r.contains_point((10, 1)));
        assert!(r.contains_point((10, 10)));
        assert!(!r.contains_point((11, 10)));
    }

    #[test]
    fn rectangle_contains_other() {
        let r: Rectangle = ((5, 5), (10, 10)).try_into().unwrap();

        assert!(r.contains_rectangle(r));

        let r1: Rectangle = ((0, 0), (5, 5)).try_into().unwrap();

        assert!(!r.contains_rectangle(r1));

        let r2: Rectangle = ((5, 5), (6, 6)).try_into().unwrap();

        assert!(r.contains_rectangle(r2));
    }
}

// --------------------------------------------------------------------------------
// /src/frontend/lexer/function_box.rs:
// --------------------------------------------------------------------------------
// 1 | //! Lexer structures and function to read function boxes.
2 | 3 | use std::fmt; 4 | 5 | use crate::frontend::{ 6 | is_box_drawing_character, 7 | lexer::{ 8 | coordinates::{Coords, ICoords, Rectangle}, 9 | heavy, CharacterMapWindow, Error, ErrorKind, Token, TokenKind, Tokenize, 10 | }, 11 | }; 12 | 13 | /// A function box, with an optional comment: 14 | /// 15 | /// ```text 16 | /// ┏━ foo: Int -> String ━━━━━━━━━━━┓ 17 | /// ┃ ┃ 18 | /// ┃ Function documentation text. ┃ 19 | /// ┃ ┃ 20 | /// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ 21 | /// ┃ #(write your function here)# ┃ 22 | /// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ 23 | /// ``` 24 | pub struct FunctionBox<'a> { 25 | pub signature: CharacterMapWindow<'a>, 26 | pub definition: CharacterMapWindow<'a>, 27 | pub comment: Option>, 28 | } 29 | 30 | impl FunctionBox<'_> { 31 | /// Returns `true` if the token contains the given point. For a function box, 32 | /// this includes the box perimeter, including the signature, the 33 | /// documentation if present; it excludes the definition itself, to allow 34 | /// for the definition to be lexxed separately. 35 | pub fn contains_point(&self, c: C, initial_span: Rectangle) -> bool 36 | where 37 | C: Into, 38 | { 39 | let c = c.into(); 40 | let [top, bottom, left, right] = initial_span.borders(); 41 | 42 | assert!(top.contains_rectangle(self.signature.rectangle)); // Just to be sure. 43 | 44 | let result = top.contains_point(c) 45 | || bottom.contains_point(c) 46 | || left.contains_point(c) 47 | || right.contains_point(c) 48 | || self 49 | .comment 50 | .as_ref() 51 | .map(|comment| { 52 | comment.rectangle.contains_point(c) 53 | || Rectangle::try_from(( 54 | (left.top_left.x, comment.rectangle.bottom_right.y + 1), 55 | (right.top_left.x, comment.rectangle.bottom_right.y + 1), 56 | )) 57 | .unwrap() 58 | .contains_point(c) 59 | }) 60 | .unwrap_or(false); 61 | 62 | if !result { 63 | // If the point is not in the border or in the doc comments, then 64 | // it must be in the definition. 
65 | assert!( 66 | self.definition.rectangle.contains_point(c), 67 | "{c} isn't in function definition {} despite not being anywhere else", 68 | self.definition.rectangle, 69 | ); 70 | } 71 | 72 | result 73 | } 74 | } 75 | 76 | impl<'a> Tokenize<'a> for FunctionBox<'a> { 77 | fn tokenize(window: CharacterMapWindow<'a>) -> Result, Error<'a>> { 78 | enum State { 79 | Top, 80 | Signature { 81 | start: Coords, 82 | non_space_encountered: bool, 83 | last_space: bool, 84 | }, 85 | TopSignature { 86 | signature: Rectangle, 87 | }, 88 | Right { 89 | signature: Rectangle, 90 | comment_y: Option, 91 | }, 92 | Bottom { 93 | signature: Rectangle, 94 | comment_y: Option, 95 | bottom_right: Coords, 96 | }, 97 | Left { 98 | signature: Rectangle, 99 | comment_y: Option, 100 | bottom_right: Coords, 101 | comment_seen: bool, 102 | }, 103 | Midline { 104 | signature: Rectangle, 105 | bottom_right: Coords, 106 | comment_y: usize, 107 | }, 108 | } 109 | 110 | let mut state = State::Top; 111 | let mut cursor: Coords = (0, 0).into(); 112 | 113 | assert_eq!(window[cursor], heavy::SE); 114 | 115 | loop { 116 | match &mut state { 117 | State::Top => { 118 | cursor = cursor + ICoords::RIGHT; 119 | 120 | if !window.contains_point(cursor) { 121 | return Err(Error::new( 122 | window.crop( 123 | (cursor + ICoords::LEFT, cursor + ICoords::LEFT) 124 | .try_into() 125 | .unwrap(), 126 | ), 127 | ErrorKind::UnexpectedEof, 128 | Some("while reading along the top of the function box".to_string()), 129 | )); 130 | } 131 | 132 | match window[cursor] { 133 | ' ' => { 134 | state = State::Signature { start: cursor, non_space_encountered: false, last_space: true }; 135 | } 136 | c if c == heavy::EW => { 137 | continue; 138 | } 139 | c if c == heavy::SW => { 140 | return Err(Error::new( 141 | window.crop((cursor, cursor).try_into().unwrap()), 142 | ErrorKind::UnexpectedChar { 143 | expected: "[' ']".to_string(), 144 | }, 145 | Some("missing function signature".to_string()) 146 | )) 147 | } 148 | _ => { 149 
| return Err(Error::new( 150 | window.crop((cursor, cursor).try_into().unwrap()), 151 | ErrorKind::UnexpectedChar { 152 | expected: format!("['{}', ' ']", heavy::EW), 153 | }, 154 | Some("expected either function signature or continuation of function box".to_string()) 155 | )) 156 | } 157 | } 158 | } 159 | State::Signature { 160 | start, 161 | non_space_encountered, 162 | last_space, 163 | } => { 164 | cursor = cursor + ICoords::RIGHT; 165 | 166 | if !window.contains_point(cursor) { 167 | return Err(Error::new( 168 | window.crop( 169 | (cursor + ICoords::LEFT, cursor + ICoords::LEFT) 170 | .try_into() 171 | .unwrap(), 172 | ), 173 | ErrorKind::UnexpectedEof, 174 | Some( 175 | "while reading along the signature of the function box".to_string(), 176 | ), 177 | )); 178 | } 179 | 180 | match window[cursor] { 181 | ' ' => { 182 | *last_space = true; 183 | } 184 | c if c == heavy::SW || c == heavy::EW => { 185 | if *non_space_encountered { 186 | if *last_space { 187 | if c == heavy::SW { 188 | state = State::Right { 189 | signature: (*start, cursor + ICoords::LEFT) 190 | .try_into() 191 | .unwrap(), 192 | comment_y: None, 193 | } 194 | } else { 195 | state = State::TopSignature { 196 | signature: (*start, cursor + ICoords::LEFT) 197 | .try_into() 198 | .unwrap(), 199 | } 200 | } 201 | } else { 202 | return Err(Error::new( 203 | window.crop((cursor, cursor).try_into().unwrap()), 204 | ErrorKind::UnexpectedChar { expected: "[' ']".to_string() }, 205 | Some("missing space separating signature and function box perimeter".to_string()), 206 | )); 207 | } 208 | } else { 209 | return Err(Error::new( 210 | window.crop(((0, 0), cursor).try_into().unwrap()), 211 | ErrorKind::UnexpectedChar { 212 | expected: "[(any non box-drawing character)]".to_string(), 213 | }, 214 | Some("function signature found empty".to_string()), 215 | )); 216 | } 217 | } 218 | c if is_box_drawing_character(c) => { 219 | return Err(Error::new( 220 | window.crop(((0, 0), cursor).try_into().unwrap()), 221 | 
ErrorKind::UnexpectedChar { 222 | expected: "[(any non box-drawing character)]".to_string(), 223 | }, 224 | Some( 225 | "box-drawing characters cannot appear in function signature" 226 | .to_string(), 227 | ), 228 | )); 229 | } 230 | _ => { 231 | *non_space_encountered = true; 232 | *last_space = false; 233 | } 234 | } 235 | } 236 | State::TopSignature { signature } => { 237 | cursor = cursor + ICoords::RIGHT; 238 | 239 | if !window.contains_point(cursor) { 240 | return Err(Error::new( 241 | window.crop( 242 | (cursor + ICoords::LEFT, cursor + ICoords::LEFT) 243 | .try_into() 244 | .unwrap(), 245 | ), 246 | ErrorKind::UnexpectedEof, 247 | Some("while reading along the top of the function box".to_string()), 248 | )); 249 | } 250 | 251 | match window[cursor] { 252 | c if c == heavy::EW => continue, 253 | c if c == heavy::SW => { 254 | state = State::Right { 255 | signature: *signature, 256 | comment_y: None, 257 | } 258 | } 259 | _ => { 260 | return Err(Error::new( 261 | window.crop((cursor, cursor).try_into().unwrap()), 262 | ErrorKind::UnexpectedChar { 263 | expected: format!("['{}', '{}']", heavy::EW, heavy::SW), 264 | }, 265 | None, 266 | )) 267 | } 268 | } 269 | } 270 | State::Right { 271 | signature, 272 | comment_y, 273 | } => { 274 | cursor = cursor + ICoords::DOWN; 275 | 276 | if !window.contains_point(cursor) { 277 | return Err(Error::new( 278 | window.crop( 279 | (cursor + ICoords::UP, cursor + ICoords::UP) 280 | .try_into() 281 | .unwrap(), 282 | ), 283 | ErrorKind::UnexpectedEof, 284 | Some("while reading along the right of the function box".to_string()), 285 | )); 286 | } 287 | 288 | match window[cursor] { 289 | c if c == heavy::NS => continue, 290 | c if c == heavy::NW => { 291 | state = State::Bottom { 292 | signature: *signature, 293 | comment_y: *comment_y, 294 | bottom_right: cursor, 295 | } 296 | } 297 | c if c == heavy::NSW => { 298 | *comment_y = Some(cursor.y); 299 | } 300 | _ => { 301 | return Err(Error::new( 302 | window.crop((cursor, 
cursor).try_into().unwrap()), 303 | ErrorKind::UnexpectedChar { 304 | expected: format!( 305 | "['{}', '{}', '{}']", 306 | heavy::NS, 307 | heavy::NW, 308 | heavy::NSW 309 | ), 310 | }, 311 | None, 312 | )) 313 | } 314 | } 315 | } 316 | State::Bottom { 317 | signature, 318 | comment_y, 319 | bottom_right, 320 | } => { 321 | if cursor.x == 0 { 322 | return Err(Error::new( 323 | window.crop((cursor, cursor).try_into().unwrap()), 324 | ErrorKind::UnexpectedEof, 325 | Some("while reading along the bottom of the function box".to_string()), 326 | )); 327 | } 328 | 329 | cursor = cursor + ICoords::LEFT; 330 | 331 | match window[cursor] { 332 | c if c == heavy::EW => continue, 333 | c if c == heavy::NE => { 334 | state = State::Left { 335 | signature: *signature, 336 | comment_y: *comment_y, 337 | bottom_right: *bottom_right, 338 | comment_seen: false, 339 | } 340 | } 341 | _ => { 342 | return Err(Error::new( 343 | window.crop((cursor, cursor).try_into().unwrap()), 344 | ErrorKind::UnexpectedChar { 345 | expected: format!("['{}', '{}']", heavy::EW, heavy::NE,), 346 | }, 347 | None, 348 | )) 349 | } 350 | } 351 | } 352 | State::Left { 353 | signature, 354 | comment_y, 355 | bottom_right, 356 | comment_seen, 357 | } => { 358 | if cursor.y == 0 { 359 | return Err(Error::new( 360 | window.crop((cursor, cursor).try_into().unwrap()), 361 | ErrorKind::UnexpectedEof, 362 | Some("while reading along the left of the function box".to_string()), 363 | )); 364 | } 365 | 366 | cursor = cursor + ICoords::UP; 367 | 368 | match window[cursor] { 369 | c if c == heavy::NS => { 370 | if let Some(y) = comment_y.as_ref() { 371 | if *y == cursor.y { 372 | return Err(Error::new( 373 | window.crop((cursor, cursor).try_into().unwrap()), 374 | ErrorKind::UnexpectedChar { 375 | expected: format!( 376 | "['{}']", 377 | heavy::NSE, 378 | ), 379 | }, 380 | Some("function documentation separator line misaligned with right side".to_string()), 381 | )); 382 | } 383 | } 384 | } 385 | c if c == heavy::SE => 
{ 386 | if comment_y.is_some() && !*comment_seen { 387 | return Err(Error::new( 388 | window.crop((cursor, cursor).try_into().unwrap()), 389 | ErrorKind::UnexpectedChar { 390 | expected: format!( 391 | "['{}']", 392 | heavy::NSE, 393 | ), 394 | }, 395 | Some(format!("missing function documentation separator line connector ('{}') on the left side", heavy::NSE)), 396 | )); 397 | } else if let Some(comment_y) = comment_y.as_ref().copied() { 398 | cursor.y = comment_y; 399 | cursor.x = 0; 400 | state = State::Midline { 401 | signature: *signature, 402 | bottom_right: *bottom_right, 403 | comment_y, 404 | } 405 | } else { 406 | break Ok(Token { 407 | span: window.crop(((0, 0), *bottom_right).try_into().unwrap()), 408 | kind: TokenKind::FunctionBox(Self { 409 | signature: window.crop(*signature), 410 | definition: window.crop( 411 | ((1, 1), *bottom_right + ICoords::LEFT + ICoords::UP) 412 | .try_into() 413 | .unwrap(), 414 | ), 415 | comment: None, 416 | }), 417 | }); 418 | } 419 | } 420 | c if c == heavy::NSE => { 421 | if let Some(comment_y) = comment_y.as_ref().copied() { 422 | if cursor.y == comment_y { 423 | *comment_seen = true; 424 | } else { 425 | return Err(Error::new( 426 | window.crop((cursor, cursor).try_into().unwrap()), 427 | ErrorKind::UnexpectedChar { 428 | expected: format!( 429 | "['{}', '{}']", 430 | heavy::NS, heavy::SE 431 | ) 432 | }, 433 | Some("function documentation separator line misaligned with right side".to_string()), 434 | )); 435 | } 436 | } else { 437 | return Err(Error::new( 438 | window.crop((cursor, cursor).try_into().unwrap()), 439 | ErrorKind::UnexpectedChar { 440 | expected: format!( 441 | "['{}', '{}']", 442 | heavy::NS, heavy::SE 443 | ), 444 | }, 445 | Some(format!("missing function documentation separator line connector ('{}') on the right side", heavy::NSW)), 446 | )); 447 | } 448 | } 449 | _ => { 450 | return Err(Error::new( 451 | window.crop((cursor, cursor).try_into().unwrap()), 452 | ErrorKind::UnexpectedChar { 453 | 
expected: format!( 454 | "['{}', '{}', '{}']", 455 | heavy::NS, 456 | heavy::NSE, 457 | heavy::SE 458 | ), 459 | }, 460 | None, 461 | )) 462 | } 463 | } 464 | } 465 | State::Midline { 466 | signature, 467 | bottom_right, 468 | comment_y, 469 | } => { 470 | cursor = cursor + ICoords::RIGHT; 471 | 472 | if !window.contains_point(cursor) { 473 | return Err(Error::new( 474 | window.crop( 475 | (cursor + ICoords::LEFT, cursor + ICoords::LEFT) 476 | .try_into() 477 | .unwrap(), 478 | ), 479 | ErrorKind::UnexpectedEof, 480 | Some("while reading along the midline of the function box".to_string()), 481 | )); 482 | } 483 | 484 | match window[cursor] { 485 | c if c == heavy::EW => continue, 486 | c if c == heavy::NSW => { 487 | break Ok(Token { 488 | span: window.crop(((0, 0), *bottom_right).try_into().unwrap()), 489 | kind: TokenKind::FunctionBox(FunctionBox { 490 | signature: window.crop(*signature), 491 | definition: window.crop( 492 | ( 493 | (1, *comment_y + 1), 494 | *bottom_right + ICoords::LEFT + ICoords::UP, 495 | ) 496 | .try_into() 497 | .unwrap(), 498 | ), 499 | comment: Some( 500 | window.crop( 501 | ((1, 1), (bottom_right.x - 1, *comment_y - 1)) 502 | .try_into() 503 | .unwrap(), 504 | ), 505 | ), 506 | }), 507 | }); 508 | } 509 | _ => { 510 | return Err(Error::new( 511 | window.crop((cursor, cursor).try_into().unwrap()), 512 | ErrorKind::UnexpectedChar { 513 | expected: format!("['{}', '{}']", heavy::EW, heavy::NSW,), 514 | }, 515 | None, 516 | )) 517 | } 518 | } 519 | } 520 | } 521 | } 522 | } 523 | } 524 | 525 | impl fmt::Display for FunctionBox<'_> { 526 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 527 | writeln!( 528 | f, 529 | "#( [DEBUG] Function box: signature @ {} )#", 530 | self.signature.rectangle 531 | )?; 532 | writeln!(f, "{}", self.signature)?; 533 | writeln!( 534 | f, 535 | "#( [DEBUG] Function box: definition @ {} )#", 536 | self.definition.rectangle 537 | )?; 538 | writeln!(f, "{}", self.definition)?; 539 | 540 | if let 
Some(comment) = self.comment.as_ref() { 541 | writeln!( 542 | f, 543 | "#( [DEBUG] Function box: doc comment @ {} )#", 544 | comment.rectangle 545 | )?; 546 | writeln!(f, "{}", comment)?; 547 | } 548 | 549 | Ok(()) 550 | } 551 | } 552 | -------------------------------------------------------------------------------- /src/frontend/lexer/keyword.rs: -------------------------------------------------------------------------------- 1 | //! Keyword lexer definitions. 2 | 3 | use crate::frontend::lexer::{CharacterMapWindow, Error, ErrorKind, Token, TokenKind, Tokenize}; 4 | 5 | #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] 6 | pub enum Keyword { 7 | ForEach, 8 | Match, 9 | If, 10 | Then, 11 | Else, 12 | Underscore, 13 | LeftParen, 14 | RightParen, 15 | TwoDots, 16 | Comma, 17 | } 18 | 19 | impl Keyword { 20 | pub fn table() -> impl Iterator { 21 | [ 22 | ("foreach", Keyword::ForEach), 23 | ("match", Keyword::Match), 24 | ("if", Keyword::If), 25 | ("then", Keyword::Then), 26 | ("else", Keyword::Else), 27 | ("_", Keyword::Underscore), 28 | ("(", Keyword::LeftParen), 29 | (")", Keyword::RightParen), 30 | ("..", Keyword::TwoDots), 31 | (",", Keyword::Comma), 32 | ] 33 | .into_iter() 34 | } 35 | } 36 | 37 | impl<'a> Tokenize<'a> for Keyword { 38 | fn tokenize(window: CharacterMapWindow<'a>) -> Result, Error<'a>> { 39 | Self::table() 40 | .find_map(|(tag, t)| { 41 | window.recognize_tag(tag).map(|w| Token { 42 | span: w, 43 | kind: TokenKind::Keyword(t), 44 | }) 45 | }) 46 | .ok_or_else(|| Error::new(window, ErrorKind::NotFound, None)) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/frontend/lexer/mod.rs: -------------------------------------------------------------------------------- 1 | //! Lexer for Bauge. 
2 | 3 | use std::{backtrace::Backtrace, error, fmt, ops::Index}; 4 | 5 | use crate::frontend::coordinates::{self, Coords, ICoords, Rectangle}; 6 | use crate::log; 7 | use crate::logging; 8 | 9 | pub mod function_box; 10 | use function_box::FunctionBox; 11 | 12 | pub mod keyword; 13 | use keyword::Keyword; 14 | 15 | pub mod path; 16 | use path::PathFragment; 17 | 18 | use super::is_box_drawing_character; 19 | 20 | macro_rules! path_mods { 21 | ($name:ident, [$ns:literal, $ew:literal, $se:literal, $sw:literal, $ne:literal, $nw:literal, $nsw:literal, $nse:literal, $ewn:literal, $ews:literal, $nsew:literal]) => { 22 | #[allow(dead_code)] 23 | mod $name { 24 | pub const NS: char = $ns; 25 | pub const EW: char = $ew; 26 | pub const SE: char = $se; 27 | pub const SW: char = $sw; 28 | pub const NE: char = $ne; 29 | pub const NW: char = $nw; 30 | pub const NSW: char = $nsw; 31 | pub const NSE: char = $nse; 32 | pub const EWN: char = $ewn; 33 | pub const EWS: char = $ews; 34 | pub const NSEW: char = $nsew; 35 | 36 | pub const fn all() -> [char; 11] { 37 | [$ns, $ew, $se, $sw, $ne, $nw, $nsw, $nse, $ewn, $ews, $nsew] 38 | } 39 | } 40 | }; 41 | } 42 | 43 | path_mods!( 44 | single, 45 | ['│', '─', '┌', '┐', '└', '┘', '┤', '├', '┴', '┬', '┼'] 46 | ); 47 | path_mods!( 48 | double, 49 | ['║', '═', '╔', '╗', '╚', '╝', '╣', '╠', '╩', '╦', '╬'] 50 | ); 51 | path_mods!( 52 | heavy, 53 | ['┃', '━', '┏', '┓', '┗', '┛', '┫', '┣', '┻', '┳', '╋'] 54 | ); 55 | 56 | macro_rules! 
tokenize_log { 57 | ($t:literal, $c:ident, $v:ident, $e:expr_2021) => { 58 | log!("lexer", h3, "{} @ {}", $t, $c); 59 | 60 | let (maybe_token, t) = logging::time(|| $e); 61 | 62 | let token = match maybe_token { 63 | Ok(token) => token, 64 | Err(e) => { 65 | log!("lexer", p, "Failed after {}", logging::format_time(&t)); 66 | 67 | log!("lexer", err, "{e}"); 68 | 69 | log!("lexer", p, "Occured here:"); 70 | log!("lexer", code, "{}", e.span.to_string()); 71 | 72 | logging::finish("lexer").unwrap(); 73 | return Err(e); 74 | } 75 | }; 76 | 77 | log!("lexer", p, "Finished after {}:", logging::format_time(&t)); 78 | log!("lexer", code, "{}", token.to_string()); 79 | 80 | $v.push(token); 81 | }; 82 | } 83 | 84 | #[rustfmt::skip] 85 | const CONNECTORS: [char; 36] = [ 86 | // single 87 | single::SE, single::SW, single::NE, single::NW, single::NSW, single::NSE, single::EWN, single::EWS, single::NSEW, 88 | // double 89 | double::SE, double::SW, double::NE, double::NW, double::NSW, double::NSE, double::EWN, double::EWS, double::NSEW, 90 | // mixed 91 | '╒', '╓', '╕', '╖', '╘', '╙', '╛', '╜', '╞', '╟', '╡', '╢', '╤', '╥', '╧', '╨', '╪', '╫', 92 | ]; 93 | 94 | /// An error that can arise during lexing. 
95 | #[derive(Debug)] 96 | pub struct Error<'a> { 97 | pub span: CharacterMapWindow<'a>, 98 | pub kind: Box>, 99 | pub note: Option, 100 | pub backtrace: Backtrace, 101 | } 102 | 103 | impl<'a> Error<'a> { 104 | pub fn new(span: CharacterMapWindow<'a>, kind: ErrorKind<'a>, note: Option) -> Self { 105 | Self { 106 | span, 107 | kind: Box::new(kind), 108 | note, 109 | backtrace: Backtrace::capture(), 110 | } 111 | } 112 | } 113 | 114 | impl fmt::Display for Error<'_> { 115 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 116 | writeln!(f, "Lexing error: in this area ({}): ", self.span.rectangle)?; 117 | write!(f, "{}", self.kind)?; 118 | if let Some(s) = self.note.as_deref() { 119 | write!(f, "\nNote: {s}")?; 120 | } 121 | 122 | Ok(()) 123 | } 124 | } 125 | 126 | impl error::Error for Error<'_> {} 127 | 128 | /// The kind of lexing [`Error`]. 129 | #[derive(Debug)] 130 | pub enum ErrorKind<'a> { 131 | /// An unexpected character was encountered during lexing. 132 | UnexpectedChar { 133 | expected: String, 134 | }, 135 | /// An error during coordinates handling occured 136 | CoordError(coordinates::Error), 137 | /// The edge of the map was encountered while expecting more characters. 
138 | UnexpectedEof, 139 | /// The lexer could not find the corresponding pattern 140 | NotFound, 141 | _Dummy(&'a ()), 142 | } 143 | 144 | impl fmt::Display for ErrorKind<'_> { 145 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 146 | match self { 147 | ErrorKind::UnexpectedChar { expected } => { 148 | writeln!(f, "Unexpected character.")?; 149 | write!(f, "Note: expected any of: {expected}")?; 150 | } 151 | ErrorKind::CoordError(error) => { 152 | writeln!(f, "Coordinates error: {error}")?; 153 | write!(f, "Warning: this is a bug in the compiler.")?; 154 | } 155 | ErrorKind::UnexpectedEof => { 156 | writeln!(f, "Unexpectedly ran into the edge of the program.")?; 157 | } 158 | ErrorKind::NotFound => { 159 | writeln!(f, "Couldn't find the token while reading")?; 160 | } 161 | ErrorKind::_Dummy(_) => todo!(), 162 | } 163 | Ok(()) 164 | } 165 | } 166 | 167 | /// Trait indicating that a structure can be potentially created from a map window. 168 | trait Tokenize<'a> { 169 | /// Tokenize starting at the origin of the passed [`CharacterMapWindow`]. 170 | fn tokenize(window: CharacterMapWindow<'a>) -> Result, Error<'a>>; 171 | } 172 | 173 | /// A lexer "token", unparsed but structured. 174 | /// 175 | /// It contains a span, as well as a specialized [`TokenKind`] to refine its 176 | /// definition. 177 | pub struct Token<'a> { 178 | pub span: CharacterMapWindow<'a>, 179 | pub kind: TokenKind<'a>, 180 | } 181 | 182 | impl Token<'_> { 183 | /// Returns `true` if the token contains the given point. Used to determine, 184 | /// for example, if a circular token with a hole in its middle needs to 185 | /// have its middle lexxed to. 
186 | pub fn contains_point(&self, c: C) -> bool 187 | where 188 | C: Into, 189 | { 190 | let c = c.into(); 191 | 192 | // No point (hehe) in checking the finer details if not inside the 193 | // global area already 194 | self.span.rectangle.contains_point(c) && self.kind.contains_point(c, self.span.rectangle) 195 | } 196 | } 197 | 198 | impl fmt::Display for Token<'_> { 199 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 200 | writeln!( 201 | f, 202 | "#( [DEBUG] Token start, span @ {} )#", 203 | self.span.rectangle 204 | )?; 205 | writeln!(f, "{}", self.span)?; 206 | writeln!(f, "{}", self.kind)?; 207 | writeln!(f, "#( [DEBUG] Token end )#")?; 208 | 209 | Ok(()) 210 | } 211 | } 212 | 213 | /// The different kinds of [`Token`]. 214 | pub enum TokenKind<'a> { 215 | /// A [`FunctionBox`]. 216 | FunctionBox(FunctionBox<'a>), 217 | /// An integer literal, such as `123`, `0xABCD`, or `0b011001`. 218 | IntegerLiteral, 219 | /// A string literal, such as `"Hello, world!"`. 220 | StringLiteral, 221 | /// A keyword of the Bauge language, such as `if`, `_` or `foreach`. 222 | Keyword(Keyword), 223 | /// A fragment of a Bauge path. 224 | PathFragment(PathFragment), 225 | /// An identifier for a function. 226 | Identifier, 227 | } 228 | 229 | impl TokenKind<'_> { 230 | /// Returns `true` if the token contains the given point. Used to determine, 231 | /// for example, if a circular token with a hole in its middle needs to 232 | /// have its middle lexxed to. 
233 | pub fn contains_point(&self, c: C, initial_span: Rectangle) -> bool 234 | where 235 | C: Into, 236 | { 237 | match self { 238 | TokenKind::FunctionBox(function_box) => function_box.contains_point(c, initial_span), 239 | TokenKind::IntegerLiteral 240 | | TokenKind::StringLiteral 241 | | TokenKind::Keyword(_) 242 | | TokenKind::PathFragment(_) 243 | | TokenKind::Identifier => initial_span.contains_point(c), 244 | } 245 | } 246 | } 247 | 248 | impl fmt::Display for TokenKind<'_> { 249 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 250 | match self { 251 | TokenKind::FunctionBox(function_box) => write!(f, "{function_box}"), 252 | TokenKind::IntegerLiteral => { 253 | write!(f, "#( [DEBUG] Integer literal )#") 254 | } 255 | TokenKind::StringLiteral => { 256 | write!(f, "#( [DEBUG] String literal )#") 257 | } 258 | TokenKind::Keyword(k) => { 259 | write!(f, "#( [DEBUG] Keyword {k:?} )#") 260 | } 261 | TokenKind::PathFragment(path_fragment) => { 262 | write!(f, "{path_fragment}") 263 | } 264 | TokenKind::Identifier => { 265 | write!(f, "#( [DEBUG] Identifier )#") 266 | } 267 | } 268 | } 269 | } 270 | 271 | /// Attempts to recognize a string literal at the top-left of the passed window. 272 | fn tokenize_string_literal(window: CharacterMapWindow<'_>) -> Result, Error> { 273 | let mut cursor = Coords::from((1, 0)); 274 | loop { 275 | if !window.contains_point(cursor) { 276 | return Err(Error::new( 277 | window.crop(((0, 0), cursor + ICoords::LEFT).try_into().unwrap()), 278 | ErrorKind::UnexpectedEof, 279 | Some("while parsing string literal".to_string()), 280 | )); 281 | } else if window[cursor] == '"' { 282 | return Ok(Token { 283 | span: window.crop(((0, 0), cursor).try_into().unwrap()), 284 | kind: TokenKind::StringLiteral, 285 | }); 286 | } 287 | cursor = cursor + ICoords::RIGHT; 288 | } 289 | } 290 | 291 | /// Attempts to recognize an integer literal at the top-left of the passed window. 
292 | fn tokenize_integer_literal(window: CharacterMapWindow<'_>) -> Result, Error> { 293 | fn is_integer_literal_char(c: char) -> bool { 294 | matches!(c, '0'..='9' | 'x' | 'X' | 'b' | 'B' | 'o' | 'O' | '_') 295 | } 296 | 297 | let mut cursor = Coords::from((1, 0)); 298 | loop { 299 | if !window.contains_point(cursor) { 300 | return Err(Error::new( 301 | window.crop(((0, 0), cursor + ICoords::LEFT).try_into().unwrap()), 302 | ErrorKind::UnexpectedEof, 303 | Some("while parsing integer literal".to_string()), 304 | )); 305 | } else if !is_integer_literal_char(window[cursor]) { 306 | return Ok(Token { 307 | span: window.crop(((0, 0), cursor + ICoords::LEFT).try_into().unwrap()), 308 | kind: TokenKind::IntegerLiteral, 309 | }); 310 | } 311 | cursor = cursor + ICoords::RIGHT; 312 | } 313 | } 314 | 315 | /// Attempts to recognize an identifier at the top-left of the passed window. 316 | fn tokenize_ident(window: CharacterMapWindow<'_>) -> Result, Error> { 317 | let mut cursor = Coords::from((0, 0)); 318 | while window[cursor] != ' ' { 319 | cursor = cursor + ICoords::RIGHT; 320 | if !window.contains_point(cursor) { 321 | return Err(Error::new( 322 | window.crop(((0, 0), cursor + ICoords::LEFT).try_into().unwrap()), 323 | ErrorKind::UnexpectedEof, 324 | Some("while parsing identifier".to_string()), 325 | )); 326 | } 327 | 328 | if is_box_drawing_character(window[cursor]) { 329 | return Err(Error::new( 330 | window.crop(((0, 0), cursor).try_into().unwrap()), 331 | ErrorKind::UnexpectedChar { 332 | expected: "any valid identifier character".to_string(), 333 | }, 334 | Some("Block-drawing characters are not allowed in identifiers".to_string()), 335 | )); 336 | } 337 | } 338 | 339 | Ok(Token { 340 | span: window.crop(((0, 0), cursor + ICoords::LEFT).try_into().unwrap()), 341 | kind: TokenKind::Identifier, 342 | }) 343 | } 344 | 345 | /// A two-dimensional collection of characters, used to better reason with 346 | /// two-dimensional structures. 
347 | /// 348 | /// As programs can have unequal line lengths, lines are padded with spaces to 349 | /// the length of the longest lines. Lengths (such as width and height) are 350 | /// measured in distinct characters. 351 | #[derive(Debug)] 352 | pub struct CharacterMap { 353 | lines: Vec>, 354 | } 355 | 356 | impl CharacterMap { 357 | /// Returns the length of the map, in characters. It corresponds to the 358 | /// longest line of the original program. 359 | pub fn width(&self) -> usize { 360 | self.lines.first().map(|l| l.len()).unwrap_or_default() 361 | } 362 | 363 | /// Returns the height of the map, in characters. It corresponds to the 364 | /// number of lines in the original program. 365 | pub fn height(&self) -> usize { 366 | self.lines.len() 367 | } 368 | 369 | /// Creates a [window](CharacterMapWindow) of the [`CharacterMap`], restricting the view to a 370 | /// rectangle. 371 | pub fn window(&self, rectangle: Rectangle) -> CharacterMapWindow { 372 | CharacterMapWindow { 373 | original: self, 374 | rectangle, 375 | } 376 | } 377 | 378 | /// Creates a [window](CharacterMapWindow) of the [`CharacterMap`], restricting the view to a 379 | /// rectangle whose top-left corner is the point passed as a parameter, and 380 | /// bottom-right is the bottom-right of the map. 381 | pub fn window_from_point(&self, c: C) -> CharacterMapWindow 382 | where 383 | C: Into, 384 | { 385 | let c = c.into(); 386 | self.window( 387 | ( 388 | c, 389 | ( 390 | self.width().saturating_sub(1), 391 | self.height().saturating_sub(1), 392 | ), 393 | ) 394 | .try_into() 395 | .unwrap(), 396 | ) 397 | } 398 | 399 | /// Returns a [`Rectangle`] encompassing the entirety of the map. 400 | pub fn rectangle(&self) -> Rectangle { 401 | ( 402 | (0, 0), 403 | ( 404 | self.width().saturating_sub(1), 405 | self.height().saturating_sub(1), 406 | ), 407 | ) 408 | .try_into() 409 | .unwrap() 410 | } 411 | 412 | /// Reads and tokenizes the map into a list of [`Tokens`](Token). 
413 | pub fn tokenize(&self) -> Result, Error> { 414 | let mut tokens: Vec = Vec::new(); 415 | 416 | log!("lexer", h2, "Tokenization"); 417 | 418 | let (_, timing) = logging::time(|| { 419 | for line in self.rectangle().iter() { 420 | for coord in line { 421 | if tokens.iter().any(|t| t.contains_point(coord)) { 422 | continue; 423 | } 424 | 425 | match self[coord] { 426 | ' ' => continue, 427 | '"' => { 428 | tokenize_log!( 429 | "String literal", 430 | coord, 431 | tokens, 432 | tokenize_string_literal(self.window_from_point(coord)) 433 | ); 434 | } 435 | c if c == heavy::SE => { 436 | tokenize_log!( 437 | "Function box", 438 | coord, 439 | tokens, 440 | FunctionBox::tokenize(self.window_from_point(coord)) 441 | ); 442 | } 443 | c if c.is_ascii_digit() => { 444 | tokenize_log!( 445 | "Integer literal", 446 | coord, 447 | tokens, 448 | tokenize_integer_literal(self.window_from_point(coord)) 449 | ); 450 | } 451 | c => { 452 | let (maybe_keyword, t) = 453 | logging::time(|| Keyword::tokenize(self.window_from_point(coord))); 454 | match maybe_keyword { 455 | Ok(token) => { 456 | log!("lexer", h3, "Keyword @ {coord}"); 457 | log!("lexer", p, "Finished in {}:", logging::format_time(&t)); 458 | log!("lexer", code, "{}", token.to_string()); 459 | 460 | tokens.push(token); 461 | } 462 | _ => { 463 | if is_box_drawing_character(c) { 464 | tokenize_log!( 465 | "Path fragment", 466 | coord, 467 | tokens, 468 | PathFragment::tokenize(self.window_from_point(coord)) 469 | ); 470 | } else { 471 | tokenize_log!( 472 | "Identifier", 473 | coord, 474 | tokens, 475 | tokenize_ident(self.window_from_point(coord)) 476 | ); 477 | } 478 | } 479 | } 480 | } 481 | } 482 | } 483 | } 484 | 485 | Ok(()) 486 | }); 487 | 488 | log!("lexer", h2, "Summary"); 489 | log!( 490 | "lexer", 491 | p, 492 | "Lexer recognized {} tokens in {}.", 493 | tokens.len(), 494 | logging::format_time(&timing) 495 | ); 496 | 497 | log!("lexer", finish); 498 | 499 | Ok(tokens) 500 | } 501 | } 502 | 503 | impl 
fmt::Display for CharacterMap { 504 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 505 | for (i, l) in self.lines.iter().enumerate() { 506 | for c in l { 507 | write!(f, "{c}")?; 508 | } 509 | if i != self.height() - 1 { 510 | writeln!(f)?; 511 | } 512 | } 513 | 514 | Ok(()) 515 | } 516 | } 517 | 518 | impl FromIterator for CharacterMap 519 | where 520 | S: AsRef, 521 | { 522 | fn from_iter>(iter: T) -> Self { 523 | let (lines, t) = logging::time(|| { 524 | let mut max_len = 0; 525 | 526 | let mut lines = iter 527 | .into_iter() 528 | .map(|s| { 529 | let s = s.as_ref(); 530 | let v = s.chars().collect::>(); 531 | max_len = max_len.max(v.len()); 532 | v 533 | }) 534 | .collect::>(); 535 | 536 | for line in lines.iter_mut() { 537 | if line.len() < max_len { 538 | line.extend(std::iter::repeat(' ').take(max_len - line.len())); 539 | } 540 | } 541 | 542 | lines 543 | }); 544 | 545 | let map = Self { lines }; 546 | 547 | log!("lexer", h1, "Lexer"); 548 | log!("lexer", h2, "`CharacterMap` creation"); 549 | log!( 550 | "lexer", 551 | p, 552 | "`CharacterMap` of dimension {}x{} created in {}:", 553 | map.width(), 554 | map.height(), 555 | logging::format_time(&t) 556 | ); 557 | log!("lexer", code, "{}", map.to_string()); 558 | 559 | map 560 | } 561 | } 562 | 563 | impl Index for CharacterMap 564 | where 565 | C: Into, 566 | { 567 | type Output = char; 568 | 569 | fn index(&self, index: C) -> &Self::Output { 570 | let Coords { x, y } = index.into(); 571 | 572 | &self.lines[y][x] 573 | } 574 | } 575 | 576 | /// A slice, or view, into a [`CharacterMap`]. 577 | /// 578 | /// Indexing into that slice uses *local* coordinates (that is, `(0, 0)` is the 579 | /// top-left corner of the slice, and not the original map). 
580 | #[derive(Debug)] 581 | pub struct CharacterMapWindow<'a> { 582 | original: &'a CharacterMap, 583 | rectangle: Rectangle, // Relative to the original 584 | } 585 | 586 | impl CharacterMapWindow<'_> { 587 | /// Returns the width of the window, in characters. 588 | pub fn width(&self) -> usize { 589 | self.rectangle.width() 590 | } 591 | 592 | /// Returns the height of the window, in characters. 593 | pub fn height(&self) -> usize { 594 | self.rectangle.height() 595 | } 596 | 597 | /// Crops the window and gives a smaller one. The given rectangle is local 598 | /// to the window's rectangle (and not to the original). 599 | pub fn crop(&self, r: Rectangle) -> Self { 600 | assert!( 601 | self.rectangle.normalize().contains_rectangle(r), 602 | "{} is not contained within {} (normalized from {})", 603 | r, 604 | self.rectangle.normalize(), 605 | self.rectangle 606 | ); 607 | 608 | Self { 609 | original: self.original, 610 | rectangle: r + self.rectangle.top_left, 611 | } 612 | } 613 | 614 | /// Returns true if the passed window-local [`Coords`] is contained within 615 | /// this window. 616 | pub fn contains_point(&self, c: C) -> bool 617 | where 618 | C: Into, 619 | { 620 | self.rectangle.normalize().contains_point(c) 621 | } 622 | 623 | /// Attempts to recognize a string passed as a parameter by reading from 624 | /// local (0, 0) to the left. If it's found, then a new window encompassing 625 | /// the tag is returned. 626 | pub fn recognize_tag(&self, tag: &str) -> Option { 627 | tag.chars() 628 | .try_fold(Coords::from((0, 0)), |cursor, c| { 629 | if self.contains_point(cursor) { 630 | if self[cursor] == c { 631 | Some(cursor + ICoords::RIGHT) 632 | } else { 633 | None 634 | } 635 | } else { 636 | None 637 | } 638 | }) 639 | .map(|end| self.crop(((0, 0), end + ICoords::LEFT).try_into().unwrap())) 640 | } 641 | 642 | /// Creates a window on the entirety of the original map, effectively "resetting" it. 
643 | pub fn reset(&self) -> Self { 644 | self.original.window(self.original.rectangle()) 645 | } 646 | 647 | /// Translates coordinates local to `from` into coordinates local to this window. 648 | pub fn translate(&self, c: C, from: &Self) -> Coords 649 | where 650 | C: Into, 651 | { 652 | let c = c.into(); 653 | c + from.rectangle.top_left - self.rectangle.top_left 654 | } 655 | } 656 | 657 | impl Index for CharacterMapWindow<'_> 658 | where 659 | C: Into, 660 | { 661 | type Output = char; 662 | 663 | fn index(&self, index: C) -> &Self::Output { 664 | let index = index.into(); 665 | assert!( 666 | self.rectangle.normalize().contains_point(index), 667 | "Indexing window {} out of bounds at {}", 668 | self.rectangle, 669 | index 670 | ); 671 | &self.original[index + self.rectangle.top_left] 672 | } 673 | } 674 | 675 | impl fmt::Display for CharacterMapWindow<'_> { 676 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 677 | for (i, l) in self.rectangle.normalize().iter().enumerate() { 678 | for c in l { 679 | write!(f, "{}", self[c])?; 680 | } 681 | if i != self.height() - 1 { 682 | writeln!(f)?; 683 | } 684 | } 685 | 686 | Ok(()) 687 | } 688 | } 689 | 690 | #[cfg(test)] 691 | mod tests { 692 | use super::*; 693 | 694 | #[test] 695 | fn test_window() { 696 | let c: CharacterMap = "abcd\nefgh\nijkl\nnmop".lines().collect(); 697 | 698 | let w = c.window(((1, 1), (2, 2)).try_into().unwrap()); 699 | 700 | assert_eq!(w[(0, 0)], 'f'); 701 | } 702 | } 703 | -------------------------------------------------------------------------------- /src/frontend/lexer/path.rs: -------------------------------------------------------------------------------- 1 | //! Path lexer definitions. 2 | 3 | use std::fmt; 4 | 5 | use crate::frontend::{ 6 | coordinates::{Coords, ICoords}, 7 | lexer::{CharacterMapWindow, Error, ErrorKind, Token, TokenKind, Tokenize, CONNECTORS}, 8 | }; 9 | 10 | /// The kind of path of a [`PathFragment`]. 
11 | #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] 12 | pub enum PathKind { 13 | /// A single path: `│`, `─`, `┌`, ... 14 | Single, 15 | /// A double path: `║`, `═`, `╔`, ... 16 | Double, 17 | } 18 | 19 | impl PathKind { 20 | /// Returns the type of path in the four [`Directions`](Direction) of a connector. 21 | pub fn from_connector(c: char) -> [Option; 4] { 22 | let north = match c { 23 | '└' | '┘' | '┼' | '├' | '┤' | '┴' | '╘' | '╛' | '╞' | '╡' | '╧' | '╪' => { 24 | Some(PathKind::Single) 25 | } 26 | '╚' | '╝' | '╬' | '╠' | '╣' | '╩' | '╙' | '╜' | '╟' | '╢' | '╨' | '╫' => { 27 | Some(PathKind::Double) 28 | } 29 | _ => None, 30 | }; 31 | 32 | let east = match c { 33 | '┌' | '└' | '┼' | '├' | '┬' | '┴' | '╓' | '╙' | '╟' | '╥' | '╨' | '╫' => { 34 | Some(PathKind::Single) 35 | } 36 | '╔' | '╚' | '╬' | '╠' | '╦' | '╩' | '╒' | '╘' | '╞' | '╤' | '╧' | '╪' => { 37 | Some(PathKind::Double) 38 | } 39 | _ => None, 40 | }; 41 | 42 | let south = match c { 43 | '┌' | '┐' | '┼' | '├' | '┤' | '┬' | '╒' | '╕' | '╞' | '╡' | '╤' | '╪' => { 44 | Some(PathKind::Single) 45 | } 46 | '╔' | '╗' | '╬' | '╠' | '╣' | '╦' | '╓' | '╖' | '╟' | '╢' | '╥' | '╫' => { 47 | Some(PathKind::Double) 48 | } 49 | _ => None, 50 | }; 51 | 52 | let west = match c { 53 | '┐' | '┘' | '┼' | '┤' | '┬' | '┴' | '╖' | '╜' | '╢' | '╥' | '╨' | '╫' => { 54 | Some(PathKind::Single) 55 | } 56 | '╗' | '╝' | '╬' | '╣' | '╦' | '╩' | '╕' | '╛' | '╡' | '╤' | '╧' | '╪' => { 57 | Some(PathKind::Double) 58 | } 59 | _ => None, 60 | }; 61 | 62 | [north, east, south, west] 63 | } 64 | } 65 | 66 | /// The kind of path fragment (or component). 67 | pub enum PathFragment { 68 | /// A straight line, either horizontal or vertical: 69 | /// ```text 70 | /// ────────────── 71 | /// ``` 72 | Line(PathKind), 73 | /// A connector between different [`PathKind`], or directions: `├`, `╞`, ... 74 | Connector { 75 | top: Option, 76 | right: Option, 77 | bottom: Option, 78 | left: Option, 79 | }, 80 | /// An up arrow character: `↑`. 
81 | ArrowUp, 82 | /// A right arrow character: `→`. 83 | ArrowRight, 84 | /// A down arrow character: `↓`. 85 | ArrowDown, 86 | /// A left arrow character: `←`. 87 | ArrowLeft, 88 | /// An up path start character: `╿`. 89 | StartUp, 90 | /// A right path start character: `╼`. 91 | StartRight, 92 | /// A down path start character: `╽`. 93 | StartDown, 94 | /// A left path start character: `╾`. 95 | StartLeft, 96 | /// A path error character: `╳`. 97 | Error, 98 | } 99 | 100 | impl<'a> Tokenize<'a> for PathFragment { 101 | fn tokenize(window: CharacterMapWindow<'a>) -> Result, Error<'a>> { 102 | let fragment = match window[(0, 0)] { 103 | '↑' => PathFragment::ArrowUp, 104 | '→' => PathFragment::ArrowRight, 105 | '↓' => PathFragment::ArrowDown, 106 | '←' => PathFragment::ArrowLeft, 107 | '╿' => PathFragment::StartUp, 108 | '╼' => PathFragment::StartRight, 109 | '╽' => PathFragment::StartDown, 110 | '╾' => PathFragment::StartLeft, 111 | '╳' => PathFragment::Error, 112 | c @ ('│' | '─' | '║' | '═') => { 113 | let global_window = window.reset(); 114 | let start = global_window.translate((0, 0), &window); 115 | 116 | let directions = match c { 117 | '│' | '║' => [ICoords::UP, ICoords::DOWN], 118 | '─' | '═' => [ICoords::LEFT, ICoords::RIGHT], 119 | _ => unreachable!(), 120 | }; 121 | 122 | let kind = match c { 123 | '│' | '─' => PathKind::Single, 124 | '║' | '═' => PathKind::Double, 125 | _ => unreachable!(), 126 | }; 127 | 128 | let (mut a, mut b) = (Ok::(start), Ok::(start)); 129 | 130 | loop { 131 | if let (Err(a), Err(b)) = (a, b) { 132 | return Ok(Token { 133 | span: global_window.crop((a, b).try_into().unwrap()), 134 | kind: TokenKind::PathFragment(PathFragment::Line(kind)), 135 | }); 136 | } 137 | 138 | if let Ok(a_coords) = a.as_mut() { 139 | let new_a_coords = *a_coords + directions[0]; 140 | if !global_window.contains_point(new_a_coords) 141 | || global_window[new_a_coords] != c 142 | { 143 | a = Err(*a_coords); 144 | } else { 145 | *a_coords = new_a_coords 146 
| } 147 | } 148 | 149 | if let Ok(b_coords) = b.as_mut() { 150 | let new_b_coords = *b_coords + directions[1]; 151 | if !global_window.contains_point(new_b_coords) 152 | || global_window[new_b_coords] != c 153 | { 154 | b = Err(*b_coords); 155 | } else { 156 | *b_coords = new_b_coords 157 | } 158 | } 159 | } 160 | } 161 | c if CONNECTORS.contains(&c) => { 162 | let [top, right, bottom, left] = PathKind::from_connector(c); 163 | PathFragment::Connector { 164 | top, 165 | right, 166 | bottom, 167 | left, 168 | } 169 | } 170 | _ => { 171 | return Err(Error::new( 172 | window.crop(((0, 0), (0, 0)).try_into().unwrap()), 173 | ErrorKind::NotFound, 174 | None, 175 | )) 176 | } 177 | }; 178 | 179 | Ok(Token { 180 | span: window.crop(((0, 0), (0, 0)).try_into().unwrap()), 181 | kind: TokenKind::PathFragment(fragment), 182 | }) 183 | } 184 | } 185 | 186 | impl fmt::Display for PathFragment { 187 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 188 | match self { 189 | PathFragment::Line(path_kind) => { 190 | write!(f, "#( [DEBUG] Path line (kind = {path_kind:?}) )#") 191 | } 192 | PathFragment::Connector { 193 | top, 194 | right, 195 | bottom, 196 | left, 197 | } => { 198 | let top = top 199 | .map(|k| format!("{k:?}")) 200 | .unwrap_or_else(|| "_".to_string()); 201 | let right = right 202 | .map(|k| format!("{k:?}")) 203 | .unwrap_or_else(|| "_".to_string()); 204 | let bottom = bottom 205 | .map(|k| format!("{k:?}")) 206 | .unwrap_or_else(|| "_".to_string()); 207 | let left = left 208 | .map(|k| format!("{k:?}")) 209 | .unwrap_or_else(|| "_".to_string()); 210 | 211 | write!( 212 | f, 213 | "#( [DEBUG] Path connector (N: {top} E: {right} S: {bottom} W: {left}) )#" 214 | ) 215 | } 216 | PathFragment::ArrowUp => write!(f, "#( [DEBUG] Path arrow up )#"), 217 | PathFragment::ArrowRight => write!(f, "#( [DEBUG] Path arrow right )#"), 218 | PathFragment::ArrowDown => write!(f, "#( [DEBUG] Path arrow down )#"), 219 | PathFragment::ArrowLeft => write!(f, "#( [DEBUG] Path 
arrow left )#"), 220 | PathFragment::StartUp => write!(f, "#( [DEBUG] Path start up )#"), 221 | PathFragment::StartRight => write!(f, "#( [DEBUG] Path start right )#"), 222 | PathFragment::StartDown => write!(f, "#( [DEBUG] Path start down )#"), 223 | PathFragment::StartLeft => write!(f, "#( [DEBUG] Path start left )#"), 224 | PathFragment::Error => write!(f, "#( [DEBUG] Path error end )#"), 225 | } 226 | } 227 | } 228 | -------------------------------------------------------------------------------- /src/frontend/mod.rs: -------------------------------------------------------------------------------- 1 | //! Frontend modules for Bauge. 2 | 3 | pub mod coordinates; 4 | pub mod lexer; 5 | 6 | /// Returns `true` if the passed character is a box-drawing character, including the 4 directional arrows. 7 | pub fn is_box_drawing_character(c: char) -> bool { 8 | ('\u{2500}'..='\u{257F}').contains(&c) || ('\u{2190}'..='\u{2193}').contains(&c) 9 | } 10 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod frontend; 2 | #[macro_use] 3 | pub mod logging; 4 | -------------------------------------------------------------------------------- /src/logging/mod.rs: -------------------------------------------------------------------------------- 1 | //! Utilities for logging debug data. 2 | 3 | use std::{ 4 | cell::RefCell, 5 | collections::HashMap, 6 | io, 7 | path::{Path, PathBuf}, 8 | sync::{ 9 | OnceLock, 10 | atomic::{AtomicBool, AtomicU64, Ordering}, 11 | }, 12 | time::{Duration, Instant}, 13 | }; 14 | 15 | thread_local! 
{ 16 | static LOGS: RefCell> = RefCell::new(HashMap::new()); 17 | } 18 | 19 | static ENABLE: AtomicBool = const { AtomicBool::new(false) }; 20 | static COUNTER: AtomicU64 = const { AtomicU64::new(0) }; 21 | static PATH: OnceLock = OnceLock::new(); 22 | 23 | /// Measures the amount of time it takes to execute the function passed as a 24 | /// parameter. 25 | pub fn time(f: F) -> (R, Duration) 26 | where 27 | F: FnOnce() -> R, 28 | { 29 | let start = Instant::now(); 30 | let r = f(); 31 | (r, start.elapsed()) 32 | } 33 | 34 | /// Formats time for printing purposes. 35 | pub fn format_time(t: &Duration) -> String { 36 | let nanos = t.as_nanos(); 37 | 38 | if nanos == 0 { 39 | return "0ns".to_string(); 40 | } 41 | 42 | let log10 = nanos.ilog10(); 43 | 44 | if log10 < 3 { 45 | format!("{}ns", nanos) 46 | } else if log10 < 6 { 47 | format!("{}µs", nanos / 1000) 48 | } else if log10 < 9 { 49 | format!("{}ms", nanos / 1_000_000) 50 | } else { 51 | format!("{}s", nanos / 1_000_000_000) 52 | } 53 | } 54 | 55 | /// Enables logging. 56 | pub fn enable() { 57 | ENABLE.store(true, Ordering::SeqCst); 58 | } 59 | 60 | fn path() -> &'static Path { 61 | PATH.get_or_init(|| { 62 | let path = PathBuf::from("./logs"); 63 | if !std::fs::exists(&path).unwrap() { 64 | std::fs::create_dir(&path).unwrap(); 65 | } 66 | path 67 | }) 68 | } 69 | 70 | /// A structured Markdown log for debugging purposes. 71 | #[derive(Default)] 72 | pub struct Log { 73 | pub inner: String, 74 | } 75 | 76 | pub fn with_global_log(name: &'static str, f: F) 77 | where 78 | F: FnOnce(&mut Log), 79 | { 80 | if ENABLE.load(Ordering::SeqCst) { 81 | LOGS.with_borrow_mut(|h| f(h.entry(name).or_default())) 82 | } 83 | } 84 | 85 | #[macro_export] 86 | macro_rules! 
log { 87 | ($n:literal, h1, $($args:expr),*) => {{ 88 | $crate::logging::with_global_log($n, |l| { 89 | l.inner 90 | .push_str(&::std::format!("# {}\n", ::std::format!($($args),*))) 91 | }) 92 | }}; 93 | ($n:literal, h2, $($args:expr),*) => {{ 94 | $crate::logging::with_global_log($n, |l| { 95 | l.inner 96 | .push_str(&::std::format!("## {}\n", ::std::format!($($args),*))) 97 | }) 98 | }}; 99 | ($n:literal, h3, $($args:expr),*) => {{ 100 | $crate::logging::with_global_log($n, |l| { 101 | l.inner 102 | .push_str(&::std::format!("### {}\n", ::std::format!($($args),*))) 103 | }) 104 | }}; 105 | ($n:literal, p, $($args:expr),*) => {{ 106 | $crate::logging::with_global_log($n, |l| { 107 | l.inner 108 | .push_str(&::std::format!("{}\n\n", ::std::format!($($args),*))) 109 | }) 110 | }}; 111 | ($n:literal, code, $($args:expr),*) => {{ 112 | $crate::logging::with_global_log($n, |l| { 113 | l.inner 114 | .push_str(&::std::format!("```\n{}\n```\n", ::std::format!($($args),*))) 115 | }) 116 | }}; 117 | ($n:literal, err, $($args:expr),*) => {{ 118 | $crate::logging::with_global_log($n, |l| { 119 | let s = ::std::format!($($args),*); 120 | l.inner.push_str("> ❌ Error\n"); 121 | for line in s.lines() { 122 | l.inner.push_str(&::std::format!("> {}\n", line)) 123 | } 124 | l.inner.push('\n'); 125 | }) 126 | }}; 127 | ($n:literal, finish) => { 128 | $crate::logging::finish($n).unwrap(); 129 | } 130 | } 131 | 132 | /// Write the document to the corresponding file. 
133 | pub fn finish(name: &'static str) -> Result<(), io::Error> { 134 | LOGS.with_borrow_mut(|h| { 135 | if let Some(log) = h.remove(name) { 136 | let counter = COUNTER.fetch_add(1, Ordering::SeqCst); 137 | let mut path = path().to_path_buf(); 138 | path.push(PathBuf::from(format!("{counter}_{name}.md"))); 139 | 140 | std::fs::write(path, &log.inner) 141 | } else { 142 | Ok(()) 143 | } 144 | }) 145 | } 146 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | 3 | use std::path::PathBuf; 4 | 5 | use bauge::frontend::lexer::CharacterMap; 6 | 7 | #[derive(Parser)] 8 | #[command(version, about, long_about = None)] 9 | struct Args { 10 | /// Enables the output of log files in the ./logs directory. 11 | #[arg(long)] 12 | logging: bool, 13 | 14 | /// The Bauge source file to compile. 15 | file: PathBuf, 16 | } 17 | 18 | fn main() { 19 | let args = Args::parse(); 20 | 21 | if args.logging { 22 | bauge::logging::enable(); 23 | } 24 | 25 | let map: CharacterMap = std::fs::read_to_string(args.file) 26 | .unwrap() 27 | .lines() 28 | .collect(); 29 | 30 | let tokens = match map.tokenize() { 31 | Ok(t) => t, 32 | Err(e) => { 33 | eprintln!("{e}"); 34 | return; 35 | } 36 | }; 37 | 38 | println!("Tokenized to {} tokens.", tokens.len()); 39 | } 40 | --------------------------------------------------------------------------------