├── .gitignore ├── LICENSE ├── README.md ├── dates └── parser.odin ├── example.toml ├── main.odin ├── misc.odin ├── mod.pkg ├── parser.odin ├── tests └── test.odin ├── tokenizer.odin ├── toml.odin └── validator.odin /.gitignore: -------------------------------------------------------------------------------- 1 | testing 2 | compile.sh 3 | ols.json 4 | /toml_parser 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Ult1 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TOML parser 2 | 3 | A TOML parser for odin-lang. 
4 | 5 | # Example 6 | 7 | ```Odin 8 | import "toml" 9 | import "toml/dates" 10 | 11 | main :: proc() { 12 | using toml 13 | 14 | section, err1 := parse_file("toml/example.toml", context.temp_allocator) 15 | default, err2 := parse(#load("toml/example.toml"), "example.toml", context.temp_allocator) 16 | 17 | if print_error(err2) do return 18 | print_error(err1) 19 | 20 | print_table(section) 21 | 22 | inf := get_f64(section, "infinity") or_else get_f64_panic(default, "infinity") 23 | num := get(i64, section, "num") or_else 5 24 | 25 | str := get(string, section, "multiline_str") or_else "bad" 26 | 27 | date := get_date(section, "letsnot", "k", "l", "m", "n") or_else dates.Date {} 28 | 29 | list := get_panic(^List, section, "o", "p") 30 | 31 | } 32 | ``` 33 | 34 | # Installation 35 | 36 | Simply, 37 | ```nix 38 | cd your_project 39 | git clone https://github.com/Up05/toml_parser toml 40 | ``` 41 | And `import "toml"` 42 | 43 | # Design/Idiom idea 44 | 45 | Although you can simply use `or_else` or just `val, ok := get(...`, I propose that one could: 46 | 1. load a configuration at runtime, by using `parse_file` 47 | 2. load their configuration at compile time by using `parse_data(#load(same_file), "filename.toml")` 48 | 3. first get a value from the runtime config by using `get`, then, if need be, fall back (via `or_else`) to the compile-time config and use `get_panic`. 49 | 50 | # Function reference 51 | 52 | ## Errors 53 | ```odin 54 | format_error :: proc(err: Error, allocator := context.temp_allocator) -> (message: string, fatal: bool) 55 | ``` 56 | Formats the error and returns it, as well as whether it was fatal or not. 57 | 58 | ```odin 59 | print_error :: proc(err: Error) -> (fatal: bool) 60 | ``` 61 | Formats and prints the specified error to `stdout`. Use `format_error` instead to get only the error message.
62 | 63 | ## Parsing 64 | 65 | ```odin 66 | parse_file :: proc(filename: string, allocator := context.allocator) -> (section: ^Table, err: Error) 67 | ``` 68 | Parses the specified toml file. Returns the root table & an error, which can then be nicely printed with `print_error`. 69 | 70 | ```odin 71 | parse_data :: proc(data: []u8, original_filename := "untitled data", allocator := context.allocator) -> (section: ^Table, err: Error) 72 | ``` 73 | Parses the given data. Is meant to be used with `#load(file)`. 74 | 75 | ```odin 76 | parse :: proc(data: string, original_file: string, allocator := context.allocator) -> (tokens: ^Table, err: Error) 77 | ``` 78 | Parses the TOML in a string. Underlying function called by `parse_data` and `parse_file`. 79 | 80 | ## Getting the values 81 | 82 | ```odin 83 | get :: proc($T: typeid, section: ^Table, path: ..string) -> (val: T, ok: bool) // where T is in Type union 84 | ``` 85 | Retrieves and type checks the value at path. **Careful, path is not specified by dots!** 86 | Works on any table. 87 | 88 | ```odin 89 | get_panic :: proc($T: typeid, section: ^Table, path: ..string) -> T // where T is in Type union 90 | ``` 91 | Retrieves and type checks the value at path. **Careful, path is not specified by dots!** 92 | Works on any table. Crashes if not ok. 93 | 94 | There are also `get_<type>` & `get_<type>_panic` functions (e.g. `get_f64`, `get_f64_panic`) for all possible types in the `Type` union. 95 | Here are the variants: `{ table, list, string, bool, i64, f64, date }` 96 | 97 | ## Printing 98 | 99 | Generally replaced by `fmt.print` and `fmt.printf("%#v\n", ...`. 100 | ```odin 101 | print_table :: proc(section: ^Table, level := 0) 102 | ``` 103 | A while back Odin used to hang when printing a map pointer. 104 | I'm pretty sure it does not anymore.
105 | 106 | ```odin 107 | print_value :: proc(v: Type, level := 0) 108 | ``` 109 | 110 | ## Freeing memory 111 | 112 | ```odin 113 | deep_delete :: proc(type: Type, allocator := context.allocator) -> (err: runtime.Allocator_Error) 114 | ``` 115 | Recursively frees parser's output 116 | 117 | ```odin 118 | delete_error :: proc(err: ^Error) 119 | ``` 120 | Simply, frees the error. 121 | *Filename is not freed, because the parser only slices it.* 122 | 123 | ## Testing (internal) 124 | 125 | ```odin 126 | @private 127 | main :: proc() 128 | ``` 129 | This is here for `toml-test`. It takes in the TOML from `stdin`, parses it, marshal's it to JSON and prints the JSON to stdout. 130 | Unless there was an error, in which case the program does not print anything and only exits with exit code `1`. 131 | 132 | *Some tests fail because of how odin formats floats & non-printable characters, cba to fix that and it doesn't matter.* 133 | 134 | # Files 135 | 136 | ```sh 137 | main.odin # an internal file for testing 138 | toml.odin # the main user-facing file 139 | misc.odin # a couple miscellaneous functions 140 | 141 | tokenizer.odin # rips text apart by space and special symbols (string -> [] string) 142 | validator.odin # checks whether given TOML is valid or not ([] string -> Error?) 143 | parser.odin # parses tokens into the recursive Type union ([] string -> Type) 144 | 145 | tests/ # odin core:testing tests (currently, there is 1...) 146 | dates/ # my small RFC3339 date parsing library 147 | mod.pkg # package info for the odin package website (can't find it right now...) 
148 | ``` 149 | 150 | 151 | 152 | -------------------------------------------------------------------------------- /dates/parser.odin: -------------------------------------------------------------------------------- 1 | package dates 2 | 3 | import "core:fmt" 4 | import "core:math" 5 | import "core:slice" 6 | import "core:strconv" 7 | import "core:strings" 8 | 9 | DateError :: enum { 10 | NONE, 11 | 12 | // Where parser REALIZED something is wrong! 13 | FAILED_AT_YEAR, 14 | FAILED_AT_MONTH, 15 | FAILED_AT_DAY, 16 | FAILED_AT_HOUR, 17 | FAILED_AT_MINUTE, 18 | FAILED_AT_SECOND, 19 | FAILED_AT_OFFSET_HOUR, 20 | FAILED_AT_OFFSET_MINUTE, 21 | YEAR_OUT_OF_BOUNDS, 22 | MONTH_OUT_OF_BOUNDS, // 01-12 23 | DAY_OUT_OF_BOUNDS, 24 | HOUR_OUT_OF_BOUNDS, 25 | MINUTE_OUT_OF_BOUNDS, 26 | SECOND_OUT_OF_BOUNDS, 27 | OFFSET_HOUR_OUT_OF_BOUNDS, 28 | OFFSET_MINUTE_OUT_OF_BOUNDS, 29 | FAILED_AT_TIME_SEPERATOR, // character seperating full-date & full-time isn't in variable "time_separators" 30 | } 31 | 32 | // may be overwritten. 
Set to empty array to accept any time seperator 33 | time_separators: []string = {"t", "T", " "} 34 | offset_separators: []string = {"z", "Z", "+", "-"} 35 | 36 | Date :: struct { 37 | second: f32, 38 | is_date_local: bool, 39 | is_time_only : bool, 40 | is_date_only : bool, 41 | 42 | year, month, day: int, 43 | hour, minute: int, 44 | offset_hour: int, 45 | offset_minute: int, 46 | } 47 | // Parses an RFC 3339 date, time or date-time; a date or UTC offset that is cut short is reported as FAILED_AT_* instead of slicing out of range. 48 | from_string :: proc(date: string) -> (out: Date, err: DateError) { 49 | date := date 50 | 51 | out.is_date_only = true 52 | out.is_time_only = true 53 | 54 | ok: bool 55 | 56 | // ############################## D A T E ############################## 57 | 58 | // Because there has to be a leading zero (length is checked first so short input cannot panic) 59 | if len(date) >= 5 && date[4] == '-' { 60 | out.is_time_only = false 61 | out.year = parse_int2(date[0:4], .FAILED_AT_YEAR) or_return 62 | if len(date) < 7 do return out, .FAILED_AT_MONTH 63 | out.month = parse_int2(date[5:7], .FAILED_AT_MONTH) or_return 64 | if !between(out.month, 1, 12) do return out, .MONTH_OUT_OF_BOUNDS 65 | if len(date) < 10 do return out, .FAILED_AT_DAY 66 | out.day = parse_int2(date[8:10], .FAILED_AT_DAY) or_return 67 | if !between(out.day, 1, days_in_month(out.year, out.month)) do return out, .DAY_OUT_OF_BOUNDS 68 | 69 | if len(date) > 10 { 70 | if !(len(time_separators) == 0 || 71 | slice.any_of(time_separators, date[10:11])) { 72 | return out, .FAILED_AT_TIME_SEPERATOR 73 | } 74 | 75 | date = date[11:] 76 | } 77 | } 78 | 79 | // ############################## T I M E ############################## 80 | 81 | if len(date) >= 8 && date[2] == ':' { 82 | out.is_date_only = false 83 | out.hour = parse_int2(date[0:2], .FAILED_AT_HOUR) or_return 84 | if !between(out.hour, 0, 23) do return out, .HOUR_OUT_OF_BOUNDS 85 | 86 | out.minute = parse_int2(date[3:5], .FAILED_AT_MINUTE) or_return 87 | if !between(out.minute, 0, 59) do return out, .MINUTE_OUT_OF_BOUNDS 88 | 89 | date = date[6:] // because of "-" 90 | offset, _ := strings.index_multi(date, offset_separators) 91 | 92 | out.second, ok = strconv.parse_f32( 93 | date[:offset if offset != -1 else len(date)], 94 | ) 95
| if !ok do return out, .FAILED_AT_SECOND 96 | // seconds \in [00, 60], because of leap seconds 97 | if !between(int(out.second), 0, 60) do return out, .SECOND_OUT_OF_BOUNDS 98 | 99 | if offset != -1 { 100 | date = date[offset:] 101 | // fine to have lowercase here, because it wouldn't have been detected otherwise (compared by byte, so nothing is allocated) 102 | if date[0] == 'z' || date[0] == 'Z' do return 103 | if len(date) < 6 do return out, .FAILED_AT_OFFSET_HOUR 104 | out.offset_hour = parse_int2( 105 | date[1:3], 106 | .FAILED_AT_OFFSET_HOUR, 107 | ) or_return 108 | if !between(out.offset_hour, 0, 23) do return out, .OFFSET_HOUR_OUT_OF_BOUNDS 109 | 110 | out.offset_minute = parse_int2( 111 | date[4:6], 112 | .FAILED_AT_OFFSET_MINUTE, 113 | ) or_return 114 | if !between(out.offset_minute, 0, 59) do return out, .OFFSET_MINUTE_OUT_OF_BOUNDS 115 | 116 | if date[:1] == "-" { 117 | out.offset_hour *= -1 118 | out.offset_minute *= -1 119 | } 120 | 121 | } else { 122 | out.is_date_local = true 123 | } 124 | } 125 | 126 | return 127 | } 128 | 129 | to_string :: proc( 130 | date: Date, 131 | time_sep := ' ', 132 | ) -> ( 133 | out: string, 134 | err: DateError, 135 | ) { 136 | date := date 137 | 138 | { 139 | using date 140 | if !between(year, 0, 9999) do return "", .YEAR_OUT_OF_BOUNDS 141 | if !between(month, 0, 12) do return "", .MONTH_OUT_OF_BOUNDS 142 | if !between(day, 0, days_in_month(year, month)) do return "", .DAY_OUT_OF_BOUNDS 143 | if !between(hour, 0, 23) do return "", .HOUR_OUT_OF_BOUNDS 144 | if !between(minute, 0, 59) do return "", .MINUTE_OUT_OF_BOUNDS 145 | if !between(int(second), 0, 60) do return "", .SECOND_OUT_OF_BOUNDS 146 | if !between(offset_hour, -23, 23) do return "", .OFFSET_HOUR_OUT_OF_BOUNDS 147 | if !between(offset_minute, -59, 59) do return "", .OFFSET_MINUTE_OUT_OF_BOUNDS 148 | } 149 | 150 | b: strings.Builder 151 | strings.builder_init_len_cap(&b, 0, 25) 152 | 153 | fmt.sbprintf(&b, "%04d-%02d-%02d", date.year, date.month, date.day) 154 | strings.write_rune(&b, time_sep) 155 | fmt.sbprintf(&b, "%02d:%02d:%02.0f", date.hour, 
date.minute, date.second) 156 | 157 | if date.offset_hour == 0 && date.offset_minute == 0 do strings.write_rune(&b, 'Z') 158 | else { 159 | if date.offset_minute != 0 && sign(date.offset_hour) != sign(date.offset_minute) { 160 | date.offset_hour += sign(date.offset_minute) 161 | date.offset_minute = 60 - abs(date.offset_minute) // sign doesn't matter, because later prints the abs of date.offset_minute 162 | fmt.printf("DATE PARSER WARNING: signs of your Date.offset_hour & Date.offset_minute do not match! " + "Given dates will be safely converted, but may be unexpected. " + "Go to line: %d in: %s to find out more.\n", #line - 5, #file) 163 | } 164 | 165 | if date.offset_hour < 0 do strings.write_rune(&b, '-') 166 | else do strings.write_rune(&b, '+') 167 | 168 | fmt.sbprintf(&b, "%02d:%02d", abs(date.offset_hour), abs(date.offset_minute)) 169 | } 170 | 171 | return strings.to_string(b), .NONE 172 | } 173 | 174 | partial_date_to_string :: proc(date: Date, time_sep := ' ',) -> (out: string, err: DateError) { 175 | date := date 176 | { 177 | using date 178 | if !between(year, 0, 9999) do return "", .YEAR_OUT_OF_BOUNDS 179 | if !between(month, 0, 12) do return "", .MONTH_OUT_OF_BOUNDS 180 | if !between(day, 0, days_in_month(year, month)) do return "", .DAY_OUT_OF_BOUNDS 181 | if !between(hour, 0, 23) do return "", .HOUR_OUT_OF_BOUNDS 182 | if !between(minute, 0, 59) do return "", .MINUTE_OUT_OF_BOUNDS 183 | if !between(int(second), 0, 60) do return "", .SECOND_OUT_OF_BOUNDS 184 | if !between(offset_hour, -23, 23) do return "", .OFFSET_HOUR_OUT_OF_BOUNDS 185 | if !between(offset_minute, -59, 59) do return "", .OFFSET_MINUTE_OUT_OF_BOUNDS 186 | } 187 | 188 | b: strings.Builder 189 | strings.builder_init_len_cap(&b, 0, 25) 190 | 191 | _, frac := math.modf_f32(date.second) 192 | timefmt := "%02d:%02d:%02.0f" 193 | if frac > 0 do timefmt = "%02d:%02d:%06.03f" 194 | 195 | if date.is_date_only { 196 | fmt.sbprintf(&b, "%04d-%02d-%02d", date.year, date.month, date.day) 197 | 
return strings.to_string(b), .NONE 198 | } 199 | if date.is_time_only { 200 | fmt.sbprintf(&b, timefmt, date.hour, date.minute, date.second) 201 | return strings.to_string(b), .NONE 202 | } 203 | 204 | fmt.sbprintf(&b, "%04d-%02d-%02d", date.year, date.month, date.day) 205 | strings.write_rune(&b, time_sep) 206 | fmt.sbprintf(&b, timefmt, date.hour, date.minute, date.second) 207 | 208 | if date.is_date_local do return strings.to_string(b), .NONE 209 | 210 | if date.offset_hour == 0 && date.offset_minute == 0 do strings.write_rune(&b, 'Z') 211 | else { 212 | if date.offset_minute != 0 && sign(date.offset_hour) != sign(date.offset_minute) { 213 | date.offset_hour += sign(date.offset_minute) 214 | date.offset_minute = 60 - abs(date.offset_minute) // sign doesn't matter, because later prints the abs of date.offset_minute 215 | fmt.printf("DATE PARSER WARNING: signs of your Date.offset_hour & Date.offset_minute do not match! " + "Given dates will be safely converted, but may be unexpected. " + "Go to line: %d in: %s to find out more.\n", #line - 5, #file) 216 | } 217 | 218 | if date.offset_hour < 0 do strings.write_rune(&b, '-') 219 | else do strings.write_rune(&b, '+') 220 | 221 | fmt.sbprintf(&b, "%02d:%02d", abs(date.offset_hour), abs(date.offset_minute)) 222 | } 223 | 224 | return strings.to_string(b), .NONE 225 | } 226 | 227 | 228 | // I don't need to test for both the date & the time 229 | is_date_lax :: proc(date: string) -> bool { 230 | is_date := true 231 | is_time := true 232 | 233 | if len(date) >= 10 { 234 | is_date &= are_all_numbers(date[0:4]) 235 | is_date &= are_all_numbers(date[5:7]) 236 | is_date &= are_all_numbers(date[8:10]) 237 | is_date &= date[4] == '-' && date[7] == '-' 238 | } else do is_date = false 239 | 240 | if !is_date && len(date) >= 8 { 241 | is_time &= are_all_numbers(date[0:2]) 242 | is_time &= are_all_numbers(date[3:5]) 243 | is_time &= are_all_numbers(date[6:8]) 244 | is_time &= date[2] == ':' && date[5] == ':' 245 | } else do is_time 
= false 246 | 247 | return is_date || is_time 248 | } 249 | 250 | @(private) 251 | are_all_numbers :: proc(s: string) -> (out: bool) { 252 | out = true 253 | for r in s { 254 | if r < '0' || r > '9' do out = false 255 | } 256 | return 257 | } 258 | 259 | // odin doesn't have a sign_int??? 260 | @(private) 261 | sign :: proc(#any_int a: int) -> int { 262 | return -1 if a < 0 else 1 if a > 0 else 0 263 | } 264 | 265 | // kind of a misnomer, but whatever. 266 | @(private) 267 | parse_int :: proc(num: string) -> (int, bool) { 268 | num, ok := strconv.parse_uint(num, 10) 269 | return int(num), ok 270 | } 271 | 272 | @(private) 273 | parse_int2 :: proc(num: string, potential: DateError) -> (int, DateError) { 274 | num, ok := strconv.parse_uint(num, 10) 275 | return int(num), nil if ok else potential 276 | } 277 | 278 | @(private) 279 | between :: proc(a, lo, hi: int) -> bool { 280 | return a >= lo && a <= hi 281 | } 282 | 283 | 284 | @(private) 285 | days_in_month :: proc(year: int, month: int) -> int { 286 | if slice.any_of([]int{1, 3, 5, 7, 8, 10, 12}, month) do return 31 287 | if slice.any_of([]int{4, 6, 9, 11}, month) do return 30 288 | // just February left 289 | if leap_year(year) do return 29 290 | return 28 291 | } 292 | 293 | @(private) 294 | leap_year :: proc(year: int) -> bool { 295 | return year % 4 == 0 && (year % 100 != 0 || year % 400 == 0) 296 | } 297 | -------------------------------------------------------------------------------- /example.toml: -------------------------------------------------------------------------------- 1 | # example.toml 2 | # example.toml 3 | 4 | integer = 5 5 | num = 123.5 6 | infinity = inf 7 | # unicode chars need chcp 65001 & stuff 8 | multiline_str = """ 9 | a 10 | b c \u2230 11 | """ 12 | a.b = "dotted.tables" 13 | c = { d = "inline tables" } 14 | [e.f] 15 | g = "useful tables" 16 | [[h.i]] 17 | j = "lists of tables" 18 | [letsnot] 19 | k.l.m.n = 2024-06-07T20:00:00.12+02:00 20 | 21 | [o] 22 | p = [ 1, [ 2, 3 ], 4] 23 | 
-------------------------------------------------------------------------------- /main.odin: -------------------------------------------------------------------------------- 1 | package toml 2 | 3 | /* 4 | 5 | This file is for testing. It should be ignored by library users. 6 | 7 | For contributors: 8 | I have integrated these tests: 9 | https://github.com/toml-lang/toml-test 10 | 11 | To get them please download/build release 1.5.0: 12 | $ go install github.com/toml-lang/toml-test/cmd/toml-test@v1.5.0 13 | 14 | To do so with shell: 15 | $ export $GOBIN="/tmp" 16 | $ go install github.com/toml-lang/toml-test/cmd/toml-test@v1.5.0 17 | $ odin build . 18 | $ /tmp/toml-test 19 | 20 | Also, big thanks to tgolsson for suggesting this project 21 | and arp242 for actually making the tests! 22 | 23 | */ 24 | 25 | import "core:fmt" 26 | import "core:os" 27 | import "core:encoding/json" 28 | import "dates" 29 | 30 | // import "core:testing" 31 | 32 | exit :: os.exit 33 | 34 | @(private) 35 | main :: proc() { 36 | 37 | // parse_file("testing/current.toml") 38 | 39 | data := make([] u8, 16 * 1024 * 1024) 40 | count, err_read := os.read(os.stdin, data) 41 | assert(err_read == nil) 42 | 43 | table, err := parse(string(data[:count]), "") 44 | 45 | if err.type != .None { print_error(err); os.exit(1) } 46 | 47 | idk, ok := marshal(table) 48 | if !ok do return 49 | json, _ := json.marshal(idk) 50 | logln(string(json)) 51 | 52 | // for the valid/key/quoted-unicode test 53 | // for k, v in table^ { 54 | // logln(k, "=", v) 55 | // } 56 | 57 | deep_delete(table) 58 | // delete_error(&err) 59 | 60 | } 61 | 62 | // @test 63 | // memory_test :: proc(t: ^testing.T) { 64 | // data := ` 65 | // [["valid/key/dotted-4.toml-20".arr]] 66 | // ["valid/key/dotted-4.toml-20".arr.a] 67 | // ` 68 | // 69 | // table, err := parse(string(data), "") 70 | // 71 | // if any_of("--print-errors", ..os.args) && err.type != .None { logln(err); print_error(err) } 72 | // if err.type != .None do os.exit(1) 73 | 
// 74 | // logln(deep_delete(table)) 75 | // delete_error(&err) 76 | // } 77 | 78 | // Dunno what to really call this... 79 | @(private="file") 80 | TestingType :: struct { 81 | type: string, 82 | value: union { 83 | map [string] HelpMePlease, 84 | [] HelpMePlease, 85 | string, 86 | bool, 87 | i64, 88 | f64, 89 | } 90 | } 91 | 92 | @(private="file") 93 | HelpMePlease :: union { 94 | TestingType, 95 | map [string] HelpMePlease, 96 | [] HelpMePlease 97 | } 98 | 99 | @(private="file") 100 | marshal :: proc(input: Type) -> (result: HelpMePlease, ok: bool) { 101 | output: TestingType 102 | 103 | switch value in input { 104 | case nil: assert(false) 105 | case ^List: 106 | if value == nil do return result, false 107 | out := make([] HelpMePlease, len(value)) 108 | for v, i in value { out[i] = marshal(v) or_continue } 109 | return out, true 110 | 111 | case ^Table: 112 | if value == nil do return result, false 113 | out := make(map [string] HelpMePlease) 114 | for k, v in value { out[k] = marshal(v) or_continue } 115 | return out, true 116 | 117 | case string: output = { type = "string", value = value }; 118 | case bool: output = { type = "bool", value = fmt.aprint(value) }; 119 | case i64: output = { type = "integer", value = fmt.aprint(value) }; 120 | case f64: output = { type = "float", value = fmt.aprint(value) }; 121 | 122 | case dates.Date: 123 | result, err := dates.partial_date_to_string(date = value, time_sep = 'T') 124 | if err != .NONE do os.exit(1) // I shouldn't do this like that... 
125 | 126 | date := value 127 | if date.is_time_only { 128 | output.type = "time-local" 129 | } else if date.is_date_only { 130 | output.type = "date-local" 131 | } else if date.is_date_local { 132 | output.type = "datetime-local" 133 | } else { 134 | output.type = "datetime" 135 | } 136 | output.value = result 137 | } 138 | 139 | return output, true 140 | } 141 | 142 | -------------------------------------------------------------------------------- /misc.odin: -------------------------------------------------------------------------------- 1 | package toml 2 | 3 | import "core:fmt" 4 | import "core:strings" 5 | import "core:strconv" 6 | import "core:unicode/utf8" 7 | 8 | @private 9 | find_newline :: proc(raw: string) -> (bytes: int, runes: int) { 10 | for r, i in raw { 11 | defer runes += 1 12 | if r == '\r' || r == '\n' do return i, runes 13 | } 14 | return -1, -1 15 | } 16 | 17 | @private 18 | shorten_string :: proc(s: string, limit: int, or_newline := true) -> string { 19 | min :: proc(a, b: int) -> int { 20 | return a if a < b else b 21 | } 22 | 23 | newline, _ := find_newline(s) // add another line if you are using (..MAC OS 9) here... fuck it. 
24 | if newline == -1 do newline = len(s) 25 | 26 | if limit < len(s) || newline < len(s) { 27 | return fmt.aprint(s[:min(limit, newline)], "...") 28 | } 29 | 30 | return s 31 | } 32 | 33 | // resolves backslash escapes in str; when literal is true, function JUST returns a clone of str 34 | @private 35 | cleanup_backslashes :: proc(str: string, literal := false) -> (result: string, err: Error) { 36 | str := strings.clone(str) 37 | if literal do return str, err 38 | 39 | set_err :: proc(err: ^Error, type: ErrorType, more_fmt: string, more_args: ..any) { 40 | err.type = type 41 | b_printf(&err.more, more_fmt, ..more_args) 42 | } 43 | 44 | using strings 45 | b: Builder 46 | // defer builder_destroy(&b) // don't need to, shouldn't even free the original str here 47 | 48 | to_skip := 0 49 | 50 | last: rune 51 | escaped: bool 52 | for r, i in str { 53 | 54 | if to_skip > 0 { 55 | to_skip -= 1 56 | continue 57 | } 58 | // basically, if last == '\\' { 59 | if escaped { 60 | escaped = false 61 | 62 | switch r { 63 | case 'u': // for \uXXXX 64 | if len(str) < i + 5 { 65 | set_err(&err, .Bad_Unicode_Char, "'\\u' must have 4 hex digits after it in string: %s", str) 66 | return str, err 67 | } 68 | 69 | code, ok := strconv.parse_u64(str[i + 1: i + 5], 16) 70 | buf, bytes := toml_ucs_to_utf8(code) 71 | // a failed hex parse must be an error too, otherwise `code` is garbage (usually 0) 72 | if !ok || bytes == -1 { 73 | set_err(&err, .Bad_Unicode_Char, "'%s'", str[i + 1:i + 5]) 74 | return str, err 75 | } 76 | 77 | parsed_rune, _ := utf8.decode_rune_in_bytes(buf[:bytes]) 78 | 79 | write_rune(&b, parsed_rune) 80 | to_skip = 4 81 | 82 | case 'U': // for \UXXXXXXXX 83 | if len(str) < i + 9 { 84 | set_err(&err, .Bad_Unicode_Char, "'\\U' must have 8 hex digits after it in string: %s", str) 85 | return str, err 86 | } 87 | code, ok := strconv.parse_u64(str[i + 1:i + 9], 16) 88 | buf, bytes := toml_ucs_to_utf8(code) 89 | // a failed hex parse must be an error too, otherwise `code` is garbage (usually 0) 90 | if !ok || bytes == -1 { 91 | set_err(&err, .Bad_Unicode_Char, "'%s'", str[i + 1:i + 9]) 92 | return str, err 93 | } 94 | 95 | parsed_rune, _ := utf8.decode_rune_in_bytes(buf[:bytes]) 96 | 97
write_rune(&b, parsed_rune) 98 | to_skip = 8 99 | 100 | case 'x': 101 | set_err(&err, .Bad_Unicode_Char, "\\xXX is not in the spec, you can just use \\u00XX instead.") 102 | return str, err 103 | 104 | case 'n' : write_byte(&b, '\n') 105 | case 'r' : write_byte(&b, '\r') 106 | case 't' : write_byte(&b, '\t') 107 | case 'b' : write_byte(&b, '\b') 108 | case 'f' : write_byte(&b, '\f') 109 | case '\\': write_byte(&b, '\\') 110 | case '"' : write_byte(&b, '"') 111 | case '\'': write_byte(&b, '\'') 112 | case ' ', '\t', '\r', '\n': 113 | // if (r == ' ' || r == '\t') && len(str) > i + 1 && (str[i + 1] != '\n' || str[i + 1] != '\r') { 114 | // err.type = .Bad_Unicode_Char 115 | // err.more = "cannot escape space in the middle of the line." 116 | // } 117 | // if len(str) == i + 1 { 118 | // err.type = .Bad_Unicode_Char 119 | // err.more = "Cannot escape space/new line when it is the last character" 120 | // } 121 | 122 | // Fun thing for multiline line string line escaping. 123 | for r in str[i + 1:] { 124 | if r == ' ' || r == '\t' || r == '\r' || r == '\n' do to_skip += 1 125 | else do break 126 | } 127 | case: 128 | set_err(&err, .Bad_Unicode_Char, "Unexpected escape sequence found."); 129 | return str, err 130 | } 131 | } else if r != '\\' { 132 | write_rune(&b, r) 133 | } else { 134 | escaped = true 135 | } 136 | 137 | last = r 138 | } 139 | delete_string(str) 140 | defer b_destroy(&b) // you can't free a builder that has been cast to string 141 | return strings.clone(to_string(b)), err 142 | } 143 | 144 | @private 145 | any_of :: proc(a: $T, B: ..T) -> bool { 146 | for b in B do if a == b do return true 147 | return false 148 | } 149 | 150 | @private 151 | is_space :: proc(r: u8) -> bool { 152 | SPACE : [4] u8 = { ' ', '\r', '\n', '\t' } 153 | return r == SPACE[0] || r == SPACE[1] || r == SPACE[2] || r == SPACE[3] 154 | // Nudge nudge 155 | } 156 | 157 | @private 158 | is_special :: proc(r: u8) -> bool { 159 | SPECIAL : [8] u8 = { '=', ',', '.', '[', ']', '{', '}', 
0 } 160 | return r == SPECIAL[0] || r == SPECIAL[1] || r == SPECIAL[2] || r == SPECIAL[3] || 161 | r == SPECIAL[4] || r == SPECIAL[5] || r == SPECIAL[6] || r == SPECIAL[7] 162 | // Shove shove 163 | } 164 | 165 | @private 166 | is_digit :: proc(r: rune, base: int) -> bool { 167 | switch base { 168 | case 16: return (r >= '0' && r <= '9') || (r >= 'A' && r <= 'F') || (r >= 'a' && r <= 'f') 169 | case 10: return r >= '0' && r <= '9' 170 | case 8: return r >= '0' && r <= '7' 171 | case 2: return r >= '0' && r <= '1' 172 | } 173 | assert(false, "Only bases: 16, 10, 8 and 2 are supported in TOML") 174 | return false 175 | } 176 | 177 | @private 178 | between_any :: proc(a: rune, b: ..rune) -> bool { 179 | assert(len(b) % 2 == 0) 180 | for i := 0; i < len(b); i += 2 { 181 | if a >= b[i] && a <= b[i + 1] do return true 182 | } 183 | return false 184 | } 185 | // Returns 3 if `a` is wrapped in triple quotes, 1 if wrapped in single quotes, 0 otherwise. The opening and closing delimiters may not overlap, so `a[n:len(a) - n]` is always a valid slice. 186 | @(private) 187 | get_quote_count :: proc(a: string) -> int { 188 | s := len(a) 189 | if s >= 6 && 190 | ((a[:3] == "\"\"\"" && a[s-3:] == "\"\"\"" ) || 191 | (a[:3] == "'''" && a[s-3:] == "'''")) { return 3 } 192 | 193 | if s >= 2 && 194 | ((a[:1] == "\"" && a[s-1:] == "\"") || 195 | (a[:1] == "'" && a[s-1:] == "'")) { return 1 } 196 | 197 | return 0 198 | } 199 | 200 | @(private) 201 | unquote :: proc(a: string, fluff: ..any) -> (result: string, err: Error) { 202 | qcount := get_quote_count(a) 203 | 204 | if qcount == 3 { 205 | first: rune 206 | count: int 207 | #reverse for r, i in a { 208 | if i < 3 do break 209 | if first == 0 do first = r 210 | if r == first do count = count + 1 211 | else if r == '\\' do count -= 1 212 | else do break 213 | } 214 | if count != 3 && count % 3 == 0 { 215 | err.type = .Bad_Value 216 | b_write_string(&err.more, "The quote count in multiline string is divisible by 3. 
Lol, get fucked!") 217 | return a, err 218 | } 219 | } 220 | 221 | unquoted := a[qcount:len(a) - qcount] 222 | if len(unquoted) > 0 && unquoted[0] == '\n' do unquoted = unquoted[1:] 223 | return cleanup_backslashes(unquoted, a[0] == '\'') 224 | } 225 | 226 | @(private) 227 | starts_with :: proc(a, b: string) -> bool { 228 | return len(a) >= len(b) && a[:len(b)] == b 229 | } 230 | 231 | @(private) 232 | ends_with :: proc(a, b: string) -> bool { 233 | return len(a) >= len(b) && a[len(a) - len(b):] == b 234 | } 235 | 236 | // case-insensitive compare (ASCII letters only: 'a'..'z' are folded to upper case byte by byte) 237 | @private 238 | eq :: proc(a, b: string) -> bool { 239 | if len(a) != len(b) do return false 240 | #no_bounds_check for i in 0..<len(a) { 241 | r1 := a[i] 242 | r2 := b[i] 243 | 244 | A := r1 - 32*u8(r1 >= 'a' && r1 <= 'z') 245 | B := r2 - 32*u8(r2 >= 'a' && r2 <= 'z') 246 | if A != B do return false 247 | } 248 | return true 249 | } 250 | 251 | @private 252 | is_list :: proc(t: Type) -> bool { 253 | _, is_list := t.(^List); 254 | return is_list 255 | 256 | } 257 | 258 | // // from: https://www.cl.cam.ac.uk/~mgk25/ucs/utf8_check.c 259 | // is_rune_valid :: proc(r: rune) -> bool { 260 | // // if !utf8.valid_rune(r) do return false 261 | // 262 | // s, n := utf8.encode_rune(r) 263 | // 264 | // if n == 1 { 265 | // /* 0xxxxxxx */ 266 | // return true 267 | // } else if n == 2 { 268 | // /* 110XXXXx 10xxxxxx */ 269 | // if ((s[1] & 0xc0) != 0x80 || 270 | // (s[0] & 0xfe) == 0xc0) { /* overlong? */ 271 | // return true 272 | // } 273 | // } else if n == 3 { 274 | // /* 1110XXXX 10Xxxxxx 10xxxxxx */ 275 | // if ((s[1] & 0xc0) != 0x80 || 276 | // (s[2] & 0xc0) != 0x80 || 277 | // (s[0] == 0xe0 && (s[1] & 0xe0) == 0x80) || /* overlong? */ 278 | // (s[0] == 0xed && (s[1] & 0xe0) == 0xa0) || /* surrogate? */ 279 | // (s[0] == 0xef && s[1] == 0xbf && 280 | // (s[2] & 0xfe) == 0xbe)) { /* U+FFFE or U+FFFF? 
*/ 281 | // return true 282 | // } 283 | // } else if n == 4 { 284 | // /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */ 285 | // if ((s[1] & 0xc0) != 0x80 || 286 | // (s[2] & 0xc0) != 0x80 || 287 | // (s[3] & 0xc0) != 0x80 || 288 | // (s[0] == 0xf0 && (s[1] & 0xf0) == 0x80) || /* overlong? */ 289 | // (s[0] == 0xf4 && s[1] > 0x8f) || s[0] > 0xf4) { /* > U+10FFFF? */ 290 | // return true 291 | // } 292 | // } else do return false 293 | // 294 | // return true 295 | // } 296 | 297 | is_bare_rune_valid :: proc(r: rune) -> bool { 298 | if r == '\n' || r == '\r' || r == '\t' do return true 299 | return r >= 32 300 | } 301 | 302 | 303 | // Completely ripped from tomlc99: 304 | // https://github.com/cktan/tomlc99 305 | 306 | /** 307 | * Convert a UCS char to utf8 code, and return it in buf. 308 | * Return #bytes used in buf to encode the char, or 309 | * -1 on error. 310 | */ 311 | toml_ucs_to_utf8 :: proc(code: u64) -> (buf: [6] u8, byte_count: int) { 312 | /* http://stackoverflow.com/questions/6240055/manually-converting-unicode-codepoints-into-utf-8-and-utf-16 313 | */ 314 | /* The UCS code values 0xd800–0xdfff (UTF-16 surrogates) as well 315 | * as 0xfffe and 0xffff (UCS noncharacters) should not appear in 316 | * conforming UTF-8 streams. 
317 | */ 318 | if (0xd800 <= code && code <= 0xdfff) do return buf, -1 319 | // if (0xfffe <= code && code <= 0xffff) do return buf, -1 320 | 321 | /* 0x00000000 - 0x0000007F: 322 | 0xxxxxxx 323 | */ 324 | if (code > 0x10FFFF) do return buf, -1; // past the last Unicode code point; the former `code < 0` test could never fire on a u64 325 | if (code <= 0x7F) { 326 | buf[0] = u8(code); 327 | return buf, 1; 328 | } 329 | 330 | /* 0x00000080 - 0x000007FF: 331 | 110xxxxx 10xxxxxx 332 | */ 333 | if (code <= 0x000007FF) { 334 | buf[0] = u8(0xc0 | (code >> 6)); 335 | buf[1] = u8(0x80 | (code & 0x3f)); 336 | return buf, 2; 337 | } 338 | 339 | /* 0x00000800 - 0x0000FFFF: 340 | 1110xxxx 10xxxxxx 10xxxxxx 341 | */ 342 | if (code <= 0x0000FFFF) { 343 | buf[0] = u8(0xe0 | (code >> 12)); 344 | buf[1] = u8(0x80 | ((code >> 6) & 0x3f)); 345 | buf[2] = u8(0x80 | (code & 0x3f)); 346 | return buf, 3; 347 | } 348 | 349 | /* 0x00010000 - 0x001FFFFF: 350 | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 351 | */ 352 | if (code <= 0x001FFFFF) { 353 | buf[0] = u8(0xf0 | (code >> 18)); 354 | buf[1] = u8(0x80 | ((code >> 12) & 0x3f)); 355 | buf[2] = u8(0x80 | ((code >> 6) & 0x3f)); 356 | buf[3] = u8(0x80 | (code & 0x3f)); 357 | return buf, 4; 358 | } 359 | 360 | /* 0x00200000 - 0x03FFFFFF: 361 | 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 362 | */ 363 | if (code <= 0x03FFFFFF) { 364 | buf[0] = u8(0xf8 | (code >> 24)); 365 | buf[1] = u8(0x80 | ((code >> 18) & 0x3f)); 366 | buf[2] = u8(0x80 | ((code >> 12) & 0x3f)); 367 | buf[3] = u8(0x80 | ((code >> 6) & 0x3f)); 368 | buf[4] = u8(0x80 | (code & 0x3f)); 369 | return buf, 5; 370 | } 371 | 372 | /* 0x04000000 - 0x7FFFFFFF: 373 | 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 374 | */ 375 | if (code <= 0x7FFFFFFF) { 376 | buf[0] = u8(0xfc | (code >> 30)); 377 | buf[1] = u8(0x80 | ((code >> 24) & 0x3f)); 378 | buf[2] = u8(0x80 | ((code >> 18) & 0x3f)); 379 | buf[3] = u8(0x80 | ((code >> 12) & 0x3f)); 380 | buf[4] = u8(0x80 | ((code >> 6) & 0x3f)); 381 | buf[5] = u8(0x80 | (code & 0x3f)); 382 | return buf, 6; 383 | } 384 | 385 | return buf, -1; 
386 | } 387 | 388 | 389 | 390 | -------------------------------------------------------------------------------- /mod.pkg: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0.0", 3 | "description": "TOML file parser", 4 | "url": "https://github.com/Up05/toml_parser", 5 | "readme": "README.md", 6 | "license": "MIT", 7 | "keywords": ["TOML", "parser"], 8 | "dependencies": { Up05/odin-RFC-3339-date-parser } 9 | } 10 | -------------------------------------------------------------------------------- /parser.odin: -------------------------------------------------------------------------------- 1 | package toml 2 | 3 | import "core:strconv" 4 | import "core:fmt" 5 | import "core:strings" 6 | import rt "base:runtime" 7 | 8 | import "dates" 9 | 10 | Table :: map [string] Type 11 | List :: [dynamic] Type 12 | 13 | Type :: union { 14 | ^Table, 15 | ^List, 16 | string, 17 | bool, 18 | i64, 19 | f64, 20 | dates.Date, 21 | } 22 | 23 | @private 24 | GlobalData :: struct { 25 | toks : [] string, // all token list 26 | curr : int, // the current token index 27 | err : Error, // current error 28 | root : ^Table, // the root/global table 29 | section : ^Table, // TOML's `[section]` table 30 | this : ^Table, // TOML's local p.a.t.h or { table = {} } table 31 | reps : int, // for halting upon infinite loops 32 | aloc : rt.Allocator // probably useless, honestly... 33 | } 34 | 35 | @private // is only allocated when parse() and validate() are working. 36 | g: ^GlobalData 37 | 38 | 39 | @private // gets a token or an empty string. 40 | peek :: proc(o := 0) -> string { 41 | if g.curr + o >= len(g.toks) do return "" 42 | if g.reps >= 1000 { // <-- solution to the halting problem! 
43 | if g.toks[g.curr + o] == "\n" { 44 | make_err(.Bad_New_Line, "The parser is stuck on an out-of-place new line.") 45 | } else { 46 | g.err.type = .Parser_Is_Stuck 47 | b_printf(&g.err.more, "Token: '%s' at index: %d", g.toks[g.curr + o], g.curr + o) 48 | } 49 | return "" 50 | } 51 | g.reps += 1 52 | 53 | return g.toks[g.curr + o] 54 | } 55 | 56 | 57 | // skips by one or more tokens, the parser & validator CANNOT go back, 58 | @private // since my solution to the halting problem may not work then. 59 | skip :: proc(o := 1) { 60 | assert(o >= 0) 61 | g.curr += o 62 | if o != 0 do g.reps = 0 63 | } 64 | 65 | @private // returns the current token and skips to the next token. 66 | next :: proc() -> string { 67 | defer skip() 68 | return peek() 69 | } 70 | 71 | parse :: proc(data: string, original_file: string, allocator := context.allocator) -> (tokens: ^Table, err: Error) { 72 | context.allocator = allocator 73 | 74 | // === TOKENIZER === 75 | raw_tokens, t_err := tokenize(data, file = original_file) 76 | defer delete_dynamic_array(raw_tokens) 77 | if t_err.type != .None do return nil, t_err 78 | 79 | // === VALIDATOR === 80 | v_err := validate(raw_tokens[:], original_file, allocator) 81 | if v_err.type != .None do return tokens, v_err 82 | 83 | // === TEMP DATA === 84 | tokens = new(Table) 85 | 86 | initial_data: GlobalData = { 87 | toks = raw_tokens[:], 88 | err = { line = 1, file = original_file }, 89 | 90 | root = tokens, 91 | this = tokens, 92 | section = tokens, 93 | 94 | aloc = allocator, 95 | } 96 | 97 | g = &initial_data 98 | defer g = nil 99 | 100 | // === MAIN WORK === 101 | for peek() != "" { 102 | if g.err.type != .None { 103 | return nil, g.err 104 | } 105 | 106 | if peek() == "\n" { 107 | g.err.line += 1 108 | skip() 109 | continue 110 | } 111 | 112 | parse_statement() 113 | g.this = g.section 114 | } 115 | 116 | if g.err.type != .None { 117 | return nil, g.err 118 | } 119 | 120 | return 121 | } 122 | 123 | // ==================== STATEMENTS 
====================

// Parses one top-level statement: `[[list]]`, `[section]` or `key = value`.
// Anything else is consumed as a stray expression.
parse_statement :: proc() {
    ok: bool

    ok = parse_section_list(); if ok do return
    ok = parse_section();      if ok do return
    ok = parse_assign();       if ok do return

    parse_expr() // skips orphaned expressions
}

// This function is for dotted.paths (stops at.the.NAME)
// It consumes `name` `.` pairs, moving g.this down one table per pair and
// creating missing tables on the way.
walk_down :: proc(parent: ^Table) {

    // ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !
    // !  This is intricate.                                 !
    // !  PLEASE RUN ALL TESTS IF YOU CHANGE THIS AT ALL.    !
    // ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !

    if peek(1) != "." do return

    name, err := unquote(next())
    if err.type != .None {
        // Only record a failure; a success must not wipe an earlier error.
        g.err.type = err.type
        g.err.more = err.more
        return
    }
    skip() // '.'

    do_not_free: bool
    defer if !do_not_free do delete_string(name)

    #partial switch value in parent[name] {
    case nil:
        g.this = new(Table)
        parent[name] = g.this
        do_not_free = true // the map now owns `name` as its key

    case ^Table:
        g.this = value

    case ^List:
        if len(value^) == 0 {
            g.this = new(Table)
            append(value, g.this)

        } else {
            // dotted paths through an array of tables address its last element
            table, is_table := value[len(value^) - 1].(^Table)
            if !is_table {
                make_err(.Key_Already_Exists, name)
                return
            }
            g.this = table
        }

    case:
        make_err(.Key_Already_Exists, name)
        return
    }

    walk_down(g.this)
}


// Parses `[[path.to.name]]`: appends a new table to the list called `name`
// and makes it the current section. Returns false when the upcoming tokens
// are not `[` `[`.
parse_section_list :: proc() -> bool {
    if peek(0) != "[" || peek(1) != "[" do return false
    skip(2) // '[' '['

    g.this    = g.root
    g.section = g.root
    walk_down(g.root)

    name, err := unquote(next()) // take care with ordering of this btw
    if err.type != .None {
        // Only record a failure; an error raised by walk_down() must survive.
        g.err.type = err.type
        g.err.more = err.more
        return true
    }

    list: ^List

    if name not_in g.this {
        list = new(List)
        g.this[name] = list // `name` is now owned by the map

    } else if !is_list(g.this[name]) {
        // Previously execution continued and appended through a nil ^List.
        make_err(.Key_Already_Exists, name)
        delete_string(name)
        return true

    } else {
        list = g.this[name].(^List)
        delete_string(name) // the key already exists, this copy is unused
    }

    result := new(Table)
    append(list, result)

    skip(2) // ']' ']'
    g.section = result
    return true
}

// put() is only used in parse_section, so it's specialized
// general version: commit 8910187045028ce13df3214e04ace6071ea89158
put :: proc(parent: ^Table, key: string, value: ^Table) {

    // [[a.b]]\n [a] is valid TOML: `a` was created implicitly by the first
    // header and is (re)opened by the second one.
    // The valid tests pass. That is what matters...

    #partial switch existing in parent[key] {
    case ^Table:
        for k, v in value { existing[k] = v }
        delete_map(value^)
        // NOTE(review): this copies the map header, so `value^` and
        // `existing^` presumably share one backing store from here on;
        // growing either of them may leave the other stale. Confirm
        // against the test-suite before touching.
        value^ = existing^
    case ^List:
        append(existing, value)

    case nil:
        parent[key] = value

    case:
        make_err(.Key_Already_Exists, key)
    }
}

// Parses `[path.to.name]` and makes that table the current section.
// Returns false when the upcoming token is not `[`.
parse_section :: proc() -> bool {
    if peek() != "[" do return false
    skip() // '['

    g.this    = g.root
    g.section = g.root
    walk_down(g.root)

    name, err := unquote(next()) // take care with ordering of this btw
    if err.type != .None {
        // Only record a failure; an error raised by walk_down() must survive.
        g.err.type = err.type
        g.err.more = err.more
        return true
    }

    result := new(Table)

    put(g.this, name, result)

    skip() // ']'
    g.this    = result
    g.section = g.this
    return true
}

// Parses `dotted.key = value` into g.this. Returns false when the upcoming
// tokens do not look like an assignment.
parse_assign :: proc() -> bool {
    if peek(1) != "=" && peek(1) != "." do return false

    walk_down(g.this)

    key, err := unquote(peek())
    if err.type != .None {
        // Only record a failure; an error raised by walk_down() must survive.
        g.err.type = err.type
        g.err.more = err.more
        return true
    }

    if any_of(u8('\n'), ..transmute([] u8)peek()) {
        make_err(.Bad_Name, "Keys cannot have raw new lines in them")
        return true
    }

    skip(2) // key '='
    value := parse_expr()

    if key in g.this {
        make_err(.Key_Already_Exists, key)
    }

    g.this[key] = value
    return true
}

// ==================== EXPRESSIONS ====================


// Tries every value parser in turn; order matters (dates before numbers,
// floats before ints). Returns nil when nothing matched.
parse_expr :: proc() -> (result: Type) {
    ok: bool
    result, ok = parse_string(); if ok do return
    result, ok = parse_bool();   if ok do return
    result, ok = parse_date();   if ok do return
    result, ok = parse_float();  if ok do return
    result, ok = parse_int();    if ok do return
    result, ok = parse_list();   if ok do return
    result, ok = parse_table();  if ok do return
    return
}

parse_string :: proc() -> (result: string, ok: bool) {
    if len(peek()) == 0 do return
    if r := peek()[0]; !any_of(r, '"', '\'') do return
    str, err := unquote(next())
    if err.type != .None {
        // Only record a failure, so that an error from an earlier element of
        // the same list / inline table is not erased by this string.
        g.err.type = err.type
        g.err.more = err.more
    }
    return str, true
}

parse_bool :: proc() -> (result: bool, ok: bool) {
    if peek() == "true"  { skip(); return true,  true }
    if peek() == "false" { skip(); return false, true }
    return false, false
}

// Parses `inf`, `nan` (optionally signed), `a.b` (three tokens, because '.'
// is its own token) and exponent notation. Plain integers are left alone.
parse_float :: proc() -> (result: f64, ok: bool) {

    has_e_but_not_x :: proc(s: string) -> bool {
        if len(s) > 2 { if any_of(s[1], 'x', 'X') do return false }
        #reverse for r in s { if any_of(r, 'e', 'E') do return true }
        return false
    }

    Infinity : f64 = 1e5000
    NaN := transmute(f64) ( transmute(i64) Infinity | 1 )

    if len(peek()) == 4 {
        if peek()[0] == '-' { if peek()[1:] == "inf" { skip(); return -Infinity, true } }
        if peek()[0] == '+' { if peek()[1:] == "inf" { skip(); return +Infinity, true } }
        if peek()[1:] == "nan" { skip(); return NaN, true }
    }

    if peek() == "nan" { skip(); return NaN, true }
    if peek() == "inf" { skip(); return Infinity, true }

    if peek(1) == "." {
        number := fmt.aprint(peek(), ".", peek(2), sep = "")
        cleaned, has_alloc := strings.remove_all(number, "_")
        defer if has_alloc do delete(cleaned)
        defer delete(number)
        skip(3)
        return strconv.parse_f64(cleaned)

    } else if has_e_but_not_x(peek()) {
        cleaned, has_alloc := strings.remove_all(next(), "_")
        defer if has_alloc do delete(cleaned)
        return strconv.parse_f64(cleaned)
    }

    // it's an int then
    return
}

parse_int :: proc() -> (result: i64, ok: bool) {
    result, ok = strconv.parse_i64(peek())
    if ok do skip()
    return
}

// Re-assembles a date/time that the tokenizer split on whitespace and '.'
// and hands it to the dates package. `ok` is true whenever the first token
// looked like a date, even if parsing it then failed (g.err is set).
parse_date :: proc() -> (result: dates.Date, ok: bool) {
    using strings
    if !dates.is_date_lax(peek(0)) do return
    ok = true

    full: Builder
    defer builder_destroy(&full) // previously leaked on the error path
    write_string(&full, next())

    // is date, time or both?
    if dates.is_date_lax(peek()) {
        write_rune(&full, ' ')
        write_string(&full, next())
    }

    if peek() == "." {
        write_byte(&full, '.'); skip()
        write_string(&full, next())
    }

    err: dates.DateError
    result, err = dates.from_string(to_string(full))
    if err != .NONE {
        make_err(.Bad_Date, "Received error: %v by parsing: '%s' as date\n", err, to_string(full))
    }
    return
}

parse_list :: proc() -> (result: ^List, ok: bool) {
    if peek() != "[" do return
    skip() // '['
    ok = true

    result = new(List)

    for !any_of(peek(), "]", "") {

        if peek() == ","  { skip(); continue }
        if peek() == "\n" { g.err.line += 1; skip(); continue }

        element := parse_expr()
        append(result, element)
    }

    skip() // ']'
    return
}

parse_table :: proc() -> (result: ^Table, ok: bool) {
    if peek() != "{" do return
    skip() // '{'
    ok = true

    result = new(Table)

    // parse_assign() writes into g.this, so point it at the inline table
    // and restore the outer state afterwards.
    temp_this, temp_section := g.this, g.section
    for !any_of(peek(), "}", "") {

        if peek() == ","  { skip(); continue }
        if peek() == "\n" { g.err.line += 1; skip(); continue }

        g.this, g.section = result, result
        parse_assign()
    }
    g.this, g.section = temp_this, temp_section

    skip() // '}'
    return
}

// Replaces the current error with `type` and a freshly formatted message.
@(private="file")
make_err :: proc(type: ErrorType, more_fmt: string, more_args: ..any) {
    g.err.type = type
    context.allocator = g.aloc
    b_reset(&g.err.more)
    b_printf(&g.err.more, more_fmt, ..more_args)
}

// Raises the error when `cond` is false; returns true if it did.
@(private="file")
err_if_not :: proc(cond: bool, type: ErrorType, more_fmt: string, more_args: ..any) -> bool {
    if !cond do make_err(type, more_fmt, ..more_args)
    return !cond
}

--------------------------------------------------------------------------------
/tests/test.odin:
--------------------------------------------------------------------------------
package tests

import "core:testing"

import toml ".."

@(test)
nil_guard_get :: proc(t: ^testing.T) {
    table: toml.Table

    _, found := toml.get_bool(&table, "enabled")
    testing.expectf(t, found == false, "should not crash on nullptr exception not found")
}

--------------------------------------------------------------------------------
/tokenizer.odin:
--------------------------------------------------------------------------------
package toml

// Splits `raw` into tokens. Every token is a slice of `raw` (or the literal
// "\n"), so `raw` must outlive the returned array. `file` is only stored in
// the returned error. Whitespace and comments are dropped, strings are kept
// with their quotes.
tokenize :: proc(raw: string, file := "") -> (tokens: [dynamic] string, err: Error) {
    err = { file = file, line = 1 }

    // number of upcoming runes (not bytes) that were already consumed
    skip: int
    outer: for r, i in raw {
        this := raw[i:]

        switch { // by the way, do NOT use the 'fallthrough' keyword
        // makes more invalid tests pass
        case !is_bare_rune_valid(r):
            set_err(&err, .Bad_Unicode_Char, "'%v'", r)
            return

        // throws error if only a carriage return is found
        case r == '\r' && len(raw) > i + 1 && raw[i + 1] != '\n':
            set_err(&err, .Bad_Unicode_Char, "carriage returns must be followed by new lines in TOML!")
            return

        // skips until the end of e.g.: string and comment (this replaces having state.)
        case skip > 0:
            skip -= 1

        // unix new lines
        case r == '\n':
            append(&tokens, "\n")
            err.line += 1

        // windows new lines
        case starts_with(raw[i:], "\r\n"):
            append(&tokens, "\n")
            err.line += 1
            // consume the '\n' as well, otherwise the unix case above emits
            // a second token and counts the same line twice
            skip += 1

        case is_space(this[0]):
            // do nothing

        case is_special(this[0]):
            append(&tokens, this[:1])

        // removes a comment (in one go)
        case r == '#':
            j, runes := find_newline(this)
            if j == -1 do return tokens, { }
            skip += runes - 1

        // ============ START OF STRINGS ============
        case starts_with(this, "\"\"\""):
            j, runes := find(this, "\"\"\"", 3)
            if j == -1 do return tokens, set_err(&err, .Missing_Quote, shorten_string(this, 16))
            j2, runes2 := go_further(this[j + 3:], '"')
            j += j2; runes += runes2
            append(&tokens, this[:j + 3])
            skip += runes + 2

        case starts_with(this, "'''"):
            j, runes := find(this, "'''", 3, false)
            if j == -1 do return tokens, set_err(&err, .Missing_Quote, shorten_string(this, 16))
            j2, runes2 := go_further(this[j + 3:], '\'')
            j += j2; runes += runes2
            append(&tokens, this[:j + 3])
            skip += runes + 2

        case r == '"':
            j, runes := find(this, "\"", 1)
            if j == -1 do return tokens, set_err(&err, .Missing_Quote, shorten_string(this, 16))
            append(&tokens, this[:j + 1])
            skip += runes

        case r == '\'':
            j, runes := find(this, "'", 1, false)
            if j == -1 do return tokens, set_err(&err, .Missing_Quote, shorten_string(this, 16))
            append(&tokens, this[:j + 1])
            skip += runes
        // ============ END OF STRINGS ============

        // tokenizes all leftover things (in one go)
        // this is "text", numbers & so on
        case:
            key := leftover(this)
            // No delimiter before the end of input: the token is everything
            // that is left. It used to be dropped with an error of type .None.
            if len(key) == 0 do key = this
            append(&tokens, key)

            // `skip` is counted in runes, len(key) would be in bytes
            rune_count := 0
            for _ in key { rune_count += 1 }
            skip += rune_count - 1
        }
    }

    return tokens, err

}

// Returns the bare token at the start of `raw`: everything up to the first
// whitespace, special character or '#', or all of `raw` when the token runs
// to the end of the input.
@(private="file")
leftover :: proc(raw: string) -> string {
    for _, i in raw {
        if is_space(raw[i]) || is_special(raw[i]) || raw[i] == '#' {
            return raw[:i]
        }
    }
    // No delimiter: the token is the rest of the input. Returning ""
    // here made the tokenizer drop the last token of files that do not end
    // in a new line.
    return raw
}

// Finds the first occurrence of `b` in `a` at or after byte `skip`.
// With `escape` set, a character following a backslash is never a match.
// Returns the byte index of the match and the number of runes before it,
// or -1, -1 when there is none.
@(private="file")
find :: proc(a: string, b: string, skip := 0, escape := true) -> (bytes: int, runes: int) {
    escaped: bool
    for r, i in a[skip:] {
        defer runes += 1
        if escaped do escaped = false
        else if escape && r == '\\' do escaped = true
        else if starts_with(a[i + skip:], b) do return i + skip, runes + skip
    } // "+ skip" here is bad, it would be best to count runes up until "skip"
    return -1, -1
}

// Measures the run of `r1` at the start of `a` (the extra quotes that may
// follow a closing `"""` / `'''`). Returns its length in bytes and in runes.
@(private="file")
go_further :: proc(a: string, r1: rune) -> (bytes: int, runes: int) {
    for r2, i in a {
        if r1 != r2 do return i, runes
        runes += 1
    }
    // The run reaches the end of `a`. This used to report the index of the
    // last matching rune, which is one byte short of the run's length.
    return len(a), runes
}

// Stores `type` and the formatted message in `err` and returns a copy of it.
@(private="file")
set_err :: proc(err: ^Error, type: ErrorType, more_fmt: string, more_args: ..any) -> Error {
    err.type = type
    b_printf(&err.more, more_fmt, ..more_args)
    return err^
}

--------------------------------------------------------------------------------
/toml.odin:
--------------------------------------------------------------------------------
package toml

import "core:strings"
import "core:os"
import "base:intrinsics"
import "base:runtime"
import "dates"

import "core:fmt"

log   :: fmt.print
logf  :: fmt.printf
logln :: fmt.println

assertf :: fmt.assertf

Builder        :: strings.Builder
b_destroy      :: strings.builder_destroy
b_reset        :: strings.builder_reset
b_write_string :: strings.write_string
b_printf       :: fmt.sbprintf

// Parses the file. You can use print_error(err) for error messages.
// Reads `filename` and parses its contents. When the file cannot be read,
// the error is .Bad_File with the file name as its message.
parse_file :: proc(filename: string, allocator := context.allocator) -> (section: ^Table, err: Error) {
    context.allocator = allocator

    contents, was_read := os.read_entire_file_from_filename(filename)
    if !was_read {
        err.type = .Bad_File
        b_write_string(&err.more, filename)
        return nil, err
    }
    // released once parse() has returned
    defer delete_slice(contents)

    return parse(string(contents), filename, allocator)
}

// This is made to be used with default, err := #load(filename). original_filename is only used for errors.
parse_data :: proc(data: []u8, original_filename := "untitled data", allocator := context.allocator) -> (section: ^Table, err: Error) {
    text := string(data)
    return parse(text, original_filename, allocator)
}

// Frees all of the memory allocated by the parser for a particular type
// It is recursive, so you can just give it the root Table.
// Stops at, and returns, the first allocator error.
deep_delete :: proc(type: Type, allocator := context.allocator) -> (err: runtime.Allocator_Error) {
    context.allocator = allocator

    #partial switch node in type {
    case string:
        err = delete_string(node)

    case ^List:
        if node == nil do break
        for &element in node {
            deep_delete(element, allocator) or_return
        }
        err = delete_dynamic_array(node^)
        if err == .None do free(node)

    case ^Table:
        if node == nil do break
        for key, &entry in node {
            delete_string(key) or_return
            deep_delete(entry, allocator) or_return
        }
        err = delete_map(node^)
        if err == .None do free(node)
    }

    return
}

// Retrieves and type checks the value at path. The last element of path is the actual key.
// section may be any Table.
// Walks `path` through nested tables starting at `section`; every element
// but the last must name a table. `ok` is false when `section` is nil, when
// a key is missing, or when a value has the wrong type.
get :: proc($T: typeid, section: ^Table, path: ..string) -> (val: T, ok: bool)
    where intrinsics.type_is_variant_of(Type, T)
{
    assert(len(path) > 0, "You must specify at least one path str in toml.get()!")
    if section == nil {
        return val, false
    }

    section := section
    for dir in path[:len(path) - 1] {
        if dir in section {
            section, ok = section[dir].(^Table)
            if !ok do return val, false
        } else do return val, false
    }
    last := path[len(path) - 1]
    if last in section do return section[last].(T)
    else do return val, false
}

// Also retrieves and typechecks the value at path, but if something goes wrong, it crashes the program.
get_panic :: proc($T: typeid, section: ^Table, path: ..string) -> T
    where intrinsics.type_is_variant_of(Type, T)
{
    assert(len(path) > 0, "You must specify at least one path str in toml.get_panic()!")
    assert(section != nil, "toml.get_panic() was given a nil table!")
    section := section
    for dir in path[:len(path) - 1] {
        // report the element that is missing, not the whole path
        assertf(dir in section, "Missing key: '%s' in table '%v'!", dir, section^)
        section = section[dir].(^Table)
    }
    last := path[len(path) - 1]
    assertf(last in section, "Missing key: '%s' in table '%v'!", last, section^)
    return section[last].(T)
}

// Currently(2024-06-__), Odin hangs if you simply fmt.print Table
// Prints the table on one line; `level` is the nesting depth and only the
// outermost call (level 0) ends the line.
print_table :: proc(section: ^Table, level := 0) {
    log("{ ")
    i := 0
    for k, v in section {
        log(k, "= ")
        print_value(v, level)
        if i != len(section) - 1 do log(", ")
        else do log(" ")
        i += 1
    }
    log("}")
    if level == 0 do logln()
}

// Prints a single value; tables and lists are printed recursively.
@(private="file")
print_value :: proc(v: Type, level := 0) {
    #partial switch t in v {
    case ^Table:
        print_table(t, level + 1)
    case ^[dynamic] Type:
        log("[ ")
        for e, i in t {
            print_value(e, level)
            if i != len(t) - 1 do log(", ")
            else do log(" ")
        }
        log("]")
    case string:
        logf("%q", t)
    case:
        log(v)
    }
}

// Here lies the code for LSP:
// (non-generic wrappers around get() and get_panic())
get_i64 :: proc(section: ^Table, path: ..string) ->
    (val: i64, ok: bool) { return get(i64, section, ..path) }
get_f64 :: proc(section: ^Table, path: ..string) ->
    (val: f64, ok: bool) { return get(f64, section, ..path) }
get_bool :: proc(section: ^Table, path: ..string) ->
    (val: bool, ok: bool) { return get(bool, section, ..path) }
get_string :: proc(section: ^Table, path: ..string) ->
    (val: string, ok: bool) { return get(string, section, ..path) }
get_date :: proc(section: ^Table, path: ..string) ->
    (val: dates.Date, ok: bool) { return get(dates.Date, section, ..path) }
get_list :: proc(section: ^Table, path: ..string) ->
    (val: ^List, ok: bool) { return get(^List, section, ..path) }
get_table :: proc(section: ^Table, path: ..string) ->
    (val: ^Table, ok: bool) { return get(^Table, section, ..path) }

get_i64_panic :: proc(section: ^Table, path: ..string) ->
    i64 { return get_panic(i64, section, ..path) }
get_f64_panic :: proc(section: ^Table, path: ..string) ->
    f64 { return get_panic(f64, section, ..path) }
get_bool_panic :: proc(section: ^Table, path: ..string) ->
    bool { return get_panic(bool, section, ..path) }
get_string_panic :: proc(section: ^Table, path: ..string) ->
    string { return get_panic(string, section, ..path) }
get_date_panic :: proc(section: ^Table, path: ..string) ->
    dates.Date { return get_panic(dates.Date, section, ..path) }
get_list_panic :: proc(section: ^Table, path: ..string) ->
    ^List { return get_panic(^List, section, ..path) }
get_table_panic :: proc(section: ^Table, path: ..string) ->
    ^Table { return get_panic(^Table, section, ..path) }

--------------------------------------------------------------------------------
/validator.odin:
--------------------------------------------------------------------------------
package toml

import "base:runtime"
import "core:fmt"

// Every kind of problem the tokenizer, validator and parser can report.
// .None means "no error".
ErrorType :: enum {
    None,

    Bad_Date,
    Bad_File,
    Bad_Float,
    Bad_Integer,
    Bad_Name,
    Bad_New_Line,
    Bad_Unicode_Char,
    Bad_Value,

    Missing_Bracket,
    Missing_Comma,
    Missing_Key,
    Missing_Newline,
    Missing_Quote,
    Missing_Value,

    Double_Comma,
    Expected_Equals,
    Key_Already_Exists,
    Parser_Is_Stuck,
    Unexpected_Token,
}

Error :: struct {
    type: ErrorType, // what went wrong, .None when nothing did
    line: int,       // line counter of the stage that raised the error
    file: string,    // name of the file being parsed, borrowed from the caller
    more: Builder,   // extra, human readable details
}

// The filename is not freed, since it is only sliced
delete_error :: proc(err: ^Error) {
    if err.type != .None do b_destroy(&err.more)
}

// This may also be a warning!
// Formats the error and prints it to stdout. Returns false and prints
// nothing when `err` holds no error, so that
// `if print_error(err) do return` only bails out on real failures.
print_error :: proc(err: Error, allocator := context.allocator) -> (fatal: bool) {
    if err.type == .None do return false

    // The message must be allocated with the same allocator that frees it
    // below; setting the context covers format_error() either way.
    context.allocator = allocator

    message: string
    message, fatal = format_error(err, allocator)
    if message != "" {
        logf("[TOML ERROR] %s", message)
        delete(message, allocator)
    }
    return fatal
}

// The message is allocated and should be freed after use.
// Builds "file:line description! details\n" for `err`, allocated with
// `allocator`. Returns "", false when `err` holds no error.
format_error :: proc(err: Error, allocator := context.allocator) -> (message: string, fatal: bool) {
    // Nothing to report: print_error() relies on the empty message, and the
    // README's `if print_error(err) do return` relies on fatal == false.
    if err.type == .None do return "", false

    // fmt.aprintf allocates from the context; honour the parameter.
    context.allocator = allocator

    descriptions : [ErrorType] string = {
        .None               = "",
        .Bad_Date           = "Failed to parse a date",
        .Bad_File           = "Toml parser could not read the given file",
        .Bad_Float          = "Failed to parse a floating-point number (may be invalid value)",
        .Bad_Integer        = "Failed to parse an interger",
        .Bad_Name           = "Bad key/table name found before, use quotes, or only 'A-Za-z0-9_-'",
        .Bad_New_Line       = "New line is out of place",
        .Bad_Unicode_Char   = "Found an invalid unicode character in string",
        .Bad_Value          = "Bad value found after '='",
        .Double_Comma       = "Lists must have exactly 1 comma after each element (except trailing commas are optional)",
        .Expected_Equals    = "Expected '=' after assignment of a key",
        .Key_Already_Exists = "That key/section already exists",
        .Missing_Bracket    = "A bracket is missing (one of: '[', '{', '}', ']')",
        .Missing_Comma      = "A comma is missing",
        .Missing_Key        = "Expected key before '='",
        .Missing_Newline    = "A new line is missing between two key-value pairs",
        .Missing_Quote      = "Missing a quote",
        .Missing_Value      = "Expected a value after '='",
        .Parser_Is_Stuck    = "Parser has halted due to being in an infinite loop",
        .Unexpected_Token   = "Found a token that should not be there",
    }

    return fmt.aprintf("%s:%d %s! %s\n", err.file, err.line + 1, descriptions[err.type], err.more.buf[:]), true
}

// Skips all consecutive new lines
// new lines should not be skipped everywhere
// that's why this is not inside of the peek() procedure.
// Returns true when at least one new line token was skipped.
skip_newline :: proc() -> (ok: bool) {
    ok = peek() == "\n"
    for peek() == "\n" {
        g.err.line += 1
        skip()
    }
    return
}

// Checks the token stream statement by statement and returns the first
// error found (type .None when the input is acceptable). The global state
// that was active on entry is restored before returning.
validate :: proc(raw_tokens: [] string, file: string, allocator := context.allocator) -> Error {

    initial_data: GlobalData = {
        toks = raw_tokens,
        err  = { line = 1, file = file },
        aloc = allocator,
    }

    snapshot := g
    g = &initial_data
    defer g = snapshot

    for peek() != "" {
        // Only fall back to the generic error when the failing validator did
        // not raise a specific one (e.g. Missing_Bracket); make_err() would
        // otherwise replace it.
        if !validate_stmt() && g.err.type == .None {
            make_err(.Unexpected_Token, "Could not validate the (assumed to be) statement: %s", peek())
        }
        if g.err.type != .None do break
    }

    err := g.err
    return err
}

// '||' operator has short-circuiting in Odin, so I use this to chain functions.
// A statement is either a run of new lines, or a section / assignment that
// is followed by a new line or the end of input.
validate_stmt :: proc() -> bool {
    return skip_newline() || (validate_array() || validate_table() || validate_assign()) &&

        !err_if_not(peek() == "" || peek() == "\n", .Missing_Newline, "Found a missing new line between statements.")
}

// array of tables: `[[item]]` at the start of lines
validate_array :: proc() -> bool {
    if peek(0) != "[" || peek(1) != "[" do return false
    // The "[" token is one byte long; indexing [1] deliberately looks at the
    // byte after it to make sure the second bracket follows immediately.
    // NOTE(review): this relies on tokens being slices of the original
    // input — confirm if the tokenizer ever changes.
    #no_bounds_check {
        if err_if_not(peek(0)[1] == '[', .Missing_Bracket, "In section array both brackets must follow one another! '[[' not '[ ['") do return false
    }

    skip(2) // '[' '['
    validate_path()

    #no_bounds_check {
        if peek(0) == "]" && peek(1) == "]" && err_if_not(peek(0)[1] == ']', .Missing_Bracket, "In section array both brackets must follow one another! ']]' not '] ]'") do return false
    }
    if err_if_not(next() == "]", .Missing_Bracket, "']' missing in section array declaration") do return false
    if err_if_not(next() == "]", .Missing_Bracket, "']' missing in section array declaration") do return false

    return true
}

// tables: `[object]` at the start of lines
validate_table :: proc() -> bool {
    if peek(0) != "[" do return false

    skip() // '['
    validate_path()
    return !err_if_not(next() == "]", .Missing_Bracket, "']' missing in section declaration")
}

// key = value
validate_assign :: proc() -> bool {
    if peek(1) != "=" && peek(1) != "." do return false

    if !validate_path() do return false
    if err_if_not(peek() == "=", .Expected_Equals, "Keys must be followed by '=' and then the value! Instead got: %s", peek()) do return false
    skip() // '='
    return validate_expr()
}

// there.are.dotted.paths.in.toml each "directory" is supposed to be an object, last depends on the context.
// for example: in statement [[a.b]] a is a Table, b is a List of Table(s)
validate_path :: proc() -> bool {//{{{
    // Currently accepts every token as a name and just consumes it, so the
    // Bad_Name branches below cannot trigger.
    validate_name :: proc() -> bool {
        skip()
        return true
    }

    for peek(1) == "." {
        if peek(0) == "\n" || peek(2) == "\n" {
            make_err(.Bad_New_Line, "paths.of.keys must be on the same line")
            return false
        }

        if !validate_name() {
            make_err(.Bad_Name, "key in path cannot have this name: '%s'", peek())
            return false
        }
        skip() // '.'
    }

    if !validate_name() {
        make_err(.Bad_Name, "key in path cannot have this name: '%s'", peek())
        return false
    }

    return true
}//}}}

// Order matters. There can be expressions without statements (See: last line of validate_assign()).
// Validates one value expression starting at the current token.
// Each value validator is tried in order; `||` short-circuits, so the first
// validator that returns true wins and the remaining ones are not called.
validate_expr :: proc() -> bool {
    return validate_string() ||
           validate_bool() ||
           validate_date() ||
           validate_inline_list() ||
           validate_inline_table() ||
           validate_number()
}

// Validates a string token (basic, literal, or their multi-line variants).
// Returns false without recording an error when the current token does not
// start with a quote character, so other validators can be tried.
validate_string :: proc() -> bool {//{{{
    // Checks that the token ends with the same quote pattern it starts with.
    // Every matching pattern is checked (there is no early exit from the loop),
    // then the token is skipped.
    validate_quotes :: proc() -> bool {
        PATTERNS := [] string { "\"\"\"", "'''", "\"", "\'", }
        for p in PATTERNS {
            if starts_with(peek(), p) {
                if err_if_not(ends_with(peek(), p), .Missing_Quote, "string '%s' is missing one or more quotes", peek()) do return false
            }
        }
        skip()
        return true
    }

    if len(peek()) == 0 do return false
    if r := peek()[0]; !any_of(r, '"', '\'') do return false

    return validate_quotes()
    // this should be done in the tokenizer & cleanup_backslashes() (it isn't): || validate_escapes() || validate_codepoints()
}//}}}

// Validates a boolean token.
// 'yes'/'no' record a .Bad_Value error (and then fall through to `return false`,
// because they are neither "true" nor "false"). A token that matches
// "true"/"false" under `eq` but not under `==` records "booleans must be lowercase".
validate_bool :: proc() -> bool { //{{{
    if eq(peek(), "yes") do make_err(.Bad_Value, "'Yes' is not a valid expression in TOML, please use 'true'!")
    if eq(peek(), "no")  do make_err(.Bad_Value, "'No' is not a valid expression in TOML, please use 'false'!")

    // eq is case-insensitive compare, while '==' operator is case-sensitive
    if !eq(peek(), "false") && !eq(peek(), "true") do return false

    // The deferred skip runs after the return expression has been evaluated,
    // so peek() below still sees the boolean token.
    defer skip()
    return !err_if_not(peek() == "false" || peek() == "true", .Bad_Value, "booleans must be lowercase")
}//}}}

// Validates a date, a time, or a date-time at the current token.
// Returns true when a date and/or a time token was recognised and consumed.
// Format problems are recorded through err_if_not/make_err.
validate_date :: proc() -> (ok: bool) { //{{{
    // True when `str` begins with "DDDD-DD-DD" (D = ASCII digit).
    // Anything after the first 10 bytes is not inspected here.
    is_proper_date :: proc(str: string) -> bool {
        // I hope, LLVM can do something with this...
        return len(str) > 9 &&
               str[0] >= '0' && str[0] <= '9' &&
               str[1] >= '0' && str[1] <= '9' &&
               str[2] >= '0' && str[2] <= '9' &&
               str[3] >= '0' && str[3] <= '9' &&
               str[4] == '-' &&
               str[5] >= '0' && str[5] <= '9' &&
               str[6] >= '0' && str[6] <= '9' &&
               str[7] == '-' &&
               str[8] >= '0' && str[8] <= '9' &&
               str[9] >= '0' && str[9] <= '9'
    }

    // True when `str` begins with "DD:DD:DD" (D = ASCII digit).
    is_proper_time :: proc(str: string) -> bool {
        return len(str) > 7 &&
               str[0] >= '0' && str[0] <= '9' &&
               str[1] >= '0' && str[1] <= '9' &&
               str[2] == ':' &&
               str[3] >= '0' && str[3] <= '9' &&
               str[4] >= '0' && str[4] <= '9' &&
               str[5] == ':' &&
               str[6] >= '0' && str[6] <= '9' &&
               str[7] >= '0' && str[7] <= '9'
    }

    // Validates the time part `str` of the current token, plus an optional
    // fractional-seconds token pair ('.' and digits) and an optional UTC offset.
    // When a fractional part is present, two tokens are skipped here.
    validate_time :: proc(str: string) -> bool {
        if err_if_not(is_proper_time(str), .Bad_Date, "The date: '%s' is not valid, please use rfc 3339 (e.g.: 1234-12-12, or 60:45:30+02:00)", peek()) do return false

        offset := str[8:] if len(str) > 8 else ""

        // because of dotted.keys, 'start' '.' 'end' are different tokens.
        if peek(1) == "." {
            for r, i in peek(2) {
                if r == '-' || r == '+' {
                    // The offset follows the fractional digits in the same token.
                    offset = peek(2)[i:]
                    break
                }
                if err_if_not(is_digit(r, 10) || r == 'Z' || r == 'z', .Bad_Date, "Bad millisecond count in the date.") do return false
            }
            skip(2)
        }

        if offset == "" do return true

        if offset[0] == '+' || offset[0] == '-' {
            s := offset[1:]
            // The length guard must be on `s` (the part actually indexed below);
            // guarding on `str` allowed out-of-bounds indexing for short offsets like "+1".
            valid := len(s) > 4 &&
                     s[0] >= '0' && s[0] <= '9' &&
                     s[1] >= '0' && s[1] <= '9' &&
                     s[2] == ':' &&
                     s[3] >= '0' && s[3] <= '9' &&
                     s[4] >= '0' && s[4] <= '9'
            // Record an error so a malformed offset is not rejected silently.
            return !err_if_not(valid, .Bad_Date, "The UTC offset: '%s' is not valid, please use rfc 3339 (e.g.: +02:00 or -07:30)", offset)
        }
        return true // 'Z' and 'z' are unnecessary in TOML
    }

    // Dates will necessarily have - as their 5th symbol: "0123-00-00"
    if len(peek()) > 4 && peek()[4] == '-' {
        // The result is intentionally not used for control flow here;
        // a malformed date only records the error.
        err_if_not(is_proper_date(peek()), .Bad_Date, "The date: '%s' is not valid, please use rfc 3339 (e.g.: 1234-12-12, or 60:45:30+02:00)", peek())

        // time can be separated either by { 't', 'T' or ' ' }, ' ' is split by tokenizer
        if len(peek()) > 11 && (peek()[10] == 'T' || peek()[10] == 't') {
            if !validate_time(peek()[11:]) do return false
        }
        next()
        ok = true
    }

    // Time can be either without date or split from it by whitespace.
    // This handles both scenarios
    if len(peek()) > 2 && peek()[2] == ':' {
        validate_time(peek())
        next()
        ok = true
    }

    return ok
}//}}}

// Good luck!
// Validates an integer or float at the current token.
// A float with a fractional part arrives as three tokens: digits, ".", digits.
// Returns false without an error when the token does not look like a number at
// all (so the caller can report something more fitting), and false with an
// error recorded when it looks like a number but is malformed.
validate_number :: proc() -> bool {//{{{
    // Rune starting at byte offset `i` of `s`, or 0 when there is none.
    at :: proc(s: string, i: int) -> rune {
        for r, j in s {
            if i == j do return r
        }
        return 0
    }

    // Splits `a` at the first rune that occurs in `b`; the separator is dropped.
    // Returns (a, "") when no separator is found.
    split_by :: proc(a: string, b: string) -> (string, string) {
        for r1, i in a {
            for r2 in b {
                if r1 == r2 do return a[:i], a[i + 1:]
            }
        }
        return a, ""
    }

    // underscores must be between 2 digits
    // `r` is the current rune, `p` the previous one (0 at the start).
    validate_underscores :: proc(r: rune, p: rune, is_last: bool) -> bool {
        if r != '_' do return true
        switch {
        case p == '_' : make_err(.Bad_Integer, "Double underscore mid number")
        case p == 0   : make_err(.Bad_Integer, "Underscore cannot be the first character in a number")
        case is_last  : make_err(.Bad_Integer, "Underscore cannot be the last character in a number")
        case: return true
        }
        return false
    }

    // Validates one run of digits (main, fraction or exponent part) in `base`.
    // A non-digit first rune returns false WITHOUT an error ("not a number");
    // any later bad rune records .Bad_Integer using `bad_char_fmt`.
    validate_digits :: proc(part: string, base: int, bad_char_fmt: string) -> bool {
        prev: rune = 0
        for r, i in part {
            if prev == 0 && !is_digit(r, base) do return false
            if err_if_not(is_digit(r, base) || r == '_', .Bad_Integer, bad_char_fmt, r) do return false
            if !validate_underscores(r, prev, i == len(part) - 1) do return false
            prev = r
        }
        return true
    }

    number := peek()
    if at(number, 0) == '+' || at(number, 0) == '-' do number = number[1:]

    if eq(number, "nan") || eq(number, "inf") {
        err_if_not(number == "nan" || number == "inf", .Bad_Float, "NaN and Inf must be fully lowercase in TOML: `nan` and `inf`! (I don't know why). Your's is: '%s'", peek())
        skip()
        return true
    }

    // Detect the radix prefix BEFORE looking for an exponent: 'e'/'E' are
    // ordinary digits in hexadecimal, so splitting first would cut numbers
    // such as 0xE_5 or 0xDE_E in the wrong place.
    base := 10
    if at(number, 0) == '0' {
        switch at(number, 1) {
        case 'x': base = 16
        case 'o': base = 8
        case 'b': base = 2
        }
    }

    // I split the number into three parts: main.fractionEexponent or mainEexponent
    main, fraction, exponent: string

    if base == 10 {
        exp1, exp2: string
        main, exp1 = split_by(number, "eE")
        if peek(1) == "." {
            fraction, exp2 = split_by(peek(2), "eE")

            if exp1 != "" && exp2 != "" {
                make_err(.Bad_Float, "A number cannot have 2 exponent parts! '1e5.7e6' is invalid")
                return false
            }
        }
        exponent = exp1 if exp1 != "" else exp2
        if at(exponent, 0) == '-' || at(exponent, 0) == '+' do exponent = exponent[1:]

        // If a number starts with zero it must be followed by 'x', 'o', 'b' or nothing
        // (validation continues after recording the error, as before).
        if at(main, 0) == '0' && at(main, 1) != 0 {
            make_err(.Bad_Integer, "A number cannot start with '0'. Please use '0o1234' for octal")
        }
    } else {
        // Prefixed numbers: no exponent splitting, everything after the
        // two-byte prefix is the digit run.
        main = number[2:]
        if peek(1) == "." do fraction = peek(2)
        if err_if_not(main != "", .Bad_Integer, "Number '%s' has no digits after its base prefix", peek()) do return false
    }

    // No digits at all (empty token, a lone sign, or something like 'e5'):
    // this is not a number, and no token must be consumed.
    if main == "" do return false

    if !validate_digits(main,     base, "Unexpected character: '%v' in number") do return false
    if !validate_digits(fraction, base, "Unexpected character: '%v' in decimal part of number ") do return false
    if !validate_digits(exponent, base, "Unexpected character: '%v' in exponent part of number") do return false

    skip()
    if fraction != "" do skip(2) // '.' and the fraction token
    return true
}//}}}

// Validates an inline array: '[' expr (',' expr)* [','] ']'.
// Newlines are skipped between elements; a trailing comma is accepted,
// two commas in a row are an error.
validate_inline_list :: proc() -> bool { //{{{
    if peek() != "[" do return false
    skip() // '['

    for {

        skip_newline()
        if peek() == "]" do break

        if !validate_expr() do return false

        skip_newline()
        if peek() == "]" do break

        if err_if_not(peek() == ",", .Missing_Comma, "Comma is missing between elements") do return false
        skip() // ','
        skip_newline()
        if peek() == "," {
            make_err(.Double_Comma, "double comma found in an inline list.")
            return false
        }

    }

    return !err_if_not(next() == "]", .Missing_Bracket, "']' missing in inline array declaration")
}//}}}

// Validates an inline table: '{' assignment (',' assignment)* [','] '}'.
// Same shape as validate_inline_list, but every element is validated with
// validate_assign instead of validate_expr.
validate_inline_table :: proc() -> bool { //{{{
    if peek() != "{" do return false
    skip() // '{'

    for {
        skip_newline()
        if peek() == "}" do break

        if !validate_assign() do return false

        skip_newline()
        if peek() == "}" do break

        if err_if_not(peek() == ",", .Missing_Comma, "Comma is missing between elements") do return false
        skip() // ',' // you can have trailing commas in my inline tables, why not?
        skip_newline()
        if peek() == "," {
            make_err(.Double_Comma, "double comma found in an inline table.")
            return false
        }
    }

    return !err_if_not(next() == "}", .Missing_Bracket, "'}' missing in inline table declaration")
}//}}}

// Records an error in the global validator state `g.err`: sets the error type
// and replaces the formatted detail message. `g.aloc` is installed as the
// context allocator for the duration of this call.
// NOTE(review): b_reset / b_printf are presumably string-builder helpers
// defined elsewhere in the package - confirm.
@(private="file")
make_err :: proc(type: ErrorType, more_fmt: string, more_args: ..any) {
    g.err.type = type
    context.allocator = g.aloc
    b_reset(&g.err.more)
    b_printf(&g.err.more, more_fmt, ..more_args)
}

// Records an error (see make_err) when `cond` is false.
// Returns true when the error was raised, i.e. returns `!cond`, which allows
// `if err_if_not(...) do return false` at call sites.
@(private="file")
err_if_not :: proc(cond: bool, type: ErrorType, more_fmt: string, more_args: ..any) -> bool {
    if !cond do make_err(type, more_fmt, ..more_args)
    return !cond
}