├── .gitignore
├── LICENSE
├── README.md
├── dates
    └── parser.odin
├── example.toml
├── main.odin
├── misc.odin
├── mod.pkg
├── parser.odin
├── tests
    └── test.odin
├── tokenizer.odin
├── toml.odin
└── validator.odin


/.gitignore:
--------------------------------------------------------------------------------
1 | testing
2 | compile.sh
3 | ols.json
4 | /toml_parser
5 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Ult1
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # TOML parser
  2 | 
  3 | A TOML parser for odin-lang. 
  4 | 
  5 | # Example
  6 | 
  7 | ```Odin
  8 | import "toml"
  9 | import "toml/dates"
 10 | 
 11 | main :: proc() {
 12 |   using toml
 13 |   
 14 |   section, err1 := parse_file("toml/example.toml", context.temp_allocator)
 15 |   default, err2 := parse_data(#load("toml/example.toml"), "example.toml", context.temp_allocator)
 16 | 
 17 |   if print_error(err2) do return
 18 |   print_error(err1)
 19 | 
 20 |   print_table(section)
 21 |   
 22 |   inf := get_f64(section, "infinity") or_else get_f64_panic(default, "infinity")
 23 |   num := get(i64, section, "num") or_else 5
 24 | 
 25 |   str := get(string, section, "multiline_str") or_else "bad"
 26 | 
 27 |   date := get_date(section, "letsnot", "k", "l", "m", "n") or_else dates.Date {}
 28 |   
 29 |   list := get_panic(^List, section, "o", "p")
 30 | 
 31 | }
 32 | ```
 33 | 
 34 | # Installation 
 35 | 
 36 | Simply,
 37 | ```nix
 38 | cd your_project
 39 | git clone https://github.com/Up05/toml_parser toml
 40 | ```  
 41 | And `import "toml"`
 42 | 
 43 | # Design/Idiom idea
 44 | 
 45 | Although you can simply use `or_else` or just `val, ok := get(...)`, I propose the following idiom: 
 46 |   1. load a configuration at runtime, by using `parse_file`
 47 |   2. load their configuration at compile time by using `parse_data(#load(same_file), "filename.toml")`
 48 |   3. first get a value from the runtime config by using `get`; then, if need be, fall back (via `or_else`) to the compile-time config and use `get_panic`.
 49 | 
 50 | # Function reference
 51 | 
 52 | ## Errors
 53 | ```odin
 54 | format_error :: proc(err: Error, allocator := context.temp_allocator) -> (message: string, fatal: bool) 
 55 | ```
 56 | Formats the error and returns the message, as well as whether the error was fatal or not.
 57 | 
 58 | ```odin
 59 | print_error :: proc(err: Error) -> (fatal: bool)
 60 | ```
 61 | Formats and prints the specified error to `stdout`. Use `format_error` instead if you only want the error message.
 62 | 
 63 | ## Parsing
 64 | 
 65 | ```odin  
 66 | parse_file :: proc(filename: string, allocator := context.allocator) -> (section: ^Table, err: Error) 
 67 | ```
 68 | Parses the specified toml file. Returns the root table & an error, which can then be nicely printed with `print_error`.
 69 | 
 70 | ```odin  
 71 | parse_data :: proc(data: []u8, original_filename := "untitled data", allocator := context.allocator) -> (section: ^Table, err: Error)  
 72 | ```
 73 | Parses the given data. Is meant to be used with `#load(file)`. 
 74 |  
 75 | ```odin  
 76 | parse :: proc(data: string, original_file: string, allocator := context.allocator) -> (tokens: ^Table, err: Error) 
 77 | ```
 78 | Parses the TOML in a string. Underlying function called by `parse_data` and `parse_file`.
 79 | 
 80 | ## Getting the values
 81 | 
 82 | ```odin
 83 | get :: proc($T: typeid, section: ^Table, path: ..string) -> (val: T, ok: bool) // where T is in Type union
 84 | ```
 85 | Retrieves and type checks the value at path. **Careful, path is not specified by dots!**
 86 | Works on any table.
 87 | 
 88 | ```odin
 89 | get_panic :: proc($T: typeid, section: ^Table, path: ..string) -> T // where T is in Type union
 90 | ```
 91 | Retrieves and type checks the value at path. **Careful, path is not specified by dots!**
 92 | Works on any table. Crashes if not ok.
 93 | 
 94 | There are also `get_<type>` & `get_<type>_panic` functions for all possible types in the `Type` union.  
 95 | Here are the variants: `{ table, list, string, bool, i64, f64, date }`
 96 | 
 97 | ## Printing
 98 | 
 99 | Generally replaced by `fmt.print` and `fmt.printf("%#v\n", ...`.
100 | ```odin
101 | print_table :: proc(section: ^Table, level := 0)
102 | ```
103 | A while back Odin used to hang when printing a map pointer.  
104 | I'm pretty sure it does not anymore.
105 | 
106 | ```odin
107 | print_value :: proc(v: Type, level := 0) 
108 | ```
109 | 
110 | ## Freeing memory
111 | 
112 | ```odin
113 | deep_delete :: proc(type: Type, allocator := context.allocator) -> (err: runtime.Allocator_Error)
114 | ```
115 | Recursively frees parser's output
116 | 
117 | ```odin
118 | delete_error :: proc(err: ^Error)
119 | ```
120 | Simply, frees the error.  
121 | *Filename is not freed, because the parser only slices it.*
122 | 
123 | ## Testing (internal)
124 | 
125 | ```odin
126 | @private
127 | main :: proc()
128 | ```
129 | This is here for `toml-test`. It takes in the TOML from `stdin`, parses it, marshals it to JSON and prints the JSON to stdout. 
130 | Unless there was an error, in which case the program does not print anything and only exits with exit code `1`. 
131 | 
132 | *Some tests fail because of how odin formats floats & non-printable characters, cba to fix that and it doesn't matter.*
133 | 
134 | # Files
135 | 
136 | ```sh
137 | main.odin       # an internal file for testing
138 | toml.odin       # the main user-facing file
139 | misc.odin       # a couple miscellaneous functions
140 | 
141 | tokenizer.odin  # rips text apart by space and special symbols (string -> [] string)
142 | validator.odin  # checks whether given TOML is valid or not    ([] string -> Error?)
143 | parser.odin     # parses tokens into the recursive Type union  ([] string -> Type)
144 | 
145 | tests/          # odin core:testing tests (currently, there is 1...)
146 | dates/          # my small RFC3339 date parsing library
147 | mod.pkg         # package info for the odin package website (can't find it right now...)
148 | ```
149 | 
150 | 
151 | 
152 | 


--------------------------------------------------------------------------------
/dates/parser.odin:
--------------------------------------------------------------------------------
  1 | package dates
  2 | 
  3 | import "core:fmt"
  4 | import "core:math"
  5 | import "core:slice"
  6 | import "core:strconv"
  7 | import "core:strings"
  8 | 
  9 | DateError :: enum {
 10 |     NONE, // zero value, returned on success
 11 | 
 12 |     // Where parser REALIZED something is wrong!
 13 |     FAILED_AT_YEAR,
 14 |     FAILED_AT_MONTH,
 15 |     FAILED_AT_DAY,
 16 |     FAILED_AT_HOUR,
 17 |     FAILED_AT_MINUTE,
 18 |     FAILED_AT_SECOND,
 19 |     FAILED_AT_OFFSET_HOUR,
 20 |     FAILED_AT_OFFSET_MINUTE,
 21 |     YEAR_OUT_OF_BOUNDS,
 22 |     MONTH_OUT_OF_BOUNDS, // 01-12
 23 |     DAY_OUT_OF_BOUNDS,
 24 |     HOUR_OUT_OF_BOUNDS,
 25 |     MINUTE_OUT_OF_BOUNDS,
 26 |     SECOND_OUT_OF_BOUNDS,
 27 |     OFFSET_HOUR_OUT_OF_BOUNDS,
 28 |     OFFSET_MINUTE_OUT_OF_BOUNDS,
 29 |     FAILED_AT_TIME_SEPERATOR, // character separating full-date & full-time isn't in variable "time_separators"
 30 | }
 31 | 
 32 | // may be overwritten. Set to empty array to accept any time separator
 33 | time_separators: []string = {"t", "T", " "}
 34 | offset_separators: []string = {"z", "Z", "+", "-"}
 35 | 
 36 | Date :: struct {
 37 |     second:           f32, // may carry a fraction, e.g. 0.12
 38 |     is_date_local:    bool, // set by from_string when a time was parsed without a UTC offset
 39 |     is_time_only :    bool, // from_string found no date part
 40 |     is_date_only :    bool, // from_string found no time part
 41 | 
 42 |     year, month, day: int,
 43 |     hour, minute:     int,
 44 |     offset_hour:      int, // from_string negates both offset fields for "-hh:mm"
 45 |     offset_minute:    int,
 46 | }
 47 | 
 48 | // Parses an RFC 3339 style date ("2024-06-07"), time ("20:00:00.12") or date-time string into a Date.
 49 | // Truncated or malformed input is reported through `err` instead of slicing out of range.
 50 | from_string :: proc(date: string) -> (out: Date, err: DateError) {
 51 |     date := date
 52 | 
 53 |     // Both flags start set; finding a date clears is_time_only, finding a time clears is_date_only.
 54 |     out.is_date_only = true
 55 |     out.is_time_only = true
 56 | 
 57 |     ok: bool
 58 | 
 59 |     // ##############################  D A T E  ##############################
 60 | 
 61 |     // Because there has to be a leading zero, the first '-' of a date sits at index 4
 62 |     if len(date) >= 5 && date[4] == '-' {
 63 |         out.is_time_only = false
 64 |         out.year = parse_int2(date[0:4], .FAILED_AT_YEAR) or_return
 65 | 
 66 |         if len(date) < 7 do return out, .FAILED_AT_MONTH
 67 |         out.month = parse_int2(date[5:7], .FAILED_AT_MONTH) or_return
 68 |         if !between(out.month, 1, 12) do return out, .MONTH_OUT_OF_BOUNDS
 69 | 
 70 |         if len(date) < 10 || date[7] != '-' do return out, .FAILED_AT_DAY
 71 |         out.day = parse_int2(date[8:10], .FAILED_AT_DAY) or_return
 72 |         if !between(out.day, 1, days_in_month(out.year, out.month)) do return out, .DAY_OUT_OF_BOUNDS
 73 | 
 74 |         if len(date) > 10 {
 75 |             if !(len(time_separators) == 0 ||
 76 |                    slice.any_of(time_separators, date[10:11])) {
 77 |                 return out, .FAILED_AT_TIME_SEPERATOR
 78 |             }
 79 | 
 80 |             date = date[11:]
 81 |         }
 82 |     }
 83 | 
 84 |     // ##############################  T I M E  ##############################
 85 | 
 86 |     if len(date) >= 8 && date[2] == ':' {
 87 |         out.is_date_only = false
 88 |         out.hour = parse_int2(date[0:2], .FAILED_AT_HOUR) or_return
 89 |         if !between(out.hour, 0, 23) do return out, .HOUR_OUT_OF_BOUNDS
 90 | 
 91 |         out.minute = parse_int2(date[3:5], .FAILED_AT_MINUTE) or_return
 92 |         if !between(out.minute, 0, 59) do return out, .MINUTE_OUT_OF_BOUNDS
 93 | 
 94 |         if date[5] != ':' do return out, .FAILED_AT_SECOND
 95 |         date = date[6:] // skip "hh:mm:"
 96 |         offset, _ := strings.index_multi(date, offset_separators)
 97 | 
 98 |         out.second, ok = strconv.parse_f32(date[:offset if offset != -1 else len(date)])
 99 |         if !ok do return out, .FAILED_AT_SECOND
100 |         // seconds \in [00, 60], because of leap seconds
101 |         if !between(int(out.second), 0, 60) do return out, .SECOND_OUT_OF_BOUNDS
102 | 
103 |         if offset != -1 {
104 |             date = date[offset:]
105 |             // Compare the byte directly: strings.to_lower allocated a copy that was never freed.
106 |             if date[0] == 'z' || date[0] == 'Z' do return
107 | 
108 |             // A numeric offset is "+hh:mm" / "-hh:mm" (6 bytes); check the length before slicing.
109 |             if len(date) < 3 do return out, .FAILED_AT_OFFSET_HOUR
110 |             out.offset_hour = parse_int2(date[1:3], .FAILED_AT_OFFSET_HOUR) or_return
111 |             if !between(out.offset_hour, 0, 23) do return out, .OFFSET_HOUR_OUT_OF_BOUNDS
112 | 
113 |             if len(date) < 6 || date[3] != ':' do return out, .FAILED_AT_OFFSET_MINUTE
114 |             out.offset_minute = parse_int2(date[4:6], .FAILED_AT_OFFSET_MINUTE) or_return
115 |             if !between(out.offset_minute, 0, 59) do return out, .OFFSET_MINUTE_OUT_OF_BOUNDS
116 | 
117 |             if date[0] == '-' {
118 |                 out.offset_hour *= -1
119 |                 out.offset_minute *= -1
120 |             }
121 |         } else {
122 |             out.is_date_local = true
123 |         }
124 |     }
125 | 
126 |     return
127 | }
128 | 
129 | // Formats `date` as a full date-time: "YYYY-MM-DD", then `time_sep`, then "hh:mm:ss" (seconds are
130 | // printed without their fraction), then "Z" for a zero offset or "+hh:mm" / "-hh:mm" otherwise.
131 | // Returns "" and a *_OUT_OF_BOUNDS error when a field is outside its accepted range.
132 | // The returned string is backed by the builder's buffer, which is not freed here.
133 | to_string :: proc(date: Date, time_sep := ' ') -> (out: string, err: DateError) {
134 |     date := date
135 | 
136 |     {
137 |         using date
138 |         if !between(year, 0, 9999) do return "", .YEAR_OUT_OF_BOUNDS
139 |         if !between(month, 0, 12) do return "", .MONTH_OUT_OF_BOUNDS
140 |         if !between(day, 0, days_in_month(year, month)) do return "", .DAY_OUT_OF_BOUNDS
141 |         if !between(hour, 0, 23) do return "", .HOUR_OUT_OF_BOUNDS
142 |         if !between(minute, 0, 59) do return "", .MINUTE_OUT_OF_BOUNDS
143 |         if !between(int(second), 0, 60) do return "", .SECOND_OUT_OF_BOUNDS
144 |         if !between(offset_hour, -23, 23) do return "", .OFFSET_HOUR_OUT_OF_BOUNDS
145 |         if !between(offset_minute, -59, 59) do return "", .OFFSET_MINUTE_OUT_OF_BOUNDS
146 |     }
147 | 
148 |     b: strings.Builder
149 |     strings.builder_init_len_cap(&b, 0, 25)
150 | 
151 |     fmt.sbprintf(&b, "%04d-%02d-%02d", date.year, date.month, date.day)
152 |     strings.write_rune(&b, time_sep)
153 |     fmt.sbprintf(&b, "%02d:%02d:%02.0f", date.hour, date.minute, date.second)
154 | 
155 |     if date.offset_hour == 0 && date.offset_minute == 0 do strings.write_rune(&b, 'Z')
156 |     else {
157 |         if date.offset_hour != 0 && date.offset_minute != 0 && sign(date.offset_hour) != sign(date.offset_minute) {
158 |             fmt.printf("DATE PARSER WARNING: signs of your Date.offset_hour & Date.offset_minute do not match! " + "Given dates will be safely converted, but may be unexpected. " + "Go to line: %d in: %s to find out more.\n", #line - 1, #file)
159 |         }
160 | 
161 |         // Work in total minutes: this handles mixed signs and a zero hour ("+00:30", "-00:30"),
162 |         // where the hour alone cannot tell the direction of the offset.
163 |         total := date.offset_hour * 60 + date.offset_minute
164 |         if total < 0 do strings.write_rune(&b, '-')
165 |         else do strings.write_rune(&b, '+')
166 |         total = abs(total)
167 | 
168 |         fmt.sbprintf(&b, "%02d:%02d", total / 60, total % 60)
169 |     }
170 | 
171 |     return strings.to_string(b), .NONE
172 | }
173 | 
174 | // Formats `date` honouring is_date_only / is_time_only / is_date_local; a fractional second is printed with 3 decimals.
175 | partial_date_to_string :: proc(date: Date, time_sep := ' ',) -> (out: string, err: DateError) {
176 |     date := date
177 |     {
178 |         using date
179 |         if !between(year, 0, 9999) do return "", .YEAR_OUT_OF_BOUNDS
180 |         if !between(month, 0, 12) do return "", .MONTH_OUT_OF_BOUNDS
181 |         if !between(day, 0, days_in_month(year, month)) do return "", .DAY_OUT_OF_BOUNDS
182 |         if !between(hour, 0, 23) do return "", .HOUR_OUT_OF_BOUNDS
183 |         if !between(minute, 0, 59) do return "", .MINUTE_OUT_OF_BOUNDS
184 |         if !between(int(second), 0, 60) do return "", .SECOND_OUT_OF_BOUNDS
185 |         if !between(offset_hour, -23, 23) do return "", .OFFSET_HOUR_OUT_OF_BOUNDS
186 |         if !between(offset_minute, -59, 59) do return "", .OFFSET_MINUTE_OUT_OF_BOUNDS
187 |     }
188 | 
189 |     b: strings.Builder
190 |     strings.builder_init_len_cap(&b, 0, 25)
191 | 
192 |     // Print the fraction only when the seconds actually have one.
193 |     _, frac := math.modf_f32(date.second)
194 |     timefmt := "%02d:%02d:%02.0f"
195 |     if frac > 0  do timefmt = "%02d:%02d:%06.03f"
196 | 
197 |     if date.is_date_only {
198 |         fmt.sbprintf(&b, "%04d-%02d-%02d", date.year, date.month, date.day)
199 |         return strings.to_string(b), .NONE
200 |     }
201 |     if date.is_time_only {
202 |         fmt.sbprintf(&b, timefmt, date.hour, date.minute, date.second)
203 |         return strings.to_string(b), .NONE
204 |     }
205 | 
206 |     fmt.sbprintf(&b, "%04d-%02d-%02d", date.year, date.month, date.day)
207 |     strings.write_rune(&b, time_sep)
208 |     fmt.sbprintf(&b, timefmt, date.hour, date.minute, date.second)
209 | 
210 |     if date.is_date_local do return strings.to_string(b), .NONE
211 |     if date.offset_hour == 0 && date.offset_minute == 0 do strings.write_rune(&b, 'Z')
212 |     else {
213 |         if date.offset_hour != 0 && date.offset_minute != 0 && sign(date.offset_hour) != sign(date.offset_minute) {
214 |             fmt.printf("DATE PARSER WARNING: signs of your Date.offset_hour & Date.offset_minute do not match! " + "Given dates will be safely converted, but may be unexpected. " + "Go to line: %d in: %s to find out more.\n", #line - 1, #file)
215 |         }
216 |         // Work in total minutes so mixed signs and a zero hour ("+00:30", "-00:30") print correctly.
217 |         total := date.offset_hour * 60 + date.offset_minute
218 |         if total < 0 do strings.write_rune(&b, '-')
219 |         else do strings.write_rune(&b, '+')
220 |         total = abs(total)
221 |         fmt.sbprintf(&b, "%02d:%02d", total / 60, total % 60)
222 |     }
223 | 
224 |     return strings.to_string(b), .NONE
225 | }
226 | 
227 | 
228 | // Cheap shape check, no range validation: true when `date` starts with
229 | // "dddd-dd-dd" (a date) or, failing that, with "dd:dd:dd" (a time).
230 | is_date_lax :: proc(date: string) -> bool {
231 |     // full-date prefix: YYYY-MM-DD
232 |     if len(date) >= 10 &&
233 |        date[4] == '-' && date[7] == '-' &&
234 |        are_all_numbers(date[0:4]) &&
235 |        are_all_numbers(date[5:7]) &&
236 |        are_all_numbers(date[8:10]) {
237 |         return true
238 |     }
239 | 
240 |     // time prefix: hh:mm:ss
241 |     if len(date) < 8 do return false
242 |     if date[2] != ':' || date[5] != ':' do return false
243 | 
244 |     // all three fields must consist of digits
245 |     return are_all_numbers(date[0:2]) &&
246 |            are_all_numbers(date[3:5]) &&
247 |            are_all_numbers(date[6:8])
248 | }
249 | 
250 | @(private)
251 | are_all_numbers :: proc(s: string) -> (out: bool) {
252 |     // an empty string contains no offending rune, so it yields true
253 |     for ch in s {
254 |         if !('0' <= ch && ch <= '9') do return false
255 |     }
256 |     return true
257 | }
258 | 
259 | // Integer signum: -1 for negative input, 1 for positive input, 0 for zero.
260 | @(private)
261 | sign :: proc(#any_int a: int) -> int {
262 |     return 0 if a == 0 else (-1 if a < 0 else 1)
263 | }
264 | 
265 | // Despite the name this parses an unsigned base-10 number and converts the result to int.
266 | @(private)
267 | parse_int :: proc(num: string) -> (int, bool) {
268 |     parsed, ok := strconv.parse_uint(num, 10)
269 |     return int(parsed), ok
270 | }
271 | 
272 | @(private)
273 | parse_int2 :: proc(num: string, potential: DateError) -> (int, DateError) {
274 |     value, ok := strconv.parse_uint(num, 10) // on failure the caller-supplied `potential` error is reported
275 |     return int(value), DateError.NONE if ok else potential
276 | }
277 | 
278 | @(private)
279 | between :: proc(a, lo, hi: int) -> bool {
280 |     return lo <= a && a <= hi // inclusive on both ends
281 | }
282 | 
283 | 
284 | @(private)
285 | days_in_month :: proc(year: int, month: int) -> int {
286 |     switch month {
287 |     case 1, 3, 5, 7, 8, 10, 12: return 31
288 |     case 4, 6, 9, 11:           return 30
289 |     }
290 |     return 29 if leap_year(year) else 28 // February, and any month value outside the two lists
291 | }
292 | 
293 | @(private)
294 | leap_year :: proc(year: int) -> bool {
295 |     return year % 400 == 0 || (year % 4 == 0 && year % 100 != 0) // Gregorian rule
296 | }
297 | 


--------------------------------------------------------------------------------
/example.toml:
--------------------------------------------------------------------------------
 1 | # example.toml
 2 | # example.toml
 3 | 
 4 | integer = 5
 5 | num = 123.5
 6 | infinity = inf
 7 | # unicode chars need chcp 65001 & stuff
 8 | multiline_str = """
 9 | a
10 | b c \u2230
11 | """
12 | a.b = "dotted.tables"
13 | c = { d = "inline tables" }
14 | [e.f]
15 | g = "useful tables"
16 | [[h.i]]
17 | j = "lists of tables"
18 | [letsnot]
19 | k.l.m.n = 2024-06-07T20:00:00.12+02:00
20 | 
21 | [o]
22 | p = [ 1, [ 2, 3 ], 4]
23 | 


--------------------------------------------------------------------------------
/main.odin:
--------------------------------------------------------------------------------
  1 | package toml
  2 | 
  3 | /*
  4 | 
  5 |     This file is for testing. It should be ignored by library users.
  6 |  
  7 |     For contributors:
  8 |     I have integrated these tests:
  9 |         https://github.com/toml-lang/toml-test
 10 | 
 11 |     To get them please download/build release 1.5.0:
 12 |         $ go install github.com/toml-lang/toml-test/cmd/toml-test@v1.5.0 
 13 | 
 14 |         To do so with shell: 
 15 |             $ export GOBIN="/tmp"
 16 |             $ go install github.com/toml-lang/toml-test/cmd/toml-test@v1.5.0
 17 |             $ odin build . 
 18 |             $ /tmp/toml-test <the built executable>
 19 | 
 20 |     Also, big thanks to tgolsson for suggesting this project
 21 |     and arp242 for actually making the tests!
 22 | 
 23 | */
 24 | 
 25 | import "core:fmt"
 26 | import "core:os"
 27 | import "core:encoding/json"
 28 | import "dates"
 29 | 
 30 | // import "core:testing"
 31 | 
 32 | exit :: os.exit
 33 | 
 34 | @(private)
 35 | main :: proc() {
 36 |     // toml-test driver: reads TOML from stdin, parses it and hands the marshalled JSON to logln; exits with code 1 on a parse error.
 37 |     // parse_file("testing/current.toml")
 38 |     // NOTE(review): stdin is read with a single os.read call into a 16 MiB buffer — input delivered in several chunks, or larger than the buffer, would be cut short; confirm.
 39 |     data := make([] u8, 16 * 1024 * 1024)
 40 |     count, err_read := os.read(os.stdin, data)
 41 |     assert(err_read == nil)
 42 | 
 43 |     table, err := parse(string(data[:count]), "<stdin>")
 44 | 
 45 |     if err.type != .None { print_error(err); os.exit(1) }
 46 | 
 47 |     idk,  ok := marshal(table)
 48 |     if !ok do return // nothing is printed when the root table could not be marshalled
 49 |     json, _ := json.marshal(idk)
 50 |     logln(string(json))
 51 | 
 52 |     // for the valid/key/quoted-unicode test
 53 |     // for k, v in table^ {
 54 |     //     logln(k, "=", v)
 55 |     // }
 56 | 
 57 |     deep_delete(table)
 58 |     // delete_error(&err)
 59 | 
 60 | }
 61 | 
 62 | // @test
 63 | // memory_test :: proc(t: ^testing.T) {
 64 | //     data := `
 65 | //     [["valid/key/dotted-4.toml-20".arr]]
 66 | //     ["valid/key/dotted-4.toml-20".arr.a]
 67 | //     `
 68 | // 
 69 | //     table, err := parse(string(data), "<f>")
 70 | // 
 71 | //     if any_of("--print-errors", ..os.args) && err.type != .None { logln(err); print_error(err) }
 72 | //     if err.type != .None do os.exit(1) 
 73 | // 
 74 | //     logln(deep_delete(table))
 75 | //     delete_error(&err)
 76 | // }
 77 | 
 78 | // One tagged value of the JSON produced for toml-test: `type` names the TOML type, `value` carries the payload.
 79 | @(private="file")
 80 | TestingType :: struct {
 81 |     type: string, // marshal sets this to "string", "bool", "integer", "float" or one of the date/time kinds
 82 |     value: union {
 83 |         map [string] HelpMePlease,
 84 |         [] HelpMePlease,
 85 |         string,
 86 |         bool,
 87 |         i64,
 88 |         f64,
 89 |     }
 90 | }
 91 | 
 92 | @(private="file")
 93 | HelpMePlease :: union { // result type of marshal: a tagged scalar, a table of further values, or a list of them
 94 |     TestingType,
 95 |     map [string] HelpMePlease,
 96 |     [] HelpMePlease
 97 | }
 98 | 
 99 | @(private="file")
100 | marshal :: proc(input: Type) -> (result: HelpMePlease, ok: bool) {
101 |     output: TestingType
102 |     // Converts a parsed value into its tagged form: lists and tables recurse, scalars become a TestingType.
103 |     switch value in input {
104 |     case nil: assert(false)
105 |     case ^List:
106 |         if value == nil do return result, false
107 |         out := make([] HelpMePlease, len(value))
108 |         for v, i in value { out[i] = marshal(v) or_continue } // an element that fails to marshal keeps its zero value
109 |         return out, true
110 | 
111 |     case ^Table:
112 |         if value == nil do return result, false
113 |         out := make(map [string] HelpMePlease)
114 |         for k, v in value { out[k] = marshal(v) or_continue } // a key whose value fails to marshal is left out
115 |         return out, true
116 | 
117 |     case string: output = { type = "string",  value = value };
118 |     case bool:   output = { type = "bool",    value = fmt.aprint(value) };
119 |     case i64:    output = { type = "integer", value = fmt.aprint(value) };
120 |     case f64:    output = { type = "float",   value = fmt.aprint(value) };
121 | 
122 |     case dates.Date: 
123 |         result, err := dates.partial_date_to_string(date = value, time_sep = 'T')
124 |         if err != .NONE do os.exit(1) // I shouldn't do this like that...
125 |         // the flags are checked in this order, so a value with several flags set gets the first matching tag
126 |         date := value
127 |         if date.is_time_only {
128 |             output.type = "time-local"
129 |         } else if date.is_date_only {
130 |             output.type = "date-local"
131 |         } else if date.is_date_local {
132 |             output.type = "datetime-local"
133 |         } else {
134 |             output.type = "datetime"
135 |         }
136 |         output.value = result
137 |     }
138 | 
139 |     return output, true
140 | }
141 | 
142 | 


--------------------------------------------------------------------------------
/misc.odin:
--------------------------------------------------------------------------------
  1 | package toml
  2 | 
  3 | import "core:fmt"
  4 | import "core:strings"
  5 | import "core:strconv"
  6 | import "core:unicode/utf8"
  7 | 
  8 | @private
  9 | find_newline :: proc(raw: string) -> (bytes: int, runes: int) { // locates the first '\r' or '\n' in raw; (-1, -1) when there is none
 10 |     for r, i in raw {
 11 |         defer runes += 1 // counts the runes visited; NOTE(review): also runs on the early return below — confirm it cannot alter the returned count
 12 |         if r == '\r' || r == '\n' do return i, runes // i is the byte offset of the line break
 13 |     }
 14 |     return -1, -1
 15 | }
 16 | 
 17 | @private
 18 | shorten_string :: proc(s: string, limit: int, or_newline := true) -> string {
 19 |     min :: proc(a, b: int) -> int {
 20 |         return a if a < b else b
 21 |     }
 22 |     // Cuts s at `limit` bytes or at its first line break, whichever comes first; or_newline is not read in this body.
 23 |     newline, _ := find_newline(s) // byte index of the first '\r' or '\n', or -1
 24 |     if newline == -1 do newline = len(s)
 25 |     // Only a shortened result is newly allocated (fmt.aprint) and followed by "..."; otherwise s itself is returned.
 26 |     if limit < len(s) || newline < len(s) {
 27 |         return fmt.aprint(s[:min(limit, newline)], "...")
 28 |     }
 29 | 
 30 |     return s
 31 | }
 32 | 
 33 | // Resolves the backslash escapes in str and returns the result as a new string. When literal is true, function JUST returns a clone of str. On a bad escape err is set and the unprocessed clone is returned.
 34 | @private
 35 | cleanup_backslashes :: proc(str: string, literal := false) -> (result: string, err: Error) {
 36 |     str := strings.clone(str)
 37 |     if literal do return str, err
 38 | 
 39 |     set_err :: proc(err: ^Error, type: ErrorType, more_fmt: string, more_args: ..any) {
 40 |         err.type = type
 41 |         b_printf(&err.more, more_fmt, ..more_args)
 42 |     }
 43 | 
 44 |     using strings
 45 |     b: Builder
 46 |     defer b_destroy(&b) // registered up front so every error return below frees the scratch buffer as well
 47 | 
 48 |     to_skip := 0
 49 | 
 50 |     // escaped: the previous rune was a backslash that still has to be resolved
 51 |     escaped: bool
 52 |     for r, i in str {
 53 | 
 54 |         if to_skip > 0 {
 55 |             to_skip -= 1
 56 |             continue
 57 |         }
 58 |         // r directly follows a backslash and selects the escape
 59 |         if escaped {
 60 |             escaped = false
 61 | 
 62 |             switch r {
 63 |             case 'u': // for \uXXXX
 64 |                 if len(str) < i + 5 {
 65 |                     set_err(&err, .Bad_Unicode_Char, "'\\u' must have 4 hex digits after it in string: %s", str)
 66 |                     return str, err
 67 |                 }
 68 | 
 69 |                 code, ok := strconv.parse_u64(str[i + 1: i + 5], 16)
 70 |                 buf, bytes := toml_ucs_to_utf8(code)
 71 | 
 72 |                 if !ok || bytes == -1 { // !ok: the 4 characters are not a hex number
 73 |                     set_err(&err, .Bad_Unicode_Char, "'%s'", str[i + 1:i + 5])
 74 |                     return str, err
 75 |                 }
 76 | 
 77 |                 parsed_rune, _ := utf8.decode_rune_in_bytes(buf[:bytes])
 78 |                 
 79 |                 write_rune(&b, parsed_rune)
 80 |                 to_skip = 4
 81 | 
 82 |             case 'U': // for \UXXXXXXXX
 83 |                 if len(str) < i + 9 {
 84 |                     set_err(&err, .Bad_Unicode_Char, "'\\U' must have 8 hex digits after it in string: %s", str)
 85 |                     return str, err
 86 |                 }
 87 |                 code, ok := strconv.parse_u64(str[i + 1:i + 9], 16)
 88 |                 buf, bytes := toml_ucs_to_utf8(code)
 89 | 
 90 |                 if !ok || bytes == -1 { // !ok: the 8 characters are not a hex number
 91 |                     set_err(&err, .Bad_Unicode_Char, "'%s'", str[i + 1:i + 9])
 92 |                     return str, err
 93 |                 }
 94 |                 
 95 |                 parsed_rune, _ := utf8.decode_rune_in_bytes(buf[:bytes])
 96 |                 
 97 |                 write_rune(&b, parsed_rune)
 98 |                 to_skip = 8
 99 | 
100 |             case 'x':
101 |                 set_err(&err, .Bad_Unicode_Char, "\\xXX is not in the spec, you can just use \\u00XX instead.")
102 |                 return str, err
103 | 
104 |             case 'n' : write_byte(&b, '\n')
105 |             case 'r' : write_byte(&b, '\r')
106 |             case 't' : write_byte(&b, '\t')
107 |             case 'b' : write_byte(&b, '\b')
108 |             case 'f' : write_byte(&b, '\f')
109 |             case '\\': write_byte(&b, '\\')
110 |             case '"' : write_byte(&b, '"')
111 |             case '\'': write_byte(&b, '\'')
112 |             case ' ', '\t', '\r', '\n': 
113 |                 // if (r == ' ' || r == '\t') && len(str) > i + 1 && (str[i + 1] != '\n' || str[i + 1] != '\r') {
114 |                 //     err.type = .Bad_Unicode_Char
115 |                 //     err.more = "cannot escape space in the middle of the line."
116 |                 // }
117 |                 // if len(str) == i + 1 {
118 |                 //     err.type = .Bad_Unicode_Char
119 |                 //     err.more = "Cannot escape space/new line when it is the last character"
120 |                 // }
121 |                 
122 |                 // Fun thing for multiline line string line escaping.
123 |                 for r in str[i + 1:] {
124 |                     if r == ' ' || r == '\t' || r == '\r' || r == '\n' do to_skip += 1
125 |                     else do break
126 |                 }
127 |             case: 
128 |                 set_err(&err, .Bad_Unicode_Char, "Unexpected escape sequence found."); 
129 |                 return str, err
130 |             }
131 |         } else if r == '\\' {
132 |             // remember the backslash; the next rune decides what it means
133 |             escaped = true
134 |         } else {
135 |             write_rune(&b, r)
136 |         }
137 |     }
138 | 
139 |     delete_string(str)
140 |     // to_string(b) only views the builder's buffer, so it is cloned before the deferred b_destroy runs
141 |     return strings.clone(to_string(b)), err
142 | }
143 | 
144 | @private
145 | any_of :: proc(a: $T, B: ..T) -> bool {
146 |     for candidate in B { if candidate == a do return true } // first match wins
147 |     return false
148 | }
149 | 
150 | @private
151 | is_space :: proc(r: u8) -> bool {
152 |     // the whitespace bytes recognised here:
153 |     // space, carriage return, line feed and tab
154 |     return r == ' ' || r == '\r' || r == '\n' || r == '\t'
155 | } 
156 | 
157 | @private
158 | is_special :: proc(r: u8) -> bool {
159 |     // the bytes treated as special here: '=', ',', '.',
160 |     // square brackets, curly braces and the NUL byte
161 |     return  r == '=' || r == ',' || r == '.' || r == '[' ||
162 |             r == ']' || r == '{' || r == '}' || r == 0
163 | } 
164 | 
165 | @private
166 | is_digit :: proc(r: rune, base: int) -> bool {
167 |     switch base {
168 |     case 2:  return '0' <= r && r <= '1'
169 |     case 8:  return '0' <= r && r <= '7'
170 |     case 10: return '0' <= r && r <= '9'
171 |     case 16: return ('0' <= r && r <= '9') || ('a' <= r && r <= 'f') || ('A' <= r && r <= 'F')
172 |     }
173 |     assert(false, "Only bases: 16, 10, 8 and 2 are supported in TOML")
174 |     return false
175 | }
176 | 
177 | // Reports whether `a` lies inside at least one of the inclusive ranges
178 | // given in `b`. `b` is read as consecutive (low, high) pairs, so it must
179 | // hold an even number of runes.
180 | @private
181 | between_any :: proc(a: rune, b: ..rune) -> bool {
182 |     assert(len(b) % 2 == 0)
183 |     pair := 0
184 |     for pair < len(b) {
185 |         low, high := b[pair], b[pair + 1]
186 |         if low <= a && a <= high do return true
187 |         pair += 2
188 |     }
189 |     return false
190 | }
185 | 
186 | // Returns how many quote characters enclose the token `a` on each side:
187 | //   3 for a multiline string (""" ... """ or ''' ... '''),
188 | //   1 for a single-line string (" ... " or ' ... '),
189 | //   0 when `a` is not quoted.
190 | //
191 | // The token must be long enough to hold both delimiters (6 bytes for a
192 | // multiline string, 2 for a single-line one), so callers can always slice
193 | // a[count : len(a) - count] without going out of range.
194 | @(private)
195 | get_quote_count :: proc(a: string) -> int {
196 |     n := len(a)
197 | 
198 |     if n >= 6 {
199 |         head, tail := a[:3], a[n - 3:]
200 |         if (head == "\"\"\"" && tail == "\"\"\"") ||
201 |            (head == "'''"    && tail == "'''") { return 3 }
202 |     }
203 | 
204 |     if n >= 2 {
205 |         first, last := a[0], a[n - 1]
206 |         if first == last && (first == '"' || first == '\'') { return 1 }
207 |     }
208 | 
209 |     return 0
210 | }
199 | 
200 | // Strips the surrounding quotes from the token `a` and hands the rest to
201 | // cleanup_backslashes().
202 | //
203 | // `a` may be a bare word or a quoted string (see get_quote_count). For an
204 | // unquoted token nothing is stripped. A line feed directly at the start of
205 | // the remaining text is dropped. `fluff` is ignored.
206 | //
207 | // For multiline strings the run of quote characters at the end of the
208 | // token is counted first; a run that is a multiple of three but not exactly
209 | // three is reported as .Bad_Value and `a` is returned unchanged.
210 | @(private)
211 | unquote :: proc(a: string, fluff: ..any) -> (result: string, err: Error) {
212 |     qcount := get_quote_count(a)
213 |     // Never strip more than the token holds (e.g. a lone `"`): the slice
214 |     // further down would otherwise be out of range.
215 |     if len(a) < 2 * qcount do qcount = 0
216 | 
217 |     if qcount == 3 {
218 |         first: rune
219 |         count: int
220 |         // Walk backwards over the closing delimiter, stopping before the
221 |         // opening one.
222 |         #reverse for r, i in a {
223 |             if i < 3 do break
224 |             if first == 0 do first = r
225 |             if r == first do count = count + 1
226 |             else if r == '\\' do count -= 1
227 |             else do break
228 |         }
229 |         if count != 3 && count % 3 == 0 {
230 |             err.type = .Bad_Value
231 |             b_write_string(&err.more, "The quote count in multiline string is divisible by 3. Lol, get fucked!")
232 |             return a, err
233 |         }
234 |     }
235 | 
236 |     unquoted := a[qcount:len(a) - qcount]
237 |     if len(unquoted) > 0 && unquoted[0] == '\n' do unquoted = unquoted[1:]
238 | 
239 |     // An empty token has no first byte to look at.
240 |     starts_with_single_quote := len(a) > 0 && a[0] == '\''
241 |     return cleanup_backslashes(unquoted, starts_with_single_quote)
242 | }
225 | 
226 | // Reports whether `a` begins with `b`. An empty `b` always matches.
227 | @(private)
228 | starts_with :: proc(a, b: string) -> bool {
229 |     if len(b) > len(a) do return false
230 |     head := a[:len(b)]
231 |     return head == b
232 | }
230 | 
231 | // Reports whether `a` ends with `b`. An empty `b` always matches.
232 | @(private)
233 | ends_with :: proc(a, b: string) -> bool {
234 |     offset := len(a) - len(b)
235 |     if offset < 0 do return false
236 |     return a[offset:] == b
237 | }
235 | 
236 | // Case-insensitive string comparison.
237 | // Only the ASCII letters a-z / A-Z are folded; every other byte has to
238 | // match exactly.
239 | @private
240 | eq :: proc(a, b: string) -> bool {
241 |     // maps an ASCII lower-case letter to upper case, leaves the rest alone
242 |     fold :: proc(c: u8) -> u8 {
243 |         if 'a' <= c && c <= 'z' do return c - 32
244 |         return c
245 |     }
246 | 
247 |     if len(a) != len(b) do return false
248 |     for i in 0..<len(a) {
249 |         if fold(a[i]) != fold(b[i]) do return false
250 |     }
251 |     return true
252 | }
250 | 
251 | // Reports whether the value `t` currently holds the ^List variant.
252 | @private
253 | is_list :: proc(t: Type) -> bool {
254 |     _, holds_list := t.(^List)
255 |     return holds_list
256 | }
257 | 
258 | // // from: https://www.cl.cam.ac.uk/~mgk25/ucs/utf8_check.c
259 | // is_rune_valid :: proc(r: rune) -> bool {
260 | //     // if !utf8.valid_rune(r) do return false
261 | // 
262 | //     s, n := utf8.encode_rune(r)
263 | // 
264 | //     if n == 1 {
265 | //         /* 0xxxxxxx */
266 | //         return true
267 | //     } else if n == 2 {
268 | //         /* 110XXXXx 10xxxxxx */
269 | //         if ((s[1] & 0xc0) != 0x80 ||
270 | //             (s[0] & 0xfe) == 0xc0) {                      /* overlong? */
271 | //             return true
272 | //         }
273 | //     } else if n == 3 {
274 | //         /* 1110XXXX 10Xxxxxx 10xxxxxx */
275 | //         if ((s[1] & 0xc0) != 0x80 ||
276 | //             (s[2] & 0xc0) != 0x80 ||
277 | //             (s[0] == 0xe0 && (s[1] & 0xe0) == 0x80) ||    /* overlong? */
278 | //             (s[0] == 0xed && (s[1] & 0xe0) == 0xa0) ||    /* surrogate? */
279 | //             (s[0] == 0xef && s[1] == 0xbf &&
280 | //                 (s[2] & 0xfe) == 0xbe)) {                    /* U+FFFE or U+FFFF? */
281 | //             return true
282 | //         }
283 | //     } else if n == 4 {
284 | //         /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
285 | //         if ((s[1] & 0xc0) != 0x80 ||
286 | //             (s[2] & 0xc0) != 0x80 ||
287 | //             (s[3] & 0xc0) != 0x80 ||
288 | //             (s[0] == 0xf0 && (s[1] & 0xf0) == 0x80) ||      /* overlong? */
289 | //             (s[0] == 0xf4 && s[1] > 0x8f) || s[0] > 0xf4) { /* > U+10FFFF? */
290 | //             return true
291 | //         }
292 | //     } else do return false
293 | // 
294 | //     return true
295 | // }
296 | 
297 | // Reports whether `r` may appear anywhere in a TOML document.
298 | //
299 | // Control characters are rejected, except for tab, line feed and carriage
300 | // return. That covers everything below U+0020 and also DEL (U+007F), which
301 | // TOML lists among the forbidden control characters.
302 | is_bare_rune_valid :: proc(r: rune) -> bool {
303 |     switch r {
304 |     case '\t', '\n', '\r':
305 |         return true
306 |     case 0x7F: // DEL
307 |         return false
308 |     }
309 |     return r >= 32
310 | }
301 | 
302 | 
303 | // Originally taken from tomlc99:
304 | // https://github.com/cktan/tomlc99
305 | 
306 | // Encodes the Unicode code point `code` as UTF-8.
307 | //
308 | // Returns the encoded bytes at the start of `buf` and the number of bytes
309 | // used (1 to 4). `byte_count` is -1, and `buf` is left zeroed, when `code`
310 | // is not a Unicode scalar value: a UTF-16 surrogate (0xD800-0xDFFF) or
311 | // anything above 0x10FFFF. UTF-8 has no valid encoding for either.
312 | // The noncharacters 0xFFFE and 0xFFFF are accepted.
313 | //
314 | // See http://stackoverflow.com/questions/6240055/manually-converting-unicode-codepoints-into-utf-8-and-utf-16
315 | toml_ucs_to_utf8 :: proc(code: u64) -> (buf: [6] u8, byte_count: int) {
316 |     if 0xD800 <= code && code <= 0xDFFF do return buf, -1
317 |     if code > 0x10FFFF do return buf, -1
318 | 
319 |     switch {
320 |     // 0x00000000 - 0x0000007F:  0xxxxxxx
321 |     case code <= 0x7F:
322 |         buf[0] = u8(code)
323 |         return buf, 1
324 | 
325 |     // 0x00000080 - 0x000007FF:  110xxxxx 10xxxxxx
326 |     case code <= 0x7FF:
327 |         buf[0] = u8(0xC0 | (code >> 6))
328 |         buf[1] = u8(0x80 | (code & 0x3F))
329 |         return buf, 2
330 | 
331 |     // 0x00000800 - 0x0000FFFF:  1110xxxx 10xxxxxx 10xxxxxx
332 |     case code <= 0xFFFF:
333 |         buf[0] = u8(0xE0 | (code >> 12))
334 |         buf[1] = u8(0x80 | ((code >> 6) & 0x3F))
335 |         buf[2] = u8(0x80 | (code & 0x3F))
336 |         return buf, 3
337 |     }
338 | 
339 |     // 0x00010000 - 0x0010FFFF:  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
340 |     buf[0] = u8(0xF0 | (code >> 18))
341 |     buf[1] = u8(0x80 | ((code >> 12) & 0x3F))
342 |     buf[2] = u8(0x80 | ((code >> 6) & 0x3F))
343 |     buf[3] = u8(0x80 | (code & 0x3F))
344 |     return buf, 4
345 | }
387 | 
388 | 
389 | 
390 | 


--------------------------------------------------------------------------------
/mod.pkg:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"version": "1.0.0",
 3 | 	"description": "TOML file parser",
 4 | 	"url": "https://github.com/Up05/toml_parser",
 5 | 	"readme": "README.md",
 6 | 	"license": "MIT",
 7 | 	"keywords": ["TOML", "parser"],
 8 | 	"dependencies": { Up05/odin-RFC-3339-date-parser }
 9 | }
10 | 


--------------------------------------------------------------------------------
/parser.odin:
--------------------------------------------------------------------------------
  1 | package toml
  2 | 
  3 | import "core:strconv"
  4 | import "core:fmt"
  5 | import "core:strings"
  6 | import rt "base:runtime"
  7 | 
  8 | import "dates"
  9 | 
 10 | Table :: map [string] Type // a TOML table: string keys mapped to values of any supported kind
 11 | List  :: [dynamic] Type // a TOML array; its elements may hold different variants
 12 | 
 13 | Type :: union { // every kind of value the parser can produce
 14 |     ^Table, // sections, dotted-key tables and inline tables
 15 |     ^List, // arrays and arrays of tables
 16 |     string,
 17 |     bool,
 18 |     i64,
 19 |     f64,
 20 |     dates.Date, // produced by parse_date()
 21 | }
 22 | 
 23 | @private
 24 | GlobalData :: struct { // state shared by the parsing procedures, reached through `g`
 25 |     toks    : [] string, // all token list
 26 |     curr    : int,       // the current token index
 27 |     err     : Error,     // current error
 28 |     root    : ^Table,    // the root/global table
 29 |     section : ^Table,    // TOML's `[section]` table
 30 |     this    : ^Table,    // TOML's local p.a.t.h or { table = {} } table
 31 |     reps    : int,       // peek() calls since the last skip(); peek() gives up once this reaches 1000
 32 |     aloc    : rt.Allocator // allocator given to parse(); make_err() formats its message with it
 33 | }
 34 | 
 35 | @private // points at parse()'s local GlobalData while parse() runs, nil otherwise (validate() presumably does the same - not visible here)
 36 | g: ^GlobalData 
 37 | 
 38 | 
 39 | // Returns the token `o` positions after the current one without consuming
 40 | // it, or "" when that position is past the end of the token list.
 41 | //
 42 | // Every successful call is counted in g.reps (skip() resets the counter).
 43 | // After 1000 calls without progress the parser is considered stuck: an
 44 | // error is recorded in g.err and "" is returned, which ends the loops that
 45 | // wait for a token.
 46 | @private
 47 | peek :: proc(o := 0) -> string {
 48 |     index := g.curr + o
 49 |     if index >= len(g.toks) do return ""
 50 | 
 51 |     if g.reps < 1000 {
 52 |         g.reps += 1
 53 |         return g.toks[index]
 54 |     }
 55 | 
 56 |     // <-- solution to the halting problem!
 57 |     token := g.toks[index]
 58 |     if token != "\n" {
 59 |         g.err.type = .Parser_Is_Stuck
 60 |         b_printf(&g.err.more, "Token: '%s' at index: %d", token, index)
 61 |     } else {
 62 |         make_err(.Bad_New_Line,  "The parser is stuck on an out-of-place new line.")
 63 |     }
 64 |     return ""
 65 | }
 55 | 
 56 | 
 57 | // Moves the current position forward by `o` tokens (`o` must not be
 58 | // negative: the parser & validator CANNOT go back, since the stuck
 59 | // detection in peek() may not work then).
 60 | // Any real advance resets the counter that peek() uses to detect a stuck parser.
 61 | @private
 62 | skip :: proc(o := 1) {
 63 |     assert(o >= 0)
 64 |     if o == 0 do return
 65 |     g.curr += o
 66 |     g.reps = 0
 67 | }
 64 | 
 65 | // Returns the current token and then advances to the following one.
 66 | @private
 67 | next :: proc() -> string {
 68 |     token := peek()
 69 |     skip()
 70 |     return token
 71 | }
 70 | 
 71 | parse :: proc(data: string, original_file: string, allocator := context.allocator) -> (tokens: ^Table, err: Error) { // Tokenizes, validates and parses `data` into a root table; `original_file` is only stored in errors.
 72 |     context.allocator = allocator // everything called from here allocates through `allocator`
 73 |     
 74 |     // === TOKENIZER ===
 75 |     raw_tokens, t_err := tokenize(data, file = original_file)
 76 |     defer delete_dynamic_array(raw_tokens) // frees the token list only; tokenize() emits slices of `data`
 77 |     if t_err.type != .None do return nil, t_err
 78 |     
 79 |     // === VALIDATOR ===
 80 |     v_err := validate(raw_tokens[:], original_file, allocator)
 81 |     if v_err.type != .None do return tokens, v_err // `tokens` has not been allocated yet, so this returns nil
 82 | 
 83 |     // === TEMP DATA ===
 84 |     tokens = new(Table) // the root table that is handed back to the caller
 85 | 
 86 |     initial_data: GlobalData = {
 87 |         toks = raw_tokens[:],
 88 |         err  = { line = 1, file = original_file }, 
 89 | 
 90 |         root    = tokens,
 91 |         this    = tokens,
 92 |         section = tokens,
 93 | 
 94 |         aloc = allocator,
 95 |     }
 96 | 
 97 |     g = &initial_data // the shared parser state lives in this stack frame
 98 |     defer g = nil
 99 | 
100 |     // === MAIN WORK ===
101 |     for peek() != "" { // peek() yields "" at the end of the tokens, or once it decides the parser is stuck
102 |         if g.err.type != .None {
103 |             return nil, g.err
104 |         }
105 |         
106 |         if peek() == "\n" {
107 |             g.err.line += 1
108 |             skip()
109 |             continue
110 |         }
111 | 
112 |         parse_statement() 
113 |         g.this = g.section // a dotted key may have moved g.this; go back to the current section
114 |     }
115 |     
116 |     if g.err.type != .None {
117 |         return nil, g.err // NOTE(review): the partially built `tokens` table is not freed on the error paths
118 |     }
119 |         
120 |     return
121 | }
122 | 
123 | // ==================== STATEMENTS ====================  
124 | 
125 | // Parses one top-level statement. The alternatives are tried in order:
126 | // `[[array of tables]]`, `[section]`, `key = value`. The first one that
127 | // reports a match ends the statement.
128 | parse_statement :: proc() {
129 |     if parse_section_list() do return
130 |     if parse_section()      do return
131 |     if parse_assign()       do return
132 | 
133 |     // nothing matched: consume an orphaned expression and drop its value
134 |     parse_expr()
135 | }
134 | 
135 | // Follows a dotted.path starting at `parent` and leaves g.this at the table that should receive the final NAME (the NAME itself is not consumed).
136 | walk_down :: proc(parent: ^Table) {
137 | 
138 |     // Each call consumes one `name .` pair and recurses into the table that
139 |     // is found (or created) under that name. It returns as soon as the
140 |     // current token is no longer followed by a '.'.
141 |     // WARNING: this is intricate. PLEASE RUN ALL TESTS IF YOU CHANGE THIS AT ALL.
142 |     // Failures are reported through g.err.
143 | 
144 |     if peek(1) != "." do return 
145 | 
146 |     name, err := unquote(next())
147 |     g.err.type = err.type // note: this overwrites the previous error state, also when unquote() succeeded
148 |     g.err.more = err.more
149 |     if err.type != .None do return
150 |     skip() // '.'
151 |     
152 |     do_not_free: bool // set once `name` has become a key of `parent`
153 |     defer if !do_not_free do delete_string(name)
154 | 
155 |     #partial switch value in parent[name] {
156 |     case nil: // nothing is stored under this name yet: create the table
157 |         g.this = new(Table); 
158 |         parent[name] = g.this; 
159 |         do_not_free = true
160 | 
161 |     case ^Table: // continue inside the existing table
162 |         g.this = value
163 | 
164 |     case ^List: // continue inside the last element of the list
165 |         if len(value^) == 0 {
166 |             g.this = new(Table)
167 |             append(value, g.this)
168 | 
169 |         } else {
170 |             table, is_table := value[len(value^) - 1].(^Table)
171 |             if !is_table {
172 |                 make_err(.Key_Already_Exists, name)
173 |                 return
174 |             }
175 |             g.this = table
176 |         }
177 | 
178 |     case: // any other kind of value cannot be walked through
179 |         make_err(.Key_Already_Exists, name)
180 |         return
181 |     }
182 | 
183 |     walk_down(g.this) // handle the next `name .` pair, if there is one
184 | }
185 | 
186 | 
187 | // Parses an array-of-tables header: `[[dotted.path.NAME]]`.
188 | //
189 | // Returns false, consuming nothing, when the current tokens are not `[[`.
190 | // Otherwise returns true. On success a new table is appended to the list
191 | // stored under NAME (the list is created if NAME is unused) and becomes
192 | // the current section. Failures are reported through g.err and leave the
193 | // remaining tokens of the header unconsumed; parse() stops at the first
194 | // recorded error.
195 | parse_section_list :: proc() -> bool {
196 |     if peek(0) != "[" || peek(1) != "[" do return false
197 |     skip(2) // '[' '['
198 | 
199 |     g.this = g.root
200 |     g.section = g.root
201 |     walk_down(g.root)
202 |     // keep the error walk_down() recorded instead of overwriting it below
203 |     if g.err.type != .None do return true
204 | 
205 |     name, err := unquote(next()) // take care with ordering of this btw
206 |     if err.type != .None {
207 |         g.err.type = err.type
208 |         g.err.more = err.more
209 |         return true
210 |     }
211 | 
212 |     list: ^List
213 |     if name not_in g.this {
214 |         list = new(List)
215 |         g.this[name] = list // `name` is now owned by the table
216 |     } else {
217 |         existing, holds_list := g.this[name].(^List)
218 |         if !holds_list {
219 |             // "%s" keeps a '%' inside the key from being read as a format verb
220 |             make_err(.Key_Already_Exists, "%s", name)
221 |             delete_string(name)
222 |             return true
223 |         }
224 |         list = existing
225 |         delete_string(name) // the table already has this key; drop our copy
226 |     }
227 | 
228 |     result := new(Table)
229 |     append(list, result)
230 | 
231 |     skip(2) // ']' ']'
232 |     g.section = result
233 |     return true
234 | }
219 | 
220 | // put() is only used in parse_section, so it's specialized
221 | // general version: commit 8910187045028ce13df3214e04ace6071ea89158
222 | //
223 | // Links the section table `value` into `parent` under `key` and returns
224 | // the table that the section's contents must be written to:
225 | //   - key unused:      `value` is stored under `key` and returned;
226 | //   - key is a table:  the entries of `value` are moved into the existing
227 | //                      table, which is returned;
228 | //   - key is a list:   `value` is appended to it and returned;
229 | //   - anything else:   .Key_Already_Exists is recorded, `value` is returned
230 | //                      without being linked anywhere.
231 | //
232 | // Callers must continue with the returned table. In the "table" case
233 | // `value` only receives a copy of the existing map header (kept for older
234 | // callers); inserting through that copy does not update the table that is
235 | // actually stored in `parent`.
236 | put :: proc(parent: ^Table, key: string, value: ^Table) -> (target: ^Table) {
237 |     target = value
238 | 
239 |     #partial switch existing in parent[key] {
240 |     case ^Table:
241 |         for k, v in value { existing[k] = v }
242 |         delete_map(value^)
243 |         value^ = existing^
244 |         target = existing
245 | 
246 |     case ^List:
247 |         append(existing, value)
248 | 
249 |     case nil:
250 |         parent[key] = value
251 | 
252 |     case:
253 |         // "%s" keeps a '%' inside the key from being read as a format verb
254 |         make_err(.Key_Already_Exists, "%s", key)
255 |     }
256 |     return
257 | }
258 | 
259 | // Parses a table header: `[dotted.path.NAME]`.
260 | //
261 | // Returns false, consuming nothing, when the current token is not `[`.
262 | // Otherwise returns true. On success the table registered under NAME
263 | // becomes both g.this and g.section. Failures are reported through g.err
264 | // and leave the rest of the header unconsumed; parse() stops at the first
265 | // recorded error.
266 | parse_section :: proc() -> bool {
267 |     if peek() != "[" do return false
268 |     skip() // '['
269 | 
270 |     g.this = g.root
271 |     g.section = g.root
272 |     walk_down(g.root)
273 |     // keep the error walk_down() recorded instead of overwriting it below
274 |     if g.err.type != .None do return true
275 | 
276 |     name, err := unquote(next()) // take care with ordering of this btw
277 |     if err.type != .None {
278 |         g.err.type = err.type
279 |         g.err.more = err.more
280 |         return true
281 |     }
282 | 
283 |     // put() only stores `name` when the key is not present yet
284 |     key_is_new := name not_in g.this
285 | 
286 |     result := new(Table)
287 |     target := put(g.this, name, result)
288 | 
289 |     if g.err.type != .None {
290 |         // put() rejected the key: `result` was not linked anywhere
291 |         free(result)
292 |         delete_string(name)
293 |         return true
294 |     }
295 | 
296 |     // The section continues an existing table: work on that table itself,
297 |     // the freshly allocated one is not needed.
298 |     if target != result do free(result)
299 |     if !key_is_new do delete_string(name)
300 | 
301 |     skip() // ']'
302 |     g.this = target
303 |     g.section = target
304 |     return true
305 | }
267 | 
268 | // Parses `dotted.path.KEY = value` into g.this.
269 | //
270 | // Returns false, consuming nothing, when the token after the current one
271 | // is neither `=` nor `.`. Otherwise returns true; failures are reported
272 | // through g.err. An error that is already recorded (for example by
273 | // walk_down()) is never replaced by a later success.
274 | parse_assign :: proc()  -> bool {
275 |     if peek(1) != "=" && peek(1) != "." do return false
276 | 
277 |     walk_down(g.this)
278 | 
279 |     key, err := unquote(peek())
280 |     if err.type != .None {
281 |         g.err.type = err.type
282 |         g.err.more = err.more
283 |         return true
284 |     }
285 | 
286 |     if any_of(u8('\n'), ..transmute([] u8)peek()) {
287 |         make_err(.Bad_Name, "Keys cannot have raw new lines in them")
288 |         delete_string(key) // the key is not going to be stored
289 |         return true
290 |     }
291 | 
292 |     skip(2);
293 |     value := parse_expr()
294 | 
295 |     if key in g.this {
296 |         // "%s" keeps a '%' inside the key from being read as a format verb
297 |         make_err(.Key_Already_Exists, "%s", key)
298 |     }
299 | 
300 |     g.this[key] = value
301 |     return true
302 | }
293 | 
294 | // ==================== EXPRESSIONS ====================  
295 | 
296 | 
297 | // Parses one value at the current token. The kinds are tried in a fixed
298 | // order: string, bool, date, float, int, list, inline table; the first
299 | // parser that reports success decides the result.
300 | // If none succeeds, the value returned is whatever the last attempt
301 | // (parse_table) handed back.
302 | parse_expr :: proc() -> (result: Type) {
303 |     if text,   ok := parse_string(); ok do return text
304 |     if flag,   ok := parse_bool();   ok do return flag
305 |     if date,   ok := parse_date();   ok do return date
306 |     if number, ok := parse_float();  ok do return number
307 |     if number, ok := parse_int();    ok do return number
308 |     if list,   ok := parse_list();   ok do return list
309 | 
310 |     table, _ := parse_table()
311 |     return table
312 | }
308 | 
309 | // Parses a quoted string token.
310 | //
311 | // `ok` is false, and nothing is consumed, unless the current token starts
312 | // with `"` or `'`. Otherwise the token is consumed and unquoted. A failure
313 | // of unquote() is recorded in g.err; a success leaves g.err untouched, so
314 | // an error recorded earlier in the same statement is not lost.
315 | parse_string :: proc() -> (result: string, ok: bool) {
316 |     if len(peek()) == 0 do return
317 |     if r := peek()[0]; !any_of(r, '"', '\'') do return 
318 |     str, err := unquote(next())
319 |     if err.type != .None {
320 |         g.err.type = err.type
321 |         g.err.more = err.more
322 |     }
323 |     return str, true
324 | }
317 | 
318 | // Parses the literals `true` and `false`. The token is only consumed when
319 | // it is one of the two; otherwise `ok` is false.
320 | parse_bool :: proc() -> (result: bool, ok: bool) {
321 |     is_true := peek() == "true"
322 |     if !is_true && peek() != "false" do return false, false
323 | 
324 |     skip()
325 |     return is_true, true
326 | }
323 | 
324 | // Tries to read a float at the current position.
325 | //
326 | // Recognised forms:
327 | //   - `inf` / `nan`, optionally preceded by exactly one `+` or `-`;
328 | //   - three tokens `<int> . <fraction>` (the tokenizer splits at '.');
329 | //   - a single token containing an `e` / `E` exponent that is not a hex
330 | //     literal.
331 | // Underscores are removed before the conversion. `ok` is false when the
332 | // token is not a float (it is an int then, and nothing was consumed) or
333 | // when the conversion fails.
334 | parse_float :: proc() -> (result: f64, ok: bool) {
335 | 
336 |     has_e_but_not_x :: proc(s: string) -> bool {
337 |         if len(s) > 2       { if any_of(s[1], 'x', 'X') do return false }
338 |         #reverse for r in s { if any_of(r,    'e', 'E') do return true }
339 |         return false
340 |     }
341 | 
342 |     Infinity : f64 = 1e5000
343 |     NaN := transmute(f64) ( transmute(i64) Infinity | 1 ) 
344 | 
345 |     // Signed special values. The first character has to be a sign: a token
346 |     // such as `xnan` is not a float.
347 |     if len(peek()) == 4 {
348 |         sign := peek()[0]
349 |         if sign == '+' || sign == '-' {
350 |             if peek()[1:] == "inf" {
351 |                 skip()
352 |                 if sign == '-' do return -Infinity, true
353 |                 return +Infinity, true
354 |             }
355 |             if peek()[1:] == "nan" { skip(); return NaN, true }
356 |         }
357 |     }
358 | 
359 |     if peek() == "nan" { skip(); return NaN, true }
360 |     if peek() == "inf" { skip(); return Infinity, true }
361 | 
362 |     if peek(1) == "." {
363 |         number := fmt.aprint(peek(), ".", peek(2), sep = "")
364 |         cleaned, has_alloc := strings.remove_all(number, "_")
365 |         defer if has_alloc do delete(cleaned)
366 |         defer delete(number)
367 |         skip(3)
368 |         return strconv.parse_f64(cleaned)
369 | 
370 |     } else if has_e_but_not_x(peek()) {
371 |         cleaned, has_alloc := strings.remove_all(next(), "_")
372 |         defer if has_alloc do delete(cleaned)
373 |         return strconv.parse_f64(cleaned)
374 |     }
375 | 
376 |     // it's an int then
377 |     return 
378 | }
361 | 
362 | // Parses the current token with strconv.parse_i64. The token is consumed
363 | // only when the conversion succeeds.
364 | parse_int :: proc() -> (result: i64, ok: bool) {
365 |     value, parsed := strconv.parse_i64(peek())
366 |     if !parsed do return value, false
367 | 
368 |     skip()
369 |     return value, true
370 | }
367 | 
368 | // Parses a date, a time or a date-time.
369 | //
370 | // `ok` is false, and nothing is consumed, when dates.is_date_lax() rejects
371 | // the current token. Otherwise up to four tokens are joined back together
372 | // (date, time and a `.` fraction arrive as separate tokens) and handed to
373 | // dates.from_string(). `ok` stays true even if that conversion fails; the
374 | // failure is reported as .Bad_Date through g.err.
375 | parse_date :: proc() -> (result: dates.Date, ok: bool) { 
376 |     using strings
377 |     if !dates.is_date_lax(peek(0)) do return
378 |     ok = true
379 | 
380 |     full: Builder
381 |     // released on every path, including the error return below
382 |     defer builder_destroy(&full)
383 |     write_string(&full, next())
384 | 
385 |     // is date, time or both?
386 |     if dates.is_date_lax(peek()) {
387 |         write_rune(&full, ' ')
388 |         write_string(&full, next())
389 |     }
390 | 
391 |     if peek() == "." {
392 |         write_byte(&full, '.'); skip()
393 |         write_string(&full, next())
394 |     }
395 | 
396 |     err: dates.DateError
397 |     result, err = dates.from_string(to_string(full))
398 |     if err != .NONE {
399 |         make_err(.Bad_Date, "Received error: %v by parsing: '%s' as date\n", err, to_string(full))
400 |     }
401 |     return
402 | }
398 | 
399 | // Parses an array: `[ value, value, ... ]`.
400 | //
401 | // `ok` is false, and nothing is consumed, unless the current token is `[`.
402 | // Commas and line breaks between the elements are skipped (line breaks
403 | // advance the line counter). The loop ends at `]` or when peek() returns "".
404 | parse_list :: proc() -> (result: ^List, ok: bool) { 
405 |     if peek() != "[" do return nil, false
406 |     skip() // '['
407 | 
408 |     result = new(List)
409 | 
410 |     for {
411 |         if any_of(peek(), "]", "") do break
412 | 
413 |         if peek() == "," {
414 |             skip()
415 |             continue
416 |         }
417 |         if peek() == "\n" {
418 |             g.err.line += 1
419 |             skip()
420 |             continue
421 |         }
422 | 
423 |         append(result, parse_expr())
424 |     }
425 | 
426 |     skip() // ']'
427 |     return result, true
428 | }
418 | 
419 | // Parses an inline table: `{ key = value, ... }`.
420 | //
421 | // `ok` is false, and nothing is consumed, unless the current token is `{`.
422 | // While the entries are parsed, g.this and g.section point at the new
423 | // table so that parse_assign() writes into it; both are restored before
424 | // returning. The loop ends at `}` or when peek() returns "".
425 | parse_table :: proc() -> (result: ^Table, ok: bool) { 
426 |     if peek() != "{" do return nil, false
427 |     skip() // '{'
428 | 
429 |     result = new(Table)
430 |     saved_this    := g.this
431 |     saved_section := g.section
432 | 
433 |     for {
434 |         if any_of(peek(), "}", "") do break
435 | 
436 |         if peek() == "," {
437 |             skip()
438 |             continue
439 |         }
440 |         if peek() == "\n" {
441 |             g.err.line += 1
442 |             skip()
443 |             continue
444 |         }
445 | 
446 |         // parse_assign() may move g.this (dotted keys), so aim it at the
447 |         // inline table again before every entry
448 |         g.this    = result
449 |         g.section = result
450 |         parse_assign()
451 |     }
452 | 
453 |     g.this    = saved_this
454 |     g.section = saved_section
455 | 
456 |     skip() // '}'
457 |     return result, true
458 | }
440 | 
441 | // Records an error of the given type in g.err. The previous message is
442 | // discarded and replaced by the formatted `more_fmt` / `more_args`, which
443 | // is written using the allocator stored in g.aloc.
444 | @(private="file")
445 | make_err :: proc(type: ErrorType, more_fmt: string, more_args: ..any) {
446 |     context.allocator = g.aloc
447 | 
448 |     g.err.type = type
449 |     message := &g.err.more
450 |     b_reset(message)
451 |     b_printf(message, more_fmt, ..more_args)
452 | }
448 | 
449 | // Records an error (see make_err) when `cond` does not hold.
450 | // Returns true exactly when the error was recorded.
451 | @(private="file")
452 | err_if_not :: proc(cond: bool, type: ErrorType, more_fmt: string, more_args: ..any) -> bool {
453 |     if cond do return false
454 | 
455 |     make_err(type, more_fmt, ..more_args)
456 |     return true
457 | }
454 | 


--------------------------------------------------------------------------------
/tests/test.odin:
--------------------------------------------------------------------------------
 1 | package tests
 2 | 
 3 | import "core:testing"
 4 | 
 5 | import toml ".."
 6 | 
 7 | // toml.get* must report "not found" instead of crashing, both for a nil
 8 | // table pointer (the nil guard in toml.get) and for an empty table.
 9 | @(test)
10 | nil_guard_get :: proc(t: ^testing.T) {
11 | 	// nil pointer: this is the case the guard exists for
12 | 	_, found_in_nil := toml.get_bool(nil, "enabled")
13 | 	testing.expectf(t, found_in_nil == false, "should not crash on nullptr exception not found")
14 | 
15 | 	// zero-value table: a valid pointer to a map that was never allocated
16 | 	table: toml.Table
17 | 	_, found := toml.get_bool(&table, "enabled")
18 | 	testing.expectf(t, found == false, "lookup in an empty table should report not found")
19 | }
14 | 


--------------------------------------------------------------------------------
/tokenizer.odin:
--------------------------------------------------------------------------------
  1 | package toml
  2 | 
  3 | // Splits the TOML source `raw` into a flat list of tokens.
  4 | //
  5 | // Tokens are slices of `raw` (nothing is copied), except line breaks,
  6 | // which are always emitted as the literal "\n". Whitespace and comments
  7 | // produce no tokens; each of `= , . [ ] { }` is a token of its own; quoted
  8 | // strings are emitted including their quotes; everything else is split at
  9 | // whitespace, special characters and '#'.
 10 | //
 11 | // `file` is only stored in the returned error, `err.line` counts the line
 12 | // breaks seen. On failure `err.type` is set and the tokens collected so
 13 | // far are returned along with it.
 14 | tokenize :: proc(raw: string, file := "<unknown file>") -> (tokens: [dynamic] string, err: Error) {
 15 |     err = { file = file, line = 1 }
 16 | 
 17 |     // Number of upcoming runes that are already covered by the token (or
 18 |     // comment) handled last. This replaces having state.
 19 |     skip: int
 20 |     outer: for r, i in raw {
 21 |         this := raw[i:]
 22 | 
 23 |         switch { // by the way, do NOT use the 'fallthrough' keyword
 24 |         // Checked before `skip`, so control characters are rejected inside
 25 |         // strings and comments too (makes more invalid tests pass).
 26 |         case !is_bare_rune_valid(r):
 27 |             set_err(&err, .Bad_Unicode_Char, "'%v'", r)
 28 |             return
 29 | 
 30 |         // a carriage return that is not followed by a line feed is an error
 31 |         case r == '\r' && len(raw) > i + 1 && raw[i + 1] != '\n':
 32 |             set_err(&err, .Bad_Unicode_Char, "carriage returns must be followed by new lines in TOML!")
 33 |             return
 34 | 
 35 |         // skips until the end of e.g.: string and comment
 36 |         case skip > 0: 
 37 |             skip -= 1
 38 | 
 39 |         // unix new lines
 40 |         case r == '\n':
 41 |             append(&tokens, "\n")
 42 |             err.line += 1
 43 | 
 44 |         // windows new lines: the '\n' that follows is consumed here too, so
 45 |         // that one line break gives one token and one line increment
 46 |         case starts_with(raw[i:], "\r\n"):
 47 |             append(&tokens, "\n")
 48 |             err.line += 1
 49 |             skip += 1
 50 | 
 51 |         case is_space(this[0]):
 52 |             // do nothing
 53 | 
 54 |         case is_special(this[0]):
 55 |             append(&tokens, this[:1])
 56 | 
 57 |         // removes a comment (in one go)
 58 |         case r == '#':
 59 |             j, runes := find_newline(this)
 60 |             if j == -1 do return tokens, { }
 61 |             skip += runes - 1
 62 | 
 63 |         // ============ START OF STRINGS ============ 
 64 |         case starts_with(this, "\"\"\""):
 65 |             j, runes := find(this, "\"\"\"", 3)
 66 |             if j == -1 do return tokens, set_err(&err, .Missing_Quote, shorten_string(this, 16))
 67 |             // extra quotes right after the closing delimiter belong to the token
 68 |             j2, runes2 := go_further(this[j + 3:], '"')
 69 |             j += j2; runes += runes2
 70 |             append(&tokens, this[:j + 3])
 71 |             skip += runes + 2
 72 | 
 73 |         case starts_with(this, "'''"):
 74 |             j, runes := find(this, "'''", 3, false)
 75 |             if j == -1 do return tokens, set_err(&err, .Missing_Quote, shorten_string(this, 16))
 76 |             j2, runes2 := go_further(this[j + 3:], '\'')
 77 |             j += j2; runes += runes2
 78 |             append(&tokens, this[:j + 3])
 79 |             skip += runes + 2
 80 |         
 81 |         case r == '"':
 82 |             j, runes := find(this, "\"", 1)
 83 |             if j == -1 do return tokens, set_err(&err, .Missing_Quote, shorten_string(this, 16))
 84 |             append(&tokens, this[:j + 1])
 85 |             skip += runes
 86 | 
 87 |         case r == '\'':
 88 |             j, runes := find(this, "'", 1, false)
 89 |             if j == -1 do return tokens, set_err(&err, .Missing_Quote, shorten_string(this, 16))
 90 |             append(&tokens, this[:j + 1])
 91 |             skip += runes
 92 |         // ============  END OF STRINGS  ============ 
 93 | 
 94 |         // tokenizes all leftover things (in one go)
 95 |         // this is "text", numbers & so on
 96 |         case:
 97 |             key := leftover(this)
 98 |             // no terminator before the end of the input: the token is
 99 |             // everything that is left
100 |             if len(key) == 0 do key = this
101 |             append(&tokens, key)
102 |             // `skip` counts runes, not bytes
103 |             for _ in key do skip += 1
104 |             skip -= 1
105 |         }
106 |     }
107 | 
108 |     return tokens, err
109 | 
110 | }
 90 | 
 91 | // Returns the bare token at the start of `raw`: everything up to (not
 92 | // including) the first whitespace byte, special byte or '#'.
 93 | // When none of those occurs, the token runs to the end of the input and
 94 | // all of `raw` is returned, so a final token without a trailing line break
 95 | // is not lost.
 96 | @(private="file")
 97 | leftover :: proc(raw: string) -> string {
 98 |     // Byte-wise scan: every terminator is ASCII and can never be part of a
 99 |     // multi-byte UTF-8 sequence.
100 |     for i in 0..<len(raw) {
101 |         c := raw[i]
102 |         if is_space(c) || is_special(c) || c == '#' {
103 |             return raw[:i]
104 |         }
105 |     }
106 |     return raw
107 | }
100 | 
101 | // Searches `a` for the first occurrence of `b` at or after byte `skip`.
102 | //
103 | // With `escape` set, a backslash protects the rune that follows it, so an
104 | // escaped delimiter is not taken as a match.
105 | // Returns the byte index of the match in `a` and the number of runes that
106 | // precede it, or (-1, -1) when there is no match.
107 | // The rune count simply adds `skip` for the part that was skipped. That is
108 | // bad: it would be best to count runes up until "skip".
109 | @(private="file")
110 | find :: proc(a: string, b: string, skip := 0, escape := true) -> (bytes: int, runes: int) {
111 |     tail := a[skip:]
112 |     after_backslash := false
113 |     passed := 0 // runes of `tail` in front of the current one
114 | 
115 |     for r, offset in tail {
116 |         switch {
117 |         case after_backslash:
118 |             after_backslash = false
119 |         case escape && r == '\\':
120 |             after_backslash = true
121 |         case starts_with(tail[offset:], b):
122 |             return offset + skip, passed + skip
123 |         }
124 |         passed += 1
125 |     }
126 |     return -1, -1
127 | }
112 | 
113 | // Measures the run of `r1` runes at the start of `a`.
114 | //
115 | // `runes` is the length of that run. `bytes` is the byte index of the
116 | // first rune that differs from `r1`; when every rune of `a` matches it is
117 | // the byte index of the last rune, and 0 for an empty string.
118 | @(private="file")
119 | go_further :: proc(a: string, r1: rune) -> (bytes: int, runes: int) {
120 |     for current, offset in a {
121 |         bytes = offset
122 |         if current != r1 do break
123 |         runes += 1
124 |     }
125 |     return bytes, runes
126 | }
122 | 
123 | // Sets the type of `err`, appends the formatted message to `err.more`
124 | // and returns a copy of the updated error.
125 | @(private="file")
126 | set_err :: proc(err: ^Error, type: ErrorType, more_fmt: string, more_args: ..any) -> Error {
127 |     b_printf(&err.more, more_fmt, ..more_args)
128 |     err.type = type
129 |     return err^
130 | }
129 | 


--------------------------------------------------------------------------------
/toml.odin:
--------------------------------------------------------------------------------
  1 | package toml
  2 | 
  3 | import "core:strings"
  4 | import "core:os"
  5 | import "base:intrinsics"
  6 | import "base:runtime"
  7 | import "dates"
  8 | 
  9 | import "core:fmt"
 10 | 
 11 | log     :: fmt.print // short names for the printing procedures used by print_table / print_value
 12 | logf    :: fmt.printf
 13 | logln   :: fmt.println
 14 | 
 15 | assertf :: fmt.assertf // formatted assertion, used by get_panic
 16 | 
 17 | Builder        :: strings.Builder // aliases for the string-builder API that the error messages are assembled with
 18 | b_destroy      :: strings.builder_destroy
 19 | b_reset        :: strings.builder_reset
 20 | b_write_string :: strings.write_string
 21 | b_printf       :: fmt.sbprintf
 22 | 
 23 | // Reads the file `filename` and parses its contents as TOML.
 24 | // You can use print_error(err) for error messages.
 25 | // A file that cannot be read yields a .Bad_File error carrying the file
 26 | // name. The file contents are released again once parsing has finished.
 27 | parse_file :: proc(filename: string, allocator := context.allocator) -> (section: ^Table, err: Error) {
 28 |     context.allocator = allocator
 29 | 
 30 |     blob, loaded := os.read_entire_file_from_filename(filename)
 31 |     if loaded {
 32 |         defer delete_slice(blob)
 33 |         return parse(string(blob), filename, allocator)
 34 |     }
 35 | 
 36 |     err.type = .Bad_File
 37 |     b_write_string(&err.more, filename)
 38 |     return nil, err
 39 | }
 37 | 
 38 | // Parses TOML that is already in memory. This is made to be used with
 39 | // default, err := #load(filename). original_filename is only used for errors.
 40 | parse_data :: proc(data: []u8, original_filename := "untitled data", allocator := context.allocator) -> (section: ^Table, err: Error) {
 41 |     text := string(data)
 42 |     section, err = parse(text, original_filename, allocator)
 43 |     return
 44 | }
 42 | 
 43 | // Frees all of the memory allocated by the parser for a particular value.
 44 | // It is recursive, so you can just give it the root Table.
 45 | //
 46 | // Lists and tables have their contents released first (for tables the keys
 47 | // as well), then their storage, then the List / Table object itself.
 48 | // Strings are deleted, nil pointers and all other kinds are left alone.
 49 | // The first allocator error stops the walk and is returned.
 50 | deep_delete :: proc(type: Type, allocator := context.allocator) -> (err: runtime.Allocator_Error) {
 51 |     context.allocator = allocator
 52 | 
 53 |     #partial switch value in type {
 54 |     case string:
 55 |         return delete_string(value)
 56 | 
 57 |     case ^List:
 58 |         if value == nil do return
 59 |         for &element in value {
 60 |             err = deep_delete(element, allocator)
 61 |             if err != .None do return
 62 |         }
 63 |         err = delete_dynamic_array(value^)
 64 |         if err != .None do return
 65 |         free(value)
 66 | 
 67 |     case ^Table:
 68 |         if value == nil do return
 69 |         for key, &entry in value {
 70 |             err = delete_string(key)
 71 |             if err != .None do return
 72 |             err = deep_delete(entry, allocator)
 73 |             if err != .None do return
 74 |         }
 75 |         err = delete_map(value^)
 76 |         if err != .None do return
 77 |         free(value)
 78 |     }
 79 |     return
 80 | }
 73 | 
 74 | // Retrieves and type checks the value at path. The last element of path is
 75 | // the actual key, the elements before it name the nested tables to go
 76 | // through. section may be any Table.
 77 | //
 78 | // `ok` is false when `section` is nil, when a table on the way is missing,
 79 | // is nil or is not a table, when the key does not exist, or when the value
 80 | // is not of type T. An empty path is a programming error and asserts.
 81 | get :: proc($T: typeid, section: ^Table, path: ..string) -> (val: T, ok: bool)
 82 |     where intrinsics.type_is_variant_of(Type, T)
 83 | {
 84 |     assert(len(path) > 0, "You must specify at least one path str in toml.get()!")
 85 |     if section == nil do return val, false
 86 | 
 87 |     section := section
 88 |     for dir in path[:len(path) - 1] {
 89 |         // a missing key yields a nil value, which fails the type assertion
 90 |         child, is_table := section[dir].(^Table)
 91 |         if !is_table || child == nil do return val, false
 92 |         section = child
 93 |     }
 94 | 
 95 |     last := path[len(path) - 1]
 96 |     if last in section do return section[last].(T)
 97 |     return val, false
 98 | }
 95 | 
 96 | // Also retrieves and typechecks the value at path, but if something goes
 97 | // wrong, it crashes the program: a nil section, an empty path or a missing
 98 | // key trips an assertion, a value of the wrong type fails the type
 99 | // assertion.
100 | get_panic :: proc($T: typeid, section: ^Table, path: ..string) -> T
101 |     where intrinsics.type_is_variant_of(Type, T)
102 | {
103 |     assert(len(path) > 0, "You must specify at least one path str in toml.get_panic()!")
104 |     assert(section != nil, "toml.get_panic() was given a nil table!")
105 | 
106 |     section := section
107 |     for dir in path[:len(path) - 1] {
108 |         // report the path element that is missing, not the whole path
109 |         assertf(dir in section, "Missing key: '%s' in table '%v'!", dir, section^)
110 |         section = section[dir].(^Table)
111 |     }
112 |     last := path[len(path) - 1]
113 |     assertf(last in section, "Missing key: '%s' in table '%v'!", last, section^)
114 |     return section[last].(T)
115 | }
110 | 
111 | // Writes section as `{ key = value, ... }` through log().
112 | // Currently(2024-06-__), Odin hangs if you simply fmt.print Table, hence this procedure.
113 | // level is the nesting depth; the closing new line is only logged at level 0.
114 | print_table :: proc(section: ^Table, level := 0) {
115 |     log("{ ")
116 |     remaining := len(section)
117 |     for key, value in section {
118 |         remaining -= 1
119 |         log(key, "= ")
120 |         print_value(value, level)
121 |         // A comma separates entries; the last entry is followed by a plain space.
122 |         if remaining > 0 do log(", ")
123 |         else do log(" ")
124 |     }
125 |     log("}")
126 |     if level == 0 do logln()
127 | }
125 | 
126 | // Prints one value for print_table(): tables recurse through print_table() one level
127 | // deeper, lists are written as `[ a, b ]`, strings are quoted with %q and every other
128 | // variant is handed to log() as it is.
129 | @(private="file")
130 | print_value :: proc(v: Type, level := 0) {
131 |     #partial switch inner in v {
132 |     case string:
133 |         logf("%q", v)
134 |     case ^[dynamic] Type:
135 |         log("[ ")
136 |         last_index := len(inner) - 1
137 |         for element, index in inner {
138 |             print_value(element, level)
139 |             if index == last_index do log(" ")
140 |             else do log(", ")
141 |         }
142 |         log("]")
143 |     case ^Table:
144 |         print_table(inner, level + 1)
145 |     case:
146 |         log(v)
147 |     }
148 | }
145 | 
146 | // Here lies the code for LSP:
147 | // non-generic shorthands that forward to get() with a fixed type.
148 | get_i64 :: proc(section: ^Table, path: ..string) -> (val: i64, ok: bool) {
149 |     return get(i64, section, ..path)
150 | }
151 | get_f64 :: proc(section: ^Table, path: ..string) -> (val: f64, ok: bool) {
152 |     return get(f64, section, ..path)
153 | }
154 | get_bool :: proc(section: ^Table, path: ..string) -> (val: bool, ok: bool) {
155 |     return get(bool, section, ..path)
156 | }
157 | get_string :: proc(section: ^Table, path: ..string) -> (val: string, ok: bool) {
158 |     return get(string, section, ..path)
159 | }
160 | get_date :: proc(section: ^Table, path: ..string) -> (val: dates.Date, ok: bool) {
161 |     return get(dates.Date, section, ..path)
162 | }
163 | get_list :: proc(section: ^Table, path: ..string) -> (val: ^List, ok: bool) {
164 |     return get(^List, section, ..path)
165 | }
166 | get_table :: proc(section: ^Table, path: ..string) -> (val: ^Table, ok: bool) {
167 |     return get(^Table, section, ..path)
168 | }
161 | 
162 | // Non-generic shorthands that forward to get_panic() with a fixed type.
163 | get_i64_panic :: proc(section: ^Table, path: ..string) -> i64 {
164 |     return get_panic(i64, section, ..path)
165 | }
166 | get_f64_panic :: proc(section: ^Table, path: ..string) -> f64 {
167 |     return get_panic(f64, section, ..path)
168 | }
169 | get_bool_panic :: proc(section: ^Table, path: ..string) -> bool {
170 |     return get_panic(bool, section, ..path)
171 | }
172 | get_string_panic :: proc(section: ^Table, path: ..string) -> string {
173 |     return get_panic(string, section, ..path)
174 | }
175 | get_date_panic :: proc(section: ^Table, path: ..string) -> dates.Date {
176 |     return get_panic(dates.Date, section, ..path)
177 | }
178 | get_list_panic :: proc(section: ^Table, path: ..string) -> ^List {
179 |     return get_panic(^List, section, ..path)
180 | }
181 | get_table_panic :: proc(section: ^Table, path: ..string) -> ^Table {
182 |     return get_panic(^Table, section, ..path)
183 | }
176 | 
177 | 


--------------------------------------------------------------------------------
/validator.odin:
--------------------------------------------------------------------------------
  1 | package toml
  2 | 
  3 | import "base:runtime"
  4 | import "core:fmt"
  5 | 
  6 | ErrorType :: enum { // Kinds of problems stored in Error.type; format_error() maps each one to a message.
  7 |     None, // No error has been recorded.
  8 |     // Something in the input could not be read or parsed.
  9 |     Bad_Date,
 10 |     Bad_File,
 11 |     Bad_Float,
 12 |     Bad_Integer,
 13 |     Bad_Name,
 14 |     Bad_New_Line,
 15 |     Bad_Unicode_Char,
 16 |     Bad_Value,
 17 |     // Something that was expected is absent.
 18 |     Missing_Bracket,
 19 |     Missing_Comma,
 20 |     Missing_Key,
 21 |     Missing_Newline,
 22 |     Missing_Quote,
 23 |     Missing_Value,
 24 |     // Remaining problems.
 25 |     Double_Comma,
 26 |     Expected_Equals,
 27 |     Key_Already_Exists,
 28 |     Parser_Is_Stuck,
 29 |     Unexpected_Token,
 30 | }
 31 | 
 32 | Error :: struct { // One reported problem: what went wrong and where.
 33 |     type: ErrorType, // .None means that no error was recorded.
 34 |     line: int,    // format_error() prints this as line + 1.
 35 |     file: string, // Printed in front of the message; delete_error() does not free it.
 36 |     more: Builder, // Extra detail text; released by delete_error() when type is not .None.
 37 | }
 38 | 
 39 | // Releases the detail text held in err.more; an error of type .None is left untouched.
 40 | // The filename is not freed, since it is only sliced
 41 | delete_error :: proc(err: ^Error) {
 42 |     if err.type == .None do return
 43 |     b_destroy(&err.more)
 44 | }
 43 | 
 44 | // Formats err with format_error(), logs it behind a "[TOML ERROR]" prefix and frees the
 45 | // temporary message again. This may also be a warning!
 46 | // allocator is used both to build and to free that message.
 47 | // Returns the fatal flag reported by format_error().
 48 | print_error :: proc(err: Error, allocator := context.allocator) -> (fatal: bool) {
 49 |     message: string
 50 |     {
 51 |         // The message has to come from the allocator it is deleted with below, so the
 52 |         // allocator is handed over explicitly and through the context for this call only.
 53 |         context.allocator = allocator
 54 |         message, fatal = format_error(err, allocator)
 55 |     }
 56 |     if message != "" {
 57 |         logf("[TOML ERROR] %s", message)
 58 |         delete(message, allocator)
 59 |     }
 60 |     return fatal
 61 | }
 54 | 
 55 | // Builds "<file>:<line + 1> <description>! <details>\n" for err.
 56 | // The message is allocated with allocator and should be freed after use.
 57 | // fatal is currently always true.
 58 | format_error :: proc(err: Error, allocator := context.allocator) -> (message: string, fatal: bool) {
 59 |     // fmt.aprintf allocates from context.allocator, so route it through the requested one.
 60 |     context.allocator = allocator
 61 | 
 62 |     descriptions : [ErrorType] string = {
 63 |         .None               = "",
 64 |         .Bad_Date           = "Failed to parse a date",
 65 |         .Bad_File           = "Toml parser could not read the given file",
 66 |         .Bad_Float          = "Failed to parse a floating-point number (may be invalid value)",
 67 |         .Bad_Integer        = "Failed to parse an integer",
 68 |         .Bad_Name           = "Bad key/table name found before, use quotes, or only 'A-Za-z0-9_-'",
 69 |         .Bad_New_Line       = "New line is out of place",
 70 |         .Bad_Unicode_Char   = "Found an invalid unicode character in string",
 71 |         .Bad_Value          = "Bad value found after '='",
 72 |         .Double_Comma       = "Lists must have exactly 1 comma after each element (except trailing commas are optional)",
 73 |         .Expected_Equals    = "Expected '=' after assignment of a key",
 74 |         .Key_Already_Exists = "That key/section already exists",
 75 |         .Missing_Bracket    = "A bracket is missing (one of: '[', '{', '}', ']')",
 76 |         .Missing_Comma      = "A comma is missing",
 77 |         .Missing_Key        = "Expected key before '='",
 78 |         .Missing_Newline    = "A new line is missing between two key-value pairs",
 79 |         .Missing_Quote      = "Missing a quote",
 80 |         .Missing_Value      = "Expected a value after '='",
 81 |         .Parser_Is_Stuck    = "Parser has halted due to being in an infinite loop",
 82 |         .Unexpected_Token   = "Found a token that should not be there",
 83 |     }
 84 | 
 85 |     return fmt.aprintf("%s:%d %s! %s\n", err.file, err.line + 1, descriptions[err.type], err.more.buf[:]), true
 86 | }
 82 | 
 83 | // Skips all consecutive new lines and counts each of them in g.err.line.
 84 | // Returns true when the current token was a new line, i.e. when anything was skipped.
 85 | // new lines should not be skipped everywhere
 86 | // that's why this is not inside of the peek() procedure.
 87 | skip_newline :: proc() -> (ok: bool) {
 88 |     ok = peek() == "\n"
 89 |     for peek() == "\n" {
 90 |         g.err.line += 1
 91 |         skip()
 92 |     }
 93 |     return ok
 94 | }
 87 | 
 88 | // Checks a whole token stream statement by statement and returns the first error found
 89 | // (type .None when everything validated). file is stored in the error for reporting;
 90 | // allocator is stored in the run's GlobalData (make_err() builds its detail text with it).
 91 | validate :: proc(raw_tokens: [] string, file: string, allocator := context.allocator) -> Error {
 92 | 
 93 |     initial_data: GlobalData = {
 94 |         toks = raw_tokens,
 95 |         err  = { line = 1, file = file },
 96 |         aloc = allocator,
 97 |     }
 98 | 
 99 |     // The validate_* procedures work on the package-level g; point it at this run's data
100 |     // and put the previous value back on the way out.
101 |     snapshot := g
102 |     g = &initial_data
103 |     defer g = snapshot
104 | 
105 |     for peek() != "" {
106 |         // Fall back to the generic message only when no validator recorded a more specific
107 |         // error; otherwise that error (e.g. the hint to use 'true' instead of 'Yes') would be lost.
108 |         if !validate_stmt() && g.err.type == .None {
109 |             make_err(.Unexpected_Token, "Could not validate the (assumed to be) statement: %s", peek())
110 |         }
111 |         if g.err.type != .None do break
112 |     }
113 | 
114 |     // Copy the error out before the deferred restore of g runs.
115 |     err := g.err
116 |     return err
117 | }
110 | 
111 | // One statement: either a run of new lines, or a `[[array]]` header, a `[table]` header
112 | // or a `key = value` assignment that is followed by a new line or the end of the tokens.
113 | // The alternatives are tried in that order and later ones only run when earlier ones fail.
114 | validate_stmt :: proc() -> bool {
115 |     if skip_newline() do return true
116 |     if !validate_array() && !validate_table() && !validate_assign() do return false
117 | 
118 |     line_ends := peek() == "" || peek() == "\n"
119 |     return !err_if_not(line_ends, .Missing_Newline, "Found a missing new line between statements.")
120 | }
117 | 
118 | // array of tables: `[[item]]` at the start of lines
119 | // Returns false without consuming anything unless the next two tokens are both "[".
120 | validate_array :: proc() -> bool {
121 |     if !(peek(0) == "[" && peek(1) == "[") do return false
122 | 
123 |     // peek(0) is the one-character token "[", so index 1 is past its end; bounds checks are
124 |     // switched off for that read. NOTE(review): presumably this looks at the next byte of
125 |     // the source text the token was sliced from - confirm against the tokenizer.
126 |     opening_joined: bool
127 |     #no_bounds_check {
128 |         opening_joined = peek(0)[1] == '['
129 |     }
130 |     if err_if_not(opening_joined, .Missing_Bracket, "In section array both brackets must follow one another! '[[' not '[ ['") do return false
131 | 
132 |     skip(2) // '[' '['
133 |     validate_path()
134 | 
135 |     if peek(0) == "]" && peek(1) == "]" {
136 |         closing_joined: bool
137 |         #no_bounds_check {
138 |             closing_joined = peek(0)[1] == ']'
139 |         }
140 |         if err_if_not(closing_joined, .Missing_Bracket, "In section array both brackets must follow one another! ']]' not '] ]'") do return false
141 |     }
142 | 
143 |     // Both closing brackets have to be present.
144 |     for _ in 0..<2 {
145 |         if err_if_not(next() == "]", .Missing_Bracket, "']' missing in section array declaration") do return false
146 |     }
147 |     return true
148 | }
136 | 
137 | // tables: `[object]` at the start of lines
138 | // Returns false without consuming anything when the current token is not "[".
139 | validate_table :: proc() -> bool {
140 |     if peek(0) == "[" {
141 |         skip() // '['
142 |         validate_path()
143 |         closed := next() == "]"
144 |         return !err_if_not(closed, .Missing_Bracket, "']' missing in section declaration")
145 |     }
146 |     return false
147 | }
145 | 
146 | // key = value
147 | // Only attempted when the token after the current one is "=" or "." (a dotted key).
148 | validate_assign :: proc() -> bool {
149 |     if !(peek(1) == "=" || peek(1) == ".") do return false
150 |     if !validate_path() do return false
151 | 
152 |     has_equals := peek() == "="
153 |     if err_if_not(has_equals, .Expected_Equals, "Keys must be followed by '=' and then the value! Instead got: %s", peek()) do return false
154 | 
155 |     skip() // '='
156 |     return validate_expr()
157 | }
155 | 
156 | // there.are.dotted.paths.in.toml   each "directory" is supposed to be an object, last depends on the context.
157 | // for example: in statement [[a.b]] a is a Table, b is a List of Table(s)
158 | // Consumes `name ('.' name)*`; a new line next to a '.' is reported as .Bad_New_Line.
159 | validate_path :: proc() -> bool {//{{{
160 |     // Currently accepts any token as a name: it is consumed and reported as valid.
161 |     validate_name :: proc() -> bool {
162 |         skip()
163 |         return true
164 |     }
165 | 
166 |     for {
167 |         dot_follows := peek(1) == "."
168 |         if dot_follows && (peek(0) == "\n" || peek(2) == "\n") {
169 |             make_err(.Bad_New_Line, "paths.of.keys must be on the same line")
170 |             return false
171 |         }
172 | 
173 |         if !validate_name() {
174 |             make_err(.Bad_Name, "key in path cannot have this name: '%s'", peek())
175 |             return false
176 |         }
177 | 
178 |         // The name that is not followed by a '.' is the last part of the path.
179 |         if !dot_follows do break
180 |         skip() // '.'
181 |     }
182 | 
183 |     return true
184 | }//}}}
184 | 
185 | // Order matters. There can be expressions without statements (See: last line of validate_assign()).
186 | // The value validators are tried one after another; the first one that succeeds wins.
187 | validate_expr :: proc() -> bool {
188 |     if validate_string()       do return true
189 |     if validate_bool()         do return true
190 |     if validate_date()         do return true
191 |     if validate_inline_list()  do return true
192 |     if validate_inline_table() do return true
193 |     return validate_number()
194 | }
194 | 
195 | // Validates a string token and consumes it. Returns false right away when the token is
196 | // empty or does not start with a double or single quote.
197 | validate_string :: proc() -> bool {//{{{
198 |     // Every quote pattern the token starts with must also end it (.Missing_Quote otherwise);
199 |     // the token is consumed when all checks pass.
200 |     validate_quotes :: proc() -> bool {
201 |         PATTERNS := [] string { "\"\"\"", "'''", "\"", "\'", }
202 |         for quote in PATTERNS {
203 |             if !starts_with(peek(), quote) do continue
204 |             closed := ends_with(peek(), quote)
205 |             if err_if_not(closed, .Missing_Quote, "string '%s' is missing one or more quotes", peek()) do return false
206 |         }
207 |         skip()
208 |         return true
209 |     }
210 | 
211 |     token := peek()
212 |     if len(token) == 0 do return false
213 |     first := token[0]
214 |     if !any_of(first, '"', '\'') do return false
215 | 
216 |     return validate_quotes()
217 |     // this should be done in the tokenizer & cleanup_backslashes() (it isn't):  || validate_escapes() || validate_codepoints()
218 | }//}}}
213 | 
214 | // Validates `true` / `false` and consumes the token. Any other casing of the two words is
215 | // consumed as well, but reported as .Bad_Value. `yes` / `no` only record a hint and are
216 | // not accepted (the procedure returns false for them).
217 | validate_bool :: proc() -> bool {  //{{{
218 |     token := peek()
219 |     if eq(token, "yes") do make_err(.Bad_Value, "'Yes' is not a valid expression in TOML, please use 'true'!")
220 |     if eq(token, "no")  do make_err(.Bad_Value, "'No' is not a valid expression in TOML, please use 'false'!")
221 | 
222 |     // eq is case-insensitive compare, while '==' operator is case-sensitive
223 |     if !(eq(token, "false") || eq(token, "true")) do return false
224 | 
225 |     is_lowercase := token == "false" || token == "true"
226 |     wrong_case := err_if_not(is_lowercase, .Bad_Value, "booleans must be lowercase")
227 |     skip()
228 |     return !wrong_case
229 | }//}}}
224 | 
225 | // Validates a date, a time or a date-time at the current token and consumes it.
226 | // Handled shapes: `1234-12-12`, `1234-12-12T12:30:00`, a date token followed by a separate
227 | // time token, and a time on its own. Returns true when a date and/or a time token was
228 | // consumed; malformed digits are recorded as .Bad_Date.
229 | validate_date :: proc() -> (ok: bool) {  //{{{
230 |     // True when str starts with `DDDD-DD-DD` (D = digit); value ranges are not checked.
231 |     is_proper_date :: proc(str: string) -> bool {
232 |         // I hope, LLVM can do something with this...
233 |         return len(str) > 9 &&
234 |             str[0] >= '0' && str[0] <= '9' &&
235 |             str[1] >= '0' && str[1] <= '9' &&
236 |             str[2] >= '0' && str[2] <= '9' &&
237 |             str[3] >= '0' && str[3] <= '9' &&
238 |             str[4] == '-' &&
239 |             str[5] >= '0' && str[5] <= '9' &&
240 |             str[6] >= '0' && str[6] <= '9' &&
241 |             str[7] == '-' &&
242 |             str[8] >= '0' && str[8] <= '9' &&
243 |             str[9] >= '0' && str[9] <= '9'
244 |     }
245 | 
246 |     // True when str starts with `DD:DD:DD` (D = digit); value ranges are not checked.
247 |     is_proper_time :: proc(str: string) -> bool {
248 |         return len(str) > 7 &&
249 |             str[0] >= '0' && str[0] <= '9' &&
250 |             str[1] >= '0' && str[1] <= '9' &&
251 |             str[2] == ':' &&
252 |             str[3] >= '0' && str[3] <= '9' &&
253 |             str[4] >= '0' && str[4] <= '9' &&
254 |             str[5] == ':' &&
255 |             str[6] >= '0' && str[6] <= '9' &&
256 |             str[7] >= '0' && str[7] <= '9'
257 |     }
258 | 
259 |     // Validates `DD:DD:DD`, an optional fractional part and an optional `+DD:DD` / `-DD:DD`
260 |     // offset. When a fraction is present its two extra tokens ('.' and the digits) are skipped;
261 |     // the token that holds the time itself is left for the caller to consume.
262 |     validate_time :: proc(str: string) -> bool {
263 |         if err_if_not(is_proper_time(str), .Bad_Date, "The date: '%s' is not valid, please use rfc 3339 (e.g.: 1234-12-12, or 60:45:30+02:00)", peek()) do return false
264 | 
265 |         offset := str[8:] if len(str) > 8 else ""
266 | 
267 |         // because of dotted.keys, 'start' '.' 'end' are different tokens.
268 |         if peek(1) == "." {
269 |             for r, i in peek(2) {
270 |                 // A sign ends the fraction; everything from there on is the offset.
271 |                 if r == '-' || r == '+' {
272 |                     offset = peek(2)[i:]
273 |                     break
274 |                 }
275 |                 if err_if_not(is_digit(r, 10) || r == 'Z' || r == 'z', .Bad_Date, "Bad millisecond count in the date.") do return false
276 |             }
277 |             skip(2)
278 |         }
279 | 
280 |         if offset == "" do return true
281 | 
282 |         if offset[0] == '+' || offset[0] == '-' {
283 |             s := offset[1:]
284 |             // The length of the offset itself has to be checked (not the length of the whole
285 |             // time string), otherwise a truncated offset such as `+1` is indexed out of bounds.
286 |             return len(s) > 4 &&
287 |                 s[0] >= '0' && s[0] <= '9' &&
288 |                 s[1] >= '0' && s[1] <= '9' &&
289 |                 s[2] == ':' &&
290 |                 s[3] >= '0' && s[3] <= '9' &&
291 |                 s[4] >= '0' && s[4] <= '9'
292 |         }
293 |         return true // 'Z' and 'z' are unnecessary in TOML
294 |     }
295 | 
296 |     // Dates will necessarily have - as their 5th symbol: "0123-00-00"
297 |     if len(peek()) > 4 && peek()[4] == '-' {
298 |         err_if_not(is_proper_date(peek()), .Bad_Date, "The date: '%s' is not valid, please use rfc 3339 (e.g.: 1234-12-12, or 60:45:30+02:00)", peek())
299 | 
300 |         // time can be seperated either by { 't', 'T' or ' ' }, ' ' is split by tokenizer
301 |         if len(peek()) > 11 && (peek()[10] == 'T' || peek()[10] == 't') {
302 |             if !validate_time(peek()[11:]) do return false
303 |         }
304 |         next()
305 |         ok = true
306 |     }
307 | 
308 |     // Time can be either without date or split from it by whitespace.
309 |     // This handles both scenarios
310 |     if len(peek()) > 2 && peek()[2] == ':' {
311 |         validate_time(peek())
312 |         next()
313 |         ok = true
314 |     }
315 | 
316 |     return ok
317 | }//}}}
306 | 
307 | // Validates an integer or float (optional sign, nan/inf, 0x/0o/0b prefix, underscores, fraction, exponent) and consumes its tokens. Good luck!
308 | validate_number :: proc() -> bool {//{{{
309 |     at :: proc(s: string, i: int) -> rune { for r, j in s do if i == j do return r; return 0 } // rune that starts at byte offset i, or 0 when there is none
310 |     // The sign is only dropped from this local copy; the token itself is consumed at the end.
311 |     number := peek()
312 |     if at(number, 0) == '+' || at(number, 0) == '-' do number = number[1:] 
313 | 
314 |     if eq(number, "nan") || eq(number, "inf") {
315 |         err_if_not(number == "nan" || number == "inf", .Bad_Float, "NaN and Inf must be fully lowercase in TOML: `nan` and `inf`! (I don't know why). Your's is: '%s'", peek())
316 |         skip()
317 |         return true
318 |     }
319 |     // Splits a at the first rune that also occurs in b; returns (a, "") when there is none.
320 |     split_by :: proc(a: string, b: string) -> (string, string) {
321 |         for r1, i in a {
322 |             for r2 in b {
323 |                if r1 == r2 do return a[:i], a[i + 1:]
324 |             }
325 |         }
326 |         return a, ""
327 |     }
328 |     
329 |     // underscores must be between 2 digits
330 |     validate_underscores :: proc(r: rune, p: rune, is_last: bool) -> bool {
331 |         if r != '_' do return true
332 |         switch {
333 |         case p == '_' : make_err(.Bad_Integer, "Double underscore mid number")
334 |         case p == 0   : make_err(.Bad_Integer, "Underscore cannot be the first character in a number")
335 |         case is_last  : make_err(.Bad_Integer, "Underscore cannot be the last character in a number")
336 |         case: return true
337 |         }
338 |         return false
339 |     }
340 |     
341 |     // I split the number into three parts:  main.fractionEexponent or mainEexponent 
342 |     main, fraction, exponent: string
343 |     // NOTE(review): the split at 'e'/'E' happens before the base prefix is looked at, so the hex digits 'e'/'E' are treated as an exponent marker as well.
344 |     {
345 |         exp1, exp2: string
346 |         main, exp1 = split_by(number, "eE")
347 |         if peek(1) == "." { // the fraction is read from the token after the '.' token
348 |             fraction, exp2 = split_by(peek(2), "eE")
349 | 
350 |             if exp1 != "" && exp2 != "" {
351 |                 make_err(.Bad_Float, "A number cannot have 2 exponent parts! '1e5.7e6' is invalid")
352 |                 return false
353 |             }
354 |         }
355 |         exponent = exp1 if exp1 != "" else exp2
356 |         if at(exponent, 0) == '-' || at(exponent, 0) == '+' do exponent = exponent[1:] 
357 |     }
358 |     
359 |     // If a number starts with zero it must be followed by 'x', 'o', 'b' or nothing
360 |     base := 10
361 |     if at(main, 0) == '0' {
362 |         switch at(main, 1) {
363 |         case 'x': base = 16; main = main[2:]
364 |         case 'o': base =  8; main = main[2:]
365 |         case 'b': base =  2; main = main[2:]
366 |         case  0 : ; // a lone "0"
367 |         case: make_err(.Bad_Integer, "A number cannot start with '0'. Please use '0o1234' for octal") // the error is recorded, validation carries on
368 |         }
369 |     }
370 | 
371 |     prev: rune
372 |     // Each part must start with a digit of the detected base (otherwise this is not a number) and may only contain such digits and well-placed underscores.
373 |     prev = 0 
374 |     for r, i in main {
375 |         if prev == 0 && !is_digit(r, base) do return false
376 |         if err_if_not(is_digit(r, base) || r == '_', .Bad_Integer, "Unexpected character: '%v' in number", r) do return false
377 |         if !validate_underscores(r, prev, i == len(main) - 1) do return false
378 |         prev = r
379 |     }
380 | 
381 |     prev = 0
382 |     for r, i in fraction {
383 |         if prev == 0 && !is_digit(r, base) do return false
384 |         if err_if_not(is_digit(r, base) || r == '_', .Bad_Integer, "Unexpected character: '%v' in decimal part of number ", r) do return false
385 |         if !validate_underscores(r, prev, i == len(fraction) - 1) do return false
386 |         prev = r
387 |     }
388 |     
389 |     prev = 0
390 |     for r, i in exponent {
391 |         if prev == 0 && !is_digit(r, base) do return false
392 |         if err_if_not(is_digit(r, base) || r == '_', .Bad_Integer, "Unexpected character: '%v' in exponent part of number", r) do return false
393 |         if !validate_underscores(r, prev, i == len(exponent) - 1) do return false
394 |         prev = r
395 |     }
396 |     // Consume the main token and, when there is a fraction, the '.' and fraction tokens too.
397 |     skip()
398 |     if fraction != "" do skip(2)
399 |     return true
400 | }//}}}
401 | 
402 | // Validates an inline list `[ a, b, ... ]` at the current token and consumes it.
403 | // New lines are allowed between elements, a trailing comma is accepted, a missing comma is
404 | // reported as .Missing_Comma and two commas in a row as .Double_Comma.
405 | // Returns false without consuming anything when the current token is not "[".
406 | validate_inline_list :: proc() -> bool { //{{{
407 |     if peek() != "[" do return false
408 |     skip() // '['
409 | 
410 |     for {
411 |         skip_newline()
412 |         if peek() == "]" do break
413 | 
414 |         if !validate_expr() do return false
415 | 
416 |         skip_newline()
417 |         if peek() == "]" do break
418 | 
419 |         if err_if_not(peek() == ",", .Missing_Comma, "Comma is missing between elements") do return false
420 |         skip() // ','
421 |         skip_newline()
422 |         if peek() == "," {
423 |             make_err(.Double_Comma, "double comma found in an inline list.")
424 |             return false
425 |         }
426 |     }
427 | 
428 |     return !err_if_not(next() == "]", .Missing_Bracket, "']' missing in inline array declaration")
429 | }//}}}
429 | 
430 | // Validates an inline table `{ key = value, ... }` at the current token and consumes it.
431 | // New lines are allowed between entries, a missing comma is reported as .Missing_Comma
432 | // and two commas in a row as .Double_Comma.
433 | // Returns false without consuming anything when the current token is not "{".
434 | validate_inline_table :: proc() -> bool { //{{{
435 |     if peek() != "{" do return false
436 |     skip() // '{'
437 | 
438 |     for {
439 |         skip_newline()
440 |         if peek() == "}" do break
441 | 
442 |         if !validate_assign() do return false
443 | 
444 |         skip_newline()
445 |         if peek() == "}" do break
446 | 
447 |         if err_if_not(peek() == ",", .Missing_Comma, "Comma is missing between elements") do return false
448 |         skip() // ','  // you can have trailing commas in my inline tables, why not?
449 |         skip_newline()
450 |         if peek() == "," {
451 |             // This is the table validator, so the message must not talk about a list.
452 |             make_err(.Double_Comma, "double comma found in an inline table.")
453 |             return false
454 |         }
455 |     }
456 | 
457 |     return !err_if_not(next() == "}", .Missing_Bracket, "'}' missing in inline table declaration")
458 | }//}}}
454 | 
455 | // Records an error in g.err: the detail text in g.err.more is reset and rewritten from
456 | // more_fmt / more_args (with g.aloc as the context allocator) and the type is stored.
457 | @(private="file")
458 | make_err :: proc(type: ErrorType, more_fmt: string, more_args: ..any) {
459 |     context.allocator = g.aloc
460 | 
461 |     details := &g.err.more
462 |     b_reset(details)
463 |     b_printf(details, more_fmt, ..more_args)
464 | 
465 |     g.err.type = type
466 | }
462 | 
463 | // Records the given error through make_err() when cond is false.
464 | // Returns true when an error was recorded, i.e. the negation of cond.
465 | @(private="file")
466 | err_if_not :: proc(cond: bool, type: ErrorType, more_fmt: string, more_args: ..any) -> bool {
467 |     if cond do return false
468 |     make_err(type, more_fmt, ..more_args)
469 |     return true
470 | }
468 | 


--------------------------------------------------------------------------------