├── LICENSE ├── module.jai └── readme.md /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Raphael Luba 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /module.jai: -------------------------------------------------------------------------------- 1 | // Parses the given csv string into an array of T 2 | // The get_member function is called once for each column. 3 | // It gets passed column title, index and get_member_data and should return the name of the member of T that the column should be parsed into. 4 | // @ToDo: Use macro instead of get_member function? 
// How csv_parse works:
//  * The first line is the header. Each column title is passed to get_member together with the
//    zero-based column index and get_member_data. A non-empty return value must name a member of T
//    whose type is string, integer or float; an empty return value means the column is consumed but ignored.
//  * Every following line produces one T. String members receive a copy_string() of the cell,
//    float and integer cells are converted by parse_float / parse_integer.
//  * On any error a message is logged and the rows collected so far are returned with success == false.
csv_parse :: (csv_data: string, $T: Type, get_member_data: $U, get_member: (string, int, U) -> string, delimiter : u8 = #char ",", parse_integer := parse_integer_column, parse_float := parse_float_column) -> [..] T, success: bool {
    remaining_data := csv_data;
    results: [..] T;

    struct_info := type_info(T);
    column_members: [..] *Type_Info_Struct_Member; // One entry per column; null marks a column that is skipped.
    defer array_free(column_members);

    // Header line: resolve every column title to a member of T.
    header_done := false;
    while !header_done {
        title, title_ok, title_is_last := csv_consume_col(*remaining_data, delimiter);
        if !title_ok {
            log_error("Could not read column % of first line", column_members.count + 1);
            return results, false;
        }
        header_done = title_is_last;

        member_name := get_member(title, column_members.count, get_member_data);
        member: *Type_Info_Struct_Member;
        if member_name {
            member = get_field(struct_info, member_name);
            if !member {
                log_error("Type % does not contain a member named \"%\"", T, member_name);
                return results, false;
            }

            tag := member.type.type;
            supported := tag == .STRING || tag == .INTEGER || tag == .FLOAT;
            if !supported {
                log_error("Member \"%\" has unsupported type %", member_name, member.type);
                return results, false;
            }
        }
        array_add(*column_members, member);
    }

    // Data lines: one T per line until the input is used up.
    while remaining_data {
        row: T;
        line_start  := remaining_data;
        line_number := results.count + 2; // Used in error messages only.

        for member, column: column_members {
            // @Speed: Skip col if we’re not interested (instead of potentially un-quoting)
            cell, cell_ok, cell_is_last := csv_consume_col(*remaining_data, delimiter);
            expect_last := (column == column_members.count - 1);
            if !cell_ok || cell_is_last != expect_last {
                log_error("Could not read column % of line %:\n%", column + 1, line_number, line_start);
                return results, false;
            }

            if !member continue; // Column was not mapped to a member.

            slot := (cast(*u8) *row) + member.offset_in_bytes;
            if member.type.type == {
                case .STRING;
                    << (cast(*string) slot) = copy_string(cell);

                case .FLOAT;
                    parsed, float_value := parse_float(member.name, cell);
                    if !parsed {
                        log_error("Could not parse column % of line % as float:\n%", column + 1, line_number, line_start);
                        return results, false;
                    }

                    if member.type.runtime_size == 4 {
                        (<< cast(*float) slot) = cast(float) float_value;
                    } else {
                        assert(member.type.runtime_size == 8);
                        (<< cast(*float64) slot) = float_value;
                    }

                case .INTEGER;
                    parsed, int_value := parse_integer(member.name, cell);
                    if !parsed {
                        log_error("Could not parse column % of line % as integer:\n%", column + 1, line_number, line_start);
                        return results, false;
                    }

                    int_info := cast(*Type_Info_Integer) member.type;
                    if !write_integer(member.name, int_info, slot, int_value) return results, false;

                case;
                    // Should have been caught above
                    assert(false);
            }
        }
        array_add(*results, row);
    }

    return results, true;
}

csv_consume_col :: (remaining_data: *string, delimiter: u8) -> col: string, success: bool, last: bool {
    if !remaining_data.count return "", true, true;

    num_quotes := 0;
    delim_pos := -1;
    last := false;
    for index: 0..remaining_data.count-1 {
        // NOTE(review): the source text is truncated on the next line — the rest of csv_consume_col and the
        // header of the following procedure (csv_escape, judging by the readme) are missing. Kept as found; restore from the original file.
        c := (< string {
    needs_quotes := false;
    num_quotes := 0;
    for i: 0..value.count-1 {
        if value[i] == #char "\"" {
            num_quotes += 1;
            needs_quotes = true;
        } else if value[i] == #char "\n" || value[i] == delimiter {
            needs_quotes = true;
        }
    }

    if !needs_quotes return value;

    result := talloc_string(value.count + num_quotes + 2);
    result.data[0] = #char "\"";
    result.count = 1;
    for i: 0..value.count-1 {
        result.data[result.count] = value[i];
        result.count += 1;
        if value[i] == #char "\"" {
            result.data[result.count] = #char "\"";
            result.count += 1;
        }
    }

result.data[result.count] = #char "\""; 175 | result.count += 1; 176 | 177 | return result; 178 | } 179 | 180 | append_csv_escaped :: (builder: *String_Builder, value: string, delimiter : u8 = #char ",") { 181 | needs_quotes := false; 182 | num_quotes := 0; 183 | for i: 0..value.count-1 { 184 | if value[i] == #char "\"" { 185 | num_quotes += 1; 186 | needs_quotes = true; 187 | } else if value[i] == #char "\n" || value[i] == delimiter { 188 | needs_quotes = true; 189 | } 190 | } 191 | 192 | if !needs_quotes { 193 | append(builder, value); 194 | return; 195 | } 196 | 197 | append(builder, #char "\""); 198 | for i: 0..value.count-1 { 199 | if value[i] == #char "\"" { 200 | append(builder, "\"\""); 201 | } else { 202 | append(builder, value[i]); 203 | } 204 | } 205 | append(builder, #char "\""); 206 | } 207 | 208 | parse_integer_column :: (name: string, string_value: string) -> success: bool, value: s64 { 209 | int_value, success, remainder := to_integer(string_value); 210 | return success && !remainder, int_value; 211 | } 212 | 213 | parse_float_column :: (name: string, string_value: string) -> success: bool, value: float64 { 214 | float_value, success, remainder := string_to_float64(string_value); 215 | return success && !remainder, float_value; 216 | } 217 | 218 | 219 | #scope_file 220 | 221 | // Copied from modules/Command_Line. 222 | // @ToDo: This should probably be in modules/Reflection because all "parse into a type" parsers need it. 223 | write_integer :: (name: string, info: *Type_Info_Integer, pointer: *void, value: s64) -> bool { 224 | if info.signed { 225 | valid, low, high := range_check_and_store(value, info, pointer); 226 | 227 | if !valid { 228 | log_error("The value of argument '%' %, which is out of range. (The value must be between % and %.)", name, value, low, high); 229 | return false; 230 | } 231 | } else { 232 | valid, low, high := range_check_and_store(cast(u64) value, info, pointer); // Different overload from the above! 
233 | 234 | if !valid { 235 | log_error("The value of argument '%' is %, which is out of range. (The value must be between % and %.)", name, cast(u64) value, low, high); 236 | return false; 237 | } 238 | } 239 | 240 | return true; 241 | } 242 | 243 | range_check_and_store :: (value: $T, info: *Type_Info_Integer, pointer: *void) -> (success: bool, low: T, high: T) { 244 | #assert((T == u64) || (T == s64)); 245 | 246 | store :: (pointer: *void, value: T, size: int) { 247 | if size == { 248 | case 1; 249 | << cast(*s8) pointer = xx,no_check value; 250 | case 2; 251 | << cast(*s16) pointer = xx,no_check value; 252 | case 4; 253 | << cast(*s32) pointer = xx,no_check value; 254 | case 8; 255 | << cast(*s64) pointer = xx,no_check value; 256 | case; 257 | assert(false); 258 | } 259 | } 260 | 261 | size := info.runtime_size; 262 | 263 | #if T == u64 { 264 | low, high := unsigned_integer_range_from_size(size); 265 | if (value < low) || (value > high) return false, low, high; 266 | 267 | store(pointer, value, size); 268 | return true, low, high; 269 | } else { 270 | low, high := signed_integer_range_from_size(size); 271 | if (value < low) || (value > high) return false, low, high; 272 | 273 | store(pointer, value, size); 274 | return true, low, high; 275 | } 276 | } 277 | 278 | signed_integer_range_from_size :: (size_in_bytes: int) -> (low: s64, high: s64) { 279 | assert((size_in_bytes == 1) || (size_in_bytes == 2) || (size_in_bytes == 4) || (size_in_bytes == 8)); 280 | 281 | high := (1 << (size_in_bytes*8-1)) - 1; 282 | low := ~high; 283 | 284 | return low, high; 285 | } 286 | 287 | unsigned_integer_range_from_size :: (size_in_bytes: int) -> (low: u64, high: u64) { 288 | assert((size_in_bytes == 1) || (size_in_bytes == 2) || (size_in_bytes == 4) || (size_in_bytes == 8)); 289 | 290 | high: u64 = ---; 291 | if size_in_bytes == 8 { 292 | high = cast(u64) 0xffff_ffff_ffff_ffff; // Hardcode 8 to prevent unpredictable behavior due to platform-specific details. 
In the future the outcome of << will be language-defined in all cases. 293 | } else { 294 | high = cast(u64) (1 << (size_in_bytes*8)) - 1; // @CompilerBug: Why do we need to cast the 1? That is dumb. 295 | } 296 | 297 | return 0, high; 298 | } 299 | 300 | 301 | #import "Basic"; 302 | #import "String"; 303 | #import "Text_File_Handler"; 304 | 305 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Tiny CSV module for Jai 2 | 3 | This tiny module has just three primary functions: 4 | 5 | * `csv_parse` parses a CSV string into an array of any given type. 6 | * `csv_escape` escapes a value (if needed) so that it can be safely written into a CSV column. 7 | * `append_csv_escaped` is similar to `csv_escape` but directly appends the (potentially escaped) value to a `String_Builder`. 8 | 9 | Currently, `csv_parse` can only parse into string, float, and integer members. 10 | 11 | See [`module.jai`](./module.jai) for details. 12 | 13 | ## Memory model 14 | 15 | You’re responsible for freeing everything (including strings) in the result array returned by `csv_parse`. 16 | Using a Pool allocator around your `csv_parse` calls might be a good idea. 17 | --------------------------------------------------------------------------------