├── LICENSE
├── module.jai
└── readme.md


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Raphael Luba
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/module.jai:
--------------------------------------------------------------------------------
  1 | // Parses the given csv string into an array of T
  2 | // The get_member function is called once for each column.
  3 | // It gets passed column title, index and get_member_data and should return the name of the member of T that the column should be parsed into.
  4 | // @ToDo: Use macro instead of get_member function?
  5 | csv_parse :: (csv_data: string, $T: Type, get_member_data: $U, get_member: (string, int, U) -> string, delimiter : u8 = #char ",", parse_integer := parse_integer_column, parse_float := parse_float_column) -> [..] T, success: bool {
  6 | 	remaining_data := csv_data;
  7 | 	results: [..] T;
  8 | 
  9 | 	info := type_info(T);
 10 | 	column_members: [..] *Type_Info_Struct_Member;
 11 | 	defer array_free(column_members);
 12 | 
 13 | 	last := false;
 14 | 	success: bool;
 15 | 	while !last {
 16 | 		name: string = ---;
 17 | 		name, success, last = csv_consume_col(*remaining_data, delimiter);
 18 | 		if !success {
 19 | 			log_error("Could not read column % of first line", column_members.count + 1);
 20 | 			return results, false;
 21 | 		}
 22 | 		member_name := get_member(name, column_members.count, get_member_data);
 23 | 		member: *Type_Info_Struct_Member;
 24 | 		if member_name {
 25 | 			member = get_field(info, member_name);
 26 | 			if !member {
 27 | 				log_error("Type % does not contain a member named \"%\"", T, member_name);
 28 | 				return results, false;
 29 | 			}
 30 | 			tag := member.type.type;
 31 | 			if tag != .STRING && tag != .INTEGER && tag != .FLOAT {
 32 | 				log_error("Member \"%\" has unsupported type %", member_name, member.type);
 33 | 				return results, false;
 34 | 			}
 35 | 		}
 36 | 		array_add(*column_members, member);
 37 | 	}
 38 | 
 39 | 	while true {
 40 | 		if !remaining_data	break;
 41 | 
 42 | 		result: T;
 43 | 		line_start := remaining_data;
 44 | 
 45 | 		for column_members {
 46 | 			// @Speed: Skip col if we’re not interested (instead of potentially un-quoting)
 47 | 			value, success, last := csv_consume_col(*remaining_data, delimiter);
 48 | 			should_be_last := (it_index == column_members.count - 1);
 49 | 			if !success || last != should_be_last {
 50 | 				log_error("Could not read column % of line %:\n%", it_index + 1, results.count + 2, line_start);
 51 | 				return results, false;
 52 | 			}
 53 | 			if it {
 54 | 				slot := (cast(*u8) *result) + it.offset_in_bytes;
 55 | 				if it.type.type == {
 56 | 					case .STRING;
 57 | 						<< (cast(*string) slot) = copy_string(value);
 58 | 					case .FLOAT;
 59 | 						success, float_value := parse_float(it.name, value);
 60 | 						if !success {
 61 | 							log_error("Could not parse column % of line % as float:\n%", it_index + 1, results.count + 2, line_start);
 62 | 							return results, false;
 63 | 						}
 64 | 
 65 | 						if it.type.runtime_size == 4 {
 66 | 							(<< cast(*float) slot) = cast(float) float_value;
 67 | 						} else {
 68 | 							assert(it.type.runtime_size == 8);
 69 | 							(<< cast(*float64) slot) = float_value;
 70 | 						}
 71 | 					case .INTEGER;
 72 | 						success, int_value := parse_integer(it.name, value);
 73 | 						if !success {
 74 | 							log_error("Could not parse column % of line % as integer:\n%", it_index + 1, results.count + 2, line_start);
 75 | 							return results, false;
 76 | 						}
 77 | 
 78 | 						int_info := cast(*Type_Info_Integer) it.type;
 79 | 						success = write_integer(it.name, int_info, slot, int_value);
 80 | 						if !success	then return results, false;
 81 | 					case;
 82 | 						// Should have been caught above
 83 | 						assert(false);
 84 | 				}
 85 | 			}
 86 | 		}
 87 | 		array_add(*results, result);
 88 | 	}
 89 | 
 90 | 	return results, true;
 91 | }
 92 | 
 93 | csv_consume_col :: (remaining_data: *string, delimiter: u8) -> col: string, success: bool, last: bool {
 94 | 	if !remaining_data.count	return "", true, true;
 95 | 
 96 | 	num_quotes := 0;
 97 | 	delim_pos := -1;
 98 | 	last := false;
 99 | 	for index: 0..remaining_data.count-1 {
100 | 		c := (<<remaining_data)[index];
101 | 		if c == #char "\"" {
102 | 			num_quotes += 1;
103 | 		} else if num_quotes % 2 == 0 {
104 | 			if c == delimiter {
105 | 				delim_pos = index;
106 | 				break;
107 | 			} else if c == #char "\n" {
108 | 				delim_pos = index;
109 | 				last = true;
110 | 				break;
111 | 			}
112 | 		}
113 | 	}
114 | 	col, remainder: string;
115 | 	if delim_pos == -1 {
116 | 		col = <<remaining_data;
117 | 		remaining_data.count = 0;
118 | 		last = true;
119 | 	} else {
120 | 		col = slice(<<remaining_data, 0, delim_pos);
121 | 		advance(remaining_data, delim_pos + 1);
122 | 	}
123 | 
124 | 	if num_quotes == 0		return col, true, last;
125 | 
126 | 	if num_quotes % 2 != 0 || col[0] != #char "\"" || col[col.count - 1] != #char "\"" {
127 | 		log_error("Invalid number of quotes: % in %", num_quotes, col);
128 | 		return "", false, last;
129 | 	}
130 | 	expected_len := col.count - num_quotes / 2;
131 | 	result := talloc_string(expected_len);
132 | 	result.count = 0;
133 | 	for index: 1..col.count-2 {
134 | 		result.data[result.count] = col[index];
135 | 		result.count += 1;
136 | 		if col[index] == #char "\""  {
137 | 			if col[index + 1] != #char "\"" {
138 | 				log_error("Invalid quote sequence at % in %", index, col);
139 | 				free(result);
140 | 				return "", false, last;
141 | 			}
142 | 			index += 1;
143 | 		}
144 | 	}
145 | 	return result, true, last;
146 | }
147 | 
// Escapes a value so that it can be safely written into a CSV column.
// A value that contains a double quote, the delimiter or a line break ("\n" or "\r") is wrapped in
// double quotes and every embedded double quote is doubled.
// Returns value unchanged if no escaping is needed; otherwise the result is allocated in temporary storage.
csv_escape :: (value: string, delimiter : u8 = #char ",") -> string {
	needs_quotes := false;
	num_quotes := 0;
	for i: 0..value.count-1 {
		c := value[i];
		if c == #char "\"" {
			num_quotes += 1;
			needs_quotes = true;
		} else if c == #char "\n" || c == #char "\r" || c == delimiter {
			needs_quotes = true;
		}
	}

	if !needs_quotes	return value;

	// Every embedded quote is written twice, plus the two surrounding quotes.
	result := talloc_string(value.count + num_quotes + 2);
	result.data[0] = #char "\"";
	result.count = 1;
	for i: 0..value.count-1 {
		result.data[result.count] = value[i];
		result.count += 1;
		if value[i] == #char "\"" {
			result.data[result.count] = #char "\"";
			result.count += 1;
		}
	}
	result.data[result.count] = #char "\"";
	result.count += 1;

	return result;
}
179 | 
// Appends value to builder, escaped so that it can be safely written into a CSV column.
// A value that contains a double quote, the delimiter or a line break ("\n" or "\r") is wrapped in
// double quotes and every embedded double quote is doubled; any other value is appended unchanged.
append_csv_escaped :: (builder: *String_Builder, value: string, delimiter : u8 = #char ",") {
	needs_quotes := false;
	for i: 0..value.count-1 {
		c := value[i];
		if c == #char "\"" || c == #char "\n" || c == #char "\r" || c == delimiter {
			needs_quotes = true;
			break;
		}
	}

	if !needs_quotes {
		append(builder, value);
		return;
	}

	append(builder, #char "\"");
	for i: 0..value.count-1 {
		if value[i] == #char "\"" {
			append(builder, "\"\"");
		} else {
			append(builder, value[i]);
		}
	}
	append(builder, #char "\"");
}
207 | 
// Default integer parser used by csv_parse.
// The column is only accepted if to_integer succeeds and consumes the whole text.
// name is not used here; it is part of the signature so that replacement parsers can use it.
parse_integer_column :: (name: string, string_value: string) -> success: bool, value: s64 {
	parsed, ok, rest := to_integer(string_value);
	if !ok	return false, parsed;
	// Anything left over after the number makes the column invalid.
	return !rest, parsed;
}
212 | 
// Default float parser used by csv_parse.
// The column is only accepted if string_to_float64 succeeds and consumes the whole text.
// name is not used here; it is part of the signature so that replacement parsers can use it.
parse_float_column :: (name: string, string_value: string) -> success: bool, value: float64 {
	parsed, ok, rest := string_to_float64(string_value);
	if !ok	return false, parsed;
	// Anything left over after the number makes the column invalid.
	return !rest, parsed;
}
217 | 
218 | 
219 | #scope_file
220 | 
// Copied from modules/Command_Line.
// @ToDo: This should probably be in modules/Reflection because all "parse into a type" parsers need it.
//
// Stores value at pointer as an integer of the type described by info.
// If value does not fit into that type, nothing is stored, an error mentioning name is logged
// and false is returned.
write_integer :: (name: string, info: *Type_Info_Integer, pointer: *void, value: s64) -> bool {
    if info.signed {
        valid, low, high := range_check_and_store(value, info, pointer);

        if !valid {
            log_error("The value of member '%' is %, which is out of range. (The value must be between % and %.)", name, value, low, high);
            return false;
        }
    } else {
        // A negative value never fits an unsigned member. Reject it here, because cast(u64) on a
        // negative s64 is a range-checked cast and would fail instead of producing an error message.
        if value < 0 {
            min_allowed, max_allowed := unsigned_integer_range_from_size(info.runtime_size);
            log_error("The value of member '%' is %, which is out of range. (The value must be between % and %.)", name, value, min_allowed, max_allowed);
            return false;
        }

        valid, low, high := range_check_and_store(cast(u64) value, info, pointer);  // Different overload from the above!

        if !valid {
            log_error("The value of member '%' is %, which is out of range. (The value must be between % and %.)", name, cast(u64) value, low, high);
            return false;
        }
    }

    return true;
}
242 | 
// Checks that value fits into an integer of info.runtime_size bytes and, if it does, writes it to pointer.
// T must be u64 or s64 and selects whether the unsigned or the signed range is used.
// Returns whether the value was stored, together with the smallest and largest allowed value.
range_check_and_store :: (value: $T, info: *Type_Info_Integer, pointer: *void) -> (success: bool, low: T, high: T) {
    #assert((T == u64) || (T == s64));

    // Writes the low byte_count bytes of raw to destination. The cast is unchecked because the
    // range check has already happened by the time this is called.
    write_truncated :: (destination: *void, raw: T, byte_count: int) {
        if byte_count == {
            case 1;  << cast(*s8)  destination = xx,no_check raw;
            case 2;  << cast(*s16) destination = xx,no_check raw;
            case 4;  << cast(*s32) destination = xx,no_check raw;
            case 8;  << cast(*s64) destination = xx,no_check raw;
            case;    assert(false);
        }
    }

    byte_count := info.runtime_size;

    #if T == u64 {
        minimum, maximum := unsigned_integer_range_from_size(byte_count);
        in_range := (value >= minimum) && (value <= maximum);
        if in_range  write_truncated(pointer, value, byte_count);
        return in_range, minimum, maximum;
    } else {
        minimum, maximum := signed_integer_range_from_size(byte_count);
        in_range := (value >= minimum) && (value <= maximum);
        if in_range  write_truncated(pointer, value, byte_count);
        return in_range, minimum, maximum;
    }
}
277 | 
// Returns the smallest and largest value of a two's complement signed integer that is
// size_in_bytes bytes wide. size_in_bytes must be 1, 2, 4 or 8.
signed_integer_range_from_size :: (size_in_bytes: int) -> (low: s64, high: s64) {
    assert((size_in_bytes == 1) || (size_in_bytes == 2) || (size_in_bytes == 4) || (size_in_bytes == 8));

    bit_count := size_in_bytes * 8;

    // All bits below the sign bit set. The lower bound is its bitwise complement.
    maximum := (1 << (bit_count - 1)) - 1;
    minimum := ~maximum;

    return minimum, maximum;
}
286 | 
// Returns the smallest (always 0) and largest value of an unsigned integer that is
// size_in_bytes bytes wide. size_in_bytes must be 1, 2, 4 or 8.
unsigned_integer_range_from_size :: (size_in_bytes: int) -> (low: u64, high: u64) {
    assert((size_in_bytes == 1) || (size_in_bytes == 2) || (size_in_bytes == 4) || (size_in_bytes == 8));

    // Hardcode 8 to prevent unpredictable behavior due to platform-specific details of shifting
    // by the full width. In the future the outcome of << will be language-defined in all cases.
    if size_in_bytes == 8  return 0, cast(u64) 0xffff_ffff_ffff_ffff;

    bit_count := size_in_bytes * 8;
    maximum: u64 = cast(u64) (1 << bit_count) - 1;  // @CompilerBug: Why do we need to cast the 1? That is dumb.

    return 0, maximum;
}
299 | 
300 | 
301 | #import "Basic";
302 | #import "String";
303 | #import "Text_File_Handler";
304 | 
305 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | # Tiny CSV module for Jai
 2 | 
 3 | This tiny module has just three primary functions:
 4 | 
 5 | * `csv_parse` parses a CSV string into an array of any given type.
 6 | * `csv_escape` escapes a value (if needed) so that it can be safely written into a CSV column.
 7 | * `append_csv_escaped` is similar to `csv_escape` but directly appends the (potentially escaped) value to a `String_Builder`.
 8 | 
 9 | Currently, `csv_parse` can only parse into string, float, and integer members.
10 | 
11 | See [`module.jai`](./module.jai) for details.
12 | 
13 | ## Memory model
14 | 
15 | You’re responsible for freeing everything (including strings) in the result array returned by `csv_parse`.
16 | Using a Pool allocator around your `csv_parse` calls might be a good idea.
17 | 


--------------------------------------------------------------------------------