├── .gitignore
├── AUTHORS
├── LICENSE
├── README.md
├── lua
│   ├── config.ld
│   ├── csv.lua
│   └── test.lua
├── makefile
├── rockspecs
│   ├── csv-1-1.rockspec
│   └── csv-scm-1.rockspec
└── test-data
    ├── BOM.csv
    ├── bars.txt
    ├── blank-line.csv
    ├── embedded-newlines.csv
    ├── embedded-quotes.csv
    └── header.csv

/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | lua/docs

--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
1 | Leyland, Geoff
2 | Martin, Kevin

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2013-2014 Incremental IP Limited
2 | Copyright (c) 2014 Kevin Martin
3 | 
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Lua-CSV - delimited file reading
2 | 
3 | ## 1. What?
4 | 
5 | Lua-CSV is a Lua module for reading delimited text files (popularly CSV and
6 | tab-separated files, but you can specify the separator).
7 | 
8 | Lua-CSV tries to auto-detect whether a file is delimited with commas or tabs,
9 | copes with non-native newlines, survives newlines and quotes inside quoted
10 | fields and offers an iterator interface so it can handle large files.
11 | 
12 | 
13 | ## 2. How?
14 | 
15 |     local csv = require("csv")
16 |     local f = csv.open("file.csv")
17 |     for fields in f:lines() do
18 |       for i, v in ipairs(fields) do print(i, v) end
19 |     end
20 | 
21 | `csv.open` takes a second argument `parameters`, a table of parameters
22 | controlling how the file is read:
23 | 
24 | + `separator` sets the separator. It'll probably guess the separator
25 |   correctly if it's a comma or a tab (unless, say, the first field in a
26 |   tab-delimited file contains a comma), but if you want something else you'll
27 |   have to set this. It can be more than one character, but it's used as
28 |   part of a set: `"["..sep.."\n\r]"`
29 | 
30 | + Set `header` to `true` if the file contains a header; each set of fields
31 |   will then be keyed by the names in the header rather than by integer index.
32 | 
33 | + `columns` provides a mechanism for column remapping.
34 |   Suppose you have a csv file as follows:
35 | 
36 |       Word,Number
37 |       ONE,10
38 | 
39 |   and `columns` is:
40 | 
41 |   + `{ word = true }` then the only field in the file would be
42 |     `{ word = "ONE" }`
43 |   + `{ first = { name = "word"} }` then it would be `{ first = "ONE" }`
44 |   + `{ word = { transform = string.lower }}` would give `{ word = "one" }`
45 |   + finally,
46 | 
47 |         { word = true,
48 |           number = { transform = function(x) return tonumber(x) / 10 end }}
49 | 
50 |     would give `{ word = "ONE", number = 1 }`
51 | 
52 |   A column can have more than one name:
53 |   `{ first = { names = {"word", "worm"}}}` to help cope with badly specified
54 |   file formats and spelling mistakes.
55 | 
56 | + `buffer_size` controls the size of the blocks the file is read in. The
57 |   default is 1MB. It used to be 4096 bytes, which is what `pagesize` says on
58 |   my system, but that seems kind of small.
59 | 
60 | `csv.openstring` works exactly like `csv.open` except that the first argument
61 | is the contents of the csv file. In this case `buffer_size` is set to
62 | the length of the string.
63 | 
64 | ## 3. Requirements
65 | 
66 | Lua 5.1, 5.2 or LuaJIT.
67 | 
68 | 
69 | ## 4. Issues
70 | 
71 | + Some whitespace-delimited files might use more than one space between
72 |   fields, for example if the columns are "manually" aligned:
73 | 
74 |       street          nr  city
75 |       "Oneway Street"  1  Toontown
76 | 
77 |   It won't cope with this - you'll get lots of extra empty fields.
78 | 
79 | ## 5. Wishlist
80 | 
81 | + Tests would be nice.
82 | + So would better LDoc documentation.
83 | 
84 | 
85 | ## 6. Alternatives
86 | 
87 | + [Penlight](http://github.com/stevedonovan/penlight) contains delimited
88 |   file reading. It reads the whole file in one go.
89 | + The Lua Wiki contains two pages on CSV,
90 |   [here](http://lua-users.org/wiki/LuaCsv) and
91 |   [here](http://lua-users.org/wiki/CsvUtils).
92 | + There's an example using [LPeg](http://www.inf.puc-rio.br/~roberto/lpeg/)
93 |   to parse CSV [here](http://www.inf.puc-rio.br/~roberto/lpeg/#CSV).

--------------------------------------------------------------------------------
/lua/config.ld:
--------------------------------------------------------------------------------
1 | project = "Lua-CSV"
2 | title = "Lua-CSV Source Documentation"
3 | description = "Lua-CSV reads delimited text files"
4 | format = "markdown"

--------------------------------------------------------------------------------
/lua/csv.lua:
--------------------------------------------------------------------------------
1 | --- Read a comma or tab (or other delimiter) separated file.
2 | -- This version of a CSV reader differs from others I've seen in that it
3 | --
4 | -- + handles embedded newlines in fields (if they're delimited with double
5 | --   quotes)
6 | -- + is line-ending agnostic
7 | -- + reads the file line-by-line, so it can potentially handle large
8 | --   files.
9 | --
10 | -- Of course, for such a simple format, CSV is horribly complicated, so it
11 | -- likely gets something wrong.
12 | 
13 | -- (c) Copyright 2013-2014 Incremental IP Limited.
14 | -- (c) Copyright 2014 Kevin Martin
15 | -- Available under the MIT licence. See LICENSE for more information.
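-- An illustrative usage sketch (added commentary, not part of the original
-- module; the file name "example.csv" and the sample data are hypothetical):
--
--    local csv = require("csv")
--
--    -- Reading from a file, with fields keyed by the header names:
--    local f = assert(csv.open("example.csv", { header = true }))
--    for fields in f:lines() do
--      for name, value in pairs(fields) do print(name, value) end
--    end
--    f:close()
--
--    -- Reading from an in-memory string, with fields indexed by position:
--    for fields in csv.openstring("a,b\n1,2\n"):lines() do
--      print(fields[1], fields[2])
--    end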
16 | 17 | local DEFAULT_BUFFER_BLOCK_SIZE = 1024 * 1024 18 | 19 | 20 | ------------------------------------------------------------------------------ 21 | 22 | local function trim_space(s) 23 | return s:match("^%s*(.-)%s*$") 24 | end 25 | 26 | 27 | local function fix_quotes(s) 28 | -- the sub(..., -2) is to strip the trailing quote 29 | return string.sub(s:gsub('""', '"'), 1, -2) 30 | end 31 | 32 | 33 | ------------------------------------------------------------------------------ 34 | 35 | local column_map = {} 36 | column_map.__index = column_map 37 | 38 | 39 | local function normalise_string(s) 40 | return (s:lower():gsub("[^%w%d]+", " "):gsub("^ *(.-) *$", "%1")) 41 | end 42 | 43 | 44 | --- Parse a list of columns. 45 | -- The main job here is normalising column names and dealing with columns 46 | -- for which we have more than one possible name in the header. 47 | function column_map:new(columns) 48 | local name_map = {} 49 | for n, v in pairs(columns) do 50 | local names 51 | local t 52 | if type(v) == "table" then 53 | t = { transform = v.transform, default = v.default } 54 | if v.name then 55 | names = { normalise_string(v.name) } 56 | elseif v.names then 57 | names = v.names 58 | for i, n in ipairs(names) do names[i] = normalise_string(n) end 59 | end 60 | else 61 | if type(v) == "function" then 62 | t = { transform = v } 63 | else 64 | t = {} 65 | if type(v) == "string" then 66 | names = { normalise_string(v) } 67 | end 68 | end 69 | end 70 | 71 | if not names then 72 | names = { (n:lower():gsub("[^%w%d]+", " ")) } 73 | end 74 | 75 | t.name = n 76 | for _, n in ipairs(names) do 77 | name_map[n:lower()] = t 78 | end 79 | end 80 | 81 | return setmetatable({ name_map = name_map }, column_map) 82 | end 83 | 84 | 85 | --- Map "virtual" columns to file columns. 86 | -- Once we've read the header, work out which columns we're interested in and 87 | -- what to do with them. 
Mostly this is about checking we've got the columns 88 | -- we need and writing a nice complaint if we haven't. 89 | function column_map:read_header(header) 90 | local index_map = {} 91 | 92 | -- Match the columns in the file to the columns in the name map 93 | local found = {} 94 | local found_any 95 | for i, word in ipairs(header) do 96 | word = normalise_string(word) 97 | local r = self.name_map[word] 98 | if r then 99 | index_map[i] = r 100 | found[r.name] = true 101 | found_any = true 102 | end 103 | end 104 | 105 | if not found_any then return end 106 | 107 | -- check we found all the columns we need 108 | local not_found = {} 109 | for name, r in pairs(self.name_map) do 110 | if not found[r.name] then 111 | local nf = not_found[r.name] 112 | if nf then 113 | nf[#nf+1] = name 114 | else 115 | not_found[r.name] = { name } 116 | end 117 | end 118 | end 119 | -- If any columns are missing, assemble an error message 120 | if next(not_found) then 121 | local problems = {} 122 | for k, v in pairs(not_found) do 123 | local missing 124 | if #v == 1 then 125 | missing = "'"..v[1].."'" 126 | else 127 | missing = v[1] 128 | for i = 2, #v - 1 do 129 | missing = missing..", '"..v[i].."'" 130 | end 131 | missing = missing.." 
or '"..v[#v].."'" 132 | end 133 | problems[#problems+1] = "Couldn't find a column named "..missing 134 | end 135 | error(table.concat(problems, "\n"), 0) 136 | end 137 | 138 | self.index_map = index_map 139 | return true 140 | end 141 | 142 | 143 | function column_map:transform(value, index) 144 | local field = self.index_map[index] 145 | if field then 146 | if field.transform then 147 | local ok 148 | ok, value = pcall(field.transform, value) 149 | if not ok then 150 | error(("Error reading field '%s': %s"):format(field.name, value), 0) 151 | end 152 | end 153 | return value or field.default, field.name 154 | end 155 | end 156 | 157 | 158 | ------------------------------------------------------------------------------ 159 | 160 | local file_buffer = {} 161 | file_buffer.__index = file_buffer 162 | 163 | function file_buffer:new(file, buffer_block_size) 164 | return setmetatable({ 165 | file = file, 166 | buffer_block_size = buffer_block_size or DEFAULT_BUFFER_BLOCK_SIZE, 167 | buffer_start = 0, 168 | buffer = "", 169 | }, file_buffer) 170 | end 171 | 172 | 173 | --- Cut the front off the buffer if we've already read it 174 | function file_buffer:truncate(p) 175 | p = p - self.buffer_start 176 | if p > self.buffer_block_size then 177 | local remove = self.buffer_block_size * 178 | math.floor((p-1) / self.buffer_block_size) 179 | self.buffer = self.buffer:sub(remove + 1) 180 | self.buffer_start = self.buffer_start + remove 181 | end 182 | end 183 | 184 | 185 | --- Find something in the buffer, extending it if necessary 186 | function file_buffer:find(pattern, init) 187 | while true do 188 | local first, last, capture = 189 | self.buffer:find(pattern, init - self.buffer_start) 190 | -- if we found nothing, or the last character is at the end of the 191 | -- buffer (and the match could potentially be longer) then read some 192 | -- more. 
193 | if not first or last == #self.buffer then 194 | local s = self.file:read(self.buffer_block_size) 195 | if not s then 196 | if not first then 197 | return 198 | else 199 | return first + self.buffer_start, last + self.buffer_start, capture 200 | end 201 | end 202 | self.buffer = self.buffer..s 203 | else 204 | return first + self.buffer_start, last + self.buffer_start, capture 205 | end 206 | end 207 | end 208 | 209 | 210 | --- Extend the buffer so we can see more 211 | function file_buffer:extend(offset) 212 | local extra = offset - #self.buffer - self.buffer_start 213 | if extra > 0 then 214 | local size = self.buffer_block_size * 215 | math.ceil(extra / self.buffer_block_size) 216 | local s = self.file:read(size) 217 | if not s then return end 218 | self.buffer = self.buffer..s 219 | end 220 | end 221 | 222 | 223 | --- Get a substring from the buffer, extending it if necessary 224 | function file_buffer:sub(a, b) 225 | self:extend(b) 226 | b = b == -1 and b or b - self.buffer_start 227 | return self.buffer:sub(a - self.buffer_start, b) 228 | end 229 | 230 | 231 | --- Close a file buffer 232 | function file_buffer:close() 233 | self.file:close() 234 | self.file = nil 235 | end 236 | 237 | 238 | ------------------------------------------------------------------------------ 239 | 240 | local separator_candidates = { ",", "\t", "|" } 241 | local guess_separator_params = { record_limit = 8; } 242 | 243 | 244 | local function try_separator(buffer, sep, f) 245 | guess_separator_params.separator = sep 246 | local min, max = math.huge, 0 247 | local lines, split_lines = 0, 0 248 | local iterator = coroutine.wrap(function() f(buffer, guess_separator_params) end) 249 | for t in iterator do 250 | min = math.min(min, #t) 251 | max = math.max(max, #t) 252 | split_lines = split_lines + (t[2] and 1 or 0) 253 | lines = lines + 1 254 | end 255 | if split_lines / lines > 0.75 then 256 | return max - min 257 | else 258 | return math.huge 259 | end 260 | end 261 | 262 | 263 | 
--- If the user hasn't specified a separator, try to work out what it is.
264 | local function guess_separator(buffer, f)
265 |   local best_separator, lowest_diff = "", math.huge
266 |   for _, s in ipairs(separator_candidates) do
267 |     local ok, diff = pcall(function() return try_separator(buffer, s, f) end)
268 |     if ok and diff < lowest_diff then
269 |       best_separator = s
270 |       lowest_diff = diff
271 |     end
272 |   end
273 | 
274 |   return best_separator
275 | end
276 | 
277 | 
278 | local unicode_BOMS =
279 | {
280 |   {
281 |     length = 2,
282 |     BOMS =
283 |     {
284 |       ["\254\255"] = true,      -- UTF-16 big-endian
285 |       ["\255\254"] = true,      -- UTF-16 little-endian
286 |     }
287 |   },
288 |   {
289 |     length = 3,
290 |     BOMS =
291 |     {
292 |       ["\239\187\191"] = true,  -- UTF-8
293 |     }
294 |   }
295 | }
296 | 
297 | 
298 | local function find_unicode_BOM(sub)
299 |   for _, x in ipairs(unicode_BOMS) do
300 |     local code = sub(1, x.length)
301 |     if x.BOMS[code] then
302 |       return x.length
303 |     end
304 |   end
305 |   return 0
306 | end
307 | 
308 | 
309 | --- Iterate through the records in a file.
310 | -- Since records might be more than one line (if there's a newline in quotes)
311 | -- and line-endings might not be native, we read the file in chunks
312 | -- using a file_buffer, rather than line-by-line
313 | -- using io.lines.
314 | local function separated_values_iterator(buffer, parameters) 315 | local field_start = 1 316 | 317 | local advance 318 | if buffer.truncate then 319 | advance = function(n) 320 | field_start = field_start + n 321 | buffer:truncate(field_start) 322 | end 323 | else 324 | advance = function(n) 325 | field_start = field_start + n 326 | end 327 | end 328 | 329 | 330 | local function field_sub(a, b) 331 | b = b == -1 and b or b + field_start - 1 332 | return buffer:sub(a + field_start - 1, b) 333 | end 334 | 335 | 336 | local function field_find(pattern, init) 337 | init = init or 1 338 | local f, l, c = buffer:find(pattern, init + field_start - 1) 339 | if not f then return end 340 | return f - field_start + 1, l - field_start + 1, c 341 | end 342 | 343 | 344 | -- Is there some kind of Unicode BOM here? 345 | advance(find_unicode_BOM(field_sub)) 346 | 347 | 348 | -- Start reading the file 349 | local sep = "(["..(parameters.separator or 350 | guess_separator(buffer, separated_values_iterator)).."\n\r])" 351 | local line_start = 1 352 | local line = 1 353 | local field_count, fields, starts, nonblanks = 0, {}, {} 354 | local header, header_read 355 | local field_start_line, field_start_column 356 | local record_count = 0 357 | 358 | 359 | local function problem(message) 360 | error(("%s:%d:%d: %s"): 361 | format(parameters.filename, field_start_line, field_start_column, 362 | message), 0) 363 | end 364 | 365 | 366 | while true do 367 | local field_end, sep_end, this_sep 368 | local tidy 369 | field_start_line = line 370 | field_start_column = field_start - line_start + 1 371 | 372 | -- If the field is quoted, go find the other quote 373 | if field_sub(1, 1) == '"' then 374 | advance(1) 375 | local current_pos = 0 376 | repeat 377 | local a, b, c = field_find('"("?)', current_pos + 1) 378 | current_pos = b 379 | until c ~= '"' 380 | if not current_pos then problem("unmatched quote") end 381 | tidy = fix_quotes 382 | field_end, sep_end, this_sep = field_find(" *([^ 
])", current_pos+1) 383 | if this_sep and not this_sep:match(sep) then problem("unmatched quote") end 384 | else 385 | field_end, sep_end, this_sep = field_find(sep, 1) 386 | tidy = trim_space 387 | end 388 | 389 | -- Look for the separator or a newline or the end of the file 390 | field_end = (field_end or 0) - 1 391 | 392 | -- Read the field, then convert all the line endings to \n, and 393 | -- count any embedded line endings 394 | local value = field_sub(1, field_end) 395 | value = value:gsub("\r\n", "\n"):gsub("\r", "\n") 396 | for nl in value:gmatch("\n()") do 397 | line = line + 1 398 | line_start = nl + field_start 399 | end 400 | 401 | value = tidy(value) 402 | if #value > 0 then nonblanks = true end 403 | field_count = field_count + 1 404 | 405 | -- Insert the value into the table for this "line" 406 | local key 407 | if parameters.column_map and header_read then 408 | local ok 409 | ok, value, key = pcall(parameters.column_map.transform, 410 | parameters.column_map, value, field_count) 411 | if not ok then problem(value) end 412 | elseif header then 413 | key = header[field_count] 414 | else 415 | key = field_count 416 | end 417 | if key then 418 | fields[key] = value 419 | starts[key] = { line=field_start_line, column=field_start_column } 420 | end 421 | 422 | -- if we ended on a newline then yield the fields on this line. 
423 | if not this_sep or this_sep == "\r" or this_sep == "\n" then 424 | if parameters.column_map and not header_read then 425 | header_read = parameters.column_map:read_header(fields) 426 | elseif parameters.header and not header_read then 427 | if nonblanks or field_count > 1 then -- ignore blank lines 428 | header = fields 429 | header_read = true 430 | end 431 | else 432 | if nonblanks or field_count > 1 then -- ignore blank lines 433 | coroutine.yield(fields, starts) 434 | record_count = record_count + 1 435 | if parameters.record_limit and 436 | record_count >= parameters.record_limit then 437 | break 438 | end 439 | end 440 | end 441 | field_count, fields, starts, nonblanks = 0, {}, {} 442 | end 443 | 444 | -- If we *really* didn't find a separator then we're done. 445 | if not sep_end then break end 446 | 447 | -- If we ended on a newline then count it. 448 | if this_sep == "\r" or this_sep == "\n" then 449 | if this_sep == "\r" and field_sub(sep_end+1, sep_end+1) == "\n" then 450 | sep_end = sep_end + 1 451 | end 452 | line = line + 1 453 | line_start = field_start + sep_end 454 | end 455 | 456 | advance(sep_end) 457 | end 458 | end 459 | 460 | 461 | ------------------------------------------------------------------------------ 462 | 463 | local buffer_mt = 464 | { 465 | lines = function(t) 466 | return coroutine.wrap(function() 467 | separated_values_iterator(t.buffer, t.parameters) 468 | end) 469 | end, 470 | close = function(t) 471 | if t.buffer.close then t.buffer:close() end 472 | end, 473 | name = function(t) 474 | return t.parameters.filename 475 | end, 476 | } 477 | buffer_mt.__index = buffer_mt 478 | 479 | 480 | --- Use an existing file or buffer as a stream to read csv from. 481 | -- (A buffer is just something that looks like a string in that we can do 482 | -- `buffer:sub()` and `buffer:find()`) 483 | -- @return a file object 484 | local function use( 485 | buffer, -- ?string|file|buffer: the buffer to read from. 
If it's:
486 |   -- - a string, read from that;
487 |   -- - a file, turn it into a file_buffer;
488 |   -- - nil, read from stdin;
489 |   -- otherwise assume it's already a buffer.
490 |   parameters)     -- ?table: parameters controlling reading the file.
491 |                   -- See README.md
492 |   parameters = parameters or {}
493 |   parameters.filename = parameters.filename or ""
494 |   parameters.column_map = parameters.columns and
495 |     column_map:new(parameters.columns)
496 | 
497 |   if not buffer then
498 |     buffer = file_buffer:new(io.stdin)
499 |   elseif io.type(buffer) == "file" then
500 |     buffer = file_buffer:new(buffer)
501 |   end
502 | 
503 |   local f = { buffer = buffer, parameters = parameters }
504 |   return setmetatable(f, buffer_mt)
505 | end
506 | 
507 | 
508 | ------------------------------------------------------------------------------
509 | 
510 | --- Open a file for reading as a delimited file.
511 | -- @return a file object
512 | local function open(
513 |   filename,       -- string: name of the file to open
514 |   parameters)     -- ?table: parameters controlling reading the file.
515 |                   -- See README.md
516 |   local file, message = io.open(filename, "r")
517 |   if not file then return nil, message end
518 | 
519 |   parameters = parameters or {}
520 |   parameters.filename = filename
521 |   return use(file_buffer:new(file), parameters)
522 | end
523 | 
524 | 
525 | ------------------------------------------------------------------------------
526 | 
527 | local function makename(s)
528 |   local t = {}
529 |   t[#t+1] = "<(String) "
530 |   t[#t+1] = (s:gmatch("[^\n]+")() or ""):sub(1,15)
531 |   if #t[#t] > 14 then t[#t+1] = "..." end
532 |   t[#t+1] = " >"
533 |   return table.concat(t)
534 | end
535 | 
536 | 
537 | --- Open a string for reading as a delimited file.
538 | -- @return a file object
539 | local function openstring(
540 |   filecontents,   -- string: The contents of the delimited file
541 |   parameters)     -- ?table: parameters controlling reading the file.
542 | -- See README.md 543 | 544 | parameters = parameters or {} 545 | 546 | 547 | parameters.filename = parameters.filename or makename(filecontents) 548 | parameters.buffer_size = parameters.buffer_size or #filecontents 549 | return use(filecontents, parameters) 550 | end 551 | 552 | 553 | ------------------------------------------------------------------------------ 554 | 555 | return { open = open, openstring = openstring, use = use } 556 | 557 | ------------------------------------------------------------------------------ 558 | -------------------------------------------------------------------------------- /lua/test.lua: -------------------------------------------------------------------------------- 1 | pcall(require, "strict") 2 | local csv = require"csv" 3 | 4 | local errors = 0 5 | 6 | local function testhandle(handle, correct_result) 7 | local result = {} 8 | for r in handle:lines() do 9 | if not r[1] then 10 | local r2 = {} 11 | for k, v in pairs(r) do r2[#r2+1] = k..":"..tostring(v) end 12 | table.sort(r2) 13 | r = r2 14 | end 15 | result[#result+1] = table.concat(r, ",") 16 | end 17 | 18 | handle:close() 19 | 20 | result = table.concat(result, "!\n").."!" 
21 | if result ~= correct_result then 22 | io.stderr:write( 23 | ("Error reading '%s':\nExpected output:\n%s\n\nActual output:\n%s\n\n"): 24 | format(handle:name(), correct_result, result)) 25 | errors = errors + 1 26 | return false 27 | end 28 | return true 29 | end 30 | 31 | local function test(filename, correct_result, parameters) 32 | parameters = parameters or {} 33 | for i = 1, 16 do 34 | parameters.buffer_size = i 35 | local f = csv.open(filename, parameters) 36 | local fileok = testhandle(f, correct_result) 37 | 38 | if fileok then 39 | f = io.open(filename, "r") 40 | local data = f:read("*a") 41 | f:close() 42 | 43 | f = csv.openstring(data, parameters) 44 | testhandle(f, correct_result) 45 | end 46 | end 47 | end 48 | 49 | test("../test-data/embedded-newlines.csv", [[ 50 | embedded 51 | newline,embedded 52 | newline,embedded 53 | newline! 54 | embedded 55 | newline,embedded 56 | newline,embedded 57 | newline!]]) 58 | 59 | test("../test-data/embedded-quotes.csv", [[ 60 | embedded "quotes",embedded "quotes",embedded "quotes"! 61 | embedded "quotes",embedded "quotes",embedded "quotes"!]]) 62 | 63 | test("../test-data/header.csv", [[ 64 | alpha:ONE,bravo:two,charlie:3! 65 | alpha:four,bravo:five,charlie:6!]], {header=true}) 66 | 67 | test("../test-data/header.csv", [[ 68 | apple:one,charlie:30! 69 | apple:four,charlie:60!]], 70 | { columns = { 71 | apple = { name = "ALPHA", transform = string.lower }, 72 | charlie = { transform = function(x) return tonumber(x) * 10 end }}}) 73 | 74 | test("../test-data/blank-line.csv", [[ 75 | this,file,ends,with,a,blank,line!]]) 76 | 77 | test("../test-data/BOM.csv", [[ 78 | apple:one,charlie:30! 79 | apple:four,charlie:60!]], 80 | { columns = { 81 | apple = { name = "ALPHA", transform = string.lower }, 82 | charlie = { transform = function(x) return tonumber(x) * 10 end }}}) 83 | 84 | test("../test-data/bars.txt", [[ 85 | there's a comma in this field, but no newline,embedded 86 | newline,embedded 87 | newline! 
88 | embedded 89 | newline,embedded 90 | newline,embedded 91 | newline!]]) 92 | 93 | 94 | if errors == 0 then 95 | io.stdout:write("Passed\n") 96 | elseif errors == 1 then 97 | io.stdout:write("1 error\n") 98 | else 99 | io.stdout:write(("%d errors\n"):format(errors)) 100 | end 101 | 102 | os.exit(errors) 103 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | LUA= $(shell echo `which lua`) 2 | LUA_BINDIR= $(shell echo `dirname $(LUA)`) 3 | LUA_PREFIX= $(shell echo `dirname $(LUA_BINDIR)`) 4 | LUA_VERSION = $(shell echo `lua -v 2>&1 | cut -d " " -f 2 | cut -b 1-3`) 5 | LUA_SHAREDIR=$(LUA_PREFIX)/share/lua/$(LUA_VERSION) 6 | 7 | default: 8 | @echo "Nothing to build. Try 'make install' or 'make test'." 9 | 10 | install: 11 | cp lua/csv.lua $(LUA_SHAREDIR) 12 | 13 | test: 14 | cd lua && $(LUA) test.lua 15 | -------------------------------------------------------------------------------- /rockspecs/csv-1-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "csv" 2 | version = "1-1" 3 | source = 4 | { 5 | url = "git://github.com/geoffleyland/lua-csv.git", 6 | branch = "master", 7 | tag = "v1", 8 | } 9 | description = 10 | { 11 | summary = "CSV and other delimited file reading", 12 | homepage = "http://github.com/geoffleyland/lua-csv", 13 | license = "MIT/X11", 14 | maintainer = "Geoff Leyland " 15 | } 16 | dependencies = { "lua >= 5.1" } 17 | build = 18 | { 19 | type = "builtin", 20 | modules = 21 | { 22 | csv = "lua/csv.lua", 23 | }, 24 | } 25 | -------------------------------------------------------------------------------- /rockspecs/csv-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "csv" 2 | version = "scm-1" 3 | source = 4 | { 5 | url = "git://github.com/geoffleyland/lua-csv.git", 6 | branch = "master", 7 | } 8 | 
description = 9 | { 10 | summary = "CSV and other delimited file reading", 11 | homepage = "http://github.com/geoffleyland/lua-csv", 12 | license = "MIT/X11", 13 | maintainer = "Geoff Leyland " 14 | } 15 | dependencies = { "lua >= 5.1" } 16 | build = 17 | { 18 | type = "builtin", 19 | modules = 20 | { 21 | csv = "lua/csv.lua", 22 | }, 23 | } 24 | -------------------------------------------------------------------------------- /test-data/BOM.csv: -------------------------------------------------------------------------------- 1 | alpha,bravo,charlie 2 | ONE,two,3 3 | four,five,6 -------------------------------------------------------------------------------- /test-data/bars.txt: -------------------------------------------------------------------------------- 1 | there's a comma in this field, but no newline|"embedded 2 | newline"|"embedded 3 | newline" 4 | "embedded 5 | newline"|"embedded 6 | newline"|"embedded 7 | newline" -------------------------------------------------------------------------------- /test-data/blank-line.csv: -------------------------------------------------------------------------------- 1 | this,file,ends,with,a,blank,line 2 | 3 | -------------------------------------------------------------------------------- /test-data/embedded-newlines.csv: -------------------------------------------------------------------------------- 1 | "embedded 2 | newline","embedded 3 | newline","embedded 4 | newline" 5 | "embedded 6 | newline","embedded 7 | newline","embedded 8 | newline" -------------------------------------------------------------------------------- /test-data/embedded-quotes.csv: -------------------------------------------------------------------------------- 1 | "embedded ""quotes""","embedded ""quotes""","embedded ""quotes""" 2 | "embedded ""quotes""","embedded ""quotes""","embedded ""quotes""" -------------------------------------------------------------------------------- /test-data/header.csv: 
-------------------------------------------------------------------------------- 1 | alpha,bravo,charlie 2 | ONE,two,3 3 | four,five,6 --------------------------------------------------------------------------------