├── CMakeLists.txt ├── File.lua ├── README.md ├── csvigo-scm-1.rockspec ├── init.c ├── init.lua └── test.lua /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR) 3 | CMAKE_POLICY(VERSION 2.6) 4 | FIND_PACKAGE(Torch REQUIRED) 5 | 6 | SET(luasrc init.lua File.lua) 7 | SET(src init.c) 8 | 9 | ADD_TORCH_PACKAGE(csvigo "${src}" "${luasrc}" "CSV Reader/Writer") 10 | 11 | TARGET_LINK_LIBRARIES(csvigo luaT TH ) 12 | -------------------------------------------------------------------------------- /File.lua: -------------------------------------------------------------------------------- 1 | ---------------------------------------------------------------------- 2 | -- csvigo.File 3 | 4 | -- A class to manage comma separate value files + two directly-usable functions 5 | -- various function to manage csv files 6 | 7 | -- These CSV files all have a comma delimiter and use " as the quote character 8 | -- The separator ',' can be user-defined. A common example is ' ', which allows 9 | -- for space separated values. 10 | 11 | -- Ref: 12 | -- http://www.lua.org/pil/20.4.html 13 | -- http://www.torch.ch/manual/torch/utility#torchclass 14 | ---------------------------------------------------------------------- 15 | 16 | -- enclose commas and quotes between quotes and escape original quotes 17 | local function escapeCsv(s, separator) 18 | if string.find(s, '["' .. separator .. ']') then 19 | --if string.find(s, '[,"]') then 20 | s = '"' .. string.gsub(s, '"', '""') .. '"' 21 | end 22 | return s 23 | end 24 | 25 | -- convert an array of strings or numbers into a row in a csv file 26 | local function tocsv(t, separator, nan_as_missing) 27 | local s = "" 28 | for _,p in ipairs(t) do 29 | if (nan_as_missing and p ~= p) then 30 | p = '' 31 | end 32 | s = s .. separator .. escapeCsv(p, separator) 33 | end 34 | return string.sub(s, 2) -- remove first comma 35 | end 36 | 37 | -- break record from csv file into array of strings 38 | local function fromcsv(s, separator) 39 | if not s then error("s is null") end 40 | s = s .. separator -- end with separator 41 | if separator == ' ' then separator = '%s+' end 42 | local t = {} 43 | local fieldstart = 1 44 | repeat 45 | -- next field is quoted? (starts with "?) 46 | if string.find(s, '^"', fieldstart) then 47 | local a, c 48 | local i = fieldstart 49 | repeat 50 | -- find closing quote 51 | a, i, c = string.find(s, '"("?)', i+1) 52 | until c ~= '"' -- quote not followed by quote? 53 | if not i then error('unmatched "') end 54 | local f = string.sub(s, fieldstart+1, i-1) 55 | table.insert(t, (string.gsub(f, '""', '"'))) 56 | fieldstart = string.find(s, separator, i) + 1 57 | else 58 | local nexti = string.find(s, separator, fieldstart) 59 | table.insert(t, string.sub(s, fieldstart, nexti-1)) 60 | fieldstart = nexti + 1 61 | end 62 | until fieldstart > string.len(s) 63 | return t 64 | end 65 | 66 | ---------------------------------------------------------------------- 67 | 68 | -- create class Csv 69 | local Csv = torch.class("csvigo.File") 70 | 71 | -- initializer 72 | function Csv:__init(filepath, mode, separator, nan_as_missing) 73 | local msg = nil 74 | self.filepath = filepath 75 | self.file, msg = io.open(filepath, mode) 76 | self.separator = separator or ',' 77 | self.nan_as_missing = nan_as_missing or false 78 | if not self.file then error(msg) end 79 | end 80 | 81 | -- close underlying file 82 | function Csv:close() 83 | self.file:close() 84 | end 85 | 86 | -- return iterator that reads all the remaining lines 87 | function Csv:lines() 88 | return self.file:lines() 89 | end 90 | 91 | -- return next record from the csv file 92 | -- return nill if at end of file 93 | function Csv:read() 94 | local line = self.file:read() 95 | if not line then return nil end 96 | -- strip CR line endings 97 | line = line:gsub('\r', '') 98 | return fromcsv(line, self.separator) 99 | end 100 | 101 | function Csv:largereadall() 102 | local ok = pcall(require, 'torch') 103 | if not ok then 104 | error('large mode needs the torch package') 105 | end 106 | local libcsvigo = require 'libcsvigo' 107 | local ffi = require 'ffi' 108 | local path = self.filepath 109 | local f = torch.DiskFile(path, 'r'):binary() 110 | f:seekEnd() 111 | local length = f:position() - 1 112 | f:seek(1) 113 | local data = f:readChar(length) 114 | f:close() 115 | 116 | -- now that the ByteStorage is constructed, 117 | -- one has to make a dictionary of [offset, length] pairs of the row. 118 | -- for efficiency, do one pass to count number of rows, 119 | -- and another pass to create a LongTensor and fill it 120 | local lookup = libcsvigo.create_lookup(data) 121 | 122 | local out = {} 123 | local separator = self.separator 124 | 125 | local function index (tbl, i) 126 | assert(i, 'index has to be given') 127 | assert(i > 0 and i <= lookup:size(1), "index out of bounds: " .. i) 128 | local line = ffi.string(data:data() + lookup[i][1], lookup[i][2]) 129 | local entry = fromcsv(line, separator) 130 | return entry 131 | end 132 | 133 | local function stringm (i) 134 | assert(i, 'index has to be given') 135 | assert(i > 0 and i <= lookup:size(1), "index out of bounds: " .. i) 136 | return ffi.string(data:data() + lookup[i][1], lookup[i][2]) 137 | end 138 | 139 | out.mt = {} 140 | out.mt.__index = index 141 | 142 | out.mt.__newindex = function (t,k,v) 143 | error("attempt to update a read-only table", 2) 144 | end 145 | 146 | out.mt.__len = function (t) 147 | return lookup:size(1) 148 | end 149 | 150 | out.mt.__tostring = function(t) 151 | local s = '' 152 | if lookup:size(1) < 30 then 153 | for i = 1, lookup:size(1) do 154 | s = s .. stringm(i) .. '\n' 155 | end 156 | else 157 | for i = 1, 10 do 158 | s = s .. stringm(i) .. '\n' 159 | end 160 | for i = 1, 10 do 161 | s = s .. '.. .. .. .. .. .. .. .. .. \n' 162 | end 163 | for i = lookup:size(1)-10, lookup:size(1) do 164 | s = s .. stringm(i) .. '\n' 165 | end 166 | end 167 | return s 168 | end 169 | 170 | out.mt.__ipairs = function(t) 171 | local counter = 0 172 | function iter() 173 | counter = counter + 1 174 | if counter <= lookup:size(1) then 175 | return counter, index(t, counter) 176 | end 177 | return nil 178 | end 179 | return iter, t, 0 180 | end 181 | 182 | out.mt.__pairs = function(t) 183 | local counter = 0 184 | function iter() 185 | counter = counter + 1 186 | if counter <= lookup:size(1) then 187 | return counter, index(t, counter) 188 | end 189 | return nil 190 | end 191 | return iter, t, nil 192 | end 193 | 194 | setmetatable(out, out.mt) 195 | -- size 196 | -- tostring 197 | 198 | -- iterator 199 | -- index 200 | -- error on newindex 201 | 202 | return out 203 | end 204 | 205 | -- return all records as an array 206 | -- each element of the array is an array of strings 207 | -- should be faster than reading record by record 208 | function Csv:readall(mode) 209 | if mode == 'large' then 210 | return self:largereadall() 211 | end 212 | local res = {} 213 | while true do 214 | local line = self.file:read("*l") 215 | if not line then break end 216 | -- strip CR line endings 217 | line = line:gsub('\r', '') 218 | local entry = fromcsv(line, self.separator) 219 | res[#res+1] = entry 220 | end 221 | return res 222 | end 223 | 224 | -- write array of strings|numbers to the csv file followed by \n 225 | -- convert to csv format by inserting commas and quoting where necessary 226 | -- return nil 227 | function Csv:write(a) 228 | res, msg = self.file:write(tocsv(a, self.separator, self.nan_as_missing),"\n") 229 | if res then return end 230 | error(msg) 231 | end 232 | 233 | -- write all records in an array (table of tables) 234 | function Csv:writeall(a, nan_as_missing) 235 | for i,entry in ipairs(a) do 236 | res, msg = self.file:write(tocsv(entry, self.separator, self.nan_as_missing),"\n") 237 | if not res then error(msg) end 238 | end 239 | return true 240 | end 241 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # csvigo: a package to handle CSV files (read and write). 2 | 3 | ## Install: 4 | 5 | First install Torch7 (www.torch.ch) then simply install this package 6 | using luarocks: 7 | 8 | ``` 9 | luarocks install csvigo 10 | ``` 11 | 12 | ## Use: 13 | 14 | The library provides 2 high-level functions: csvigo.load and csvigo.save. To get help 15 | on these functions, simply do: 16 | 17 | ``` 18 | > csvigo.save() 19 | > csvigo.load() 20 | ``` 21 | 22 | Loading a CSV file in 'query' mode gives you a convenient query function that 23 | you can use to query subsets of your original CSV file. To get help on this query 24 | function, simply do: 25 | 26 | ``` 27 | > query = csvigo.load{path='somefile.csv', mode='query'} 28 | > query('help') 29 | -- print some help 30 | > all = query('all') 31 | > subset = query('union', {somevar=someval, someothervar={val1, val2}}) 32 | ``` 33 | 34 | ## Large CSV mode 35 | 36 | CSVigo supports efficient loading of very large CSV files into memory. 37 | The loaded data structure is a read-only table with efficiency hidden under the hood. 38 | 39 | Loading: 40 | 41 | ```lua 42 | m = csvigo.load({path = "my_large.csv", mode = "large"}) 43 | ``` 44 | 45 | Printing by default only prints first 10 and last 10 rows 46 | ```lua 47 | print(m) 48 | ``` 49 | 50 | Individual element access 51 | ```lua 52 | print(m[32]) 53 | ``` 54 | 55 | Size of table: 56 | ```lua 57 | print(#m) 58 | ``` 59 | 60 | For loop over entries: 61 | 62 | Type 1: 63 | ```lua 64 | for i=1, #m do 65 | print(m[i]) -- get element 66 | end 67 | ``` 68 | 69 | Type 2: 70 | ```lua 71 | for k,v in ipairs(m) do 72 | print(k) 73 | print(v) 74 | end 75 | ``` 76 | 77 | Type 3: 78 | ```lua 79 | for k,v in pairs(m) do 80 | print(k) 81 | print(v) 82 | end 83 | ``` 84 | 85 | Read-only table 86 | ```lua 87 | -- read only table, will error here: 88 | m[13] = 'a' 89 | ``` 90 | -------------------------------------------------------------------------------- /csvigo-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "csvigo" 2 | version = "scm-1" 3 | 4 | source = { 5 | url = "git://github.com/clementfarabet/lua---csv", 6 | tag = "master" 7 | } 8 | 9 | description = { 10 | summary = "A CSV library, for Torch", 11 | detailed = [[ 12 | A CSV read/write library for Torch. 13 | ]], 14 | homepage = "https://github.com/clementfarabet/lua---csv", 15 | license = "BSD" 16 | } 17 | 18 | dependencies = { 19 | "torch >= 7.0", 20 | } 21 | 22 | build = { 23 | type = "command", 24 | build_command = [[ 25 | cmake -E make_directory build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUA_BINDIR)/.." -DCMAKE_INSTALL_PREFIX="$(PREFIX)" && $(MAKE) 26 | ]], 27 | install_command = "cd build && $(MAKE) install" 28 | } -------------------------------------------------------------------------------- /init.c: -------------------------------------------------------------------------------- 1 | #include "TH.h" 2 | #include "luaT.h" 3 | 4 | #if LUA_VERSION_NUM == 501 5 | static void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup) 6 | { 7 | luaL_checkstack(L, nup+1, "too many upvalues"); 8 | for (; l->name != NULL; l++) { /* fill the table with given functions */ 9 | int i; 10 | lua_pushstring(L, l->name); 11 | for (i = 0; i < nup; i++) /* copy upvalues to the top */ 12 | lua_pushvalue(L, -(nup+1)); 13 | lua_pushcclosure(L, l->func, nup); /* closure with those upvalues */ 14 | lua_settable(L, -(nup + 3)); 15 | } 16 | lua_pop(L, nup); /* remove upvalues */ 17 | } 18 | #endif 19 | 20 | static int create_lookup(lua_State* L) 21 | { 22 | 23 | THCharStorage *input = luaT_checkudata(L, 1, "torch.CharStorage"); 24 | char* data = THCharStorage_data(input); 25 | long length = input->size; 26 | 27 | long num_lines = 0; 28 | long i; 29 | #pragma omp parallel for private(i) 30 | for (i = 0; i < length; i++) { 31 | if (data[i] == '\n') { 32 | num_lines++; 33 | } 34 | } 35 | 36 | if (data[length-1] != '\n') { 37 | num_lines++; 38 | } 39 | 40 | THLongTensor* lookup = THLongTensor_newWithSize2d(num_lines, 2); 41 | long* ldata = THLongTensor_data(lookup); 42 | 43 | long offset = 0; 44 | for (i = 0; i < length; i++) { 45 | if (data[i] == '\n' || data[i] == '\r') { 46 | *ldata++ = offset; 47 | *ldata++ = i - offset; 48 | 49 | if (data[i] == '\r') { 50 | i++; 51 | } 52 | 53 | offset = i+1; 54 | } 55 | } 56 | if (data[length-1] != '\n') { 57 | *ldata++ = offset; 58 | *ldata++ = length - offset; 59 | } 60 | 61 | luaT_pushudata(L, lookup, "torch.LongTensor"); 62 | 63 | return 1; 64 | } 65 | 66 | 67 | static const struct luaL_Reg lib[] = { 68 | {"create_lookup", create_lookup}, 69 | {NULL, NULL}, 70 | }; 71 | 72 | int luaopen_libcsvigo (lua_State *L) { 73 | lua_newtable(L); 74 | luaL_setfuncs(L, lib, 0); 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /init.lua: -------------------------------------------------------------------------------- 1 | ---------------------------------------------------------------------- 2 | -- 3 | -- Copyright (c) 2012 Roy Lowrance, Clement Farabet 4 | -- 5 | -- Permission is hereby granted, free of charge, to any person obtaining 6 | -- a copy of this software and associated documentation files (the 7 | -- "Software"), to deal in the Software without restriction, including 8 | -- without limitation the rights to use, copy, modify, merge, publish, 9 | -- distribute, sublicense, and/or sell copies of the Software, and to 10 | -- permit persons to whom the Software is furnished to do so, subject to 11 | -- the following conditions: 12 | -- 13 | -- The above copyright notice and this permission notice shall be 14 | -- included in all copies or substantial portions of the Software. 15 | -- 16 | -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | -- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | -- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | -- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | -- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | -- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | -- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -- 24 | ---------------------------------------------------------------------- 25 | -- description: 26 | -- csvigo - a little package to handle CSV files (read/write) 27 | -- 28 | -- history: 29 | -- June 24, 2012 - create a complete API to make queries - C. Farabet 30 | -- June 23, 2012 - made a pkg, and high-level functions - C. Farabet 31 | -- June 1, 2012 - csvigo.File class - R. Lowrance 32 | ---------------------------------------------------------------------- 33 | 34 | require 'torch' 35 | require 'dok' 36 | 37 | -- create global nnx table: 38 | csvigo = {} 39 | 40 | -- csvigo.File manager: 41 | torch.include('csvigo', 'File.lua') 42 | 43 | ---------------------------------------------------------------------- 44 | 45 | -- functional API: simple shortcuts to serialize data using CSV files 46 | -- this API is similar to the image.load/save, where the user doens't 47 | -- have to create a csvigo.File object, and handle it later on. 48 | 49 | -- load 50 | function csvigo.load(...) 51 | -- usage 52 | local args, path, separator, mode, header, verbose, skip = dok.unpack( 53 | {...}, 54 | 'csvigo.load', 55 | 'Load a CSV file, according to the specified mode:\n' 56 | .. ' - raw : no clean up, return a raw list of lists, a 1-to-1 mapping to the CSV file\n' 57 | .. ' - tidy : return a clean table, where each entry is a variable that points to its values\n' 58 | .. ' - query : return the tidy table, as well as query operators\n' 59 | .. ' - large : returns a table that decodes rows on the fly, on indexing ', 60 | {arg='path', type='string', help='path to file', req=true}, 61 | {arg='separator', type='string', help='separator (one character)', default=','}, 62 | {arg='mode', type='string', help='load mode: raw | tidy | query', default='tidy'}, 63 | {arg='header', type='string', help='file has a header (variable names): true | false | auto', default='auto'}, 64 | {arg='verbose', type='boolean', help='verbose load', default=true}, 65 | {arg='skip', type='number', help='skip this many lines at start of file', default=0}, 66 | {arg='column_order', type='boolean', help='return csv\'s column order in tidy mode', default=false} 67 | ) 68 | 69 | local function checkheader(header, firstline) 70 | if type(header) == 'boolean' then 71 | return header 72 | end 73 | 74 | if type(header) == 'string' then 75 | if header == 'auto' then 76 | return (tonumber(firstline[1]) == nil) 77 | end 78 | else 79 | -- convert to boolean 80 | return not not header 81 | end 82 | end 83 | 84 | -- check path 85 | path = path:gsub('^~',os.getenv('HOME')) 86 | 87 | -- verbose print 88 | local function vprint(...) if verbose then print('',...) end end 89 | 90 | -- load CSV 91 | vprint('parsing file: ' .. path) 92 | local f = csvigo.File(path, 'r', separator) 93 | local loaded = f:readall(mode) 94 | f:close() 95 | 96 | -- do work depending on mode 97 | if mode == 'raw' or mode == 'large' then 98 | -- simple, dont do anything 99 | vprint('parsing done') 100 | return loaded 101 | 102 | elseif mode == 'tidy' or mode == 'query' then 103 | -- tidy up results: 104 | vprint('tidying up entries') 105 | local tidy = {} 106 | local i2key = {} 107 | -- header? 108 | local start = 1 + skip 109 | if checkheader(header, loaded[start]) then 110 | -- use header names 111 | i2key = loaded[start] 112 | start = start + 1 113 | else 114 | -- generate names 115 | for i = 1,#loaded[start] do 116 | i2key[i] = 'var_'..i 117 | end 118 | end 119 | for i,key in ipairs(i2key) do 120 | tidy[key] = {} 121 | end 122 | -- parse all 123 | for i = start,#loaded do 124 | local entry = loaded[i] 125 | for i,val in ipairs(entry) do 126 | table.insert(tidy[i2key[i]], val) 127 | end 128 | end 129 | -- return tidy table 130 | if mode == 'tidy' then 131 | vprint('returning tidy table') 132 | 133 | if args.column_order then 134 | return i2key,tidy 135 | else 136 | return tidy 137 | end 138 | end 139 | 140 | -- query mode: build reverse index 141 | vprint('generating reversed index for fast queries') 142 | local revidx = {} 143 | for var,vals in pairs(tidy) do 144 | revidx[var] = {} 145 | for i,val in ipairs(vals) do 146 | revidx[var][val] = revidx[var][val] or {} 147 | table.insert(revidx[var][val], i) 148 | end 149 | end 150 | 151 | -- create a function/closure that can be used to query 152 | -- the table 153 | local function query(...) 154 | -- usage 155 | local args, query, varvals = dok.unpack( 156 | {...}, 157 | 'query', 158 | 'This closure was automatically generated to query your data.\n' 159 | .. 'Example of query: query(\'union\', {var1={1}, var2={2,3,4}})\n' 160 | .. 'this query will return a subset of the original data, where var1 = 1 OR var2 = 2 or 3 or 4 \n' 161 | .. '\n' 162 | .. 'Other example of query: query(\'inter\', {var1={1}, var2={2,3,4}})\n' 163 | .. 'this query will return a subset of the original data, where var1 = 1 AND var2 = 2 or 3 or 4 \n' 164 | .. '\n' 165 | .. 'Other example of query: query(\'vars\')\n' 166 | .. 'this will return a list of the variable names\n' 167 | .. '\n' 168 | .. 'Other example of query: query() or query(\'all\')\n' 169 | .. 'this query will return the complete dataset' 170 | , 171 | {arg='query', type='string', help='query: all | help | vars | inter | union', default='all'}, 172 | {arg='vars', type='table', help='list of vars/vals'} 173 | ) 174 | if query == 'help' then 175 | -- help 176 | print(args.usage) 177 | return 178 | 179 | elseif query == 'vars' then 180 | -- return vars 181 | local vars = {} 182 | for k in pairs(tidy) do 183 | table.insert(vars,k) 184 | end 185 | return vars 186 | 187 | elseif query == 'all' then 188 | -- query all: return the whole thing 189 | return tidy 190 | 191 | else 192 | -- query has this form: 193 | -- { var1 = {'value1', 'value2'}, var2 = {'value1'} } 194 | -- OR 195 | -- { var1 = 'value1', var2 = 'value2'} 196 | -- convert second form into first one: 197 | for var,vals in pairs(varvals) do 198 | if type(vals) ~= 'table' then 199 | varvals[var] = {vals} 200 | end 201 | end 202 | -- find all indices that are ok 203 | local indices = {} 204 | if query == 'union' then 205 | for var,vals in pairs(varvals) do 206 | for _,val in ipairs(vals) do 207 | local found = revidx[var][tostring(val)] 208 | if found ~= nil then 209 | for _,idx in ipairs(found) do 210 | table.insert(indices, idx) 211 | end 212 | end 213 | end 214 | end 215 | else -- 'inter' 216 | local revindices = {} 217 | local nvars = 0 218 | for var,vals in pairs(varvals) do 219 | for _,val in ipairs(vals) do 220 | local found = revidx[var][tostring(val)] 221 | for _,idx in ipairs(found) do 222 | revindices[idx] = (revindices[idx] or 0) + 1 223 | end 224 | end 225 | nvars = nvars + 1 226 | end 227 | for var,vals in pairs(varvals) do 228 | for _,val in ipairs(vals) do 229 | local found = revidx[var][tostring(val)] 230 | for _,idx in ipairs(found) do 231 | if revindices[idx] == nvars then 232 | table.insert(indices, idx) 233 | end 234 | end 235 | end 236 | end 237 | end 238 | table.sort(indices, function(a,b) return a',...) end end 291 | 292 | -- save CSV 293 | vprint('writing to file: ' .. path) 294 | local f = csvigo.File(path,'w',separator, args.nan_as_missing) 295 | 296 | -- autodetect mode? 297 | if mode == 'autodetect' then 298 | if type(data) == 'function' then 299 | mode = 'query' 300 | elseif type(data) == 'table' then 301 | if #data == 0 then 302 | mode = 'tidy' 303 | else 304 | mode = 'raw' 305 | end 306 | else 307 | error('cannot autodetect mode, incorrect data type') 308 | end 309 | end 310 | 311 | -- do work depending on mode 312 | if mode == 'raw' then 313 | -- simple, just write table 314 | f:writeall(data) 315 | vprint('writing done') 316 | 317 | elseif mode == 'tidy' or mode == 'query' then 318 | -- query mode? 319 | if mode == 'query' then 320 | -- query all data: 321 | vprint('generating tidy table') 322 | data = data('all') 323 | end 324 | -- 'data' is a tidy table, export to raw mode 325 | vprint('exporting tidy table to raw CSV') 326 | local raw = {} 327 | -- use headers? 328 | local headers 329 | if header then 330 | headers = {} 331 | 332 | if args.column_order then 333 | for _,var in pairs(args.column_order) do 334 | table.insert(headers, var) 335 | end 336 | else 337 | for var in pairs(data) do 338 | table.insert(headers, var) 339 | end 340 | end 341 | end 342 | -- export data 343 | if args.column_order then 344 | for var,vals in pairs(args.column_order) do 345 | for i,val in ipairs(data[vals]) do 346 | raw[i] = raw[i] or {} 347 | table.insert(raw[i], val) 348 | end 349 | end 350 | else 351 | for var,vals in pairs(data) do 352 | for i,val in ipairs(vals) do 353 | raw[i] = raw[i] or {} 354 | table.insert(raw[i], val) 355 | end 356 | end 357 | end 358 | -- write raw data 359 | if headers then f:write(headers) end 360 | f:writeall(raw) 361 | vprint('writing done') 362 | 363 | else 364 | print(args.usage) 365 | error('unknown mode') 366 | end 367 | 368 | -- done 369 | f:close() 370 | end 371 | 372 | return csvigo 373 | -------------------------------------------------------------------------------- /test.lua: -------------------------------------------------------------------------------- 1 | 2 | -- test csv.File class 3 | 4 | csv = require 'csvigo' 5 | 6 | tempfilename = "csv-test-delete-me.csv" 7 | 8 | function testerror(a, b, msg) 9 | print("a = ", a) 10 | print("b = ", b) 11 | error(msg) 12 | end 13 | 14 | -- test two arrays 15 | function testequalarray(a, b) 16 | if #a ~= #b then 17 | testerror(a, b, 18 | string.format("#a == %d ~= %d == #b", #a, #b)) 19 | end 20 | for i = 1, #a do 21 | if a[i] ~= b[i] then 22 | testerror(a, b, string.format("for i=%d, %q not equal %q", 23 | i, a[i], b[i])) 24 | end 25 | end 26 | end 27 | 28 | -- test two values 29 | function testvalue(a, b) 30 | local res = a == b 31 | if res then return end 32 | testerror(a, b, string.format("%q not equal %q", a, b)) 33 | end 34 | 35 | function writeCrlf(file) 36 | file:write('one,two,three\r\n1,2,3\r\n11,12,13') 37 | end 38 | 39 | -- test writing file 40 | function writeRecs(csvf) 41 | csvf:write({"a","b","c"}) 42 | csvf:write({01, 02, 03}) 43 | csvf:write({11, 12, 13}) 44 | end 45 | 46 | csvf = csv.File(tempfilename, "w") 47 | writeRecs(csvf) 48 | csvf:close() 49 | 50 | 51 | -- test reading same file line by line 52 | function readRecs(csv) 53 | row = csvf:read() 54 | testequalarray(row, {"a","b","c"}) 55 | datarownum = 0 56 | while true do 57 | local row = csvf:read() 58 | if not row then break end 59 | datarownum = datarownum + 1 60 | if datarownum == 1 then 61 | testequalarray(row, {"1", "2", "3"}) 62 | else 63 | testequalarray(row, {"11", "12", "13"}) 64 | end 65 | end 66 | end 67 | 68 | csvf = csv.File(tempfilename, "r") 69 | readRecs(csvf) 70 | csvf:close() 71 | 72 | -- read same file all at once 73 | csvf = csv.File(tempfilename, "r") 74 | lines = csvf:readall() 75 | csvf:close() 76 | testequalarray(lines[1], {"a","b","c"}) 77 | testequalarray(lines[2], {"1", "2", "3"}) 78 | testequalarray(lines[3], {"11", "12", "13"}) 79 | 80 | -- test using a | instead of , as a separator 81 | csvf = csv.File(tempfilename, "w", "|") 82 | writeRecs(csvf) 83 | csvf:close() 84 | 85 | -- now read the records 86 | csvf = csv.File(tempfilename, "r", "|") 87 | readRecs(csvf) 88 | csvf:close() 89 | 90 | -- write some \r\n line endings 91 | file = io.open(tempfilename, 'w') 92 | writeCrlf(file) 93 | file:close() 94 | 95 | -- read the \r\n file 96 | data = csv.load({ path = tempfilename, mode = "large"}) 97 | testequalarray(data[1], {"one","two","three"}) 98 | testequalarray(data[2], {"1","2","3"}) 99 | testequalarray(data[3], {"11","12","13"}) 100 | 101 | os.execute("rm " .. tempfilename) 102 | 103 | print("all tests passed") 104 | --------------------------------------------------------------------------------