├── .gitignore ├── LICENSE.txt ├── test └── runtest.sh ├── csv.h ├── Makefile ├── test.c ├── README.md └── csv.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.csv 2 | *~* 3 | *.so 4 | *.o 5 | *.a 6 | *.swp 7 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 Jan Doczy 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 


--------------------------------------------------------------------------------
/test/runtest.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# UNIX only
# Trivial script that generates a test CSV file for our reader and runs the
# test binary against it several times.
#
# NOTE: this script needs bash ($RANDOM, "for (( ... ))" loops, "local");
# it does not work with a plain POSIX sh such as dash.

# gen_test ROWS FILENAME
# Writes ROWS rows of 5 columns each into FILENAME unless FILENAME already
# exists. The rows are written to a temporary file first, so an interrupted
# run never leaves a partial file that would later be taken for a complete one.
gen_test()
{
    local rows=$1
    local filename=$2
    local tmpfile="$filename.tmp"
    local i

    if [ -e "$filename" ]
    then
        # do not generate again
        echo "[SUCCESS] \"$filename\" test CSV file was already generated."
        return 0
    fi

    # generate N row CSV test file
    echo "[INFO] Try to generate CSV file with $rows rows."
    for (( i=1; i<=rows; i++ ))
    do
        # plain number, quoted number, decimal, plain number,
        # quoted number wrapped in escaped ("") quotes
        printf '%s,"%s",%s.%s,%s,"""%s"""\n' \
            "$RANDOM" "$RANDOM" "$RANDOM" "$RANDOM" "$RANDOM" "$RANDOM"
    done > "$tmpfile" || { rm -f "$tmpfile"; return 1; }

    mv -f "$tmpfile" "$filename" || return 1
    echo "[SUCCESS] $filename generated."
}

echo "+------------------------+"
echo "|     RUNNING TESTS      |"
echo "+------------------------+"

testdir=$(dirname "$0")
testcsv="$testdir/test.csv"
testbin="$testdir/test"

# check that the test binary exists before spending time on the CSV file
if [ ! -x "$testbin" ]; then
    echo "[ERROR] csv_test binary not found"
    echo "[ERROR] Please execute \"make\" inside of \"$(cd "$testdir/.." && pwd)\""
    exit 1
fi

# generate testfile with 5 000 000 lines
rows=$((5 * 1000000))
if ! gen_test "$rows" "$testcsv"; then
    echo "[ERROR] Could not generate \"$testcsv\""
    exit 1
fi

# every generated row has 5 columns
cols=$((5 * rows))

# testrounds
rounds=5
for (( i=1; i<=rounds; i++ ))
do
    echo "[INFO] test round $i ..."
    if ! "$testbin" "$testcsv" "$rows" "$cols"; then
        echo "[ERROR] Test failed!"
        exit 1
    fi
done

--------------------------------------------------------------------------------
/csv.h:
--------------------------------------------------------------------------------
/* (c) 2019 Jan Doczy
 * This code is licensed under MIT license (see LICENSE.txt for details) */

/* simple and fast CSV reader:
 * 1. Open CSV file by calling CsvOpen("filename.csv")
 * 2. Read CSV row by calling CsvReadNextRow(csv_handle)
 * 3. Read single CSV line column by calling CsvReadNextCol(returned_row_str, csv_handle)
 * 4. Release the handle by calling CsvClose(csv_handle)
 */

#ifndef CSV_H_INCLUDED
#define CSV_H_INCLUDED

#ifdef __cplusplus
extern "C" { /* C++ name mangling */
#endif

/* pointer to private handle structure */
typedef struct CsvHandle_ *CsvHandle;

/**
 * opens csv file using the default delimiter ',', quote '"' and escape '\\'
 * @filename: pathname of the file
 * @return: csv handle, NULL on failure
 * @notes: you should call CsvClose() to release resources
 */
CsvHandle CsvOpen(const char* filename);

/**
 * opens csv file with custom special characters
 * @filename: pathname of the file
 * @delim: column delimiter character
 * @quote: quote character
 * @escape: escape character
 * @return: csv handle, NULL on failure
 * @notes: you should call CsvClose() to release resources
 */
CsvHandle CsvOpen2(const char* filename,
                   char delim,
                   char quote,
                   char escape);

/**
 * closes csv handle, releasing all resources
 * @handle: csv handle (NULL is accepted and ignored)
 */
void CsvClose(CsvHandle handle);

/**
 * reads (first / next) line of csv file
 * @handle: csv handle
 * @return: zero terminated row, NULL when there is no further row or on error
 * @notes: the returned buffer is owned by the handle and is modified in place
 *         by CsvReadNextCol()
 */
char* CsvReadNextRow(CsvHandle handle);

/**
 * get (first / next) column of a row
 * @row: csv row returned by the latest CsvReadNextRow() call
 * @handle: handle returned by CsvOpen() or CsvOpen2()
 * @return: zero terminated column, NULL when the row has no further column
 */
const char* CsvReadNextCol(char* row, CsvHandle handle);

#ifdef __cplusplus
}
#endif

#endif

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# simple makefile used to build csv dynamic | static library
# and the test application (GNU Make)

##include Config.mk

# fall back to gcc / ar only when the tools were explicitly set to empty
ifeq ($(strip $(CC)),)
CC=gcc
endif
ifeq ($(strip $(AR)),)
AR=ar
endif

CC_FILES = csv.c
HEADERS = csv.h
SHARED_DIR = ./shared
STATIC_DIR = ./static
TEST_DIR = ./test
SHARED_OBJ := $(CC_FILES:%.c=$(SHARED_DIR)/%.o)
STATIC_OBJ := $(CC_FILES:%.c=$(STATIC_DIR)/%.o)
SHARED_LIB := $(SHARED_DIR)/csv.so
STATIC_LIB := $(STATIC_DIR)/csv.a
TEST_BIN := $(TEST_DIR)/test
CFLAGS= -O3 -Wall -ansi -pedantic -g
# the test application is not ANSI C (mixed declarations), so it gets its own
# flags; a separate variable is used instead of a target-specific CFLAGS
# because target-specific variables are inherited by prerequisites and would
# leak into the library objects built on behalf of the test binary
TEST_CFLAGS = -O3 -Wall -pedantic
# large file support (64bit off_t), passed to every compilation
DEFINES = -D_FILE_OFFSET_BITS=64

.PHONY: all shared static make_outdir runtest clean

# make both, shared and static + test
all: $(SHARED_LIB) $(STATIC_LIB) $(TEST_BIN) runtest
shared: $(SHARED_LIB)
static: $(STATIC_LIB)

# kept for backward compatibility, only creates the output directories
make_outdir: | $(SHARED_DIR) $(STATIC_DIR)

# output directories; used as order-only prerequisites so that their
# timestamps never trigger a rebuild
$(SHARED_DIR) $(STATIC_DIR):
	mkdir -p $@

# shared library target
$(SHARED_LIB): $(SHARED_OBJ)
	$(CC) $^ -shared -o $@

$(STATIC_LIB): $(STATIC_OBJ)
	$(AR) rcs $@ $^

# compile test binary, it is linked against the static library
$(TEST_BIN): test.c $(HEADERS) $(STATIC_LIB)
	$(CC) $(TEST_CFLAGS) $(DEFINES) $< $(STATIC_LIB) -lrt -o $@

# all shared objs pass (position independent code)
$(SHARED_DIR)/%.o: %.c $(HEADERS) | $(SHARED_DIR)
	$(CC) $(CFLAGS) -fPIC $(DEFINES) -c $< -o $@

# all static
$(STATIC_DIR)/%.o: %.c $(HEADERS) | $(STATIC_DIR)
	$(CC) $(CFLAGS) $(DEFINES) -c $< -o $@

# runtests
runtest: $(TEST_BIN)
	./test/runtest.sh


# try clean both static and dynamic
clean:
	rm -fR $(SHARED_DIR)
	rm -fR $(STATIC_DIR)
	rm -f $(TEST_BIN) $(TEST_DIR)/*.csv $(TEST_DIR)/*.csv.tmp

--------------------------------------------------------------------------------
/test.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "csv.h"

/* parses a non-negative decimal number
 * @text: zero terminated string
 * @out: parsed value
 * @return: 0 on success, -1 when text is not a plain decimal number
 */
static int ParseCount(const char* text, unsigned* out)
{
    char* end = NULL;
    unsigned long value = strtoul(text, &end, 10);

    if (end == text || *end != '\0')
        return -1;

    *out = (unsigned)value;
    return 0;
}

/* trivial test case used to check
 * CSV reader.
 * usage: test <csvfile> <expected rows> <expected cols>
 * exit status is 0 only when both counts match the expectations
 */
int main(int argc, char** argv)
{
    unsigned rowcount = 0;
    unsigned colcount = 0;
    char* row = NULL;
    unsigned expRows = 0;
    unsigned expCols = 0;
    const char* csvname = NULL;
    CsvHandle handle = NULL;
    struct timespec t1, t2;
    long diff = 0;

    /* get exp rows / cols */
    if (argc < 1 + 3)
    {
        puts("Please provide expectations");
        return EXIT_FAILURE;
    }

    /* parse */
    csvname = argv[1];
    if (ParseCount(argv[2], &expRows) || ParseCount(argv[3], &expCols))
    {
        puts("Expectations have to be decimal numbers");
        return EXIT_FAILURE;
    }

    handle = CsvOpen(csvname);
    if (!handle)
    {
        printf("can not open \"%s\" file\n", csvname);
        return EXIT_FAILURE;
    }

    /* measure; monotonic clock is not affected by wall clock adjustments */
    clock_gettime(CLOCK_MONOTONIC, &t1);
    while ((row = CsvReadNextRow(handle)))
    {
        rowcount++;
        while (CsvReadNextCol(row, handle))
            colcount++;
    }

    clock_gettime(CLOCK_MONOTONIC, &t2);
    CsvClose(handle);

    /* analyze K={t2-t1} */
    diff = (t2.tv_nsec - t1.tv_nsec) / 1000 / 1000;
    diff += (t2.tv_sec - t1.tv_sec) * 1000;

    /* print measurement in (ms) */
    printf("time in milliseconds: %li (ms)\n", diff);

    /* expectations */
    printf("rowcount: %u/%u, colcount: %u/%u\n",
           rowcount, expRows, colcount, expCols);

    /* explicit check instead of assert(), so the verdict does not
     * disappear when the test is compiled with NDEBUG */
    if (expRows != rowcount || expCols != colcount)
    {
        puts("unexpected row / column count");
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Simple and fast CSV reader written in C

## Description
Simple and fast library for fast reading of large CSV files using memory-mapped files.
Purpose of this project was to create fast CSV (comma separated values) reader implementation in C with very simple interface using memory-mapped files.
6 | 7 | ## Features 8 | * Simple C interface 9 | * Very large CSV file support - GBs, TBs 10 | * Using memory mapped files 11 | * Supports UNIX and Windows platforms 12 | * UTF-8 support 13 | * Supports both Windows CRLF "\r\n" and Unix LF "\n" sequences 14 | * Supports newlines "\n" in CSV columns 15 | * Spaces are preserved (e.g. "one, two" -> {"one", " two"}) 16 | 17 | ## How to compile 18 | You can add the ```csv.c``` file to your project or you can use the Makefile provided. 19 | To compile the csv library on Linux with GNU Make: 20 | 21 | * run ```make all``` from the project root to compile all targets and the test application 22 | 23 | ## How to use (trivial example) 24 | Error handling omitted for brevity 25 | 26 | ```C++ 27 | char* row; 28 | int cols = 0; 29 | CsvHandle handle = CsvOpen("csvfile.csv"); 30 | 31 | while (row = CsvReadNextRow(handle)) 32 | { 33 | /* row = CSV row string */ 34 | const char* col; 35 | while (col = CsvReadNextCol(row, handle)) 36 | cols++; /* col = CSV col string */ 37 | } 38 | 39 | printf("Number of cols %i", cols); 40 | ``` 41 | 42 | ## Public API functions 43 | 44 | If you want to read classic CSV files, you can follow this pipeline: 45 | 1. ```CsvOpen()``` to open CSV file 46 | 2. ```CsvReadNextRow()``` to read single CSV line 47 | 3. ```CsvReadNextCol()``` to read single CSV column 48 | 4. ```CsvClose()``` to close opened CSV handle 49 | 50 | ### ```CsvOpen(const char* filepath)``` 51 | Opens a CSV file. 52 | #### Parameters: 53 | * filepath, (```const char*```): path to a CSV file 54 | #### Return value: 55 | ```CsvHandle```: handle to a CSV file on success, NULL otherwise 56 | 57 | ### ```CsvOpen2(const char* filepath, char delim, char quote, char escape)``` 58 | Opens a CSV file. You can specify a custom CSV delimiter, quote and escape char. 
59 | #### Parameters: 60 | * filepath, (```const char*```): path to a CSV file 61 | * delim (```char```): custom CSV delimiter ASCII character (default ',') 62 | * quote (```char```): custom CSV quote ASCII character (default '"') 63 | * escape (```char```): custom CSV escape ASCII character (default '\\') 64 | #### Return value: 65 | ```CsvHandle```: handle to a CSV file on success, NULL otherwise 66 | 67 | ### ```CsvClose(CsvHandle handle)``` 68 | Releases all resources allocated. 69 | #### Parameters: 70 | * handle (```CsvHandle```): handle opened by CsvOpen() or CsvOpen2() 71 | 72 | ### ```CsvReadNextRow(CsvHandle handle)``` 73 | Returns pointer to the next row (UTF-8 zero terminated string) or NULL. 74 | #### Parameters: 75 | * handle (```CsvHandle```): handle opened by CsvOpen() or CsvOpen2() 76 | #### Return value: 77 | ```char*```: zero terminated string on success, NULL on EOF or error. 78 | 79 | ### ```CsvReadNextCol(char* row, CsvHandle handle)``` 80 | Returns pointer to the next column of ```row```, the string returned by CsvReadNextRow() (UTF-8 zero terminated string), or NULL 81 | #### Parameters: 82 | * handle (```CsvHandle```): handle opened by CsvOpen() or CsvOpen2() 83 | #### Return value: 84 | ```const char*```: zero terminated string on success, NULL on EOL or error. 
85 | 86 | ## License 87 | MIT (see LICENSE.txt) 88 | -------------------------------------------------------------------------------- /csv.c: -------------------------------------------------------------------------------- 1 | /* (c) 2019 Jan Doczy 2 | * This code is licensed under MIT license (see LICENSE.txt for details) */ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "csv.h" 9 | 10 | /* Windows specific */ 11 | #ifdef _WIN32 12 | #include 13 | typedef unsigned long long file_off_t; 14 | #else 15 | #include 16 | typedef off_t file_off_t; 17 | #endif 18 | 19 | /* max allowed buffer */ 20 | #define BUFFER_WIDTH_APROX (40 * 1024 * 1024) 21 | 22 | #if defined (__aarch64__) || defined (__amd64__) || defined (_M_AMD64) 23 | /* unpack csv newline search */ 24 | #define CSV_UNPACK_64_SEARCH 25 | #endif 26 | 27 | /* private csv handle: 28 | * @mem: pointer to memory 29 | * @pos: position in buffer 30 | * @size: size of memory chunk 31 | * @context: context used when processing cols 32 | * @blockSize: size of mapped block 33 | * @fileSize: size of opened file 34 | * @mapSize: ... 
35 | * @auxbuf: auxiliary buffer 36 | * @auxbufSize: size of aux buffer 37 | * @auxbufPos: position of aux buffer reader 38 | * @quotes: number of pending quotes parsed 39 | * @fh: file handle - descriptor 40 | * @delim: delimeter - ',' 41 | * @quote: quote '"' 42 | * @escape: escape char 43 | */ 44 | struct CsvHandle_ 45 | { 46 | void* mem; 47 | size_t pos; 48 | size_t size; 49 | char* context; 50 | size_t blockSize; 51 | file_off_t fileSize; 52 | file_off_t mapSize; 53 | size_t auxbufSize; 54 | size_t auxbufPos; 55 | size_t quotes; 56 | void* auxbuf; 57 | 58 | #if defined ( __unix__ ) 59 | int fh; 60 | #elif defined ( _WIN32 ) 61 | HANDLE fh; 62 | HANDLE fm; 63 | #else 64 | #error Wrong platform definition 65 | #endif 66 | 67 | char delim; 68 | char quote; 69 | char escape; 70 | }; 71 | 72 | CsvHandle CsvOpen(const char* filename) 73 | { 74 | /* defaults */ 75 | return CsvOpen2(filename, ',', '"', '\\'); 76 | } 77 | 78 | /* trivial macro used to get page-aligned buffer size */ 79 | #define GET_PAGE_ALIGNED( orig, page ) \ 80 | (((orig) + ((page) - 1)) & ~((page) - 1)) 81 | 82 | /* thin platform dependent layer so we can use file mapping 83 | * with winapi and oses following posix specs. 
84 | */ 85 | #ifdef __unix__ 86 | #include 87 | #include 88 | #include 89 | #include 90 | #include 91 | 92 | CsvHandle CsvOpen2(const char* filename, 93 | char delim, 94 | char quote, 95 | char escape) 96 | { 97 | /* alloc zero-initialized mem */ 98 | long pageSize; 99 | struct stat fs; 100 | 101 | CsvHandle handle = calloc(1, sizeof(struct CsvHandle_)); 102 | if (!handle) 103 | goto fail; 104 | 105 | /* set chars */ 106 | handle->delim = delim; 107 | handle->quote = quote; 108 | handle->escape = escape; 109 | 110 | /* page size */ 111 | pageSize = sysconf(_SC_PAGESIZE); 112 | if (pageSize < 0) 113 | goto fail; 114 | 115 | /* align to system page size */ 116 | handle->blockSize = GET_PAGE_ALIGNED(BUFFER_WIDTH_APROX, pageSize); 117 | 118 | /* open new fd */ 119 | handle->fh = open(filename, O_RDONLY); 120 | if (handle->fh < 0) 121 | goto fail; 122 | 123 | /* get real file size */ 124 | if (fstat(handle->fh, &fs)) 125 | { 126 | close(handle->fh); 127 | goto fail; 128 | } 129 | 130 | handle->fileSize = fs.st_size; 131 | return handle; 132 | 133 | fail: 134 | free(handle); 135 | return NULL; 136 | } 137 | 138 | static void* MapMem(CsvHandle handle) 139 | { 140 | handle->mem = mmap(0, handle->blockSize, 141 | PROT_READ | PROT_WRITE, 142 | MAP_PRIVATE, 143 | handle->fh, handle->mapSize); 144 | return handle->mem; 145 | } 146 | 147 | static void UnmapMem(CsvHandle handle) 148 | { 149 | if (handle->mem) 150 | munmap(handle->mem, handle->blockSize); 151 | } 152 | 153 | void CsvClose(CsvHandle handle) 154 | { 155 | if (!handle) 156 | return; 157 | 158 | UnmapMem(handle); 159 | 160 | close(handle->fh); 161 | free(handle->auxbuf); 162 | free(handle); 163 | } 164 | 165 | #else 166 | 167 | /* extra Windows specific implementations 168 | */ 169 | CsvHandle CsvOpen2(const char* filename, 170 | char delim, 171 | char quote, 172 | char escape) 173 | { 174 | LARGE_INTEGER fsize; 175 | SYSTEM_INFO info; 176 | size_t pageSize = 0; 177 | CsvHandle handle = calloc(1, sizeof(struct 
CsvHandle_)); 178 | if (!handle) 179 | return NULL; 180 | 181 | handle->delim = delim; 182 | handle->quote = quote; 183 | handle->escape = escape; 184 | 185 | GetSystemInfo(&info); 186 | handle->blockSize = GET_PAGE_ALIGNED(BUFFER_WIDTH_APROX, info.dwPageSize); 187 | handle->fh = CreateFile(filename, 188 | GENERIC_READ, 189 | FILE_SHARE_READ, 190 | NULL, 191 | OPEN_EXISTING, 192 | FILE_ATTRIBUTE_NORMAL, 193 | NULL); 194 | 195 | if (handle->fh == INVALID_HANDLE_VALUE) 196 | goto fail; 197 | 198 | if (GetFileSizeEx(handle->fh, &fsize) == FALSE) 199 | goto fail; 200 | 201 | handle->fileSize = fsize.QuadPart; 202 | if (!handle->fileSize) 203 | goto fail; 204 | 205 | handle->fm = CreateFileMapping(handle->fh, NULL, PAGE_WRITECOPY, 0, 0, NULL); 206 | if (handle->fm == NULL) 207 | goto fail; 208 | 209 | return handle; 210 | 211 | fail: 212 | if (handle->fh != INVALID_HANDLE_VALUE) 213 | CloseHandle(handle->fh); 214 | 215 | free(handle); 216 | return NULL; 217 | } 218 | 219 | static void* MapMem(CsvHandle handle) 220 | { 221 | size_t size = handle->blockSize; 222 | if (handle->mapSize + size > handle->fileSize) 223 | size = 0; /* last chunk, extend to file mapping max */ 224 | 225 | handle->mem = MapViewOfFileEx(handle->fm, 226 | FILE_MAP_COPY, 227 | (DWORD)(handle->mapSize >> 32), 228 | (DWORD)(handle->mapSize & 0xFFFFFFFF), 229 | size, 230 | NULL); 231 | return handle->mem; 232 | } 233 | 234 | static void UnmapMem(CsvHandle handle) 235 | { 236 | if (handle->mem) 237 | UnmapViewOfFileEx(handle->mem, 0); 238 | } 239 | 240 | void CsvClose(CsvHandle handle) 241 | { 242 | if (!handle) 243 | return; 244 | 245 | UnmapMem(handle); 246 | 247 | CloseHandle(handle->fm); 248 | CloseHandle(handle->fh); 249 | free(handle->auxbuf); 250 | free(handle); 251 | } 252 | 253 | #endif 254 | 255 | static int CsvEnsureMapped(CsvHandle handle) 256 | { 257 | file_off_t newSize; 258 | 259 | /* do not need to map */ 260 | if (handle->pos < handle->size) 261 | return 0; 262 | 263 | UnmapMem(handle); 
264 | 265 | handle->mem = NULL; 266 | if (handle->mapSize >= handle->fileSize) 267 | return -EINVAL; 268 | 269 | newSize = handle->mapSize + handle->blockSize; 270 | if (MapMem(handle)) 271 | { 272 | handle->pos = 0; 273 | handle->mapSize = newSize; 274 | 275 | /* read only up to filesize: 276 | * 1. mapped block size is < then filesize: (use blocksize) 277 | * 2. mapped block size is > then filesize: (use remaining filesize) */ 278 | handle->size = handle->blockSize; 279 | if (handle->mapSize > handle->fileSize) 280 | handle->size = (size_t)(handle->fileSize % handle->blockSize); 281 | 282 | return 0; 283 | } 284 | 285 | return -ENOMEM; 286 | } 287 | 288 | static char* CsvChunkToAuxBuf(CsvHandle handle, char* p, size_t size) 289 | { 290 | size_t newSize = handle->auxbufPos + size + 1; 291 | if (handle->auxbufSize < newSize) 292 | { 293 | void* mem = realloc(handle->auxbuf, newSize); 294 | if (!mem) 295 | return NULL; 296 | 297 | handle->auxbuf = mem; 298 | handle->auxbufSize = newSize; 299 | } 300 | 301 | memcpy((char*)handle->auxbuf + handle->auxbufPos, p, size); 302 | handle->auxbufPos += size; 303 | 304 | *(char*)((char*)handle->auxbuf + handle->auxbufPos) = '\0'; 305 | return handle->auxbuf; 306 | } 307 | 308 | static void CsvTerminateLine(char* p, size_t size) 309 | { 310 | /* we do support standard POSIX LF sequence 311 | * and Windows CR LF sequence. 312 | * old non POSIX Mac OS CR is not supported. 
313 | */ 314 | char* res = p; 315 | if (size >= 2 && p[-1] == '\r') 316 | --res; 317 | 318 | *res = 0; 319 | } 320 | 321 | #define CSV_QUOTE_BR(c, n) \ 322 | do \ 323 | if (c##n == quote) \ 324 | handle->quotes++; \ 325 | else if (c##n == '\n' && !(handle->quotes & 1)) \ 326 | return p + n; \ 327 | while (0) 328 | 329 | 330 | static char* CsvSearchLf(char* p, size_t size, CsvHandle handle) 331 | { 332 | /* TODO: this can be greatly optimized by 333 | * using modern SIMD instructions, but for now 334 | * we only fetch 8Bytes "at once" 335 | */ 336 | char* end = p + size; 337 | char quote = handle->quote; 338 | 339 | #ifdef CSV_UNPACK_64_SEARCH 340 | uint64_t* pd = (uint64_t*)p; 341 | uint64_t* pde = pd + (size / sizeof(uint64_t)); 342 | 343 | for (; pd < pde; pd++) 344 | { 345 | /* unpack 64bits to 8x8bits */ 346 | char c0, c1, c2, c3, c4, c5, c6, c7; 347 | p = (char*)pd; 348 | c0 = p[0]; 349 | c1 = p[1]; 350 | c2 = p[2]; 351 | c3 = p[3]; 352 | c4 = p[4]; 353 | c5 = p[5]; 354 | c6 = p[6]; 355 | c7 = p[7]; 356 | 357 | CSV_QUOTE_BR(c, 0); 358 | CSV_QUOTE_BR(c, 1); 359 | CSV_QUOTE_BR(c, 2); 360 | CSV_QUOTE_BR(c, 3); 361 | CSV_QUOTE_BR(c, 4); 362 | CSV_QUOTE_BR(c, 5); 363 | CSV_QUOTE_BR(c, 6); 364 | CSV_QUOTE_BR(c, 7); 365 | } 366 | p = (char*)pde; 367 | #endif 368 | 369 | for (; p < end; p++) 370 | { 371 | char c0 = *p; 372 | CSV_QUOTE_BR(c, 0); 373 | } 374 | 375 | return NULL; 376 | } 377 | 378 | char* CsvReadNextRow(CsvHandle handle) 379 | { 380 | size_t size; 381 | char* p = NULL; 382 | char* found = NULL; 383 | 384 | do 385 | { 386 | int err = CsvEnsureMapped(handle); 387 | handle->context = NULL; 388 | 389 | if (err == -EINVAL) 390 | { 391 | /* if this is n-th iteration 392 | * return auxbuf (remaining bytes of the file) */ 393 | if (p == NULL) 394 | break; 395 | 396 | return handle->auxbuf; 397 | } 398 | else if (err == -ENOMEM) 399 | { 400 | break; 401 | } 402 | 403 | size = handle->size - handle->pos; 404 | if (!size) 405 | break; 406 | 407 | /* search this 
chunk for NL */ 408 | p = (char*)handle->mem + handle->pos; 409 | found = CsvSearchLf(p, size, handle); 410 | 411 | if (found) 412 | { 413 | /* prepare position for next iteration */ 414 | size = (size_t)(found - p) + 1; 415 | handle->pos += size; 416 | handle->quotes = 0; 417 | 418 | if (handle->auxbufPos) 419 | { 420 | if (!CsvChunkToAuxBuf(handle, p, size)) 421 | break; 422 | 423 | p = handle->auxbuf; 424 | size = handle->auxbufPos; 425 | } 426 | 427 | /* reset auxbuf position */ 428 | handle->auxbufPos = 0; 429 | 430 | /* terminate line */ 431 | CsvTerminateLine(p + size - 1, size); 432 | return p; 433 | } 434 | else 435 | { 436 | /* reset on next iteration */ 437 | handle->pos = handle->size; 438 | } 439 | 440 | /* correctly process boundries, storing 441 | * remaning bytes in aux buffer */ 442 | if (!CsvChunkToAuxBuf(handle, p, size)) 443 | break; 444 | 445 | } while (!found); 446 | 447 | return NULL; 448 | } 449 | 450 | const char* CsvReadNextCol(char* row, CsvHandle handle) 451 | { 452 | /* return properly escaped CSV col 453 | * RFC: [https://tools.ietf.org/html/rfc4180] 454 | */ 455 | char* p = handle->context ? 
handle->context : row; 456 | char* d = p; /* destination */ 457 | char* b = p; /* begin */ 458 | int quoted = 0; /* idicates quoted string */ 459 | 460 | quoted = *p == handle->quote; 461 | if (quoted) 462 | p++; 463 | 464 | for (; *p; p++, d++) 465 | { 466 | /* double quote is present if (1) */ 467 | int dq = 0; 468 | 469 | /* skip escape */ 470 | if (*p == handle->escape && p[1]) 471 | p++; 472 | 473 | /* skip double-quote */ 474 | if (*p == handle->quote && p[1] == handle->quote) 475 | { 476 | dq = 1; 477 | p++; 478 | } 479 | 480 | /* check if we should end */ 481 | if (quoted && !dq) 482 | { 483 | if (*p == handle->quote) 484 | break; 485 | } 486 | else if (*p == handle->delim) 487 | { 488 | break; 489 | } 490 | 491 | /* copy if required */ 492 | if (d != p) 493 | *d = *p; 494 | } 495 | 496 | if (!*p) 497 | { 498 | /* nothing to do */ 499 | if (p == b) 500 | return NULL; 501 | 502 | handle->context = p; 503 | } 504 | else 505 | { 506 | /* end reached, skip */ 507 | *d = '\0'; 508 | if (quoted) 509 | { 510 | for (p++; *p; p++) 511 | if (*p == handle->delim) 512 | break; 513 | 514 | if (*p) 515 | p++; 516 | 517 | handle->context = p; 518 | } 519 | else 520 | { 521 | handle->context = p + 1; 522 | } 523 | } 524 | return b; 525 | } 526 | --------------------------------------------------------------------------------