├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile.am ├── README.md ├── autogen.sh ├── configure.ac ├── examples ├── Makefile.am ├── iris.csv └── parser.c ├── src ├── Makefile.am ├── csv_parser.c └── csv_parser.h └── tests ├── Makefile.am └── main.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | src/libcsv_parser.a 3 | config 4 | config.log 5 | aclocal.m4 6 | autom4te.cache 7 | configure 8 | config.status 9 | Makefile.in 10 | Makefile 11 | .deps 12 | .dirstamp 13 | libtool 14 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | compiler: 3 | - gcc 4 | before_install: 5 | - sudo apt-get update -qq 6 | - sudo apt-get install -qq libtool autoconf check 7 | script: ./autogen.sh && ./configure && make && make test 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2013 Tadas Vilkeliskis 2 | 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this software and associated documentation files (the "Software"), to deal in 6 | the Software without restriction, including without limitation the rights to 7 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 8 | of the Software, and to permit persons to whom the Software is furnished to do 9 | so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all 12 | copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 | SOFTWARE. 21 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | ACLOCAL_AMFLAGS = -I config 2 | 3 | SUBDIRS = src 4 | 5 | DIST_SUBDIRS = src 6 | 7 | EXTRA_DIST = \ 8 | autogen.sh \ 9 | LICENSE 10 | 11 | test: all 12 | cd tests ; $(MAKE) check 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | csv_parser [![Build Status](https://travis-ci.org/tadasv/csv_parser.png?branch=master)](https://travis-ci.org/tadasv/csv_parser) 2 | ========== 3 | 4 | This library provides a CSV parser to be used in an event loop for processing large amounts of streaming data. 5 | The parser itself does not use any internal buffers. Whenever a field value is available, the user-specified callback 6 | is invoked with the field data and its CSV location (row and column). A field that spans several input chunks is delivered in pieces, so the callback may be invoked more than once for the same row and column. 7 | 8 | See ``examples`` for more information. 9 | 10 | Building 11 | -------- 12 | 13 | Prerequisites: 14 | 15 | * gcc 16 | * libtool 17 | * autoconf 18 | * automake 19 | * check (to build tests) 20 | 21 | To build and install: 22 | 23 | $ sh autogen.sh 24 | $ ./configure 25 | $ make 26 | $ make install 27 | 28 | You can optionally build and run tests: 29 | 30 | $ make test 31 | -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Script to generate all required files from a git checkout 4 | 5 | command -v libtool > /dev/null 2>&1 6 | if [ $? 
-ne 0 ]; then
    echo "autogen.sh: error: could not find libtool. libtool is required to run autogen.sh." 1>&2
    exit 1
fi

# On Darwin, run glibtoolize --copy before autoreconf.
case `uname` in Darwin*) glibtoolize --copy ;;
esac

# autoreconf must be on PATH.
command -v autoreconf > /dev/null 2>&1

if [ $? -ne 0 ]; then
    echo "autogen.sh: error: could not find autoreconf. autoconf and automake are required to run autogen.sh." 1>&2
    exit 1
fi

# ./config is passed to autoreconf via -I below.
mkdir -p ./config
if [ $? -ne 0 ]; then
    echo "autogen.sh: error: could not create directory: ./config." 1>&2
    exit 1
fi

# Save autoreconf's exit status right away: once the `[ ... ]` test has run,
# `$?` holds the status of that test (0 inside the `then` branch), so the
# message would otherwise always report "status 0".
autoreconf --install --force --verbose -I config
status=$?
if [ $status -ne 0 ]; then
    echo "autogen.sh: error: autoreconf exited with status $status" 1>&2
    exit 1
fi
-------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_PREREQ([2.65]) 2 | AC_INIT([csv_parser], [0.1.0], [vilkeliskis.t@gmail.com]) 3 | AC_CONFIG_AUX_DIR(config) 4 | AC_CONFIG_MACRO_DIR(config) 5 | AM_INIT_AUTOMAKE(tar-ustar dist-zip foreign subdir-objects) 6 | 7 | # This lets us use PACKAGE_VERSION in Makefiles 8 | AC_SUBST(PACKAGE_VERSION) 9 | 10 | # Initialize libtool so we could build shared objects. 11 | LT_INIT 12 | 13 | # Checks for programs. 
14 | AC_PROG_CC 15 | 16 | AC_CONFIG_FILES([Makefile src/Makefile tests/Makefile examples/Makefile]) 17 | AC_OUTPUT 18 | -------------------------------------------------------------------------------- /examples/Makefile.am: -------------------------------------------------------------------------------- 1 | AM_CPPFLAGS = -I$(top_srcdir)/src 2 | 3 | bin_PROGRAMS = parser 4 | 5 | parser_SOURCES = ../src/csv_parser.c \ 6 | ../src/csv_parser.h \ 7 | parser.c 8 | -------------------------------------------------------------------------------- /examples/iris.csv: -------------------------------------------------------------------------------- 1 | sepalLength,sepalWidth,petalLength,petalWidth,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.1,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.1,1.5,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 
5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 
111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica 152 | -------------------------------------------------------------------------------- /examples/parser.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | typedef struct field_ { 9 | char data[32]; 10 | int row; 11 | int col; 12 | struct field_ *next; 13 | } field_t; 14 | 15 | 16 | typedef struct csv_ { 17 | field_t *head; 18 | field_t *tail; 19 | } csv_t; 20 | 21 | 22 | field_t *new_field(const char *data, size_t length, int row, int col) 23 | { 24 | field_t *field = malloc(sizeof(field_t)); 25 | memset(field, 0, sizeof(field_t)); 26 | field->row = row; 27 | field->col 
= col; 28 | strncpy(field->data, data, length); 29 | return field; 30 | } 31 | 32 | 33 | int field_cb(csv_parser_t *parser, const char *data, size_t length, int row, int col) 34 | { 35 | csv_t *csv = parser->data; 36 | field_t *field; 37 | 38 | if (!csv->tail) { 39 | field = new_field(data, length, row, col); 40 | csv->tail = field; 41 | csv->head = field; 42 | } else { 43 | if (csv->tail->row == row && csv->tail->col == col) { 44 | strncat(csv->tail->data, data, length); 45 | } else { 46 | field = new_field(data, length, row, col); 47 | csv->tail->next = field; 48 | csv->tail = field; 49 | } 50 | } 51 | 52 | return 0; 53 | } 54 | 55 | 56 | int main(int argc, const char *argv[]) 57 | { 58 | csv_t csv; 59 | field_t *field; 60 | char buffer[64]; 61 | ssize_t nread; 62 | csv_parser_t parser; 63 | csv_parser_settings_t settings; 64 | 65 | csv.head = NULL; 66 | csv.tail = NULL; 67 | 68 | settings.delimiter = ','; 69 | settings.field_cb = field_cb; 70 | 71 | csv_parser_init(&parser); 72 | parser.data = &csv; 73 | 74 | int fd = open("iris.csv", O_RDONLY); 75 | if (fd == -1) { 76 | perror("open"); 77 | return -1; 78 | } 79 | 80 | while ((nread = read(fd, buffer, sizeof(buffer))) > 0) { 81 | csv_parser_execute(&parser, &settings, buffer, nread); 82 | } 83 | 84 | close(fd); 85 | 86 | field = csv.head; 87 | while (field) { 88 | printf("row: %3d, col: %3d, data: %s\n", field->row, field->col, field->data); 89 | field_t *next = field->next; 90 | free(field); 91 | field = next; 92 | } 93 | 94 | return 0; 95 | } 96 | -------------------------------------------------------------------------------- /src/Makefile.am: -------------------------------------------------------------------------------- 1 | lib_LTLIBRARIES = libcsv_parser.la 2 | libcsv_parser_la_SOURCES = csv_parser.c \ 3 | csv_parser.h 4 | 5 | include_HEADERS = csv_parser.h 6 | -------------------------------------------------------------------------------- /src/csv_parser.c: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013 Tadas Vilkeliskis 3 | * 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 | * of the Software, and to permit persons to whom the Software is furnished to do 10 | * so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | #include 24 | #include 25 | #include 26 | 27 | 28 | void csv_parser_init(csv_parser_t *parser) 29 | { 30 | assert(parser); 31 | parser->state = csvps_line_start; 32 | parser->row = -1; 33 | parser->col = -1; 34 | parser->nread = 0; 35 | parser->data = NULL; 36 | } 37 | 38 | 39 | size_t csv_parser_execute(csv_parser_t *parser, 40 | const csv_parser_settings_t *settings, 41 | const char *data, 42 | size_t data_len) 43 | { 44 | assert(parser); 45 | assert(settings); 46 | assert(data); 47 | 48 | const char *cursor = data; 49 | const char *field_value = NULL; 50 | const char *data_end = data + data_len; 51 | int r; 52 | parser->nread = 0; 53 | 54 | if (data_len < 1) { 55 | return 0; 56 | } 57 | 58 | while (cursor < data_end) { 59 | char ch = *cursor; 60 | switch (parser->state) { 61 | case csvps_line_start: 62 | field_value = NULL; 63 | parser->row += 1; 64 | parser->col = -1; 65 | if (ch == '\r' || ch == '\n') { 66 | parser->state = csvps_line_end_begin; 67 | } else { 68 | parser->state = csvps_field_start; 69 | } 70 | break; 71 | case csvps_field_start: 72 | parser->col += 1; 73 | parser->state = csvps_field_value; 74 | field_value = cursor; 75 | break; 76 | case csvps_field_value: 77 | if (ch == settings->delimiter) { 78 | parser->state = csvps_field_end; 79 | } else if (ch == '\r' || ch == '\n') { 80 | parser->state = csvps_line_end_begin; 81 | } else { 82 | // If we previously been in csvps_field_value state 83 | // and field_value is not set, set it right away. 84 | // This can happen when we execute parser multiple 85 | // times in the value state. 
86 | if (field_value == NULL) { 87 | field_value = cursor; 88 | } 89 | 90 | cursor++; 91 | if (cursor == data_end) { 92 | if (settings->field_cb && field_value) { 93 | r = settings->field_cb(parser, 94 | field_value, 95 | cursor - field_value, 96 | parser->row, 97 | parser->col); 98 | if (r) { 99 | parser->state = csvps_error; 100 | return parser->nread; 101 | } 102 | } 103 | } 104 | parser->nread++; 105 | } 106 | break; 107 | case csvps_field_end: 108 | // callback 109 | if (settings->field_cb && field_value) { 110 | r = settings->field_cb(parser, 111 | field_value, 112 | cursor - field_value, 113 | parser->row, 114 | parser->col); 115 | if (r) { 116 | parser->state = csvps_error; 117 | return parser->nread; 118 | } 119 | } 120 | 121 | parser->state = csvps_field_start; 122 | cursor++; 123 | parser->nread++; 124 | break; 125 | case csvps_line_end_begin: 126 | // callback 127 | if (settings->field_cb && field_value) { 128 | r = settings->field_cb(parser, 129 | field_value, 130 | cursor - field_value, 131 | parser->row, 132 | parser->col); 133 | if (r) { 134 | parser->state = csvps_error; 135 | return parser->nread; 136 | } 137 | } 138 | parser->state = csvps_line_end; 139 | break; 140 | case csvps_line_end: 141 | if (ch == '\r' || ch == '\n') { 142 | cursor++; 143 | parser->nread++; 144 | } else { 145 | parser->state = csvps_line_start; 146 | } 147 | break; 148 | case csvps_error: 149 | return parser->nread; 150 | break; 151 | default: 152 | assert(0 && "invalid parser state"); 153 | break; 154 | } 155 | } 156 | 157 | return parser->nread; 158 | } 159 | -------------------------------------------------------------------------------- /src/csv_parser.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013 Tadas Vilkeliskis 3 | * 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | * this software and associated documentation files (the "Software"), to deal in 7 | * 
the Software without restriction, including without limitation the rights to 8 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 | * of the Software, and to permit persons to whom the Software is furnished to do 10 | * so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | #ifndef CSV_PARSER_INCLUDE_GUARD_E95A7C4D_BE4A_49D1_8607_900F36DD25B8 24 | #define CSV_PARSER_INCLUDE_GUARD_E95A7C4D_BE4A_49D1_8607_900F36DD25B8 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | #include 31 | #include 32 | 33 | 34 | typedef enum csv_parser_state_t { 35 | csvps_line_start = 0, 36 | csvps_field_start, 37 | csvps_field_value, 38 | csvps_field_end, 39 | csvps_line_end_begin, 40 | csvps_line_end, 41 | csvps_error, 42 | } csv_parser_state_t; 43 | 44 | 45 | typedef struct csv_parser_t { 46 | csv_parser_state_t state; 47 | /* csv row */ 48 | int row; 49 | /* csv column */ 50 | int col; 51 | uint32_t nread; 52 | /* user data */ 53 | void *data; 54 | } csv_parser_t; 55 | 56 | 57 | /** 58 | * Field callback interface. 59 | * 60 | * Field callback can be called multiple times for the same row and column. 61 | * It's up to the user to construct full field value. 62 | * 63 | * Return 0 value on success, anything else on error. 
64 | */ 65 | typedef int (*csv_parser_field_calback_t)(csv_parser_t *parser, 66 | const char *data, 67 | size_t length, 68 | int row, 69 | int col); 70 | 71 | 72 | typedef struct csv_parser_settings_t { 73 | char delimiter; 74 | csv_parser_field_calback_t field_cb; 75 | } csv_parser_settings_t; 76 | 77 | 78 | void csv_parser_init(csv_parser_t *parser); 79 | 80 | /** 81 | * Executes CSV parser on the given data. Returns number 82 | * of bytes read. User should always check if parser state 83 | * is ``csvps_error``. Parser will get into the error state 84 | * if field callback returns a non-zero value. 85 | */ 86 | size_t csv_parser_execute(csv_parser_t *parser, 87 | const csv_parser_settings_t *settings, 88 | const char *data, 89 | size_t data_len); 90 | 91 | 92 | #ifdef __cplusplus 93 | } 94 | #endif 95 | #endif /* end of include guard */ 96 | -------------------------------------------------------------------------------- /tests/Makefile.am: -------------------------------------------------------------------------------- 1 | AM_CPPFLAGS = -I$(top_srcdir)/src 2 | 3 | noinst_PROGRAMS = test 4 | 5 | test_LDFLAGS = -lcheck 6 | test_SOURCES = ../src/csv_parser.c \ 7 | ../src/csv_parser.h \ 8 | main.c 9 | 10 | TESTS = test 11 | -------------------------------------------------------------------------------- /tests/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | struct test_results { 9 | int test_case_number; 10 | int *results; 11 | }; 12 | 13 | 14 | // 5 items per row, 3 rows per test case: 15 * 6 slots 15 | static int expected_test_results[] = { 16 | // "" 17 | -1, -1, -1, -1, -1, 18 | -1, -1, -1, -1, -1, 19 | -1, -1, -1, -1, -1, 20 | // ", " 21 | 0, 1, -1, -1, -1, 22 | -1, -1, -1, -1, -1, 23 | -1, -1, -1, -1, -1, 24 | // "," 25 | 0, -1, -1, -1, -1, 26 | -1, -1, -1, -1, -1, 27 | -1, -1, -1, -1, -1, 28 | // "\n" 29 | -1, -1, -1, -1, -1, 30 | -1, -1, -1, 
-1, -1, 31 | -1, -1, -1, -1, -1, 32 | // "\r\n", 33 | -1, -1, -1, -1, -1, 34 | -1, -1, -1, -1, -1, 35 | -1, -1, -1, -1, -1, 36 | // "a,b,,c," 37 | 1, 2, 0, 1, -1, 38 | -1, -1, -1, -1, -1, 39 | -1, -1, -1, -1, -1, 40 | // "a,b,,c, " 41 | 1, 2, 0, 1, 1, 42 | -1, -1, -1, -1, -1, 43 | -1, -1, -1, -1, -1, 44 | // "a,b,\na,b," 45 | 1, 2, 0, -1, -1, 46 | 1, 1, -1, -1, -1, 47 | -1, -1, -1, -1, -1, 48 | // " bc " 49 | 4, -1, -1, -1, -1, 50 | -1, -1, -1, -1, -1, 51 | -1, -1, -1, -1, -1, 52 | }; 53 | 54 | 55 | static const int results_size = sizeof(expected_test_results); 56 | static const int result_slots = sizeof(expected_test_results)/sizeof(int); 57 | 58 | 59 | static const char *test_data[] = { 60 | "", 61 | ", ", 62 | ",", 63 | "\n", 64 | "\r\n", 65 | "a,bb,,c,", 66 | "a,bb,,c, ", 67 | "a,bb,\na,b,", 68 | " bc ", 69 | }; 70 | 71 | 72 | static int test_data_end_states[] = { 73 | csvps_line_start, 74 | csvps_field_value, 75 | csvps_field_start, 76 | csvps_line_end, 77 | csvps_line_end, 78 | csvps_field_start, 79 | csvps_field_value, 80 | csvps_field_start, 81 | csvps_field_value, 82 | }; 83 | 84 | static int num_test_cases = sizeof(test_data)/(sizeof(char*)); 85 | 86 | 87 | void init_test_results(int *data, int len) 88 | { 89 | int i; 90 | for (i = 0; i < len; i++) { 91 | data[i] = -1; 92 | } 93 | } 94 | 95 | 96 | void print_results(int *data, int len) 97 | { 98 | int i; 99 | for (i = 0; i < len; i++) { 100 | if (i != 0 && i % 5 == 0) { 101 | printf("\n"); 102 | } 103 | printf("%4d ", data[i]); 104 | } 105 | 106 | printf("\n"); 107 | } 108 | 109 | 110 | int field_cb(csv_parser_t *parser, const char *data, size_t length, int row, int col) 111 | { 112 | struct test_results *results = parser->data; 113 | int slot = results->test_case_number * 15 + row * 5 + col; 114 | 115 | if (results->results[slot] == -1) { 116 | results->results[slot] = length; 117 | } else { 118 | results->results[slot] += length; 119 | } 120 | return 0; 121 | } 122 | 123 | 124 | int 
field_error_cb(csv_parser_t *parser, const char *data, size_t length, int row, int col) 125 | { 126 | return -1; 127 | } 128 | 129 | 130 | START_TEST(test_parser_error_state) 131 | { 132 | csv_parser_t parser; 133 | csv_parser_settings_t settings; 134 | int i; 135 | int nread; 136 | const char *str = "a,v,b\n1,2"; 137 | int len = strlen(str); 138 | 139 | csv_parser_state_t states_to_test[] = { 140 | csvps_line_start, 141 | csvps_field_start, 142 | csvps_field_end, 143 | csvps_line_end_begin, 144 | csvps_line_end 145 | }; 146 | 147 | settings.delimiter = ','; 148 | settings.field_cb = field_error_cb; 149 | 150 | for (i = 0; i < 5; i++) { 151 | csv_parser_state_t state = states_to_test[i]; 152 | csv_parser_init(&parser); 153 | parser.state = state; 154 | nread = csv_parser_execute(&parser, &settings, str, len); 155 | ck_assert(parser.state == csvps_error); 156 | } 157 | } 158 | END_TEST 159 | 160 | 161 | START_TEST(test_parser_full) 162 | { 163 | csv_parser_t parser; 164 | csv_parser_settings_t settings; 165 | struct test_results results; 166 | int i; 167 | 168 | settings.delimiter = ','; 169 | settings.field_cb = field_cb; 170 | 171 | results.results = malloc(results_size); 172 | init_test_results(results.results, result_slots); 173 | for (i = 0; i < num_test_cases; i++) { 174 | const char *str = test_data[i]; 175 | int len = strlen(str); 176 | 177 | results.test_case_number = i; 178 | 179 | csv_parser_init(&parser); 180 | parser.data = &results; 181 | 182 | int nread = csv_parser_execute(&parser, &settings, str, len); 183 | ck_assert_int_eq(len, nread); 184 | ck_assert_int_eq(parser.state, test_data_end_states[i]); 185 | } 186 | 187 | printf("Test results:\n"); 188 | print_results(results.results, result_slots); 189 | ck_assert_int_eq(0, memcmp(results.results, expected_test_results, results_size)); 190 | free(results.results); 191 | 192 | } 193 | END_TEST 194 | 195 | 196 | START_TEST(test_parser_chunked) 197 | { 198 | csv_parser_t parser; 199 | 
csv_parser_settings_t settings; 200 | struct test_results results; 201 | int i, offset; 202 | 203 | settings.delimiter = ','; 204 | settings.field_cb = field_cb; 205 | 206 | results.results = malloc(results_size); 207 | init_test_results(results.results, result_slots); 208 | for (i = 0; i < num_test_cases; i++) { 209 | const char *str = test_data[i]; 210 | int len = strlen(str); 211 | 212 | results.test_case_number = i; 213 | 214 | csv_parser_init(&parser); 215 | parser.data = &results; 216 | 217 | // parse string one char at a time 218 | for (offset = 0; offset < len; offset++) { 219 | char c = *(str+offset); 220 | int nread = csv_parser_execute(&parser, &settings, &c, 1); 221 | ck_assert_int_eq(1, nread); 222 | } 223 | ck_assert_int_eq(parser.state, test_data_end_states[i]); 224 | } 225 | 226 | printf("Test results:\n"); 227 | print_results(results.results, result_slots); 228 | ck_assert_int_eq(0, memcmp(results.results, expected_test_results, results_size)); 229 | free(results.results); 230 | 231 | } 232 | END_TEST 233 | 234 | 235 | int main(int argc, const char *argv[]) 236 | { 237 | int number_failed; 238 | Suite *s = suite_create("csv_parser"); 239 | TCase *tc = tcase_create("parser"); 240 | tcase_add_test(tc, test_parser_full); 241 | tcase_add_test(tc, test_parser_chunked); 242 | tcase_add_test(tc, test_parser_error_state); 243 | 244 | suite_add_tcase(s, tc); 245 | SRunner *sr = srunner_create(s); 246 | 247 | printf("Expected results:\n"); 248 | print_results(expected_test_results, result_slots); 249 | 250 | srunner_run_all(sr, CK_VERBOSE); 251 | number_failed = srunner_ntests_failed(sr); 252 | srunner_free(sr); 253 | return (number_failed == 0) ? 0 : -1; 254 | } 255 | --------------------------------------------------------------------------------