├── .gitignore ├── CHANGES ├── COPYING ├── INSTALL ├── Makefile.am ├── README ├── README.md ├── autogen.sh ├── cleanup.sh ├── configure.ac ├── csv.xsl ├── csvprintf.1.in ├── csvprintf.h ├── main.c ├── tests ├── run.sh ├── run2.sh ├── test-bash-omit1.tst ├── test-bash-prefix1.tst ├── test-bash-prefix2.tst ├── test-bash-quote.tst ├── test-cflag-not-found.tst ├── test-cflag-xml.tst ├── test-json-skip1.tst ├── test-tab-noskip.tst ├── test1.in ├── test1.out1 ├── test1.out2 ├── test1.out3a ├── test1.out3b ├── test1.out4 ├── test1.out5a ├── test1.out5b ├── test2.in ├── test2.out1 ├── test2.out2 ├── test2.out3a ├── test2.out3b ├── test2.out4 ├── test2.out5a ├── test2.out5b ├── test3.in ├── test3.out1 ├── test3.out2 ├── test3.out3a ├── test3.out3b ├── test3.out4 ├── test3.out5a ├── test3.out5b ├── test4.in ├── test4.out1 ├── test4.out2 ├── test4.out3a ├── test4.out3b ├── test4.out4 ├── test4.out5a ├── test4.out5b ├── test5.in ├── test5.out1 ├── test5.out2 ├── test5.out3a ├── test5.out3b ├── test5.out4 ├── test5.out5a ├── test5.out5b ├── test6.in ├── test6.out1 ├── test6.out2 ├── test6.out3a ├── test6.out3b ├── test6.out4 ├── test6.out5a ├── test6.out5b ├── test7.in ├── test7.out1 ├── test7.out2 ├── test7.out3a ├── test7.out3b ├── test7.out4 ├── test7.out5a ├── test7.out5b ├── test8.in ├── test8.out1 ├── test8.out2 ├── test8.out3a ├── test8.out3b ├── test8.out4 ├── test8.out5a ├── test8.out5b ├── test9.in ├── test9.out1 ├── test9.out2 ├── test9.out3a ├── test9.out3b ├── test9.out4 ├── test9.out5a └── test9.out5b └── xml2csv.in /.gitignore: -------------------------------------------------------------------------------- 1 | aclocal.m4 2 | autom4te.cache 3 | config.h 4 | config.h.in 5 | config.log 6 | config.status 7 | configure 8 | csvprintf 9 | csvprintf.1 10 | .deps 11 | gitrev.c 12 | Makefile 13 | Makefile.in 14 | *.o 15 | scripts 16 | stamp-h1 17 | xml2csv 18 | -------------------------------------------------------------------------------- /CHANGES: 
-------------------------------------------------------------------------------- 1 | Version Next 2 | 3 | - Fixed bug where \t separator was being skipped as whitespace 4 | - Allow duplicate column names if the "-c" flag avoids them 5 | 6 | Version 1.3.2 released January 25, 2023 7 | 8 | - Fixed bug where we could emit empty XML tag names 9 | - Fixed bug in man page examples for "-b" flag 10 | 11 | Version 1.3.1 released December 14, 2021 12 | 13 | - Added "-c" flag for explicit column names 14 | - Added "-n" flag that only reads column names 15 | - Added "-p" flag for prefixing names 16 | - Omit special variable names in Bash mode 17 | - Fixed build error on systems without 'u_char' defined 18 | 19 | Version 1.3.0 released December 9, 2021 20 | 21 | - Added "-b" flag for new Bash output mode 22 | 23 | Version 1.2.1 released November 24, 2021 24 | 25 | - Fixed bug where "-x" flag was behaving like "-X" 26 | 27 | Version 1.2.0 released November 22, 2021 28 | 29 | - Added "-j" flag for JSON text sequence document output. 30 | - Stopped escaping double quote as "&quot;" in plain XML text. 
31 | 32 | Version 1.1.0 released February 25, 2021 33 | 34 | - Added support for format strings containing column names 35 | 36 | Version 1.0.4 released August 1, 2018 37 | 38 | - Fixed "unexpected character" bug when line ends with QUOTE, CR 39 | - Added "-X" flag to derive XML tag names from column headers 40 | 41 | Version 1.0.3 (r32) released January 5, 2013 42 | 43 | - Add support for converting XML back to CSV 44 | - Add `-e' flag to set input character encoding 45 | - Escape CR characters in XML output 46 | - Fixed glitches in man page 47 | 48 | Version 1.0.2 (r25) released August 25, 2012 49 | 50 | - Allow backslash escapes for `-s' and `-q' flags 51 | - Accept files that lack a terminating newline 52 | - Accept CR, LF, or CR-LF line endings 53 | 54 | Version 1.0.1 (r17) released March 9, 2012 55 | 56 | - Fix bug where `-s' flag did not function (Issue #1) 57 | - Document '%0$' specifier in man page 58 | 59 | Version 1.0 (r4) released November 30, 2010 60 | 61 | - Initial release 62 | 63 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | 2 | Simplified instructions: 3 | 4 | 1. ./configure 5 | 2. make 6 | 3. sudo make install 7 | 8 | Please see 9 | 10 | https://github.com/archiecobbs/csvprintf 11 | 12 | for more information. 
13 | 14 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | # 3 | # csvprintf - Simple CSV file parser for the UNIX command line 4 | # 5 | 6 | bin_PROGRAMS= csvprintf 7 | 8 | bin_SCRIPTS= xml2csv 9 | 10 | noinst_HEADERS= csvprintf.h 11 | 12 | man_MANS= csvprintf.1 13 | 14 | pkgdata_DATA= csv.xsl 15 | 16 | docdir= $(datadir)/doc/packages/$(PACKAGE) 17 | 18 | doc_DATA= CHANGES COPYING README 19 | 20 | EXTRA_DIST= CHANGES INSTALL csvprintf.1.in xml2csv.in csv.xsl 21 | 22 | csvprintf_SOURCES= main.c \ 23 | gitrev.c 24 | 25 | DISTCLEANFILES= csvprintf.1 xml2csv 26 | 27 | SUFFIXES= .in 28 | .in: 29 | rm -f $@; $(subst) < $< >$@ 30 | 31 | .PHONY: tests 32 | tests: csvprintf 33 | @echo '************' 34 | @echo 'TEST SUITE 1' 35 | @echo '************' 36 | @cd tests && ./run.sh 37 | @echo '************' 38 | @echo 'TEST SUITE 2' 39 | @echo '************' 40 | @cd tests && ./run2.sh 41 | 42 | subst= sed \ 43 | -e 's|@PACKAGE[@]|$(PACKAGE)|g' \ 44 | -e 's|@PACKAGE_VERSION[@]|$(PACKAGE_VERSION)|g' \ 45 | -e 's|@pkgdatadir[@]|$(pkgdatadir)|g' \ 46 | -e 's|@XSLTPROC[@]|$(XSLTPROC)|g' 47 | 48 | install-data-hook: 49 | ln "$(DESTDIR)$(man1dir)"/csvprintf.1 "$(DESTDIR)$(man1dir)"/xml2csv.1 50 | 51 | uninstall-hook: 52 | rm -f "$(DESTDIR)$(man1dir)"/xml2csv.1 53 | 54 | gitrev.c: 55 | printf 'const char *const csvprintf_version = "%s";\n' "`git describe`" > gitrev.c 56 | 57 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | csvprintf is a simple UNIX command line utility for parsing CSV files. 2 | 3 | csvprintf works just like the printf(1) command line utility. You 4 | supply a printf(1) format string on the command line and each record 5 | in the CSV file is formatted accordingly. 
Each format specifier in 6 | the format string contains a column accessor to specify which CSV 7 | column to use, so for example '%3$d' would format the third column 8 | as a decimal value. 9 | 10 | csvprintf can also convert CSV files into XML and JSON documents 11 | and Bash variable assignments suitable for eval(1). 12 | 13 | See INSTALL for installation instructions. 14 | 15 | See COPYING for license. 16 | 17 | See CHANGES for change history. 18 | 19 | Enjoy! 20 | 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **csvprintf** is a simple UNIX command line utility for parsing CSV files. 2 | 3 | **csvprintf** works just like the `printf(1)` command line utility. You supply a `printf(1)` format string on the command line and each record in the CSV file is formatted accordingly. Each format specifier in the format string contains a column accessor to specify which CSV column to use, so for example `%3$d` would format the third column as a decimal value. 4 | 5 | **csvprintf** can also convert CSV files into XML, JSON, and `bash(1)` variable assignments. 6 | 7 | You can view the [ManPage](https://github.com/archiecobbs/csvprintf/wiki/ManPage) online. 8 | 9 | ### Examples 10 | 11 | Given this input file `input.csv`: 12 | 13 | ``` 14 | NAME,ADDRESS,POINTS 15 | Fred Smith,"1234 Main St. 16 | Anytown, USA 39103",123.4567 17 | "Wayne ""The Great One"" Gretsky", 59 Hockey Lane , 999999 18 | ``` 19 | 20 | here is the resulting output: 21 | 22 | ``` 23 | $ cat input.csv | csvprintf -i 'Name: [%1$-24.24s]\nAddress: [%2$-12.12s]\nPoints: %3$.2f\n' 24 | Name: [Fred Smith ] 25 | Address: [1234 Main St] 26 | Points: 123.46 27 | Name: [Wayne "The Great One" Gr] 28 | Address: [59 Hockey La] 29 | Points: 999999.00 30 | ``` 31 | 32 | An example of the XML output: 33 | 34 | ``` 35 | $ cat input.csv | csvprintf -iX 36 | 37 | 38 | 39 | Fred Smith 40 |
1234 Main St. 41 | Anytown, USA 39103
42 | 123.4567 43 |
44 | 45 | Wayne "The Great One" Gretsky 46 |
59 Hockey Lane
47 | 999999 48 |
49 |
50 | ``` 51 | -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Script to regenerate all the GNU auto* gunk. 5 | # Run this from the top directory of the source tree. 6 | # 7 | # If it looks like I don't know what I'm doing here, you're right. 8 | # 9 | 10 | set -e 11 | 12 | . ./cleanup.sh 13 | if [ "${1}" = '-C' ]; then 14 | exit 0 15 | fi 16 | mkdir -p scripts 17 | 18 | ACLOCAL="aclocal" 19 | AUTOHEADER="autoheader" 20 | AUTOMAKE="automake" 21 | AUTOCONF="autoconf" 22 | 23 | echo "running aclocal" 24 | ${ACLOCAL} ${ACLOCAL_ARGS} -I scripts 25 | 26 | echo "running autoheader" 27 | ${AUTOHEADER} 28 | 29 | echo "running automake" 30 | ${AUTOMAKE} --add-missing -c --foreign 31 | 32 | echo "running autoconf" 33 | ${AUTOCONF} -f -i 34 | 35 | if [ "${1}" = '-c' ]; then 36 | echo "running configure" 37 | ./configure 38 | fi 39 | 40 | -------------------------------------------------------------------------------- /cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Script to clean out generated GNU auto* gunk. 5 | # 6 | 7 | set -e 8 | 9 | echo "cleaning up" 10 | rm -rf autom4te*.cache scripts aclocal.m4 configure config.log config.status .deps stamp-h1 a.out.dSYM 11 | rm -f config.h.in config.h.in~ config.h 12 | rm -f scripts 13 | find . \( -name Makefile -o -name Makefile.in \) -print0 | xargs -0 rm -f 14 | rm -f gitrev.c 15 | rm -f *.o csvprintf 16 | rm -f csvprintf-?.?.?.tar.gz 17 | rm -f csvprintf.1 18 | rm -f xml2csv 19 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | # 2 | # csvprintf - Simple CSV file parser for the UNIX command line 3 | # 4 | # Copyright 2010 Archie L. 
Cobbs 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); you may 7 | # not use this file except in compliance with the License. You may obtain 8 | # a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 15 | # License for the specific language governing permissions and limitations 16 | # under the License. 17 | # 18 | 19 | AC_INIT([csvprintf - Simple CSV file parser for the UNIX command line],[1.3.2],[https://github.com/archiecobbs/csvprintf],[csvprintf]) 20 | AC_CONFIG_AUX_DIR(scripts) 21 | AM_INIT_AUTOMAKE 22 | dnl AM_MAINTAINER_MODE 23 | AC_PREREQ([2.69]) 24 | AC_REVISION($Id$) 25 | AC_PREFIX_DEFAULT(/usr) 26 | AC_PROG_MAKE_SET 27 | 28 | [CFLAGS="-g -O3 -pipe -Wall -Waggregate-return -Wcast-align -Wchar-subscripts -Wcomment -Wformat -Wimplicit -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wno-long-long -Wparentheses -Wpointer-arith -Wredundant-decls -Wreturn-type -Wswitch -Wtrigraphs -Wuninitialized -Wunused -Wwrite-strings -Wshadow -Wstrict-prototypes -Wcast-qual $CFLAGS"] 29 | AC_SUBST(CFLAGS) 30 | 31 | # Compile flags for Linux 32 | AC_DEFINE(_DEFAULT_SOURCE, 1, GNU functions) 33 | AC_DEFINE(_GNU_SOURCE, 1, GNU functions) 34 | AC_DEFINE(_BSD_SOURCE, 1, BSD functions) 35 | AC_DEFINE(_XOPEN_SOURCE, 500, XOpen functions) 36 | 37 | # Compile flags for Mac OS 38 | AC_DEFINE(_DARWIN_C_SOURCE, 1, MacOS functions) 39 | 40 | # Check for required programs 41 | AC_PROG_INSTALL 42 | AC_PROG_CC 43 | AC_PATH_PROG([PRINTF], [printf]) 44 | if test -z "${PRINTF}"; then 45 | AC_MSG_ERROR([printf not found]); 46 | fi 47 | AC_PATH_PROG([XSLTPROC], [xsltproc]) 48 | if test -z "${XSLTPROC}"; then 49 | AC_MSG_ERROR([xsltproc not found]); 50 | fi 51 | 52 | # Add PRINTF def 53 | 
[CFLAGS="$CFLAGS -DPRINTF_PROGRAM=\\\""${PRINTF}"\\\""] 54 | 55 | # Check for required libc functions 56 | AC_SEARCH_LIBS([iconv_open], [iconv],, 57 | [if test `uname -o` = 'Cygwin' -a -f /usr/lib/libiconv.a; then LIBS="-liconv ${LIBS}"; else AC_MSG_ERROR([required function iconv_open missing]); fi]) 58 | 59 | # Check for required header files 60 | AC_CHECK_HEADERS(sys/wait.h assert.h ctype.h err.h errno.h stddef.h stdint.h stdio.h stdlib.h string.h unistd.h, [], 61 | [AC_MSG_ERROR([required header file '$ac_header' missing])]) 62 | 63 | # Optional features 64 | AC_ARG_ENABLE(assertions, 65 | AS_HELP_STRING([--enable-assertions], 66 | [enable debugging sanity checks (default NO)]), 67 | [test x"$enableval" = "xyes" || AC_DEFINE(NDEBUG, 1, [disable assertions])], 68 | [AC_DEFINE(NDEBUG, 1, [disable assertions])]) 69 | AC_ARG_ENABLE(gprof, 70 | AS_HELP_STRING([--enable-gprof], 71 | [Compile and link with gprof(1) support (default NO)]), 72 | [test x"$enableval" = "xyes" && CFLAGS="${CFLAGS} -pg"]) 73 | AC_ARG_ENABLE(Werror, 74 | AS_HELP_STRING([--enable-Werror], 75 | [enable compilation with -Werror flag (default NO)]), 76 | [test x"$enableval" = "xyes" && CFLAGS="${CFLAGS} -Werror"]) 77 | 78 | # Generated files 79 | AC_CONFIG_FILES(Makefile) 80 | AC_CONFIG_HEADERS(config.h) 81 | 82 | # Go 83 | AC_OUTPUT 84 | -------------------------------------------------------------------------------- /csv.xsl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /csvprintf.1.in: -------------------------------------------------------------------------------- 1 | .\" -*- nroff -*- 2 | .\" 3 | .\" csvprintf - Simple CSV file parser 
for the UNIX command line 4 | .\" 5 | .\" Copyright 2010 Archie L. Cobbs 6 | .\" 7 | .\" Licensed under the Apache License, Version 2.0 (the "License"); you may 8 | .\" not use this file except in compliance with the License. You may obtain 9 | .\" a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 10 | .\" 11 | .\" Unless required by applicable law or agreed to in writing, software 12 | .\" distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13 | .\" WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14 | .\" License for the specific language governing permissions and limitations 15 | .\" under the License. 16 | .\" 17 | .Dd November 30, 2010 18 | .Dt CSVPRINTF 1 19 | .Os 20 | .Sh NAME 21 | .Nm csvprintf 22 | .Nd CSV file parser 23 | .Sh SYNOPSIS 24 | .Nm csvprintf 25 | .Bk -words 26 | .Op Ar options 27 | .Ar format 28 | .Ek 29 | .Pp 30 | .Nm csvprintf 31 | .Bk -words 32 | .Fl b 33 | .Op Ar options 34 | .Ek 35 | .Pp 36 | .Nm csvprintf 37 | .Bk -words 38 | .Fl j 39 | .Op Ar options 40 | .Ek 41 | .Pp 42 | .Nm csvprintf 43 | .Bk -words 44 | .Fl x 45 | .Op Ar options 46 | .Ek 47 | .Pp 48 | .Nm csvprintf 49 | .Bk -words 50 | .Fl X 51 | .Op Ar options 52 | .Ek 53 | .Pp 54 | .Nm xml2csv 55 | .Bk -words 56 | .Op Ar file.xml 57 | .Ek 58 | .Sh DESCRIPTION 59 | .Nm 60 | is a simple UNIX command line utility for parsing CSV files. 61 | .Pp 62 | In the first form, 63 | .Nm 64 | works like the 65 | .Xr printf 1 66 | command line utility: you supply a 67 | .Xr printf 1 68 | format string on the command line, and each row of the CSV file is split into arguments and formatted accordingly. 69 | .Pp 70 | The format specifiers in the format string contain numeric or symbolic column accessors to specify which CSV column to format. 71 | .Pp 72 | A numeric column accessor is a sequence of decimal digits followed by the 73 | .Pa $ 74 | character (the same accessor format supported by 75 | .Xr printf 1 ) . 
76 | So for example, 77 | .Pa \(dq%3$d\(dq 78 | would format the third CSV column as a decimal value. 79 | In addition, the 80 | .Pa \(dq%0$d\(dq 81 | specifier will print the number of columns in the record. 82 | .Pp 83 | When the 84 | .Fl n 85 | flag is given, the first row is assumed to contain column names and is not output. 86 | This allows symbolic, instead of numeric, column accessors to be used. 87 | A symbolic column accessor is the column name enclosed in curly braces. 88 | .Pp 89 | For example, if the first row is 90 | .Pa FirstName,LastName,IdNum 91 | then the format string 92 | .Pa \(dq%{IdNum}04d: %{LastName}s, %{FirstName}s\(dq 93 | would be equivalent to the format string 94 | .Pa \(dq%3$04d: %2$s, %1$s\(dq . 95 | .Pp 96 | Specifying a column name that does not appear in the first row generates an error, 97 | so the use of symbolic column accessors adds an extra consistency check. 98 | .Sh XML Mode 99 | With 100 | .Fl x , 101 | the entire file is converted into an XML document. 102 | .Pp 103 | The document element is 104 | .Ar "<csv>" . 105 | .Pp 106 | Each CSV row becomes a 107 | .Ar "<row>" 108 | element containing its individual column values as sub-elements. 109 | .Pp 110 | The column value sub-elements are named 111 | .Ar "<col1>" , 112 | .Ar "<col2>" , 113 | etc.; 114 | with 115 | .Fl i , 116 | the sub-elements use the column names read from the first row (with illegal characters replaced by underscores). 117 | .Pp 118 | In XML mode, a character encoding must be assumed; see 119 | .Fl e . 120 | .Pp 121 | The 122 | .Nm xml2csv 123 | command can convert XML documents generated by 124 | .Nm "csvprintf -x" 125 | back into CSV. 126 | .Sh JSON Mode 127 | With 128 | .Fl j , 129 | each row is converted into a JSON document. 130 | .Pp 131 | This form is described by RFC 7464 and consists of concatenated JSON documents 132 | framed by ASCII RS and LF control characters, which is compatible with the 133 | .Xr jq 1 134 | utility's 135 | .Fl \-seq 136 | flag. 
137 | .Pp 138 | Normally each row is written as a string array; 139 | with 140 | .Fl i , 141 | each row is written as an object, using column names for fields. 142 | An error occurs if two columns have the same name. 143 | .Pp 144 | In JSON mode, a character encoding must be assumed; see 145 | .Fl e . 146 | .Sh Bash Mode 147 | With 148 | .Fl b , 149 | each row is converted into 150 | .Xr bash 1 151 | variable assignment(s) which may be applied with the 152 | .Xr eval 1 153 | command. 154 | .Pp 155 | Normally the output just assigns 156 | .Ar ROW 157 | as an array of values. 158 | The resulting output can be used like this: 159 | .Bd -literal -offset indent 160 | cat input.csv | csvprintf -b | while read -r LINE; do 161 | eval "${LINE}" 162 | echo "The first column is: ${ROW[0]}" 163 | echo "The second column is: ${ROW[1]}" 164 | ... 165 | done 166 | .Ed 167 | .Pp 168 | With 169 | .Fl i , 170 | each column value is assigned to a separate variable whose name is the corresponding column name 171 | (with underscores replacing non-alphanumeric characters), and an error occurs if two variables have the same name. 172 | .Pp 173 | So an input file like this: 174 | .Bd -literal -offset indent 175 | "Last Name","First Name","Registered???" 176 | "Washington","George","Y" 177 | "Lincoln","Abe","N" 178 | .Ed 179 | .Pp 180 | can be processed like this: 181 | .Bd -literal -offset indent 182 | cat input.csv | csvprintf -bi -p ROW_ | while read -r LINE; do 183 | eval "${LINE}" 184 | echo "First name: ${ROW_First_Name}" 185 | echo "Last name: ${ROW_Last_Name}" 186 | echo "Registered: ${ROW_Registered___}" 187 | done 188 | .Ed 189 | .Sh Bash Mode Security Concerns 190 | There are two security issues to be aware of when using Bash Mode. 191 | .Pp 192 | First, the 193 | .Fl i 194 | flag opens a security hole because Bash has several special variables like 195 | .Ar PATH , 196 | .Ar TMPDIR , 197 | etc., which could be overwritten by malicious input. 
198 | To prevent this, 199 | .Nm 200 | omits known Bash variables, but for tighter security use the 201 | .Fl c 202 | flag to explicitly white-list the variables you need. 203 | In addition, use of the 204 | .Fl p 205 | flag is always recommended in Bash Mode to help avoid namespace collisions. 206 | .Pp 207 | Secondly, if the Bash Mode output is piped into 208 | .Ar "while read" 209 | then the 210 | .Fl r 211 | flag must be used to prevent extraneous decoding of backslash escapes. 212 | .Sh Input Encoding 213 | In all modes, lines must be terminated by LF bytes or CR+LF byte pairs, and the separator and quote characters must be recognizable as single byte values. 214 | This parsing behavior is compatible with ASCII, ISO-8859-1, UTF-8, etc., but not multi-byte encodings such as UTF-16, which must be re-encoded (e.g., to UTF-8) first. 215 | .Pp 216 | In normal and Bash modes, column values are copied from input to output bytewise without interpretation. 217 | .Pp 218 | In XML and JSON modes, column values must be interpreted according to an assumed character encoding. 219 | This encoding defaults to ISO-8859-1 but can be changed with the 220 | .Fl e 221 | flag. 222 | .Sh OPTIONS 223 | .Bl -tag -width Ds 224 | .It Fl b 225 | Convert each CSV row into a 226 | .Xr bash 1 227 | variable assignment line. 228 | .It Fl c Ar colname 229 | Specify a column to be included when using column names in XML, JSON, or Bash output. 230 | .Pp 231 | Without this flag, all columns are included. 232 | When this flag is used one or more times, 233 | only the specified columns are included. 234 | .Pp 235 | If any 236 | .Ar colname 237 | doesn't exist, an error occurs. 238 | .It Fl e 239 | Specify input character encoding for XML or JSON mode. 240 | .Pp 241 | By default, ISO-8859-1 is assumed. 242 | .It Fl f 243 | Read CSV input from the specified file. 244 | .Pp 245 | By default (or if ``-'' is specified), 246 | .Nm 247 | reads from standard input. 
248 | .It Fl i 249 | Use the column names read from the first record when generating the output. 250 | .Pp 251 | In normal mode, or when used with the 252 | .Fl x 253 | flag, this flag is equivalent to 254 | .Fl n . 255 | .Pp 256 | In JSON mode, output objects instead of arrays and use column names for the object fields. 257 | .Pp 258 | In Bash mode, output a variable for each column instead of a single 259 | .Ar ROW 260 | array variable. 261 | .Pp 262 | It's possible for a row to have more columns than the column header row did. 263 | In that case, 264 | .Nm 265 | reverts to using 266 | .Ar col1 , 267 | .Ar col2 , 268 | etc., for any extra columns. 269 | .Pp 270 | This flag implies 271 | .Fl n . 272 | .It Fl j 273 | Convert the input into a JavaScript Object Notation (JSON) text sequence document. 274 | .It Fl n 275 | Assume the first CSV record contains column names and omit it from the output. 276 | .Pp 277 | In normal mode, enable symbolic column accessors. 278 | .It Fl p 279 | Specify a common prefix (UTF-8 encoding) to use with all column names in the output. 280 | .Pp 281 | This flag is ignored unless 282 | .Fl i 283 | is specified. 284 | .Pp 285 | .It Fl q 286 | Specify an alternate CSV column quote character. 287 | The usual backslash escape sequences are accepted. 288 | .Pp 289 | The default quote character is double quote. 290 | .It Fl s 291 | Specify an alternate CSV column separator character. 292 | The usual backslash escape sequences are accepted. 293 | .Pp 294 | The default separator character is comma. 295 | .It Fl h 296 | Output usage message and exit. 297 | .It Fl v 298 | Output version information and exit. 299 | .It Fl x 300 | Convert the input into an XML document. 301 | .It Fl X 302 | Convert the input into an XML document using column names for value sub-elements. 303 | .Pp 304 | This flag implies 305 | .Fl n . 306 | .El 307 | .Sh CSV FORMAT 308 | .Nm 309 | parses according to the format described by ``The Comma Separated Value (CSV) File Format'' (see below).
310 | In particular, quote characters must be escaped with an extra quote, and whitespace surrounding column values is ignored. 311 | .Sh EXIT STATUS 312 | .Nm 313 | will exit with status 1 if invalid CSV input is detected. 314 | Otherwise, if an invocation of 315 | .Xr printf 1 316 | fails, processing stops and that exit value is returned. 317 | .Sh FILES 318 | .Bl -tag -width Ds -compact 319 | .It Pa @pkgdatadir@/csv.xsl 320 | XSL transform that converts XML back into CSV format. 321 | .El 322 | .Sh BUGS 323 | .Pp 324 | Under the hood, 325 | .Nm 326 | invokes the 327 | .Xr printf 1 328 | executable on each CSV row it parses, which makes it relatively slow. 329 | .Sh SEE ALSO 330 | .Xr printf 1 , 331 | .Xr printf 3 , 332 | .Xr jq 1 . 333 | .Rs 334 | .%T "csvprintf: Simple CSV file parser for the UNIX command line" 335 | .%O https://github.com/archiecobbs/csvprintf 336 | .Re 337 | .Rs 338 | .%T "The Comma Separated Value (CSV) File Format" 339 | .%O http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm 340 | .Re 341 | .Rs 342 | .%T "RFC 7464: JavaScript Object Notation (JSON) Text Sequences" 343 | .%O https://datatracker.ietf.org/doc/html/rfc7464 344 | .Re 345 | .Sh AUTHOR 346 | .An Archie L. Cobbs Aq archie.cobbs@gmail.com 347 | -------------------------------------------------------------------------------- /csvprintf.h: -------------------------------------------------------------------------------- 1 | 2 | // 3 | // csvprintf - Simple CSV file parser for the UNIX command line 4 | // 5 | // Copyright 2010 Archie L. Cobbs 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); you may 8 | // not use this file except in compliance with the License. 
You may obtain 9 | // a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // Unless required by applicable law or agreed to in writing, software 14 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 15 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 16 | // License for the specific language governing permissions and limitations 17 | // under the License. 18 | // 19 | 20 | #include "config.h" 21 | 22 | extern const char *const csvprintf_version; 23 | 24 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | 2 | // 3 | // csvprintf - Simple CSV file parser for the UNIX command line 4 | // 5 | // Copyright 2010 Archie L. Cobbs 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); you may 8 | // not use this file except in compliance with the License. You may obtain 9 | // a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14 | // License for the specific language governing permissions and limitations 15 | // under the License. 
16 | // 17 | 18 | #include "csvprintf.h" 19 | 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | #define DEFAULT_QUOTE_CHAR '"' 35 | #define DEFAULT_FSEP_CHAR ',' 36 | #define XML_OUTPUT_ENCODING "UTF-8" 37 | 38 | #define MODE_NORMAL 0 // normal mode 39 | #define MODE_XML_PLAIN 1 // plain XML mode 40 | #define MODE_XML_NAMES 2 // XML mode with names 41 | #define MODE_JSON 3 // JSON mode 42 | #define MODE_BASH 4 // bash mode 43 | 44 | struct col { 45 | char *buf; 46 | size_t len; 47 | size_t alloc; 48 | }; 49 | 50 | struct row { 51 | char **fields; 52 | size_t num; 53 | size_t alloc; 54 | }; 55 | 56 | static int quote = DEFAULT_QUOTE_CHAR; 57 | static int fsep = DEFAULT_FSEP_CHAR; 58 | 59 | static const char *bash_special_vars[] = { 60 | "BASH", "BASHOPTS", "BASHPID", "BASH_ALIASES", "BASH_ARGC", "BASH_ARGV", "BASH_CMDS", "BASH_COMMAND", 61 | "BASH_EXECUTION_STRING", "BASH_LINENO", "BASH_LOADABLES_PATH", "BASH_REMATCH", "BASH_SOURCE", "BASH_SUBSHELL", 62 | "BASH_VERSINFO", "BASH_VERSION", "COMP_CWORD", "COMP_KEY", "COMP_LINE", "COMP_POINT", "COMP_TYPE", "COMP_WORDBREAKS", 63 | "COMP_WORDS", "COPROC", "DIRSTACK", "EUID", "FUNCNAME", "GROUPS", "HISTCMD", "HOSTNAME", "HOSTTYPE", "LINENO", 64 | "MACHTYPE", "MAPFILE", "OLDPWD", "OPTARG", "OPTIND", "OSTYPE", "PIPESTATUS", "PPID", "PWD", "RANDOM", "READLINE_LINE", 65 | "READLINE_POINT", "REPLY", "SECONDS", "SHELLOPTS", "SHLVL", "UID", "BASH_COMPAT", "BASH_ENV", "BASH_XTRACEFD", "CDPATH", 66 | "CHILD_MAX", "COLUMNS", "COMPREPLY", "EMACS", "ENV", "EXECIGNORE", "FCEDIT", "FIGNORE", "FUNCNEST", "GLOBIGNORE", 67 | "HISTCONTROL", "HISTFILE", "HISTFILESIZE", "HISTIGNORE", "HISTSIZE", "HISTTIMEFORMAT", "HOME", "HOSTFILE", "IFS", 68 | "IGNOREEOF", "INPUTRC", "LANG", "LC_ALL", "LC_COLLATE", "LC_CTYPE", "LC_MESSAGES", "LC_NUMERIC", "LC_TIME", "LINES", 69 | "MAIL", "MAILCHECK", "MAILPATH", "OPTERR", 
"PATH", "POSIXLY_CORRECT", "PROMPT_COMMAND", "PROMPT_DIRTRIM", "PS0", "PS1", 70 | "PS2", "PS3", "PS4", "SHELL", "TIMEFORMAT", "TMOUT", "TMPDIR", "auto_resume", "histchars" 71 | }; 72 | #define NUM_BASH_SPECIAL_VARS (sizeof(bash_special_vars) / sizeof(*bash_special_vars)) 73 | 74 | static int parsechar(const char *str); 75 | static int parsefmt(char *fmt, const struct row *column_names, unsigned int **argsp); 76 | static int readcol(FILE *fp, struct row *row, int *linenum); 77 | static int readqcol(FILE *fp, struct col *col, int *linenum); 78 | static int readuqcol(FILE *fp, struct col *col, int *linenum); 79 | static int readch(FILE *fp, int collapse); 80 | static void freerow(struct row *row); 81 | static void print_xml_tag_name(const char *tag, int linenum); 82 | static void print_json_string(const char *string, int linenum); 83 | static void print_bash_name(const char *string); 84 | static void print_bash_value(const char *string); 85 | static char bash_name_safe(char ch, int first); 86 | static int decode_utf8(const char *const obuf, size_t olen, int *lenp, int linenum); 87 | static void convert_to_utf8(iconv_t icd, struct row *row, int linenum); 88 | static const char *escape_xml_char(int uchar); 89 | static char *eatwidthprec(const char *fspec, const char *desc, const struct row *column_names, 90 | char *s, int *nargs, unsigned int *args); 91 | static char *eataccessor(const char *fspec, const char *desc, const struct row *column_names, 92 | char *s, int *nargs, unsigned int *args); 93 | static void addcolumn(struct row *row, const struct col *col); 94 | static void addstring(struct row *row, const char *const string); 95 | static int findstring(struct row *row, const char *const string); 96 | static int findstring2(const char *const *list, size_t num, const char *const string); 97 | static void growrow(struct row *row); 98 | static void addchar(struct col *col, int ch); 99 | static void trim(struct col *col); 100 | static void usage(void); 101 | static void 
version(void); 102 | 103 | int 104 | main(int argc, char **argv) 105 | { 106 | const char *input = "-"; 107 | const char *encoding = "ISO-8859-1"; 108 | const char *name_prefix = ""; 109 | char *format = NULL; 110 | iconv_t icd = NULL; 111 | FILE *fp = NULL; 112 | struct row row; 113 | struct row column_names; 114 | struct row allowed_column_names; 115 | unsigned int *args = NULL; 116 | int mode = -1; 117 | int read_column_names = 0; // strip off first row containing column names 118 | int use_column_names = 0; // use column names from first row in output 119 | int first_row = 0; 120 | int nargs = 0; 121 | int file_done; 122 | int linenum; 123 | int new_mode; 124 | int ch; 125 | 126 | // Initialize 127 | memset(&row, 0, sizeof(row)); 128 | memset(&column_names, 0, sizeof(column_names)); 129 | memset(&allowed_column_names, 0, sizeof(allowed_column_names)); 130 | 131 | // Parse command line 132 | while ((ch = getopt(argc, argv, "bc:e:f:hijnp:q:s:vxX")) != -1) { 133 | switch (ch) { 134 | case 'b': 135 | if (mode != -1 && mode != MODE_BASH) 136 | errx(1, "flag \"%c\" conflicts with previous mode flag", ch); 137 | mode = MODE_BASH; 138 | break; 139 | case 'c': 140 | addstring(&allowed_column_names, optarg); 141 | break; 142 | case 'e': 143 | encoding = optarg; 144 | break; 145 | case 'f': 146 | input = optarg; 147 | break; 148 | case 'i': 149 | read_column_names = 1; 150 | use_column_names = 1; 151 | break; 152 | case 'n': 153 | read_column_names = 1; 154 | break; 155 | case 'j': 156 | if (mode != -1 && mode != MODE_JSON) 157 | errx(1, "flag \"%c\" conflicts with previous mode flag", ch); 158 | mode = MODE_JSON; 159 | break; 160 | case 'X': 161 | case 'x': 162 | new_mode = ch == 'X' ? 
MODE_XML_NAMES : MODE_XML_PLAIN; 163 | if (mode != -1 && mode != new_mode) 164 | errx(1, "flag \"%c\" conflicts with previous mode flag", ch); 165 | if ((mode = new_mode) == MODE_XML_NAMES) { 166 | use_column_names = 1; 167 | read_column_names = 1; 168 | } 169 | break; 170 | case 'p': 171 | name_prefix = optarg; 172 | break; 173 | case 'q': 174 | if ((quote = parsechar(optarg)) == -1) 175 | errx(1, "invalid argument to \"-%c\"", ch); 176 | break; 177 | case 's': 178 | if ((fsep = parsechar(optarg)) == -1) 179 | errx(1, "invalid argument to \"-%c\"", ch); 180 | break; 181 | case 'h': 182 | usage(); 183 | exit(0); 184 | case 'v': 185 | version(); 186 | exit(0); 187 | case '?': 188 | default: 189 | usage(); 190 | exit(1); 191 | } 192 | } 193 | if (mode == -1) 194 | mode = MODE_NORMAL; 195 | argc -= optind; 196 | argv += optind; 197 | if (argc != (mode == MODE_NORMAL ? 1 : 0)) { 198 | usage(); 199 | exit(1); 200 | } 201 | 202 | // Backward compatbitility hack 203 | if (mode == MODE_XML_PLAIN) 204 | use_column_names = 0; 205 | 206 | // Sanity check 207 | if (quote == fsep) 208 | err(1, "quote and field separators cannot be the same character"); 209 | if (allowed_column_names.num > 0 && !read_column_names) 210 | err(1, "\"-c\" flag requires \"-n\" flag"); 211 | 212 | // Get and (maybe) parse format string (normal mode only) 213 | if (mode == MODE_NORMAL) { 214 | format = argv[0]; 215 | 216 | // Parse format string - unless we need to defer 217 | if (!read_column_names) 218 | nargs = parsefmt(format, NULL, &args); 219 | } 220 | 221 | // Open input 222 | if (strcmp(input, "-") == 0) 223 | fp = stdin; 224 | else if ((fp = fopen(input, "r")) == NULL) 225 | err(1, "%s", input); 226 | 227 | // Initialize iconv 228 | switch (mode) { 229 | case MODE_XML_PLAIN: 230 | case MODE_XML_NAMES: 231 | case MODE_JSON: 232 | if ((icd = iconv_open(XML_OUTPUT_ENCODING, encoding)) == (iconv_t)-1) 233 | err(1, "%s", encoding); 234 | break; 235 | default: 236 | break; 237 | } 238 | 239 | // XML 
opening 240 | if (mode == MODE_XML_PLAIN || mode == MODE_XML_NAMES) { 241 | printf("\n", XML_OUTPUT_ENCODING); 242 | printf("\n"); 243 | } 244 | 245 | // Read and parse input 246 | linenum = 1; 247 | first_row = 1; 248 | for (file_done = 0; !file_done; ) { 249 | 250 | // Start parsing next row 251 | switch ((ch = readch(fp, 1))) { 252 | case EOF: 253 | file_done = 1; 254 | continue; 255 | case '\n': // ignore completely empty lines 256 | linenum++; 257 | continue; 258 | default: 259 | ungetc(ch, fp); 260 | break; 261 | } 262 | 263 | // Read columns 264 | while (readcol(fp, &row, &linenum)) 265 | ; 266 | 267 | // Gather column names from first row, if configured 268 | if (first_row && read_column_names) { 269 | int i, j; 270 | 271 | // Convert to UTF-8 if needed 272 | if (icd != NULL) 273 | convert_to_utf8(icd, &row, linenum); 274 | 275 | // Save column names 276 | memcpy(&column_names, &row, sizeof(row)); 277 | memset(&row, 0, sizeof(row)); 278 | 279 | // If we had to defer parsing format string until we had the column names, do that now 280 | if (mode == MODE_NORMAL) 281 | nargs = parsefmt(format, &column_names, &args); 282 | 283 | // Check that all explicitly specified columns are actually present 284 | for (i = 0; i < allowed_column_names.num; i++) { 285 | if (!findstring(&column_names, allowed_column_names.fields[i])) 286 | errx(1, "column \"%s\" not found", allowed_column_names.fields[i]); 287 | } 288 | 289 | // Check for illegal or duplicate column names 290 | switch (mode) { 291 | case MODE_JSON: 292 | for (i = 0; i < column_names.num - 1; i++) { 293 | if (allowed_column_names.num > 0 294 | && !findstring(&allowed_column_names, column_names.fields[i])) 295 | continue; 296 | for (j = i + 1; j < column_names.num; j++) { 297 | if (strcmp(column_names.fields[i], column_names.fields[j]) == 0) 298 | errx(1, "duplicate column name \"%s\"", column_names.fields[i]); 299 | } 300 | } 301 | break; 302 | case MODE_BASH: 303 | for (i = 0; i < column_names.num; i++) { 304 
| char *namei; 305 | 306 | if (allowed_column_names.num > 0 307 | && !findstring(&allowed_column_names, column_names.fields[i])) 308 | continue; 309 | if (asprintf(&namei, "%s%s", name_prefix, column_names.fields[i]) == -1) 310 | err(1, "asprintf"); 311 | if (*namei == '\0') 312 | errx(1, "illegal empty string column name"); 313 | for (j = i + 1; j < column_names.num; j++) { 314 | char *namej; 315 | int same = 1; 316 | int k; 317 | 318 | if (asprintf(&namej, "%s%s", name_prefix, column_names.fields[j]) == -1) 319 | err(1, "asprintf"); 320 | for (k = 0; namei[k] != '\0' || namej[k] != '\0'; k++) { 321 | if (namei[k] == '\0' || namej[k] == '\0' 322 | || bash_name_safe(namei[k], k == 0) != bash_name_safe(namej[k], k == 0)) { 323 | same = 0; 324 | break; 325 | } 326 | } 327 | if (same) 328 | errx(1, "duplicate (bash variable) column names \"%s\" and \"%s\"", namei, namej); 329 | free(namej); 330 | } 331 | free(namei); 332 | } 333 | break; 334 | default: 335 | break; 336 | } 337 | 338 | // Proceed 339 | goto next; 340 | } 341 | 342 | // Handle data row 343 | switch (mode) { 344 | case MODE_JSON: 345 | { 346 | int col; 347 | 348 | // Convert columns to UTF-8 349 | convert_to_utf8(icd, &row, linenum); 350 | 351 | // Output row 352 | printf("\x1e%c", use_column_names ? 
'{' : '['); 353 | for (col = 0; col < row.num; col++) { 354 | 355 | // Check whether column should be included 356 | if (use_column_names 357 | && allowed_column_names.num > 0 358 | && col < column_names.num 359 | && !findstring(&allowed_column_names, column_names.fields[col])) 360 | continue; 361 | 362 | // Add comma if needed 363 | if (col > 0) 364 | putchar(','); 365 | 366 | // Add column name (if using object notation) 367 | if (use_column_names) { 368 | if (col < column_names.num) { 369 | putchar('"'); 370 | print_json_string(name_prefix, linenum); 371 | print_json_string(column_names.fields[col], linenum); 372 | putchar('"'); 373 | } else 374 | printf("\"col%d\"", col + 1); 375 | putchar(':'); 376 | } 377 | 378 | // Add column value 379 | putchar('"'); 380 | print_json_string(row.fields[col], linenum); 381 | putchar('"'); 382 | } 383 | printf("%c\n", use_column_names ? '}' : ']'); 384 | break; 385 | } 386 | case MODE_XML_PLAIN: 387 | case MODE_XML_NAMES: 388 | { 389 | int col; 390 | 391 | // Convert columns to UTF-8 392 | convert_to_utf8(icd, &row, linenum); 393 | 394 | // Output columns for row 395 | printf(" \n"); 396 | for (col = 0; col < row.num; col++) { 397 | const char *ptr = row.fields[col]; 398 | int len = strlen(ptr); 399 | int use_column_names_this_tag; 400 | const char *esc; 401 | int uchar; 402 | int uclen; 403 | int i; 404 | 405 | // Check whether column should be included 406 | if (use_column_names 407 | && allowed_column_names.num > 0 408 | && col < column_names.num 409 | && !findstring(&allowed_column_names, column_names.fields[col])) 410 | continue; 411 | 412 | // Determine whether we can actually use column name for XML tag name 413 | use_column_names_this_tag = use_column_names && col < column_names.num 414 | && (*name_prefix != '\0' || *column_names.fields[col] != '\0'); 415 | 416 | // Open XML tag 417 | printf(" <"); 418 | if (use_column_names_this_tag) { 419 | print_xml_tag_name(name_prefix, linenum); 420 | 
print_xml_tag_name(column_names.fields[col], linenum); 421 | } else 422 | printf("col%d", col + 1); 423 | printf(">"); 424 | 425 | // Output XML characters, escaped as needed 426 | while (len > 0) { 427 | uchar = decode_utf8(ptr, len, &uclen, linenum); 428 | if ((esc = escape_xml_char(uchar)) != NULL) 429 | printf("%s", esc); 430 | else { 431 | for (i = 0; i < uclen; i++) 432 | putchar(ptr[i]); 433 | } 434 | ptr += uclen; 435 | len -= uclen; 436 | } 437 | 438 | // Close XML tag 439 | printf("\n"); 446 | } 447 | printf(" \n"); 448 | break; 449 | } 450 | case MODE_BASH: 451 | { 452 | char bash_name_buf[64]; // buffer just needs to be be enough to hold any of the bash_special_vars[] 453 | int col; 454 | 455 | // Start array (if needed) 456 | if (!use_column_names) 457 | printf("ROW=("); 458 | 459 | // Output row 460 | for (col = 0; col < row.num; col++) { 461 | 462 | // Check whether column should be included 463 | if (use_column_names 464 | && allowed_column_names.num > 0 465 | && col < column_names.num 466 | && !findstring(&allowed_column_names, column_names.fields[col])) 467 | continue; 468 | 469 | // Elide any BASH special variable names 470 | if (use_column_names && col < column_names.num) { 471 | snprintf(bash_name_buf, sizeof(bash_name_buf), "%s%s", name_prefix, column_names.fields[col]); 472 | if (findstring2(bash_special_vars, NUM_BASH_SPECIAL_VARS, bash_name_buf)) 473 | continue; 474 | } 475 | 476 | // Add space 477 | if (col > 0 || !use_column_names) 478 | putchar(' '); 479 | 480 | // Add column name (if using column names) 481 | if (use_column_names) { 482 | if (col < column_names.num) { 483 | print_bash_name(name_prefix); 484 | print_bash_name(column_names.fields[col]); 485 | } else 486 | printf("col%d", col + 1); 487 | putchar('='); 488 | } 489 | 490 | // Add column value 491 | print_bash_value(row.fields[col]); 492 | 493 | // Add separator 494 | if (use_column_names) 495 | putchar(';'); 496 | } 497 | 498 | // End array (if needed) 499 | if 
(!use_column_names) 500 | printf(" )"); 501 | 502 | // End line 503 | printf("\n"); 504 | break; 505 | } 506 | case MODE_NORMAL: 507 | { 508 | char ncolbuf[32]; 509 | char empty[] = { '\0' }; 510 | pid_t pid; 511 | pid_t result; 512 | int status; 513 | int i; 514 | 515 | fflush(stdout); 516 | fflush(stderr); 517 | switch ((pid = fork())) { 518 | case -1: 519 | err(1, "fork"); 520 | case 0: 521 | close(0); 522 | if ((argv = malloc((nargs + 3) * sizeof(*argv))) == NULL) 523 | err(1, "malloc"); 524 | argv[0] = strdup("printf"); 525 | if (argv[0] == NULL) 526 | err(1, "strdup"); 527 | argv[1] = format; 528 | snprintf(ncolbuf, sizeof(ncolbuf), "%lu", (unsigned long)row.num); 529 | for (i = 0; i < nargs; i++) 530 | argv[2 + i] = args[i] == 0 ? ncolbuf : args[i] <= row.num ? row.fields[args[i] - 1] : empty; 531 | argv[2 + nargs] = NULL; 532 | execvp(PRINTF_PROGRAM, argv); 533 | err(1, "execvp"); 534 | default: 535 | while (1) { 536 | if ((result = waitpid(pid, &status, 0)) == -1) 537 | err(1, "waitpid"); 538 | if (WIFEXITED(status)) { 539 | if (WEXITSTATUS(status) != 0) 540 | exit(status); 541 | break; 542 | } 543 | if (WIFSIGNALED(status)) 544 | exit(1); 545 | } 546 | break; 547 | } 548 | break; 549 | } 550 | default: 551 | errx(1, "internal error"); 552 | } 553 | 554 | next: 555 | // Free row memory 556 | freerow(&row); 557 | first_row = 0; 558 | } 559 | 560 | // XML closing 561 | if (mode == MODE_XML_PLAIN || mode == MODE_XML_NAMES) 562 | printf("\n"); 563 | 564 | // Clean up iconv 565 | if (icd != NULL) 566 | (void)iconv_close(icd); 567 | 568 | // Clean up 569 | if (fp != stdin) 570 | fclose(fp); 571 | freerow(&column_names); 572 | free(args); 573 | 574 | // Done 575 | fflush(stdout); 576 | return 0; 577 | } 578 | 579 | // Output XML tag name, substituting invalid characters 580 | static void 581 | print_xml_tag_name(const char *tag, int linenum) 582 | { 583 | int first = 1; 584 | int uchar; 585 | int uclen; 586 | int ok; 587 | int i; 588 | 589 | while (*tag != '\0') 
{ 590 | uchar = decode_utf8(tag, strlen(tag), &uclen, linenum); 591 | if (first) { 592 | ok = isalpha(uchar) || uchar == '_'; 593 | first = 0; 594 | } else 595 | ok = isalpha(uchar) || isdigit(uchar) || uchar == '_' || uchar == '-' || uchar == '.'; 596 | if (!ok) 597 | putchar('_'); 598 | else { 599 | for (i = 0; i < uclen; i++) 600 | putchar(tag[i]); 601 | } 602 | tag += uclen; 603 | } 604 | } 605 | 606 | static const char * 607 | escape_xml_char(int uchar) 608 | { 609 | static char buf[32]; 610 | 611 | switch (uchar) { 612 | case '>': 613 | return ">"; 614 | break; 615 | case '<': 616 | return "<"; 617 | break; 618 | case '&': 619 | return "&"; 620 | break; 621 | default: 622 | 623 | // Pass valid and unrestricted characters through (but not CR) 624 | // http://en.wikipedia.org/wiki/Valid_characters_in_XML 625 | if ((uchar == '\n' || uchar == '\t' 626 | || (uchar >= 0x0020 && uchar <= 0xd7ff) 627 | || (uchar >= 0xe000 && uchar <= 0xfffd) 628 | || (uchar >= 0x10000 && uchar <= 0x10ffff)) 629 | && !((uchar >= 0x007f && uchar <= 0x0084) || (uchar >= 0x0086 && uchar <= 0x009F))) 630 | return NULL; 631 | 632 | // Escape other characters 633 | snprintf(buf, sizeof(buf), "&#%u;", uchar); 634 | return buf; 635 | } 636 | } 637 | 638 | static void 639 | print_bash_name(const char *string) 640 | { 641 | int i; 642 | 643 | for (i = 0; string[i] != '\0'; i++) 644 | fputc(bash_name_safe(string[i], i == 0), stdout); 645 | } 646 | 647 | static void 648 | print_bash_value(const char *string) 649 | { 650 | int single_quotes = 1; 651 | int i; 652 | 653 | // See if plain single quotes will work 654 | for (i = 0; string[i] != '\0'; i++) { 655 | if (string[i] == '\'' || !isprint((unsigned char)string[i])) { 656 | single_quotes = 0; 657 | break; 658 | } 659 | } 660 | 661 | // Output value 662 | if (single_quotes) 663 | printf("'%s'", string); 664 | else { 665 | printf("$'"); 666 | for (i = 0; string[i] != '\0'; i++) { 667 | switch (string[i]) { 668 | case '\'': 669 | printf("\\'"); 670 
| break; 671 | case '\\': 672 | printf("\\\\"); 673 | break; 674 | case '\b': 675 | printf("\\b"); 676 | break; 677 | case '\f': 678 | printf("\\f"); 679 | break; 680 | case '\n': 681 | printf("\\n"); 682 | break; 683 | case '\r': 684 | printf("\\r"); 685 | break; 686 | case '\t': 687 | printf("\\t"); 688 | break; 689 | case '\v': 690 | printf("\\v"); 691 | break; 692 | default: 693 | if (isprint((unsigned char)string[i])) 694 | putchar((unsigned char)string[i]); 695 | else 696 | printf("\\x%02x", (unsigned char)string[i]); 697 | break; 698 | } 699 | } 700 | putchar('\''); 701 | } 702 | } 703 | 704 | static char 705 | bash_name_safe(char ch, int first) 706 | { 707 | if (isupper((unsigned char)ch) || islower((unsigned char)ch) || ch == '_') 708 | return ch; 709 | if (!first && isdigit((unsigned char)ch)) 710 | return ch; 711 | return '_'; 712 | } 713 | 714 | // Output JSON string 715 | static void 716 | print_json_string(const char *string, int linenum) 717 | { 718 | int uchar; 719 | int uclen; 720 | 721 | while (*string != '\0') { 722 | uchar = decode_utf8(string, strlen(string), &uclen, linenum); 723 | switch (uchar) { 724 | case '"': 725 | printf("\\\""); 726 | break; 727 | case '\\': 728 | printf("\\\\"); 729 | break; 730 | case '\b': 731 | printf("\\b"); 732 | break; 733 | case '\f': 734 | printf("\\f"); 735 | break; 736 | case '\n': 737 | printf("\\n"); 738 | break; 739 | case '\r': 740 | printf("\\r"); 741 | break; 742 | case '\t': 743 | printf("\\t"); 744 | break; 745 | default: 746 | if (isprint(uchar)) 747 | printf("%c", uchar); 748 | else 749 | printf("\\u%04x", uchar); 750 | break; 751 | } 752 | string += uclen; 753 | } 754 | } 755 | 756 | // Convert row columns to UTF-8 encoding 757 | static void 758 | convert_to_utf8(iconv_t icd, struct row *row, int linenum) 759 | { 760 | int col; 761 | 762 | for (col = 0; col < row->num; col++) { 763 | char *const ibuf = row->fields[col]; 764 | char *iptr; 765 | char *obuf; 766 | char *optr; 767 | size_t iremain; 768 
| size_t oremain; 769 | size_t olen; 770 | 771 | // Convert column 772 | if (iconv(icd, NULL, NULL, NULL, NULL) == (size_t)-1) 773 | err(1, "iconv"); 774 | iremain = strlen(ibuf); 775 | oremain = 64 + 4 * iremain; 776 | if ((obuf = malloc(oremain)) == NULL) 777 | err(1, "malloc"); 778 | iptr = ibuf; 779 | optr = obuf; 780 | if (iconv(icd, &iptr, &iremain, &optr, &oremain) == (size_t)-1) { 781 | switch (errno) { 782 | case EILSEQ: 783 | errx(1, "line %d: %s multibyte sequence", linenum, "illegal"); 784 | case EINVAL: 785 | errx(1, "line %d: %s multibyte sequence", linenum, "truncated"); 786 | default: 787 | err(1, "line %d: iconv", linenum); 788 | } 789 | } 790 | olen = optr - obuf; 791 | 792 | // Replace column 793 | if ((row->fields[col] = realloc(row->fields[col], olen + 1)) == NULL) 794 | err(1, "realloc"); 795 | memcpy(row->fields[col], obuf, olen); 796 | row->fields[col][olen] = '\0'; 797 | free(obuf); 798 | } 799 | } 800 | 801 | // Decode UTF-8 character 802 | static int 803 | decode_utf8(const char *const obuf, size_t olen, int *lenp, int linenum) 804 | { 805 | int uchar; 806 | int uclen; 807 | int i = 0; 808 | 809 | if ((obuf[i] & 0x80) == 0x00) { 810 | uclen = 1; 811 | uchar = obuf[i] & 0x7f; 812 | } else if ((obuf[i] & 0xe0) == 0xc0 && i + 1 < olen) { 813 | uclen = 2; 814 | uchar = ((obuf[i] & 0x1f) << 6) 815 | | ((obuf[i + 1] & 0x3f) << 0); 816 | } else if ((obuf[i] & 0xf0) == 0xe0 && i + 2 < olen) { 817 | uclen = 3; 818 | uchar = ((obuf[i] & 0x0f) << 12) 819 | | ((obuf[i + 1] & 0x3f) << 6) 820 | | ((obuf[i + 2] & 0x3f) << 0); 821 | } else if ((obuf[i] & 0xf8) == 0xf0 && i + 3 < olen) { 822 | uclen = 4; 823 | uchar = ((obuf[i] & 0x07) << 18) 824 | | ((obuf[i + 1] & 0x3f) << 12) 825 | | ((obuf[i + 2] & 0x3f) << 6) 826 | | ((obuf[i + 3] & 0x3f) << 0); 827 | } else if ((obuf[i] & 0xfc) == 0xf8 && i + 4 < olen) { 828 | uclen = 5; 829 | uchar = ((obuf[i] & 0x03) << 24) 830 | | ((obuf[i + 1] & 0x3f) << 18) 831 | | ((obuf[i + 2] & 0x3f) << 12) 832 | | ((obuf[i 
+ 3] & 0x3f) << 6) 833 | | ((obuf[i + 4] & 0x3f) << 0); 834 | } else if ((obuf[i] & 0xfe) == 0xfc && i + 5 < olen) { 835 | uclen = 6; 836 | uchar = ((obuf[i] & 0x01) << 30) 837 | | ((obuf[i + 1] & 0x3f) << 24) 838 | | ((obuf[i + 2] & 0x3f) << 18) 839 | | ((obuf[i + 3] & 0x3f) << 12) 840 | | ((obuf[i + 4] & 0x3f) << 6) 841 | | ((obuf[i + 5] & 0x3f) << 0); 842 | } else 843 | errx(1, "line %d: internal error decoding UTF-8: 0x%02x", linenum, obuf[i] & 0xff); 844 | 845 | // Done 846 | *lenp = uclen; 847 | return uchar; 848 | } 849 | 850 | static int 851 | readcol(FILE *fp, struct row *row, int *linenum) 852 | { 853 | struct col col; 854 | int row_done; 855 | int ch; 856 | 857 | // Process initial stuff; skip leading whitespace, excluding our field separator (which could be TAB) 858 | do { 859 | if ((ch = readch(fp, 1)) == EOF) 860 | ch = '\n'; 861 | if (ch == '\n') { // end of line forces empty column and terminates the row 862 | memset(&col, 0, sizeof(col)); 863 | addcolumn(row, &col); 864 | (*linenum)++; 865 | return 0; 866 | } 867 | } while (isspace(ch) && ch != fsep); 868 | ungetc(ch, fp); 869 | 870 | // Read quoted or unquoted value 871 | if (ch == quote) 872 | row_done = readqcol(fp, &col, linenum); 873 | else 874 | row_done = readuqcol(fp, &col, linenum); 875 | addcolumn(row, &col); 876 | return row_done; 877 | } 878 | 879 | // 880 | // Read a quoted column, return true if there's more 881 | // 882 | static int 883 | readqcol(FILE *fp, struct col *col, int *linenum) 884 | { 885 | int done = 0; 886 | int escape = 0; 887 | int ch; 888 | 889 | readch(fp, 0); 890 | memset(col, 0, sizeof(*col)); 891 | while (1) { 892 | assert(!escape || !done); 893 | if ((ch = readch(fp, escape)) == EOF) { 894 | if (escape || done) 895 | ch = '\n'; 896 | else 897 | errx(1, "line %d: premature EOF", *linenum); 898 | } 899 | if (done) { 900 | if (ch == '\n') { 901 | (*linenum)++; 902 | return 0; 903 | } 904 | if (ch == fsep) 905 | return 1; 906 | if (isspace(ch)) 907 | continue; 908 | 
errx(1, "line %d: unexpected character \"%c\"", *linenum, ch); 909 | } 910 | if (escape) { 911 | if (ch == quote) 912 | addchar(col, quote); 913 | else { 914 | ungetc(ch, fp); 915 | done = 1; 916 | } 917 | escape = 0; 918 | continue; 919 | } 920 | if (ch == quote) { 921 | escape = 1; 922 | continue; 923 | } 924 | addchar(col, ch); 925 | if (ch == '\n') 926 | (*linenum)++; 927 | } 928 | } 929 | 930 | // 931 | // Read an unquoted column, return true if there's more 932 | // 933 | static int 934 | readuqcol(FILE *fp, struct col *col, int *linenum) 935 | { 936 | int ch; 937 | 938 | memset(col, 0, sizeof(*col)); 939 | while (1) { 940 | if ((ch = readch(fp, 1)) == EOF) 941 | ch = '\n'; 942 | if (ch == '\n') { 943 | (*linenum)++; 944 | trim(col); 945 | return 0; 946 | } 947 | if (ch == fsep) { 948 | trim(col); 949 | return 1; 950 | } 951 | addchar(col, ch); 952 | } 953 | } 954 | 955 | // 956 | // Trims whitespace around a column 957 | // 958 | static void 959 | trim(struct col *col) 960 | { 961 | size_t skip; 962 | 963 | while (col->len > 0 && isspace((unsigned char)col->buf[col->len - 1])) 964 | col->len--; 965 | for (skip = 0; skip < col->len && isspace((unsigned char)col->buf[skip]); skip++) 966 | ; 967 | col->len -= skip; 968 | memmove(col->buf, col->buf + skip, col->len); 969 | } 970 | 971 | // 972 | // Adds the character to the column 973 | // 974 | static void 975 | addchar(struct col *col, int ch) 976 | { 977 | if (col->alloc <= col->len) { 978 | int new_alloc; 979 | char *new_buf; 980 | 981 | new_alloc = col->alloc == 0 ? 
32 : col->alloc * 2; 982 | if ((new_buf = realloc(col->buf, new_alloc)) == NULL) 983 | err(1, "realloc"); 984 | col->buf = new_buf; 985 | col->alloc = new_alloc; 986 | } 987 | col->buf[col->len++] = ch; 988 | } 989 | 990 | // 991 | // Adds the column to the row, then frees the column 992 | // 993 | static void 994 | addcolumn(struct row *row, const struct col *col) 995 | { 996 | growrow(row); 997 | if (col->alloc >= col->len + 1) { 998 | col->buf[col->len] = '\0'; 999 | row->fields[row->num] = col->buf; 1000 | } else { 1001 | if ((row->fields[row->num] = malloc(col->len + 1)) == NULL) 1002 | err(1, "malloc"); 1003 | memcpy(row->fields[row->num], col->buf, col->len); 1004 | row->fields[row->num][col->len] = '\0'; 1005 | free(col->buf); 1006 | } 1007 | memset(&col, 0, sizeof(col)); 1008 | row->num++; 1009 | } 1010 | 1011 | // Copy given string and add to row 1012 | static void 1013 | addstring(struct row *row, const char *const string) 1014 | { 1015 | growrow(row); 1016 | if ((row->fields[row->num++] = strdup(string)) == NULL) 1017 | err(1, "strdup"); 1018 | } 1019 | 1020 | static int 1021 | findstring(struct row *row, const char *const string) 1022 | { 1023 | return findstring2((const char *const *)row->fields, row->num, string); 1024 | } 1025 | 1026 | static int 1027 | findstring2(const char *const *list, size_t num, const char *const string) 1028 | { 1029 | size_t i; 1030 | 1031 | for (i = 0; i < num; i++) { 1032 | if (strcmp(list[i], string) == 0) 1033 | return 1; 1034 | } 1035 | return 0; 1036 | } 1037 | 1038 | static void 1039 | growrow(struct row *row) 1040 | { 1041 | size_t new_alloc; 1042 | char **new_fields; 1043 | 1044 | if (row->alloc > row->num) 1045 | return; 1046 | new_alloc = row->alloc == 0 ? 
32 : row->alloc * 2; 1047 | if ((new_fields = realloc(row->fields, new_alloc * sizeof(*row->fields))) == NULL) 1048 | err(1, "realloc"); 1049 | row->fields = new_fields; 1050 | row->alloc = new_alloc; 1051 | memset(row->fields + row->num, 0, (row->alloc - row->num) * sizeof(*row->fields)); 1052 | } 1053 | // Parses the printf format "fmt" in place: each column accessor is removed from the string and its column number recorded; stores the malloc'd list in *argsp and returns its length 1054 | static int 1055 | parsefmt(char *fmt, const struct row *column_names, unsigned int **argsp) 1056 | { 1057 | unsigned int *args; 1058 | int nargs; 1059 | int alloc; 1060 | char *s; 1061 | 1062 | // Size and allocate array: at most three accessors (value, width, precision) per '%'; never ask for zero bytes because malloc(0) may return NULL 1063 | alloc = 0; 1064 | for (s = fmt; *s != '\0'; s++) { 1065 | if (*s == '%') 1066 | alloc += 3; 1067 | } 1068 | if ((args = malloc((alloc > 0 ? alloc : 1) * sizeof(*args))) == NULL) 1069 | err(1, "malloc"); 1070 | nargs = 0; 1071 | 1072 | // Parse format 1073 | for (s = fmt; *s != '\0'; s++) { 1074 | char *const fspec = s; 1075 | if (*s != '%' || *++s == '%') 1076 | continue; 1077 | s = eataccessor(fspec, "format specification", column_names, s, &nargs, args); 1078 | while (*s != '\0' && strchr("#-+ 0", *s) != NULL) // eat up optional flags 1079 | s++; 1080 | s = eatwidthprec(fspec, "field width for format specification", column_names, s, &nargs, args); 1081 | if (*s == '.') 1082 | s = eatwidthprec(fspec, "precision for format specification", column_names, s + 1, &nargs, args); 1083 | if (*s == '\0') 1084 | errx(1, "truncated format specification starting at \"%.20s...\"", fspec); 1085 | } 1086 | 1087 | // Done 1088 | *argsp = args; 1089 | return nargs; 1090 | } 1091 | // Parses a one-character option argument, given literally or as a backslash escape; returns the character value, or -1 if it is invalid 1092 | static int 1093 | parsechar(const char *str) 1094 | { 1095 | char *eptr; 1096 | int ch; 1097 | 1098 | switch (strlen(str)) { 1099 | case 1: 1100 | ch = (unsigned char)*str; 1101 | break; 1102 | case 2: 1103 | if (*str != '\\') 1104 | return -1; 1105 | switch (str[1]) { 1106 | case 'a': 1107 | ch = '\a'; 1108 | break; 1109 | case 't': 1110 | ch = '\t'; 1111 | break; 1112 | case 'b': 1113 | ch = '\b'; 1114 | break; 1115 | case 'r': 1116 | ch = '\r'; 1117 | break; 1118 | case 'f': 1119 
| ch = '\f'; 1120 | break; 1121 | case 'v': 1122 | ch = '\v'; 1123 | break; 1124 | case '\\': 1125 | case '\'': 1126 | case '"': 1127 | ch = str[1]; 1128 | break; 1129 | default: 1130 | return -1; 1131 | } 1132 | break; 1133 | case 4: // "\xNN" is parsed as hexadecimal, any other "\NNN" as octal 1134 | if (*str != '\\') 1135 | return -1; 1136 | ch = str[1] == 'x' ? strtoul(str + 2, &eptr, 16) : strtoul(str + 1, &eptr, 8); 1137 | if (*eptr != '\0') 1138 | return -1; 1139 | break; 1140 | default: 1141 | return -1; 1142 | } 1143 | 1144 | // Disallow line separator 1145 | if (ch == '\n') 1146 | return -1; 1147 | 1148 | // Disallow values that do not fit in one byte 1149 | if (ch != (ch & 0xff)) 1150 | return -1; 1151 | 1152 | // Done 1153 | return ch; 1154 | } 1155 | // Skips a field width or precision at "s": either '*' followed by a column accessor, or a run of digits; returns the position just past it 1156 | static char * 1157 | eatwidthprec(const char *const fspec, const char *desc, const struct row *column_names, char *s, int *nargs, unsigned int *args) 1158 | { 1159 | if (*s == '*') 1160 | return eataccessor(fspec, desc, column_names, s + 1, nargs, args); 1161 | while (isdigit((unsigned char)*s)) // eat up numerical field width or precision 1162 | s++; 1163 | return s; 1164 | } 1165 | // Parses the column accessor at "s" ("{name}" or "N$"), appends its column number to args[], removes the accessor text from the string, and returns "s" 1166 | static char * 1167 | eataccessor(const char *const fspec, const char *desc, const struct row *column_names, char *s, int *nargs, unsigned int *args) 1168 | { 1169 | char *const start = s; 1170 | const char *colname; 1171 | int namelen; 1172 | int argnum; 1173 | int i; 1174 | 1175 | if (*s == '{') { 1176 | if (column_names == NULL) 1177 | errx(1, "symbolic column accessors require \"-i\" flag in %s starting at \"%.20s...\"", desc, fspec); 1178 | colname = ++s; 1179 | while (*s != '}') { 1180 | if (*s++ == '\0') 1181 | errx(1, "malformed column accessor in %s starting at \"%.20s...\"", desc, fspec); 1182 | } 1183 | namelen = s++ - colname; 1184 | argnum = 0; // zero means "no match yet"; a match is stored as index + 1 1185 | for (i = 0; i < column_names->num; i++) { 1186 | if (strncmp(colname, column_names->fields[i], namelen) == 0 && column_names->fields[i][namelen] == '\0') { 1187 | if (argnum != 0) { 1188 | errx(1, "ambiguous column name \"%.*s\" in 
symbolic column accessor in %s starting at \"%.20s...\"", 1189 | namelen, colname, desc, fspec); 1190 | } 1191 | argnum = i + 1; 1192 | } 1193 | } 1194 | if (argnum == 0) { 1195 | errx(1, "unknown column name \"%.*s\" in symbolic column accessor in %s starting at \"%.20s...\"", 1196 | namelen, colname, desc, fspec); 1197 | } 1198 | args[(*nargs)++] = argnum; 1199 | } else { 1200 | while (isdigit((unsigned char)*s)) 1201 | s++; 1202 | if (s == start || *s++ != '$') 1203 | errx(1, "missing required column accessor in %s starting at \"%.20s...\"", desc, fspec); 1204 | sscanf(start, "%u", &args[(*nargs)++]); // NOTE(review): the "%u" conversion is not range-checked here; confirm that oversized column numbers are rejected elsewhere 1205 | } 1206 | memmove(start, s, strlen(s) + 1); // remove the accessor text from the string (the move includes the terminating NUL) 1207 | return start; 1208 | } 1209 | 1210 | // Like getc() but optionally collapses CR or CR, LF into a single LF 1211 | static int 1212 | readch(FILE *fp, int collapse) 1213 | { 1214 | int ch; 1215 | 1216 | ch = getc(fp); 1217 | if (collapse && ch == '\r') { 1218 | if ((ch = getc(fp)) != '\n') { 1219 | ungetc(ch, fp); // lone CR: push the lookahead character back 1220 | ch = '\n'; 1221 | } 1222 | } 1223 | return ch; 1224 | } 1225 | // Frees every field string and the field array, then zeroes the row 1226 | static void 1227 | freerow(struct row *row) 1228 | { 1229 | while (row->num > 0) 1230 | free(row->fields[--row->num]); 1231 | free(row->fields); 1232 | memset(row, 0, sizeof(*row)); 1233 | } 1234 | // Prints the usage summary to stderr. NOTE(review): the "-c", "-n" and "-p" flags mentioned in CHANGES are not listed in this text; confirm whether they should be 1235 | static void 1236 | usage(void) 1237 | { 1238 | 1239 | fprintf(stderr, "Usage:\n"); 1240 | fprintf(stderr, " csvprintf [options] format\n"); 1241 | fprintf(stderr, " csvprintf -b [options]\n"); 1242 | fprintf(stderr, " csvprintf -j [options]\n"); 1243 | fprintf(stderr, " csvprintf -x [options]\n"); 1244 | fprintf(stderr, " csvprintf -X [options]\n"); 1245 | fprintf(stderr, " csvprintf -h\n"); 1246 | fprintf(stderr, " csvprintf -v\n"); 1247 | fprintf(stderr, "Options:\n"); 1248 | fprintf(stderr, " -b\t\tConvert input to bash(1) variable assignments\n"); 1249 | fprintf(stderr, " -e encoding\tSpecify input character encoding (XML and JSON modes only; default ISO-8859-1)\n"); 1250 | fprintf(stderr, " -f input\tRead CSV input from 
specified file (default stdin)\n"); 1251 | fprintf(stderr, " -i\t\tAssume the first CSV record contains column names\n"); 1252 | fprintf(stderr, " -j\t\tConvert input to JSON text sequences\n"); 1253 | fprintf(stderr, " -q char\tSpecify quote character (default `%c')\n", DEFAULT_QUOTE_CHAR); 1254 | fprintf(stderr, " -s char\tSpecify field separator character (default `%c')\n", DEFAULT_FSEP_CHAR); 1255 | fprintf(stderr, " -x\t\tConvert input to XML using numeric tags\n"); 1256 | fprintf(stderr, " -X\t\tConvert input to XML using column name tags (implies \"-i\")\n"); 1257 | fprintf(stderr, " -h\t\tOutput this help message and exit\n"); 1258 | fprintf(stderr, " -v\t\tOutput version information and exit\n"); 1259 | } 1260 | // Prints the package name and version, the build revision string when it is non-empty, and the copyright and warranty notice to stderr 1261 | static void 1262 | version(void) 1263 | { 1264 | fprintf(stderr, "%s version %s", PACKAGE_TARNAME, PACKAGE_VERSION); 1265 | if (*csvprintf_version != '\0') 1266 | fprintf(stderr, " (%s)", csvprintf_version); 1267 | fprintf(stderr, "\n"); 1268 | fprintf(stderr, "Copyright (C) 2010-2023 Archie L. Cobbs\n"); 1269 | fprintf(stderr, "This is free software; see the source for copying conditions. 
There is NO\n"); 1270 | fprintf(stderr, "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"); 1271 | } 1272 | 1273 | -------------------------------------------------------------------------------- /tests/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Runs csvprintf on every *.in fixture and diffs each output mode against its expected file; bash is required because "set -o pipefail" is not supported by every /bin/sh 3 | set -e 4 | set -o pipefail 5 | 6 | FAILED_TESTS='' 7 | for INPUT_FILE in *.in; do 8 | OUTPUT_FILE1=`echo "${INPUT_FILE}" | sed -n 's/\.in$/.out1/gp'` 9 | OUTPUT_FILE2=`echo "${INPUT_FILE}" | sed -n 's/\.in$/.out2/gp'` 10 | OUTPUT_FILE3A=`echo "${INPUT_FILE}" | sed -n 's/\.in$/.out3a/gp'` 11 | OUTPUT_FILE3B=`echo "${INPUT_FILE}" | sed -n 's/\.in$/.out3b/gp'` 12 | OUTPUT_FILE4=`echo "${INPUT_FILE}" | sed -n 's/\.in$/.out4/gp'` 13 | OUTPUT_FILE5A=`echo "${INPUT_FILE}" | sed -n 's/\.in$/.out5a/gp'` 14 | OUTPUT_FILE5B=`echo "${INPUT_FILE}" | sed -n 's/\.in$/.out5b/gp'` 15 | echo "*** testing ${INPUT_FILE}..." 1>&2 16 | if ! ../csvprintf -x -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE1}" -; then 17 | echo "*** FAILED: [1] ${INPUT_FILE}" 1>&2 18 | FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE1}" 19 | fi 20 | if ! ../csvprintf -X -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE2}" -; then 21 | echo "*** FAILED: [2] ${INPUT_FILE}" 1>&2 22 | FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE2}" 23 | fi 24 | if ! ../csvprintf -e iso-8859-1 -x -f "${INPUT_FILE}" | xsltproc ../csv.xsl - | ../csvprintf -e UTF-8 -x | diff -u "${OUTPUT_FILE1}" -; then 25 | echo "*** FAILED: [1x] ${INPUT_FILE}" 1>&2 26 | FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/csv2xml" 27 | fi 28 | if ! ../csvprintf -j -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE3A}" -; then 29 | echo "*** FAILED: [3a] ${INPUT_FILE}" 1>&2 30 | FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE3A}" 31 | fi 32 | if ! 
../csvprintf -ij -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE3B}" -; then 33 | echo "*** FAILED: [3b] ${INPUT_FILE}" 1>&2 34 | FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE3B}" 35 | fi 36 | if ! ../csvprintf -ix -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE4}" -; then 37 | echo "*** FAILED: [4] ${INPUT_FILE}" 1>&2 38 | FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE4}" 39 | fi 40 | if ! ../csvprintf -b -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE5A}" -; then 41 | echo "*** FAILED: [5a] ${INPUT_FILE}" 1>&2 42 | FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE5A}" 43 | fi 44 | if ! ../csvprintf -ib -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE5B}" -; then 45 | echo "*** FAILED: [5b] ${INPUT_FILE}" 1>&2 46 | FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE5B}" 47 | fi 48 | done 49 | 50 | if [ -z "${FAILED_TESTS}" ]; then 51 | echo "*** all tests passed" 52 | else 53 | echo "*** test(s) failed:${FAILED_TESTS}" 54 | exit 1 55 | fi 56 | 57 | -------------------------------------------------------------------------------- /tests/run2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Bail on error 4 | set -e 5 | 6 | # Setup temporary files 7 | TMP_STDOUT_EXPECTED='csvprintf-test-out-expected.tmp' 8 | TMP_STDERR_EXPECTED='csvprintf-test-err-expected.tmp' 9 | TMP_STDOUT_ACTUAL='csvprintf-test-out-actual.tmp' 10 | TMP_STDERR_ACTUAL='csvprintf-test-err-actual.tmp' 11 | TMP_SWAP_FILE=''csvprintf-test-hexdump.tmp 12 | trap "rm -f \ 13 | ${TMP_STDOUT_EXPECTED} \ 14 | ${TMP_STDERR_EXPECTED} \ 15 | ${TMP_STDOUT_ACTUAL} \ 16 | ${TMP_STDERR_ACTUAL} \ 17 | ${TMP_SWAP_FILE}" 0 2 3 5 10 13 15 18 | 19 | # Convert a file to hexdump version 20 | hexdumpify() 21 | { 22 | FILE="${1}" 23 | hexdump -C < "${FILE}" > "${TMP_SWAP_FILE}" 24 | mv "${TMP_SWAP_FILE}" "${FILE}" 25 | } 26 | 27 | # Compare files, on failure set ${DIFF_FAIL} 28 | checkdiff() 29 | { 30 | if [ "${1}" = '-h' ]; then 31 | 
HEXDUMPIFY='true' 32 | shift 33 | else 34 | HEXDUMPIFY='false' 35 | fi 36 | TESTFILE="${1}" 37 | WHAT="${2}" 38 | EXPECTED="${3}" 39 | ACTUAL="${4}" 40 | if diff -q "${EXPECTED}" "${ACTUAL}" >/dev/null; then 41 | return 0 42 | fi 43 | echo "test: ${TESTFILE}: ${WHAT} mismatch" 44 | echo '------------------------------------------------------' 45 | if [ "${HEXDUMPIFY}" = 'true' ]; then 46 | hexdumpify "${EXPECTED}" 47 | hexdumpify "${ACTUAL}" 48 | fi 49 | diff -u "${EXPECTED}" "${ACTUAL}" || true 50 | echo '------------------------------------------------------' 51 | DIFF_FAIL='true' 52 | } 53 | 54 | # Execute one test, on failure set ${TEST_FAIL} 55 | runtest() 56 | { 57 | # Read test data 58 | unset FLAGS 59 | unset STDIN 60 | unset STDOUT 61 | unset STDERR 62 | unset EXITVAL 63 | . "${TESTFILE}" 64 | if [ -z "${FLAGS+x}" \ 65 | -o -z "${STDIN+x}" \ 66 | -o -z "${STDOUT+x}" \ 67 | -o -z "${STDERR+x}" \ 68 | -o -z "${EXITVAL+x}" ]; then 69 | echo "test: ${TESTFILE}: invalid test file" 70 | exit 1 71 | fi 72 | 73 | # Set up files 74 | echo -en "${STDOUT}" > "${TMP_STDOUT_EXPECTED}" 75 | echo -en "${STDERR}" > "${TMP_STDERR_EXPECTED}" 76 | set +e 77 | echo -en "${STDIN}" | ../csvprintf ${FLAGS} >"${TMP_STDOUT_ACTUAL}" 2>"${TMP_STDERR_ACTUAL}" 78 | ACTUAL_EXITVAL="$?" 79 | set -e 80 | 81 | # Special hacks 82 | if [ "${STDERR}" = '!USAGE!' ]; then 83 | ../csvprintf --help 2>"${TMP_STDERR_EXPECTED}" 84 | fi 85 | 86 | # Check result 87 | DIFF_FAIL='false' 88 | if [ "${STDOUT}" != '!IGNORE!' 
]; then 89 | checkdiff -h "${TESTFILE}" "standard output" "${TMP_STDOUT_EXPECTED}" "${TMP_STDOUT_ACTUAL}" 90 | fi 91 | checkdiff "${TESTFILE}" "standard error" "${TMP_STDERR_EXPECTED}" "${TMP_STDERR_ACTUAL}" 92 | if [ "${DIFF_FAIL}" != 'false' ]; then 93 | TEST_FAIL='true' 94 | fi 95 | if [ "${ACTUAL_EXITVAL}" -ne "${EXITVAL}" ]; then 96 | echo "test: ${TESTFILE}: exit value ${ACTUAL_EXITVAL} != ${EXITVAL}" 97 | TEST_FAIL='true' 98 | fi 99 | 100 | # Print success or if failure show params 101 | if [ "${TEST_FAIL}" = 'false' ]; then 102 | echo "test: ${TESTFILE}: success" 103 | else 104 | echo "******************************************************" 105 | echo "test: ${TESTFILE} FAILED with:" 106 | echo " FLAGS='${FLAGS}'" 107 | echo " STDIN='${STDIN}'" 108 | echo "******************************************************" 109 | fi 110 | } 111 | 112 | # Find all tests and run them 113 | ANY_FAIL='false' 114 | for TESTFILE in `find . -maxdepth 1 -type f -name 'test-*.tst' | sort | sed 's|^./||g'`; do 115 | TEST_FAIL='false' 116 | runtest "${TESTFILE}" 117 | if [ "${TEST_FAIL}" != 'false' ]; then 118 | ANY_FAIL='true' 119 | fi 120 | done 121 | 122 | # Exit with error if any test failed 123 | if [ "${ANY_FAIL}" != 'false' ]; then 124 | exit 1 125 | fi 126 | -------------------------------------------------------------------------------- /tests/test-bash-omit1.tst: -------------------------------------------------------------------------------- 1 | FLAGS='-bi' 2 | STDIN='aaa,PATH,ccc\n"a1","b1","c1"\n"a2","b2","c2"\n' 3 | STDOUT=$'aaa=\'a1\'; ccc=\'c1\';\naaa=\'a2\'; ccc=\'c2\';\n' 4 | STDERR='' 5 | EXITVAL='0' 6 | -------------------------------------------------------------------------------- /tests/test-bash-prefix1.tst: -------------------------------------------------------------------------------- 1 | FLAGS='-bi -p FOO_' 2 | STDIN='aaa,PATH,ccc\n"a1","b1","c1"\n"a2","b2","c2"\n' 3 | STDOUT=$'FOO_aaa=\'a1\'; FOO_PATH=\'b1\'; FOO_ccc=\'c1\';\nFOO_aaa=\'a2\'; 
FOO_PATH=\'b2\'; FOO_ccc=\'c2\';\n' 4 | STDERR='' 5 | EXITVAL='0' 6 | -------------------------------------------------------------------------------- /tests/test-bash-prefix2.tst: -------------------------------------------------------------------------------- 1 | FLAGS='-bi -p PA' 2 | STDIN='aaa,TH,ccc\n"a1","b1","c1"\n"a2","b2","c2"\n' 3 | STDOUT=$'PAaaa=\'a1\'; PAccc=\'c1\';\nPAaaa=\'a2\'; PAccc=\'c2\';\n' 4 | STDERR='' 5 | EXITVAL='0' 6 | -------------------------------------------------------------------------------- /tests/test-bash-quote.tst: -------------------------------------------------------------------------------- 1 | FLAGS='-bi -p PA_' 2 | STDIN=$'aaa,bbb,ccc\n\'aa\'xx\',"bb""yy",cc`zz\\ww\n' 3 | STDOUT=$'PA_aaa=$\'\\\'aa\\\'xx\\\'\'; PA_bbb=\'bb"yy\'; PA_ccc=\'cc`zz\\ww\';\n' 4 | STDERR='' 5 | EXITVAL='0' 6 | -------------------------------------------------------------------------------- /tests/test-cflag-not-found.tst: -------------------------------------------------------------------------------- 1 | FLAGS='-X -c bbb -c zzz' 2 | STDIN='aaa,bbb,ccc\n"a1","b1","c1"\n"a2","b2","c2"\n' 3 | STDOUT='!IGNORE!' 
4 | STDERR='csvprintf: column "zzz" not found\n' 5 | EXITVAL='1' 6 | -------------------------------------------------------------------------------- /tests/test-cflag-xml.tst: -------------------------------------------------------------------------------- 1 | FLAGS='-X -c bbb' 2 | STDIN='aaa,bbb,ccc\n"a1","b1","c1"\n"a2","b2","c2"\n' 3 | STDOUT='\n\n \n b1\n \n \n b2\n \n\n' 4 | STDERR='' 5 | EXITVAL='0' 6 | -------------------------------------------------------------------------------- /tests/test-json-skip1.tst: -------------------------------------------------------------------------------- 1 | FLAGS='-jn' 2 | STDIN='aaa,bbb\n"a1","b1"\n"a2","b2"\n' 3 | STDOUT='\x1e["a1","b1"]\n\x1e["a2","b2"]\n' 4 | STDERR='' 5 | EXITVAL='0' 6 | -------------------------------------------------------------------------------- /tests/test-tab-noskip.tst: -------------------------------------------------------------------------------- 1 | FLAGS='-nj -s \t' 2 | STDIN='aaa\tbbb\tccc\n\t\t\n' 3 | STDOUT='\x1e["","",""]\n' 4 | STDERR='' 5 | EXITVAL='0' 6 | -------------------------------------------------------------------------------- /tests/test1.in: -------------------------------------------------------------------------------- 1 | NAME,ADDRESS,POINTS 2 | Fred Smith,"1234 Main St. 3 | Anytown, USA 39103",123.4567 4 | "Wayne ""The Great One"" Gretsky", 59 Hockey Lane , 999999 5 | -------------------------------------------------------------------------------- /tests/test1.out1: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | NAME 5 | ADDRESS 6 | POINTS 7 | 8 | 9 | Fred Smith 10 | 1234 Main St. 11 | Anytown, USA 39103 12 | 123.4567 13 | 14 | 15 | Wayne "The Great One" Gretsky 16 | 59 Hockey Lane 17 | 999999 18 | 19 | 20 | -------------------------------------------------------------------------------- /tests/test1.out2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Fred Smith 5 |
1234 Main St. 6 | Anytown, USA 39103
7 | 123.4567 8 |
9 | 10 | Wayne "The Great One" Gretsky 11 |
59 Hockey Lane
12 | 999999 13 |
14 |
15 | -------------------------------------------------------------------------------- /tests/test1.out3a: -------------------------------------------------------------------------------- 1 | ["NAME","ADDRESS","POINTS"] 2 | ["Fred Smith","1234 Main St.\nAnytown, USA 39103","123.4567"] 3 | ["Wayne \"The Great One\" Gretsky","59 Hockey Lane","999999"] 4 | -------------------------------------------------------------------------------- /tests/test1.out3b: -------------------------------------------------------------------------------- 1 | {"NAME":"Fred Smith","ADDRESS":"1234 Main St.\nAnytown, USA 39103","POINTS":"123.4567"} 2 | {"NAME":"Wayne \"The Great One\" Gretsky","ADDRESS":"59 Hockey Lane","POINTS":"999999"} 3 | -------------------------------------------------------------------------------- /tests/test1.out4: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Fred Smith 5 | 1234 Main St. 6 | Anytown, USA 39103 7 | 123.4567 8 | 9 | 10 | Wayne "The Great One" Gretsky 11 | 59 Hockey Lane 12 | 999999 13 | 14 | 15 | -------------------------------------------------------------------------------- /tests/test1.out5a: -------------------------------------------------------------------------------- 1 | ROW=( 'NAME' 'ADDRESS' 'POINTS' ) 2 | ROW=( 'Fred Smith' $'1234 Main St.\nAnytown, USA 39103' '123.4567' ) 3 | ROW=( 'Wayne "The Great One" Gretsky' '59 Hockey Lane' '999999' ) 4 | -------------------------------------------------------------------------------- /tests/test1.out5b: -------------------------------------------------------------------------------- 1 | NAME='Fred Smith'; ADDRESS=$'1234 Main St.\nAnytown, USA 39103'; POINTS='123.4567'; 2 | NAME='Wayne "The Great One" Gretsky'; ADDRESS='59 Hockey Lane'; POINTS='999999'; 3 | -------------------------------------------------------------------------------- /tests/test2.in: -------------------------------------------------------------------------------- 1 | 
NAME,ADDRESS,POINTS 2 | Fred Smith,"1234 Main St. 3 | Anytown, USA 39103",123.4567 4 | "Wayne ""The Great One"" Gretsky", 59 Hockey Lane , 999999 5 | -------------------------------------------------------------------------------- /tests/test2.out1: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | NAME 5 | ADDRESS 6 | POINTS 7 | 8 | 9 | Fred Smith 10 | 1234 Main St. 11 | Anytown, USA 39103 12 | 123.4567 13 | 14 | 15 | Wayne "The Great One" Gretsky 16 | 59 Hockey Lane 17 | 999999 18 | 19 | 20 | -------------------------------------------------------------------------------- /tests/test2.out2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Fred Smith 5 |
1234 Main St. 6 | Anytown, USA 39103
7 | 123.4567 8 |
9 | 10 | Wayne "The Great One" Gretsky 11 |
59 Hockey Lane
12 | 999999 13 |
14 |
15 | -------------------------------------------------------------------------------- /tests/test2.out3a: -------------------------------------------------------------------------------- 1 | ["NAME","ADDRESS","POINTS"] 2 | ["Fred Smith","1234 Main St.\r\nAnytown, USA 39103","123.4567"] 3 | ["Wayne \"The Great One\" Gretsky","59 Hockey Lane","999999"] 4 | -------------------------------------------------------------------------------- /tests/test2.out3b: -------------------------------------------------------------------------------- 1 | {"NAME":"Fred Smith","ADDRESS":"1234 Main St.\r\nAnytown, USA 39103","POINTS":"123.4567"} 2 | {"NAME":"Wayne \"The Great One\" Gretsky","ADDRESS":"59 Hockey Lane","POINTS":"999999"} 3 | -------------------------------------------------------------------------------- /tests/test2.out4: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Fred Smith 5 | 1234 Main St. 6 | Anytown, USA 39103 7 | 123.4567 8 | 9 | 10 | Wayne "The Great One" Gretsky 11 | 59 Hockey Lane 12 | 999999 13 | 14 | 15 | -------------------------------------------------------------------------------- /tests/test2.out5a: -------------------------------------------------------------------------------- 1 | ROW=( 'NAME' 'ADDRESS' 'POINTS' ) 2 | ROW=( 'Fred Smith' $'1234 Main St.\r\nAnytown, USA 39103' '123.4567' ) 3 | ROW=( 'Wayne "The Great One" Gretsky' '59 Hockey Lane' '999999' ) 4 | -------------------------------------------------------------------------------- /tests/test2.out5b: -------------------------------------------------------------------------------- 1 | NAME='Fred Smith'; ADDRESS=$'1234 Main St.\r\nAnytown, USA 39103'; POINTS='123.4567'; 2 | NAME='Wayne "The Great One" Gretsky'; ADDRESS='59 Hockey Lane'; POINTS='999999'; 3 | -------------------------------------------------------------------------------- /tests/test3.in: -------------------------------------------------------------------------------- 1 | 
NAME,ADDRESS,POINTS Fred Smith,"1234 Main St. Anytown, USA 39103",123.4567 "Wayne ""The Great One"" Gretsky", 59 Hockey Lane , 999999 -------------------------------------------------------------------------------- /tests/test3.out1: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | NAME 5 | ADDRESS 6 | POINTS 7 | 8 | 9 | Fred Smith 10 | 1234 Main St. Anytown, USA 39103 11 | 123.4567 12 | 13 | 14 | Wayne "The Great One" Gretsky 15 | 59 Hockey Lane 16 | 999999 17 | 18 | 19 | -------------------------------------------------------------------------------- /tests/test3.out2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Fred Smith 5 |
1234 Main St. Anytown, USA 39103
6 | 123.4567 7 |
8 | 9 | Wayne "The Great One" Gretsky 10 |
59 Hockey Lane
11 | 999999 12 |
13 |
14 | -------------------------------------------------------------------------------- /tests/test3.out3a: -------------------------------------------------------------------------------- 1 | ["NAME","ADDRESS","POINTS"] 2 | ["Fred Smith","1234 Main St.\rAnytown, USA 39103","123.4567"] 3 | ["Wayne \"The Great One\" Gretsky","59 Hockey Lane","999999"] 4 | -------------------------------------------------------------------------------- /tests/test3.out3b: -------------------------------------------------------------------------------- 1 | {"NAME":"Fred Smith","ADDRESS":"1234 Main St.\rAnytown, USA 39103","POINTS":"123.4567"} 2 | {"NAME":"Wayne \"The Great One\" Gretsky","ADDRESS":"59 Hockey Lane","POINTS":"999999"} 3 | -------------------------------------------------------------------------------- /tests/test3.out4: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Fred Smith 5 | 1234 Main St. Anytown, USA 39103 6 | 123.4567 7 | 8 | 9 | Wayne "The Great One" Gretsky 10 | 59 Hockey Lane 11 | 999999 12 | 13 | 14 | -------------------------------------------------------------------------------- /tests/test3.out5a: -------------------------------------------------------------------------------- 1 | ROW=( 'NAME' 'ADDRESS' 'POINTS' ) 2 | ROW=( 'Fred Smith' $'1234 Main St.\rAnytown, USA 39103' '123.4567' ) 3 | ROW=( 'Wayne "The Great One" Gretsky' '59 Hockey Lane' '999999' ) 4 | -------------------------------------------------------------------------------- /tests/test3.out5b: -------------------------------------------------------------------------------- 1 | NAME='Fred Smith'; ADDRESS=$'1234 Main St.\rAnytown, USA 39103'; POINTS='123.4567'; 2 | NAME='Wayne "The Great One" Gretsky'; ADDRESS='59 Hockey Lane'; POINTS='999999'; 3 | -------------------------------------------------------------------------------- /tests/test4.in: -------------------------------------------------------------------------------- 1 | 
NAME,ADDRESS,POINTS Fred Smith,"1234 Main St. 2 | Anytown, USA 39103",123.4567 "Wayne ""The Great One"" Gretsky", 59 Hockey Lane , 999999 -------------------------------------------------------------------------------- /tests/test4.out1: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | NAME 5 | ADDRESS 6 | POINTS 7 | 8 | 9 | Fred Smith 10 | 1234 Main St. 11 | Anytown, USA 39103 12 | 123.4567 13 | 14 | 15 | Wayne "The Great One" Gretsky 16 | 59 Hockey Lane 17 | 999999 18 | 19 | 20 | -------------------------------------------------------------------------------- /tests/test4.out2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Fred Smith 5 |
1234 Main St. 6 | Anytown, USA 39103
7 | 123.4567 8 |
9 | 10 | Wayne "The Great One" Gretsky 11 |
59 Hockey Lane
12 | 999999 13 |
14 |
15 | -------------------------------------------------------------------------------- /tests/test4.out3a: -------------------------------------------------------------------------------- 1 | ["NAME","ADDRESS","POINTS"] 2 | ["Fred Smith","1234 Main St.\nAnytown, USA 39103","123.4567"] 3 | ["Wayne \"The Great One\" Gretsky","59 Hockey Lane","999999"] 4 | -------------------------------------------------------------------------------- /tests/test4.out3b: -------------------------------------------------------------------------------- 1 | {"NAME":"Fred Smith","ADDRESS":"1234 Main St.\nAnytown, USA 39103","POINTS":"123.4567"} 2 | {"NAME":"Wayne \"The Great One\" Gretsky","ADDRESS":"59 Hockey Lane","POINTS":"999999"} 3 | -------------------------------------------------------------------------------- /tests/test4.out4: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Fred Smith 5 | 1234 Main St. 6 | Anytown, USA 39103 7 | 123.4567 8 | 9 | 10 | Wayne "The Great One" Gretsky 11 | 59 Hockey Lane 12 | 999999 13 | 14 | 15 | -------------------------------------------------------------------------------- /tests/test4.out5a: -------------------------------------------------------------------------------- 1 | ROW=( 'NAME' 'ADDRESS' 'POINTS' ) 2 | ROW=( 'Fred Smith' $'1234 Main St.\nAnytown, USA 39103' '123.4567' ) 3 | ROW=( 'Wayne "The Great One" Gretsky' '59 Hockey Lane' '999999' ) 4 | -------------------------------------------------------------------------------- /tests/test4.out5b: -------------------------------------------------------------------------------- 1 | NAME='Fred Smith'; ADDRESS=$'1234 Main St.\nAnytown, USA 39103'; POINTS='123.4567'; 2 | NAME='Wayne "The Great One" Gretsky'; ADDRESS='59 Hockey Lane'; POINTS='999999'; 3 | -------------------------------------------------------------------------------- /tests/test5.in: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/archiecobbs/csvprintf/c300f17d2f82c53e433f7bdca742805d602eb31e/tests/test5.in -------------------------------------------------------------------------------- /tests/test5.out1: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | piñata 5 | 6 | 7 | -------------------------------------------------------------------------------- /tests/test5.out2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /tests/test5.out3a: -------------------------------------------------------------------------------- 1 | ["pi\u00f1ata"] 2 | -------------------------------------------------------------------------------- /tests/test5.out3b: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/archiecobbs/csvprintf/c300f17d2f82c53e433f7bdca742805d602eb31e/tests/test5.out3b -------------------------------------------------------------------------------- /tests/test5.out4: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /tests/test5.out5a: -------------------------------------------------------------------------------- 1 | ROW=( $'pi\xf1ata' ) 2 | -------------------------------------------------------------------------------- /tests/test5.out5b: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/archiecobbs/csvprintf/c300f17d2f82c53e433f7bdca742805d602eb31e/tests/test5.out5b -------------------------------------------------------------------------------- /tests/test6.in: -------------------------------------------------------------------------------- 1 | NAME,ADDRESS,POINTS Fred Smith,"1234 Main St. 
2 | Anytown, USA 39103",123.4567 "Wayne ""The Great One"" Gretsky", 59 Hockey Lane , "999999" 3 | -------------------------------------------------------------------------------- /tests/test6.out1: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | NAME 5 | ADDRESS 6 | POINTS 7 | 8 | 9 | Fred Smith 10 | 1234 Main St. 11 | Anytown, USA 39103 12 | 123.4567 13 | 14 | 15 | Wayne "The Great One" Gretsky 16 | 59 Hockey Lane 17 | 999999 18 | 19 | 20 | -------------------------------------------------------------------------------- /tests/test6.out2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Fred Smith 5 |
1234 Main St. 6 | Anytown, USA 39103
7 | 123.4567 8 |
9 | 10 | Wayne "The Great One" Gretsky 11 |
59 Hockey Lane
12 | 999999 13 |
14 |
15 | -------------------------------------------------------------------------------- /tests/test6.out3a: -------------------------------------------------------------------------------- 1 | ["NAME","ADDRESS","POINTS"] 2 | ["Fred Smith","1234 Main St.\nAnytown, USA 39103","123.4567"] 3 | ["Wayne \"The Great One\" Gretsky","59 Hockey Lane","999999"] 4 | -------------------------------------------------------------------------------- /tests/test6.out3b: -------------------------------------------------------------------------------- 1 | {"NAME":"Fred Smith","ADDRESS":"1234 Main St.\nAnytown, USA 39103","POINTS":"123.4567"} 2 | {"NAME":"Wayne \"The Great One\" Gretsky","ADDRESS":"59 Hockey Lane","POINTS":"999999"} 3 | -------------------------------------------------------------------------------- /tests/test6.out4: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Fred Smith 5 | 1234 Main St. 6 | Anytown, USA 39103 7 | 123.4567 8 | 9 | 10 | Wayne "The Great One" Gretsky 11 | 59 Hockey Lane 12 | 999999 13 | 14 | 15 | -------------------------------------------------------------------------------- /tests/test6.out5a: -------------------------------------------------------------------------------- 1 | ROW=( 'NAME' 'ADDRESS' 'POINTS' ) 2 | ROW=( 'Fred Smith' $'1234 Main St.\nAnytown, USA 39103' '123.4567' ) 3 | ROW=( 'Wayne "The Great One" Gretsky' '59 Hockey Lane' '999999' ) 4 | -------------------------------------------------------------------------------- /tests/test6.out5b: -------------------------------------------------------------------------------- 1 | NAME='Fred Smith'; ADDRESS=$'1234 Main St.\nAnytown, USA 39103'; POINTS='123.4567'; 2 | NAME='Wayne "The Great One" Gretsky'; ADDRESS='59 Hockey Lane'; POINTS='999999'; 3 | -------------------------------------------------------------------------------- /tests/test7.in: -------------------------------------------------------------------------------- 1 | Name With 
Spaces,#~!@#$%^&*(),"&<>&""\" 2 | aaa,bbb,ccc 3 | -------------------------------------------------------------------------------- /tests/test7.out1: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Name With Spaces 5 | #~!@#$%^&*() 6 | &<>&"\ 7 | 8 | 9 | aaa 10 | bbb 11 | ccc 12 | 13 | 14 | -------------------------------------------------------------------------------- /tests/test7.out2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | aaa 5 | <____________>bbb 6 | <______>ccc 7 | 8 | 9 | -------------------------------------------------------------------------------- /tests/test7.out3a: -------------------------------------------------------------------------------- 1 | ["Name With Spaces","#~!@#$%^&*()","&<>&\"\\"] 2 | ["aaa","bbb","ccc"] 3 | -------------------------------------------------------------------------------- /tests/test7.out3b: -------------------------------------------------------------------------------- 1 | {"Name With Spaces":"aaa","#~!@#$%^&*()":"bbb","&<>&\"\\":"ccc"} 2 | -------------------------------------------------------------------------------- /tests/test7.out4: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | aaa 5 | bbb 6 | ccc 7 | 8 | 9 | -------------------------------------------------------------------------------- /tests/test7.out5a: -------------------------------------------------------------------------------- 1 | ROW=( 'Name With Spaces' '#~!@#$%^&*()' '&<>&"\' ) 2 | ROW=( 'aaa' 'bbb' 'ccc' ) 3 | -------------------------------------------------------------------------------- /tests/test7.out5b: -------------------------------------------------------------------------------- 1 | Name_With_Spaces='aaa'; ____________='bbb'; ______='ccc'; 2 | -------------------------------------------------------------------------------- /tests/test8.in: 
-------------------------------------------------------------------------------- 1 | ColA,ColB,ColC 2 | aaa,bbb,ccc 3 | aaa,bbb 4 | aaa,bbb,ccc,ddd 5 | -------------------------------------------------------------------------------- /tests/test8.out1: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ColA 5 | ColB 6 | ColC 7 | 8 | 9 | aaa 10 | bbb 11 | ccc 12 | 13 | 14 | aaa 15 | bbb 16 | 17 | 18 | aaa 19 | bbb 20 | ccc 21 | ddd 22 | 23 | 24 | -------------------------------------------------------------------------------- /tests/test8.out2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | aaa 5 | bbb 6 | ccc 7 | 8 | 9 | aaa 10 | bbb 11 | 12 | 13 | aaa 14 | bbb 15 | ccc 16 | ddd 17 | 18 | 19 | -------------------------------------------------------------------------------- /tests/test8.out3a: -------------------------------------------------------------------------------- 1 | ["ColA","ColB","ColC"] 2 | ["aaa","bbb","ccc"] 3 | ["aaa","bbb"] 4 | ["aaa","bbb","ccc","ddd"] 5 | -------------------------------------------------------------------------------- /tests/test8.out3b: -------------------------------------------------------------------------------- 1 | {"ColA":"aaa","ColB":"bbb","ColC":"ccc"} 2 | {"ColA":"aaa","ColB":"bbb"} 3 | {"ColA":"aaa","ColB":"bbb","ColC":"ccc","col4":"ddd"} 4 | -------------------------------------------------------------------------------- /tests/test8.out4: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | aaa 5 | bbb 6 | ccc 7 | 8 | 9 | aaa 10 | bbb 11 | 12 | 13 | aaa 14 | bbb 15 | ccc 16 | ddd 17 | 18 | 19 | -------------------------------------------------------------------------------- /tests/test8.out5a: -------------------------------------------------------------------------------- 1 | ROW=( 'ColA' 'ColB' 'ColC' ) 2 | ROW=( 'aaa' 'bbb' 'ccc' ) 3 | ROW=( 'aaa' 'bbb' ) 4 | ROW=( 'aaa' 'bbb' 
'ccc' 'ddd' ) 5 | -------------------------------------------------------------------------------- /tests/test8.out5b: -------------------------------------------------------------------------------- 1 | ColA='aaa'; ColB='bbb'; ColC='ccc'; 2 | ColA='aaa'; ColB='bbb'; 3 | ColA='aaa'; ColB='bbb'; ColC='ccc'; col4='ddd'; 4 | -------------------------------------------------------------------------------- /tests/test9.in: -------------------------------------------------------------------------------- 1 | foo 2 | value1,value2,value3 3 | -------------------------------------------------------------------------------- /tests/test9.out1: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | foo 5 | 6 | 7 | value1 8 | value2 9 | value3 10 | 11 | 12 | -------------------------------------------------------------------------------- /tests/test9.out2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | value1 5 | value2 6 | value3 7 | 8 | 9 | -------------------------------------------------------------------------------- /tests/test9.out3a: -------------------------------------------------------------------------------- 1 | ["foo"] 2 | ["value1","value2","value3"] 3 | -------------------------------------------------------------------------------- /tests/test9.out3b: -------------------------------------------------------------------------------- 1 | {"foo":"value1","col2":"value2","col3":"value3"} 2 | -------------------------------------------------------------------------------- /tests/test9.out4: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | value1 5 | value2 6 | value3 7 | 8 | 9 | -------------------------------------------------------------------------------- /tests/test9.out5a: -------------------------------------------------------------------------------- 1 | ROW=( 'foo' ) 2 | ROW=( 'value1' 'value2' 'value3' ) 3 | 
#!/bin/sh
#
# xml2csv - apply the csv.xsl stylesheet to an XML document using xsltproc.
#
# Usage: xml2csv [input.xml]
#
# With no operand, or with a lone "-", the input argument handed to
# xsltproc is "-" (which xsltproc reads as standard input).
#
# NOTE(review): the @XSLTPROC@ and @pkgdatadir@ tokens below look like
# build-time substitution placeholders (this file is a ".in" template);
# confirm against the build configuration.

# Set constants and defaults
NAME="xml2csv"
XSLTPROC="@XSLTPROC@"
CSVXSL="@pkgdatadir@/csv.xsl"

# Print the usage message on standard error
usage()
{
    echo "Usage:" 1>&2
    echo " ${NAME} [input.xml]" 1>&2
    echo "Options:" 1>&2
    echo " -h Show this help message and exit" 1>&2
}

# Print the given arguments on standard error, prefixed with the program name
log()
{
    echo ${NAME}: ${1+"$@"} 1>&2
}

# Log the given arguments as an error, then exit with status 1
errout()
{
    log ${1+"$@"}
    exit 1
}

# Bail on errors
set -e

# Parse flags passed in on the command line
while [ ${#} -gt 0 ]; do
    case "$1" in
        -h|--help)
            usage
            exit 0
            ;;
        --)
            # Explicit end of flags; whatever follows is an operand
            shift
            break
            ;;
        -)
            # A lone "-" is an input operand, not a flag. This arm must come
            # before "-*", whose pattern would otherwise match it and reject it.
            break
            ;;
        -*)
            log "unrecognized flag \`${1}'"
            usage
            exit 1
            ;;
        *)
            # First non-flag argument: stop parsing
            break
            ;;
    esac
done

# Accept at most one operand; default to "-" when none is given
case "${#}" in
    0)
        INPUT_FILE="-"
        ;;
    1)
        INPUT_FILE="${1}"
        ;;
    *)
        usage
        exit 1
        ;;
esac

# Run: replace this shell with xsltproc applying the stylesheet to the input
exec "${XSLTPROC}" "${CSVXSL}" "${INPUT_FILE}"