├── .gitignore
├── CHANGES
├── COPYING
├── INSTALL
├── Makefile.am
├── README
├── README.md
├── autogen.sh
├── cleanup.sh
├── configure.ac
├── csv.xsl
├── csvprintf.1.in
├── csvprintf.h
├── main.c
├── tests
    ├── run.sh
    ├── run2.sh
    ├── test-bash-omit1.tst
    ├── test-bash-prefix1.tst
    ├── test-bash-prefix2.tst
    ├── test-bash-quote.tst
    ├── test-cflag-not-found.tst
    ├── test-cflag-xml.tst
    ├── test-json-skip1.tst
    ├── test-tab-noskip.tst
    ├── test1.in
    ├── test1.out1
    ├── test1.out2
    ├── test1.out3a
    ├── test1.out3b
    ├── test1.out4
    ├── test1.out5a
    ├── test1.out5b
    ├── test2.in
    ├── test2.out1
    ├── test2.out2
    ├── test2.out3a
    ├── test2.out3b
    ├── test2.out4
    ├── test2.out5a
    ├── test2.out5b
    ├── test3.in
    ├── test3.out1
    ├── test3.out2
    ├── test3.out3a
    ├── test3.out3b
    ├── test3.out4
    ├── test3.out5a
    ├── test3.out5b
    ├── test4.in
    ├── test4.out1
    ├── test4.out2
    ├── test4.out3a
    ├── test4.out3b
    ├── test4.out4
    ├── test4.out5a
    ├── test4.out5b
    ├── test5.in
    ├── test5.out1
    ├── test5.out2
    ├── test5.out3a
    ├── test5.out3b
    ├── test5.out4
    ├── test5.out5a
    ├── test5.out5b
    ├── test6.in
    ├── test6.out1
    ├── test6.out2
    ├── test6.out3a
    ├── test6.out3b
    ├── test6.out4
    ├── test6.out5a
    ├── test6.out5b
    ├── test7.in
    ├── test7.out1
    ├── test7.out2
    ├── test7.out3a
    ├── test7.out3b
    ├── test7.out4
    ├── test7.out5a
    ├── test7.out5b
    ├── test8.in
    ├── test8.out1
    ├── test8.out2
    ├── test8.out3a
    ├── test8.out3b
    ├── test8.out4
    ├── test8.out5a
    ├── test8.out5b
    ├── test9.in
    ├── test9.out1
    ├── test9.out2
    ├── test9.out3a
    ├── test9.out3b
    ├── test9.out4
    ├── test9.out5a
    └── test9.out5b
└── xml2csv.in


/.gitignore:
--------------------------------------------------------------------------------
 1 | aclocal.m4
 2 | autom4te.cache
 3 | config.h
 4 | config.h.in
 5 | config.log
 6 | config.status
 7 | configure
 8 | csvprintf
 9 | csvprintf.1
10 | .deps
11 | gitrev.c
12 | Makefile
13 | Makefile.in
14 | *.o
15 | scripts
16 | stamp-h1
17 | xml2csv
18 | 


--------------------------------------------------------------------------------
/CHANGES:
--------------------------------------------------------------------------------
 1 | Version Next
 2 | 
 3 |     - Fixed bug where \t separator was being skipped as whitespace
 4 |     - Allow duplicate column names if the "-c" flag avoids them
 5 | 
 6 | Version 1.3.2 released January 25, 2023
 7 | 
 8 |     - Fixed bug where we could emit empty XML tag names
 9 |     - Fixed bug in man page examples for "-b" flag
10 | 
11 | Version 1.3.1 released December 14, 2021
12 | 
13 |     - Added "-c" flag for explicit column names
14 |     - Added "-n" flag that only reads column names
15 |     - Added "-p" flag for prefixing names
16 |     - Omit special variable names in Bash mode
17 |     - Fixed build error on systems without 'u_char' defined
18 | 
19 | Version 1.3.0 released December 9, 2021
20 | 
21 |     - Added "-b" flag for new Bash output mode
22 | 
23 | Version 1.2.1 released November 24, 2021
24 | 
25 |     - Fixed bug where "-x" flag was behaving like "-X"
26 | 
27 | Version 1.2.0 released November 22, 2021
28 | 
29 |     - Added "-j" flag for JSON text sequence document output.
30 |     - Stopped escaping double quote as "&quot;" in plain XML text.
31 | 
32 | Version 1.1.0 released February 25, 2021
33 | 
34 |     - Added support for format strings containing column names
35 | 
36 | Version 1.0.4 released August 1, 2018
37 | 
38 |     - Fixed "unexpected character" bug when line ends with QUOTE, CR
39 |     - Added "-X" flag to derive XML tag names from column headers
40 | 
41 | Version 1.0.3 (r32) released January 5, 2013
42 | 
43 |     - Add support for converting XML back to CSV
44 |     - Add `-e' flag to set input character encoding
45 |     - Escape CR characters in XML output
46 |     - Fixed glitches in man page
47 | 
48 | Version 1.0.2 (r25) released August 25, 2012
49 | 
50 |     - Allow backslash escapes for `-s' and `-q' flags
51 |     - Accept files that lack a terminating newline
52 |     - Accept CR, LF, or CR-LF line endings
53 | 
54 | Version 1.0.1 (r17) released March 9, 2012
55 | 
56 |     - Fix bug where `-s' flag did not function (Issue #1)
57 |     - Document '%0$' specifier in man page
58 | 
59 | Version 1.0 (r4) released November 30, 2010
60 | 
61 |     - Initial release
62 | 
63 | 


--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/INSTALL:
--------------------------------------------------------------------------------
 1 | 
 2 | Simplified instructions:
 3 | 
 4 |     1. ./configure
 5 |     2. make 
 6 |     3. sudo make install
 7 | 
 8 | Please see
 9 | 
10 |     https://github.com/archiecobbs/csvprintf
11 | 
12 | for more information.
13 | 
14 | 


--------------------------------------------------------------------------------
/Makefile.am:
--------------------------------------------------------------------------------
 1 | 
 2 | #
 3 | # csvprintf - Simple CSV file parser for the UNIX command line
 4 | # 
 5 | 
 6 | bin_PROGRAMS=		csvprintf
 7 | 
 8 | bin_SCRIPTS=		xml2csv
 9 | 
10 | noinst_HEADERS=		csvprintf.h
11 | 
12 | man_MANS=		csvprintf.1
13 | 
14 | pkgdata_DATA=		csv.xsl
15 | 
16 | docdir=			$(datadir)/doc/packages/$(PACKAGE)
17 | 
18 | doc_DATA=		CHANGES COPYING README
19 | 
20 | EXTRA_DIST=		CHANGES INSTALL csvprintf.1.in xml2csv.in csv.xsl
21 | 
22 | csvprintf_SOURCES=	main.c \
23 | 			gitrev.c
24 | 
25 | DISTCLEANFILES=		csvprintf.1 xml2csv
26 | 
27 | SUFFIXES=		.in
28 | .in:
29 | 			rm -f $@; $(subst) < $< >$@
30 | 
31 | .PHONY:			tests
32 | tests:			csvprintf
33 | 			@echo '************'
34 | 			@echo 'TEST SUITE 1'
35 | 			@echo '************'
36 | 			@cd tests && ./run.sh
37 | 			@echo '************'
38 | 			@echo 'TEST SUITE 2'
39 | 			@echo '************'
40 | 			@cd tests && ./run2.sh
41 | 
42 | subst=			sed \
43 | 			    -e 's|@PACKAGE[@]|$(PACKAGE)|g' \
44 | 			    -e 's|@PACKAGE_VERSION[@]|$(PACKAGE_VERSION)|g' \
45 | 			    -e 's|@pkgdatadir[@]|$(pkgdatadir)|g' \
46 | 			    -e 's|@XSLTPROC[@]|$(XSLTPROC)|g'
47 | # xml2csv shares the csvprintf man page: install xml2csv.1 as a hard link to it (-f so a repeated "make install" replaces the old link instead of failing)
48 | install-data-hook:
49 | 			ln -f "$(DESTDIR)$(man1dir)"/csvprintf.1 "$(DESTDIR)$(man1dir)"/xml2csv.1
50 | 
51 | uninstall-hook:
52 | 			rm -f "$(DESTDIR)$(man1dir)"/xml2csv.1
53 | 
54 | gitrev.c:
55 | 			printf 'const char *const csvprintf_version = "%s";\n' "`git describe`" > gitrev.c
56 | 
57 | 


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
 1 | csvprintf is a simple UNIX command line utility for parsing CSV files.
 2 | 
 3 | csvprintf works just like the printf(1) command line utility. You
 4 | supply a printf(1) format string on the command line and each record
 5 | in the CSV file is formatted accordingly. Each format specifier in
 6 | the format string contains a column accessor to specify which CSV
 7 | column to use, so for example '%3$d' would format the third column
 8 | as a decimal value.
 9 | 
10 | csvprintf can also convert CSV files into XML and JSON documents
11 | and Bash variable assignments suitable for eval(1).
12 | 
13 | See INSTALL for installation instructions.
14 | 
15 | See COPYING for license.
16 | 
17 | See CHANGES for change history.
18 | 
19 | Enjoy!
20 | 
21 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | **csvprintf** is a simple UNIX command line utility for parsing CSV files.
 2 | 
 3 | **csvprintf** works just like the `printf(1)` command line utility. You supply a `printf(1)` format string on the command line and each record in the CSV file is formatted accordingly. Each format specifier in the format string contains a column accessor to specify which CSV column to use, so for example `%3$d` would format the third column as a decimal value.
 4 | 
 5 | **csvprintf** can also convert CSV files into XML, JSON, and `bash(1)` variable assignments.
 6 | 
 7 | You can view the [ManPage](https://github.com/archiecobbs/csvprintf/wiki/ManPage) online.
 8 | 
 9 | ### Examples
10 | 
11 | Given this input file `input.csv`:
12 | 
13 | ```
14 | NAME,ADDRESS,POINTS
15 | Fred Smith,"1234 Main St.
16 | Anytown, USA   39103",123.4567
17 | "Wayne ""The Great One"" Gretsky",  59 Hockey Lane  , 999999
18 | ```
19 | 
20 | here is the resulting output:
21 | 
22 | ```
23 | $ cat input.csv | csvprintf -i 'Name:    [%1$-24.24s]\nAddress: [%2$-12.12s]\nPoints:  %3$.2f\n'
24 | Name:    [Fred Smith              ]
25 | Address: [1234 Main St]
26 | Points:  123.46
27 | Name:    [Wayne "The Great One" Gr]
28 | Address: [59 Hockey La]
29 | Points:  999999.00
30 | ```
31 | 
32 | An example of the XML output:
33 | 
34 | ```
35 | $ cat input.csv | csvprintf -iX
36 | <?xml version="1.0" encoding="ISO-8859-1"?>
37 | <csv>
38 |   <row>
39 |     <NAME>Fred Smith</NAME>
40 |     <ADDRESS>1234 Main St.
41 | Anytown, USA   39103</ADDRESS>
42 |     <POINTS>123.4567</POINTS>
43 |   </row>
44 |   <row>
45 |     <NAME>Wayne "The Great One" Gretsky</NAME>
46 |     <ADDRESS>59 Hockey Lane</ADDRESS>
47 |     <POINTS>999999</POINTS>
48 |   </row>
49 | </csv>
50 | ```
51 | 


--------------------------------------------------------------------------------
/autogen.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #
 4 | # Script to regenerate all the GNU auto* gunk.
 5 | # Run this from the top directory of the source tree.
 6 | # Usage: ./autogen.sh [-C | -c]   (-C: clean up only; -c: also run ./configure)
 7 | # If it looks like I don't know what I'm doing here, you're right.
 8 | #
 9 | 
10 | set -e
11 | # Always start by sourcing cleanup.sh; with -C that is all we do
12 | . ./cleanup.sh
13 | if [ "${1}" = '-C' ]; then
14 |     exit 0
15 | fi
16 | mkdir -p scripts
17 | # Tool names; ACLOCAL_ARGS is not set here, so any value comes from the caller's environment
18 | ACLOCAL="aclocal"
19 | AUTOHEADER="autoheader"
20 | AUTOMAKE="automake"
21 | AUTOCONF="autoconf"
22 | 
23 | echo "running aclocal"
24 | ${ACLOCAL} ${ACLOCAL_ARGS} -I scripts
25 | 
26 | echo "running autoheader"
27 | ${AUTOHEADER}
28 | 
29 | echo "running automake"
30 | ${AUTOMAKE} --add-missing -c --foreign
31 | 
32 | echo "running autoconf"
33 | ${AUTOCONF} -f -i
34 | # With -c, finish by running the freshly generated configure script
35 | if [ "${1}" = '-c' ]; then
36 |     echo "running configure"
37 |     ./configure
38 | fi
39 | 
40 | 


--------------------------------------------------------------------------------
/cleanup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | #
 4 | # Script to clean out generated GNU auto* gunk.
 5 | # Also sourced by autogen.sh (". ./cleanup.sh") before it regenerates everything.
 6 | 
 7 | set -e
 8 | # Remove autotools output first, then generated sources, build products and tarballs
 9 | echo "cleaning up"
10 | rm -rf autom4te*.cache scripts aclocal.m4 configure config.log config.status .deps stamp-h1 a.out.dSYM
11 | rm -f config.h.in config.h.in~ config.h
12 | rm -f scripts
13 | find . \( -name Makefile -o -name Makefile.in \) -print0 | xargs -0 rm -f
14 | rm -f gitrev.c
15 | rm -f *.o csvprintf
16 | rm -f csvprintf-?.?.?.tar.gz
17 | rm -f csvprintf.1
18 | rm -f xml2csv
19 | 


--------------------------------------------------------------------------------
/configure.ac:
--------------------------------------------------------------------------------
 1 | #
 2 | # csvprintf - Simple CSV file parser for the UNIX command line
 3 | # 
 4 | # Copyright 2010 Archie L. Cobbs <archie@dellroad.org>
 5 | # 
 6 | # Licensed under the Apache License, Version 2.0 (the "License"); you may
 7 | # not use this file except in compliance with the License. You may obtain
 8 | # a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | # 
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15 | # License for the specific language governing permissions and limitations
16 | # under the License.
17 | #
18 | 
19 | AC_INIT([csvprintf - Simple CSV file parser for the UNIX command line],[1.3.2],[https://github.com/archiecobbs/csvprintf],[csvprintf])
20 | AC_CONFIG_AUX_DIR(scripts)
21 | AM_INIT_AUTOMAKE
22 | dnl AM_MAINTAINER_MODE
23 | AC_PREREQ([2.69])
24 | AC_REVISION($Id$)
25 | AC_PREFIX_DEFAULT(/usr)
26 | AC_PROG_MAKE_SET
27 | 
28 | [CFLAGS="-g -O3 -pipe -Wall -Waggregate-return -Wcast-align -Wchar-subscripts -Wcomment -Wformat -Wimplicit -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wno-long-long -Wparentheses -Wpointer-arith -Wredundant-decls -Wreturn-type -Wswitch -Wtrigraphs -Wuninitialized -Wunused -Wwrite-strings -Wshadow -Wstrict-prototypes -Wcast-qual $CFLAGS"]
29 | AC_SUBST(CFLAGS)
30 | 
31 | # Compile flags for Linux
32 | AC_DEFINE(_DEFAULT_SOURCE, 1, GNU functions)
33 | AC_DEFINE(_GNU_SOURCE, 1, GNU functions)
34 | AC_DEFINE(_BSD_SOURCE, 1, BSD functions)
35 | AC_DEFINE(_XOPEN_SOURCE, 500, XOpen functions)
36 | 
37 | # Compile flags for Mac OS
38 | AC_DEFINE(_DARWIN_C_SOURCE, 1, MacOS functions)
39 | 
40 | # Check for required programs
41 | AC_PROG_INSTALL
42 | AC_PROG_CC
43 | AC_PATH_PROG([PRINTF], [printf])
44 | if test -z "${PRINTF}"; then
45 |     AC_MSG_ERROR([printf not found]);
46 | fi
47 | AC_PATH_PROG([XSLTPROC], [xsltproc])
48 | if test -z "${XSLTPROC}"; then
49 |     AC_MSG_ERROR([xsltproc not found]);
50 | fi
51 | 
52 | # Add PRINTF def
53 | [CFLAGS="$CFLAGS -DPRINTF_PROGRAM=\\\""${PRINTF}"\\\""]
54 | 
55 | # iconv_open is required: search libc, then -liconv; if neither links, accept a static /usr/lib/libiconv.a on Cygwin, otherwise abort
56 | AC_SEARCH_LIBS([iconv_open], [iconv],,
57 |     [if test `uname -o` = 'Cygwin' -a -f /usr/lib/libiconv.a; then LIBS="-liconv ${LIBS}"; else AC_MSG_ERROR([required function iconv_open missing]); fi])
58 | 
59 | # Check for required header files
60 | AC_CHECK_HEADERS(sys/wait.h assert.h ctype.h err.h errno.h stddef.h stdint.h stdio.h stdlib.h string.h unistd.h, [],
61 | 	[AC_MSG_ERROR([required header file '$ac_header' missing])])
62 | 
63 | # Optional features: NDEBUG is defined (assertions off) unless --enable-assertions is given; gprof and -Werror are opt-in
64 | AC_ARG_ENABLE(assertions,
65 |     AS_HELP_STRING([--enable-assertions],
66 |         [enable debugging sanity checks (default NO)]),
67 |     [test x"$enableval" = "xyes" || AC_DEFINE(NDEBUG, 1, [disable assertions])],
68 |     [AC_DEFINE(NDEBUG, 1, [disable assertions])])
69 | AC_ARG_ENABLE(gprof,
70 |     AS_HELP_STRING([--enable-gprof],
71 |         [Compile and link with gprof(1) support (default NO)]),
72 |     [test x"$enableval" = "xyes" && CFLAGS="${CFLAGS} -pg"])
73 | AC_ARG_ENABLE(Werror,
74 |     AS_HELP_STRING([--enable-Werror],
75 |         [enable compilation with -Werror flag (default NO)]),
76 |     [test x"$enableval" = "xyes" && CFLAGS="${CFLAGS} -Werror"])
77 | 
78 | # Generated files
79 | AC_CONFIG_FILES(Makefile)
80 | AC_CONFIG_HEADERS(config.h)
81 | 
82 | # Go
83 | AC_OUTPUT
84 | 


--------------------------------------------------------------------------------
/csv.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | 
 3 | <xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
 4 | 
 5 |     <xsl:output method="text" encoding="UTF-8" media-type="text/csv"/>
 6 |     <!-- Each /csv/row element becomes one output line: its fields followed by a line feed -->
 7 |     <xsl:template match="/csv/row">
 8 |         <xsl:apply-templates select="*"/>
 9 |         <xsl:value-of select="'&#10;'"/>
10 |     </xsl:template>
11 |     <!-- Each child element of a row becomes one quoted field, preceded by a comma unless it is the first -->
12 |     <xsl:template match="/csv/row/*">
13 |         <xsl:if test="position() &gt; 1">
14 |             <xsl:value-of select="','"/>
15 |         </xsl:if>
16 |         <xsl:call-template name="enquote"/>
17 |     </xsl:template>
18 |     <!-- Emit $s (default: the current node) wrapped in double quotes, with embedded quotes escaped -->
19 |     <xsl:template name="enquote">
20 |         <xsl:param name="s" select="."/>
21 |         <xsl:value-of select="'&quot;'"/>
22 |         <xsl:call-template name="escape">
23 |             <xsl:with-param name="s" select="$s"/>
24 |         </xsl:call-template>
25 |         <xsl:value-of select="'&quot;'"/>
26 |     </xsl:template>
27 |     <!-- Emit $s with every double quote doubled, recursing on the text before and after the first quote -->
28 |     <xsl:template name="escape">
29 |         <xsl:param name="s" select="."/>
30 |         <xsl:choose>
31 |             <xsl:when test="contains($s, '&quot;')">
32 |                 <xsl:call-template name="escape">
33 |                     <xsl:with-param name="s" select="substring-before($s, '&quot;')"/>
34 |                 </xsl:call-template>
35 |                 <xsl:value-of select="'&quot;&quot;'"/>
36 |                 <xsl:call-template name="escape">
37 |                     <xsl:with-param name="s" select="substring-after($s, '&quot;')"/>
38 |                 </xsl:call-template>
39 |             </xsl:when>
40 |             <xsl:otherwise>
41 |                 <xsl:value-of select="$s"/>
42 |             </xsl:otherwise>
43 |         </xsl:choose>
44 |     </xsl:template>
45 |     <!-- Any other node: emit nothing itself, just descend into its child elements -->
46 |     <xsl:template match="@*|node()">
47 |         <xsl:apply-templates select="*"/>
48 |     </xsl:template>
49 | 
50 | </xsl:transform>
51 | 


--------------------------------------------------------------------------------
/csvprintf.1.in:
--------------------------------------------------------------------------------
  1 | .\"  -*- nroff -*-
  2 | .\"
  3 | .\" csvprintf - Simple CSV file parser for the UNIX command line
  4 | .\"
  5 | .\" Copyright 2010 Archie L. Cobbs <archie.cobbs@gmail.com>
  6 | .\"
  7 | .\" Licensed under the Apache License, Version 2.0 (the "License"); you may
  8 | .\" not use this file except in compliance with the License. You may obtain
  9 | .\" a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
 10 | .\"
 11 | .\" Unless required by applicable law or agreed to in writing, software
 12 | .\" distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 13 | .\" WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 14 | .\" License for the specific language governing permissions and limitations
 15 | .\" under the License.
 16 | .\"
 17 | .Dd November 30, 2010
 18 | .Dt CSVPRINTF 1
 19 | .Os
 20 | .Sh NAME
 21 | .Nm csvprintf
 22 | .Nd CSV file parser
 23 | .Sh SYNOPSIS
 24 | .Nm csvprintf
 25 | .Bk -words
 26 | .Op Ar options
 27 | .Ar format
 28 | .Ek
 29 | .Pp
 30 | .Nm csvprintf
 31 | .Bk -words
 32 | .Fl b
 33 | .Op Ar options
 34 | .Ek
 35 | .Pp
 36 | .Nm csvprintf
 37 | .Bk -words
 38 | .Fl j
 39 | .Op Ar options
 40 | .Ek
 41 | .Pp
 42 | .Nm csvprintf
 43 | .Bk -words
 44 | .Fl x
 45 | .Op Ar options
 46 | .Ek
 47 | .Pp
 48 | .Nm csvprintf
 49 | .Bk -words
 50 | .Fl X
 51 | .Op Ar options
 52 | .Ek
 53 | .Pp
 54 | .Nm xml2csv
 55 | .Bk -words
 56 | .Op Ar file.xml
 57 | .Ek
 58 | .Sh DESCRIPTION
 59 | .Nm
 60 | is a simple UNIX command line utility for parsing CSV files.
 61 | .Pp
 62 | In the first form,
 63 | .Nm
 64 | works like the
 65 | .Xr printf 1
 66 | command line utility: you supply a
 67 | .Xr printf 1
 68 | format string on the command line, and each row of the CSV file is split into arguments and formatted accordingly.
 69 | .Pp
 70 | The format specifiers in the format string contain numeric or symbolic column accessors to specify which CSV column to format.
 71 | .Pp
 72 | A numeric column accessor is a sequence of decimal digits followed by the
 73 | .Pa $
 74 | character (the same accessor format supported by
 75 | .Xr printf 1 ) .
 76 | So for example,
 77 | .Pa \(dq%3$d\(dq
 78 | would format the third CSV column as a decimal value.
 79 | In addition, the
 80 | .Pa \(dq%0$d\(dq
 81 | specifier will print the number of columns in the record.
 82 | .Pp
 83 | When the
 84 | .Fl n
 85 | flag is given, the first row is assumed to contain column names and is not output.
 86 | This allows symbolic, instead of numeric, column accessors to be used.
 87 | A symbolic column accessor is the column name enclosed in curly braces.
 88 | .Pp
 89 | For example, if the first row is
 90 | .Pa FirstName,LastName,IdNum
 91 | then the format string
 92 | .Pa \(dq%{IdNum}04d: %{LastName}s, %{FirstName}s\(dq
 93 | would be equivalent to the format string
 94 | .Pa \(dq%3$04d: %2$s, %1$s\(dq .
 95 | .Pp
 96 | Specifying a column name that does not appear in the first row generates an error,
 97 | so the use of symbolic column accessors adds an extra consistency check.
 98 | .Sh XML Mode
 99 | With
100 | .Fl x ,
101 | the entire file is converted into an XML document.
102 | .Pp
103 | The document element is
104 | .Ar "<csv>" .
105 | .Pp
106 | Each CSV row becomes a
107 | .Ar "<row>"
108 | element containing its individual column values as sub-elements.
109 | .Pp
110 | The column value sub-elements are named
111 | .Ar "<col1>" ,
112 | .Ar "<col2>" ,
113 | etc.;
114 | with
115 | .Fl X ,
116 | the sub-elements use the column names read from the first row (with illegal characters replaced by underscores).
117 | .Pp
118 | In XML mode, a character encoding must be assumed; see
119 | .Fl e .
120 | .Pp
121 | The
122 | .Nm xml2csv
123 | command can convert XML documents generated by
124 | .Nm "csvprintf -x"
125 | back into CSV.
126 | .Sh JSON Mode
127 | With
128 | .Fl j ,
129 | each row is converted into a JSON document.
130 | .Pp
131 | This form is described by RFC 7464 and consists of concatenated JSON documents
132 | framed by ASCII RS and LF control characters, which is compatible with the
133 | .Xr jq 1
134 | utility's
135 | .Fl \-seq
136 | flag.
137 | .Pp
138 | Normally each row is written as a string array;
139 | with
140 | .Fl i ,
141 | each row is written as an object, using column names for fields.
142 | An error occurs if two columns have the same name.
143 | .Pp
144 | In JSON mode, a character encoding must be assumed; see
145 | .Fl e .
146 | .Sh Bash Mode
147 | With
148 | .Fl b ,
149 | each row is converted into
150 | .Xr bash 1
151 | variable assignment(s) which may be applied with the
152 | .Xr eval 1
153 | command.
154 | .Pp
155 | Normally the output just assigns
156 | .Ar ROW
157 | as an array of values.
158 | The resulting output can be used like this:
159 | .Bd -literal -offset indent
160 | cat input.csv | csvprintf -b | while read -r LINE; do
161 |     eval "${LINE}"
162 |     echo "The first column is: ${ROW[0]}"
163 |     echo "The second column is: ${ROW[1]}"
164 |     ...
165 | done
166 | .Ed
167 | .Pp
168 | With
169 | .Fl i ,
170 | each column value is assigned to a separate variable whose name is the corresponding column name
171 | (with underscores replacing non-alphanumeric characters), and an error occurs if two variables have the same name.
172 | .Pp
173 | So an input file like this:
174 | .Bd -literal -offset indent
175 | "Last Name","First Name","Registered???"
176 | "Washington","George","Y"
177 | "Lincoln","Abe","N"
178 | .Ed
179 | .Pp
180 | can be processed like this:
181 | .Bd -literal -offset indent
182 | cat input.csv | csvprintf -bi -p ROW_ | while read -r LINE; do
183 |     eval "${LINE}"
184 |     echo "First name: ${ROW_First_Name}"
185 |     echo "Last name: ${ROW_Last_Name}"
186 |     echo "Registered: ${ROW_Registered___}"
187 | done
188 | .Ed
189 | .Sh Bash Mode Security Concerns
190 | There are two security issues to be aware of when using Bash Mode.
191 | .Pp
192 | First, the
193 | .Fl i
194 | flag opens a security hole because Bash has several special variables like
195 | .Ar PATH ,
196 | .Ar TMPDIR ,
197 | etc., which could be overwritten by malicious input.
198 | To prevent this,
199 | .Nm
200 | omits known Bash variables, but for tighter security use the
201 | .Fl c
202 | flag to explicitly white-list the variables you need.
203 | In addition, use of the
204 | .Fl p
205 | flag is always recommended in Bash Mode to help avoid namespace collisions.
206 | .Pp
207 | Secondly, if the Bash Mode output is piped into
208 | .Ar "while read"
209 | then the
210 | .Fl r
211 | flag must be used to prevent extraneous decoding of backslash escapes.
212 | .Sh Input Encoding
213 | In all modes, lines must be terminated by LF bytes or CR+LF byte pairs, and the separator and quote characters must be recognizable as single byte values.
214 | This parsing behavior is compatible with ASCII, ISO-8859-1, UTF-8, etc., but not multi-byte encodings such as UTF-16, which must be re-encoded (e.g., to UTF-8) first.
215 | .Pp
216 | In normal and Bash modes, column values are copied from input to output bytewise without interpretation.
217 | .Pp
218 | In XML and JSON modes, column values must be interpreted according to an assumed character encoding.
219 | This encoding defaults to ISO-8859-1 but can be changed with the
220 | .Fl e
221 | flag.
222 | .Sh OPTIONS
223 | .Bl -tag -width Ds
224 | .It Fl b
225 | Convert each CSV row into a
226 | .Xr bash 1
227 | variable assignment line.
228 | .It Fl c Ar colname
229 | Specify a column to be included when using column names in XML, JSON, or Bash output.
230 | .Pp
231 | Without this flag, all columns are included.
232 | When this flag is used one or more times,
233 | only the specified columns are included.
234 | .Pp
235 | If any
236 | .Ar colname
237 | doesn't exist, an error occurs.
238 | .It Fl e Ar encoding
239 | Specify input character encoding for XML or JSON mode.
240 | .Pp
241 | By default, ISO-8859-1 is assumed.
242 | .It Fl f Ar file
243 | Read CSV input from the specified file.
244 | .Pp
245 | By default (or if ``-'' is specified),
246 | .Nm
247 | reads from standard input.
248 | .It Fl i
249 | Use column names read from the first record in the output.
250 | .Pp
251 | In normal mode, or when used with the
252 | .Fl x
253 | flag, this flag is equivalent to
254 | .Fl n .
255 | .Pp
256 | In JSON mode, output objects instead of arrays and use column names for the object fields.
257 | .Pp
258 | In Bash mode, output a variable for each column instead of a single
259 | .Ar ROW
260 | array variable.
261 | .Pp
262 | It's possible for a row to have more columns than the column header row did.
263 | In that case,
264 | .Nm
265 | reverts to using
266 | .Ar col1 ,
267 | .Ar col2 ,
268 | etc., for any extra columns.
269 | .Pp
270 | This flag implies
271 | .Fl n .
272 | .It Fl j
273 | Convert the input into a JavaScript Object Notation (JSON) text sequence document.
274 | .It Fl n
275 | Assume the first CSV record contains column names and omit it from the output.
276 | .Pp
277 | In normal mode, enable symbolic column accessors.
278 | .It Fl p Ar prefix
279 | Specify a common prefix (UTF-8 encoding) to use with all column names in the output.
280 | .Pp
281 | This flag is ignored unless
282 | .Fl i No or Fl X
283 | is specified.
284 | .Pp
285 | .It Fl q Ar char
286 | Specify an alternate CSV column quote character.
287 | The usual backslash escape sequences are accepted.
288 | .Pp
289 | The default quote character is double quote.
290 | .It Fl s Ar char
291 | Specify an alternate CSV column separator character.
292 | The usual backslash escape sequences are accepted.
293 | .Pp
294 | The default separator character is comma.
295 | .It Fl h
296 | Output usage message and exit.
297 | .It Fl v
298 | Output version information and exit.
299 | .It Fl x
300 | Convert the input into an XML document.
301 | .It Fl X
302 | Convert the input into an XML document using column names for value sub-elements.
303 | .Pp
304 | This flag implies
305 | .Fl n .
306 | .El
307 | .Sh CSV FORMAT
308 | .Nm
309 | parses according to the format described by ``The Comma Separated Value (CSV) File Format'' (see below).
310 | In particular, quote characters must be escaped with an extra quote and whitespace surrounding column values is ignored.
311 | .Sh EXIT STATUS
312 | .Nm
313 | will exit with a status 1 if invalid CSV input is detected.
314 | Otherwise, if an invocation of
315 | .Xr printf 1
316 | fails, processing stops and that exit value is returned.
317 | .Sh FILES
318 | .Bl -tag -width Ds -compact
319 | .It Pa @pkgdatadir@/csv.xsl
320 | XSL transform that converts XML back into CSV format.
321 | .El
322 | .Sh BUGS
323 | .Pp
324 | Under the hood,
325 | .Nm
326 | invokes the
327 | .Xr printf 1
328 | executable on each CSV row it parses, which makes it relatively slow.
329 | .Sh SEE ALSO
330 | .Xr printf 1 ,
331 | .Xr printf 3 ,
332 | .Xr jq 1 .
333 | .Rs
334 | .%T "csvprintf: Simple CSV file parser for the UNIX command line"
335 | .%O https://github.com/archiecobbs/csvprintf
336 | .Re
337 | .Rs
338 | .%T "The Comma Separated Value (CSV) File Format"
339 | .%O http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm
340 | .Re
341 | .Rs
342 | .%T "RFC 7464: JavaScript Object Notation (JSON) Text Sequences"
343 | .%O https://datatracker.ietf.org/doc/html/rfc7464
344 | .Re
345 | .Sh AUTHOR
346 | .An Archie L. Cobbs Aq archie.cobbs@gmail.com
347 | 


--------------------------------------------------------------------------------
/csvprintf.h:
--------------------------------------------------------------------------------
 1 | 
 2 | //
 3 | // csvprintf - Simple CSV file parser for the UNIX command line
 4 | // 
 5 | // Copyright 2010 Archie L. Cobbs <archie@dellroad.org>
 6 | // 
 7 | // Licensed under the Apache License, Version 2.0 (the "License"); you may
 8 | // not use this file except in compliance with the License. You may obtain
 9 | // a copy of the License at
10 | //
11 | //     http://www.apache.org/licenses/LICENSE-2.0
12 | // 
13 | // Unless required by applicable law or agreed to in writing, software
14 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
16 | // License for the specific language governing permissions and limitations
17 | // under the License.
18 | // 
19 | 
20 | #include "config.h"
21 | 
22 | extern const char *const csvprintf_version;
23 | 
24 | 


--------------------------------------------------------------------------------
/main.c:
--------------------------------------------------------------------------------
   1 | 
   2 | //
   3 | // csvprintf - Simple CSV file parser for the UNIX command line
   4 | // 
   5 | // Copyright 2010 Archie L. Cobbs <archie@dellroad.org>
   6 | // 
   7 | // Licensed under the Apache License, Version 2.0 (the "License"); you may
   8 | // not use this file except in compliance with the License. You may obtain
   9 | // a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
  10 | // 
  11 | // Unless required by applicable law or agreed to in writing, software
  12 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  13 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  14 | // License for the specific language governing permissions and limitations
  15 | // under the License.
  16 | // 
  17 | 
  18 | #include "csvprintf.h"
  19 | 
  20 | #include <sys/wait.h>
  21 | 
  22 | #include <assert.h>
  23 | #include <ctype.h>
  24 | #include <err.h>
  25 | #include <errno.h>
  26 | #include <iconv.h>
  27 | #include <stddef.h>
  28 | #include <stdint.h>
  29 | #include <stdio.h>
  30 | #include <stdlib.h>
  31 | #include <string.h>
  32 | #include <unistd.h>
  33 | 
  34 | #define DEFAULT_QUOTE_CHAR      '"'
  35 | #define DEFAULT_FSEP_CHAR       ','
  36 | #define XML_OUTPUT_ENCODING     "UTF-8"
  37 | 
  38 | #define MODE_NORMAL             0           // normal mode
  39 | #define MODE_XML_PLAIN          1           // plain XML mode
  40 | #define MODE_XML_NAMES          2           // XML mode with names
  41 | #define MODE_JSON               3           // JSON mode
  42 | #define MODE_BASH               4           // bash mode
  43 | 
  44 | struct col {                    // accumulator for one column value; NOTE(review): field roles inferred from names -- confirm against addchar()/trim()
  45 |     char    *buf;               // character storage for the column text
  46 |     size_t  len;                // presumably the number of bytes currently held in buf
  47 |     size_t  alloc;              // presumably the allocated capacity of buf
  48 | };
  49 | 
  50 | struct row {                    // list of strings; main() uses it for the current CSV row, the column names, and the "-c" allow-list
  51 |     char    **fields;           // the strings, indexed 0 .. num-1; main() treats each as NUL-terminated
  52 |     size_t  num;                // number of valid entries in fields (zero after the memset() in main())
  53 |     size_t  alloc;              // presumably the allocated capacity of fields -- confirm against growrow()
  54 | };
  55 | 
  56 | static int quote = DEFAULT_QUOTE_CHAR;     // CSV quote character; main() overwrites it with the parsed "-q" argument
  57 | static int fsep = DEFAULT_FSEP_CHAR;       // CSV field separator; main() overwrites it with the parsed "-s" argument
  58 | 
  59 | static const char *bash_special_vars[] = {     // variable names that Bash mode never assigns: main() skips a column whose prefixed name is found here
  60 |     "BASH", "BASHOPTS", "BASHPID", "BASH_ALIASES", "BASH_ARGC", "BASH_ARGV", "BASH_CMDS", "BASH_COMMAND",
  61 |     "BASH_EXECUTION_STRING", "BASH_LINENO", "BASH_LOADABLES_PATH", "BASH_REMATCH", "BASH_SOURCE", "BASH_SUBSHELL",
  62 |     "BASH_VERSINFO", "BASH_VERSION", "COMP_CWORD", "COMP_KEY", "COMP_LINE", "COMP_POINT", "COMP_TYPE", "COMP_WORDBREAKS",
  63 |     "COMP_WORDS", "COPROC", "DIRSTACK", "EUID", "FUNCNAME", "GROUPS", "HISTCMD", "HOSTNAME", "HOSTTYPE", "LINENO",
  64 |     "MACHTYPE", "MAPFILE", "OLDPWD", "OPTARG", "OPTIND", "OSTYPE", "PIPESTATUS", "PPID", "PWD", "RANDOM", "READLINE_LINE",
  65 |     "READLINE_POINT", "REPLY", "SECONDS", "SHELLOPTS", "SHLVL", "UID", "BASH_COMPAT", "BASH_ENV", "BASH_XTRACEFD", "CDPATH",
  66 |     "CHILD_MAX", "COLUMNS", "COMPREPLY", "EMACS", "ENV", "EXECIGNORE", "FCEDIT", "FIGNORE", "FUNCNEST", "GLOBIGNORE",
  67 |     "HISTCONTROL", "HISTFILE", "HISTFILESIZE", "HISTIGNORE", "HISTSIZE", "HISTTIMEFORMAT", "HOME", "HOSTFILE", "IFS",
  68 |     "IGNOREEOF", "INPUTRC", "LANG", "LC_ALL", "LC_COLLATE", "LC_CTYPE", "LC_MESSAGES", "LC_NUMERIC", "LC_TIME", "LINES",
  69 |     "MAIL", "MAILCHECK", "MAILPATH", "OPTERR", "PATH", "POSIXLY_CORRECT", "PROMPT_COMMAND", "PROMPT_DIRTRIM", "PS0", "PS1",
  70 |     "PS2", "PS3", "PS4", "SHELL", "TIMEFORMAT", "TMOUT", "TMPDIR", "auto_resume", "histchars"
  71 | };
  72 | #define NUM_BASH_SPECIAL_VARS   (sizeof(bash_special_vars) / sizeof(*bash_special_vars))     // element count of bash_special_vars[]
  73 | 
  74 | static int parsechar(const char *str);
  75 | static int parsefmt(char *fmt, const struct row *column_names, unsigned int **argsp);
  76 | static int readcol(FILE *fp, struct row *row, int *linenum);
  77 | static int readqcol(FILE *fp, struct col *col, int *linenum);
  78 | static int readuqcol(FILE *fp, struct col *col, int *linenum);
  79 | static int readch(FILE *fp, int collapse);
  80 | static void freerow(struct row *row);
  81 | static void print_xml_tag_name(const char *tag, int linenum);
  82 | static void print_json_string(const char *string, int linenum);
  83 | static void print_bash_name(const char *string);
  84 | static void print_bash_value(const char *string);
  85 | static char bash_name_safe(char ch, int first);
  86 | static int decode_utf8(const char *const obuf, size_t olen, int *lenp, int linenum);
  87 | static void convert_to_utf8(iconv_t icd, struct row *row, int linenum);
  88 | static const char *escape_xml_char(int uchar);
  89 | static char *eatwidthprec(const char *fspec, const char *desc, const struct row *column_names,
  90 |     char *s, int *nargs, unsigned int *args);
  91 | static char *eataccessor(const char *fspec, const char *desc, const struct row *column_names,
  92 |     char *s, int *nargs, unsigned int *args);
  93 | static void addcolumn(struct row *row, const struct col *col);
  94 | static void addstring(struct row *row, const char *const string);
  95 | static int findstring(struct row *row, const char *const string);
  96 | static int findstring2(const char *const *list, size_t num, const char *const string);
  97 | static void growrow(struct row *row);
  98 | static void addchar(struct col *col, int ch);
  99 | static void trim(struct col *col);
 100 | static void usage(void);
 101 | static void version(void);
 102 | 
 103 | int
 104 | main(int argc, char **argv)
 105 | {
 106 |     const char *input = "-";
 107 |     const char *encoding = "ISO-8859-1";
 108 |     const char *name_prefix = "";
 109 |     char *format = NULL;
 110 |     iconv_t icd = NULL;
 111 |     FILE *fp = NULL;
 112 |     struct row row;
 113 |     struct row column_names;
 114 |     struct row allowed_column_names;
 115 |     unsigned int *args = NULL;
 116 |     int mode = -1;
 117 |     int read_column_names = 0;                  // strip off first row containing column names
 118 |     int use_column_names = 0;                   // use column names from first row in output
 119 |     int first_row = 0;
 120 |     int nargs = 0;
 121 |     int file_done;
 122 |     int linenum;
 123 |     int new_mode;
 124 |     int ch;
 125 | 
 126 |     // Initialize
 127 |     memset(&row, 0, sizeof(row));
 128 |     memset(&column_names, 0, sizeof(column_names));
 129 |     memset(&allowed_column_names, 0, sizeof(allowed_column_names));
 130 | 
 131 |     // Parse command line
 132 |     while ((ch = getopt(argc, argv, "bc:e:f:hijnp:q:s:vxX")) != -1) {
 133 |         switch (ch) {
 134 |         case 'b':
 135 |             if (mode != -1 && mode != MODE_BASH)
 136 |                 errx(1, "flag \"%c\" conflicts with previous mode flag", ch);
 137 |             mode = MODE_BASH;
 138 |             break;
 139 |         case 'c':
 140 |             addstring(&allowed_column_names, optarg);
 141 |             break;
 142 |         case 'e':
 143 |             encoding = optarg;
 144 |             break;
 145 |         case 'f':
 146 |             input = optarg;
 147 |             break;
 148 |         case 'i':
 149 |             read_column_names = 1;
 150 |             use_column_names = 1;
 151 |             break;
 152 |         case 'n':
 153 |             read_column_names = 1;
 154 |             break;
 155 |         case 'j':
 156 |             if (mode != -1 && mode != MODE_JSON)
 157 |                 errx(1, "flag \"%c\" conflicts with previous mode flag", ch);
 158 |             mode = MODE_JSON;
 159 |             break;
 160 |         case 'X':
 161 |         case 'x':
 162 |             new_mode = ch == 'X' ? MODE_XML_NAMES : MODE_XML_PLAIN;
 163 |             if (mode != -1 && mode != new_mode)
 164 |                 errx(1, "flag \"%c\" conflicts with previous mode flag", ch);
 165 |             if ((mode = new_mode) == MODE_XML_NAMES) {
 166 |                 use_column_names = 1;
 167 |                 read_column_names = 1;
 168 |             }
 169 |             break;
 170 |         case 'p':
 171 |             name_prefix = optarg;
 172 |             break;
 173 |         case 'q':
 174 |             if ((quote = parsechar(optarg)) == -1)
 175 |                 errx(1, "invalid argument to \"-%c\"", ch);
 176 |             break;
 177 |         case 's':
 178 |             if ((fsep = parsechar(optarg)) == -1)
 179 |                 errx(1, "invalid argument to \"-%c\"", ch);
 180 |             break;
 181 |         case 'h':
 182 |             usage();
 183 |             exit(0);
 184 |         case 'v':
 185 |             version();
 186 |             exit(0);
 187 |         case '?':
 188 |         default:
 189 |             usage();
 190 |             exit(1);
 191 |         }
 192 |     }
 193 |     if (mode == -1)
 194 |         mode = MODE_NORMAL;
 195 |     argc -= optind;
 196 |     argv += optind;
 197 |     if (argc != (mode == MODE_NORMAL ? 1 : 0)) {
 198 |         usage();
 199 |         exit(1);
 200 |     }
 201 | 
 202 |     // Backward compatibility hack
 203 |     if (mode == MODE_XML_PLAIN)
 204 |         use_column_names = 0;
 205 | 
 206 |     // Sanity check
 207 |     if (quote == fsep)
 208 |         err(1, "quote and field separators cannot be the same character");
 209 |     if (allowed_column_names.num > 0 && !read_column_names)
 210 |         err(1, "\"-c\" flag requires \"-n\" flag");
 211 | 
 212 |     // Get and (maybe) parse format string (normal mode only)
 213 |     if (mode == MODE_NORMAL) {
 214 |         format = argv[0];
 215 | 
 216 |         // Parse format string - unless we need to defer
 217 |         if (!read_column_names)
 218 |             nargs = parsefmt(format, NULL, &args);
 219 |     }
 220 | 
 221 |     // Open input
 222 |     if (strcmp(input, "-") == 0)
 223 |         fp = stdin;
 224 |     else if ((fp = fopen(input, "r")) == NULL)
 225 |         err(1, "%s", input);
 226 | 
 227 |     // Initialize iconv
 228 |     switch (mode) {
 229 |     case MODE_XML_PLAIN:
 230 |     case MODE_XML_NAMES:
 231 |     case MODE_JSON:
 232 |         if ((icd = iconv_open(XML_OUTPUT_ENCODING, encoding)) == (iconv_t)-1)
 233 |             err(1, "%s", encoding);
 234 |         break;
 235 |     default:
 236 |         break;
 237 |     }
 238 | 
 239 |     // XML opening
 240 |     if (mode == MODE_XML_PLAIN || mode == MODE_XML_NAMES) {
 241 |         printf("<?xml version=\"1.0\" encoding=\"%s\"?>\n", XML_OUTPUT_ENCODING);
 242 |         printf("<csv>\n");
 243 |     }
 244 | 
 245 |     // Read and parse input
 246 |     linenum = 1;
 247 |     first_row = 1;
 248 |     for (file_done = 0; !file_done; ) {
 249 | 
 250 |         // Start parsing next row
 251 |         switch ((ch = readch(fp, 1))) {
 252 |         case EOF:
 253 |             file_done = 1;
 254 |             continue;
 255 |         case '\n':                          // ignore completely empty lines
 256 |             linenum++;
 257 |             continue;
 258 |         default:
 259 |             ungetc(ch, fp);
 260 |             break;
 261 |         }
 262 | 
 263 |         // Read columns
 264 |         while (readcol(fp, &row, &linenum))
 265 |             ;
 266 | 
 267 |         // Gather column names from first row, if configured
 268 |         if (first_row && read_column_names) {
 269 |             int i, j;
 270 | 
 271 |             // Convert to UTF-8 if needed
 272 |             if (icd != NULL)
 273 |                 convert_to_utf8(icd, &row, linenum);
 274 | 
 275 |             // Save column names
 276 |             memcpy(&column_names, &row, sizeof(row));
 277 |             memset(&row, 0, sizeof(row));
 278 | 
 279 |             // If we had to defer parsing format string until we had the column names, do that now
 280 |             if (mode == MODE_NORMAL)
 281 |                 nargs = parsefmt(format, &column_names, &args);
 282 | 
 283 |             // Check that all explicitly specified columns are actually present
 284 |             for (i = 0; i < allowed_column_names.num; i++) {
 285 |                 if (!findstring(&column_names, allowed_column_names.fields[i]))
 286 |                     errx(1, "column \"%s\" not found", allowed_column_names.fields[i]);
 287 |             }
 288 | 
 289 |             // Check for illegal or duplicate column names
 290 |             switch (mode) {
 291 |             case MODE_JSON:
 292 |                 for (i = 0; i < column_names.num - 1; i++) {
 293 |                     if (allowed_column_names.num > 0
 294 |                       && !findstring(&allowed_column_names, column_names.fields[i]))
 295 |                         continue;
 296 |                     for (j = i + 1; j < column_names.num; j++) {
 297 |                         if (strcmp(column_names.fields[i], column_names.fields[j]) == 0)
 298 |                             errx(1, "duplicate column name \"%s\"", column_names.fields[i]);
 299 |                     }
 300 |                 }
 301 |                 break;
 302 |             case MODE_BASH:
 303 |                 for (i = 0; i < column_names.num; i++) {
 304 |                     char *namei;
 305 | 
 306 |                     if (allowed_column_names.num > 0
 307 |                       && !findstring(&allowed_column_names, column_names.fields[i]))
 308 |                         continue;
 309 |                     if (asprintf(&namei, "%s%s", name_prefix, column_names.fields[i]) == -1)
 310 |                         err(1, "asprintf");
 311 |                     if (*namei == '\0')
 312 |                         errx(1, "illegal empty string column name");
 313 |                     for (j = i + 1; j < column_names.num; j++) {
 314 |                         char *namej;
 315 |                         int same = 1;
 316 |                         int k;
 317 | 
 318 |                         if (asprintf(&namej, "%s%s", name_prefix, column_names.fields[j]) == -1)
 319 |                             err(1, "asprintf");
 320 |                         for (k = 0; namei[k] != '\0' || namej[k] != '\0'; k++) {
 321 |                             if (namei[k] == '\0' || namej[k] == '\0'
 322 |                               || bash_name_safe(namei[k], k == 0) != bash_name_safe(namej[k], k == 0)) {
 323 |                                 same = 0;
 324 |                                 break;
 325 |                             }
 326 |                         }
 327 |                         if (same)
 328 |                             errx(1, "duplicate (bash variable) column names \"%s\" and \"%s\"", namei, namej);
 329 |                         free(namej);
 330 |                     }
 331 |                     free(namei);
 332 |                 }
 333 |                 break;
 334 |             default:
 335 |                 break;
 336 |             }
 337 | 
 338 |             // Proceed
 339 |             goto next;
 340 |         }
 341 | 
 342 |         // Handle data row
 343 |         switch (mode) {
 344 |         case MODE_JSON:
 345 |           {
 346 |             int col;
 347 | 
 348 |             // Convert columns to UTF-8
 349 |             convert_to_utf8(icd, &row, linenum);
 350 | 
 351 |             // Output row
 352 |             printf("\x1e%c", use_column_names ? '{' : '[');
 353 |             for (col = 0; col < row.num; col++) {
 354 | 
 355 |                 // Check whether column should be included
 356 |                 if (use_column_names
 357 |                   && allowed_column_names.num > 0
 358 |                   && col < column_names.num
 359 |                   && !findstring(&allowed_column_names, column_names.fields[col]))
 360 |                     continue;
 361 | 
 362 |                 // Add comma if needed
 363 |                 if (col > 0)
 364 |                     putchar(',');
 365 | 
 366 |                 // Add column name (if using object notation)
 367 |                 if (use_column_names) {
 368 |                     if (col < column_names.num) {
 369 |                         putchar('"');
 370 |                         print_json_string(name_prefix, linenum);
 371 |                         print_json_string(column_names.fields[col], linenum);
 372 |                         putchar('"');
 373 |                     } else
 374 |                         printf("\"col%d\"", col + 1);
 375 |                     putchar(':');
 376 |                 }
 377 | 
 378 |                 // Add column value
 379 |                 putchar('"');
 380 |                 print_json_string(row.fields[col], linenum);
 381 |                 putchar('"');
 382 |             }
 383 |             printf("%c\n", use_column_names ? '}' : ']');
 384 |             break;
 385 |           }
 386 |         case MODE_XML_PLAIN:
 387 |         case MODE_XML_NAMES:
 388 |           {
 389 |             int col;
 390 | 
 391 |             // Convert columns to UTF-8
 392 |             convert_to_utf8(icd, &row, linenum);
 393 | 
 394 |             // Output columns for row
 395 |             printf("  <row>\n");
 396 |             for (col = 0; col < row.num; col++) {
 397 |                 const char *ptr = row.fields[col];
 398 |                 int len = strlen(ptr);
 399 |                 int use_column_names_this_tag;
 400 |                 const char *esc;
 401 |                 int uchar;
 402 |                 int uclen;
 403 |                 int i;
 404 | 
 405 |                 // Check whether column should be included
 406 |                 if (use_column_names
 407 |                   && allowed_column_names.num > 0
 408 |                   && col < column_names.num
 409 |                   && !findstring(&allowed_column_names, column_names.fields[col]))
 410 |                     continue;
 411 | 
 412 |                 // Determine whether we can actually use column name for XML tag name
 413 |                 use_column_names_this_tag = use_column_names && col < column_names.num
 414 |                   && (*name_prefix != '\0' || *column_names.fields[col] != '\0');
 415 | 
 416 |                 // Open XML tag
 417 |                 printf("    <");
 418 |                 if (use_column_names_this_tag) {
 419 |                     print_xml_tag_name(name_prefix, linenum);
 420 |                     print_xml_tag_name(column_names.fields[col], linenum);
 421 |                 } else
 422 |                     printf("col%d", col + 1);
 423 |                 printf(">");
 424 | 
 425 |                 // Output XML characters, escaped as needed
 426 |                 while (len > 0) {
 427 |                     uchar = decode_utf8(ptr, len, &uclen, linenum);
 428 |                     if ((esc = escape_xml_char(uchar)) != NULL)
 429 |                         printf("%s", esc);
 430 |                     else {
 431 |                         for (i = 0; i < uclen; i++)
 432 |                             putchar(ptr[i]);
 433 |                     }
 434 |                     ptr += uclen;
 435 |                     len -= uclen;
 436 |                 }
 437 | 
 438 |                 // Close XML tag
 439 |                 printf("</");
 440 |                 if (use_column_names_this_tag) {
 441 |                     print_xml_tag_name(name_prefix, linenum);
 442 |                     print_xml_tag_name(column_names.fields[col], linenum);
 443 |                 } else
 444 |                     printf("col%d", col + 1);
 445 |                 printf(">\n");
 446 |             }
 447 |             printf("  </row>\n");
 448 |             break;
 449 |           }
 450 |         case MODE_BASH:
 451 |           {
 452 |             char bash_name_buf[64];         // buffer just needs to be big enough to hold any of the bash_special_vars[]
 453 |             int col;
 454 | 
 455 |             // Start array (if needed)
 456 |             if (!use_column_names)
 457 |                 printf("ROW=(");
 458 | 
 459 |             // Output row
 460 |             for (col = 0; col < row.num; col++) {
 461 | 
 462 |                 // Check whether column should be included
 463 |                 if (use_column_names
 464 |                   && allowed_column_names.num > 0
 465 |                   && col < column_names.num
 466 |                   && !findstring(&allowed_column_names, column_names.fields[col]))
 467 |                     continue;
 468 | 
 469 |                 // Elide any BASH special variable names
 470 |                 if (use_column_names && col < column_names.num) {
 471 |                     snprintf(bash_name_buf, sizeof(bash_name_buf), "%s%s", name_prefix, column_names.fields[col]);
 472 |                     if (findstring2(bash_special_vars, NUM_BASH_SPECIAL_VARS, bash_name_buf))
 473 |                         continue;
 474 |                 }
 475 | 
 476 |                 // Add space
 477 |                 if (col > 0 || !use_column_names)
 478 |                     putchar(' ');
 479 | 
 480 |                 // Add column name (if using column names)
 481 |                 if (use_column_names) {
 482 |                     if (col < column_names.num) {
 483 |                         print_bash_name(name_prefix);
 484 |                         print_bash_name(column_names.fields[col]);
 485 |                     } else
 486 |                         printf("col%d", col + 1);
 487 |                     putchar('=');
 488 |                 }
 489 | 
 490 |                 // Add column value
 491 |                 print_bash_value(row.fields[col]);
 492 | 
 493 |                 // Add separator
 494 |                 if (use_column_names)
 495 |                     putchar(';');
 496 |             }
 497 | 
 498 |             // End array (if needed)
 499 |             if (!use_column_names)
 500 |                 printf(" )");
 501 | 
 502 |             // End line
 503 |             printf("\n");
 504 |             break;
 505 |           }
 506 |         case MODE_NORMAL:
 507 |           {
 508 |             char ncolbuf[32];
 509 |             char empty[] = { '\0' };
 510 |             pid_t pid;
 511 |             pid_t result;
 512 |             int status;
 513 |             int i;
 514 | 
 515 |             fflush(stdout);
 516 |             fflush(stderr);
 517 |             switch ((pid = fork())) {
 518 |             case -1:
 519 |                 err(1, "fork");
 520 |             case 0:
 521 |                 close(0);
 522 |                 if ((argv = malloc((nargs + 3) * sizeof(*argv))) == NULL)
 523 |                     err(1, "malloc");
 524 |                 argv[0] = strdup("printf");
 525 |                 if (argv[0] == NULL)
 526 |                     err(1, "strdup");
 527 |                 argv[1] = format;
 528 |                 snprintf(ncolbuf, sizeof(ncolbuf), "%lu", (unsigned long)row.num);
 529 |                 for (i = 0; i < nargs; i++)
 530 |                     argv[2 + i] = args[i] == 0 ? ncolbuf : args[i] <= row.num ? row.fields[args[i] - 1] : empty;
 531 |                 argv[2 + nargs] = NULL;
 532 |                 execvp(PRINTF_PROGRAM, argv);
 533 |                 err(1, "execvp");
 534 |             default:
 535 |                 while (1) {
 536 |                     if ((result = waitpid(pid, &status, 0)) == -1)
 537 |                         err(1, "waitpid");
 538 |                     if (WIFEXITED(status)) {
 539 |                         if (WEXITSTATUS(status) != 0)
 540 |                             exit(status);
 541 |                         break;
 542 |                     }
 543 |                     if (WIFSIGNALED(status))
 544 |                         exit(1);
 545 |                 }
 546 |                 break;
 547 |             }
 548 |             break;
 549 |           }
 550 |         default:
 551 |             errx(1, "internal error");
 552 |         }
 553 | 
 554 | next:
 555 |         // Free row memory
 556 |         freerow(&row);
 557 |         first_row = 0;
 558 |     }
 559 | 
 560 |     // XML closing
 561 |     if (mode == MODE_XML_PLAIN || mode == MODE_XML_NAMES)
 562 |         printf("</csv>\n");
 563 | 
 564 |     // Clean up iconv
 565 |     if (icd != NULL)
 566 |         (void)iconv_close(icd);
 567 | 
 568 |     // Clean up
 569 |     if (fp != stdin)
 570 |         fclose(fp);
 571 |     freerow(&column_names);
 572 |     free(args);
 573 | 
 574 |     // Done
 575 |     fflush(stdout);
 576 |     return 0;
 577 | }
 578 | 
 579 | // Output XML tag name, substituting invalid characters
 580 | static void
 581 | print_xml_tag_name(const char *tag, int linenum)
 582 | {
 583 |     int first = 1;
 584 |     int uchar;
 585 |     int uclen;
 586 |     int ok;
 587 |     int i;
 588 | 
 589 |     while (*tag != '\0') {
 590 |         uchar = decode_utf8(tag, strlen(tag), &uclen, linenum);
 591 |         if (first) {
 592 |             ok = isalpha(uchar) || uchar == '_';
 593 |             first = 0;
 594 |         } else
 595 |             ok = isalpha(uchar) || isdigit(uchar) || uchar == '_' || uchar == '-' || uchar == '.';
 596 |         if (!ok)
 597 |             putchar('_');
 598 |         else {
 599 |             for (i = 0; i < uclen; i++)
 600 |                 putchar(tag[i]);
 601 |         }
 602 |         tag += uclen;
 603 |     }
 604 | }
 605 | 
 606 | static const char *
 607 | escape_xml_char(int uchar)
 608 | {
 609 |     static char buf[32];
 610 | 
 611 |     switch (uchar) {
 612 |     case '>':
 613 |         return "&gt;";
 614 |         break;
 615 |     case '<':
 616 |         return "&lt;";
 617 |         break;
 618 |     case '&':
 619 |         return "&amp;";
 620 |         break;
 621 |     default:
 622 | 
 623 |         // Pass valid and unrestricted characters through (but not CR)
 624 |         // http://en.wikipedia.org/wiki/Valid_characters_in_XML
 625 |         if ((uchar == '\n' || uchar == '\t'
 626 |             || (uchar >= 0x0020 && uchar <= 0xd7ff)
 627 |             || (uchar >= 0xe000 && uchar <= 0xfffd)
 628 |             || (uchar >= 0x10000 && uchar <= 0x10ffff))
 629 |           && !((uchar >= 0x007f && uchar <= 0x0084) || (uchar >= 0x0086 && uchar <= 0x009F)))
 630 |             return NULL;
 631 | 
 632 |         // Escape other characters
 633 |         snprintf(buf, sizeof(buf), "&#%u;", uchar);
 634 |         return buf;
 635 |     }
 636 | }
 637 | 
 638 | static void
 639 | print_bash_name(const char *string)
 640 | {
 641 |     int i;
 642 | 
 643 |     for (i = 0; string[i] != '\0'; i++)
 644 |         fputc(bash_name_safe(string[i], i == 0), stdout);
 645 | }
 646 | 
 647 | static void
 648 | print_bash_value(const char *string)
 649 | {
 650 |     int single_quotes = 1;
 651 |     int i;
 652 | 
 653 |     // See if plain single quotes will work
 654 |     for (i = 0; string[i] != '\0'; i++) {
 655 |         if (string[i] == '\'' || !isprint((unsigned char)string[i])) {
 656 |             single_quotes = 0;
 657 |             break;
 658 |         }
 659 |     }
 660 | 
 661 |     // Output value
 662 |     if (single_quotes)
 663 |         printf("'%s'", string);
 664 |     else {
 665 |         printf("$'");
 666 |         for (i = 0; string[i] != '\0'; i++) {
 667 |             switch (string[i]) {
 668 |             case '\'':
 669 |                 printf("\\'");
 670 |                 break;
 671 |             case '\\':
 672 |                 printf("\\\\");
 673 |                 break;
 674 |             case '\b':
 675 |                 printf("\\b");
 676 |                 break;
 677 |             case '\f':
 678 |                 printf("\\f");
 679 |                 break;
 680 |             case '\n':
 681 |                 printf("\\n");
 682 |                 break;
 683 |             case '\r':
 684 |                 printf("\\r");
 685 |                 break;
 686 |             case '\t':
 687 |                 printf("\\t");
 688 |                 break;
 689 |             case '\v':
 690 |                 printf("\\v");
 691 |                 break;
 692 |             default:
 693 |                 if (isprint((unsigned char)string[i]))
 694 |                     putchar((unsigned char)string[i]);
 695 |                 else
 696 |                     printf("\\x%02x", (unsigned char)string[i]);
 697 |                 break;
 698 |             }
 699 |         }
 700 |         putchar('\'');
 701 |     }
 702 | }
 703 | 
 704 | static char
 705 | bash_name_safe(char ch, int first)
 706 | {
 707 |     if (isupper((unsigned char)ch) || islower((unsigned char)ch) || ch == '_')
 708 |         return ch;
 709 |     if (!first && isdigit((unsigned char)ch))
 710 |         return ch;
 711 |     return '_';
 712 | }
 713 | 
 714 | // Output JSON string
 715 | static void
 716 | print_json_string(const char *string, int linenum)
 717 | {
 718 |     int uchar;
 719 |     int uclen;
 720 | 
 721 |     while (*string != '\0') {
 722 |         uchar = decode_utf8(string, strlen(string), &uclen, linenum);
 723 |         switch (uchar) {
 724 |         case '"':
 725 |             printf("\\\"");
 726 |             break;
 727 |         case '\\':
 728 |             printf("\\\\");
 729 |             break;
 730 |         case '\b':
 731 |             printf("\\b");
 732 |             break;
 733 |         case '\f':
 734 |             printf("\\f");
 735 |             break;
 736 |         case '\n':
 737 |             printf("\\n");
 738 |             break;
 739 |         case '\r':
 740 |             printf("\\r");
 741 |             break;
 742 |         case '\t':
 743 |             printf("\\t");
 744 |             break;
 745 |         default:
 746 |             if (isprint(uchar))
 747 |                 printf("%c", uchar);
 748 |             else
 749 |                 printf("\\u%04x", uchar);
 750 |             break;
 751 |         }
 752 |         string += uclen;
 753 |     }
 754 | }
 755 | 
 756 | // Convert row columns to UTF-8 encoding
 757 | static void
 758 | convert_to_utf8(iconv_t icd, struct row *row, int linenum)
 759 | {
 760 |     int col;
 761 | 
 762 |     for (col = 0; col < row->num; col++) {
 763 |         char *const ibuf = row->fields[col];
 764 |         char *iptr;
 765 |         char *obuf;
 766 |         char *optr;
 767 |         size_t iremain;
 768 |         size_t oremain;
 769 |         size_t olen;
 770 | 
 771 |         // Convert column
 772 |         if (iconv(icd, NULL, NULL, NULL, NULL) == (size_t)-1)
 773 |             err(1, "iconv");
 774 |         iremain = strlen(ibuf);
 775 |         oremain = 64 + 4 * iremain;
 776 |         if ((obuf = malloc(oremain)) == NULL)
 777 |             err(1, "malloc");
 778 |         iptr = ibuf;
 779 |         optr = obuf;
 780 |         if (iconv(icd, &iptr, &iremain, &optr, &oremain) == (size_t)-1) {
 781 |             switch (errno) {
 782 |             case EILSEQ:
 783 |                 errx(1, "line %d: %s multibyte sequence", linenum, "illegal");
 784 |             case EINVAL:
 785 |                 errx(1, "line %d: %s multibyte sequence", linenum, "truncated");
 786 |             default:
 787 |                 err(1, "line %d: iconv", linenum);
 788 |             }
 789 |         }
 790 |         olen = optr - obuf;
 791 | 
 792 |         // Replace column
 793 |         if ((row->fields[col] = realloc(row->fields[col], olen + 1)) == NULL)
 794 |             err(1, "realloc");
 795 |         memcpy(row->fields[col], obuf, olen);
 796 |         row->fields[col][olen] = '\0';
 797 |         free(obuf);
 798 |     }
 799 | }
 800 | 
 801 | // Decode UTF-8 character
 802 | static int
 803 | decode_utf8(const char *const obuf, size_t olen, int *lenp, int linenum)
 804 | {
 805 |     int uchar;
 806 |     int uclen;
 807 |     int i = 0;
 808 | 
 809 |     if ((obuf[i] & 0x80) == 0x00) {
 810 |         uclen = 1;
 811 |         uchar = obuf[i] & 0x7f;
 812 |     } else if ((obuf[i] & 0xe0) == 0xc0 && i + 1 < olen) {
 813 |         uclen = 2;
 814 |         uchar = ((obuf[i] & 0x1f) <<  6)
 815 |           | ((obuf[i + 1] & 0x3f) <<  0);
 816 |     } else if ((obuf[i] & 0xf0) == 0xe0 && i + 2 < olen) {
 817 |         uclen = 3;
 818 |         uchar = ((obuf[i] & 0x0f) << 12)
 819 |           | ((obuf[i + 1] & 0x3f) <<  6)
 820 |           | ((obuf[i + 2] & 0x3f) <<  0);
 821 |     } else if ((obuf[i] & 0xf8) == 0xf0 && i + 3 < olen) {
 822 |         uclen = 4;
 823 |         uchar = ((obuf[i] & 0x07) << 18)
 824 |           | ((obuf[i + 1] & 0x3f) << 12)
 825 |           | ((obuf[i + 2] & 0x3f) <<  6)
 826 |           | ((obuf[i + 3] & 0x3f) <<  0);
 827 |     } else if ((obuf[i] & 0xfc) == 0xf8 && i + 4 < olen) {
 828 |         uclen = 5;
 829 |         uchar = ((obuf[i] & 0x03) << 24)
 830 |           | ((obuf[i + 1] & 0x3f) << 18)
 831 |           | ((obuf[i + 2] & 0x3f) << 12)
 832 |           | ((obuf[i + 3] & 0x3f) <<  6)
 833 |           | ((obuf[i + 4] & 0x3f) <<  0);
 834 |     } else if ((obuf[i] & 0xfe) == 0xfc && i + 5 < olen) {
 835 |         uclen = 6;
 836 |         uchar = ((obuf[i] & 0x01) << 30)
 837 |           | ((obuf[i + 1] & 0x3f) << 24)
 838 |           | ((obuf[i + 2] & 0x3f) << 18)
 839 |           | ((obuf[i + 3] & 0x3f) << 12)
 840 |           | ((obuf[i + 4] & 0x3f) <<  6)
 841 |           | ((obuf[i + 5] & 0x3f) <<  0);
 842 |     } else
 843 |         errx(1, "line %d: internal error decoding UTF-8: 0x%02x", linenum, obuf[i] & 0xff);
 844 | 
 845 |     // Done
 846 |     *lenp = uclen;
 847 |     return uchar;
 848 | }
 849 | 
 850 | static int
 851 | readcol(FILE *fp, struct row *row, int *linenum)
 852 | {
 853 |     struct col col;
 854 |     int row_done;
 855 |     int ch;
 856 | 
 857 |     // Process initial stuff; skip leading whitespace, excluding our field separator (which could be TAB)
 858 |     do {
 859 |         if ((ch = readch(fp, 1)) == EOF)
 860 |             ch = '\n';
 861 |         if (ch == '\n') {           // end of line forces empty column and terminates the row
 862 |             memset(&col, 0, sizeof(col));
 863 |             addcolumn(row, &col);
 864 |             (*linenum)++;
 865 |             return 0;
 866 |         }
 867 |     } while (isspace(ch) && ch != fsep);
 868 |     ungetc(ch, fp);
 869 | 
 870 |     // Read quoted or unquoted value
 871 |     if (ch == quote)
 872 |         row_done = readqcol(fp, &col, linenum);
 873 |     else
 874 |         row_done = readuqcol(fp, &col, linenum);
 875 |     addcolumn(row, &col);
 876 |     return row_done;
 877 | }
 878 | 
 879 | //
 880 | // Read a quoted column, return true if there's more
 881 | //
 882 | static int
 883 | readqcol(FILE *fp, struct col *col, int *linenum)
 884 | {
 885 |     int done = 0;
 886 |     int escape = 0;
 887 |     int ch;
 888 | 
 889 |     readch(fp, 0);
 890 |     memset(col, 0, sizeof(*col));
 891 |     while (1) {
 892 |         assert(!escape || !done);
 893 |         if ((ch = readch(fp, escape)) == EOF) {
 894 |             if (escape || done)
 895 |                 ch = '\n';
 896 |             else
 897 |                 errx(1, "line %d: premature EOF", *linenum);
 898 |         }
 899 |         if (done) {
 900 |             if (ch == '\n') {
 901 |                 (*linenum)++;
 902 |                 return 0;
 903 |             }
 904 |             if (ch == fsep)
 905 |                 return 1;
 906 |             if (isspace(ch))
 907 |                 continue;
 908 |             errx(1, "line %d: unexpected character \"%c\"", *linenum, ch);
 909 |         }
 910 |         if (escape) {
 911 |             if (ch == quote)
 912 |                 addchar(col, quote);
 913 |             else {
 914 |                 ungetc(ch, fp);
 915 |                 done = 1;
 916 |             }
 917 |             escape = 0;
 918 |             continue;
 919 |         }
 920 |         if (ch == quote) {
 921 |             escape = 1;
 922 |             continue;
 923 |         }
 924 |         addchar(col, ch);
 925 |         if (ch == '\n')
 926 |             (*linenum)++;
 927 |     }
 928 | }
 929 | 
 930 | //
 931 | // Read an unquoted column, return true if there's more
 932 | //
 933 | static int
 934 | readuqcol(FILE *fp, struct col *col, int *linenum)
 935 | {
 936 |     int ch;
 937 | 
 938 |     memset(col, 0, sizeof(*col));
 939 |     while (1) {
 940 |         if ((ch = readch(fp, 1)) == EOF)
 941 |             ch = '\n';
 942 |         if (ch == '\n') {
 943 |             (*linenum)++;
 944 |             trim(col);
 945 |             return 0;
 946 |         }
 947 |         if (ch == fsep) {
 948 |             trim(col);
 949 |             return 1;
 950 |         }
 951 |         addchar(col, ch);
 952 |     }
 953 | }
 954 | 
 955 | //
 956 | // Trims whitespace around a column
 957 | //
 958 | static void
 959 | trim(struct col *col)
 960 | {
 961 |     size_t skip;
 962 | 
 963 |     while (col->len > 0 && isspace((unsigned char)col->buf[col->len - 1]))
 964 |         col->len--;
 965 |     for (skip = 0; skip < col->len && isspace((unsigned char)col->buf[skip]); skip++)
 966 |         ;
 967 |     col->len -= skip;
 968 |     memmove(col->buf, col->buf + skip, col->len);
 969 | }
 970 | 
 971 | //
 972 | // Adds the character to the column
 973 | //
 974 | static void
 975 | addchar(struct col *col, int ch)
 976 | {
 977 |     if (col->alloc <= col->len) {
 978 |         int new_alloc;
 979 |         char *new_buf;
 980 | 
 981 |         new_alloc = col->alloc == 0 ? 32 : col->alloc * 2;
 982 |         if ((new_buf = realloc(col->buf, new_alloc)) == NULL)
 983 |             err(1, "realloc");
 984 |         col->buf = new_buf;
 985 |         col->alloc = new_alloc;
 986 |     }
 987 |     col->buf[col->len++] = ch;
 988 | }
 989 | 
 990 | //
 991 | // Adds the column to the row, then frees the column
 992 | //
 993 | static void
 994 | addcolumn(struct row *row, const struct col *col)
 995 | {
 996 |     growrow(row);
 997 |     if (col->alloc >= col->len + 1) {
 998 |         col->buf[col->len] = '\0';
 999 |         row->fields[row->num] = col->buf;
1000 |     } else {
1001 |         if ((row->fields[row->num] = malloc(col->len + 1)) == NULL)
1002 |             err(1, "malloc");
1003 |         memcpy(row->fields[row->num], col->buf, col->len);
1004 |         row->fields[row->num][col->len] = '\0';
1005 |         free(col->buf);
1006 |     }
1007 |     memset(&col, 0, sizeof(col));
1008 |     row->num++;
1009 | }
1010 | 
1011 | // Copy given string and add to row
1012 | static void
1013 | addstring(struct row *row, const char *const string)
1014 | {
1015 |     growrow(row);
1016 |     if ((row->fields[row->num++] = strdup(string)) == NULL)
1017 |         err(1, "strdup");
1018 | }
1019 | 
1020 | static int
1021 | findstring(struct row *row, const char *const string)
1022 | {
1023 |     return findstring2((const char *const *)row->fields, row->num, string);
1024 | }
1025 | 
// Return 1 if any of the first "num" entries of "list" equals "string"
// (compared with strcmp()), otherwise 0.
static int
findstring2(const char *const *list, size_t num, const char *const string)
{
    while (num-- > 0) {
        if (strcmp(*list++, string) == 0)
            return 1;
    }
    return 0;
}
1037 | 
1038 | static void
1039 | growrow(struct row *row)
1040 | {
1041 |     size_t new_alloc;
1042 |     char **new_fields;
1043 | 
1044 |     if (row->alloc > row->num)
1045 |         return;
1046 |     new_alloc = row->alloc == 0 ? 32 : row->alloc * 2;
1047 |     if ((new_fields = realloc(row->fields, new_alloc * sizeof(*row->fields))) == NULL)
1048 |         err(1, "realloc");
1049 |     row->fields = new_fields;
1050 |     row->alloc = new_alloc;
1051 |     memset(row->fields + row->num, 0, (row->alloc - row->num) * sizeof(*row->fields));
1052 | }
1053 | 
// Parse the format string and collect its column accessors.
//
// Every conversion in "fmt" other than "%%" must start with a column accessor
// ("N$" or "{name}"); a '*' field width or precision must be followed by one
// as well. The accessors are removed from "fmt" in place by eataccessor(), and
// the columns they refer to are recorded in order. "column_names" is passed
// through to the accessor parsing, which needs it for "{name}".
//
// A newly allocated array with the recorded entries is stored in *argsp and
// the number of entries is returned. Malformed formats are fatal.
static int
parsefmt(char *fmt, const struct row *column_names, unsigned int **argsp)
{
    unsigned int *args;
    size_t alloc;
    int nargs;
    char *s;

    // Size and allocate array: each '%' can use up to three accessors (value, width, precision)
    alloc = 0;
    for (s = fmt; *s != '\0'; s++) {
        if (*s == '%')
            alloc += 3;
    }

    // Never ask for zero bytes: malloc(0) is allowed to return NULL, which
    // would make a format without any conversions look like an out-of-memory error
    if (alloc == 0)
        alloc = 1;
    if ((args = malloc(alloc * sizeof(*args))) == NULL)
        err(1, "malloc");
    nargs = 0;

    // Parse format
    for (s = fmt; *s != '\0'; s++) {
        char *const fspec = s;
        if (*s != '%' || *++s == '%')
            continue;
        s = eataccessor(fspec, "format specification", column_names, s, &nargs, args);
        while (*s != '\0' && strchr("#-+ 0", *s) != NULL)       // eat up optional flags
            s++;
        s = eatwidthprec(fspec, "field width for format specification", column_names, s, &nargs, args);
        if (*s == '.')
            s = eatwidthprec(fspec, "precision for format specification", column_names, s + 1, &nargs, args);
        if (*s == '\0')
            errx(1, "truncated format specification starting at \"%.20s...\"", fspec);
    }

    // Done
    *argsp = args;
    return nargs;
}
1091 | 
// Parse a single-character specification.
//
// Accepted forms are a single character, a two-character backslash escape
// (\a \t \b \r \f \v \\ \' \"), and the four-character forms \xHH (hex) and
// \OOO (octal). Returns the character value, or -1 if the specification is
// not recognized, denotes a newline, or does not fit into one byte.
static int
parsechar(const char *str)
{
    static const char escape_letters[] = "atbrfv\\'\"";
    static const char escape_values[] = "\a\t\b\r\f\v\\'\"";
    const size_t len = strlen(str);
    const char *hit;
    char *end;
    int value;

    if (len == 1)
        value = (unsigned char)str[0];
    else if (len == 2 || len == 4) {
        if (str[0] != '\\')
            return -1;
        if (len == 2) {

            // Symbolic escape: look up the letter and take the value at the same position
            if ((hit = strchr(escape_letters, str[1])) == NULL)
                return -1;
            value = escape_values[hit - escape_letters];
        } else {

            // Numeric escape: hex after "\x", otherwise octal after the backslash
            if (str[1] == 'x')
                value = (int)strtoul(str + 2, &end, 16);
            else
                value = (int)strtoul(str + 1, &end, 8);
            if (*end != '\0')
                return -1;
        }
    } else
        return -1;

    // Disallow line separator and overflown values
    if (value == '\n' || value != (value & 0xff))
        return -1;

    // Done
    return value;
}
1155 | 
// Consume a field width or precision starting at "s".
//
// A '*' must be followed by a column accessor, which is handed to
// eataccessor() together with all the other arguments. Otherwise any run of
// decimal digits is skipped. Returns the position at which parsing continues.
static char *
eatwidthprec(const char *const fspec, const char *desc, const struct row *column_names, char *s, int *nargs, unsigned int *args)
{
    char *p = s;

    if (*p != '*') {
        for (; isdigit((unsigned char)*p); p++)                 // eat up numerical field width or precision
            ;
        return p;
    }
    return eataccessor(fspec, desc, column_names, p + 1, nargs, args);
}
1165 | 
1166 | static char *
1167 | eataccessor(const char *const fspec, const char *desc, const struct row *column_names, char *s, int *nargs, unsigned int *args)
1168 | {
1169 |     char *const start = s;
1170 |     const char *colname;
1171 |     int namelen;
1172 |     int argnum;
1173 |     int i;
1174 | 
1175 |     if (*s == '{') {
1176 |         if (column_names == NULL)
1177 |             errx(1, "symbolic column accessors require \"-i\" flag in %s starting at \"%.20s...\"", desc, fspec);
1178 |         colname = ++s;
1179 |         while (*s != '}') {
1180 |             if (*s++ == '\0')
1181 |                 errx(1, "malformed column accessor in %s starting at \"%.20s...\"", desc, fspec);
1182 |         }
1183 |         namelen = s++ - colname;
1184 |         argnum = 0;
1185 |         for (i = 0; i < column_names->num; i++) {
1186 |             if (strncmp(colname, column_names->fields[i], namelen) == 0 && column_names->fields[i][namelen] == '\0') {
1187 |                 if (argnum != 0) {
1188 |                     errx(1, "ambiguous column name \"%.*s\" in symbolic column accessor in %s starting at \"%.20s...\"",
1189 |                       namelen, colname, desc, fspec);
1190 |                 }
1191 |                 argnum = i + 1;
1192 |             }
1193 |         }
1194 |         if (argnum == 0) {
1195 |             errx(1, "unknown column name \"%.*s\" in symbolic column accessor in %s starting at \"%.20s...\"",
1196 |               namelen, colname, desc, fspec);
1197 |         }
1198 |         args[(*nargs)++] = argnum;
1199 |     } else {
1200 |         while (isdigit((unsigned char)*s))
1201 |             s++;
1202 |         if (s == start || *s++ != '$')
1203 |             errx(1, "missing required column accessor in %s starting at \"%.20s...\"", desc, fspec);
1204 |         sscanf(start, "%u", &args[(*nargs)++]);
1205 |     }
1206 |     memmove(start, s, strlen(s) + 1);
1207 |     return start;
1208 | }
1209 | 
// Like getc() but optionally collapses CR or CR, LF into a single LF
//
// With "collapse" zero this is exactly getc(). Otherwise a CR is reported as
// LF; an LF directly following it is swallowed, while any other following
// character is pushed back onto the stream.
static int
readch(FILE *fp, int collapse)
{
    const int ch = getc(fp);
    int next;

    if (!collapse || ch != '\r')
        return ch;

    // CR seen: peek at the character behind it
    next = getc(fp);
    if (next != '\n')
        ungetc(next, fp);
    return '\n';
}
1225 | 
1226 | static void
1227 | freerow(struct row *row)
1228 | {
1229 |     while (row->num > 0)
1230 |         free(row->fields[--row->num]);
1231 |     free(row->fields);
1232 |     memset(row, 0, sizeof(*row));
1233 | }
1234 | 
1235 | static void
1236 | usage(void)
1237 | {
1238 | 
1239 |     fprintf(stderr, "Usage:\n");
1240 |     fprintf(stderr, "  csvprintf [options] format\n");
1241 |     fprintf(stderr, "  csvprintf -b [options]\n");
1242 |     fprintf(stderr, "  csvprintf -j [options]\n");
1243 |     fprintf(stderr, "  csvprintf -x [options]\n");
1244 |     fprintf(stderr, "  csvprintf -X [options]\n");
1245 |     fprintf(stderr, "  csvprintf -h\n");
1246 |     fprintf(stderr, "  csvprintf -v\n");
1247 |     fprintf(stderr, "Options:\n");
1248 |     fprintf(stderr, "  -b\t\tConvert input to bash(1) variable assignments\n");
1249 |     fprintf(stderr, "  -e encoding\tSpecify input character encoding (XML and JSON modes only; default ISO-8859-1)\n");
1250 |     fprintf(stderr, "  -f input\tRead CSV input from specified file (default stdin)\n");
1251 |     fprintf(stderr, "  -i\t\tAssume the first CSV record contains column names\n");
1252 |     fprintf(stderr, "  -j\t\tConvert input to JSON text sequences\n");
1253 |     fprintf(stderr, "  -q char\tSpecify quote character (default `%c')\n", DEFAULT_QUOTE_CHAR);
1254 |     fprintf(stderr, "  -s char\tSpecify field separator character (default `%c')\n", DEFAULT_FSEP_CHAR);
1255 |     fprintf(stderr, "  -x\t\tConvert input to XML using numeric tags\n");
1256 |     fprintf(stderr, "  -X\t\tConvert input to XML using column name tags (implies \"-i\")\n");
1257 |     fprintf(stderr, "  -h\t\tOutput this help message and exit\n");
1258 |     fprintf(stderr, "  -v\t\tOutput version information and exit\n");
1259 | }
1260 | 
1261 | static void
1262 | version(void)
1263 | {
1264 |     fprintf(stderr, "%s version %s", PACKAGE_TARNAME, PACKAGE_VERSION);
1265 |     if (*csvprintf_version != '\0')
1266 |         fprintf(stderr, " (%s)", csvprintf_version);
1267 |     fprintf(stderr, "\n");
1268 |     fprintf(stderr, "Copyright (C) 2010-2023 Archie L. Cobbs\n");
1269 |     fprintf(stderr, "This is free software; see the source for copying conditions. There is NO\n");
1270 |     fprintf(stderr, "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n");
1271 | }
1272 | 
1273 | 


--------------------------------------------------------------------------------
/tests/run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | set -e
 4 | set -o pipefail
 5 | 
 6 | FAILED_TESTS=''
 7 | for INPUT_FILE in *.in; do
 8 |     OUTPUT_FILE1=`echo "${INPUT_FILE}" | sed -n 's/\.in$/.out1/gp'`
 9 |     OUTPUT_FILE2=`echo "${INPUT_FILE}" | sed -n 's/\.in$/.out2/gp'`
10 |     OUTPUT_FILE3A=`echo "${INPUT_FILE}" | sed -n 's/\.in$/.out3a/gp'`
11 |     OUTPUT_FILE3B=`echo "${INPUT_FILE}" | sed -n 's/\.in$/.out3b/gp'`
12 |     OUTPUT_FILE4=`echo "${INPUT_FILE}" | sed -n 's/\.in$/.out4/gp'`
13 |     OUTPUT_FILE5A=`echo "${INPUT_FILE}" | sed -n 's/\.in$/.out5a/gp'`
14 |     OUTPUT_FILE5B=`echo "${INPUT_FILE}" | sed -n 's/\.in$/.out5b/gp'`
15 |     echo "*** testing ${INPUT_FILE}..." 1>&2
16 |     if ! ../csvprintf -x -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE1}" -; then
17 |         echo "*** FAILED: [1] ${INPUT_FILE}" 1>&2
18 |         FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE1}"
19 |     fi
20 |     if ! ../csvprintf -X -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE2}" -; then
21 |         echo "*** FAILED: [2] ${INPUT_FILE}" 1>&2
22 |         FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE2}"
23 |     fi
24 |     if ! ../csvprintf -e iso-8859-1 -x -f "${INPUT_FILE}" | xsltproc ../csv.xsl - | ../csvprintf -e UTF-8 -x | diff -u "${OUTPUT_FILE1}" -; then
25 |         echo "*** FAILED: [1x] ${INPUT_FILE}" 1>&2
26 |         FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/csv2xml"
27 |     fi
28 |     if ! ../csvprintf -j -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE3A}" -; then
29 |         echo "*** FAILED: [3a] ${INPUT_FILE}" 1>&2
30 |         FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE3A}"
31 |     fi
32 |     if ! ../csvprintf -ij -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE3B}" -; then
33 |         echo "*** FAILED: [3b] ${INPUT_FILE}" 1>&2
34 |         FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE3B}"
35 |     fi
36 |     if ! ../csvprintf -ix -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE4}" -; then
37 |         echo "*** FAILED: [4] ${INPUT_FILE}" 1>&2
38 |         FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE4}"
39 |     fi
40 |     if ! ../csvprintf -b -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE5A}" -; then
41 |         echo "*** FAILED: [5a] ${INPUT_FILE}" 1>&2
42 |         FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE5A}"
43 |     fi
44 |     if ! ../csvprintf -ib -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE5B}" -; then
45 |         echo "*** FAILED: [5b] ${INPUT_FILE}" 1>&2
46 |         FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE5B}"
47 |     fi
48 | done
49 | 
50 | if [ -z "${FAILED_TESTS}" ]; then
51 |     echo "*** all tests passed"
52 | else
53 |     echo "*** test(s) failed:${FAILED_TESTS}"
54 |     exit 1
55 | fi
56 | 
57 | 


--------------------------------------------------------------------------------
/tests/run2.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | # Bail on error
  4 | set -e
  5 | 
  6 | # Setup temporary files
  7 | TMP_STDOUT_EXPECTED='csvprintf-test-out-expected.tmp'
  8 | TMP_STDERR_EXPECTED='csvprintf-test-err-expected.tmp'
  9 | TMP_STDOUT_ACTUAL='csvprintf-test-out-actual.tmp'
 10 | TMP_STDERR_ACTUAL='csvprintf-test-err-actual.tmp'
 11 | TMP_SWAP_FILE=''csvprintf-test-hexdump.tmp
 12 | trap "rm -f \
 13 |     ${TMP_STDOUT_EXPECTED} \
 14 |     ${TMP_STDERR_EXPECTED} \
 15 |     ${TMP_STDOUT_ACTUAL} \
 16 |     ${TMP_STDERR_ACTUAL} \
 17 |     ${TMP_SWAP_FILE}" 0 2 3 5 10 13 15
 18 | 
 19 | # Convert a file to hexdump version
 20 | hexdumpify()
 21 | {
 22 |     FILE="${1}"
 23 |     hexdump -C < "${FILE}" > "${TMP_SWAP_FILE}"
 24 |     mv "${TMP_SWAP_FILE}" "${FILE}"
 25 | }
 26 | 
 27 | # Compare files, on failure set ${DIFF_FAIL}
 28 | checkdiff()
 29 | {
 30 |     if [ "${1}" = '-h' ]; then
 31 |         HEXDUMPIFY='true'
 32 |         shift
 33 |     else
 34 |         HEXDUMPIFY='false'
 35 |     fi
 36 |     TESTFILE="${1}"
 37 |     WHAT="${2}"
 38 |     EXPECTED="${3}"
 39 |     ACTUAL="${4}"
 40 |     if diff -q "${EXPECTED}" "${ACTUAL}" >/dev/null; then
 41 |         return 0
 42 |     fi
 43 |     echo "test: ${TESTFILE}: ${WHAT} mismatch"
 44 |     echo '------------------------------------------------------'
 45 |     if [ "${HEXDUMPIFY}" = 'true' ]; then
 46 |         hexdumpify "${EXPECTED}"
 47 |         hexdumpify "${ACTUAL}"
 48 |     fi
 49 |     diff -u "${EXPECTED}" "${ACTUAL}" || true
 50 |     echo '------------------------------------------------------'
 51 |     DIFF_FAIL='true'
 52 | }
 53 | 
 54 | # Run the test described by ${TESTFILE}; on failure set ${TEST_FAIL} to 'true'
 55 | runtest()
 56 | {
 57 |     # Load the test definition, starting from a clean slate
 58 |     unset FLAGS STDIN STDOUT STDERR EXITVAL
 59 |     . "${TESTFILE}"
 60 | 
 61 |     # All five settings must be defined by the test file (empty values are fine)
 62 |     if [ -z "${FLAGS+x}" ] \
 63 |       || [ -z "${STDIN+x}" ] \
 64 |       || [ -z "${STDOUT+x}" ] \
 65 |       || [ -z "${STDERR+x}" ] \
 66 |       || [ -z "${EXITVAL+x}" ]; then
 67 |         echo "test: ${TESTFILE}: invalid test file"
 68 |         exit 1
 69 |     fi
 70 | 
 71 |     # Write out the expected output, then run the program capturing its actual output
 72 |     echo -en "${STDOUT}" > "${TMP_STDOUT_EXPECTED}"
 73 |     echo -en "${STDERR}" > "${TMP_STDERR_EXPECTED}"
 74 |     set +e      # a non-zero exit status here must be recorded, not abort the run
 75 |     echo -en "${STDIN}" | ../csvprintf ${FLAGS} >"${TMP_STDOUT_ACTUAL}" 2>"${TMP_STDERR_ACTUAL}"
 76 |     ACTUAL_EXITVAL="$?"
 77 |     set -e
 78 | 
 79 |     # '!USAGE!' means the expected stderr is whatever "--help" writes to stderr
 80 |     if [ "${STDERR}" = '!USAGE!' ]; then
 81 |         ../csvprintf --help 2>"${TMP_STDERR_EXPECTED}"
 82 |     fi
 83 | 
 84 |     # Compare the outputs; stdout is not checked when the test says '!IGNORE!'
 85 |     DIFF_FAIL='false'
 86 |     if [ "${STDOUT}" != '!IGNORE!' ]; then
 87 |         checkdiff -h "${TESTFILE}" "standard output" "${TMP_STDOUT_EXPECTED}" "${TMP_STDOUT_ACTUAL}"
 88 |     fi
 89 |     checkdiff "${TESTFILE}" "standard error" "${TMP_STDERR_EXPECTED}" "${TMP_STDERR_ACTUAL}"
 90 |     if [ "${DIFF_FAIL}" != 'false' ]; then
 91 |         TEST_FAIL='true'
 92 |     fi
 93 | 
 94 |     # The exit status has to match as well
 95 |     if [ "${ACTUAL_EXITVAL}" -ne "${EXITVAL}" ]; then
 96 |         echo "test: ${TESTFILE}: exit value ${ACTUAL_EXITVAL} != ${EXITVAL}"
 97 |         TEST_FAIL='true'
 98 |     fi
 99 | 
100 |     # Report the outcome; for a failure also show the test parameters
101 |     if [ "${TEST_FAIL}" != 'false' ]; then
102 |         echo "******************************************************"
103 |         echo "test: ${TESTFILE} FAILED with:"
104 |         echo "  FLAGS='${FLAGS}'"
105 |         echo "  STDIN='${STDIN}'"
106 |         echo "******************************************************"
107 |     else
108 |         echo "test: ${TESTFILE}: success"
109 |     fi
110 | }
111 | 
112 | # Find all tests and run them
113 | ANY_FAIL='false'
114 | for TESTFILE in `find . -maxdepth 1 -type f -name 'test-*.tst' | sort | sed 's|^./||g'`; do
115 |     TEST_FAIL='false'
116 |     runtest "${TESTFILE}"
117 |     if [ "${TEST_FAIL}" != 'false' ]; then
118 |         ANY_FAIL='true'
119 |     fi
120 | done
121 | 
122 | # Exit with error if any test failed
123 | if [ "${ANY_FAIL}" != 'false' ]; then
124 |     exit 1
125 | fi
126 | 


--------------------------------------------------------------------------------
/tests/test-bash-omit1.tst:
--------------------------------------------------------------------------------
1 | FLAGS='-bi'
2 | STDIN='aaa,PATH,ccc\n"a1","b1","c1"\n"a2","b2","c2"\n'
3 | STDOUT=$'aaa=\'a1\'; ccc=\'c1\';\naaa=\'a2\'; ccc=\'c2\';\n'
4 | STDERR=''
5 | EXITVAL='0'
6 | 


--------------------------------------------------------------------------------
/tests/test-bash-prefix1.tst:
--------------------------------------------------------------------------------
1 | FLAGS='-bi -p FOO_'
2 | STDIN='aaa,PATH,ccc\n"a1","b1","c1"\n"a2","b2","c2"\n'
3 | STDOUT=$'FOO_aaa=\'a1\'; FOO_PATH=\'b1\'; FOO_ccc=\'c1\';\nFOO_aaa=\'a2\'; FOO_PATH=\'b2\'; FOO_ccc=\'c2\';\n'
4 | STDERR=''
5 | EXITVAL='0'
6 | 


--------------------------------------------------------------------------------
/tests/test-bash-prefix2.tst:
--------------------------------------------------------------------------------
1 | FLAGS='-bi -p PA'
2 | STDIN='aaa,TH,ccc\n"a1","b1","c1"\n"a2","b2","c2"\n'
3 | STDOUT=$'PAaaa=\'a1\'; PAccc=\'c1\';\nPAaaa=\'a2\'; PAccc=\'c2\';\n'
4 | STDERR=''
5 | EXITVAL='0'
6 | 


--------------------------------------------------------------------------------
/tests/test-bash-quote.tst:
--------------------------------------------------------------------------------
1 | FLAGS='-bi -p PA_'
2 | STDIN=$'aaa,bbb,ccc\n\'aa\'xx\',"bb""yy",cc`zz\\ww\n'
3 | STDOUT=$'PA_aaa=$\'\\\'aa\\\'xx\\\'\'; PA_bbb=\'bb"yy\'; PA_ccc=\'cc`zz\\ww\';\n'
4 | STDERR=''
5 | EXITVAL='0'
6 | 


--------------------------------------------------------------------------------
/tests/test-cflag-not-found.tst:
--------------------------------------------------------------------------------
1 | FLAGS='-X -c bbb -c zzz'
2 | STDIN='aaa,bbb,ccc\n"a1","b1","c1"\n"a2","b2","c2"\n'
3 | STDOUT='!IGNORE!'
4 | STDERR='csvprintf: column "zzz" not found\n'
5 | EXITVAL='1'
6 | 


--------------------------------------------------------------------------------
/tests/test-cflag-xml.tst:
--------------------------------------------------------------------------------
1 | FLAGS='-X -c bbb'
2 | STDIN='aaa,bbb,ccc\n"a1","b1","c1"\n"a2","b2","c2"\n'
3 | STDOUT='<?xml version="1.0" encoding="UTF-8"?>\n<csv>\n  <row>\n    <bbb>b1</bbb>\n  </row>\n  <row>\n    <bbb>b2</bbb>\n  </row>\n</csv>\n'
4 | STDERR=''
5 | EXITVAL='0'
6 | 


--------------------------------------------------------------------------------
/tests/test-json-skip1.tst:
--------------------------------------------------------------------------------
1 | FLAGS='-jn'
2 | STDIN='aaa,bbb\n"a1","b1"\n"a2","b2"\n'
3 | STDOUT='\x1e["a1","b1"]\n\x1e["a2","b2"]\n'
4 | STDERR=''
5 | EXITVAL='0'
6 | 


--------------------------------------------------------------------------------
/tests/test-tab-noskip.tst:
--------------------------------------------------------------------------------
1 | FLAGS='-nj -s \t'
2 | STDIN='aaa\tbbb\tccc\n\t\t\n'
3 | STDOUT='\x1e["","",""]\n'
4 | STDERR=''
5 | EXITVAL='0'
6 | 


--------------------------------------------------------------------------------
/tests/test1.in:
--------------------------------------------------------------------------------
1 | NAME,ADDRESS,POINTS
2 | Fred Smith,"1234 Main St.
3 | Anytown, USA   39103",123.4567
4 | "Wayne ""The Great One"" Gretsky",  59 Hockey Lane  , 999999
5 | 


--------------------------------------------------------------------------------
/tests/test1.out1:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <col1>NAME</col1>
 5 |     <col2>ADDRESS</col2>
 6 |     <col3>POINTS</col3>
 7 |   </row>
 8 |   <row>
 9 |     <col1>Fred Smith</col1>
10 |     <col2>1234 Main St.
11 | Anytown, USA   39103</col2>
12 |     <col3>123.4567</col3>
13 |   </row>
14 |   <row>
15 |     <col1>Wayne "The Great One" Gretsky</col1>
16 |     <col2>59 Hockey Lane</col2>
17 |     <col3>999999</col3>
18 |   </row>
19 | </csv>
20 | 


--------------------------------------------------------------------------------
/tests/test1.out2:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <NAME>Fred Smith</NAME>
 5 |     <ADDRESS>1234 Main St.
 6 | Anytown, USA   39103</ADDRESS>
 7 |     <POINTS>123.4567</POINTS>
 8 |   </row>
 9 |   <row>
10 |     <NAME>Wayne "The Great One" Gretsky</NAME>
11 |     <ADDRESS>59 Hockey Lane</ADDRESS>
12 |     <POINTS>999999</POINTS>
13 |   </row>
14 | </csv>
15 | 


--------------------------------------------------------------------------------
/tests/test1.out3a:
--------------------------------------------------------------------------------
1 | ["NAME","ADDRESS","POINTS"]
2 | ["Fred Smith","1234 Main St.\nAnytown, USA   39103","123.4567"]
3 | ["Wayne \"The Great One\" Gretsky","59 Hockey Lane","999999"]
4 | 


--------------------------------------------------------------------------------
/tests/test1.out3b:
--------------------------------------------------------------------------------
1 | {"NAME":"Fred Smith","ADDRESS":"1234 Main St.\nAnytown, USA   39103","POINTS":"123.4567"}
2 | {"NAME":"Wayne \"The Great One\" Gretsky","ADDRESS":"59 Hockey Lane","POINTS":"999999"}
3 | 


--------------------------------------------------------------------------------
/tests/test1.out4:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <col1>Fred Smith</col1>
 5 |     <col2>1234 Main St.
 6 | Anytown, USA   39103</col2>
 7 |     <col3>123.4567</col3>
 8 |   </row>
 9 |   <row>
10 |     <col1>Wayne "The Great One" Gretsky</col1>
11 |     <col2>59 Hockey Lane</col2>
12 |     <col3>999999</col3>
13 |   </row>
14 | </csv>
15 | 


--------------------------------------------------------------------------------
/tests/test1.out5a:
--------------------------------------------------------------------------------
1 | ROW=( 'NAME' 'ADDRESS' 'POINTS' )
2 | ROW=( 'Fred Smith' $'1234 Main St.\nAnytown, USA   39103' '123.4567' )
3 | ROW=( 'Wayne "The Great One" Gretsky' '59 Hockey Lane' '999999' )
4 | 


--------------------------------------------------------------------------------
/tests/test1.out5b:
--------------------------------------------------------------------------------
1 | NAME='Fred Smith'; ADDRESS=$'1234 Main St.\nAnytown, USA   39103'; POINTS='123.4567';
2 | NAME='Wayne "The Great One" Gretsky'; ADDRESS='59 Hockey Lane'; POINTS='999999';
3 | 


--------------------------------------------------------------------------------
/tests/test2.in:
--------------------------------------------------------------------------------
1 | NAME,ADDRESS,POINTS
2 | Fred Smith,"1234 Main St.
3 | Anytown, USA   39103",123.4567
4 | "Wayne ""The Great One"" Gretsky",  59 Hockey Lane  , 999999
5 | 


--------------------------------------------------------------------------------
/tests/test2.out1:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <col1>NAME</col1>
 5 |     <col2>ADDRESS</col2>
 6 |     <col3>POINTS</col3>
 7 |   </row>
 8 |   <row>
 9 |     <col1>Fred Smith</col1>
10 |     <col2>1234 Main St.&#13;
11 | Anytown, USA   39103</col2>
12 |     <col3>123.4567</col3>
13 |   </row>
14 |   <row>
15 |     <col1>Wayne "The Great One" Gretsky</col1>
16 |     <col2>59 Hockey Lane</col2>
17 |     <col3>999999</col3>
18 |   </row>
19 | </csv>
20 | 


--------------------------------------------------------------------------------
/tests/test2.out2:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <NAME>Fred Smith</NAME>
 5 |     <ADDRESS>1234 Main St.&#13;
 6 | Anytown, USA   39103</ADDRESS>
 7 |     <POINTS>123.4567</POINTS>
 8 |   </row>
 9 |   <row>
10 |     <NAME>Wayne "The Great One" Gretsky</NAME>
11 |     <ADDRESS>59 Hockey Lane</ADDRESS>
12 |     <POINTS>999999</POINTS>
13 |   </row>
14 | </csv>
15 | 


--------------------------------------------------------------------------------
/tests/test2.out3a:
--------------------------------------------------------------------------------
1 | ["NAME","ADDRESS","POINTS"]
2 | ["Fred Smith","1234 Main St.\r\nAnytown, USA   39103","123.4567"]
3 | ["Wayne \"The Great One\" Gretsky","59 Hockey Lane","999999"]
4 | 


--------------------------------------------------------------------------------
/tests/test2.out3b:
--------------------------------------------------------------------------------
1 | {"NAME":"Fred Smith","ADDRESS":"1234 Main St.\r\nAnytown, USA   39103","POINTS":"123.4567"}
2 | {"NAME":"Wayne \"The Great One\" Gretsky","ADDRESS":"59 Hockey Lane","POINTS":"999999"}
3 | 


--------------------------------------------------------------------------------
/tests/test2.out4:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <col1>Fred Smith</col1>
 5 |     <col2>1234 Main St.&#13;
 6 | Anytown, USA   39103</col2>
 7 |     <col3>123.4567</col3>
 8 |   </row>
 9 |   <row>
10 |     <col1>Wayne "The Great One" Gretsky</col1>
11 |     <col2>59 Hockey Lane</col2>
12 |     <col3>999999</col3>
13 |   </row>
14 | </csv>
15 | 


--------------------------------------------------------------------------------
/tests/test2.out5a:
--------------------------------------------------------------------------------
1 | ROW=( 'NAME' 'ADDRESS' 'POINTS' )
2 | ROW=( 'Fred Smith' $'1234 Main St.\r\nAnytown, USA   39103' '123.4567' )
3 | ROW=( 'Wayne "The Great One" Gretsky' '59 Hockey Lane' '999999' )
4 | 


--------------------------------------------------------------------------------
/tests/test2.out5b:
--------------------------------------------------------------------------------
1 | NAME='Fred Smith'; ADDRESS=$'1234 Main St.\r\nAnytown, USA   39103'; POINTS='123.4567';
2 | NAME='Wayne "The Great One" Gretsky'; ADDRESS='59 Hockey Lane'; POINTS='999999';
3 | 


--------------------------------------------------------------------------------
/tests/test3.in:
--------------------------------------------------------------------------------
1 | NAME,ADDRESS,POINTSFred Smith,"1234 Main St.Anytown, USA   39103",123.4567"Wayne ""The Great One"" Gretsky",  59 Hockey Lane  , 999999


--------------------------------------------------------------------------------
/tests/test3.out1:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <col1>NAME</col1>
 5 |     <col2>ADDRESS</col2>
 6 |     <col3>POINTS</col3>
 7 |   </row>
 8 |   <row>
 9 |     <col1>Fred Smith</col1>
10 |     <col2>1234 Main St.&#13;Anytown, USA   39103</col2>
11 |     <col3>123.4567</col3>
12 |   </row>
13 |   <row>
14 |     <col1>Wayne "The Great One" Gretsky</col1>
15 |     <col2>59 Hockey Lane</col2>
16 |     <col3>999999</col3>
17 |   </row>
18 | </csv>
19 | 


--------------------------------------------------------------------------------
/tests/test3.out2:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <NAME>Fred Smith</NAME>
 5 |     <ADDRESS>1234 Main St.&#13;Anytown, USA   39103</ADDRESS>
 6 |     <POINTS>123.4567</POINTS>
 7 |   </row>
 8 |   <row>
 9 |     <NAME>Wayne "The Great One" Gretsky</NAME>
10 |     <ADDRESS>59 Hockey Lane</ADDRESS>
11 |     <POINTS>999999</POINTS>
12 |   </row>
13 | </csv>
14 | 


--------------------------------------------------------------------------------
/tests/test3.out3a:
--------------------------------------------------------------------------------
1 | ["NAME","ADDRESS","POINTS"]
2 | ["Fred Smith","1234 Main St.\rAnytown, USA   39103","123.4567"]
3 | ["Wayne \"The Great One\" Gretsky","59 Hockey Lane","999999"]
4 | 


--------------------------------------------------------------------------------
/tests/test3.out3b:
--------------------------------------------------------------------------------
1 | {"NAME":"Fred Smith","ADDRESS":"1234 Main St.\rAnytown, USA   39103","POINTS":"123.4567"}
2 | {"NAME":"Wayne \"The Great One\" Gretsky","ADDRESS":"59 Hockey Lane","POINTS":"999999"}
3 | 


--------------------------------------------------------------------------------
/tests/test3.out4:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <col1>Fred Smith</col1>
 5 |     <col2>1234 Main St.&#13;Anytown, USA   39103</col2>
 6 |     <col3>123.4567</col3>
 7 |   </row>
 8 |   <row>
 9 |     <col1>Wayne "The Great One" Gretsky</col1>
10 |     <col2>59 Hockey Lane</col2>
11 |     <col3>999999</col3>
12 |   </row>
13 | </csv>
14 | 


--------------------------------------------------------------------------------
/tests/test3.out5a:
--------------------------------------------------------------------------------
1 | ROW=( 'NAME' 'ADDRESS' 'POINTS' )
2 | ROW=( 'Fred Smith' $'1234 Main St.\rAnytown, USA   39103' '123.4567' )
3 | ROW=( 'Wayne "The Great One" Gretsky' '59 Hockey Lane' '999999' )
4 | 


--------------------------------------------------------------------------------
/tests/test3.out5b:
--------------------------------------------------------------------------------
1 | NAME='Fred Smith'; ADDRESS=$'1234 Main St.\rAnytown, USA   39103'; POINTS='123.4567';
2 | NAME='Wayne "The Great One" Gretsky'; ADDRESS='59 Hockey Lane'; POINTS='999999';
3 | 


--------------------------------------------------------------------------------
/tests/test4.in:
--------------------------------------------------------------------------------
1 | NAME,ADDRESS,POINTSFred Smith,"1234 Main St.
2 | Anytown, USA   39103",123.4567"Wayne ""The Great One"" Gretsky",  59 Hockey Lane  , 999999


--------------------------------------------------------------------------------
/tests/test4.out1:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <col1>NAME</col1>
 5 |     <col2>ADDRESS</col2>
 6 |     <col3>POINTS</col3>
 7 |   </row>
 8 |   <row>
 9 |     <col1>Fred Smith</col1>
10 |     <col2>1234 Main St.
11 | Anytown, USA   39103</col2>
12 |     <col3>123.4567</col3>
13 |   </row>
14 |   <row>
15 |     <col1>Wayne "The Great One" Gretsky</col1>
16 |     <col2>59 Hockey Lane</col2>
17 |     <col3>999999</col3>
18 |   </row>
19 | </csv>
20 | 


--------------------------------------------------------------------------------
/tests/test4.out2:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <NAME>Fred Smith</NAME>
 5 |     <ADDRESS>1234 Main St.
 6 | Anytown, USA   39103</ADDRESS>
 7 |     <POINTS>123.4567</POINTS>
 8 |   </row>
 9 |   <row>
10 |     <NAME>Wayne "The Great One" Gretsky</NAME>
11 |     <ADDRESS>59 Hockey Lane</ADDRESS>
12 |     <POINTS>999999</POINTS>
13 |   </row>
14 | </csv>
15 | 


--------------------------------------------------------------------------------
/tests/test4.out3a:
--------------------------------------------------------------------------------
1 | ["NAME","ADDRESS","POINTS"]
2 | ["Fred Smith","1234 Main St.\nAnytown, USA   39103","123.4567"]
3 | ["Wayne \"The Great One\" Gretsky","59 Hockey Lane","999999"]
4 | 


--------------------------------------------------------------------------------
/tests/test4.out3b:
--------------------------------------------------------------------------------
1 | {"NAME":"Fred Smith","ADDRESS":"1234 Main St.\nAnytown, USA   39103","POINTS":"123.4567"}
2 | {"NAME":"Wayne \"The Great One\" Gretsky","ADDRESS":"59 Hockey Lane","POINTS":"999999"}
3 | 


--------------------------------------------------------------------------------
/tests/test4.out4:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <col1>Fred Smith</col1>
 5 |     <col2>1234 Main St.
 6 | Anytown, USA   39103</col2>
 7 |     <col3>123.4567</col3>
 8 |   </row>
 9 |   <row>
10 |     <col1>Wayne "The Great One" Gretsky</col1>
11 |     <col2>59 Hockey Lane</col2>
12 |     <col3>999999</col3>
13 |   </row>
14 | </csv>
15 | 


--------------------------------------------------------------------------------
/tests/test4.out5a:
--------------------------------------------------------------------------------
1 | ROW=( 'NAME' 'ADDRESS' 'POINTS' )
2 | ROW=( 'Fred Smith' $'1234 Main St.\nAnytown, USA   39103' '123.4567' )
3 | ROW=( 'Wayne "The Great One" Gretsky' '59 Hockey Lane' '999999' )
4 | 


--------------------------------------------------------------------------------
/tests/test4.out5b:
--------------------------------------------------------------------------------
1 | NAME='Fred Smith'; ADDRESS=$'1234 Main St.\nAnytown, USA   39103'; POINTS='123.4567';
2 | NAME='Wayne "The Great One" Gretsky'; ADDRESS='59 Hockey Lane'; POINTS='999999';
3 | 


--------------------------------------------------------------------------------
/tests/test5.in:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/archiecobbs/csvprintf/c300f17d2f82c53e433f7bdca742805d602eb31e/tests/test5.in


--------------------------------------------------------------------------------
/tests/test5.out1:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <csv>
3 |   <row>
4 |     <col1>piñata</col1>
5 |   </row>
6 | </csv>
7 | 


--------------------------------------------------------------------------------
/tests/test5.out2:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <csv>
3 | </csv>
4 | 


--------------------------------------------------------------------------------
/tests/test5.out3a:
--------------------------------------------------------------------------------
1 | ["pi\u00f1ata"]
2 | 


--------------------------------------------------------------------------------
/tests/test5.out3b:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/archiecobbs/csvprintf/c300f17d2f82c53e433f7bdca742805d602eb31e/tests/test5.out3b


--------------------------------------------------------------------------------
/tests/test5.out4:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <csv>
3 | </csv>
4 | 


--------------------------------------------------------------------------------
/tests/test5.out5a:
--------------------------------------------------------------------------------
1 | ROW=( $'pi\xf1ata' )
2 | 


--------------------------------------------------------------------------------
/tests/test5.out5b:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/archiecobbs/csvprintf/c300f17d2f82c53e433f7bdca742805d602eb31e/tests/test5.out5b


--------------------------------------------------------------------------------
/tests/test6.in:
--------------------------------------------------------------------------------
1 | NAME,ADDRESS,POINTSFred Smith,"1234 Main St.
2 | Anytown, USA   39103",123.4567"Wayne ""The Great One"" Gretsky",  59 Hockey Lane  , "999999"
3 | 


--------------------------------------------------------------------------------
/tests/test6.out1:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <col1>NAME</col1>
 5 |     <col2>ADDRESS</col2>
 6 |     <col3>POINTS</col3>
 7 |   </row>
 8 |   <row>
 9 |     <col1>Fred Smith</col1>
10 |     <col2>1234 Main St.
11 | Anytown, USA   39103</col2>
12 |     <col3>123.4567</col3>
13 |   </row>
14 |   <row>
15 |     <col1>Wayne "The Great One" Gretsky</col1>
16 |     <col2>59 Hockey Lane</col2>
17 |     <col3>999999</col3>
18 |   </row>
19 | </csv>
20 | 


--------------------------------------------------------------------------------
/tests/test6.out2:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <NAME>Fred Smith</NAME>
 5 |     <ADDRESS>1234 Main St.
 6 | Anytown, USA   39103</ADDRESS>
 7 |     <POINTS>123.4567</POINTS>
 8 |   </row>
 9 |   <row>
10 |     <NAME>Wayne "The Great One" Gretsky</NAME>
11 |     <ADDRESS>59 Hockey Lane</ADDRESS>
12 |     <POINTS>999999</POINTS>
13 |   </row>
14 | </csv>
15 | 


--------------------------------------------------------------------------------
/tests/test6.out3a:
--------------------------------------------------------------------------------
1 | ["NAME","ADDRESS","POINTS"]
2 | ["Fred Smith","1234 Main St.\nAnytown, USA   39103","123.4567"]
3 | ["Wayne \"The Great One\" Gretsky","59 Hockey Lane","999999"]
4 | 


--------------------------------------------------------------------------------
/tests/test6.out3b:
--------------------------------------------------------------------------------
1 | {"NAME":"Fred Smith","ADDRESS":"1234 Main St.\nAnytown, USA   39103","POINTS":"123.4567"}
2 | {"NAME":"Wayne \"The Great One\" Gretsky","ADDRESS":"59 Hockey Lane","POINTS":"999999"}
3 | 


--------------------------------------------------------------------------------
/tests/test6.out4:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <col1>Fred Smith</col1>
 5 |     <col2>1234 Main St.
 6 | Anytown, USA   39103</col2>
 7 |     <col3>123.4567</col3>
 8 |   </row>
 9 |   <row>
10 |     <col1>Wayne "The Great One" Gretsky</col1>
11 |     <col2>59 Hockey Lane</col2>
12 |     <col3>999999</col3>
13 |   </row>
14 | </csv>
15 | 


--------------------------------------------------------------------------------
/tests/test6.out5a:
--------------------------------------------------------------------------------
1 | ROW=( 'NAME' 'ADDRESS' 'POINTS' )
2 | ROW=( 'Fred Smith' $'1234 Main St.\nAnytown, USA   39103' '123.4567' )
3 | ROW=( 'Wayne "The Great One" Gretsky' '59 Hockey Lane' '999999' )
4 | 


--------------------------------------------------------------------------------
/tests/test6.out5b:
--------------------------------------------------------------------------------
1 | NAME='Fred Smith'; ADDRESS=$'1234 Main St.\nAnytown, USA   39103'; POINTS='123.4567';
2 | NAME='Wayne "The Great One" Gretsky'; ADDRESS='59 Hockey Lane'; POINTS='999999';
3 | 


--------------------------------------------------------------------------------
/tests/test7.in:
--------------------------------------------------------------------------------
1 | Name With Spaces,#~!@#$%^&*(),"&<>&""\"
2 | aaa,bbb,ccc
3 | 


--------------------------------------------------------------------------------
/tests/test7.out1:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <col1>Name With Spaces</col1>
 5 |     <col2>#~!@#$%^&amp;*()</col2>
 6 |     <col3>&amp;&lt;&gt;&amp;"\</col3>
 7 |   </row>
 8 |   <row>
 9 |     <col1>aaa</col1>
10 |     <col2>bbb</col2>
11 |     <col3>ccc</col3>
12 |   </row>
13 | </csv>
14 | 


--------------------------------------------------------------------------------
/tests/test7.out2:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <csv>
3 |   <row>
4 |     <Name_With_Spaces>aaa</Name_With_Spaces>
5 |     <____________>bbb</____________>
6 |     <______>ccc</______>
7 |   </row>
8 | </csv>
9 | 


--------------------------------------------------------------------------------
/tests/test7.out3a:
--------------------------------------------------------------------------------
1 | ["Name With Spaces","#~!@#$%^&*()","&<>&\"\\"]
2 | ["aaa","bbb","ccc"]
3 | 


--------------------------------------------------------------------------------
/tests/test7.out3b:
--------------------------------------------------------------------------------
1 | {"Name With Spaces":"aaa","#~!@#$%^&*()":"bbb","&<>&\"\\":"ccc"}
2 | 


--------------------------------------------------------------------------------
/tests/test7.out4:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <csv>
3 |   <row>
4 |     <col1>aaa</col1>
5 |     <col2>bbb</col2>
6 |     <col3>ccc</col3>
7 |   </row>
8 | </csv>
9 | 


--------------------------------------------------------------------------------
/tests/test7.out5a:
--------------------------------------------------------------------------------
1 | ROW=( 'Name With Spaces' '#~!@#$%^&*()' '&<>&"\' )
2 | ROW=( 'aaa' 'bbb' 'ccc' )
3 | 


--------------------------------------------------------------------------------
/tests/test7.out5b:
--------------------------------------------------------------------------------
1 | Name_With_Spaces='aaa'; ____________='bbb'; ______='ccc';
2 | 


--------------------------------------------------------------------------------
/tests/test8.in:
--------------------------------------------------------------------------------
1 | ColA,ColB,ColC
2 | aaa,bbb,ccc
3 | aaa,bbb
4 | aaa,bbb,ccc,ddd
5 | 


--------------------------------------------------------------------------------
/tests/test8.out1:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <col1>ColA</col1>
 5 |     <col2>ColB</col2>
 6 |     <col3>ColC</col3>
 7 |   </row>
 8 |   <row>
 9 |     <col1>aaa</col1>
10 |     <col2>bbb</col2>
11 |     <col3>ccc</col3>
12 |   </row>
13 |   <row>
14 |     <col1>aaa</col1>
15 |     <col2>bbb</col2>
16 |   </row>
17 |   <row>
18 |     <col1>aaa</col1>
19 |     <col2>bbb</col2>
20 |     <col3>ccc</col3>
21 |     <col4>ddd</col4>
22 |   </row>
23 | </csv>
24 | 


--------------------------------------------------------------------------------
/tests/test8.out2:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <ColA>aaa</ColA>
 5 |     <ColB>bbb</ColB>
 6 |     <ColC>ccc</ColC>
 7 |   </row>
 8 |   <row>
 9 |     <ColA>aaa</ColA>
10 |     <ColB>bbb</ColB>
11 |   </row>
12 |   <row>
13 |     <ColA>aaa</ColA>
14 |     <ColB>bbb</ColB>
15 |     <ColC>ccc</ColC>
16 |     <col4>ddd</col4>
17 |   </row>
18 | </csv>
19 | 


--------------------------------------------------------------------------------
/tests/test8.out3a:
--------------------------------------------------------------------------------
1 | ["ColA","ColB","ColC"]
2 | ["aaa","bbb","ccc"]
3 | ["aaa","bbb"]
4 | ["aaa","bbb","ccc","ddd"]
5 | 


--------------------------------------------------------------------------------
/tests/test8.out3b:
--------------------------------------------------------------------------------
1 | {"ColA":"aaa","ColB":"bbb","ColC":"ccc"}
2 | {"ColA":"aaa","ColB":"bbb"}
3 | {"ColA":"aaa","ColB":"bbb","ColC":"ccc","col4":"ddd"}
4 | 


--------------------------------------------------------------------------------
/tests/test8.out4:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <col1>aaa</col1>
 5 |     <col2>bbb</col2>
 6 |     <col3>ccc</col3>
 7 |   </row>
 8 |   <row>
 9 |     <col1>aaa</col1>
10 |     <col2>bbb</col2>
11 |   </row>
12 |   <row>
13 |     <col1>aaa</col1>
14 |     <col2>bbb</col2>
15 |     <col3>ccc</col3>
16 |     <col4>ddd</col4>
17 |   </row>
18 | </csv>
19 | 


--------------------------------------------------------------------------------
/tests/test8.out5a:
--------------------------------------------------------------------------------
1 | ROW=( 'ColA' 'ColB' 'ColC' )
2 | ROW=( 'aaa' 'bbb' 'ccc' )
3 | ROW=( 'aaa' 'bbb' )
4 | ROW=( 'aaa' 'bbb' 'ccc' 'ddd' )
5 | 


--------------------------------------------------------------------------------
/tests/test8.out5b:
--------------------------------------------------------------------------------
1 | ColA='aaa'; ColB='bbb'; ColC='ccc';
2 | ColA='aaa'; ColB='bbb';
3 | ColA='aaa'; ColB='bbb'; ColC='ccc'; col4='ddd';
4 | 


--------------------------------------------------------------------------------
/tests/test9.in:
--------------------------------------------------------------------------------
1 | foo
2 | value1,value2,value3
3 | 


--------------------------------------------------------------------------------
/tests/test9.out1:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <csv>
 3 |   <row>
 4 |     <col1>foo</col1>
 5 |   </row>
 6 |   <row>
 7 |     <col1>value1</col1>
 8 |     <col2>value2</col2>
 9 |     <col3>value3</col3>
10 |   </row>
11 | </csv>
12 | 


--------------------------------------------------------------------------------
/tests/test9.out2:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <csv>
3 |   <row>
4 |     <foo>value1</foo>
5 |     <col2>value2</col2>
6 |     <col3>value3</col3>
7 |   </row>
8 | </csv>
9 | 


--------------------------------------------------------------------------------
/tests/test9.out3a:
--------------------------------------------------------------------------------
1 | ["foo"]
2 | ["value1","value2","value3"]
3 | 


--------------------------------------------------------------------------------
/tests/test9.out3b:
--------------------------------------------------------------------------------
1 | {"foo":"value1","col2":"value2","col3":"value3"}
2 | 


--------------------------------------------------------------------------------
/tests/test9.out4:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <csv>
3 |   <row>
4 |     <col1>value1</col1>
5 |     <col2>value2</col2>
6 |     <col3>value3</col3>
7 |   </row>
8 | </csv>
9 | 


--------------------------------------------------------------------------------
/tests/test9.out5a:
--------------------------------------------------------------------------------
1 | ROW=( 'foo' )
2 | ROW=( 'value1' 'value2' 'value3' )
3 | 


--------------------------------------------------------------------------------
/tests/test9.out5b:
--------------------------------------------------------------------------------
1 | foo='value1'; col2='value2'; col3='value3';
2 | 


--------------------------------------------------------------------------------
/xml2csv.in:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | #
 3 | # Apply the csv.xsl stylesheet to an XML document using xsltproc.
 4 | # Input is the named file, or standard input when no file or "-" is given.
 5 | 
 6 | # Set constants and defaults ("@...@" tokens are build-time placeholders)
 7 | NAME="xml2csv"
 8 | XSLTPROC="@XSLTPROC@"
 9 | CSVXSL="@pkgdatadir@/csv.xsl"
10 | 
11 | # Usage message; written to standard output so that "-h" output can be
12 | # piped or paged.  Error paths redirect it to standard error themselves.
13 | usage()
14 | {
15 |     echo "Usage:"
16 |     echo "    ${NAME} [input.xml]"
17 |     echo "Options:"
18 |     echo "    -h    Show this help message and exit"
19 | }
20 | 
21 | # Log function: print the arguments on standard error, prefixed with the
22 | # program name.  printf is used because echo may interpret backslashes or
23 | # a leading "-n" in the message, depending on the shell.
24 | log()
25 | {
26 |     printf '%s: %s\n' "${NAME}" "$*" 1>&2
27 | }
28 | 
29 | # Error function: log the arguments and exit with status 1
30 | errout()
31 | {
32 |     log ${1+"$@"}
33 |     exit 1
34 | }
35 | 
36 | # Bail on errors
37 | set -e
38 | 
39 | # Parse flags passed in on the command line
40 | while [ ${#} -gt 0 ]; do
41 |     case "$1" in
42 |         -h|--help)
43 |             usage
44 |             exit 0
45 |             ;;
46 |         --)
47 |             shift
48 |             break
49 |             ;;
50 |         -)
51 |             # A lone "-" is an operand (standard input), not a flag
52 |             break
53 |             ;;
54 |         -*)
55 |             log "unrecognized flag \`${1}'"
56 |             usage 1>&2
57 |             exit 1
58 |             ;;
59 |         *)
60 |             break
61 |             ;;
62 |     esac
63 | done
64 | 
65 | # Accept at most one operand; with none, read standard input
66 | case "${#}" in
67 |     0)
68 |         INPUT_FILE="-"
69 |         ;;
70 |     1)
71 |         INPUT_FILE="${1}"
72 |         ;;
73 |     *)
74 |         log "too many arguments"
75 |         usage 1>&2
76 |         exit 1
77 |         ;;
78 | esac
79 | 
80 | # xsltproc parses any argument beginning with "-" as an option, so a file
81 | # name such as "-foo.xml" (possible after "--") is passed as "./-foo.xml".
82 | # A lone "-" is left untouched: it selects standard input.
83 | case "${INPUT_FILE}" in
84 |     -)
85 |         ;;
86 |     -*)
87 |         INPUT_FILE="./${INPUT_FILE}"
88 |         ;;
89 | esac
90 | 
91 | # Run
92 | exec "${XSLTPROC}" "${CSVXSL}" "${INPUT_FILE}"
93 | 
94 | 


--------------------------------------------------------------------------------