├── .gitignore
├── CHANGES
├── COPYING
├── INSTALL
├── Makefile.am
├── README
├── README.md
├── autogen.sh
├── cleanup.sh
├── configure.ac
├── csv.xsl
├── csvprintf.1.in
├── csvprintf.h
├── main.c
├── tests
├── run.sh
├── run2.sh
├── test-bash-omit1.tst
├── test-bash-prefix1.tst
├── test-bash-prefix2.tst
├── test-bash-quote.tst
├── test-cflag-not-found.tst
├── test-cflag-xml.tst
├── test-json-skip1.tst
├── test-tab-noskip.tst
├── test1.in
├── test1.out1
├── test1.out2
├── test1.out3a
├── test1.out3b
├── test1.out4
├── test1.out5a
├── test1.out5b
├── test2.in
├── test2.out1
├── test2.out2
├── test2.out3a
├── test2.out3b
├── test2.out4
├── test2.out5a
├── test2.out5b
├── test3.in
├── test3.out1
├── test3.out2
├── test3.out3a
├── test3.out3b
├── test3.out4
├── test3.out5a
├── test3.out5b
├── test4.in
├── test4.out1
├── test4.out2
├── test4.out3a
├── test4.out3b
├── test4.out4
├── test4.out5a
├── test4.out5b
├── test5.in
├── test5.out1
├── test5.out2
├── test5.out3a
├── test5.out3b
├── test5.out4
├── test5.out5a
├── test5.out5b
├── test6.in
├── test6.out1
├── test6.out2
├── test6.out3a
├── test6.out3b
├── test6.out4
├── test6.out5a
├── test6.out5b
├── test7.in
├── test7.out1
├── test7.out2
├── test7.out3a
├── test7.out3b
├── test7.out4
├── test7.out5a
├── test7.out5b
├── test8.in
├── test8.out1
├── test8.out2
├── test8.out3a
├── test8.out3b
├── test8.out4
├── test8.out5a
├── test8.out5b
├── test9.in
├── test9.out1
├── test9.out2
├── test9.out3a
├── test9.out3b
├── test9.out4
├── test9.out5a
└── test9.out5b
└── xml2csv.in
/.gitignore:
--------------------------------------------------------------------------------
1 | aclocal.m4
2 | autom4te.cache
3 | config.h
4 | config.h.in
5 | config.log
6 | config.status
7 | configure
8 | csvprintf
9 | csvprintf.1
10 | .deps
11 | gitrev.c
12 | Makefile
13 | Makefile.in
14 | *.o
15 | scripts
16 | stamp-h1
17 | xml2csv
18 |
--------------------------------------------------------------------------------
/CHANGES:
--------------------------------------------------------------------------------
1 | Version Next
2 |
3 | - Fixed bug where \t separator was being skipped as whitespace
4 | - Allow duplicate column names if the "-c" flag avoids them
5 |
6 | Version 1.3.2 released January 25, 2023
7 |
8 | - Fixed bug where we could emit empty XML tag names
9 | - Fixed bug in man page examples for "-b" flag
10 |
11 | Version 1.3.1 released December 14, 2021
12 |
13 | - Added "-c" flag for explicit column names
14 | - Added "-n" flag that only reads column names
15 | - Added "-p" flag for prefixing names
16 | - Omit special variable names in Bash mode
17 | - Fixed build error on systems without 'u_char' defined
18 |
19 | Version 1.3.0 released December 9, 2021
20 |
21 | - Added "-b" flag for new Bash output mode
22 |
23 | Version 1.2.1 released November 24, 2021
24 |
25 | - Fixed bug where "-x" flag was behaving like "-X"
26 |
27 | Version 1.2.0 released November 22, 2021
28 |
29 | - Added "-j" flag for JSON text sequence document output.
30 | - Stopped escaping double quote as "&quot;" in plain XML text.
31 |
32 | Version 1.1.0 released February 25, 2021
33 |
34 | - Added support for format strings containing column names
35 |
36 | Version 1.0.4 released August 1, 2018
37 |
38 | - Fixed "unexpected character" bug when line ends with QUOTE, CR
39 | - Added "-X" flag to derive XML tag names from column headers
40 |
41 | Version 1.0.3 (r32) released January 5, 2013
42 |
43 | - Add support for converting XML back to CSV
44 | - Add `-e' flag to set input character encoding
45 | - Escape CR characters in XML output
46 | - Fixed glitches in man page
47 |
48 | Version 1.0.2 (r25) released August 25, 2012
49 |
50 | - Allow backslash escapes for `-s' and `-q' flags
51 | - Accept files that lack a terminating newline
52 | - Accept CR, LF, or CR-LF line endings
53 |
54 | Version 1.0.1 (r17) released March 9, 2012
55 |
56 | - Fix bug where `-s' flag did not function (Issue #1)
57 | - Document '%0$' specifier in man page
58 |
59 | Version 1.0 (r4) released November 30, 2010
60 |
61 | - Initial release
62 |
63 |
--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/INSTALL:
--------------------------------------------------------------------------------
1 |
2 | Simplified instructions:
3 |
4 | 1. ./configure
5 | 2. make
6 | 3. sudo make install
7 |
8 | Please see
9 |
10 | https://github.com/archiecobbs/csvprintf
11 |
12 | for more information.
13 |
14 |
--------------------------------------------------------------------------------
/Makefile.am:
--------------------------------------------------------------------------------
1 |
2 | #
3 | # csvprintf - Simple CSV file parser for the UNIX command line
4 | #
5 |
6 | bin_PROGRAMS= csvprintf
7 |
8 | bin_SCRIPTS= xml2csv
9 |
10 | noinst_HEADERS= csvprintf.h
11 |
12 | man_MANS= csvprintf.1
13 |
14 | pkgdata_DATA= csv.xsl
15 |
16 | docdir= $(datadir)/doc/packages/$(PACKAGE)
17 |
18 | doc_DATA= CHANGES COPYING README
19 |
20 | EXTRA_DIST= CHANGES INSTALL csvprintf.1.in xml2csv.in csv.xsl
21 |
22 | csvprintf_SOURCES= main.c \
23 | 	gitrev.c
24 |
25 | DISTCLEANFILES= csvprintf.1 xml2csv
26 |
27 | # Build csvprintf.1 and xml2csv from their .in templates by filtering them
28 | # through the sed command held in $(subst), defined further down.
29 | SUFFIXES= .in
30 | .in:
31 | 	rm -f $@; $(subst) < $< >$@
32 |
33 | # Build csvprintf if needed, then run both test scripts from within tests/.
34 | .PHONY: tests
35 | tests: csvprintf
36 | 	@echo '************'
37 | 	@echo 'TEST SUITE 1'
38 | 	@echo '************'
39 | 	@cd tests && ./run.sh
40 | 	@echo '************'
41 | 	@echo 'TEST SUITE 2'
42 | 	@echo '************'
43 | 	@cd tests && ./run2.sh
44 |
45 | # The [@] in each pattern keeps config.status from substituting the
46 | # pattern itself when it generates the Makefile.
47 | subst= sed \
48 | 	-e 's|@PACKAGE[@]|$(PACKAGE)|g' \
49 | 	-e 's|@PACKAGE_VERSION[@]|$(PACKAGE_VERSION)|g' \
50 | 	-e 's|@pkgdatadir[@]|$(pkgdatadir)|g' \
51 | 	-e 's|@XSLTPROC[@]|$(XSLTPROC)|g'
52 |
53 | # xml2csv shares the csvprintf manual page through a hard link.  Use -f so
54 | # that installing over an earlier installation replaces the existing link
55 | # instead of failing with "File exists".
56 | install-data-hook:
57 | 	ln -f "$(DESTDIR)$(man1dir)"/csvprintf.1 "$(DESTDIR)$(man1dir)"/xml2csv.1
58 |
59 | uninstall-hook:
60 | 	rm -f "$(DESTDIR)$(man1dir)"/xml2csv.1
61 |
62 | # Record the version reported by "git describe" in a generated source file.
63 | gitrev.c:
64 | 	printf 'const char *const csvprintf_version = "%s";\n' "`git describe`" > gitrev.c
65 |
66 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | csvprintf is a simple UNIX command line utility for parsing CSV files.
2 |
3 | csvprintf works just like the printf(1) command line utility. You
4 | supply a printf(1) format string on the command line and each record
5 | in the CSV file is formatted accordingly. Each format specifier in
6 | the format string contains a column accessor to specify which CSV
7 | column to use, so for example '%3$d' would format the third column
8 | as a decimal value.
9 |
10 | csvprintf can also convert CSV files into XML and JSON documents
11 | and Bash variable assignments suitable for eval(1).
12 |
13 | See INSTALL for installation instructions.
14 |
15 | See COPYING for license.
16 |
17 | See CHANGES for change history.
18 |
19 | Enjoy!
20 |
21 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | **csvprintf** is a simple UNIX command line utility for parsing CSV files.
2 |
3 | **csvprintf** works just like the `printf(1)` command line utility. You supply a `printf(1)` format string on the command line and each record in the CSV file is formatted accordingly. Each format specifier in the format string contains a column accessor to specify which CSV column to use, so for example `%3$d` would format the third column as a decimal value.
4 |
5 | **csvprintf** can also convert CSV files into XML, JSON, and `bash(1)` variable assignments.
6 |
7 | You can view the [ManPage](https://github.com/archiecobbs/csvprintf/wiki/ManPage) online.
8 |
9 | ### Examples
10 |
11 | Given this input file `input.csv`:
12 |
13 | ```
14 | NAME,ADDRESS,POINTS
15 | Fred Smith,"1234 Main St.
16 | Anytown, USA 39103",123.4567
17 | "Wayne ""The Great One"" Gretsky", 59 Hockey Lane , 999999
18 | ```
19 |
20 | here is the resulting output:
21 |
22 | ```
23 | $ cat input.csv | csvprintf -i 'Name: [%1$-24.24s]\nAddress: [%2$-12.12s]\nPoints: %3$.2f\n'
24 | Name: [Fred Smith ]
25 | Address: [1234 Main St]
26 | Points: 123.46
27 | Name: [Wayne "The Great One" Gr]
28 | Address: [59 Hockey La]
29 | Points: 999999.00
30 | ```
31 |
32 | An example of the XML output:
33 |
34 | ```
35 | $ cat input.csv | csvprintf -iX
36 |
37 |
38 |
39 | Fred Smith
40 | 1234 Main St.
41 | Anytown, USA 39103
42 | 123.4567
43 |
44 |
45 | Wayne "The Great One" Gretsky
46 | 59 Hockey Lane
47 | 999999
48 |
49 |
50 | ```
51 |
--------------------------------------------------------------------------------
/autogen.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
4 | # Script to regenerate all the GNU auto* gunk.
5 | # Run this from the top directory of the source tree.
6 | #
7 | # If it looks like I don't know what I'm doing here, you're right.
8 | #
9 |
10 | set -e
11 |
12 | . ./cleanup.sh
13 | if [ "${1}" = '-C' ]; then
14 | exit 0
15 | fi
16 | mkdir -p scripts
17 |
18 | ACLOCAL="aclocal"
19 | AUTOHEADER="autoheader"
20 | AUTOMAKE="automake"
21 | AUTOCONF="autoconf"
22 |
23 | echo "running aclocal"
24 | ${ACLOCAL} ${ACLOCAL_ARGS} -I scripts
25 |
26 | echo "running autoheader"
27 | ${AUTOHEADER}
28 |
29 | echo "running automake"
30 | ${AUTOMAKE} --add-missing -c --foreign
31 |
32 | echo "running autoconf"
33 | ${AUTOCONF} -f -i
34 |
35 | if [ "${1}" = '-c' ]; then
36 | echo "running configure"
37 | ./configure
38 | fi
39 |
40 |
--------------------------------------------------------------------------------
/cleanup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | #
4 | # Removes the files generated by the GNU autotools, by configure
5 | # and by the build.
6 | #
7 |
8 | set -e
9 |
10 | echo "cleaning up"
11 |
12 | # Autotools caches, helper scripts and configure by-products
13 | rm -rf \
14 |     autom4te*.cache scripts aclocal.m4 configure \
15 |     config.log config.status .deps stamp-h1 a.out.dSYM
16 | rm -f config.h.in config.h.in~ config.h scripts
17 |
18 | # Makefile and Makefile.in, wherever they are in the tree
19 | find . \( -name Makefile -o -name Makefile.in \) -print0 | xargs -0 rm -f
20 |
21 | # Generated sources, build products, release tarballs, substituted templates
22 | rm -f gitrev.c *.o csvprintf csvprintf-?.?.?.tar.gz csvprintf.1 xml2csv
23 |
--------------------------------------------------------------------------------
/configure.ac:
--------------------------------------------------------------------------------
1 | #
2 | # csvprintf - Simple CSV file parser for the UNIX command line
3 | #
4 | # Copyright 2010 Archie L. Cobbs
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License"); you may
7 | # not use this file except in compliance with the License. You may obtain
8 | # a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15 | # License for the specific language governing permissions and limitations
16 | # under the License.
17 | #
18 |
19 | AC_INIT([csvprintf - Simple CSV file parser for the UNIX command line],[1.3.2],[https://github.com/archiecobbs/csvprintf],[csvprintf])
20 | AC_CONFIG_AUX_DIR(scripts)
21 | AM_INIT_AUTOMAKE
22 | dnl AM_MAINTAINER_MODE
23 | AC_PREREQ([2.69])
24 | AC_REVISION($Id$)
25 | AC_PREFIX_DEFAULT(/usr)
26 | AC_PROG_MAKE_SET
27 |
28 | [CFLAGS="-g -O3 -pipe -Wall -Waggregate-return -Wcast-align -Wchar-subscripts -Wcomment -Wformat -Wimplicit -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wno-long-long -Wparentheses -Wpointer-arith -Wredundant-decls -Wreturn-type -Wswitch -Wtrigraphs -Wuninitialized -Wunused -Wwrite-strings -Wshadow -Wstrict-prototypes -Wcast-qual $CFLAGS"]
29 | AC_SUBST(CFLAGS)
30 |
31 | # Feature-test macros for Linux, recorded in config.h, requesting GNU, BSD and X/Open declarations
32 | AC_DEFINE(_DEFAULT_SOURCE, 1, GNU functions)
33 | AC_DEFINE(_GNU_SOURCE, 1, GNU functions)
34 | AC_DEFINE(_BSD_SOURCE, 1, BSD functions)
35 | AC_DEFINE(_XOPEN_SOURCE, 500, XOpen functions)
36 |
37 | # Equivalent feature-test macro for Mac OS
38 | AC_DEFINE(_DARWIN_C_SOURCE, 1, MacOS functions)
39 |
40 | # Check for required programs; configure aborts when printf or xsltproc cannot be found
41 | AC_PROG_INSTALL
42 | AC_PROG_CC
43 | AC_PATH_PROG([PRINTF], [printf])
44 | if test -z "${PRINTF}"; then
45 | AC_MSG_ERROR([printf not found]);
46 | fi
47 | AC_PATH_PROG([XSLTPROC], [xsltproc])
48 | if test -z "${XSLTPROC}"; then
49 | AC_MSG_ERROR([xsltproc not found]);
50 | fi
51 |
52 | # Hand the path of the printf program found above to the compiler as the PRINTF_PROGRAM string macro
53 | [CFLAGS="$CFLAGS -DPRINTF_PROGRAM=\\\""${PRINTF}"\\\""]
54 |
55 | # iconv_open is required; when the library search fails, Cygwin with a static libiconv still links -liconv
56 | AC_SEARCH_LIBS([iconv_open], [iconv],,
57 | [if test `uname -o` = 'Cygwin' -a -f /usr/lib/libiconv.a; then LIBS="-liconv ${LIBS}"; else AC_MSG_ERROR([required function iconv_open missing]); fi])
58 |
59 | # Check for required header files; a missing one is a fatal configure error
60 | AC_CHECK_HEADERS(sys/wait.h assert.h ctype.h err.h errno.h stddef.h stdint.h stdio.h stdlib.h string.h unistd.h, [],
61 | [AC_MSG_ERROR([required header file '$ac_header' missing])])
62 |
63 | # Optional features: NDEBUG is defined unless --enable-assertions is given; gprof and -Werror are opt-in
64 | AC_ARG_ENABLE(assertions,
65 | AS_HELP_STRING([--enable-assertions],
66 | [enable debugging sanity checks (default NO)]),
67 | [test x"$enableval" = "xyes" || AC_DEFINE(NDEBUG, 1, [disable assertions])],
68 | [AC_DEFINE(NDEBUG, 1, [disable assertions])])
69 | AC_ARG_ENABLE(gprof,
70 | AS_HELP_STRING([--enable-gprof],
71 | [Compile and link with gprof(1) support (default NO)]),
72 | [test x"$enableval" = "xyes" && CFLAGS="${CFLAGS} -pg"])
73 | AC_ARG_ENABLE(Werror,
74 | AS_HELP_STRING([--enable-Werror],
75 | [enable compilation with -Werror flag (default NO)]),
76 | [test x"$enableval" = "xyes" && CFLAGS="${CFLAGS} -Werror"])
77 |
78 | # Files produced by config.status
79 | AC_CONFIG_FILES(Makefile)
80 | AC_CONFIG_HEADERS(config.h)
81 |
82 | # Emit config.status and run it
83 | AC_OUTPUT
84 |
--------------------------------------------------------------------------------
/csv.xsl:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
/csvprintf.1.in:
--------------------------------------------------------------------------------
1 | .\" -*- nroff -*-
2 | .\"
3 | .\" csvprintf - Simple CSV file parser for the UNIX command line
4 | .\"
5 | .\" Copyright 2010 Archie L. Cobbs
6 | .\"
7 | .\" Licensed under the Apache License, Version 2.0 (the "License"); you may
8 | .\" not use this file except in compliance with the License. You may obtain
9 | .\" a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
10 | .\"
11 | .\" Unless required by applicable law or agreed to in writing, software
12 | .\" distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 | .\" WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 | .\" License for the specific language governing permissions and limitations
15 | .\" under the License.
16 | .\"
17 | .Dd November 30, 2010
18 | .Dt CSVPRINTF 1
19 | .Os
20 | .Sh NAME
21 | .Nm csvprintf
22 | .Nd CSV file parser
23 | .Sh SYNOPSIS
24 | .Nm csvprintf
25 | .Bk -words
26 | .Op Ar options
27 | .Ar format
28 | .Ek
29 | .Pp
30 | .Nm csvprintf
31 | .Bk -words
32 | .Fl b
33 | .Op Ar options
34 | .Ek
35 | .Pp
36 | .Nm csvprintf
37 | .Bk -words
38 | .Fl j
39 | .Op Ar options
40 | .Ek
41 | .Pp
42 | .Nm csvprintf
43 | .Bk -words
44 | .Fl x
45 | .Op Ar options
46 | .Ek
47 | .Pp
48 | .Nm csvprintf
49 | .Bk -words
50 | .Fl X
51 | .Op Ar options
52 | .Ek
53 | .Pp
54 | .Nm xml2csv
55 | .Bk -words
56 | .Op Ar file.xml
57 | .Ek
58 | .Sh DESCRIPTION
59 | .Nm
60 | is a simple UNIX command line utility for parsing CSV files.
61 | .Pp
62 | In the first form,
63 | .Nm
64 | works like the
65 | .Xr printf 1
66 | command line utility: you supply a
67 | .Xr printf 1
68 | format string on the command line, and each row of the CSV file is split into arguments and formatted accordingly.
69 | .Pp
70 | The format specifiers in the format string contain numeric or symbolic column accessors to specify which CSV column to format.
71 | .Pp
72 | A numeric column accessor is a sequence of decimal digits followed by the
73 | .Pa $
74 | character (the same accessor format supported by
75 | .Xr printf 1 ) .
76 | So for example,
77 | .Pa \(dq%3$d\(dq
78 | would format the third CSV column as a decimal value.
79 | In addition, the
80 | .Pa \(dq%0$d\(dq
81 | specifier will print the number of columns in the record.
82 | .Pp
83 | When the
84 | .Fl i
85 | flag is given, the first row is assumed to contain column names and is not output.
86 | This allows symbolic, instead of numeric, column accessors to be used.
87 | A symbolic column accessor is the column name enclosed in curly braces.
88 | .Pp
89 | For example, if the first row is
90 | .Pa FirstName,LastName,IdNum
91 | then the format string
92 | .Pa \(dq%{IdNum}04d: %{LastName}s, %{FirstName}s\(dq
93 | would be equivalent to the format string
94 | .Pa \(dq%3$04d: %2$s, %1$s\(dq .
95 | .Pp
96 | Specifying a column name that does not appear in the first row generates an error,
97 | so the use of symbolic column accessors adds an extra consistency check.
98 | .Sh XML Mode
99 | With
100 | .Fl x ,
101 | the entire file is converted into an XML document.
102 | .Pp
103 | The document element is
104 | .Ar "<csv>" .
105 | .Pp
106 | Each CSV row becomes a
107 | .Ar "<row>"
108 | element containing its individual column values as sub-elements.
109 | .Pp
110 | The column value sub-elements are named
111 | .Ar "<col1>" ,
112 | .Ar "<col2>" ,
113 | etc.;
114 | with
115 | .Fl i ,
116 | the sub-elements use the column names read from the first row (with illegal characters replaced by underscores).
117 | .Pp
118 | In XML mode, a character encoding must be assumed; see
119 | .Fl e .
120 | .Pp
121 | The
122 | .Nm xml2csv
123 | command can convert XML documents generated by
124 | .Nm "csvprintf -x"
125 | back into CSV.
126 | .Sh JSON Mode
127 | With
128 | .Fl j ,
129 | each row is converted into a JSON document.
130 | .Pp
131 | This form is described by RFC 7464 and consists of concatenated JSON documents
132 | framed by ASCII RS and LF control characters, which is compatible with the
133 | .Xr jq 1
134 | utility's
135 | .Fl \-seq
136 | flag.
137 | .Pp
138 | Normally each row is written as a string array;
139 | with
140 | .Fl i ,
141 | each row is written as an object, using column names for fields.
142 | An error occurs if two columns have the same name.
143 | .Pp
144 | In JSON mode, a character encoding must be assumed; see
145 | .Fl e .
146 | .Sh Bash Mode
147 | With
148 | .Fl b ,
149 | each row is converted into
150 | .Xr bash 1
151 | variable assignment(s) which may be applied with the
152 | .Xr eval 1
153 | command.
154 | .Pp
155 | Normally the output just assigns
156 | .Ar ROW
157 | as an array of values.
158 | The resulting output can be used like this:
159 | .Bd -literal -offset indent
160 | cat input.csv | csvprintf -b | while read -r LINE; do
161 | eval "${LINE}"
162 | echo "The first column is: ${ROW[0]}"
163 | echo "The second column is: ${ROW[1]}"
164 | ...
165 | done
166 | .Ed
167 | .Pp
168 | With
169 | .Fl i ,
170 | each column value is assigned to a separate variable whose name is the corresponding column name
171 | (with underscores replacing non-alphanumeric characters), and an error occurs if two variables have the same name.
172 | .Pp
173 | So an input file like this:
174 | .Bd -literal -offset indent
175 | "Last Name","First Name","Registered???"
176 | "Washington","George","Y"
177 | "Lincoln","Abe","N"
178 | .Ed
179 | .Pp
180 | can be processed like this:
181 | .Bd -literal -offset indent
182 | cat input.csv | csvprintf -bi -p ROW_ | while read -r LINE; do
183 | eval "${LINE}"
184 | echo "First name: ${ROW_First_Name}"
185 | echo "Last name: ${ROW_Last_Name}"
186 | echo "Registered: ${ROW_Registered___}"
187 | done
188 | .Ed
189 | .Sh Bash Mode Security Concerns
190 | There are two security issues to be aware of when using Bash Mode.
191 | .Pp
192 | First, the
193 | .Fl i
194 | flag opens a security hole because Bash has several special variables like
195 | .Ar PATH ,
196 | .Ar TMPDIR ,
197 | etc., which could be overwritten by malicious input.
198 | To prevent this,
199 | .Nm
200 | omits known Bash variables, but for tighter security use the
201 | .Fl c
202 | flag to explicitly white-list the variables you need.
203 | In addition, use of the
204 | .Fl p
205 | flag is always recommended in Bash Mode to help avoid namespace collisions.
206 | .Pp
207 | Second, if the Bash Mode output is piped into
208 | .Ar "while read"
209 | then the
210 | .Fl r
211 | flag must be used to prevent extraneous decoding of backslash escapes.
212 | .Sh Input Encoding
213 | In all modes, lines must be terminated by LF bytes or CR+LF byte pairs, and the separator and quote characters must be recognizable as single byte values.
214 | This parsing behavior is compatible with ASCII, ISO-8859-1, UTF-8, etc., but not multi-byte encodings such as UTF-16, which must be re-encoded (e.g., to UTF-8) first.
215 | .Pp
216 | In normal and Bash modes, column values are copied from input to output bytewise without interpretation.
217 | .Pp
218 | In XML and JSON modes, column values must be interpreted according to an assumed character encoding.
219 | This encoding defaults to ISO-8859-1 but can be changed with the
220 | .Fl e
221 | flag.
222 | .Sh OPTIONS
223 | .Bl -tag -width Ds
224 | .It Fl b
225 | Convert each CSV row into a
226 | .Xr bash 1
227 | variable assignment line.
228 | .It Fl c Ar colname
229 | Specify a column to be included when using column names in XML, JSON, or Bash output.
230 | .Pp
231 | Without this flag, all columns are included.
232 | When this flag is used one or more times,
233 | only the specified columns are included.
234 | .Pp
235 | If any
236 | .Ar colname
237 | doesn't exist, an error occurs.
238 | .It Fl e
239 | Specify input character encoding for XML or JSON mode.
240 | .Pp
241 | By default, ISO-8859-1 is assumed.
242 | .It Fl f
243 | Read CSV input from the specified file.
244 | .Pp
245 | By default (or if ``-'' is specified),
246 | .Nm
247 | reads from standard input.
248 | .It Fl i
249 | Use the column names read from the first record when generating the output.
250 | .Pp
251 | In normal mode, or when used with the
252 | .Fl x
253 | flag, this flag is equivalent to
254 | .Fl n .
255 | .Pp
256 | In JSON mode, output objects instead of arrays and use column names for the object fields.
257 | .Pp
258 | In Bash mode, output a variable for each column instead of a single
259 | .Ar ROW
260 | array variable.
261 | .Pp
262 | It's possible for a row to have more columns than the column header row did.
263 | In that case,
264 | .Nm
265 | reverts to using
266 | .Ar col1 ,
267 | .Ar col2 ,
268 | etc., for any extra columns.
269 | .Pp
270 | This flag implies
271 | .Fl n .
272 | .It Fl j
273 | Convert the input into a JavaScript Object Notation (JSON) text sequence document.
274 | .It Fl n
275 | Assume the first CSV record contains column names and omit it from the output.
276 | .Pp
277 | In normal mode, enable symbolic column accessors.
278 | .It Fl p
279 | Specify a common prefix (UTF-8 encoding) to use with all column names in the output.
280 | .Pp
281 | This flag is ignored unless
282 | .Fl i
283 | is specified.
284 | .Pp
285 | .It Fl q
286 | Specify an alternate CSV column quote character.
287 | The usual backslash escape sequences are accepted.
288 | .Pp
289 | The default quote character is double quote.
290 | .It Fl s
291 | Specify an alternate CSV column separator character.
292 | The usual backslash escape sequences are accepted.
293 | .Pp
294 | The default separator character is comma.
295 | .It Fl h
296 | Output usage message and exit.
297 | .It Fl v
298 | Output version information and exit.
299 | .It Fl x
300 | Convert the input into an XML document.
301 | .It Fl X
302 | Convert the input into an XML document using column names for value sub-elements.
303 | .Pp
304 | This flag implies
305 | .Fl n .
306 | .El
307 | .Sh CSV FORMAT
308 | .Nm
309 | parses according to the format described by ``The Comma Separated Value (CSV) File Format'' (see below).
310 | In particular, quote characters must be escaped with an extra quote and whitespace surrounding column values is ignored.
311 | .Sh EXIT STATUS
312 | .Nm
313 | exits with status 1 if invalid CSV input is detected.
314 | Otherwise, if an invocation of
315 | .Xr printf 1
316 | fails, processing stops and that exit value is returned.
317 | .Sh FILES
318 | .Bl -tag -width Ds -compact
319 | .It Pa @pkgdatadir@/csv.xsl
320 | XSL transform that converts XML back into CSV format.
321 | .El
322 | .Sh BUGS
323 | .Pp
324 | Under the hood,
325 | .Nm
326 | invokes the
327 | .Xr printf 1
328 | executable on each CSV row it parses, which makes it relatively slow.
329 | .Sh SEE ALSO
330 | .Xr printf 1 ,
331 | .Xr printf 3 ,
332 | .Xr jq 1 .
333 | .Rs
334 | .%T "csvprintf: Simple CSV file parser for the UNIX command line"
335 | .%O https://github.com/archiecobbs/csvprintf
336 | .Re
337 | .Rs
338 | .%T "The Comma Separated Value (CSV) File Format"
339 | .%O http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm
340 | .Re
341 | .Rs
342 | .%T "RFC 7464: JavaScript Object Notation (JSON) Text Sequences"
343 | .%O https://datatracker.ietf.org/doc/html/rfc7464
344 | .Re
345 | .Sh AUTHOR
346 | .An Archie L. Cobbs Aq archie.cobbs@gmail.com
347 |
--------------------------------------------------------------------------------
/csvprintf.h:
--------------------------------------------------------------------------------
1 |
2 | //
3 | // csvprintf - Simple CSV file parser for the UNIX command line
4 | //
5 | // Copyright 2010 Archie L. Cobbs
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License"); you may
8 | // not use this file except in compliance with the License. You may obtain
9 | // a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // Unless required by applicable law or agreed to in writing, software
14 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
16 | // License for the specific language governing permissions and limitations
17 | // under the License.
18 | //
19 |
20 | #include "config.h"
21 |
// Program version string; defined in another translation unit
// (presumably the generated gitrev.c - TODO confirm).
extern const char *const csvprintf_version;
23 |
24 |
--------------------------------------------------------------------------------
/main.c:
--------------------------------------------------------------------------------
1 |
2 | //
3 | // csvprintf - Simple CSV file parser for the UNIX command line
4 | //
5 | // Copyright 2010 Archie L. Cobbs
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License"); you may
8 | // not use this file except in compliance with the License. You may obtain
9 | // a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing, software
12 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 | // License for the specific language governing permissions and limitations
15 | // under the License.
16 | //
17 |
18 | #include "csvprintf.h"
19 |
20 | #include
21 |
22 | #include
23 | #include
24 | #include
25 | #include
26 | #include
27 | #include
28 | #include
29 | #include
30 | #include
31 | #include
32 | #include
33 |
// Defaults for the CSV quote and field separator characters (see "quote" and "fsep" below)
#define DEFAULT_QUOTE_CHAR      '"'
#define DEFAULT_FSEP_CHAR       ','

// Target encoding handed to iconv_open() and declared in the XML header
#define XML_OUTPUT_ENCODING     "UTF-8"

// Output modes; main() selects exactly one of these from the command line flags
#define MODE_NORMAL             0           // normal mode
#define MODE_XML_PLAIN          1           // plain XML mode
#define MODE_XML_NAMES          2           // XML mode with names
#define MODE_JSON               3           // JSON mode
#define MODE_BASH               4           // bash mode

// A single column value while it is being read, one byte at a time (see addchar())
struct col {
    char    *buf;                           // heap buffer holding the bytes read so far (NULL until first byte)
    size_t  len;                            // number of bytes currently stored in buf
    size_t  alloc;                          // number of bytes allocated for buf
};

// A parsed row: a list of column values
struct row {
    char    **fields;                       // the column values, one string per column
    size_t  num;                            // number of entries in fields[]
    size_t  alloc;                          // presumably the allocated capacity of fields[] (see growrow()) - TODO confirm
};

// Quote and field separator characters currently in effect ("-q" and "-s" override the defaults)
static int quote = DEFAULT_QUOTE_CHAR;
static int fsep = DEFAULT_FSEP_CHAR;

// Variable names that main() refuses to emit as per-column assignments in bash mode,
// so that CSV input cannot overwrite them when the output is eval'd
static const char *bash_special_vars[] = {
    "BASH", "BASHOPTS", "BASHPID", "BASH_ALIASES", "BASH_ARGC", "BASH_ARGV", "BASH_CMDS", "BASH_COMMAND",
    "BASH_EXECUTION_STRING", "BASH_LINENO", "BASH_LOADABLES_PATH", "BASH_REMATCH", "BASH_SOURCE", "BASH_SUBSHELL",
    "BASH_VERSINFO", "BASH_VERSION", "COMP_CWORD", "COMP_KEY", "COMP_LINE", "COMP_POINT", "COMP_TYPE", "COMP_WORDBREAKS",
    "COMP_WORDS", "COPROC", "DIRSTACK", "EUID", "FUNCNAME", "GROUPS", "HISTCMD", "HOSTNAME", "HOSTTYPE", "LINENO",
    "MACHTYPE", "MAPFILE", "OLDPWD", "OPTARG", "OPTIND", "OSTYPE", "PIPESTATUS", "PPID", "PWD", "RANDOM", "READLINE_LINE",
    "READLINE_POINT", "REPLY", "SECONDS", "SHELLOPTS", "SHLVL", "UID", "BASH_COMPAT", "BASH_ENV", "BASH_XTRACEFD", "CDPATH",
    "CHILD_MAX", "COLUMNS", "COMPREPLY", "EMACS", "ENV", "EXECIGNORE", "FCEDIT", "FIGNORE", "FUNCNEST", "GLOBIGNORE",
    "HISTCONTROL", "HISTFILE", "HISTFILESIZE", "HISTIGNORE", "HISTSIZE", "HISTTIMEFORMAT", "HOME", "HOSTFILE", "IFS",
    "IGNOREEOF", "INPUTRC", "LANG", "LC_ALL", "LC_COLLATE", "LC_CTYPE", "LC_MESSAGES", "LC_NUMERIC", "LC_TIME", "LINES",
    "MAIL", "MAILCHECK", "MAILPATH", "OPTERR", "PATH", "POSIXLY_CORRECT", "PROMPT_COMMAND", "PROMPT_DIRTRIM", "PS0", "PS1",
    "PS2", "PS3", "PS4", "SHELL", "TIMEFORMAT", "TMOUT", "TMPDIR", "auto_resume", "histchars"
};
#define NUM_BASH_SPECIAL_VARS   (sizeof(bash_special_vars) / sizeof(*bash_special_vars))
73 |
// Forward declarations for the file-local helpers defined after main()
static int parsechar(const char *str);
static int parsefmt(char *fmt, const struct row *column_names, unsigned int **argsp);
static int readcol(FILE *fp, struct row *row, int *linenum);
static int readqcol(FILE *fp, struct col *col, int *linenum);
static int readuqcol(FILE *fp, struct col *col, int *linenum);
static int readch(FILE *fp, int collapse);
static void freerow(struct row *row);
static void print_xml_tag_name(const char *tag, int linenum);
static void print_json_string(const char *string, int linenum);
static void print_bash_name(const char *string);
static void print_bash_value(const char *string);
static char bash_name_safe(char ch, int first);
static int decode_utf8(const char *const obuf, size_t olen, int *lenp, int linenum);
static void convert_to_utf8(iconv_t icd, struct row *row, int linenum);
static const char *escape_xml_char(int uchar);
static char *eatwidthprec(const char *fspec, const char *desc, const struct row *column_names,
    char *s, int *nargs, unsigned int *args);
static char *eataccessor(const char *fspec, const char *desc, const struct row *column_names,
    char *s, int *nargs, unsigned int *args);
static void addcolumn(struct row *row, const struct col *col);
static void addstring(struct row *row, const char *const string);
static int findstring(struct row *row, const char *const string);
static int findstring2(const char *const *list, size_t num, const char *const string);
static void growrow(struct row *row);
static void addchar(struct col *col, int ch);
static void trim(struct col *col);
static void usage(void);
static void version(void);
102 |
103 | int
104 | main(int argc, char **argv)
105 | {
106 | const char *input = "-";
107 | const char *encoding = "ISO-8859-1";
108 | const char *name_prefix = "";
109 | char *format = NULL;
110 | iconv_t icd = NULL;
111 | FILE *fp = NULL;
112 | struct row row;
113 | struct row column_names;
114 | struct row allowed_column_names;
115 | unsigned int *args = NULL;
116 | int mode = -1;
117 | int read_column_names = 0; // strip off first row containing column names
118 | int use_column_names = 0; // use column names from first row in output
119 | int first_row = 0;
120 | int nargs = 0;
121 | int file_done;
122 | int linenum;
123 | int new_mode;
124 | int ch;
125 |
126 | // Initialize
127 | memset(&row, 0, sizeof(row));
128 | memset(&column_names, 0, sizeof(column_names));
129 | memset(&allowed_column_names, 0, sizeof(allowed_column_names));
130 |
131 | // Parse command line
132 | while ((ch = getopt(argc, argv, "bc:e:f:hijnp:q:s:vxX")) != -1) {
133 | switch (ch) {
134 | case 'b':
135 | if (mode != -1 && mode != MODE_BASH)
136 | errx(1, "flag \"%c\" conflicts with previous mode flag", ch);
137 | mode = MODE_BASH;
138 | break;
139 | case 'c':
140 | addstring(&allowed_column_names, optarg);
141 | break;
142 | case 'e':
143 | encoding = optarg;
144 | break;
145 | case 'f':
146 | input = optarg;
147 | break;
148 | case 'i':
149 | read_column_names = 1;
150 | use_column_names = 1;
151 | break;
152 | case 'n':
153 | read_column_names = 1;
154 | break;
155 | case 'j':
156 | if (mode != -1 && mode != MODE_JSON)
157 | errx(1, "flag \"%c\" conflicts with previous mode flag", ch);
158 | mode = MODE_JSON;
159 | break;
160 | case 'X':
161 | case 'x':
162 | new_mode = ch == 'X' ? MODE_XML_NAMES : MODE_XML_PLAIN;
163 | if (mode != -1 && mode != new_mode)
164 | errx(1, "flag \"%c\" conflicts with previous mode flag", ch);
165 | if ((mode = new_mode) == MODE_XML_NAMES) {
166 | use_column_names = 1;
167 | read_column_names = 1;
168 | }
169 | break;
170 | case 'p':
171 | name_prefix = optarg;
172 | break;
173 | case 'q':
174 | if ((quote = parsechar(optarg)) == -1)
175 | errx(1, "invalid argument to \"-%c\"", ch);
176 | break;
177 | case 's':
178 | if ((fsep = parsechar(optarg)) == -1)
179 | errx(1, "invalid argument to \"-%c\"", ch);
180 | break;
181 | case 'h':
182 | usage();
183 | exit(0);
184 | case 'v':
185 | version();
186 | exit(0);
187 | case '?':
188 | default:
189 | usage();
190 | exit(1);
191 | }
192 | }
193 | if (mode == -1)
194 | mode = MODE_NORMAL;
195 | argc -= optind;
196 | argv += optind;
197 | if (argc != (mode == MODE_NORMAL ? 1 : 0)) {
198 | usage();
199 | exit(1);
200 | }
201 |
202 | // Backward compatbitility hack
203 | if (mode == MODE_XML_PLAIN)
204 | use_column_names = 0;
205 |
206 | // Sanity check
207 | if (quote == fsep)
208 | err(1, "quote and field separators cannot be the same character");
209 | if (allowed_column_names.num > 0 && !read_column_names)
210 | err(1, "\"-c\" flag requires \"-n\" flag");
211 |
212 | // Get and (maybe) parse format string (normal mode only)
213 | if (mode == MODE_NORMAL) {
214 | format = argv[0];
215 |
216 | // Parse format string - unless we need to defer
217 | if (!read_column_names)
218 | nargs = parsefmt(format, NULL, &args);
219 | }
220 |
221 | // Open input
222 | if (strcmp(input, "-") == 0)
223 | fp = stdin;
224 | else if ((fp = fopen(input, "r")) == NULL)
225 | err(1, "%s", input);
226 |
227 | // Initialize iconv
228 | switch (mode) {
229 | case MODE_XML_PLAIN:
230 | case MODE_XML_NAMES:
231 | case MODE_JSON:
232 | if ((icd = iconv_open(XML_OUTPUT_ENCODING, encoding)) == (iconv_t)-1)
233 | err(1, "%s", encoding);
234 | break;
235 | default:
236 | break;
237 | }
238 |
239 | // XML opening
240 | if (mode == MODE_XML_PLAIN || mode == MODE_XML_NAMES) {
241 | printf("\n", XML_OUTPUT_ENCODING);
242 | printf("\n");
243 | }
244 |
245 | // Read and parse input
246 | linenum = 1;
247 | first_row = 1;
248 | for (file_done = 0; !file_done; ) {
249 |
250 | // Start parsing next row
251 | switch ((ch = readch(fp, 1))) {
252 | case EOF:
253 | file_done = 1;
254 | continue;
255 | case '\n': // ignore completely empty lines
256 | linenum++;
257 | continue;
258 | default:
259 | ungetc(ch, fp);
260 | break;
261 | }
262 |
263 | // Read columns
264 | while (readcol(fp, &row, &linenum))
265 | ;
266 |
267 | // Gather column names from first row, if configured
268 | if (first_row && read_column_names) {
269 | int i, j;
270 |
271 | // Convert to UTF-8 if needed
272 | if (icd != NULL)
273 | convert_to_utf8(icd, &row, linenum);
274 |
275 | // Save column names
276 | memcpy(&column_names, &row, sizeof(row));
277 | memset(&row, 0, sizeof(row));
278 |
279 | // If we had to defer parsing format string until we had the column names, do that now
280 | if (mode == MODE_NORMAL)
281 | nargs = parsefmt(format, &column_names, &args);
282 |
283 | // Check that all explicitly specified columns are actually present
284 | for (i = 0; i < allowed_column_names.num; i++) {
285 | if (!findstring(&column_names, allowed_column_names.fields[i]))
286 | errx(1, "column \"%s\" not found", allowed_column_names.fields[i]);
287 | }
288 |
289 | // Check for illegal or duplicate column names
290 | switch (mode) {
291 | case MODE_JSON:
292 | for (i = 0; i < column_names.num - 1; i++) {
293 | if (allowed_column_names.num > 0
294 | && !findstring(&allowed_column_names, column_names.fields[i]))
295 | continue;
296 | for (j = i + 1; j < column_names.num; j++) {
297 | if (strcmp(column_names.fields[i], column_names.fields[j]) == 0)
298 | errx(1, "duplicate column name \"%s\"", column_names.fields[i]);
299 | }
300 | }
301 | break;
302 | case MODE_BASH:
303 | for (i = 0; i < column_names.num; i++) {
304 | char *namei;
305 |
306 | if (allowed_column_names.num > 0
307 | && !findstring(&allowed_column_names, column_names.fields[i]))
308 | continue;
309 | if (asprintf(&namei, "%s%s", name_prefix, column_names.fields[i]) == -1)
310 | err(1, "asprintf");
311 | if (*namei == '\0')
312 | errx(1, "illegal empty string column name");
313 | for (j = i + 1; j < column_names.num; j++) {
314 | char *namej;
315 | int same = 1;
316 | int k;
317 |
318 | if (asprintf(&namej, "%s%s", name_prefix, column_names.fields[j]) == -1)
319 | err(1, "asprintf");
320 | for (k = 0; namei[k] != '\0' || namej[k] != '\0'; k++) {
321 | if (namei[k] == '\0' || namej[k] == '\0'
322 | || bash_name_safe(namei[k], k == 0) != bash_name_safe(namej[k], k == 0)) {
323 | same = 0;
324 | break;
325 | }
326 | }
327 | if (same)
328 | errx(1, "duplicate (bash variable) column names \"%s\" and \"%s\"", namei, namej);
329 | free(namej);
330 | }
331 | free(namei);
332 | }
333 | break;
334 | default:
335 | break;
336 | }
337 |
338 | // Proceed
339 | goto next;
340 | }
341 |
342 | // Handle data row
343 | switch (mode) {
344 | case MODE_JSON:
345 | {
346 | int col;
347 |
348 | // Convert columns to UTF-8
349 | convert_to_utf8(icd, &row, linenum);
350 |
351 | // Output row
352 | printf("\x1e%c", use_column_names ? '{' : '[');
353 | for (col = 0; col < row.num; col++) {
354 |
355 | // Check whether column should be included
356 | if (use_column_names
357 | && allowed_column_names.num > 0
358 | && col < column_names.num
359 | && !findstring(&allowed_column_names, column_names.fields[col]))
360 | continue;
361 |
362 | // Add comma if needed
363 | if (col > 0)
364 | putchar(',');
365 |
366 | // Add column name (if using object notation)
367 | if (use_column_names) {
368 | if (col < column_names.num) {
369 | putchar('"');
370 | print_json_string(name_prefix, linenum);
371 | print_json_string(column_names.fields[col], linenum);
372 | putchar('"');
373 | } else
374 | printf("\"col%d\"", col + 1);
375 | putchar(':');
376 | }
377 |
378 | // Add column value
379 | putchar('"');
380 | print_json_string(row.fields[col], linenum);
381 | putchar('"');
382 | }
383 | printf("%c\n", use_column_names ? '}' : ']');
384 | break;
385 | }
386 | case MODE_XML_PLAIN:
387 | case MODE_XML_NAMES:
388 | {
389 | int col;
390 |
391 | // Convert columns to UTF-8
392 | convert_to_utf8(icd, &row, linenum);
393 |
394 | // Output columns for row
395 | printf(" \n");
396 | for (col = 0; col < row.num; col++) {
397 | const char *ptr = row.fields[col];
398 | int len = strlen(ptr);
399 | int use_column_names_this_tag;
400 | const char *esc;
401 | int uchar;
402 | int uclen;
403 | int i;
404 |
405 | // Check whether column should be included
406 | if (use_column_names
407 | && allowed_column_names.num > 0
408 | && col < column_names.num
409 | && !findstring(&allowed_column_names, column_names.fields[col]))
410 | continue;
411 |
412 | // Determine whether we can actually use column name for XML tag name
413 | use_column_names_this_tag = use_column_names && col < column_names.num
414 | && (*name_prefix != '\0' || *column_names.fields[col] != '\0');
415 |
416 | // Open XML tag
417 | printf(" <");
418 | if (use_column_names_this_tag) {
419 | print_xml_tag_name(name_prefix, linenum);
420 | print_xml_tag_name(column_names.fields[col], linenum);
421 | } else
422 | printf("col%d", col + 1);
423 | printf(">");
424 |
425 | // Output XML characters, escaped as needed
426 | while (len > 0) {
427 | uchar = decode_utf8(ptr, len, &uclen, linenum);
428 | if ((esc = escape_xml_char(uchar)) != NULL)
429 | printf("%s", esc);
430 | else {
431 | for (i = 0; i < uclen; i++)
432 | putchar(ptr[i]);
433 | }
434 | ptr += uclen;
435 | len -= uclen;
436 | }
437 |
438 | // Close XML tag
439 | printf("");
440 | if (use_column_names_this_tag) {
441 | print_xml_tag_name(name_prefix, linenum);
442 | print_xml_tag_name(column_names.fields[col], linenum);
443 | } else
444 | printf("col%d", col + 1);
445 | printf(">\n");
446 | }
447 | printf("
\n");
448 | break;
449 | }
450 | case MODE_BASH:
451 | {
452 | char bash_name_buf[64]; // buffer just needs to be be enough to hold any of the bash_special_vars[]
453 | int col;
454 |
455 | // Start array (if needed)
456 | if (!use_column_names)
457 | printf("ROW=(");
458 |
459 | // Output row
460 | for (col = 0; col < row.num; col++) {
461 |
462 | // Check whether column should be included
463 | if (use_column_names
464 | && allowed_column_names.num > 0
465 | && col < column_names.num
466 | && !findstring(&allowed_column_names, column_names.fields[col]))
467 | continue;
468 |
469 | // Elide any BASH special variable names
470 | if (use_column_names && col < column_names.num) {
471 | snprintf(bash_name_buf, sizeof(bash_name_buf), "%s%s", name_prefix, column_names.fields[col]);
472 | if (findstring2(bash_special_vars, NUM_BASH_SPECIAL_VARS, bash_name_buf))
473 | continue;
474 | }
475 |
476 | // Add space
477 | if (col > 0 || !use_column_names)
478 | putchar(' ');
479 |
480 | // Add column name (if using column names)
481 | if (use_column_names) {
482 | if (col < column_names.num) {
483 | print_bash_name(name_prefix);
484 | print_bash_name(column_names.fields[col]);
485 | } else
486 | printf("col%d", col + 1);
487 | putchar('=');
488 | }
489 |
490 | // Add column value
491 | print_bash_value(row.fields[col]);
492 |
493 | // Add separator
494 | if (use_column_names)
495 | putchar(';');
496 | }
497 |
498 | // End array (if needed)
499 | if (!use_column_names)
500 | printf(" )");
501 |
502 | // End line
503 | printf("\n");
504 | break;
505 | }
506 | case MODE_NORMAL:
507 | {
508 | char ncolbuf[32];
509 | char empty[] = { '\0' };
510 | pid_t pid;
511 | pid_t result;
512 | int status;
513 | int i;
514 |
515 | fflush(stdout);
516 | fflush(stderr);
517 | switch ((pid = fork())) {
518 | case -1:
519 | err(1, "fork");
520 | case 0:
521 | close(0);
522 | if ((argv = malloc((nargs + 3) * sizeof(*argv))) == NULL)
523 | err(1, "malloc");
524 | argv[0] = strdup("printf");
525 | if (argv[0] == NULL)
526 | err(1, "strdup");
527 | argv[1] = format;
528 | snprintf(ncolbuf, sizeof(ncolbuf), "%lu", (unsigned long)row.num);
529 | for (i = 0; i < nargs; i++)
530 | argv[2 + i] = args[i] == 0 ? ncolbuf : args[i] <= row.num ? row.fields[args[i] - 1] : empty;
531 | argv[2 + nargs] = NULL;
532 | execvp(PRINTF_PROGRAM, argv);
533 | err(1, "execvp");
534 | default:
535 | while (1) {
536 | if ((result = waitpid(pid, &status, 0)) == -1)
537 | err(1, "waitpid");
538 | if (WIFEXITED(status)) {
539 | if (WEXITSTATUS(status) != 0)
540 | exit(status);
541 | break;
542 | }
543 | if (WIFSIGNALED(status))
544 | exit(1);
545 | }
546 | break;
547 | }
548 | break;
549 | }
550 | default:
551 | errx(1, "internal error");
552 | }
553 |
554 | next:
555 | // Free row memory
556 | freerow(&row);
557 | first_row = 0;
558 | }
559 |
560 | // XML closing
561 | if (mode == MODE_XML_PLAIN || mode == MODE_XML_NAMES)
562 | printf("\n");
563 |
564 | // Clean up iconv
565 | if (icd != NULL)
566 | (void)iconv_close(icd);
567 |
568 | // Clean up
569 | if (fp != stdin)
570 | fclose(fp);
571 | freerow(&column_names);
572 | free(args);
573 |
574 | // Done
575 | fflush(stdout);
576 | return 0;
577 | }
578 |
//
// Output XML tag name, substituting invalid characters
//
// "tag" is decoded one UTF-8 character at a time. ASCII letters and '_' are
// copied through in any position; ASCII digits, '-' and '.' are copied through
// in any position but the first. Every other character, including every
// non-ASCII character, is replaced by a single '_'.
//
// "linenum" is only used by decode_utf8() for error reporting.
//
static void
print_xml_tag_name(const char *tag, int linenum)
{
    size_t remain = strlen(tag);        // computed once rather than on every iteration
    int first = 1;
    int uchar;
    int uclen;
    int ok;
    int i;

    while (remain > 0) {
        uchar = decode_utf8(tag, remain, &uclen, linenum);

        // The <ctype.h> classifiers are only defined for unsigned char values and EOF,
        // so never hand them a code point outside the ASCII range
        if (uchar >= 0x80)
            ok = 0;
        else if (isalpha(uchar) || uchar == '_')
            ok = 1;
        else
            ok = !first && (isdigit(uchar) || uchar == '-' || uchar == '.');
        first = 0;

        // Copy the character through, or substitute an underscore
        if (!ok)
            putchar('_');
        else {
            for (i = 0; i < uclen; i++)
                putchar(tag[i]);
        }
        tag += uclen;
        remain -= uclen;
    }
}
605 |
//
// Get the XML escape sequence for a Unicode code point, if it needs one.
//
// Returns NULL if "uchar" may be written to XML character data as is.
// Otherwise returns the text to write in its place: a predefined entity for
// '>', '<' and '&', or a numeric character reference for anything else.
//
// A numeric reference is returned in a static buffer, which is overwritten
// by the next call that returns one.
//
static const char *
escape_xml_char(int uchar)
{
    static char buf[32];

    // Markup characters get their predefined entities
    switch (uchar) {
    case '>':
        return "&gt;";
    case '<':
        return "&lt;";
    case '&':
        return "&amp;";
    default:
        break;
    }

    // Pass valid and unrestricted characters through (but not CR)
    // http://en.wikipedia.org/wiki/Valid_characters_in_XML
    if ((uchar == '\n' || uchar == '\t'
       || (uchar >= 0x0020 && uchar <= 0xd7ff)
       || (uchar >= 0xe000 && uchar <= 0xfffd)
       || (uchar >= 0x10000 && uchar <= 0x10ffff))
      && !((uchar >= 0x007f && uchar <= 0x0084) || (uchar >= 0x0086 && uchar <= 0x009F)))
        return NULL;

    // Escape other characters using a decimal character reference
    snprintf(buf, sizeof(buf), "&#%u;", (unsigned int)uchar);
    return buf;
}
637 |
//
// Write "string" to standard output as (part of) a bash variable name,
// passing each character through bash_name_safe(). The character at
// offset zero is treated as the first character of a name.
//
static void
print_bash_name(const char *string)
{
    const char *p;

    for (p = string; *p != '\0'; p++)
        putchar(bash_name_safe(*p, p == string));
}
646 |
//
// Write "string" to standard output as a quoted bash word.
//
// A value consisting solely of printable characters other than the single
// quote is written as a plain '...' string. Any other value is written as
// a $'...' string, using backslash escapes for quotes, backslashes and the
// common control characters, and \xNN for every other non-printable byte.
//
static void
print_bash_value(const char *string)
{
    const unsigned char *p;
    int need_ansi_c = 0;

    // See if plain single quotes will work
    for (p = (const unsigned char *)string; *p != '\0' && !need_ansi_c; p++)
        need_ansi_c = *p == '\'' || !isprint(*p);
    if (!need_ansi_c) {
        printf("'%s'", string);
        return;
    }

    // Otherwise, escape as needed inside $'...'
    fputs("$'", stdout);
    for (p = (const unsigned char *)string; *p != '\0'; p++) {
        const char *esc;

        // Find the dedicated escape sequence for this byte, if any
        switch (*p) {
        case '\'':  esc = "\\'";    break;
        case '\\':  esc = "\\\\";   break;
        case '\b':  esc = "\\b";    break;
        case '\f':  esc = "\\f";    break;
        case '\n':  esc = "\\n";    break;
        case '\r':  esc = "\\r";    break;
        case '\t':  esc = "\\t";    break;
        case '\v':  esc = "\\v";    break;
        default:    esc = NULL;     break;
        }

        // Output the byte in the appropriate form
        if (esc != NULL)
            fputs(esc, stdout);
        else if (isprint(*p))
            putchar(*p);
        else
            printf("\\x%02x", *p);
    }
    putchar('\'');
}
703 |
//
// Map a character to one that is safe to use in a bash variable name.
//
// Letters and underscores are returned unchanged, as are digits unless
// "first" is non-zero (i.e., "ch" is the first character of the name).
// Any other character is mapped to an underscore.
//
static char
bash_name_safe(char ch, int first)
{
    const unsigned char uch = (unsigned char)ch;
    const int is_letter = isupper(uch) || islower(uch);
    const int is_allowed_digit = !first && isdigit(uch);

    return (is_letter || ch == '_' || is_allowed_digit) ? ch : '_';
}
713 |
//
// Output JSON string
//
// Writes the UTF-8 encoded "string" to standard output as the contents of a
// JSON string (without the surrounding double quotes). Printable ASCII is
// copied through, with '"' and '\' backslash-escaped. Everything else is
// written as an escape sequence, so the output is always plain ASCII.
//
// "linenum" is only used by decode_utf8() for error reporting.
//
static void
print_json_string(const char *string, int linenum)
{
    size_t remain = strlen(string);     // computed once rather than on every iteration
    unsigned int offset;
    int uchar;
    int uclen;

    while (remain > 0) {
        uchar = decode_utf8(string, remain, &uclen, linenum);
        switch (uchar) {
        case '"':
            printf("\\\"");
            break;
        case '\\':
            printf("\\\\");
            break;
        case '\b':
            printf("\\b");
            break;
        case '\f':
            printf("\\f");
            break;
        case '\n':
            printf("\\n");
            break;
        case '\r':
            printf("\\r");
            break;
        case '\t':
            printf("\\t");
            break;
        default:

            // Compare against the printable ASCII range directly: isprint() is
            // only defined for unsigned char values and EOF, not for code points
            if (uchar >= 0x20 && uchar < 0x7f)
                putchar(uchar);
            else if (uchar <= 0xffff)
                printf("\\u%04x", (unsigned int)uchar);
            else if (uchar <= 0x10ffff) {

                // Code points beyond the BMP must be written as a UTF-16 surrogate pair
                offset = (unsigned int)uchar - 0x10000;
                printf("\\u%04x\\u%04x", 0xd800 | (offset >> 10), 0xdc00 | (offset & 0x3ff));
            } else
                printf("\\ufffd");      // not a Unicode code point: substitute U+FFFD
            break;
        }
        string += uclen;
        remain -= uclen;
    }
}
755 |
756 | // Convert row columns to UTF-8 encoding
757 | static void
758 | convert_to_utf8(iconv_t icd, struct row *row, int linenum)
759 | {
760 | int col;
761 |
762 | for (col = 0; col < row->num; col++) {
763 | char *const ibuf = row->fields[col];
764 | char *iptr;
765 | char *obuf;
766 | char *optr;
767 | size_t iremain;
768 | size_t oremain;
769 | size_t olen;
770 |
771 | // Convert column
772 | if (iconv(icd, NULL, NULL, NULL, NULL) == (size_t)-1)
773 | err(1, "iconv");
774 | iremain = strlen(ibuf);
775 | oremain = 64 + 4 * iremain;
776 | if ((obuf = malloc(oremain)) == NULL)
777 | err(1, "malloc");
778 | iptr = ibuf;
779 | optr = obuf;
780 | if (iconv(icd, &iptr, &iremain, &optr, &oremain) == (size_t)-1) {
781 | switch (errno) {
782 | case EILSEQ:
783 | errx(1, "line %d: %s multibyte sequence", linenum, "illegal");
784 | case EINVAL:
785 | errx(1, "line %d: %s multibyte sequence", linenum, "truncated");
786 | default:
787 | err(1, "line %d: iconv", linenum);
788 | }
789 | }
790 | olen = optr - obuf;
791 |
792 | // Replace column
793 | if ((row->fields[col] = realloc(row->fields[col], olen + 1)) == NULL)
794 | err(1, "realloc");
795 | memcpy(row->fields[col], obuf, olen);
796 | row->fields[col][olen] = '\0';
797 | free(obuf);
798 | }
799 | }
800 |
//
// Decode UTF-8 character
//
// Decodes the character starting at "obuf", of which "olen" bytes are
// available. Returns the decoded value and stores the number of bytes it
// occupies (one through six) in "*lenp".
//
// Exits with an error, reported against input line "linenum", if the first
// byte is not a valid lead byte or if a multi-byte sequence would extend
// past "olen" bytes. Continuation bytes are not otherwise validated.
//
static int
decode_utf8(const char *const obuf, size_t olen, int *lenp, int linenum)
{
    const int lead = obuf[0] & 0xff;
    int nbytes;
    int value;
    int k;

    // Classify the lead byte: sequence length, plus the payload bits it carries
    if ((lead & 0x80) == 0x00) {
        nbytes = 1;
        value = lead & 0x7f;
    } else if ((lead & 0xe0) == 0xc0) {
        nbytes = 2;
        value = lead & 0x1f;
    } else if ((lead & 0xf0) == 0xe0) {
        nbytes = 3;
        value = lead & 0x0f;
    } else if ((lead & 0xf8) == 0xf0) {
        nbytes = 4;
        value = lead & 0x07;
    } else if ((lead & 0xfc) == 0xf8) {
        nbytes = 5;
        value = lead & 0x03;
    } else if ((lead & 0xfe) == 0xfc) {
        nbytes = 6;
        value = lead & 0x01;
    } else {
        nbytes = 0;
        value = 0;
    }

    // Reject bogus lead bytes and truncated multi-byte sequences
    if (nbytes == 0 || (nbytes > 1 && (size_t)nbytes > olen))
        errx(1, "line %d: internal error decoding UTF-8: 0x%02x", linenum, obuf[0] & 0xff);

    // Fold in six payload bits from each continuation byte
    for (k = 1; k < nbytes; k++)
        value = (value << 6) | (obuf[k] & 0x3f);

    // Done
    *lenp = nbytes;
    return value;
}
849 |
850 | static int
851 | readcol(FILE *fp, struct row *row, int *linenum)
852 | {
853 | struct col col;
854 | int row_done;
855 | int ch;
856 |
857 | // Process initial stuff; skip leading whitespace, excluding our field separator (which could be TAB)
858 | do {
859 | if ((ch = readch(fp, 1)) == EOF)
860 | ch = '\n';
861 | if (ch == '\n') { // end of line forces empty column and terminates the row
862 | memset(&col, 0, sizeof(col));
863 | addcolumn(row, &col);
864 | (*linenum)++;
865 | return 0;
866 | }
867 | } while (isspace(ch) && ch != fsep);
868 | ungetc(ch, fp);
869 |
870 | // Read quoted or unquoted value
871 | if (ch == quote)
872 | row_done = readqcol(fp, &col, linenum);
873 | else
874 | row_done = readuqcol(fp, &col, linenum);
875 | addcolumn(row, &col);
876 | return row_done;
877 | }
878 |
//
// Read a quoted column, return true if there's more
//
// On entry, the next input character is the opening quote (readcol() pushed
// it back before calling us). The column value is accumulated into "col",
// with doubled quote characters collapsed into one. Returns one if a field
// separator follows the closing quote, or zero if the line (or file) ends
// there. "*linenum" is incremented for every newline consumed, including
// newlines embedded in the quoted value.
//
// Exits with an error if the input ends inside the quotes, or if anything
// other than whitespace appears between the closing quote and the next
// separator or end of line.
//
static int
readqcol(FILE *fp, struct col *col, int *linenum)
{
    int done = 0;                   // closing quote seen; now looking for the separator or end of line
    int escape = 0;                 // previous character was a quote: doubled quote, or closing quote?
    int ch;

    // Discard the opening quote
    readch(fp, 0);
    memset(col, 0, sizeof(*col));
    while (1) {
        assert(!escape || !done);

        // NOTE(review): readch()'s second argument is named "collapse" in its prototype; readch()
        // itself is not visible here, so what exactly gets collapsed should be confirmed there
        if ((ch = readch(fp, escape)) == EOF) {

            // EOF is only acceptable once the closing quote has been read; treat it as end of line
            if (escape || done)
                ch = '\n';
            else
                errx(1, "line %d: premature EOF", *linenum);
        }

        // After the closing quote: skip whitespace up to the separator or end of line
        if (done) {
            if (ch == '\n') {
                (*linenum)++;
                return 0;
            }
            if (ch == fsep)
                return 1;
            if (isspace(ch))
                continue;
            errx(1, "line %d: unexpected character \"%c\"", *linenum, ch);
        }

        // After a quote: a second quote means a literal quote character; anything else means
        // the first quote was the closing one, so push the character back and finish up
        if (escape) {
            if (ch == quote)
                addchar(col, quote);
            else {
                ungetc(ch, fp);
                done = 1;
            }
            escape = 0;
            continue;
        }

        // A quote here is either the closing quote or the first half of a doubled quote
        if (ch == quote) {
            escape = 1;
            continue;
        }

        // Ordinary character (possibly a newline embedded in the quoted value)
        addchar(col, ch);
        if (ch == '\n')
            (*linenum)++;
    }
}
929 |
930 | //
931 | // Read an unquoted column, return true if there's more
932 | //
933 | static int
934 | readuqcol(FILE *fp, struct col *col, int *linenum)
935 | {
936 | int ch;
937 |
938 | memset(col, 0, sizeof(*col));
939 | while (1) {
940 | if ((ch = readch(fp, 1)) == EOF)
941 | ch = '\n';
942 | if (ch == '\n') {
943 | (*linenum)++;
944 | trim(col);
945 | return 0;
946 | }
947 | if (ch == fsep) {
948 | trim(col);
949 | return 1;
950 | }
951 | addchar(col, ch);
952 | }
953 | }
954 |
955 | //
956 | // Trims whitespace around a column
957 | //
958 | static void
959 | trim(struct col *col)
960 | {
961 | size_t skip;
962 |
963 | while (col->len > 0 && isspace((unsigned char)col->buf[col->len - 1]))
964 | col->len--;
965 | for (skip = 0; skip < col->len && isspace((unsigned char)col->buf[skip]); skip++)
966 | ;
967 | col->len -= skip;
968 | memmove(col->buf, col->buf + skip, col->len);
969 | }
970 |
971 | //
972 | // Adds the character to the column
973 | //
974 | static void
975 | addchar(struct col *col, int ch)
976 | {
977 | if (col->alloc <= col->len) {
978 | int new_alloc;
979 | char *new_buf;
980 |
981 | new_alloc = col->alloc == 0 ? 32 : col->alloc * 2;
982 | if ((new_buf = realloc(col->buf, new_alloc)) == NULL)
983 | err(1, "realloc");
984 | col->buf = new_buf;
985 | col->alloc = new_alloc;
986 | }
987 | col->buf[col->len++] = ch;
988 | }
989 |
990 | //
991 | // Adds the column to the row, then frees the column
992 | //
993 | static void
994 | addcolumn(struct row *row, const struct col *col)
995 | {
996 | growrow(row);
997 | if (col->alloc >= col->len + 1) {
998 | col->buf[col->len] = '\0';
999 | row->fields[row->num] = col->buf;
1000 | } else {
1001 | if ((row->fields[row->num] = malloc(col->len + 1)) == NULL)
1002 | err(1, "malloc");
1003 | memcpy(row->fields[row->num], col->buf, col->len);
1004 | row->fields[row->num][col->len] = '\0';
1005 | free(col->buf);
1006 | }
1007 | memset(&col, 0, sizeof(col));
1008 | row->num++;
1009 | }
1010 |
1011 | // Copy given string and add to row
1012 | static void
1013 | addstring(struct row *row, const char *const string)
1014 | {
1015 | growrow(row);
1016 | if ((row->fields[row->num++] = strdup(string)) == NULL)
1017 | err(1, "strdup");
1018 | }
1019 |
1020 | static int
1021 | findstring(struct row *row, const char *const string)
1022 | {
1023 | return findstring2((const char *const *)row->fields, row->num, string);
1024 | }
1025 |
// Return 1 if the given string equals one of the first "num" entries of "list", otherwise 0
static int
findstring2(const char *const *list, size_t num, const char *const string)
{
    while (num-- > 0) {
        if (strcmp(*list++, string) == 0)
            return 1;
    }
    return 0;
}
1037 |
1038 | static void
1039 | growrow(struct row *row)
1040 | {
1041 | size_t new_alloc;
1042 | char **new_fields;
1043 |
1044 | if (row->alloc > row->num)
1045 | return;
1046 | new_alloc = row->alloc == 0 ? 32 : row->alloc * 2;
1047 | if ((new_fields = realloc(row->fields, new_alloc * sizeof(*row->fields))) == NULL)
1048 | err(1, "realloc");
1049 | row->fields = new_fields;
1050 | row->alloc = new_alloc;
1051 | memset(row->fields + row->num, 0, (row->alloc - row->num) * sizeof(*row->fields));
1052 | }
1053 |
// Parse the format string, editing it in place: every column accessor is
// removed from "fmt" (by eataccessor()) and the column number it refers to is
// recorded. The newly allocated array of column numbers is stored in *argsp and
// the number of entries is returned. Exits on a malformed format.
static int
parsefmt(char *fmt, const struct row *column_names, unsigned int **argsp)
{
    unsigned int *args;
    int nargs;
    int alloc;
    char *s;

    // Size and allocate array; one '%' consumes at most three accessors (value, field width, precision)
    alloc = 0;
    for (s = fmt; *s != '\0'; s++) {
        if (*s == '%')
            alloc += 3;
    }

    // Never request zero bytes: malloc(0) may return NULL, which is not an error for a format without '%'
    if ((args = malloc((alloc > 0 ? alloc : 1) * sizeof(*args))) == NULL)
        err(1, "malloc");
    nargs = 0;

    // Parse format
    for (s = fmt; *s != '\0'; s++) {
        char *const fspec = s;
        if (*s != '%' || *++s == '%')                                   // skip plain text and literal "%%"
            continue;
        s = eataccessor(fspec, "format specification", column_names, s, &nargs, args);
        while (*s != '\0' && strchr("#-+ 0", *s) != NULL)               // eat up optional flags
            s++;
        s = eatwidthprec(fspec, "field width for format specification", column_names, s, &nargs, args);
        if (*s == '.')
            s = eatwidthprec(fspec, "precision for format specification", column_names, s + 1, &nargs, args);
        if (*s == '\0')
            errx(1, "truncated format specification starting at \"%.20s...\"", fspec);
    }

    // Done
    *argsp = args;
    return nargs;
}
1091 |
// Parse a single-character specification: a literal character, a two-character
// backslash escape (\a \t \b \r \f \v \\ \' \"), a hexadecimal escape "\xNN" or
// an octal escape "\NNN". Returns the character value (0..255), or -1 if the
// specification is malformed, out of range, or denotes a newline.
static int
parsechar(const char *str)
{
    const char *digits;
    char *eptr;
    size_t i;
    int base;
    int ch;

    switch (strlen(str)) {
    case 1:
        ch = (unsigned char)*str;
        break;
    case 2:
        if (*str != '\\')
            return -1;
        switch (str[1]) {
        case 'a':
            ch = '\a';
            break;
        case 't':
            ch = '\t';
            break;
        case 'b':
            ch = '\b';
            break;
        case 'r':
            ch = '\r';
            break;
        case 'f':
            ch = '\f';
            break;
        case 'v':
            ch = '\v';
            break;
        case '\\':
        case '\'':
        case '"':
            ch = str[1];
            break;
        default:
            return -1;
        }
        break;
    case 4:
        if (*str != '\\')
            return -1;
        if (str[1] == 'x') {
            base = 16;
            digits = str + 2;
        } else {
            base = 8;
            digits = str + 1;
        }

        // strtoul() by itself also accepts leading whitespace and a sign; insist on digits only.
        // Digits that are invalid for the base (e.g. '8' in octal) are caught by the end-pointer check below.
        for (i = 0; digits[i] != '\0'; i++) {
            if (!isxdigit((unsigned char)digits[i]))
                return -1;
        }
        ch = strtoul(digits, &eptr, base);
        if (*eptr != '\0')
            return -1;
        break;
    default:
        return -1;
    }

    // Disallow line separator
    if (ch == '\n')
        return -1;

    // Disallow overflown values
    if (ch != (ch & 0xff))
        return -1;

    // Done
    return ch;
}
1155 |
// Consume a field width or precision starting at "s": either "*" followed by a
// column accessor (handled by eataccessor()) or a possibly empty run of decimal
// digits. Returns the position following what was consumed.
static char *
eatwidthprec(const char *const fspec, const char *desc, const struct row *column_names, char *s, int *nargs, unsigned int *args)
{
    if (*s != '*') {
        for (; isdigit((unsigned char)*s); s++)     // literal numerical field width or precision
            ;
        return s;
    }
    return eataccessor(fspec, desc, column_names, s + 1, nargs, args);
}
1165 |
1166 | static char *
1167 | eataccessor(const char *const fspec, const char *desc, const struct row *column_names, char *s, int *nargs, unsigned int *args)
1168 | {
1169 | char *const start = s;
1170 | const char *colname;
1171 | int namelen;
1172 | int argnum;
1173 | int i;
1174 |
1175 | if (*s == '{') {
1176 | if (column_names == NULL)
1177 | errx(1, "symbolic column accessors require \"-i\" flag in %s starting at \"%.20s...\"", desc, fspec);
1178 | colname = ++s;
1179 | while (*s != '}') {
1180 | if (*s++ == '\0')
1181 | errx(1, "malformed column accessor in %s starting at \"%.20s...\"", desc, fspec);
1182 | }
1183 | namelen = s++ - colname;
1184 | argnum = 0;
1185 | for (i = 0; i < column_names->num; i++) {
1186 | if (strncmp(colname, column_names->fields[i], namelen) == 0 && column_names->fields[i][namelen] == '\0') {
1187 | if (argnum != 0) {
1188 | errx(1, "ambiguous column name \"%.*s\" in symbolic column accessor in %s starting at \"%.20s...\"",
1189 | namelen, colname, desc, fspec);
1190 | }
1191 | argnum = i + 1;
1192 | }
1193 | }
1194 | if (argnum == 0) {
1195 | errx(1, "unknown column name \"%.*s\" in symbolic column accessor in %s starting at \"%.20s...\"",
1196 | namelen, colname, desc, fspec);
1197 | }
1198 | args[(*nargs)++] = argnum;
1199 | } else {
1200 | while (isdigit((unsigned char)*s))
1201 | s++;
1202 | if (s == start || *s++ != '$')
1203 | errx(1, "missing required column accessor in %s starting at \"%.20s...\"", desc, fspec);
1204 | sscanf(start, "%u", &args[(*nargs)++]);
1205 | }
1206 | memmove(start, s, strlen(s) + 1);
1207 | return start;
1208 | }
1209 |
// Read one character as getc() does; when "collapse" is set, a CR or a CR, LF pair is returned as a single LF
static int
readch(FILE *fp, int collapse)
{
    int next;
    int ch = getc(fp);

    if (!collapse || ch != '\r')
        return ch;

    // Swallow the LF of a CR, LF pair; anything else goes back for the next read
    next = getc(fp);
    if (next != '\n')
        ungetc(next, fp);
    return '\n';
}
1225 |
1226 | static void
1227 | freerow(struct row *row)
1228 | {
1229 | while (row->num > 0)
1230 | free(row->fields[--row->num]);
1231 | free(row->fields);
1232 | memset(row, 0, sizeof(*row));
1233 | }
1234 |
1235 | static void
1236 | usage(void)
1237 | {
1238 |
1239 | fprintf(stderr, "Usage:\n");
1240 | fprintf(stderr, " csvprintf [options] format\n");
1241 | fprintf(stderr, " csvprintf -b [options]\n");
1242 | fprintf(stderr, " csvprintf -j [options]\n");
1243 | fprintf(stderr, " csvprintf -x [options]\n");
1244 | fprintf(stderr, " csvprintf -X [options]\n");
1245 | fprintf(stderr, " csvprintf -h\n");
1246 | fprintf(stderr, " csvprintf -v\n");
1247 | fprintf(stderr, "Options:\n");
1248 | fprintf(stderr, " -b\t\tConvert input to bash(1) variable assignments\n");
1249 | fprintf(stderr, " -e encoding\tSpecify input character encoding (XML and JSON modes only; default ISO-8859-1)\n");
1250 | fprintf(stderr, " -f input\tRead CSV input from specified file (default stdin)\n");
1251 | fprintf(stderr, " -i\t\tAssume the first CSV record contains column names\n");
1252 | fprintf(stderr, " -j\t\tConvert input to JSON text sequences\n");
1253 | fprintf(stderr, " -q char\tSpecify quote character (default `%c')\n", DEFAULT_QUOTE_CHAR);
1254 | fprintf(stderr, " -s char\tSpecify field separator character (default `%c')\n", DEFAULT_FSEP_CHAR);
1255 | fprintf(stderr, " -x\t\tConvert input to XML using numeric tags\n");
1256 | fprintf(stderr, " -X\t\tConvert input to XML using column name tags (implies \"-i\")\n");
1257 | fprintf(stderr, " -h\t\tOutput this help message and exit\n");
1258 | fprintf(stderr, " -v\t\tOutput version information and exit\n");
1259 | }
1260 |
1261 | static void
1262 | version(void)
1263 | {
1264 | fprintf(stderr, "%s version %s", PACKAGE_TARNAME, PACKAGE_VERSION);
1265 | if (*csvprintf_version != '\0')
1266 | fprintf(stderr, " (%s)", csvprintf_version);
1267 | fprintf(stderr, "\n");
1268 | fprintf(stderr, "Copyright (C) 2010-2023 Archie L. Cobbs\n");
1269 | fprintf(stderr, "This is free software; see the source for copying conditions. There is NO\n");
1270 | fprintf(stderr, "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n");
1271 | }
1272 |
1273 |
--------------------------------------------------------------------------------
/tests/run.sh:
--------------------------------------------------------------------------------
#!/bin/sh
#
# Runs ../csvprintf on every *.in file in the current directory, once per
# output mode, and compares each result with the matching expected-output file.
# Exits non-zero if any comparison fails.

set -e

# Not every /bin/sh implements "pipefail"; probe in a subshell and enable it
# only where supported, so the script does not abort at startup elsewhere.
if (set -o pipefail) 2>/dev/null; then
    set -o pipefail
fi

FAILED_TESTS=''
for INPUT_FILE in *.in; do

    # Expected-output files share the input file's name with a different suffix
    BASE_NAME="${INPUT_FILE%.in}"
    OUTPUT_FILE1="${BASE_NAME}.out1"
    OUTPUT_FILE2="${BASE_NAME}.out2"
    OUTPUT_FILE3A="${BASE_NAME}.out3a"
    OUTPUT_FILE3B="${BASE_NAME}.out3b"
    OUTPUT_FILE4="${BASE_NAME}.out4"
    OUTPUT_FILE5A="${BASE_NAME}.out5a"
    OUTPUT_FILE5B="${BASE_NAME}.out5b"
    echo "*** testing ${INPUT_FILE}..." 1>&2
    if ! ../csvprintf -x -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE1}" -; then
        echo "*** FAILED: [1] ${INPUT_FILE}" 1>&2
        FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE1}"
    fi
    if ! ../csvprintf -X -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE2}" -; then
        echo "*** FAILED: [2] ${INPUT_FILE}" 1>&2
        FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE2}"
    fi

    # Round trip: XML -> CSV (via csv.xsl) -> XML must reproduce the first expected output
    if ! ../csvprintf -e iso-8859-1 -x -f "${INPUT_FILE}" | xsltproc ../csv.xsl - | ../csvprintf -e UTF-8 -x | diff -u "${OUTPUT_FILE1}" -; then
        echo "*** FAILED: [1x] ${INPUT_FILE}" 1>&2
        FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/csv2xml"
    fi
    if ! ../csvprintf -j -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE3A}" -; then
        echo "*** FAILED: [3a] ${INPUT_FILE}" 1>&2
        FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE3A}"
    fi
    if ! ../csvprintf -ij -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE3B}" -; then
        echo "*** FAILED: [3b] ${INPUT_FILE}" 1>&2
        FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE3B}"
    fi
    if ! ../csvprintf -ix -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE4}" -; then
        echo "*** FAILED: [4] ${INPUT_FILE}" 1>&2
        FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE4}"
    fi
    if ! ../csvprintf -b -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE5A}" -; then
        echo "*** FAILED: [5a] ${INPUT_FILE}" 1>&2
        FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE5A}"
    fi
    if ! ../csvprintf -ib -f "${INPUT_FILE}" | diff -u "${OUTPUT_FILE5B}" -; then
        echo "*** FAILED: [5b] ${INPUT_FILE}" 1>&2
        FAILED_TESTS="${FAILED_TESTS} ${INPUT_FILE}/${OUTPUT_FILE5B}"
    fi
done

if [ -z "${FAILED_TESTS}" ]; then
    echo "*** all tests passed"
else
    echo "*** test(s) failed:${FAILED_TESTS}"
    exit 1
fi
56 |
57 |
--------------------------------------------------------------------------------
/tests/run2.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Bail on error
set -e

# Setup temporary files (relative names, so they live in the current directory)
TMP_STDOUT_EXPECTED='csvprintf-test-out-expected.tmp'
TMP_STDERR_EXPECTED='csvprintf-test-err-expected.tmp'
TMP_STDOUT_ACTUAL='csvprintf-test-out-actual.tmp'
TMP_STDERR_ACTUAL='csvprintf-test-err-actual.tmp'
TMP_SWAP_FILE='csvprintf-test-hexdump.tmp'

# Remove every temporary file; installed below for normal exit and for signals
cleanup_tmp_files()
{
    rm -f -- \
      "${TMP_STDOUT_EXPECTED}" \
      "${TMP_STDERR_EXPECTED}" \
      "${TMP_STDOUT_ACTUAL}" \
      "${TMP_STDERR_ACTUAL}" \
      "${TMP_SWAP_FILE}"
}
trap cleanup_tmp_files 0 2 3 5 10 13 15
18 |
# Convert a file to hexdump version
#
# Replaces the contents of the file named by ${1} with its "hexdump -C"
# rendering, using ${TMP_SWAP_FILE} as scratch space.
hexdumpify()
{
    local file="${1}"

    hexdump -C < "${file}" > "${TMP_SWAP_FILE}"
    mv -- "${TMP_SWAP_FILE}" "${file}"
}
26 |
# Compare files, on failure set ${DIFF_FAIL}
#
# Usage: checkdiff [-h] testfile what expected actual
#   -h        on mismatch, convert both files to hexdumps (in place) before showing the diff
#   testfile  test name used in messages
#   what      description of the thing compared, used in messages
#   expected  file holding the expected content
#   actual    file holding the actual content
# Prints nothing when the files are identical; otherwise prints a unified diff
# to standard output and sets the global DIFF_FAIL to 'true'.
checkdiff()
{
    local as_hex='false'
    local testfile what expected actual

    if [ "${1}" = '-h' ]; then
        as_hex='true'
        shift
    fi
    testfile="${1}"
    what="${2}"
    expected="${3}"
    actual="${4}"
    if diff -q "${expected}" "${actual}" >/dev/null; then
        return 0
    fi
    echo "test: ${testfile}: ${what} mismatch"
    echo '------------------------------------------------------'
    if [ "${as_hex}" = 'true' ]; then
        hexdumpify "${expected}"
        hexdumpify "${actual}"
    fi

    # The mismatch is already known; diff's non-zero status must not end the script under "set -e"
    diff -u "${expected}" "${actual}" || true
    echo '------------------------------------------------------'
    DIFF_FAIL='true'
}
53 |
# Execute one test, on failure set ${TEST_FAIL}
#
# Sources the test description named by the global ${TESTFILE}, which must set
# FLAGS, STDIN, STDOUT, STDERR and EXITVAL. STDIN, STDOUT and STDERR have their
# backslash escapes expanded before use. Runs ../csvprintf with ${FLAGS},
# feeding it STDIN, and compares its output, error output and exit value with
# the expected ones. STDOUT='!IGNORE!' skips the output comparison;
# STDERR='!USAGE!' expects whatever "../csvprintf --help" writes to stderr.
runtest()
{
    local testsrc

    # Read test data
    unset FLAGS
    unset STDIN
    unset STDOUT
    unset STDERR
    unset EXITVAL

    # "." searches ${PATH} for names containing no slash; anchor such names to the current directory
    case "${TESTFILE}" in
        */*) testsrc="${TESTFILE}" ;;
        *)   testsrc="./${TESTFILE}" ;;
    esac
    . "${testsrc}"
    if [ -z "${FLAGS+x}" ] \
      || [ -z "${STDIN+x}" ] \
      || [ -z "${STDOUT+x}" ] \
      || [ -z "${STDERR+x}" ] \
      || [ -z "${EXITVAL+x}" ]; then
        echo "test: ${TESTFILE}: invalid test file"
        exit 1
    fi

    # Set up files ("%b" expands backslash escapes the way "echo -e" does)
    printf '%b' "${STDOUT}" > "${TMP_STDOUT_EXPECTED}"
    printf '%b' "${STDERR}" > "${TMP_STDERR_EXPECTED}"
    set +e
    # ${FLAGS} is left unquoted on purpose so that it splits into separate arguments
    printf '%b' "${STDIN}" | ../csvprintf ${FLAGS} >"${TMP_STDOUT_ACTUAL}" 2>"${TMP_STDERR_ACTUAL}"
    ACTUAL_EXITVAL="$?"
    set -e

    # Special hacks
    if [ "${STDERR}" = '!USAGE!' ]; then
        # Only the text written to stderr matters here; do not let the exit status end the run under "set -e"
        ../csvprintf --help 2>"${TMP_STDERR_EXPECTED}" || true
    fi

    # Check result
    DIFF_FAIL='false'
    if [ "${STDOUT}" != '!IGNORE!' ]; then
        checkdiff -h "${TESTFILE}" "standard output" "${TMP_STDOUT_EXPECTED}" "${TMP_STDOUT_ACTUAL}"
    fi
    checkdiff "${TESTFILE}" "standard error" "${TMP_STDERR_EXPECTED}" "${TMP_STDERR_ACTUAL}"
    if [ "${DIFF_FAIL}" != 'false' ]; then
        TEST_FAIL='true'
    fi
    if [ "${ACTUAL_EXITVAL}" -ne "${EXITVAL}" ]; then
        echo "test: ${TESTFILE}: exit value ${ACTUAL_EXITVAL} != ${EXITVAL}"
        TEST_FAIL='true'
    fi

    # Print success or if failure show params
    if [ "${TEST_FAIL}" = 'false' ]; then
        echo "test: ${TESTFILE}: success"
    else
        echo "******************************************************"
        echo "test: ${TESTFILE} FAILED with:"
        echo " FLAGS='${FLAGS}'"
        echo " STDIN='${STDIN}'"
        echo "******************************************************"
    fi
}
111 |
# Find all tests and run them (the glob expands in sorted order)
ANY_FAIL='false'
for TESTFILE in test-*.tst; do

    # Only regular files qualify; this also skips the literal pattern left behind when nothing matches
    if [ ! -f "${TESTFILE}" ] || [ -L "${TESTFILE}" ]; then
        continue
    fi
    TEST_FAIL='false'
    runtest "${TESTFILE}"
    if [ "${TEST_FAIL}" != 'false' ]; then
        ANY_FAIL='true'
    fi
done

# Exit with error if any test failed
if [ "${ANY_FAIL}" != 'false' ]; then
    exit 1
fi
126 |
--------------------------------------------------------------------------------
/tests/test-bash-omit1.tst:
--------------------------------------------------------------------------------
1 | FLAGS='-bi'
2 | STDIN='aaa,PATH,ccc\n"a1","b1","c1"\n"a2","b2","c2"\n'
3 | STDOUT=$'aaa=\'a1\'; ccc=\'c1\';\naaa=\'a2\'; ccc=\'c2\';\n'
4 | STDERR=''
5 | EXITVAL='0'
6 |
--------------------------------------------------------------------------------
/tests/test-bash-prefix1.tst:
--------------------------------------------------------------------------------
1 | FLAGS='-bi -p FOO_'
2 | STDIN='aaa,PATH,ccc\n"a1","b1","c1"\n"a2","b2","c2"\n'
3 | STDOUT=$'FOO_aaa=\'a1\'; FOO_PATH=\'b1\'; FOO_ccc=\'c1\';\nFOO_aaa=\'a2\'; FOO_PATH=\'b2\'; FOO_ccc=\'c2\';\n'
4 | STDERR=''
5 | EXITVAL='0'
6 |
--------------------------------------------------------------------------------
/tests/test-bash-prefix2.tst:
--------------------------------------------------------------------------------
1 | FLAGS='-bi -p PA'
2 | STDIN='aaa,TH,ccc\n"a1","b1","c1"\n"a2","b2","c2"\n'
3 | STDOUT=$'PAaaa=\'a1\'; PAccc=\'c1\';\nPAaaa=\'a2\'; PAccc=\'c2\';\n'
4 | STDERR=''
5 | EXITVAL='0'
6 |
--------------------------------------------------------------------------------
/tests/test-bash-quote.tst:
--------------------------------------------------------------------------------
1 | FLAGS='-bi -p PA_'
2 | STDIN=$'aaa,bbb,ccc\n\'aa\'xx\',"bb""yy",cc`zz\\ww\n'
3 | STDOUT=$'PA_aaa=$\'\\\'aa\\\'xx\\\'\'; PA_bbb=\'bb"yy\'; PA_ccc=\'cc`zz\\ww\';\n'
4 | STDERR=''
5 | EXITVAL='0'
6 |
--------------------------------------------------------------------------------
/tests/test-cflag-not-found.tst:
--------------------------------------------------------------------------------
1 | FLAGS='-X -c bbb -c zzz'
2 | STDIN='aaa,bbb,ccc\n"a1","b1","c1"\n"a2","b2","c2"\n'
3 | STDOUT='!IGNORE!'
4 | STDERR='csvprintf: column "zzz" not found\n'
5 | EXITVAL='1'
6 |
--------------------------------------------------------------------------------
/tests/test-cflag-xml.tst:
--------------------------------------------------------------------------------
1 | FLAGS='-X -c bbb'
2 | STDIN='aaa,bbb,ccc\n"a1","b1","c1"\n"a2","b2","c2"\n'
3 | STDOUT='\n\n \n b1\n
\n \n b2\n
\n\n'
4 | STDERR=''
5 | EXITVAL='0'
6 |
--------------------------------------------------------------------------------
/tests/test-json-skip1.tst:
--------------------------------------------------------------------------------
1 | FLAGS='-jn'
2 | STDIN='aaa,bbb\n"a1","b1"\n"a2","b2"\n'
3 | STDOUT='\x1e["a1","b1"]\n\x1e["a2","b2"]\n'
4 | STDERR=''
5 | EXITVAL='0'
6 |
--------------------------------------------------------------------------------
/tests/test-tab-noskip.tst:
--------------------------------------------------------------------------------
1 | FLAGS='-nj -s \t'
2 | STDIN='aaa\tbbb\tccc\n\t\t\n'
3 | STDOUT='\x1e["","",""]\n'
4 | STDERR=''
5 | EXITVAL='0'
6 |
--------------------------------------------------------------------------------
/tests/test1.in:
--------------------------------------------------------------------------------
1 | NAME,ADDRESS,POINTS
2 | Fred Smith,"1234 Main St.
3 | Anytown, USA 39103",123.4567
4 | "Wayne ""The Great One"" Gretsky", 59 Hockey Lane , 999999
5 |
--------------------------------------------------------------------------------
/tests/test1.out1:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | NAME
5 | ADDRESS
6 | POINTS
7 |
8 |
9 | Fred Smith
10 | 1234 Main St.
11 | Anytown, USA 39103
12 | 123.4567
13 |
14 |
15 | Wayne "The Great One" Gretsky
16 | 59 Hockey Lane
17 | 999999
18 |
19 |
20 |
--------------------------------------------------------------------------------
/tests/test1.out2:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Fred Smith
5 | 1234 Main St.
6 | Anytown, USA 39103
7 | 123.4567
8 |
9 |
10 | Wayne "The Great One" Gretsky
11 | 59 Hockey Lane
12 | 999999
13 |
14 |
15 |
--------------------------------------------------------------------------------
/tests/test1.out3a:
--------------------------------------------------------------------------------
1 | ["NAME","ADDRESS","POINTS"]
2 | ["Fred Smith","1234 Main St.\nAnytown, USA 39103","123.4567"]
3 | ["Wayne \"The Great One\" Gretsky","59 Hockey Lane","999999"]
4 |
--------------------------------------------------------------------------------
/tests/test1.out3b:
--------------------------------------------------------------------------------
1 | {"NAME":"Fred Smith","ADDRESS":"1234 Main St.\nAnytown, USA 39103","POINTS":"123.4567"}
2 | {"NAME":"Wayne \"The Great One\" Gretsky","ADDRESS":"59 Hockey Lane","POINTS":"999999"}
3 |
--------------------------------------------------------------------------------
/tests/test1.out4:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Fred Smith
5 | 1234 Main St.
6 | Anytown, USA 39103
7 | 123.4567
8 |
9 |
10 | Wayne "The Great One" Gretsky
11 | 59 Hockey Lane
12 | 999999
13 |
14 |
15 |
--------------------------------------------------------------------------------
/tests/test1.out5a:
--------------------------------------------------------------------------------
1 | ROW=( 'NAME' 'ADDRESS' 'POINTS' )
2 | ROW=( 'Fred Smith' $'1234 Main St.\nAnytown, USA 39103' '123.4567' )
3 | ROW=( 'Wayne "The Great One" Gretsky' '59 Hockey Lane' '999999' )
4 |
--------------------------------------------------------------------------------
/tests/test1.out5b:
--------------------------------------------------------------------------------
1 | NAME='Fred Smith'; ADDRESS=$'1234 Main St.\nAnytown, USA 39103'; POINTS='123.4567';
2 | NAME='Wayne "The Great One" Gretsky'; ADDRESS='59 Hockey Lane'; POINTS='999999';
3 |
--------------------------------------------------------------------------------
/tests/test2.in:
--------------------------------------------------------------------------------
1 | NAME,ADDRESS,POINTS
2 | Fred Smith,"1234 Main St.
3 | Anytown, USA 39103",123.4567
4 | "Wayne ""The Great One"" Gretsky", 59 Hockey Lane , 999999
5 |
--------------------------------------------------------------------------------
/tests/test2.out1:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | NAME
5 | ADDRESS
6 | POINTS
7 |
8 |
9 | Fred Smith
10 | 1234 Main St.
11 | Anytown, USA 39103
12 | 123.4567
13 |
14 |
15 | Wayne "The Great One" Gretsky
16 | 59 Hockey Lane
17 | 999999
18 |
19 |
20 |
--------------------------------------------------------------------------------
/tests/test2.out2:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Fred Smith
5 | 1234 Main St.
6 | Anytown, USA 39103
7 | 123.4567
8 |
9 |
10 | Wayne "The Great One" Gretsky
11 | 59 Hockey Lane
12 | 999999
13 |
14 |
15 |
--------------------------------------------------------------------------------
/tests/test2.out3a:
--------------------------------------------------------------------------------
1 | ["NAME","ADDRESS","POINTS"]
2 | ["Fred Smith","1234 Main St.\r\nAnytown, USA 39103","123.4567"]
3 | ["Wayne \"The Great One\" Gretsky","59 Hockey Lane","999999"]
4 |
--------------------------------------------------------------------------------
/tests/test2.out3b:
--------------------------------------------------------------------------------
1 | {"NAME":"Fred Smith","ADDRESS":"1234 Main St.\r\nAnytown, USA 39103","POINTS":"123.4567"}
2 | {"NAME":"Wayne \"The Great One\" Gretsky","ADDRESS":"59 Hockey Lane","POINTS":"999999"}
3 |
--------------------------------------------------------------------------------
/tests/test2.out4:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Fred Smith
5 | 1234 Main St.
6 | Anytown, USA 39103
7 | 123.4567
8 |
9 |
10 | Wayne "The Great One" Gretsky
11 | 59 Hockey Lane
12 | 999999
13 |
14 |
15 |
--------------------------------------------------------------------------------
/tests/test2.out5a:
--------------------------------------------------------------------------------
1 | ROW=( 'NAME' 'ADDRESS' 'POINTS' )
2 | ROW=( 'Fred Smith' $'1234 Main St.\r\nAnytown, USA 39103' '123.4567' )
3 | ROW=( 'Wayne "The Great One" Gretsky' '59 Hockey Lane' '999999' )
4 |
--------------------------------------------------------------------------------
/tests/test2.out5b:
--------------------------------------------------------------------------------
1 | NAME='Fred Smith'; ADDRESS=$'1234 Main St.\r\nAnytown, USA 39103'; POINTS='123.4567';
2 | NAME='Wayne "The Great One" Gretsky'; ADDRESS='59 Hockey Lane'; POINTS='999999';
3 |
--------------------------------------------------------------------------------
/tests/test3.in:
--------------------------------------------------------------------------------
1 | NAME,ADDRESS,POINTS
Fred Smith,"1234 Main St.
Anytown, USA 39103",123.4567
"Wayne ""The Great One"" Gretsky", 59 Hockey Lane , 999999
--------------------------------------------------------------------------------
/tests/test3.out1:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | NAME
5 | ADDRESS
6 | POINTS
7 |
8 |
9 | Fred Smith
10 | 1234 Main St.
Anytown, USA 39103
11 | 123.4567
12 |
13 |
14 | Wayne "The Great One" Gretsky
15 | 59 Hockey Lane
16 | 999999
17 |
18 |
19 |
--------------------------------------------------------------------------------
/tests/test3.out2:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Fred Smith
5 | 1234 Main St.
Anytown, USA 39103
6 | 123.4567
7 |
8 |
9 | Wayne "The Great One" Gretsky
10 | 59 Hockey Lane
11 | 999999
12 |
13 |
14 |
--------------------------------------------------------------------------------
/tests/test3.out3a:
--------------------------------------------------------------------------------
1 | ["NAME","ADDRESS","POINTS"]
2 | ["Fred Smith","1234 Main St.\rAnytown, USA 39103","123.4567"]
3 | ["Wayne \"The Great One\" Gretsky","59 Hockey Lane","999999"]
4 |
--------------------------------------------------------------------------------
/tests/test3.out3b:
--------------------------------------------------------------------------------
1 | {"NAME":"Fred Smith","ADDRESS":"1234 Main St.\rAnytown, USA 39103","POINTS":"123.4567"}
2 | {"NAME":"Wayne \"The Great One\" Gretsky","ADDRESS":"59 Hockey Lane","POINTS":"999999"}
3 |
--------------------------------------------------------------------------------
/tests/test3.out4:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Fred Smith
5 | 1234 Main St.
Anytown, USA 39103
6 | 123.4567
7 |
8 |
9 | Wayne "The Great One" Gretsky
10 | 59 Hockey Lane
11 | 999999
12 |
13 |
14 |
--------------------------------------------------------------------------------
/tests/test3.out5a:
--------------------------------------------------------------------------------
1 | ROW=( 'NAME' 'ADDRESS' 'POINTS' )
2 | ROW=( 'Fred Smith' $'1234 Main St.\rAnytown, USA 39103' '123.4567' )
3 | ROW=( 'Wayne "The Great One" Gretsky' '59 Hockey Lane' '999999' )
4 |
--------------------------------------------------------------------------------
/tests/test3.out5b:
--------------------------------------------------------------------------------
1 | NAME='Fred Smith'; ADDRESS=$'1234 Main St.\rAnytown, USA 39103'; POINTS='123.4567';
2 | NAME='Wayne "The Great One" Gretsky'; ADDRESS='59 Hockey Lane'; POINTS='999999';
3 |
--------------------------------------------------------------------------------
/tests/test4.in:
--------------------------------------------------------------------------------
1 | NAME,ADDRESS,POINTS
Fred Smith,"1234 Main St.
2 | Anytown, USA 39103",123.4567
"Wayne ""The Great One"" Gretsky", 59 Hockey Lane , 999999
--------------------------------------------------------------------------------
/tests/test4.out1:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | NAME
5 | ADDRESS
6 | POINTS
7 |
8 |
9 | Fred Smith
10 | 1234 Main St.
11 | Anytown, USA 39103
12 | 123.4567
13 |
14 |
15 | Wayne "The Great One" Gretsky
16 | 59 Hockey Lane
17 | 999999
18 |
19 |
20 |
--------------------------------------------------------------------------------
/tests/test4.out2:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Fred Smith
5 | 1234 Main St.
6 | Anytown, USA 39103
7 | 123.4567
8 |
9 |
10 | Wayne "The Great One" Gretsky
11 | 59 Hockey Lane
12 | 999999
13 |
14 |
15 |
--------------------------------------------------------------------------------
/tests/test4.out3a:
--------------------------------------------------------------------------------
1 | ["NAME","ADDRESS","POINTS"]
2 | ["Fred Smith","1234 Main St.\nAnytown, USA 39103","123.4567"]
3 | ["Wayne \"The Great One\" Gretsky","59 Hockey Lane","999999"]
4 |
--------------------------------------------------------------------------------
/tests/test4.out3b:
--------------------------------------------------------------------------------
1 | {"NAME":"Fred Smith","ADDRESS":"1234 Main St.\nAnytown, USA 39103","POINTS":"123.4567"}
2 | {"NAME":"Wayne \"The Great One\" Gretsky","ADDRESS":"59 Hockey Lane","POINTS":"999999"}
3 |
--------------------------------------------------------------------------------
/tests/test4.out4:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Fred Smith
5 | 1234 Main St.
6 | Anytown, USA 39103
7 | 123.4567
8 |
9 |
10 | Wayne "The Great One" Gretsky
11 | 59 Hockey Lane
12 | 999999
13 |
14 |
15 |
--------------------------------------------------------------------------------
/tests/test4.out5a:
--------------------------------------------------------------------------------
1 | ROW=( 'NAME' 'ADDRESS' 'POINTS' )
2 | ROW=( 'Fred Smith' $'1234 Main St.\nAnytown, USA 39103' '123.4567' )
3 | ROW=( 'Wayne "The Great One" Gretsky' '59 Hockey Lane' '999999' )
4 |
--------------------------------------------------------------------------------
/tests/test4.out5b:
--------------------------------------------------------------------------------
1 | NAME='Fred Smith'; ADDRESS=$'1234 Main St.\nAnytown, USA 39103'; POINTS='123.4567';
2 | NAME='Wayne "The Great One" Gretsky'; ADDRESS='59 Hockey Lane'; POINTS='999999';
3 |
--------------------------------------------------------------------------------
/tests/test5.in:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/archiecobbs/csvprintf/c300f17d2f82c53e433f7bdca742805d602eb31e/tests/test5.in
--------------------------------------------------------------------------------
/tests/test5.out1:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | piñata
5 |
6 |
7 |
--------------------------------------------------------------------------------
/tests/test5.out2:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/tests/test5.out3a:
--------------------------------------------------------------------------------
1 | ["pi\u00f1ata"]
2 |
--------------------------------------------------------------------------------
/tests/test5.out3b:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/archiecobbs/csvprintf/c300f17d2f82c53e433f7bdca742805d602eb31e/tests/test5.out3b
--------------------------------------------------------------------------------
/tests/test5.out4:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/tests/test5.out5a:
--------------------------------------------------------------------------------
1 | ROW=( $'pi\xf1ata' )
2 |
--------------------------------------------------------------------------------
/tests/test5.out5b:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/archiecobbs/csvprintf/c300f17d2f82c53e433f7bdca742805d602eb31e/tests/test5.out5b
--------------------------------------------------------------------------------
/tests/test6.in:
--------------------------------------------------------------------------------
1 | NAME,ADDRESS,POINTS
Fred Smith,"1234 Main St.
2 | Anytown, USA 39103",123.4567
"Wayne ""The Great One"" Gretsky", 59 Hockey Lane , "999999"
3 |
--------------------------------------------------------------------------------
/tests/test6.out1:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | NAME
5 | ADDRESS
6 | POINTS
7 |
8 |
9 | Fred Smith
10 | 1234 Main St.
11 | Anytown, USA 39103
12 | 123.4567
13 |
14 |
15 | Wayne "The Great One" Gretsky
16 | 59 Hockey Lane
17 | 999999
18 |
19 |
20 |
--------------------------------------------------------------------------------
/tests/test6.out2:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Fred Smith
5 | 1234 Main St.
6 | Anytown, USA 39103
7 | 123.4567
8 |
9 |
10 | Wayne "The Great One" Gretsky
11 | 59 Hockey Lane
12 | 999999
13 |
14 |
15 |
--------------------------------------------------------------------------------
/tests/test6.out3a:
--------------------------------------------------------------------------------
1 | ["NAME","ADDRESS","POINTS"]
2 | ["Fred Smith","1234 Main St.\nAnytown, USA 39103","123.4567"]
3 | ["Wayne \"The Great One\" Gretsky","59 Hockey Lane","999999"]
4 |
--------------------------------------------------------------------------------
/tests/test6.out3b:
--------------------------------------------------------------------------------
1 | {"NAME":"Fred Smith","ADDRESS":"1234 Main St.\nAnytown, USA 39103","POINTS":"123.4567"}
2 | {"NAME":"Wayne \"The Great One\" Gretsky","ADDRESS":"59 Hockey Lane","POINTS":"999999"}
3 |
--------------------------------------------------------------------------------
/tests/test6.out4:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Fred Smith
5 | 1234 Main St.
6 | Anytown, USA 39103
7 | 123.4567
8 |
9 |
10 | Wayne "The Great One" Gretsky
11 | 59 Hockey Lane
12 | 999999
13 |
14 |
15 |
--------------------------------------------------------------------------------
/tests/test6.out5a:
--------------------------------------------------------------------------------
1 | ROW=( 'NAME' 'ADDRESS' 'POINTS' )
2 | ROW=( 'Fred Smith' $'1234 Main St.\nAnytown, USA 39103' '123.4567' )
3 | ROW=( 'Wayne "The Great One" Gretsky' '59 Hockey Lane' '999999' )
4 |
--------------------------------------------------------------------------------
/tests/test6.out5b:
--------------------------------------------------------------------------------
1 | NAME='Fred Smith'; ADDRESS=$'1234 Main St.\nAnytown, USA 39103'; POINTS='123.4567';
2 | NAME='Wayne "The Great One" Gretsky'; ADDRESS='59 Hockey Lane'; POINTS='999999';
3 |
--------------------------------------------------------------------------------
/tests/test7.in:
--------------------------------------------------------------------------------
1 | Name With Spaces,#~!@#$%^&*(),"&<>&""\"
2 | aaa,bbb,ccc
3 |
--------------------------------------------------------------------------------
/tests/test7.out1:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Name With Spaces
5 | #~!@#$%^&*()
6 | &<>&"\
7 |
8 |
9 | aaa
10 | bbb
11 | ccc
12 |
13 |
14 |
--------------------------------------------------------------------------------
/tests/test7.out2:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | aaa
5 | <____________>bbb
6 | <______>ccc
7 |
8 |
9 |
--------------------------------------------------------------------------------
/tests/test7.out3a:
--------------------------------------------------------------------------------
1 | ["Name With Spaces","#~!@#$%^&*()","&<>&\"\\"]
2 | ["aaa","bbb","ccc"]
3 |
--------------------------------------------------------------------------------
/tests/test7.out3b:
--------------------------------------------------------------------------------
1 | {"Name With Spaces":"aaa","#~!@#$%^&*()":"bbb","&<>&\"\\":"ccc"}
2 |
--------------------------------------------------------------------------------
/tests/test7.out4:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | aaa
5 | bbb
6 | ccc
7 |
8 |
9 |
--------------------------------------------------------------------------------
/tests/test7.out5a:
--------------------------------------------------------------------------------
1 | ROW=( 'Name With Spaces' '#~!@#$%^&*()' '&<>&"\' )
2 | ROW=( 'aaa' 'bbb' 'ccc' )
3 |
--------------------------------------------------------------------------------
/tests/test7.out5b:
--------------------------------------------------------------------------------
1 | Name_With_Spaces='aaa'; ____________='bbb'; ______='ccc';
2 |
--------------------------------------------------------------------------------
/tests/test8.in:
--------------------------------------------------------------------------------
1 | ColA,ColB,ColC
2 | aaa,bbb,ccc
3 | aaa,bbb
4 | aaa,bbb,ccc,ddd
5 |
--------------------------------------------------------------------------------
/tests/test8.out1:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | ColA
5 | ColB
6 | ColC
7 |
8 |
9 | aaa
10 | bbb
11 | ccc
12 |
13 |
14 | aaa
15 | bbb
16 |
17 |
18 | aaa
19 | bbb
20 | ccc
21 | ddd
22 |
23 |
24 |
--------------------------------------------------------------------------------
/tests/test8.out2:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | aaa
5 | bbb
6 | ccc
7 |
8 |
9 | aaa
10 | bbb
11 |
12 |
13 | aaa
14 | bbb
15 | ccc
16 | ddd
17 |
18 |
19 |
--------------------------------------------------------------------------------
/tests/test8.out3a:
--------------------------------------------------------------------------------
1 | ["ColA","ColB","ColC"]
2 | ["aaa","bbb","ccc"]
3 | ["aaa","bbb"]
4 | ["aaa","bbb","ccc","ddd"]
5 |
--------------------------------------------------------------------------------
/tests/test8.out3b:
--------------------------------------------------------------------------------
1 | {"ColA":"aaa","ColB":"bbb","ColC":"ccc"}
2 | {"ColA":"aaa","ColB":"bbb"}
3 | {"ColA":"aaa","ColB":"bbb","ColC":"ccc","col4":"ddd"}
4 |
--------------------------------------------------------------------------------
/tests/test8.out4:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | aaa
5 | bbb
6 | ccc
7 |
8 |
9 | aaa
10 | bbb
11 |
12 |
13 | aaa
14 | bbb
15 | ccc
16 | ddd
17 |
18 |
19 |
--------------------------------------------------------------------------------
/tests/test8.out5a:
--------------------------------------------------------------------------------
1 | ROW=( 'ColA' 'ColB' 'ColC' )
2 | ROW=( 'aaa' 'bbb' 'ccc' )
3 | ROW=( 'aaa' 'bbb' )
4 | ROW=( 'aaa' 'bbb' 'ccc' 'ddd' )
5 |
--------------------------------------------------------------------------------
/tests/test8.out5b:
--------------------------------------------------------------------------------
1 | ColA='aaa'; ColB='bbb'; ColC='ccc';
2 | ColA='aaa'; ColB='bbb';
3 | ColA='aaa'; ColB='bbb'; ColC='ccc'; col4='ddd';
4 |
--------------------------------------------------------------------------------
/tests/test9.in:
--------------------------------------------------------------------------------
1 | foo
2 | value1,value2,value3
3 |
--------------------------------------------------------------------------------
/tests/test9.out1:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | foo
5 |
6 |
7 | value1
8 | value2
9 | value3
10 |
11 |
12 |
--------------------------------------------------------------------------------
/tests/test9.out2:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | value1
5 | value2
6 | value3
7 |
8 |
9 |
--------------------------------------------------------------------------------
/tests/test9.out3a:
--------------------------------------------------------------------------------
1 | ["foo"]
2 | ["value1","value2","value3"]
3 |
--------------------------------------------------------------------------------
/tests/test9.out3b:
--------------------------------------------------------------------------------
1 | {"foo":"value1","col2":"value2","col3":"value3"}
2 |
--------------------------------------------------------------------------------
/tests/test9.out4:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | value1
5 | value2
6 | value3
7 |
8 |
9 |
--------------------------------------------------------------------------------
/tests/test9.out5a:
--------------------------------------------------------------------------------
1 | ROW=( 'foo' )
2 | ROW=( 'value1' 'value2' 'value3' )
3 |
--------------------------------------------------------------------------------
/tests/test9.out5b:
--------------------------------------------------------------------------------
1 | foo='value1'; col2='value2'; col3='value3';
2 |
--------------------------------------------------------------------------------
/xml2csv.in:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
# Fixed settings: the program name used to prefix messages, plus the
# XSLT processor and CSV stylesheet locations.  The @...@ tokens are
# placeholders (presumably replaced when this template is turned into
# the installed script -- confirm against the build files).
NAME='xml2csv'
XSLTPROC="@XSLTPROC@"
CSVXSL="@pkgdatadir@/csv.xsl"
7 |
# Usage message
#
# usage
#   Print the command synopsis and the option summary to standard error.
#   Nothing is written to standard output; the caller decides whether
#   and how to exit afterwards.
usage()
{
    # A single printf emits every argument on its own line
    printf '%s\n' \
        "Usage:" \
        " ${NAME} [input.xml]" \
        "Options:" \
        " -h Show this help message and exit" 1>&2
}
16 |
# Log functions
#
# log [MESSAGE...]
#   Write "<NAME>: MESSAGE..." plus a newline to standard error, joining
#   multiple arguments with a space (first character of IFS, which this
#   script leaves at its default).  With no arguments only "<NAME>:" is
#   written.
#
#   printf is used rather than echo so that backslashes in the message
#   are emitted literally regardless of which /bin/sh is in use, and
#   ${NAME} is quoted so it is never word-split or glob-expanded.
log()
{
    if [ ${#} -gt 0 ]; then
        printf '%s: %s\n' "${NAME}" "$*" 1>&2
    else
        printf '%s:\n' "${NAME}" 1>&2
    fi
}
22 |
# Error function
#
# errout [MESSAGE...]
#   Hand every argument, unchanged, to log and then terminate the shell
#   with exit status 1.  This function does not return to its caller.
errout()
{
    log "$@"
    exit 1
}
29 |
# Bail on errors
set -e

# Parse flags passed in on the command line.  Every arm either exits or
# leaves the loop, so at most one leading argument is consumed here; the
# remaining positional parameters are the operands.
while [ ${#} -gt 0 ]; do
    case "$1" in
    -h|--help)
        # Help requested: show usage and exit with usage's status
        usage
        exit
        ;;
    --)
        # Explicit end of flags: whatever follows is an operand
        shift
        break
        ;;
    -)
        # A lone "-" is the standard-input operand (the same value this
        # script uses when no input file is given), not a flag, so it
        # must not fall into the "-*" arm below
        break
        ;;
    -*)
        printf "%s: unrecognized flag \`%s'\n" "${NAME}" "${1}" 1>&2
        usage
        exit 1
        ;;
    *)
        # First operand reached
        break
        ;;
    esac
done
# Choose the input from the operands left after flag parsing:
# none means "-", exactly one is used as given, and any
# larger count is a usage error.
if [ "${#}" -eq 0 ]; then
    INPUT_FILE="-"
elif [ "${#}" -eq 1 ]; then
    INPUT_FILE="${1}"
else
    usage
    exit 1
fi
66 |
# Run
#
# Replace this shell with the XSLT processor, applying the CSV stylesheet
# to the chosen input; a bare "-" is passed through unchanged.  A file
# name that merely begins with "-" (reachable by putting it after "--")
# is given a "./" prefix so the processor cannot mistake it for one of
# its own options.
case "${INPUT_FILE}" in
-?*)
    INPUT_FILE="./${INPUT_FILE}"
    ;;
esac
exec "${XSLTPROC}" "${CSVXSL}" "${INPUT_FILE}"
69 |
70 |
--------------------------------------------------------------------------------