├── .prev-version
├── tests
    ├── .gitignore
    ├── test.sh
    ├── no-perl
    ├── datamash-show-env.sh
    ├── datamash-io-errors-cheap.sh
    ├── CuSkip.pm
    ├── datamash-rand.pl
    ├── decorate-valgrind.sh
    ├── CuTmpdir.pm
    ├── datamash-rand.sh
    ├── datamash-strbin.sh
    ├── datamash-md5.pl
    ├── datamash-sort-header-deprecated.pl
    ├── datamash-sort-errors.sh
    ├── datamash-sha.pl
    ├── datamash-io-errors.sh
    ├── datamash-i18n-de.pl
    ├── datamash-sort-header.pl
    ├── datamash-check-tabular.pl
    ├── datamash-output-format.pl
    ├── datamash-tests-deprecated.pl
    ├── datamash-tests-2-deprecated.pl
    ├── datamash-crosstab.pl
    ├── datamash-check.pl
    ├── decorate-sort-tests.pl
    ├── datamash-transpose.pl
    └── datamash-pair-tests.pl
├── .gitmodules
├── po
    ├── quot.sed
    ├── boldquot.sed
    ├── POTFILES.in
    ├── remove-potcdate.sin
    ├── insert-header.sin
    ├── ChangeLog
    ├── en@quot.header
    └── en@boldquot.header
├── hooks
    ├── pre-commit.sh
    ├── setup-hooks.sh
    └── README.md
├── AUTHORS
├── lib
    └── local.mk
├── THANKS
├── src
    ├── double-format.h
    ├── decorate-functions.h
    ├── randutils.h
    ├── die.h
    ├── randutils.c
    ├── crosstab.h
    ├── op-scanner.h
    ├── column-headers.h
    ├── text-lines.h
    ├── double-format.c
    ├── text-options.h
    ├── column-headers.c
    ├── op-parser.h
    ├── text-options.c
    ├── op-defs.h
    ├── key-compare.h
    ├── field-ops.h
    ├── decorate-functions.c
    └── crosstab.c
├── m4
    ├── ax_c_long_long.m4
    └── .gitignore
├── doc
    ├── local.mk
    └── datamash-texinfo.css
├── examples
    ├── make_genes_example.sh
    ├── make_score_example.r
    ├── scores.txt
    └── scores_h.txt
├── man
    └── decorate.x
├── init.cfg
├── .github
    ├── ISSUE_TEMPLATE.txt
    └── PULL_REQUEST_TEMPLATE.txt
├── .gitignore
├── contrib
    └── bash-completion
    │   └── datamash
├── bootstrap.conf
└── README


/.prev-version:
--------------------------------------------------------------------------------
1 | 1.9
2 | 


--------------------------------------------------------------------------------
/tests/.gitignore:
--------------------------------------------------------------------------------
1 | /*.trs
2 | /*.log
3 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "gnulib"]
2 | 	path = gnulib
3 | 	url = git://git.sv.gnu.org/gnulib.git
4 | 


--------------------------------------------------------------------------------
/po/quot.sed:
--------------------------------------------------------------------------------
1 | s/"\([^"]*\)"/“\1”/g
2 | s/`\([^`']*\)'/‘\1’/g
3 | s/ '\([^`']*\)' / ‘\1’ /g
4 | s/ '\([^`']*\)'$/ ‘\1’/g
5 | s/^'\([^`']*\)' /‘\1’ /g
6 | s/“”/""/g
7 | 


--------------------------------------------------------------------------------
/po/boldquot.sed:
--------------------------------------------------------------------------------
 1 | s/"\([^"]*\)"/“\1”/g
 2 | s/`\([^`']*\)'/‘\1’/g
 3 | s/ '\([^`']*\)' / ‘\1’ /g
 4 | s/ '\([^`']*\)'$/ ‘\1’/g
 5 | s/^'\([^`']*\)' /‘\1’ /g
 6 | s/“”/""/g
 7 | s/“/“[1m/g
 8 | s/”/[0m”/g
 9 | s/‘/‘[1m/g
10 | s/’/[0m’/g
11 | 


--------------------------------------------------------------------------------
/hooks/pre-commit.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Git pre-commit hook: run `make syntax-check` on the tree.
 4 | 
 5 | # Prefer gmake over make to get GNU make on non-GNU userland systems if present
 6 | # Only the exit status of `command -v` is needed, so discard the path it
 7 | # prints on stdout as well as any error output.
 8 | if command -v gmake >/dev/null 2>&1; then
 9 |     make_cmd=gmake
10 | else
11 |     make_cmd=make
12 | fi
13 | 
14 | # Last command of the script: its exit status becomes the hook's status.
15 | $make_cmd syntax-check
16 | 


--------------------------------------------------------------------------------
/hooks/setup-hooks.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Find the path to datamash/.git
 4 | gitdir=$(git rev-parse --absolute-git-dir)
 5 | [ $? -ne 0 ] && exit 1
 6 | 
 7 | # Find the path to datamash/hooks
 8 | hookdir="$(dirname "$gitdir")/hooks"
 9 | 
10 | # Install hooks
11 | ln -fs "$hookdir/pre-commit.sh" "$gitdir/hooks/pre-commit" || \
12 |     { echo "Unable to install pre-commit hook" >&2; exit 1; }
13 | 


--------------------------------------------------------------------------------
/po/POTFILES.in:
--------------------------------------------------------------------------------
 1 | # List of source files which contain translatable strings.
 2 | lib/closeout.c
 3 | lib/error.c
 4 | lib/getopt.c
 5 | lib/quotearg.c
 6 | lib/version-etc.c
 7 | lib/xalloc-die.c
 8 | lib/xstrtol-error.c
 9 | src/datamash.c
10 | src/decorate-functions.c
11 | src/decorate.c
12 | src/double-format.c
13 | src/field-ops.c
14 | src/key-compare.c
15 | src/op-parser.c
16 | src/op-scanner.c
17 | src/system.h
18 | src/text-lines.c
19 | src/text-options.c
20 | 


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
 1 | GNU Datamash was originally authored by Assaf Gordon <assafgordon@gmail.com>.
 2 | It is currently maintained by Assaf Gordon <assafgordon@gmail.com> and Tim Rice
 3 | <trice@posteo.net>, with assistance from Shawn Wagner and Erik Auerswald.
 4 | 
 5 | In addition, the following have provided patches and/or Git commits to Datamash:
 6 | 
 7 |     Barry Nisly
 8 |     Benno Schulenberg
 9 |     Dima Kogan
10 |     Georg Sauthoff
11 |     Jeroen Roovers
12 |     Yu Fu
13 | 
14 | See also the THANKS file.
15 | 


--------------------------------------------------------------------------------
/po/remove-potcdate.sin:
--------------------------------------------------------------------------------
 1 | # Sed script that removes the POT-Creation-Date line in the header entry
 2 | # from a POT file.
 3 | #
 4 | # The distinction between the first and the following occurrences of the
 5 | # pattern is achieved by looking at the hold space.
 6 | /^"POT-Creation-Date: .*"$/{
 7 | x
 8 | # Test if the hold space is empty.
 9 | s/P/P/
10 | ta
11 | # Yes it was empty. First occurrence. Remove the line.
12 | g
13 | d
14 | bb
15 | :a
16 | # The hold space was nonempty. Following occurrences. Do nothing.
17 | x
18 | :b
19 | }
20 | 


--------------------------------------------------------------------------------
/hooks/README.md:
--------------------------------------------------------------------------------
 1 | Git Hooks
 2 | =========
 3 | 
 4 | git client-side hooks are not considered part of the repository and
 5 | aren't included in a `git clone`. Running `hooks/setup-hooks.sh` will
 6 | install some useful ones for you.
 7 | 
 8 | This is only needed if you're working on the datamash source; if
 9 | you're just compiling it, there's no reason to do this.
10 | 
11 | Installed Hooks
12 | ===============
13 | 
14 | pre-commit
15 | ----------
16 | 
17 | Makes sure the code tree passes `make syntax-check` before allowing a
18 | commit.
19 | 


--------------------------------------------------------------------------------
/lib/local.mk:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2014-2021 Assaf Gordon <assafgordon@gmail.com>
 2 | #
 3 | # This file is free software; as a special exception the author gives
 4 | # unlimited permission to copy and/or distribute it, with or without
 5 | # modifications, as long as this notice is preserved.
 6 | #
 7 | # This program is distributed in the hope that it will be useful, but
 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the
 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10 | 
11 | include lib/gnulib.mk
12 | 
13 | # Allow "make distdir" to succeed before "make all" has run.
14 | dist-hook: $(noinst_LIBRARIES)
15 | .PHONY: dist-hook
16 | 


--------------------------------------------------------------------------------
/po/insert-header.sin:
--------------------------------------------------------------------------------
 1 | # Sed script that inserts the file called HEADER before the header entry.
 2 | #
 3 | # At each occurrence of a line starting with "msgid ", we execute the following
 4 | # commands. At the first occurrence, insert the file. At the following
 5 | # occurrences, do nothing. The distinction between the first and the following
 6 | # occurrences is achieved by looking at the hold space.
 7 | /^msgid /{
 8 | x
 9 | # Test if the hold space is empty.
10 | s/m/m/
11 | ta
12 | # Yes it was empty. First occurrence. Read the file.
13 | r HEADER
14 | # Output the file's contents by reading the next line. But don't lose the
15 | # current line while doing this.
16 | g
17 | N
18 | bb
19 | :a
20 | # The hold space was nonempty. Following occurrences. Do nothing.
21 | x
22 | :b
23 | }
24 | 


--------------------------------------------------------------------------------
/THANKS:
--------------------------------------------------------------------------------
 1 | Thanks to:
 2 | 
 3 |     Aaron Quinlan ( http://quinlanlab.org/ ) whose 'groupBy' program
 4 |     (https://github.com/arq5x/bedtools) was the inspiration for GNU Datamash.
 5 | 
 6 | The following people provided bug reports, feature requests and other
 7 | suggestions which resulted in notable improvements to GNU Datamash:
 8 | 
 9 |     Alejandro Garrido Mota
10 |     Benno Schulenberg
11 |     Bruno Haible
12 |     Dagobert Michelsen
13 |     Dave Myron
14 |     Dima Kogan
15 |     Frank Busse
16 |     Jérémie Roquet
17 |     Jeroen Hoek
18 |     Khavish Bhundoo
19 |     Kingsley G. Morse Jr.
20 |     Mark van Rossum
21 |     Renan Valieris
22 |     Renato Alves
23 |     Sanjeev Kumar Sharma
24 |     Steve Ward
25 |     Torsten Seemann
26 |     wheat MAX
27 | 
28 | See also the AUTHORS and ChangeLog files.
29 | 


--------------------------------------------------------------------------------
/po/ChangeLog:
--------------------------------------------------------------------------------
 1 | 2015-05-28  gettextize  <bug-gnu-gettext@gnu.org>
 2 | 
 3 | 	* Makefile.in.in: Upgrade to gettext-0.19.4.
 4 | 
 5 | 2015-05-28  gettextize  <bug-gnu-gettext@gnu.org>
 6 | 
 7 | 	* Makefile.in.in: Upgrade to gettext-0.19.4.
 8 | 	* Rules-quot: Upgrade to gettext-0.19.4.
 9 | 
10 | 2013-04-11  gettextize  <bug-gnu-gettext@gnu.org>
11 | 
12 | 	* Makefile.in.in: New file, from gettext-0.18.1.
13 | 	* Rules-quot: New file, from gettext-0.18.1.
14 | 	* boldquot.sed: New file, from gettext-0.18.1.
15 | 	* en@boldquot.header: New file, from gettext-0.18.1.
16 | 	* en@quot.header: New file, from gettext-0.18.1.
17 | 	* insert-header.sin: New file, from gettext-0.18.1.
18 | 	* quot.sed: New file, from gettext-0.18.1.
19 | 	* remove-potcdate.sin: New file, from gettext-0.18.1.
20 | 	* POTFILES.in: New file.
21 | 
22 | 


--------------------------------------------------------------------------------
/src/double-format.h:
--------------------------------------------------------------------------------
 1 | /* GNU Datamash - perform simple calculation on input data
 2 | 
 3 |    Copyright (C) 2018-2021 Assaf Gordon <assafgordon@gmail.com>
 4 | 
 5 |    This file is part of GNU Datamash.
 6 | 
 7 |    GNU Datamash is free software: you can redistribute it and/or modify
 8 |    it under the terms of the GNU General Public License as published by
 9 |    the Free Software Foundation, either version 3 of the License, or
10 |    (at your option) any later version.
11 | 
12 |    GNU Datamash is distributed in the hope that it will be useful,
13 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |    GNU General Public License for more details.
16 | 
17 |    You should have received a copy of the GNU General Public License
18 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
19 | */
20 | #ifndef __DOUBLE_FORMAT_H__
21 | #define __DOUBLE_FORMAT_H__
22 | 
23 | char*
24 | validate_double_format (char const *fmt);
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/tests/test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | #   Unit Tests for GNU Datamash - perform simple calculation on input data
 3 | 
 4 | #    Copyright (C) 2014-2021 Assaf Gordon <assafgordon@gmail.com>
 5 | #
 6 | #    This file is part of GNU Datamash.
 7 | #
 8 | #    GNU Datamash is free software: you can redistribute it and/or modify
 9 | #    it under the terms of the GNU General Public License as published by
10 | #    the Free Software Foundation, either version 3 of the License, or
11 | #    (at your option) any later version.
12 | #
13 | #    GNU Datamash is distributed in the hope that it will be useful,
14 | #    but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | #    GNU General Public License for more details.
17 | #
18 | #    You should have received a copy of the GNU General Public License
19 | #    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | #    Written by Assaf Gordon
22 | 
23 | echo "Hello (Shell Unit-Testing) World"
24 | 
25 | ## test passed:
26 | exit 0
27 | 


--------------------------------------------------------------------------------
/tests/no-perl:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | #   Unit Tests for GNU Datamash - stub for systems without Perl
 4 | 
 5 | #    Copyright (C) 2014-2021 Assaf Gordon <assafgordon@gmail.com>
 6 | #
 7 | #    This file is part of GNU Datamash.
 8 | #
 9 | #    GNU Datamash is free software: you can redistribute it and/or modify
10 | #    it under the terms of the GNU General Public License as published by
11 | #    the Free Software Foundation, either version 3 of the License, or
12 | #    (at your option) any later version.
13 | #
14 | #    GNU Datamash is distributed in the hope that it will be useful,
15 | #    but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | #    GNU General Public License for more details.
18 | #
19 | #    You should have received a copy of the GNU General Public License
20 | #    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
21 | #
22 | #    Written by Assaf Gordon.
23 | 
24 | . "${test_dir=.}/init.sh"; path_prepend_ ./src
25 | skip_ "this test requires a working perl"
26 | 


--------------------------------------------------------------------------------
/src/decorate-functions.h:
--------------------------------------------------------------------------------
 1 | /* Decorate functions
 2 | 
 3 |    Copyright (C) 2020-2021 Assaf Gordon <assafgordon@gmail.com>
 4 | 
 5 |    This program is free software: you can redistribute it and/or modify
 6 |    it under the terms of the GNU General Public License as published by
 7 |    the Free Software Foundation, either version 3 of the License, or
 8 |    (at your option) any later version.
 9 | 
10 |    This program is distributed in the hope that it will be useful,
11 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |    GNU General Public License for more details.
14 | 
15 |    You should have received a copy of the GNU General Public License
16 |    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17 | 
18 | #ifndef DECORATE_FUNCTIONS_H
19 | #define DECORATE_FUNCTIONS_H
20 | 
21 | struct conversions_t
22 | {
23 |   const char* name;
24 |   const char* description;
25 |   bool (*decorate_fn)(const char* in);
26 | };
27 | 
28 | 
29 | extern struct conversions_t builtin_conversions[];
30 | 
31 | #endif
32 | 


--------------------------------------------------------------------------------
/src/randutils.h:
--------------------------------------------------------------------------------
 1 | /* GNU Datamash - perform simple calculation on input data
 2 | 
 3 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
 4 | 
 5 |    This file is part of GNU Datamash.
 6 | 
 7 |    GNU Datamash is free software: you can redistribute it and/or modify
 8 |    it under the terms of the GNU General Public License as published by
 9 |    the Free Software Foundation, either version 3 of the License, or
10 |    (at your option) any later version.
11 | 
12 |    GNU Datamash is distributed in the hope that it will be useful,
13 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |    GNU General Public License for more details.
16 | 
17 |    You should have received a copy of the GNU General Public License
18 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
19 | */
20 | 
21 | /* Written by Tim Rice */
22 | #ifndef __RANDUTILS_H__
23 | #define __RANDUTILS_H__
24 | 
25 | # include <stdbool.h>
26 | 
27 | /* Initialize random number source */
28 | void
29 | init_random (bool force_seed, unsigned long seed);
30 | 
31 | #endif // __RANDUTILS_H__
32 | 


--------------------------------------------------------------------------------
/tests/datamash-show-env.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | #   Unit Tests for GNU Datamash - perform simple calculation on input data
 4 | 
 5 | #    Copyright (C) 2014-2021 Assaf Gordon <assafgordon@gmail.com>
 6 | #
 7 | #    This file is part of GNU Datamash.
 8 | #
 9 | #    GNU Datamash is free software: you can redistribute it and/or modify
10 | #    it under the terms of the GNU General Public License as published by
11 | #    the Free Software Foundation, either version 3 of the License, or
12 | #    (at your option) any later version.
13 | #
14 | #    GNU Datamash is distributed in the hope that it will be useful,
15 | #    but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | #    GNU General Public License for more details.
18 | #
19 | #    You should have received a copy of the GNU General Public License
20 | #    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
21 | #
22 | #    Written by Assaf Gordon
23 | 
24 | ### DEBUG Helper to show the ENV
25 | echo "Debug Helper"
26 | echo "-----ENV------"
27 | env
28 | echo
29 | echo
30 | echo "-----PWD------"
31 | pwd
32 | echo
33 | echo
34 | exit 0
35 | 


--------------------------------------------------------------------------------
/src/die.h:
--------------------------------------------------------------------------------
 1 | /* Report an error and exit.
 2 |    Copyright 2016-2018 Free Software Foundation, Inc.
 3 | 
 4 |    This program is free software; you can redistribute it and/or modify
 5 |    it under the terms of the GNU General Public License as published by
 6 |    the Free Software Foundation; either version 3, or (at your option)
 7 |    any later version.
 8 | 
 9 |    This program is distributed in the hope that it will be useful,
10 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |    GNU General Public License for more details.
13 | 
14 |    You should have received a copy of the GNU General Public License
15 |    along with this program; if not, see https://www.gnu.org/licenses/.
16 |    */
17 | 
18 | #ifndef DIE_H
19 | # define DIE_H
20 | 
21 | # include <error.h>
22 | # include <stdbool.h>
23 | # include <verify.h>
24 | 
25 | /* Like 'error (STATUS, ...)', except STATUS must be a nonzero constant.
26 |    This may pacify the compiler or help it generate better code.  */
27 | # define die(status, ...) \
28 |   verify_expr (status, (error (status, __VA_ARGS__), assume (false)))
29 | 
30 | #endif /* DIE_H */
31 | 


--------------------------------------------------------------------------------
/po/en@quot.header:
--------------------------------------------------------------------------------
 1 | # All this catalog "translates" are quotation characters.
 2 | # The msgids must be ASCII and therefore cannot contain real quotation
 3 | # characters, only substitutes like grave accent (0x60), apostrophe (0x27)
 4 | # and double quote (0x22). These substitutes look strange; see
 5 | # http://www.cl.cam.ac.uk/~mgk25/ucs/quotes.html
 6 | #
 7 | # This catalog translates grave accent (0x60) and apostrophe (0x27) to
 8 | # left single quotation mark (U+2018) and right single quotation mark (U+2019).
 9 | # It also translates pairs of apostrophe (0x27) to
10 | # left single quotation mark (U+2018) and right single quotation mark (U+2019)
11 | # and pairs of quotation mark (0x22) to
12 | # left double quotation mark (U+201C) and right double quotation mark (U+201D).
13 | #
14 | # When output to an UTF-8 terminal, the quotation characters appear perfectly.
15 | # When output to an ISO-8859-1 terminal, the single quotation marks are
16 | # transliterated to apostrophes (by iconv in glibc 2.2 or newer) or to
17 | # grave/acute accent (by libiconv), and the double quotation marks are
18 | # transliterated to 0x22.
19 | # When output to an ASCII terminal, the single quotation marks are
20 | # transliterated to apostrophes, and the double quotation marks are
21 | # transliterated to 0x22.
22 | #
23 | 


--------------------------------------------------------------------------------
/m4/ax_c_long_long.m4:
--------------------------------------------------------------------------------
 1 | # ===========================================================================
 2 | #      https://www.gnu.org/software/autoconf-archive/ax_c_long_long.html
 3 | # ===========================================================================
 4 | #
 5 | # SYNOPSIS
 6 | #
 7 | #   AX_C_LONG_LONG
 8 | #
 9 | # DESCRIPTION
10 | #
11 | #   Provides a test for the existence of the long long int type and defines
12 | #   HAVE_LONG_LONG if it is found.
13 | #
14 | # LICENSE
15 | #
16 | #   Copyright (c) 2008 Caolan McNamara <caolan@skynet.ie>
17 | #
18 | #   Copying and distribution of this file, with or without modification, are
19 | #   permitted in any medium without royalty provided the copyright notice
20 | #   and this notice are preserved. This file is offered as-is, without any
21 | #   warranty.
22 | 
23 | #serial 7
24 | 
25 | AU_ALIAS([AC_C_LONG_LONG], [AX_C_LONG_LONG])
26 | AC_DEFUN([AX_C_LONG_LONG],
27 | [AC_CACHE_CHECK(for long long int, ac_cv_c_long_long,
28 | [if test "$GCC" = yes; then
29 |   ac_cv_c_long_long=yes
30 |   else
31 |     AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [[long long int i;]])],
32 |       [ac_cv_c_long_long=yes],
33 |       [ac_cv_c_long_long=no])
34 |   fi])
35 |   if test $ac_cv_c_long_long = yes; then
36 |     AC_DEFINE([HAVE_LONG_LONG], 1, [compiler understands long long])
37 |   fi
38 | ])
39 | 


--------------------------------------------------------------------------------
/po/en@boldquot.header:
--------------------------------------------------------------------------------
 1 | # All this catalog "translates" are quotation characters.
 2 | # The msgids must be ASCII and therefore cannot contain real quotation
 3 | # characters, only substitutes like grave accent (0x60), apostrophe (0x27)
 4 | # and double quote (0x22). These substitutes look strange; see
 5 | # http://www.cl.cam.ac.uk/~mgk25/ucs/quotes.html
 6 | #
 7 | # This catalog translates grave accent (0x60) and apostrophe (0x27) to
 8 | # left single quotation mark (U+2018) and right single quotation mark (U+2019).
 9 | # It also translates pairs of apostrophe (0x27) to
10 | # left single quotation mark (U+2018) and right single quotation mark (U+2019)
11 | # and pairs of quotation mark (0x22) to
12 | # left double quotation mark (U+201C) and right double quotation mark (U+201D).
13 | #
14 | # When output to an UTF-8 terminal, the quotation characters appear perfectly.
15 | # When output to an ISO-8859-1 terminal, the single quotation marks are
16 | # transliterated to apostrophes (by iconv in glibc 2.2 or newer) or to
17 | # grave/acute accent (by libiconv), and the double quotation marks are
18 | # transliterated to 0x22.
19 | # When output to an ASCII terminal, the single quotation marks are
20 | # transliterated to apostrophes, and the double quotation marks are
21 | # transliterated to 0x22.
22 | #
23 | # This catalog furthermore displays the text between the quotation marks in
24 | # bold face, assuming the VT100/XTerm escape sequences.
25 | #
26 | 


--------------------------------------------------------------------------------
/tests/datamash-io-errors-cheap.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Unit Tests for GNU Datamash - simple I/O error simulation
 4 | 
 5 | # Copyright (C) 2022 Erik Auerswald <auerswal@unix-ag.uni-kl.de>
 6 | #
 7 | # This file is part of GNU Datamash.
 8 | #
 9 | # GNU Datamash is free software: you can redistribute it and/or modify
10 | # it under the terms of the GNU General Public License as published by
11 | # the Free Software Foundation, either version 3 of the License, or
12 | # (at your option) any later version.
13 | #
14 | # GNU Datamash is distributed in the hope that it will be useful,
15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | # GNU General Public License for more details.
18 | #
19 | # You should have received a copy of the GNU General Public License
20 | # along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
21 | #
22 | # Written by Erik Auerswald,
23 | # based on datamash-io-errors.sh written by Assaf Gordon
24 | 
25 | ##
26 | ## This script tests GNU Datamash's handling of basic I/O errors.
27 | ##
28 | 
29 | . "${test_dir=.}/init.sh"; path_prepend_ ./src
30 | 
31 | fail=0
32 | 
33 | ##
34 | ## This test requires the special file /dev/full
35 | ##
36 | test -w /dev/full || skip_ 'requires writable /dev/full'
37 | 
38 | ## Test 1: output error
39 | echo 0 | datamash -g 1 count 1 > /dev/full &&
40 | 	{ warn_ "datamash failed to detect no-space error" ; fail=1 ; }
41 | 
42 | Exit $fail
43 | 


--------------------------------------------------------------------------------
/tests/CuSkip.pm:
--------------------------------------------------------------------------------
 1 | package CuSkip;
 2 | # This file (CuSkip.pm) was copied from GNU Coreutils.
 3 | # It has the following copyright notice and license:
 4 | 
 5 | # Skip a test: emit diag to log and to stderr, and exit 77
 6 | 
 7 | # Copyright (C) 2011-2013 Free Software Foundation, Inc.
 8 | 
 9 | # This program is free software: you can redistribute it and/or modify
10 | # it under the terms of the GNU General Public License as published by
11 | # the Free Software Foundation, either version 3 of the License, or
12 | # (at your option) any later version.
13 | 
14 | # This program is distributed in the hope that it will be useful,
15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 | # GNU General Public License for more details.
18 | 
19 | # You should have received a copy of the GNU General Public License
20 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
21 | 
22 | use strict;
23 | use warnings;
24 | 
25 | our $ME = $0 || "<???>";
26 | 
27 | # Skip the current test: emit MSG both to stderr and to the file
28 | # descriptor numbered $stderr_fileno_, then exit with status 77.
29 | # FIXME: don't hard-code that value (9), since it's already defined in init.cfg.
30 | sub skip ($)
31 | {
32 |   my ($msg) = @_;
33 |   my $stderr_fileno_ = 9;
34 |   warn $msg;
35 | 
36 |   # Dup the descriptor through a lexical handle (three-argument open).
37 |   # Only write to and close the copy when the dup actually succeeded;
38 |   # printing to an unopened handle would just add spurious warnings.
39 |   if (open my $fh, '>&', $stderr_fileno_)
40 |     {
41 |       print {$fh} $msg;
42 |       close $fh
43 |         or warn "$ME: failed to close FD $stderr_fileno_\n";
44 |     }
45 |   else
46 |     {
47 |       warn "$ME: failed to dup stderr\n";
48 |     }
49 |   exit 77;
50 | }
51 | 
52 | 1;
53 | 


--------------------------------------------------------------------------------
/src/randutils.c:
--------------------------------------------------------------------------------
 1 | /* GNU Datamash - perform simple calculation on input data
 2 | 
 3 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
 4 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
 5 | 
 6 |    This file is part of GNU Datamash.
 7 | 
 8 |    GNU Datamash is free software: you can redistribute it and/or modify
 9 |    it under the terms of the GNU General Public License as published by
10 |    the Free Software Foundation, either version 3 of the License, or
11 |    (at your option) any later version.
12 | 
13 |    GNU Datamash is distributed in the hope that it will be useful,
14 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |    GNU General Public License for more details.
17 | 
18 |    You should have received a copy of the GNU General Public License
19 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
20 | */
21 | 
22 | #include <config.h>
23 | #include <errno.h>
24 | #include <stdio.h>
25 | #include <stdlib.h>
26 | #include <string.h>
27 | #include <time.h>
28 | #include <unistd.h>
29 | #include <sys/random.h>
30 | 
31 | #include "randutils.h"
32 | 
33 | /* Seed the random(3) generator.
34 | 
35 |    If FORCE_SEED is true, SEED is used as given.  Otherwise SEED is
36 |    ignored and a fresh seed is requested from getrandom(2).  If that
37 |    call does not deliver a complete seed, a seed derived from the
38 |    current time and process ID is used instead; when getrandom reports
39 |    an error, a diagnostic is also printed to stderr.  */
40 | void
41 | init_random (bool force_seed, unsigned long seed)
42 | {
43 |   if (!force_seed)
44 |     {
45 |       errno = 0;
46 |       ssize_t nbytes = getrandom (&seed, sizeof (seed), 0);
47 |       if (nbytes != (ssize_t) sizeof (seed))
48 |         {
49 |           /* errno is only meaningful when getrandom reported failure.  */
50 |           if (nbytes == -1)
51 |             fprintf (stderr, "Error %d: %s\n", errno, strerror (errno));
52 |           /* Do not fall through with the caller's (ignored) SEED or a
53 |              partially filled one: mix time and PID as a weak fallback.  */
54 |           seed = (unsigned long) time (NULL)
55 |                  ^ ((unsigned long) getpid () << 16);
56 |         }
57 |     }
58 |   /* srandom takes an unsigned int; a wider seed is truncated.  */
59 |   srandom (seed);
60 | }
61 | 


--------------------------------------------------------------------------------
/doc/local.mk:
--------------------------------------------------------------------------------
 1 | # Make GNU Datamash documentation.				-*-Makefile-*-
 2 | # This is included by the top-level Makefile.am.
 3 | 
 4 | # Based on GNU Hello:
 5 | #   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
 6 | #   2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Free
 7 | #   Software Foundation, Inc.
 8 | 
 9 | # Modifications for GNU Datamash are
10 | # Copyright (C) 2014-2021 Assaf Gordon <assafgordon@gmail.com>
11 | 
12 | # This program is free software: you can redistribute it and/or modify
13 | # it under the terms of the GNU General Public License as published by
14 | # the Free Software Foundation, either version 3 of the License, or
15 | # (at your option) any later version.
16 | 
17 | # This program is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20 | # GNU General Public License for more details.
21 | 
22 | # You should have received a copy of the GNU General Public License
23 | # along with this program.  If not, see <https://www.gnu.org/licenses/>.
24 | 
25 | info_TEXINFOS = doc/datamash.texi
26 | EXTRA_DIST += doc/datamash-texinfo.css
27 | 
28 | # For the 'make html' target - generate a single HTML file
29 | # and embed the CSS statements in it.
30 | AM_MAKEINFOHTMLFLAGS = --no-split \
31 | 	--css-include=$(top_srcdir)/doc/datamash-texinfo.css
32 | 
33 | # Changes to the CSS should trigger a new HTML regeneration
34 | $(top_builddir)/doc/datamash.html: $(top_srcdir)/doc/datamash-texinfo.css
35 | 
36 | doc_datamash_TEXINFOS = \
37 |   doc/fdl.texi
38 | 


--------------------------------------------------------------------------------
/src/crosstab.h:
--------------------------------------------------------------------------------
 1 | /* GNU Datamash - perform simple calculation on input data
 2 | 
 3 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
 4 | 
 5 |    This file is part of GNU Datamash.
 6 | 
 7 |    GNU Datamash is free software: you can redistribute it and/or modify
 8 |    it under the terms of the GNU General Public License as published by
 9 |    the Free Software Foundation, either version 3 of the License, or
10 |    (at your option) any later version.
11 | 
12 |    GNU Datamash is distributed in the hope that it will be useful,
13 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |    GNU General Public License for more details.
16 | 
17 |    You should have received a copy of the GNU General Public License
18 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
19 | */
20 | 
21 | /* Written by Assaf Gordon */
22 | #ifndef __CROSSTAB_H__
23 | #define __CROSSTAB_H__
24 | 
25 | struct crosstab
26 | {
27 |   Hash_table *rows;
28 |   Hash_table *columns;
29 |   Hash_table *data;
30 | };
31 | 
32 | struct crosstab_datacell
33 | {
34 |   const char* row_name;
35 |   const char* col_name;
36 |   const char* data;
37 | };
38 | /* Returns a pointer to a 'struct crosstab_datacell' (declared above).  */
39 | struct crosstab_datacell*
40 | crosstab_new_datacell (const char* row, const char* col, const char* data);
41 | 
42 | struct crosstab*
43 | crosstab_init ();
44 | 
45 | void
46 | crosstab_add_result (struct crosstab* ct,
47 |                       const char* row, const char* col, const char* data);
48 | 
49 | void
50 | crosstab_print (const struct crosstab* ct);
51 | 
52 | void
53 | crosstab_free (struct crosstab* ct);
54 | 
55 | #endif /* __CROSSTAB_H__ */
56 | 


--------------------------------------------------------------------------------
/examples/make_genes_example.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | ## Copyright (C) 2014-2021 Assaf Gordon <assafgordon@gmail.com>
 4 | ##
 5 | ## This file is part of GNU Datamash.
 6 | ##
 7 | ## GNU Datamash is free software: you can redistribute it and/or modify
 8 | ## it under the terms of the GNU General Public License as published by
 9 | ## the Free Software Foundation, either version 3 of the License, or
10 | ## (at your option) any later version.
11 | ##
12 | ## GNU Datamash is distributed in the hope that it will be useful,
13 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 | ## GNU General Public License for more details.
16 | ##
17 | ## You should have received a copy of the GNU General Public License
18 | ## along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
19 | 
20 | 
21 | ##
22 | ## A short script to generate a sample of genes based on the HG19/RefSeq file.
23 | ##
24 | 
25 | if [ ! -e "refGene.txt" ] ; then
26 | 	wget http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/refGene.txt.gz || exit 1
27 | 	gunzip refGene.txt.gz || exit 1
28 | fi
29 | # Up to 100 genes on >1 chrom/strand + up to 1000 random; datamash emits TABs, hence plain 'cut -f1'.
30 | (cat refGene.txt |
31 | 		sort -k13,13 |
32 | 		../datamash -g 13 countunique 3 countunique 4 |
33 | 		awk '$2>1 || $3>1' | sort -R | head -n 100 | cut -f1 ;
34 | 	   cut -f13 refGene.txt | sort -R -u | head -n 1000 ) |
35 | 		 sort -u > genelist.txt
36 | 
37 | grep -F -f genelist.txt refGene.txt | grep -E -v "chrUn|hap" > genes.txt
38 | 
39 | ( echo "bin
40 | name
41 | chrom
42 | strand
43 | txStart
44 | txEnd
45 | cdsStart
46 | cdsEnd
47 | exonCount
48 | exonStarts
49 | exonEnds
50 | score
51 | name2
52 | cdsStartStat
53 | cdsEndStat
54 | exonFrames" | paste -s -d '	' ; cat genes.txt ) > genes_h.txt
55 | 
56 | rm -f genelist.txt
57 | 


--------------------------------------------------------------------------------
/src/op-scanner.h:
--------------------------------------------------------------------------------
 1 | /* GNU Datamash - perform simple calculation on input data
 2 | 
 3 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
 4 | 
 5 |    This file is part of GNU Datamash.
 6 | 
 7 |    GNU Datamash is free software: you can redistribute it and/or modify
 8 |    it under the terms of the GNU General Public License as published by
 9 |    the Free Software Foundation, either version 3 of the License, or
10 |    (at your option) any later version.
11 | 
12 |    GNU Datamash is distributed in the hope that it will be useful,
13 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |    GNU General Public License for more details.
16 | 
17 |    You should have received a copy of the GNU General Public License
18 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
19 | */
20 | 
21 | /* Written by Assaf Gordon */
22 | #ifndef __OP_SCANNER_H__
23 | #define __OP_SCANNER_H__
24 | 
25 | #define MAX_IDENTIFIER_LENGTH 512
26 | 
27 | enum TOKEN
28 | {
29 |   TOK_END=0,
30 |   TOK_IDENTIFIER,
31 |   TOK_INTEGER,
32 |   TOK_FLOAT,
33 |   TOK_COMMA,
34 |   TOK_DASH,
35 |   TOK_COLONS,
36 |   TOK_WHITESPACE
37 | };
38 | 
39 | extern uintmax_t scan_val_int;
40 | extern long double scan_val_float;
41 | extern char* scanner_identifier;
42 | extern bool scanner_keep_whitespace;
43 | 
44 | /* Initialize the scanner from argc/argv pair.
45 |    note: argv should contain only the actual input: remove
46 |          any other program parameters (including progname/argv[0]) */
47 | void
48 | scanner_set_input_from_argv (int argc, const char* argv[]);
49 | 
50 | /* Free any data/memory associated with the scanner */
51 | void
52 | scanner_free ();
53 | 
54 | enum TOKEN
55 | scanner_get_token ();
56 | 
57 | enum TOKEN
58 | scanner_peek_token ();
59 | 
60 | #endif
61 | 


--------------------------------------------------------------------------------
/man/decorate.x:
--------------------------------------------------------------------------------
 1 | ." GNU decorate - manual page
 2 | ." Copyright (C) 2014-2021 Assaf Gordon <assafgordon@gmail.com>
 3 | [NAME]
 4 | decorate - convert fields of various formats
 5 | 
 6 | 
 7 | [>DESCRIPTION]
 8 | The \fBdecorate\fR program allows sorting input according to various
 9 | orderings, e.g. IP addresses, roman numerals, etc.
10 | It works in tandem with sort(1) to perform the actual sorting.
11 | 
12 | The idea was suggested by
13 | .UR https://lists.gnu.org/r/bug-coreutils/2015-06/msg00076.html
14 | Pádraig Brady in https://lists.gnu.org/r/bug-coreutils/2015-06/msg00076.html:
15 | 
16 | 1. Decorate: convert the input to a sortable-format as additional fields
17 | .br
18 | 2. Sort according to the inserted fields
19 | .br
20 | 3. Undecorate: remove the inserted fields
21 | 
22 | [=EXAMPLES]
23 | Example of preparing to sort by roman numerals:
24 | .PP
25 | .nf
26 | .RS
27 | $ printf "%s\\n" C V III IX XI | \fBdecorate\fR \-k1,1:roman \-\-decorate
28 | 0000100 C
29 | 0000005 V
30 | 0000003 III
31 | 0000009 IX
32 | 0000011 XI
33 | .RE
34 | .fi
35 | .PP
36 | 
37 | The output can now be sent to sort(1), followed by removing (=undecorate)
38 | the first field.
39 | 
40 | .PP
41 | .nf
42 | .RS
43 | $ printf "%s\\n" C V III IX XI \\
44 |        | \fBdecorate\fR \-k1,1:roman \-\-decorate \\
45 |        | sort \-k1,1 \\
46 |        | \fBdecorate\fR \-\-undecorate 1
47 | III
48 | V
49 | IX
50 | XI
51 | C
52 | .RE
53 | .fi
54 | .PP
55 | 
56 | \fBdecorate(1)\fR can automatically combine the decorate-sort-undecorate steps
57 | (when run without \-\-decorate or \-\-undecorate):
58 | 
59 | .PP
60 | .nf
61 | .RS
62 | $ printf "%s\\n" C V III IX XI | \fBdecorate\fR \-k1,1:roman
63 | III
64 | V
65 | IX
66 | XI
67 | C
68 | .RE
69 | .fi
70 | .PP
71 | 
72 | 
73 | 
74 | 
75 | 
76 | [ADDITIONAL INFORMATION]
77 | See
78 | .UR https://www.gnu.org/software/datamash
79 | GNU Datamash Website (https://www.gnu.org/software/datamash)
80 | 


--------------------------------------------------------------------------------
/examples/make_score_example.r:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript
 2 | 
 3 | ## Copyright (C) 2014-2021 Assaf Gordon <assafgordon@gmail.com>
 4 | ##
 5 | ## This file is part of GNU Datamash.
 6 | ##
 7 | ## GNU Datamash is free software: you can redistribute it and/or modify
 8 | ## it under the terms of the GNU General Public License as published by
 9 | ## the Free Software Foundation, either version 3 of the License, or
10 | ## (at your option) any later version.
11 | ##
12 | ## GNU Datamash is distributed in the hope that it will be useful,
13 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 | ## GNU General Public License for more details.
16 | ##
17 | ## You should have received a copy of the GNU General Public License
18 | ## along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
19 | 
20 | ##
21 | ## A short R script to generate random data for the 'scores' example.
22 | ##
23 | library(randomNames)
24 | 
25 | gen_data = function(count,group_name,mean,sd)
26 | {
27 |   return(
28 |      data.frame(Name=gsub(" ","-",randomNames(count,gender="M",which.names="first")),
29 |                 Major=rep(group_name,count),
30 |                 Score=pmin(round(rnorm(count, mean=mean,sd=sd)),100)))
31 | }
32 | 
33 | exp = rbind(
34 | 	gen_data( runif(1,min=10,max=20), "Arts", runif(1,min=50,max=90), runif(1,min=5,max=20) ),
35 | 	gen_data( runif(1,min=10,max=20), "Business", runif(1,min=50,max=90), runif(1,min=5,max=20) ),
36 | 	gen_data( runif(1,min=10,max=20), "Health-Medicine", runif(1,min=50,max=90), runif(1,min=5,max=20) ),
37 | 	gen_data( runif(1,min=10,max=20), "Social-Sciences", runif(1,min=50,max=90), runif(1,min=5,max=20) ),
38 | 	gen_data( runif(1,min=10,max=20), "Life-Sciences", runif(1,min=50,max=90), runif(1,min=5,max=20) ),
39 | 	gen_data( runif(1,min=10,max=20), "Engineering", runif(1,min=50,max=90), runif(1,min=5,max=20) )
40 |        )
41 | 
42 | write.table(exp,file="scores.txt",sep="\t",row.names=FALSE,col.names=FALSE,quote=FALSE);
43 | write.table(exp,file="scores_h.txt",sep="\t",row.names=FALSE,col.names=T,quote=FALSE);
44 | 


--------------------------------------------------------------------------------
/tests/datamash-rand.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | =pod
 3 |    Unit Tests for GNU Datamash - tests rand operations
 4 | 
 5 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
 6 | 
 7 |    This file is part of GNU Datamash.
 8 | 
 9 |    GNU Datamash is free software: you can redistribute it and/or modify
10 |    it under the terms of the GNU General Public License as published by
11 |    the Free Software Foundation, either version 3 of the License, or
12 |    (at your option) any later version.
13 | 
14 |    GNU Datamash is distributed in the hope that it will be useful,
15 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
16 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 |    GNU General Public License for more details.
18 | 
19 |    You should have received a copy of the GNU General Public License
20 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
21 | 
22 |    Written by Tim Rice.
23 | =cut
24 | use strict;
25 | use warnings;
26 | 
27 | # Until a better way comes along to auto-use Coreutils Perl modules
28 | # as in the coreutils' autotools system.
29 | use Coreutils;
30 | use CuSkip;
31 | use CuTmpdir qw(datamash);
32 | 
33 | (my $program_name = $0) =~ s|.*/||;
34 | my $prog_bin = 'datamash';
35 | 
36 | ## Cross-Compiling portability hack:
37 | ##  under qemu/binfmt, argv[0] (which is used to report errors) will contain
38 | ##  the full path of the binary, if the binary is on the $PATH.
39 | ##  So we try to detect what is the actual returned value of the program
40 | ##  in case of an error.
41 | my $prog = `$prog_bin ---print-progname`;
42 | $prog = $prog_bin unless $prog;
43 | 
44 | # Turn off localization of executable's output.
45 | @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
46 | 
47 | my $in=<<'EOF';
48 | A A
49 | A B
50 | B A
51 | B B
52 | EOF
53 | 
54 | my $out=<<'EOF';
55 | A	B
56 | B	A
57 | EOF
58 | # 'rand' should emit one value per group; -S0 presumably pins the seed - confirm.
59 | my @Tests =
60 | (
61 |   ['r1',  '-W -S0 groupby 1 rand 2',  {IN_PIPE=>$in}, {OUT=>$out}],
62 | );
63 | 
64 | my $save_temps = $ENV{SAVE_TEMPS};
65 | my $verbose = $ENV{VERBOSE};
66 | 
67 | my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
68 | exit $fail;
69 | 


--------------------------------------------------------------------------------
/examples/scores.txt:
--------------------------------------------------------------------------------
 1 | Shawn	Arts	65
 2 | Marques	Arts	58
 3 | Fernando	Arts	78
 4 | Paul	Arts	63
 5 | Walter	Arts	75
 6 | Derek	Arts	60
 7 | Nathaniel	Arts	88
 8 | Tyreque	Arts	74
 9 | Trevon	Arts	74
10 | Nathan	Arts	71
11 | Zachary	Arts	80
12 | Donovan	Arts	75
13 | Levi	Arts	76
14 | Sage	Arts	55
15 | Roberto	Arts	65
16 | William	Arts	46
17 | Nico	Arts	59
18 | Bryan	Arts	68
19 | Isaiah	Arts	80
20 | David	Business	92
21 | Leonard	Business	87
22 | Tysza	Business	92
23 | Darren	Business	94
24 | Christian	Business	88
25 | Aaron	Business	83
26 | Kerris	Business	82
27 | Dakota	Business	83
28 | Teriuse	Business	94
29 | Caleb	Business	87
30 | Juan	Business	79
31 | Andre	Health-Medicine	72
32 | Diego	Health-Medicine	82
33 | Jonathan	Health-Medicine	100
34 | Kevin	Health-Medicine	100
35 | Patrick	Health-Medicine	92
36 | D'Angelo	Health-Medicine	90
37 | Daniel	Health-Medicine	91
38 | Dilan	Health-Medicine	84
39 | Angel	Health-Medicine	100
40 | Peter	Health-Medicine	86
41 | Dalton	Health-Medicine	100
42 | Israel	Health-Medicine	81
43 | Gabriel	Health-Medicine	100
44 | Chase	Social-Sciences	27
45 | Leroy	Social-Sciences	74
46 | Jesse	Social-Sciences	32
47 | Drake	Social-Sciences	76
48 | Ja'Won	Social-Sciences	37
49 | Joel	Social-Sciences	72
50 | Darius	Social-Sciences	51
51 | David	Social-Sciences	69
52 | Williams	Social-Sciences	62
53 | Manuel	Social-Sciences	61
54 | Lance	Social-Sciences	65
55 | Drake	Social-Sciences	59
56 | Joseph	Social-Sciences	61
57 | Randy	Social-Sciences	68
58 | Justin	Social-Sciences	90
59 | Yeng	Life-Sciences	39
60 | Allen	Life-Sciences	50
61 | Brandon	Life-Sciences	72
62 | Christian	Life-Sciences	67
63 | Aaron	Life-Sciences	58
64 | Gurnam	Life-Sciences	66
65 | Anthony	Life-Sciences	32
66 | Joshua	Life-Sciences	14
67 | Nathen	Life-Sciences	46
68 | Christopher	Life-Sciences	59
69 | John	Life-Sciences	70
70 | Austin	Life-Sciences	91
71 | Antonio	Engineering	88
72 | Faison	Engineering	47
73 | Devin	Engineering	92
74 | Ignatius	Engineering	83
75 | Sonny	Engineering	50
76 | Antonio	Engineering	56
77 | Zackery	Engineering	54
78 | Joe'Quann	Engineering	75
79 | Thanh	Engineering	53
80 | Michael	Engineering	39
81 | Leonardo	Engineering	78
82 | Omar	Engineering	99
83 | Avery	Engineering	51
84 | 


--------------------------------------------------------------------------------
/examples/scores_h.txt:
--------------------------------------------------------------------------------
 1 | Name	Major	Score
 2 | Shawn	Arts	65
 3 | Marques	Arts	58
 4 | Fernando	Arts	78
 5 | Paul	Arts	63
 6 | Walter	Arts	75
 7 | Derek	Arts	60
 8 | Nathaniel	Arts	88
 9 | Tyreque	Arts	74
10 | Trevon	Arts	74
11 | Nathan	Arts	71
12 | Zachary	Arts	80
13 | Donovan	Arts	75
14 | Levi	Arts	76
15 | Sage	Arts	55
16 | Roberto	Arts	65
17 | William	Arts	46
18 | Nico	Arts	59
19 | Bryan	Arts	68
20 | Isaiah	Arts	80
21 | David	Business	92
22 | Leonard	Business	87
23 | Tysza	Business	92
24 | Darren	Business	94
25 | Christian	Business	88
26 | Aaron	Business	83
27 | Kerris	Business	82
28 | Dakota	Business	83
29 | Teriuse	Business	94
30 | Caleb	Business	87
31 | Juan	Business	79
32 | Andre	Health-Medicine	72
33 | Diego	Health-Medicine	82
34 | Jonathan	Health-Medicine	100
35 | Kevin	Health-Medicine	100
36 | Patrick	Health-Medicine	92
37 | D'Angelo	Health-Medicine	90
38 | Daniel	Health-Medicine	91
39 | Dilan	Health-Medicine	84
40 | Angel	Health-Medicine	100
41 | Peter	Health-Medicine	86
42 | Dalton	Health-Medicine	100
43 | Israel	Health-Medicine	81
44 | Gabriel	Health-Medicine	100
45 | Chase	Social-Sciences	27
46 | Leroy	Social-Sciences	74
47 | Jesse	Social-Sciences	32
48 | Drake	Social-Sciences	76
49 | Ja'Won	Social-Sciences	37
50 | Joel	Social-Sciences	72
51 | Darius	Social-Sciences	51
52 | David	Social-Sciences	69
53 | Williams	Social-Sciences	62
54 | Manuel	Social-Sciences	61
55 | Lance	Social-Sciences	65
56 | Drake	Social-Sciences	59
57 | Joseph	Social-Sciences	61
58 | Randy	Social-Sciences	68
59 | Justin	Social-Sciences	90
60 | Yeng	Life-Sciences	39
61 | Allen	Life-Sciences	50
62 | Brandon	Life-Sciences	72
63 | Christian	Life-Sciences	67
64 | Aaron	Life-Sciences	58
65 | Gurnam	Life-Sciences	66
66 | Anthony	Life-Sciences	32
67 | Joshua	Life-Sciences	14
68 | Nathen	Life-Sciences	46
69 | Christopher	Life-Sciences	59
70 | John	Life-Sciences	70
71 | Austin	Life-Sciences	91
72 | Antonio	Engineering	88
73 | Faison	Engineering	47
74 | Devin	Engineering	92
75 | Ignatius	Engineering	83
76 | Sonny	Engineering	50
77 | Antonio	Engineering	56
78 | Zackery	Engineering	54
79 | Joe'Quann	Engineering	75
80 | Thanh	Engineering	53
81 | Michael	Engineering	39
82 | Leonardo	Engineering	78
83 | Omar	Engineering	99
84 | Avery	Engineering	51
85 | 


--------------------------------------------------------------------------------
/tests/decorate-valgrind.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | #   Unit Tests for GNU Decorate - auxiliary program for sort preprocessing
 4 | 
 5 | #    Copyright (C) 2014-2021 Assaf Gordon <assafgordon@gmail.com>
 6 | #    Copyright (C) 2025 Erik Auerswald <auerswal@unix-ag.uni-kl.de>
 7 | #
 8 | #    This file is part of GNU Datamash.
 9 | #
10 | #    GNU Datamash is free software: you can redistribute it and/or modify
11 | #    it under the terms of the GNU General Public License as published by
12 | #    the Free Software Foundation, either version 3 of the License, or
13 | #    (at your option) any later version.
14 | #
15 | #    GNU Datamash is distributed in the hope that it will be useful,
16 | #    but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 | #    GNU General Public License for more details.
19 | #
20 | #    You should have received a copy of the GNU General Public License
21 | #    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
22 | #
23 | #    Written by Assaf Gordon.
24 | 
25 | . "${test_dir=.}/init.sh"; path_prepend_ ./src
26 | 
27 | require_valgrind_
28 | 
29 | ## Don't use valgrind on statically-compiled binary
30 | ## (it gives some false-positives and the test fails).
31 | if command -v ldd >/dev/null 2>&1 ; then
32 |   ## Tricky implicit assumption:
33 |   ## If the system has "ldd" - we can test if this is a static binary.
34 |   ## If the system doesn't have "ldd", we can't test it, and we'll assume
35 |   ## we can valgrind without false-positives.
36 |   ## This is relevant for Mac OS X, where static binaries are discouraged and
37 |   ## difficult to create
38 |   ## (https://developer.apple.com/library/mac/qa/qa1118/_index.html)
39 |   ldd "$(command -v decorate)" >/dev/null 2>/dev/null ||
40 |     skip_ "skipping valgrind test for a non-dynamic-binary decorate"
41 | fi
42 | 
43 | 
44 | fail=0
45 | 
46 | # check fix for buffer under-read (CWE-127) reported by Frank Busse in
47 | # <https://lists.gnu.org/archive/html/bug-datamash/2025-10/msg00000.html>
48 | echo | valgrind --error-exitcode=1 decorate --undecorate 6 > /dev/null ||
49 |   { warn_ "--undecorate 6 buffer under-read - failed" ; fail=1 ; }
50 | 
51 | Exit $fail
52 | 


--------------------------------------------------------------------------------
/src/column-headers.h:
--------------------------------------------------------------------------------
 1 | /* GNU Datamash - perform simple calculation on input data
 2 | 
 3 |    Copyright (C) 2014-2021 Assaf Gordon <assafgordon@gmail.com>
 4 | 
 5 |    This file is part of GNU Datamash.
 6 | 
 7 |    GNU Datamash is free software: you can redistribute it and/or modify
 8 |    it under the terms of the GNU General Public License as published by
 9 |    the Free Software Foundation, either version 3 of the License, or
10 |    (at your option) any later version.
11 | 
12 |    GNU Datamash is distributed in the hope that it will be useful,
13 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |    GNU General Public License for more details.
16 | 
17 |    You should have received a copy of the GNU General Public License
18 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
19 | */
20 | 
21 | /* Written by Assaf Gordon */
22 | #ifndef __COLUMN_HEADERS_H__
23 | #define __COLUMN_HEADERS_H__
24 | 
25 | /*
26 |  Column Headers Module
27 | */
28 | 
29 | /*
30 |   Given a parsed line (representing the header line),
31 |   sets the column names.
32 | 
33 |   if 'store_names' is true,
34 |       stores the name of each field as the column header.
35 |   if 'store_names' is false,
36 |       simply counts the number of fields in the input line.
37 |  */
38 | void
39 | build_input_line_headers (const struct line_record_t *lr, bool store_names);
40 | 
41 | /*
42 |  returns the number of fields as extracted by 'build_input_line_headers ()'
43 |  */
44 | size_t
45 | get_num_column_headers ();
46 | 
47 | /*
48 |  returns the name of column 'field_num' (1 == first field).
49 | 
50 |  If 'store_names' (above) was true, returns the name of the column as
51 |     appeared in the first input line.
52 |  If 'store_names' (above) was false, returns 'field-X'.
53 | 
54 |  The returned string must not be modified (or free'd).
55 | */
56 | const char*
57 | get_input_field_name (size_t field_num);
58 | 
59 | 
60 | /* returns field number (1== first field)
61 |    which matches the given field name.
62 | 
63 |    returns ZERO if no such field found. */
64 | size_t
65 | get_input_field_number (const char* field_name);
66 | 
67 | void
68 | free_column_headers ();
69 | 
70 | #endif
71 | 


--------------------------------------------------------------------------------
/init.cfg:
--------------------------------------------------------------------------------
 1 | # This file is sourced by init.sh, *before* its initialization.
 2 | 
 3 | ##
 4 | ## The file was copied from GNU coreutils, with the following license:
 5 | ##
 6 | 
 7 | # Copyright (C) 2010-2014 Free Software Foundation, Inc.
 8 | 
 9 | # Modifications for GNU Datamash are
10 | # Copyright (C) 2014-2021 Assaf Gordon <assafgordon@gmail.com>
11 | 
12 | # This program is free software: you can redistribute it and/or modify
13 | # it under the terms of the GNU General Public License as published by
14 | # the Free Software Foundation, either version 3 of the License, or
15 | # (at your option) any later version.
16 | 
17 | # This program is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20 | # GNU General Public License for more details.
21 | 
22 | # You should have received a copy of the GNU General Public License
23 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
24 | 
25 | # This goes hand in hand with the "exec 9>&2;" in tests/Makefile.am's
26 | # TESTS_ENVIRONMENT definition.
27 | stderr_fileno_=9
28 | 
29 | # Skip the current test if valgrind doesn't work,
30 | # which could happen if not installed,
31 | # or hasn't support for the built architecture,
32 | # or hasn't appropriate error suppressions installed etc.
33 | require_valgrind_()
34 | {
35 |   valgrind --error-exitcode=1 true 2>/dev/null ||
36 |     skip_ "requires a working valgrind"
37 | }
38 | 
39 | # Skip the current test if 'paste' doesn't work.
40 | # Alpine linux does not have 'paste' in the default minimal installation.
41 | require_paste_()
42 | {
43 |   paste - </dev/null 2>/dev/null ||
44 |     skip_ "requires a working paste(1)"
45 | }
46 | 
47 | # On OpenBSD, define a seq() function wrapping jot(1); handles 1 or 2 arguments.
48 | openbsd_seq_replacement_()
49 | {
50 |   ## Wrap jot on OpenBSD since it doesn't have seq
51 |   test "$(uname -s)" = OpenBSD || return
52 | 
53 |   seq()
54 |   {
55 |     if [ $# -eq 1 ]; then
56 |       jot "$1"
57 |     elif [ $# -eq 2 ]; then
58 |       jot - "$1" "$2"
59 |     fi
60 |   }
61 | }
62 | 
63 | # Skip the current test unless RUN_EXPENSIVE_TESTS is set to 'yes'.
64 | expensive_()
65 | {
66 |   if test "$RUN_EXPENSIVE_TESTS" != yes; then
67 |     skip_ 'expensive: disabled by default
68 | This test is relatively expensive, so it is disabled by default.
69 | To run it anyway, rerun make check with the RUN_EXPENSIVE_TESTS
70 | environment variable set to yes.  E.g.,
71 | 
72 |   env RUN_EXPENSIVE_TESTS=yes make check
73 | 
74 | or use the shortcut target of the toplevel Makefile,
75 | 
76 |   make check-expensive
77 | '
78 |   fi
79 | }
80 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.txt:
--------------------------------------------------------------------------------
 1 | Please **do not** send pull-requests or open new issues on Github.
 2 | 
 3 | Github is a downstream mirror and is not frequently monitored,
 4 | all development is coordinated upstream on GNU resources.
 5 | 
 6 | Send general questions, suggestions or bug reports to:
 7 |      bug-datamash@gnu.org
 8 | 
 9 | Before reporting a new bug, please search previous discussions and bug
10 | reports on the Datamash mailing list:
11 |      https://lists.gnu.org/r/bug-datamash/
12 | 
13 | ## Effective bug reports
14 | 
15 | * Include a descriptive subject line (e.g. what the problem is).
16 | * Include the version (i.e. the output of `datamash --version`).
17 | * Include the operating system and the type of hardware you are using
18 |   (e.g. the output of `uname -a`).
19 | * Include the exact command and parameters you have used.
20 | * Clearly explain what is the output you expected to get, and what is
21 |   the actual result you encountered.
22 | * Include as much information as possible to reproduce the problem.
23 |   If the problem happens on a very large input file, try to provide
24 |   a minimal example (a subset of the input file) that still causes the problem.
25 |   *Do not* include attachments over 40kB.
26 | * List policy is reply-to-all, and non-subscribers may post.
27 | * There may be a moderation delay for a first-time post, whether or not
28 |   you subscribe.
29 | 
30 | 
31 | ## Mailing List Etiquette
32 | 
33 | When sending messages to bug-datamash@gnu.org :
34 | 
35 | * Send messages as plain text.
36 | * Do not send messages encoded as HTML nor encoded as base64 MIME nor
37 |   included as multiple formats.
38 | * Avoid sending large messages, such as log files, system call trace
39 |   output, and other content resulting in messages over about 40 kB.
40 | * Avoid sending screenshots (e.g. PNG files). When reporting errors
41 |   you encounter on the terminal, copy and paste the text to your message.
42 | 
43 | 
44 | 
45 | 
46 | 
47 | Copyright (C) 2017-2021 Assaf Gordon <assafgordon@gmail.com>
48 | 
49 | This program is free software: you can redistribute it and/or modify
50 | it under the terms of the GNU General Public License as published by
51 | the Free Software Foundation, either version 3 of the License, or
52 | (at your option) any later version.
53 | 
54 | This program is distributed in the hope that it will be useful,
55 | but WITHOUT ANY WARRANTY; without even the implied warranty of
56 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
57 | GNU General Public License for more details.
58 | 
59 | You should have received a copy of the GNU General Public License
60 | along with this program.  If not, see <https://www.gnu.org/licenses/>.
61 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.txt:
--------------------------------------------------------------------------------
 1 | Please **do not** send pull-requests or open new issues on Github.
 2 | 
 3 | Github is a downstream mirror and is not frequently monitored,
 4 | all development is coordinated upstream on GNU resources.
 5 | 
 6 | Send general questions, suggestions or bug reports to:
 7 |      bug-datamash@gnu.org
 8 | 
 9 | Before reporting a new bug, please search previous discussions and bug
10 | reports on the Datamash mailing list:
11 |      https://lists.gnu.org/r/bug-datamash/
12 | 
13 | ## Effective bug reports
14 | 
15 | * Include a descriptive subject line (e.g. what the problem is).
16 | * Include the version (i.e. the output of `datamash --version`).
17 | * Include the operating system and the type of hardware you are using
18 |   (e.g. the output of `uname -a`).
19 | * Include the exact command and parameters you have used.
20 | * Clearly explain what is the output you expected to get, and what is
21 |   the actual result you encountered.
22 | * Include as much information as possible to reproduce the problem.
23 |   If the problem happens on a very large input file, try to provide
24 |   a minimal example (a subset of the input file) that still causes the problem.
25 |   *Do not* include attachments over 40kB.
26 | * List policy is reply-to-all, and non-subscribers may post.
27 | * There may be a moderation delay for a first-time post, whether or not
28 |   you subscribe.
29 | 
30 | 
31 | ## Mailing List Etiquette
32 | 
33 | When sending messages to bug-datamash@gnu.org :
34 | 
35 | * Send messages as plain text.
36 | * Do not send messages encoded as HTML nor encoded as base64 MIME nor
37 |   included as multiple formats.
38 | * Avoid sending large messages, such as log files, system call trace
39 |   output, and other content resulting in messages over about 40 kB.
40 | * Avoid sending screenshots (e.g. PNG files). When reporting errors
41 |   you encounter on the terminal, copy and paste the text to your message.
42 | 
43 | 
44 | 
45 | 
46 | 
47 | Copyright (C) 2017-2021 Assaf Gordon <assafgordon@gmail.com>
48 | 
49 | This program is free software: you can redistribute it and/or modify
50 | it under the terms of the GNU General Public License as published by
51 | the Free Software Foundation, either version 3 of the License, or
52 | (at your option) any later version.
53 | 
54 | This program is distributed in the hope that it will be useful,
55 | but WITHOUT ANY WARRANTY; without even the implied warranty of
56 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
57 | GNU General Public License for more details.
58 | 
59 | You should have received a copy of the GNU General Public License
60 | along with this program.  If not, see <https://www.gnu.org/licenses/>.
61 | 


--------------------------------------------------------------------------------
/src/text-lines.h:
--------------------------------------------------------------------------------
 1 | /* GNU Datamash - perform simple calculation on input data
 2 | 
 3 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
 4 | 
 5 |    This file is part of GNU Datamash.
 6 | 
 7 |    GNU Datamash is free software: you can redistribute it and/or modify
 8 |    it under the terms of the GNU General Public License as published by
 9 |    the Free Software Foundation, either version 3 of the License, or
10 |    (at your option) any later version.
11 | 
12 |    GNU Datamash is distributed in the hope that it will be useful,
13 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |    GNU General Public License for more details.
16 | 
17 |    You should have received a copy of the GNU General Public License
18 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
19 | */
20 | 
21 | /* Written by Assaf Gordon */
22 | #ifndef __TEXT_LINES_H__
23 | #define __TEXT_LINES_H__
24 | 
25 | struct field_record_t  /* one field of a line: a (pointer, length) pair */
26 | {
27 |   size_t len;          /* length of the field's text */
28 |   const char*  buf;    /* start of the field's text */
29 | };
30 | 
31 | struct line_record_t
32 | {
33 |   /* buffer of the entire line, as created with gnulib's
34 |      readlinebuffer_delim */
35 |   struct linebuffer lbuf;
36 | 
37 |   /* array of fields. Each valid field points into 'lbuf' */
38 |   struct field_record_t *fields;
39 |   size_t num_fields;    /* number of fields in this line */
40 |   size_t alloc_fields;  /* number of fields allocated */
41 | };
42 | 
43 | /* Return the length recorded in LR's line buffer.  */
44 | static inline size_t line_record_length (const struct line_record_t *lr)
45 | {
46 |   return lr->lbuf.length;
47 | }
48 | 
49 | /* Return the start of LR's line buffer.  */
50 | static inline const char* line_record_buffer (const struct line_record_t *lr)
51 | {
52 |   return lr->lbuf.buffer;
53 | }
54 | 
55 | /* Return how many fields LR currently holds.  */
56 | static inline size_t line_record_num_fields (const struct line_record_t *lr)
57 | {
58 |   return lr->num_fields;
59 | }
60 | 
61 | static inline const struct field_record_t*
62 | line_record_field_unsafe (const struct line_record_t *lr, const size_t n)
63 | {
64 |   return lr->fields + (n - 1);   /* N is 1-based; no bounds checking */
65 | }
66 | 
67 | /* Fetch field N (1-based) into *PPTR and *PLEN; false if LR has < N fields. */
68 | static inline bool
69 | line_record_get_field (const struct line_record_t *lr, const size_t n,
70 |                        const char ** /* out */ pptr, size_t* /*out*/ plen)
71 | {
72 |   assert (n!=0); /* LCOV_EXCL_LINE */
73 |   if (n > line_record_num_fields (lr))
74 |     return false;   /* the outputs are left untouched */
75 |   *pptr = lr->fields[n-1].buf;
76 |   *plen = lr->fields[n-1].len;
77 |   return true;
78 | }
79 | 
80 | void
81 | line_record_init (struct line_record_t* lr);
82 | 
83 | bool
84 | line_record_fread (struct /* in/out */ line_record_t* lr,
85 |                    FILE *stream, char delimiter, bool skip_comments,
86 |                    bool vnlog_prologue);
87 | 
88 | void
89 | line_record_free (struct line_record_t* lr);
90 | 
91 | #endif
92 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | /ABOUT-NLS
  2 | /ABOUT-NLS~
  3 | *~
  4 | .*.swp
  5 | 
  6 | .version
  7 | 
  8 | aclocal.m4
  9 | autom4te.cache/*
 10 | # sym-linked macro files, autogenerated by libtoolize:
 11 | m4/libtool.m4
 12 | m4/ltoptions.m4
 13 | m4/ltsugar.m4
 14 | m4/ltversion.m4
 15 | m4/gnulib-cache.m4
 16 | m4/lt~obsolete.m4
 17 | ABOUT-NLS
 18 | build-aux/
 19 | 
 20 | 
 21 | # Autogenerated man pages
 22 | datamash.1
 23 | decorate.1
 24 | 
 25 | config.h
 26 | config.h.in
 27 | config.in
 28 | config.log
 29 | config.status
 30 | config.cache
 31 | configure
 32 | libtool
 33 | 
 34 | */.deps/*
 35 | 
 36 | src/.dirstamp
 37 | 
 38 | config/compile
 39 | config/config.guess
 40 | config/config.sub
 41 | config/depcomp
 42 | config/install-sh
 43 | config/ltmain.sh
 44 | config/missing
 45 | 
 46 | Makefile
 47 | */Makefile
 48 | Makefile.in
 49 | */Makefile.in
 50 | 
 51 | stamp-h1
 52 | *.o
 53 | 
 54 | ChangeLog
 55 | INSTALL
 56 | 
 57 | # Jekyll's _site
 58 | _site/
 59 | bin/
 60 | 
 61 | # gnulib modules, auto-generated in "./reconf"
 62 | lib/
 63 | 
 64 | # gettext files
 65 | po/*.pot
 66 | po/Makefile.in
 67 | po/Makevars
 68 | po/.gitignore
 69 | po/stamp-po
 70 | po/POTFILES
 71 | po/Rules-quot
 72 | po/remove-potcdate.sed
 73 | po/*.po
 74 | po/*.gmo
 75 | po/LINGUAS
 76 | po/.reference
 77 | 
 78 | test-suite.log
 79 | 
 80 | # compiled binary
 81 | /datamash
 82 | /decorate
 83 | # compiled binary (with mingw cross-compiling)
 84 | /datamash.exe
 85 | /decorate.exe
 86 | 
 87 | # Documentation files
 88 | doc/.dirstamp
 89 | doc/.gitignore
 90 | doc/datamash.info
 91 | doc/datamash.pdf
 92 | doc/datamash.t2p
 93 | doc/stamp-vti
 94 | doc/version.texi
 95 | # Auto-generated coverage info with 'make coverage'
 96 | doc/coverage
 97 | # Auto-generated HTML manual with 'make web-manual'
 98 | doc/manual
 99 | # Auto-generated PDF-related files
100 | doc/datamash.aux
101 | doc/datamash.cp
102 | doc/datamash.cps
103 | doc/datamash.fn
104 | doc/datamash.ky
105 | doc/datamash.log
106 | doc/datamash.op
107 | doc/datamash.pg
108 | doc/datamash.toc
109 | doc/datamash.tp
110 | doc/datamash.vr
111 | 
112 | # Coverage files
113 | src/*.gcda
114 | src/*.gcno
115 | lib/*.gcda
116 | lib/*.gcno
117 | datamash.lcov
118 | datamash-cov/
119 | 
120 | # clang static analysis files
121 | /clang_output_*
122 | 
123 | # GNU Global tags
124 | /GPATH
125 | /GRTAGS
126 | /GTAGS
127 | 
128 | /nohup.out
129 | 
130 | datamash-*.tar.gz
131 | datamash-*.tar.gz.sig
132 | /GNUmakefile
133 | /README-release
134 | /maint.mk
135 | 
136 | # Generated by pmccabe2html
137 | cyclo-datamash.html
138 | 
139 | # Side-effects of expensive file system checks
140 | /bad_disk.img
141 | /bottles.txt
142 | /log.txt
143 | /numbers.txt
144 | /tiny_disk.img
145 | 
146 | # Side-effects of syntax checks
147 | /.sc-start-sc_*
148 | 
149 | # Side-effects of make release
150 | /vc-diffs
151 | 


--------------------------------------------------------------------------------
/src/double-format.c:
--------------------------------------------------------------------------------
 1 | /* GNU Datamash - perform simple calculation on input data
 2 | 
 3 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
 4 |    Copyright (C) 2018-2021 Assaf Gordon <assafgordon@gmail.com>
 5 |    Copyright (C) 1994-2018 Free Software Foundation, Inc.
 6 | 
 7 |    This program is free software: you can redistribute it and/or modify
 8 |    it under the terms of the GNU General Public License as published by
 9 |    the Free Software Foundation, either version 3 of the License, or
10 |    (at your option) any later version.
11 | 
12 |    This program is distributed in the hope that it will be useful,
13 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |    GNU General Public License for more details.
16 | 
17 |    You should have received a copy of the GNU General Public License
18 |    along with this program.  If not, see <https://www.gnu.org/licenses/>.
19 | */
20 | 
21 | /*
22 | Portions of this function were copied from GNU coreutils' seq.c,
23 | hence FSF copyright.
24 | */
25 | 
26 | 
27 | #include <config.h>
28 | 
29 | #include "system.h"
30 | #include "die.h"
31 | #include "quote.h"
32 | #include "xalloc.h"
33 | 
34 | #include "text-options.h"
35 | #include "double-format.h"
36 | 
37 | /* Check FMT (one floating-point directive only) and return a newly
38 |    allocated copy with the 'L' length modifier added; die () otherwise.  */
39 | char*
40 | validate_double_format (char const *fmt)
41 | {
42 |   const size_t len = strlen (fmt);
43 |   size_t i = 0;
44 |   char *out;
45 | 
46 |   if (len >= MAX_NUMERIC_FORMAT_LEN)
47 |     die (EXIT_FAILURE, 0, _("numeric format too large"));
48 | 
49 |   /* extra space for NUL and 'L' printf-modifier */
50 |   out = xmalloc (len+2);
51 | 
52 |   /* Locate the first '%' that does not start a literal "%%".  */
53 |   while (fmt[i] != '%' || fmt[i + 1] == '%')
54 |     {
55 |       if (fmt[i] == '\0')
56 |         die (EXIT_FAILURE, 0, _("format %s has no %% directive"), quote (fmt));
57 |       i += (fmt[i] == '%') ? 2 : 1;
58 |     }
59 | 
60 |   /* Step over '%', then optional flags, width and ".precision".  */
61 |   i++;
62 |   i += strspn (fmt + i, "-+#0 '");
63 |   i += strspn (fmt + i, "0123456789");
64 |   if (fmt[i] == '.')
65 |     i += 1 + strspn (fmt + i + 1, "0123456789");
66 | 
67 |   if (fmt[i] == '\0')
68 |     die (EXIT_FAILURE, 0,
69 |          _("format %s missing valid type after '%%'"), quote (fmt));
70 |   if (strchr ("efgaEFGA", fmt[i]) == NULL)
71 |     die (EXIT_FAILURE, 0,
72 |          _("format %s has unknown/invalid type %%%c directive"),
73 |          quote (fmt), fmt[i]);
74 | 
75 |   /* Copy characters until the type character, add 'L', then the type,
76 |      then the rest of the format string. */
77 |   memcpy (out, fmt, i);
78 |   out[i] = 'L';
79 |   out[i+1] = fmt[i];
80 |   memcpy (out+i+2, fmt+i+1, len-i);
81 |   out[len+1] = '\0';
82 | 
83 |   /* Only literal "%%" may follow the directive.  */
84 |   for (i++; fmt[i] != '\0'; i += (fmt[i] == '%') ? 2 : 1)
85 |     if (fmt[i] == '%' && fmt[i + 1] != '%')
86 |       die (EXIT_FAILURE, 0, _("format %s has too many %% directives"),
87 |            quote (fmt));
88 | 
89 |   return out;
90 | }
91 | 


--------------------------------------------------------------------------------
/tests/CuTmpdir.pm:
--------------------------------------------------------------------------------
  1 | package CuTmpdir;
  2 | # This file (CuTmpdir.pm) was copied from GNU Coreutils.
  3 | # It has the following copyright notice and license:
  4 | #
  5 | # create, then chdir into a temporary sub-directory
  6 | 
  7 | # Copyright (C) 2007-2013 Free Software Foundation, Inc.
  8 | 
  9 | # This program is free software: you can redistribute it and/or modify
 10 | # it under the terms of the GNU General Public License as published by
 11 | # the Free Software Foundation, either version 3 of the License, or
 12 | # (at your option) any later version.
 13 | 
 14 | # This program is distributed in the hope that it will be useful,
 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17 | # GNU General Public License for more details.
 18 | 
 19 | # You should have received a copy of the GNU General Public License
 20 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 21 | 
 22 | use strict;
 23 | use warnings;
 24 | 
 25 | use File::Temp;
 26 | use File::Find;
 27 | 
 28 | our $ME = $0 || "<???>";
 29 | 
 30 | my $dir;
 31 | 
 32 | sub skip_test($) {
 33 |   my ($unsafe_name) = @_;
 34 |   warn "$ME: skipping test: unsafe working directory name: '$unsafe_name'\n";
 35 |   exit 77;   # 77 is the status the Automake test harness reports as SKIP
 36 | }
 37 | 
 38 | # File::Find 'wanted' callback: chmod the directory named by $_ to 0700.
 39 | sub chmod_1 {
 40 |   my $entry = $_;
 41 | 
 42 |   # Symlinks are left alone; '_' reuses the lstat done by -l just before.
 43 |   return if -l $entry;
 44 |   return unless -d _;
 45 | 
 46 |   chmod 0700, $entry;
 47 | }
 48 | 
 49 | # Give the owner full access (0700) to every directory under $dir,
 50 | # via File::Find and chmod_1.  Does nothing when $dir was never set.
 51 | sub chmod_tree {
 52 |   # When tempdir fails, it croaks, which leaves $dir undefined.
 53 |   return unless defined $dir;
 54 | 
 55 |   # Perform the equivalent of find "$dir" -type d -print0|xargs -0 chmod -R 700.
 56 |   my %find_opts = (untaint => 1, wanted => \&chmod_1);
 57 |   find (\%find_opts, $dir);
 58 | }
 59 | 
 60 | sub import {   # create "$prefix.tmp-XXXX", chdir into it, arrange cleanup
 61 |   my $prefix = $_[1];   # first item of the "use CuTmpdir LIST" list, if any
 62 | 
 63 |   $ME eq '-' && defined $prefix
 64 |     and $ME = $prefix;
 65 | 
 66 |   if ($prefix !~ /^\//)   # NOTE(review): warns when $prefix is undef
 67 |     {
 68 |       eval 'use Cwd';
 69 |       my $cwd = $@ ? '.' : Cwd::getcwd();   # '.' if Cwd failed to load
 70 |       $prefix = "$cwd/$prefix";             # make a relative prefix absolute
 71 |     }
 72 | 
 73 |   # Untaint for the upcoming mkdir.
 74 |   $prefix =~ m!^([-+\@\w./]+)$!
 75 |     or skip_test $prefix;
 76 |   $prefix = $1;
 77 | 
 78 |   my $original_pid = $$;   # the handler cleans up only in this process
 79 |   # Signal handler: clean up the temporary tree, then re-raise the signal.
 80 |   my $on_sig_remove_tmpdir = sub {
 81 |     my ($sig) = @_;
 82 |     if ($$ == $original_pid and defined $dir)
 83 |       {
 84 |         chmod_tree;
 85 |         # Older versions of File::Temp lack this method.
 86 |         exists &File::Temp::cleanup
 87 |           and &File::Temp::cleanup;
 88 |       }
 89 |     $SIG{$sig} = 'DEFAULT';   # restore the default action before re-raising
 90 |     kill $sig, $$;
 91 |   };
 92 | 
 93 |   foreach my $sig (qw (INT TERM HUP))
 94 |     {
 95 |       $SIG{$sig} = $on_sig_remove_tmpdir;
 96 |     }
 97 | 
 98 |   $dir = File::Temp::tempdir("$prefix.tmp-XXXX", CLEANUP => 1 );
 99 |   chdir $dir
100 |     or warn "$ME: failed to chdir to $dir: $!\n";
101 | }
102 | 
103 | END {
104 |   # Move cwd out of the directory we're about to remove.
105 |   # This is required on some systems, and by some versions of File::Temp.
106 |   my $from = defined $dir ? $dir : '<unset>';   # tempdir may have croaked
107 |   chdir '..'
108 |     or warn "$ME: failed to chdir to .. from $from: $!\n";
109 |   my $saved_status = $?;   # keep the exit status intact across chmod_tree
110 |   chmod_tree;
111 |   $? = $saved_status;
112 | }
113 | 
114 | 1;
115 | 


--------------------------------------------------------------------------------
/src/text-options.h:
--------------------------------------------------------------------------------
  1 | /* GNU Datamash - perform simple calculation on input data
  2 | 
  3 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  4 | 
  5 |    This file is part of GNU Datamash.
  6 | 
  7 |    GNU Datamash is free software: you can redistribute it and/or modify
  8 |    it under the terms of the GNU General Public License as published by
  9 |    the Free Software Foundation, either version 3 of the License, or
 10 |    (at your option) any later version.
 11 | 
 12 |    GNU Datamash is distributed in the hope that it will be useful,
 13 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 |    GNU General Public License for more details.
 16 | 
 17 |    You should have received a copy of the GNU General Public License
 18 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 19 | */
 20 | 
 21 | /* Written by Assaf Gordon */
 22 | #ifndef __TEXT_OPTIONS_H__
 23 | #define __TEXT_OPTIONS_H__
 24 | 
 25 | /*
 26 |  Text Processing options, used by several modules.
 27 |  */
 28 | 
 29 | /* The character marking end of line. Default to \n. */
 30 | extern char eolchar;
 31 | 
 32 | /* If TAB has this value, blanks separate fields.  */
 33 | enum { TAB_WHITESPACE = CHAR_MAX + 1 };
 34 | 
 35 | /* Tab character separating fields.  If TAB_WHITESPACE, then fields are
 36 |    separated by the empty string between a non-blank character and a blank
 37 |    character. */
 38 | extern int in_tab ;
 39 | /* The output field separator character, defaults to a TAB (ASCII 9) */
 40 | extern int out_tab ;
 41 | 
 42 | /* Global case-sensitivity option. Defaults to 'true' . */
 43 | extern bool case_sensitive ;
 44 | 
 45 | /* Largest possible format string */
 46 | #define MAX_NUMERIC_FORMAT_LEN 100
 47 | /* Numeric output format (default: "%.14Lg") */
 48 | extern char numeric_output_format[MAX_NUMERIC_FORMAT_LEN + 1];
 49 | /* number of bytes to allocate for output buffer */
 50 | extern int   numeric_output_bufsize;
 51 | 
 52 | /* The character used to separate collapsed/uniqued strings */
 53 | extern char collapse_separator;
 54 | 
 55 | /* Should NA/NaN/empty values be silently ignored? */
 56 | extern bool remove_na_values;
 57 | 
 58 | /* if true, 'transpose' and 'reverse' require every line to have
 59 |    the exact same number of fields. Otherwise, the program
 60 |    will fail with non-zero exit code. */
 61 | extern bool strict;
 62 | 
 63 | /* if 'strict' is false, lines with fewer-than-expected fields
 64 |    will be filled with this value */
 65 | extern const char* missing_field_filler;
 66 | 
 67 | /* if true, skip comments line (lines starting with optional whitespace
 68 |    followed by '#' or ';'. See line_record_is_comment.  */
 69 | extern bool skip_comments;
 70 | 
 71 | extern bool vnlog;
 72 | 
 73 | #define UCHAR_LIM (UCHAR_MAX + 1)
 74 | extern bool blanks[UCHAR_LIM];
 75 | 
 76 | /* Initializes the 'blanks' table. */
 77 | void
 78 | init_blank_table (void);
 79 | 
 80 | static inline void
 81 | print_field_separator (void)
 82 | {
 83 |   putchar (out_tab);   /* out_tab: the output field separator */
 84 | }
 85 | 
 86 | static inline void
 87 | print_line_separator (void)
 88 | {
 89 |   putchar (eolchar);   /* eolchar: the end-of-line character */
 90 | }
 91 | 
 92 | 
 93 | void
 94 | set_numeric_output_precision (const char* digits);
 95 | 
 96 | void
 97 | set_numeric_printf_format (const char* format);
 98 | 
 99 | #endif
100 | 


--------------------------------------------------------------------------------
/tests/datamash-rand.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | #   Unit Tests for GNU Datamash - perform simple calculation on input data
 3 | 
 4 | #    Copyright (C) 2014-2021 Assaf Gordon <assafgordon@gmail.com>
 5 | #
 6 | #    This file is part of GNU Datamash.
 7 | #
 8 | #    GNU Datamash is free software: you can redistribute it and/or modify
 9 | #    it under the terms of the GNU General Public License as published by
10 | #    the Free Software Foundation, either version 3 of the License, or
11 | #    (at your option) any later version.
12 | #
13 | #    GNU Datamash is distributed in the hope that it will be useful,
14 | #    but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | #    GNU General Public License for more details.
17 | #
18 | #    You should have received a copy of the GNU General Public License
19 | #    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | #    Written by Assaf Gordon
22 | 
23 | ##
24 | ## This script tests the randomness of the 'rand' operation
25 | ##
26 | 
27 | . "${test_dir=.}/init.sh"; path_prepend_ ./src
28 | 
29 | fail=0   # set to 1 by any failed check below; handed to Exit at the end
30 | 
31 | require_paste_
32 | 
33 | ## Ensure seq is usable
34 | openbsd_seq_replacement_
35 | seq 10 >/dev/null 2>/dev/null ||
36 |     skip_ "requires a working seq"
37 | 
38 | 
39 | ##
40 | ## --- First test ---
41 | ##
42 | ##    select a random number between 0 and 9,
43 | ##    repeat selection for 1000 times.
44 | ##    Each digit should be returned at least once
45 | ##    (unless we're extremely unlucky...)
46 | 
47 | INPUT=$(seq 0 9) || framework_failure_ "generating INPUT failed"
48 | 
49 | for i in $(seq 1000) ;
50 | do
51 |   echo "$INPUT" | datamash rand 1
52 | done > out_rand1 || framework_failure_ "test1 failed: datamash error"
53 | 
54 | ## First Check: each number should be there once
55 | RESULT=$(cat out_rand1 | sort -n | uniq | paste -d , -s -) ||
56 |     framework_failure_ "test1 failed: error preparing first check"
57 | 
58 | [ "$RESULT" = "0,1,2,3,4,5,6,7,8,9" ] ||
59 |     { warn_ "test1 failed. RESULT='$RESULT'." ; fail=1 ; }
60 | 
61 | 
62 | ## Second check - we expect (hope?) the distribution is uniform,
63 | ##                and each number appears more-or-less equally.
64 | ## This is a poor-man's way of quasi-validation...
65 | ## Using 'datamash', count how many times each number appears,
66 | ## then, find the smallest count - in a uniform distribution,
67 | ## we expect each number to appear close to 100 times (1000 draws of 10 items).
68 | ##
69 | ## NOTE:
70 | ##  We use 'datamash' to validate itself... but only after assuming the
71 | ##  basic operations (sort, group, count, min) have been already tested.
72 | RESULT=$(cat out_rand1 |
73 |              datamash --sort --group 1 count 1 |
74 |              datamash min 2) ||
75 |     framework_failure_ "test1 failed: error preparing second check"
76 | 
77 | ## We set the cut-off at 60 - if any number appeared less than 60 times,
78 | ## we *might* have a problem in the uniform randomness in 'datamash'.
79 | if [ "$RESULT" -lt "60" ] ; then
80 |   warn_ "Possible uniformity problem in 'rand' operation."
81 |   echo "--- distribution of numbers ---"
82 |   cat out_rand1 | datamash --sort --group 1 count 1
83 |   echo "--- end ---"
84 |   echo "--- 1000 random draws start here---"
85 |   cat out_rand1
86 |   echo "---- end ----"
87 |   fail=1
88 | fi
89 | 
90 | 
91 | Exit $fail
92 | 


--------------------------------------------------------------------------------
/tests/datamash-strbin.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/sh
  2 | #   Unit Tests for GNU Datamash - perform simple calculation on input data
  3 | 
  4 | #    Copyright (C) 2015-2021 Assaf Gordon <assafgordon@gmail.com>
  5 | #
  6 | #    This file is part of GNU Datamash.
  7 | #
  8 | #    GNU Datamash is free software: you can redistribute it and/or modify
  9 | #    it under the terms of the GNU General Public License as published by
 10 | #    the Free Software Foundation, either version 3 of the License, or
 11 | #    (at your option) any later version.
 12 | #
 13 | #    GNU Datamash is distributed in the hope that it will be useful,
 14 | #    but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | #    GNU General Public License for more details.
 17 | #
 18 | #    You should have received a copy of the GNU General Public License
 19 | #    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 20 | #
 21 | #    Written by Assaf Gordon
 22 | 
 23 | ##
 24 | ## This script tests the strbin (string binning/hashing) operator
 25 | ##
 26 | 
 27 | . "${test_dir=.}/init.sh"; path_prepend_ ./src
 28 | 
 29 | fail=0   # set to 1 by any failed check below; handed to Exit at the end
 30 | 
 31 | ## Ensure seq is usable
 32 | openbsd_seq_replacement_
 33 | seq 10 >/dev/null 2>/dev/null \
 34 |     || skip_ "requires a working seq"
 35 | 
 36 | 
 37 | # Generate input: the 1000 strings "id-1" .. "id-1000"
 38 | seq 1000 | sed 's/^/id-/' > in \
 39 |     || framework_failure_ "generating INPUT failed"
 40 | 
 41 | # bin into 10 groups
 42 | datamash strbin 1 < in > out1 \
 43 |     || { warn_ "'datamash strbin 1' failed" ; fail=1 ; }
 44 | 
 45 | # Check output values: out2 holds the distinct bin numbers, sorted
 46 | sort -n -u < out1 > out2 || framework_failure_ "failed to sort out1"
 47 | 
 48 | 
 49 | # Default binning uses 10 bins: every output line must be a single digit.
 50 | grep -v '^[0-9]$' < out2 > /dev/null \
 51 |     &&  { warn_ "'datamash strbin 1' generated invalid output (out2):" ;
 52 |          cat out2 >&2 ;
 53 |          fail=1 ; }
 54 | 
 55 | # Test binning into varying number of bins
 56 | for i in 5 10 100 300 ;
 57 | do
 58 |     datamash strbin:$i 1 < in > out-$i \
 59 | 	|| { warn_ "'datamash strbin:$i 1' failed" ; fail=1 ; break ; }
 60 | 
 61 |     # Check output values: the largest bin number must be in 1 .. i-1
 62 |     max=$(sort -n -u -r < out-$i | head -n1)
 63 | 
 64 |     test -n "$max" \
 65 | 	|| { warn_ "'datamash strbin:$i 1' failed - max output is empty" ;
 66 | 	     fail=1 ;
 67 | 	     break ; }
 68 | 
 69 |     test "$max" -gt 0 \
 70 | 	|| { warn_ "'datamash strbin:$i 1' failed - max value too small ($max)";
 71 | 	     fail=1 ;
 72 | 	     break ; }
 73 | 
 74 |     test "$max" -lt "$i" \
 75 | 	|| { warn_ "'datamash strbin:$i 1' failed - max value too large ($max)";
 76 | 	     fail=1 ;
 77 | 	     break ; }
 78 | done
 79 | 
 80 | 
 81 | # Same string must result in the same bin,
 82 | # in the same run and in different runs.
 83 | # (the returned value, however, is machine-dependent)
 84 | 
 85 | text="hello-42-world"
 86 | for i in 5 10 100 300 ;
 87 | do
 88 |     bin1=$(printf "%s\n%s\n%s\n" "$text" "$text" "$text" \
 89 |                | datamash strbin:$i 1 | uniq)
 90 |     bin2=$(printf "%s\n" "$text" \
 91 |                | datamash strbin:$i 1 | uniq)
 92 | 
 93 |     test -n "$bin1" \
 94 | 	|| { warn_ "'datamash strbin:$i 1' failed on text '$text' - empty";
 95 | 	     fail=1 ;
 96 | 	     break ; }
 97 | 
 98 |     test "x$bin1" = "x$bin2" \
 99 | 	|| { warn_ "'datamash strbin:$i 1' failed on text '$text' - " \
100 |                    "bin1 ($bin1) doesn't match bin2 ($bin2)" ;
101 | 	     fail=1 ;
102 | 	     break ; }
103 | done
104 | 
105 | 
106 | Exit $fail
107 | 


--------------------------------------------------------------------------------
/src/column-headers.c:
--------------------------------------------------------------------------------
  1 | /* GNU Datamash - perform simple calculation on input data
  2 | 
  3 |    Copyright (C) 2014-2021 Assaf Gordon.
  4 | 
  5 |    This file is part of GNU Datamash.
  6 | 
  7 |    GNU Datamash is free software: you can redistribute it and/or modify
  8 |    it under the terms of the GNU General Public License as published by
  9 |    the Free Software Foundation, either version 3 of the License, or
 10 |    (at your option) any later version.
 11 | 
 12 |    GNU Datamash is distributed in the hope that it will be useful,
 13 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 |    GNU General Public License for more details.
 16 | 
 17 |    You should have received a copy of the GNU General Public License
 18 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 19 | */
 20 | 
 21 | /* Written by Assaf Gordon */
 22 | #include <config.h>
 23 | #include <assert.h>
 24 | #include <stddef.h>
 25 | #include <stdbool.h>
 26 | #include <strings.h>
 27 | #include <inttypes.h>
 28 | 
 29 | #include "system.h"
 30 | #include "xalloc.h"
 31 | #include "linebuffer.h"
 32 | #include "ignore-value.h"
 33 | #include "intprops.h"
 34 | 
 35 | #include "text-options.h"
 36 | #include "text-lines.h"
 37 | #include "column-headers.h"
 38 | 
 39 | static size_t num_input_column_headers = 0 ;
 40 | static char** input_column_headers;
 41 | 
 42 | /* Free all stored column names and reset the table to the empty state.  */
 43 | void free_column_headers ()
 44 | {
 45 |   for (size_t i = 0; i < num_input_column_headers; ++i)
 46 |     free (input_column_headers[i]);
 47 | 
 48 |   free (input_column_headers);
 49 |   input_column_headers = NULL;
 50 |   num_input_column_headers = 0;  /* keep count and (now NULL) array in sync */
 51 | }
 52 | 
 53 | size_t _GL_ATTRIBUTE_PURE
 54 | get_num_column_headers ()
 55 | {
 56 |   return num_input_column_headers;  /* set by build_input_line_headers */
 57 | }
 58 | 
 59 | const char* _GL_ATTRIBUTE_PURE
 60 | get_input_field_name (size_t field_num)
 61 | {
 62 |   assert (field_num > 0                              /* LCOV_EXCL_LINE */
 63 |           && field_num <= num_input_column_headers); /* LCOV_EXCL_LINE */
 64 |   return input_column_headers[field_num-1];  /* FIELD_NUM is 1-based */
 65 | }
 66 | 
 67 | /* Return the 1-based index of the first column named FIELD_NAME, else 0.  */
 68 | size_t _GL_ATTRIBUTE_PURE
 69 | get_input_field_number (const char* field_name)
 70 | {
 71 |   size_t idx;
 72 |   assert (field_name != NULL); /* LCOV_EXCL_LINE */
 73 |   assert (*field_name != 0);   /* LCOV_EXCL_LINE */
 74 |   for (idx = 0; idx < num_input_column_headers; idx++)
 75 |     if (STREQ (field_name, input_column_headers[idx]))
 76 |       return idx + 1;
 77 |   return 0;
 78 | }
 79 | 
 80 | /* Fill the header table from LR: real names, or "field-N" if !STORE_NAMES. */
 81 | void
 82 | build_input_line_headers (const struct line_record_t *lr, bool store_names)
 83 | {
 84 |   const size_t num_fields = line_record_num_fields (lr);
 85 |   const size_t field_name_buf_size = 7+INT_BUFSIZE_BOUND (size_t)+1;
 86 |   size_t len = 0;
 87 | 
 88 |   num_input_column_headers = num_fields;
 89 |   input_column_headers = XNMALLOC (num_fields, char*);
 90 | 
 91 |   for (size_t idx = 0; idx < num_fields; ++idx)
 92 |     {
 93 |       char *name;
 94 |       if (store_names)
 95 |         {
 96 |           const char* text = NULL;
 97 |           line_record_get_field (lr, idx + 1, &text, &len);
 98 |           name = xmalloc ( len+1 );
 99 |           memcpy (name, text, len);
100 |           name[len] = 0;
101 |         }
102 |       else
103 |         {
104 |           name = xmalloc ( field_name_buf_size );
105 |           ignore_value (snprintf (name, field_name_buf_size,
106 |                                   "field-%"PRIuMAX,(uintmax_t)(idx + 1)));
107 |         }
108 |       input_column_headers[idx] = name;   /* table slots are 0-based */
109 |     }
110 | }
111 | 


--------------------------------------------------------------------------------
/tests/datamash-md5.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | =pod
  3 |    Unit Tests for GNU Datamash - tests md5 operations
  4 | 
  5 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  6 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  7 | 
  8 |    This file is part of GNU Datamash.
  9 | 
 10 |    GNU Datamash is free software: you can redistribute it and/or modify
 11 |    it under the terms of the GNU General Public License as published by
 12 |    the Free Software Foundation, either version 3 of the License, or
 13 |    (at your option) any later version.
 14 | 
 15 |    GNU Datamash is distributed in the hope that it will be useful,
 16 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 |    GNU General Public License for more details.
 19 | 
 20 |    You should have received a copy of the GNU General Public License
 21 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 22 | 
 23 |    Written by Assaf Gordon.
 24 | =cut
 25 | ## NOTE: Digest::MD5 is supposed to be a core module,
 26 | ##       but some OSes don't distribute it (e.g. CentOS 7 requires
 27 | ##       a separate package 'perl-Digest-MD5').
 28 | ##       If not available, skip this test (instead of failing).
 29 | use strict;
 30 | use warnings;
 31 | 
 32 | # Until a better way comes along to auto-use Coreutils Perl modules
 33 | # as in the coreutils' autotools system.
 34 | use Coreutils;
 35 | use CuSkip;
 36 | use CuTmpdir qw(datamash);
 37 | 
 38 | ## Digest::MD5 is not installed everywhere (see the NOTE above).
 39 | ## Skip the test if it cannot be loaded.
 40 | my $have_md5 = eval qq{use Digest::MD5 qw(md5_hex);1;};
 41 | 
 42 | if (!$have_md5) {
 43 |   CuSkip::skip "requires Perl with Digest::MD5 module\nload error:\n$@";
 44 | }
 45 | 
 46 | (my $program_name = $0) =~ s|.*/||;   # this script's name, directories removed
 47 | my $prog_bin = 'datamash';
 48 | 
 49 | ## Cross-Compiling portability hack:
 50 | ##  under qemu/binfmt, argv[0] (which is used to report errors) will contain
 51 | ##  the full path of the binary, if the binary is on the $PATH.
 52 | ##  So we try to detect what is the actual returned value of the program
 53 | ##  in case of an error.
 54 | my $prog = `$prog_bin ---print-progname`
 55 |            || $prog_bin;   # fall back when the probe yields nothing
 56 | 
 57 | # Turn off localization of executable's output.
 58 | $ENV{$_} = 'C' foreach qw(LANGUAGE LANG LC_ALL);
 59 | 
 60 | my $in_g1=<<'EOF';
 61 | A 100
 62 | A 10
 63 | A 50
 64 | A 35
 65 | EOF
 66 | 
 67 | # Header line, with custom field separator
 68 | my $in_hdr2=<<'EOF';
 69 | x:y:z
 70 | A:3:W
 71 | A:5:W
 72 | A:7:W
 73 | A:11:X
 74 | A:13:X
 75 | B:17:Y
 76 | B:19:Z
 77 | C:23:Z
 78 | EOF
 79 | 
 80 | =pod
 81 |   Example:
 82 |   my $data = "a 1\nb 2\n";
 83 |   my $out = transform_column($data, 2, \&md5_hex);
 84 |   # out => md5_hex("1") . "\n" . md5_hex("2") . "\n" ;
 85 | =cut
 86 | sub transform_column($$$)
 87 | {
 88 |   my ($input_text, $input_column, $function) = @_;   # column is 1-based
 89 |   my $result = "";
 90 |   foreach my $line (split("\n", $input_text))
 91 |     {
 92 |       my @fields = split / /, $line;
 93 |       $result .= "$_\n"
 94 |         foreach $function->($fields[ $input_column - 1 ]);
 95 |     }
 96 |   return $result;
 97 | }
 98 | 
 99 | # Expected output: md5 of the second column of '$in_g1'
100 | my $out_g1_md5 = transform_column ($in_g1, 2, \&md5_hex);
101 | 
102 | # Each entry: [ test name, datamash arguments, input spec, expected output ]
103 | my @Tests = (
104 |   ['md5-1',   '-W md5 2',    {IN_PIPE=>$in_g1}, {OUT=>$out_g1_md5}],
105 | );
106 | 
107 | # SAVE_TEMPS and VERBOSE are taken from the environment and handed on.
108 | my ($save_temps, $verbose) = @ENV{qw(SAVE_TEMPS VERBOSE)};
109 | 
110 | my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
111 | exit $fail;
112 | 


--------------------------------------------------------------------------------
/src/op-parser.h:
--------------------------------------------------------------------------------
 1 | /* GNU Datamash - perform simple calculation on input data
 2 | 
 3 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
 4 | 
 5 |    This file is part of GNU Datamash.
 6 | 
 7 |    GNU Datamash is free software: you can redistribute it and/or modify
 8 |    it under the terms of the GNU General Public License as published by
 9 |    the Free Software Foundation, either version 3 of the License, or
10 |    (at your option) any later version.
11 | 
12 |    GNU Datamash is distributed in the hope that it will be useful,
13 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 |    GNU General Public License for more details.
16 | 
17 |    You should have received a copy of the GNU General Public License
18 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
19 | */
20 | 
21 | /* Written by Assaf Gordon */
22 | #ifndef __OP_PARSER_H__
23 | #define __OP_PARSER_H__
24 | 
/* One column used as a grouping key (see 'grps' in struct datamash_ops).
   The column is identified either by its number or by its header name. */
struct group_column_t
{
  size_t num;       /* 1 = first field */
  bool   by_name;   /* true if the user gave a column name */
  char*  name;      /* column name - to be converted to number after
                       header line is read */
};
32 | 
/* A column reference (by number or by header name) paired with the
   field operation requested for it. */
struct op_column_t
{
  size_t num;                /* 1 = first field */
  bool   by_name;            /* true if the user gave a column name */
  char*  name;               /* column name - to be converted to number after
                                header line is read */
  enum   field_operation op; /* the operation to perform on this column */
};
41 | 
/* Optional requirements on the shape of the input; stored as 'check_params'
   in the 'mode_params' union of struct datamash_ops.
   A zero value means the corresponding requirement is not set. */
struct mode_check_params_t
{
  uintmax_t n_lines;         /* if not zero, require this number of lines */
  uintmax_t n_fields;        /* if not zero, require this number of fields */
};
47 | 
/* The parsed command: processing mode, group-by columns, field operations
   and mode-specific parameters.  Returned by datamash_ops_parse() and
   datamash_ops_parse_premode(); released with datamash_ops_free(). */
struct datamash_ops
{
  enum processing_mode mode; /* the processing mode */
  bool header_required;      /* true if any of the fields (groups/operations)
                                used a named column instead of a number. */

  struct group_column_t *grps; /* group-by columns */
  size_t num_grps;             /* number of entries used in 'grps' */
  size_t alloc_grps;           /* NOTE(review): presumably the allocated
                                  capacity of 'grps' - confirm in op-parser.c */

  struct fieldop    *ops;  /* field operations */
  size_t num_ops;          /* number of entries used in 'ops' */
  size_t alloc_ops;        /* NOTE(review): presumably the allocated
                              capacity of 'ops' - confirm in op-parser.c */

  /* Additional parameters for mode operations
     (i.e. ones relating to the operation mode,
     not to specific field-ops) */
  union
  {
    struct mode_check_params_t check_params;
  } mode_params;
};
70 | 
71 | /* Parse the operations, return new datamash_ops structure.
72 |    This function assumes new syntax:
73 |    1. The first word is either a mode (e.g. transpose/groupby/reverse)
74 |       or an operation (e.g. sum/min/max) - implying a 'group-by' mode.
75 |    2. The rest of the parameters are operations. */
76 | struct datamash_ops*
77 | datamash_ops_parse ( int argc, const char* argv[] );
78 | 
79 | /* Parse the operations, return new datamash_ops structure.
80 |    This function assumes old syntax:
81 |     The user already specified "-g X,Y,Z" - the processing mode is known,
82 |     and the grouping text 'X,Y,Z' is known.
83 |    The function will only accept operations (e.g. sum/min/max). */
84 | struct datamash_ops*
85 | datamash_ops_parse_premode ( enum processing_mode pm,
86 |                              const char* grouping_spec,
87 |                              int argc, const char* argv[] );
88 | 
/* Print the content of P, for debugging.
   NOTE(review): the output stream and format are decided by the
   implementation, which is not visible from this header. */
void
datamash_ops_debug_print ( const struct datamash_ops* p );

/* Release a datamash_ops structure.
   NOTE(review): presumably also frees the 'grps' and 'ops' arrays it holds -
   confirm against the implementation. */
void
datamash_ops_free (struct datamash_ops *p);
94 | 
95 | #endif
96 | 


--------------------------------------------------------------------------------
/tests/datamash-sort-header-deprecated.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | =pod
  3 |   Unit Tests for GNU Datamash - perform simple calculation on input data
  4 | 
  5 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  6 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  7 | 
  8 |    This file is part of GNU Datamash.
  9 | 
 10 |    GNU Datamash is free software: you can redistribute it and/or modify
 11 |    it under the terms of the GNU General Public License as published by
 12 |    the Free Software Foundation, either version 3 of the License, or
 13 |    (at your option) any later version.
 14 | 
 15 |    GNU Datamash is distributed in the hope that it will be useful,
 16 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 |    GNU General Public License for more details.
 19 | 
 20 |    You should have received a copy of the GNU General Public License
 21 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 22 | 
 23 |    Written by Assaf Gordon and Tim Rice.
 24 | =cut
 25 | use strict;
 26 | use warnings;
 27 | use List::Util qw/max/;
 28 | use Data::Dumper;
 29 | 
 30 | # Until a better way comes along to auto-use Coreutils Perl modules
 31 | # as in the coreutils' autotools system.
 32 | use Coreutils;
 33 | use CuSkip;
 34 | use CuTmpdir qw(datamash);
 35 | 
# Base name of this script (directory part removed); passed to run_tests below.
(my $program_name = $0) =~ s|.*/||;
my $prog_bin = 'datamash';

## Cross-Compiling portability hack:
##  under qemu/binfmt, argv[0] (which is used to report errors) will contain
##  the full path of the binary, if the binary is on the $PATH.
##  So we try to detect what is the actual returned value of the program
##  in case of an error.
##  The invalid option '--foobar' is expected to trigger an error message;
##  the text before the first ':' on its first line is taken as the name.
my $prog = `$prog_bin --foobar 2>&1 | head -n 1 | cut -f1 -d:`;
chomp $prog if $prog;
# Fall back to the plain binary name if nothing was captured.
$prog = $prog_bin unless $prog;

# Turn off localization of executable's output.
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;

# An unsorted input with a header line
my $INFILE=<<'EOF';
x y z
A % 1
B ( 2
A & 3
B = 4
EOF

# The same input without its header line.  The -1 limit makes split keep the
# trailing empty field, so the joined text still ends with a newline.
my @INFILE_lines = split /\n/, $INFILE, -1;
my $INFILE_NO_HEADER = join("\n", @INFILE_lines[1..$#INFILE_lines]);

# Expected output: header consumed from the input, none printed.
my $exp_sort_in_header_full=<<'EOF';
A % 1 1,3
B ( 2 2,4
EOF

# Expected output: input has no header, generated 'field-N' header printed.
my $exp_sort_out_header_full=<<'EOF';
field-1 field-2 field-3 unique(field-3)
A % 1 1,3
B ( 2 2,4
EOF

# Expected output: header read from the input and printed in the output.
my $exp_sort_headers_full=<<'EOF';
x y z unique(z)
A % 1 1,3
B ( 2 2,4
EOF

# The warning expected on stderr in every test below.  It starts with the
# program name detected above.
my $deprecation_notice="$prog: Using -f/--full with non-linewise " .
                       "operations is deprecated and will be disabled " .
                       "in a future release.\n";

# Each entry: test name, datamash arguments, then hashes giving the piped
# input (IN_PIPE), the expected stdout (OUT) and the expected stderr (ERR).
my @Tests =
(
  ['shdep01',  '-t " " --sort --full --header-out -g 1 unique 3',
    {IN_PIPE=>$INFILE_NO_HEADER}, {OUT=>$exp_sort_out_header_full},
    {ERR=>$deprecation_notice}],
  ['shdep02',  '-t " " -g 1 --sort --full --header-in unique 3',
    {IN_PIPE=>$INFILE}, {OUT=>$exp_sort_in_header_full},
    {ERR=>$deprecation_notice}],
  ['shdep03',  '-t " " -g 1 --sort --full --headers unique 3',
    {IN_PIPE=>$INFILE}, {OUT=>$exp_sort_headers_full},
    {ERR=>$deprecation_notice}],

  # Check sort-piping with empty input - should always produce empty output
  ['shdep04',  '-t " " --sort --full unique 3',
    {IN_PIPE=>""}, {OUT=>""},
    {ERR=>$deprecation_notice}],
);

# Optional knobs taken from the environment and forwarded to run_tests.
my $save_temps = $ENV{SAVE_TEMPS};
my $verbose = $ENV{VERBOSE};

# NOTE(review): run_tests receives $prog_bin here, while $prog is only used to
# build the expected error text - confirm this is intended.
my $fail = run_tests ($program_name, $prog_bin, \@Tests, $save_temps, $verbose);
exit $fail;
107 | 


--------------------------------------------------------------------------------
/tests/datamash-sort-errors.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/sh
  2 | #   Unit Tests for GNU Datamash - perform simple calculation on input data
  3 | 
  4 | #    Copyright (C) 2014-2021 Assaf Gordon <assafgordon@gmail.com>
  5 | #
  6 | #    This file is part of GNU Datamash.
  7 | #
  8 | #    GNU Datamash is free software: you can redistribute it and/or modify
  9 | #    it under the terms of the GNU General Public License as published by
 10 | #    the Free Software Foundation, either version 3 of the License, or
 11 | #    (at your option) any later version.
 12 | #
 13 | #    GNU Datamash is distributed in the hope that it will be useful,
 14 | #    but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | #    GNU General Public License for more details.
 17 | #
 18 | #    You should have received a copy of the GNU General Public License
 19 | #    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 20 | #
 21 | #    Written by Assaf Gordon
 22 | 
 23 | ##
 24 | ## This script tests the sort piping code for errors
 25 | ##
 26 | 
 27 | . "${test_dir=.}/init.sh"; path_prepend_ ./src
 28 | 
 29 | fail=0
 30 | 
 31 | require_paste_
 32 | 
 33 | ## Ensure seq is useable
 34 | openbsd_seq_replacement_
 35 | seq 10 >/dev/null 2>/dev/null ||
 36 |   skip_ "requires a working seq"
 37 | 
 38 | ## Cross-Compiling portability hack:
 39 | ##  under qemu/binfmt, argv[0] (which is used to report errors) will contain
 40 | ##  the full path of the binary, if the binary is on the $PATH.
 41 | ##  So we try to detect what is the actual returned value of the program
 42 | ##  in case of an error.
 43 | PROG_ARGV0=$(datamash --foobar 2>&1 | head -n 1 | cut -f1 -d:)
 44 | [ -z "$PROG_ARGV0" ] && PROG_ARGV0="datamash"
 45 | 
 46 | ##
 47 | ##
 48 | ## Test preparations
 49 | ##
 50 | ##
 51 | GROUPPARAM=$(seq 1000 2000 | paste -d "," -s -) ||
 52 |   framework_failure_ "failed to construct too-long group parameter"
 53 | 
 54 | ## The expected error message when 'sort' is not found
 55 | printf 'sh: sort: not found\ndatamash: read error (on close)' > exp_err2 ||
 56 |   framework_failure_ "failed to create exp_err2"
 57 | 
 58 | ##
 59 | ## Create a bad 'sort' executable, to simulate failed pipe/popen
 60 | ##
 61 | BADDIR1=$(mktemp -d bad_sort.XXXXXX) ||
 62 |   framework_failure_ "Failed to create temp directory for bad-sort"
 63 | printf "#!/foo/bar/bad/interpreter" > "$BADDIR1/sort" ||
 64 |   framework_failure_ "Failed to create bad-sort: $BADDIR1/sort"
 65 | chmod a+x "$BADDIR1/sort" ||
 66 |   framework_failure_ "failed to make bad-sort executable"
 67 | ORIGPATH=$PATH
 68 | 
 69 | ## The directory where the "datamash' executable is
 70 | DATAMASHDIR=$(dirname $(which datamash))
 71 | test -z "$DATAMASHDIR" &&
 72 |   framework_failure_ "failed to find datamash's directory"
 73 | 
 74 | ## Create a 'sort' which will crash
 75 | BADDIR=$(mktemp -d badsort.XXXXXX) ||
 76 |   framework_failure_ "failed to create bad-sort-dir"
 77 | echo '#!/bin/sh
 78 | read A
 79 | echo "$A"
 80 | read B
 81 | echo "$B"
 82 | Z=0
 83 | C=$((1/$Z))
 84 | ' > "$BADDIR/sort" || framework_failure_ "failed to create $BADDIR/sort"
 85 | chmod a+x "$BADDIR/sort" ||
 86 |   framework_failure_ "failed to make $BADDIR/sort executable"
 87 | 
 88 | 
 89 | ##
 90 | ## Tests start here
 91 | ##
 92 | 
 93 | ##
 94 | ## Test with non-existing 'sort' executable, by giving an invalid path
 95 | ##
 96 | ## NOTE: This run SHOULD return an error, hence the "&&" instead of "||"
 97 | ##
 98 | seq 10 | datamash --sort --sort-cmd=/not/a/sort -g 1 sum 1 &&
 99 |   { warn_ "datamash --sort with non existing 'sort' did not fail " \
100 |           "(it should have failed)" ; fail=1 ; }
101 | 
102 | ##
103 | ## Test with a 'sort' that crashes
104 | ## NOTE: This run SHOULD return an error, hence the "&&" instead of "||"
105 | ##
106 | seq 10 | datamash --sort --sort-cmd="${BADDIR}/sort" -g 1 sum 1 &&
107 |   { warn_ "datamash --sort with crashing 'sort' did not fail " \
108 |           "(it should have failed)" ; fail=1 ; }
109 | 
110 | Exit $fail
111 | 


--------------------------------------------------------------------------------
/contrib/bash-completion/datamash:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | # datamash bash-completion
  3 | #
  4 | ## Copyright (C) 2014-2021 Assaf Gordon <assafgordon@gmail.com>
  5 | ## Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  6 | ##
  7 | ## This file is part of GNU Datamash.
  8 | ##
  9 | ## This file is free software; as a special exception the author gives
 10 | ## unlimited permission to copy and/or distribute it, with or without
 11 | ## modifications, as long as this notice is preserved.
 12 | ##
 13 | ## This program is distributed in the hope that it will be useful, but
 14 | ## WITHOUT ANY WARRANTY, to the extent permitted by law; without even the
 15 | ## implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 16 | ##
 17 | 
 18 | _datamash ()
 19 | {
 20 |   local cur prev words cword split=false
 21 |   _get_comp_words_by_ref cur prev words cword
 22 | 
 23 |   local modes="check crosstab groupby reverse rmdup transpose"
 24 |   local modes_re=${modes// /|}
 25 | 
 26 |   #NOTE: do not change the spaces (or indentation or backslashes)
 27 |   #      or the regex will fail.
 28 |   local groupby_ops="sum min max absmin absmax range \
 29 | count first last rand \
 30 | unique uniq collapse countunique \
 31 | mean geomean harmmean trimmean median q1 q3 iqr perc mode antimode \
 32 | pstdev sstdev pvar svar mad madraw \
 33 | pskew sskew pkurt skurt dpo jarque \
 34 | pcov scov ppearson spearson dotprod"
 35 |  local groupby_ops_re=${groupby_ops// /|}
 36 | 
 37 |   local line_ops="base64 debase64 md5 sha1 sha224 sha256 sha384 sha512 \
 38 | round floor ceil trunc frac bin strbin dirname basename extname barename \
 39 | getnum cut echo"
 40 |   local line_ops_re=${line_ops// /|}
 41 | 
 42 |   local datamash_short_options="-c -C -f -g -h -H -i -s -t -R -V -W -z"
 43 | 
 44 |   local datamash_long_options=" --skip-comments --full --group --header-in
 45 |   --header-out --headers --vnlog --ignore-case --sort --no-strict --filler
 46 |   --format --field-separator --narm --output-delimiter --round --whitespace
 47 |   --zero-terminated --collapse-delimiter --help --version"
 48 | 
 49 |   local all_ops_re="$modes_re|$groupby_ops_re|$line_ops_re"
 50 | 
 51 |   # IF the previous word as an operator, the next parameter should
 52 |   # be a numeric value, so don't offer any completion.
 53 |   if [[ "$prev" =~ $all_ops_re ]] ; then
 54 |     return 0
 55 |   fi
 56 | 
 57 |   # Based on current parameteres, check which mode we're in.
 58 |   local suggest_modes=1
 59 |   local suggest_groupby_ops=1
 60 |   local suggest_line_ops=1
 61 |   local i=$((cword-1))
 62 | 
 63 |   while [ "$i" -gt 0 ] ; do
 64 |     local tmp_word=${words[$i]}
 65 | 
 66 |     if [[ "$tmp_word" =~ $modes_re ]] ; then
 67 |       suggest_modes=0
 68 |       case "$tmp_word" in
 69 |         crosstab|groupby) suggest_line_ops=0
 70 |       esac
 71 |     fi
 72 | 
 73 |     if [[ "$tmp_word" =~ $groupby_ops_re ]]; then
 74 |       suggest_modes=0
 75 |       suggest_line_ops=0
 76 |     fi
 77 |     # if the user specified -g, we're in "groupby" mode
 78 |     if [[ "$tmp_word" = "-g" ]] ; then
 79 |       suggest_modes=0
 80 |       suggest_line_ops=0
 81 |     fi
 82 | 
 83 |     if [[ "$tmp_word" =~ $line_ops_re ]]; then
 84 |       suggest_modes=0
 85 |       suggest_groupby_ops=0
 86 |     fi
 87 | 
 88 |     i=$((i-1))
 89 |   done
 90 | 
 91 |   # Options trump everything (if the user typed '-')
 92 |   if [[ "$cur" = "-"* ]] ; then
 93 |     COMPREPLY=( $(compgen -W \
 94 |         "$datamash_short_options $datamash_long_options" -- "$cur") )
 95 |     return 0
 96 |   fi
 97 | 
 98 |   # suggest other possibilities
 99 |   local suggest=""
100 |   if [ "$suggest_modes" -eq 1 ] ; then
101 |     suggest="$modes"
102 |   fi
103 |   if [ "$suggest_groupby_ops" -eq 1 ] ; then
104 |     suggest="$suggest $groupby_ops"
105 |   fi
106 |   if [ "$suggest_line_ops" -eq 1 ] ; then
107 |     suggest="$suggest $line_ops"
108 |   fi
109 | 
110 |   COMPREPLY=( $(compgen -W "$suggest" -- "$cur") )
111 |   return 0
112 | }
113 | 
114 | complete -F _datamash datamash
115 | 
116 | # ex: ts=4 sw=4 et filetype=sh
117 | 


--------------------------------------------------------------------------------
/tests/datamash-sha.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | =pod
   Unit Tests for GNU Datamash - tests sha1/224/256/384/512 operations
  4 | 
  5 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  6 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  7 | 
  8 |    This file is part of GNU Datamash.
  9 | 
 10 |    GNU Datamash is free software: you can redistribute it and/or modify
 11 |    it under the terms of the GNU General Public License as published by
 12 |    the Free Software Foundation, either version 3 of the License, or
 13 |    (at your option) any later version.
 14 | 
 15 |    GNU Datamash is distributed in the hope that it will be useful,
 16 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 |    GNU General Public License for more details.
 19 | 
 20 |    You should have received a copy of the GNU General Public License
 21 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 22 | 
 23 |    Written by Assaf Gordon.
 24 | =cut
 25 | ## NOTE: Perl<5.10  don't have Digest::SHA core module -
 26 | ##       So skip only those tests if needed.
 27 | ##       Other line operations (e.g. md5/base64) are tested in the main
 28 | ##       unit test module 'datamash-tests.pl'.
 29 | use strict;
 30 | use warnings;
 31 | 
 32 | # Until a better way comes along to auto-use Coreutils Perl modules
 33 | # as in the coreutils' autotools system.
 34 | use Coreutils;
 35 | use CuSkip;
 36 | use CuTmpdir qw(datamash);
 37 | 
## Perl 5.8 and earlier do not have Digest::SHA as core module.
## Skip the test if it is not found.
## The 'use' is wrapped in a string eval so that it is attempted at run time:
## a missing module then leaves the reason in $@ instead of aborting
## compilation.  The trailing '1;' makes the eval return a true value when
## the module loaded.
my $have_sha =
   eval qq{use Digest::SHA qw(sha1_hex sha224_hex sha256_hex
           sha384_hex sha512_hex);1;};

CuSkip::skip "requires Perl>5.8 with Digest::SHA module\nload error:\n$@"
   unless $have_sha;
 46 | 
# Base name of this script (directory part removed); passed to run_tests below.
(my $program_name = $0) =~ s|.*/||;
my $prog_bin = 'datamash';

## Cross-Compiling portability hack:
##  under qemu/binfmt, argv[0] (which is used to report errors) will contain
##  the full path of the binary, if the binary is on the $PATH.
##  So we try to detect what is the actual returned value of the program
##  in case of an error.
##  The invalid option '--foobar' is expected to trigger an error message;
##  the text before the first ':' on its first line is taken as the name.
my $prog = `$prog_bin --foobar 2>&1 | head -n 1 | cut -f1 -d:`;
chomp $prog if $prog;
# Fall back to the plain binary name if nothing was captured.
$prog = $prog_bin unless $prog;

# Turn off localization of executable's output.
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;

# Space-separated input; the second column is the one that gets hashed.
my $in_g1=<<'EOF';
A 100
A 10
A 50
A 35
EOF

# Header line, with custom field separator
# NOTE(review): $in_hdr2 is not referenced by any test in this file.
my $in_hdr2=<<'EOF';
x:y:z
A:3:W
A:5:W
A:7:W
A:11:X
A:13:X
B:17:Y
B:19:Z
C:23:Z
EOF
 81 | 
 82 | =pod
 83 |   Example:
 84 |   my $data = "a 1\nb 2\n";
 85 |   my $out = transform_column($data, 2, \&md5_hex);
 86 |   # out => md5_hex("1") . "\n" . md5_hex("2") . "\n" ;
 87 | =cut
 88 | sub transform_column($$$)
 89 | {
 90 |   my $input_text = shift;
 91 |   my $input_column = shift;
 92 |   my $function = shift;
 93 | 
 94 |   return join "",
 95 | 		map { "$_\n" }
 96 | 		map { &$function($_->[ $input_column - 1 ]) }
 97 | 		map { [ split / / ] }
 98 | 		split("\n", $input_text);
 99 | }
100 | 
# Expected outputs: each digest of the second column of '$in_g1', computed
# with the Digest::SHA functions loaded above.
my $out_g1_sha1 = transform_column ($in_g1, 2, \&sha1_hex);
my $out_g1_sha224 = transform_column ($in_g1, 2, \&sha224_hex);
my $out_g1_sha256 = transform_column ($in_g1, 2, \&sha256_hex);
my $out_g1_sha384 = transform_column ($in_g1, 2, \&sha384_hex);
my $out_g1_sha512 = transform_column ($in_g1, 2, \&sha512_hex);

# Each entry: test name, datamash arguments, piped input (IN_PIPE) and
# expected stdout (OUT).
my @Tests =
(
  ['sha1-1',  '-W sha1 2',   {IN_PIPE=>$in_g1}, {OUT=>$out_g1_sha1}],
  ['sha224-1','-W sha224 2', {IN_PIPE=>$in_g1}, {OUT=>$out_g1_sha224}],
  ['sha256-1','-W sha256 2', {IN_PIPE=>$in_g1}, {OUT=>$out_g1_sha256}],
  ['sha384-1','-W sha384 2', {IN_PIPE=>$in_g1}, {OUT=>$out_g1_sha384}],
  ['sha512-1','-W sha512 2', {IN_PIPE=>$in_g1}, {OUT=>$out_g1_sha512}],
);

# Optional knobs taken from the environment and forwarded to run_tests.
my $save_temps = $ENV{SAVE_TEMPS};
my $verbose = $ENV{VERBOSE};

my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
exit $fail;
121 | 


--------------------------------------------------------------------------------
/tests/datamash-io-errors.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/sh
  2 | #   Unit Tests for GNU Datamash - I/O error simulation
  3 | 
  4 | #    Copyright (C) 2014-2021 Assaf Gordon <assafgordon@gmail.com>
  5 | #
  6 | #    This file is part of GNU Datamash.
  7 | #
  8 | #    GNU Datamash is free software: you can redistribute it and/or modify
  9 | #    it under the terms of the GNU General Public License as published by
 10 | #    the Free Software Foundation, either version 3 of the License, or
 11 | #    (at your option) any later version.
 12 | #
 13 | #    GNU Datamash is distributed in the hope that it will be useful,
 14 | #    but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | #    GNU General Public License for more details.
 17 | #
 18 | #    You should have received a copy of the GNU General Public License
 19 | #    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 20 | #
 21 | #    Written by Assaf Gordon
 22 | 
 23 | ##
 24 | ## This script tests datamash's handling of I/O errors.
 25 | ## It requires special setup, and is skipped unless found.
 26 | ##
 27 | 
 28 | . "${test_dir=.}/init.sh"; path_prepend_ ./src
 29 | 
 30 | expensive_
 31 | 
 32 | fail=0
 33 | 
 34 | ##
 35 | ## The required mounted file-systems
 36 | ##
 37 | FULLFS=/tmp/fullfs/
 38 | BADFS=/tmp/badfs/
 39 | 
 40 | which mountpoint >/dev/null 2>&1 ||
 41 |     skip_ "requires mountpoint program"
 42 | stdbuf --version >/dev/null 2>&1 ||
 43 |     skip_ "requires GNU stdbuf program"
 44 | stat --version >/dev/null 2>&1 ||
 45 |     skip_ "requires GNU stat program"
 46 | mountpoint -q "$FULLFS" ||
 47 |     skip_ "requires special mounted file system '$FULLFS'"
 48 | mountpoint -q "$BADFS" ||
 49 |     skip_ "requires special mounted file system '$BADFS'"
 50 | 
 51 | ##
 52 | ## Clean files in the (almost) full file-system.
 53 | ## This will ensure few writes are successful before getting "no space" error
 54 | ## (unlike "/dev/full").
 55 | ##
 56 | clean_full_fs()
 57 | {
 58 |   find "$FULLFS" -maxdepth 1 -type f -delete ||
 59 |     framework_failure_ "failed to clean full-fs"
 60 |   # Give the system time to actually delete the files
 61 |   fullfs_retries=1
 62 |   FREE=0
 63 |   while test $fullfs_retries -lt 5 && test $FREE -le 5 ; do
 64 |     sync ; sleep 1
 65 |     FREE=$(stat --file-system -c %a "$FULLFS") ||
 66 |       framework_failure_ "failed to find free space on $FULLFS"
 67 |     fullfs_retries=$((fullfs_retries+1))
 68 |   done
 69 |   # Ensure the (almost) full file system has a bit of free space...
 70 |   test "$FREE" -gt 5 ||
 71 |     framework_failure_ "almost-full-file system has no free space"
 72 |   # ... but not too much (otherwise the program will not get "no space" errors).
 73 |   test "$FREE" -lt 64 ||
 74 |     framework_failure_ "almost-full-file system has too much free spcae"
 75 | }
 76 | 
 77 | ##
 78 | ## Sanity checks:
 79 | ## 1. Ensure the corrupted file system is corrupted
 80 | cat "$BADFS/numbers.txt" >/dev/null 2>&1 &&
 81 |     framework_failure_ "corrupted file system did not trigger I/O error"
 82 | ## 2. Ensure the (almost) full file system gets full
 83 | clean_full_fs
 84 | seq 10000 >"$FULLFS/test.txt" 2>/dev/null &&
 85 |     framework_failure_ "almost full file system did not trigger no-space error"
 86 | clean_full_fs
 87 | 
 88 | ## Test 1:
 89 | ##  input error, reading file directly
 90 | datamash sum 1 < "$BADFS/numbers.txt" >/dev/null &&
 91 | 	{ warn_ "datamash failed to detect read error" ; fail=1 ; }
 92 | 
 93 | ## Test 2:
 94 | ##  input error, using sort (and popen/pipe)
 95 | datamash -s -g 1 sum 1 < "$BADFS/numbers.txt" >/dev/null &&
 96 | 	{ warn_ "datamash+sort failed to detect read error" ; fail=1 ; }
 97 | 
 98 | ## Test 3:
 99 | ##  output error, default line-buffering
100 | seq 10000 | datamash -g 1 count 1 > "$FULLFS/test.txt" &&
101 | 	{ warn_ "datamash failed to detect no-space error" ; fail=1 ; }
102 | clean_full_fs
103 | 
104 | ## Test 4:
105 | ##  output error, with line-buffering.
106 | ##  This means few of the first "write()" calls will succeed,
107 | ##  and later ones should fail with "no space" (which is different than
108 | ##  writing to "/dev/full").
109 | seq 10000 | stdbuf -oL datamash -g 1 count 1 > "$FULLFS/test.txt" &&
110 | 	{ warn_ "datamash failed to detect no-space error" ; fail=1 ; }
111 | clean_full_fs
112 | 
113 | Exit $fail
114 | 


--------------------------------------------------------------------------------
/tests/datamash-i18n-de.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | =pod
  3 |   Unit Tests for GNU Datamash - check German locale (de_DE.UTF-8).
  4 | 
  5 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  6 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  7 | 
  8 |    This file is part of GNU Datamash.
  9 | 
 10 |    GNU Datamash is free software: you can redistribute it and/or modify
 11 |    it under the terms of the GNU General Public License as published by
 12 |    the Free Software Foundation, either version 3 of the License, or
 13 |    (at your option) any later version.
 14 | 
 15 |    GNU Datamash is distributed in the hope that it will be useful,
 16 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 |    GNU General Public License for more details.
 19 | 
 20 |    You should have received a copy of the GNU General Public License
 21 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 22 | 
 23 |    Written by Assaf Gordon and Timothy Rice.
 24 | =cut
 25 | use strict;
 26 | use warnings;
 27 | 
 28 | # Until a better way comes along to auto-use Coreutils Perl modules
 29 | # as in the coreutils' autotools system.
 30 | use Coreutils;
 31 | use CuSkip;
 32 | use CuTmpdir qw(datamash);
 33 | use MIME::Base64 ;
 34 | 
## Skip this test if the German locale is not found.
use POSIX qw(locale_h);
use locale;
# setlocale returns undef when the requested locale is not available.
my $lc_de = setlocale(LC_ALL, "de_DE.utf8");
CuSkip::skip "requires de_DE.utf8 locale\n"
   unless defined($lc_de);

# Base name of this script (directory part removed); passed to run_tests below.
(my $program_name = $0) =~ s|.*/||;
my $prog_bin = 'datamash';

## Cross-Compiling portability hack:
##  under qemu/binfmt, argv[0] (which is used to report errors) will contain
##  the full path of the binary, if the binary is on the $PATH.
##  So we try to detect what is the actual returned value of the program
##  in case of an error.
##  Here the name is taken from the output of the '---print-progname' option.
##  NOTE(review): the output is used as-is (no chomp) - confirm the option
##  prints the name without a trailing newline.
my $prog = `$prog_bin ---print-progname`;
$prog = $prog_bin unless $prog;

## Portability hack
## Check if the system's sort supports stable sorting ('-s').
## If it doesn't - skip some tests
## NOTE(review): $have_stable_sort is computed but not referenced again in
## this file.
my $rc = system("sort -s < /dev/null > /dev/null 2>/dev/null");
# -1 means the command could not be started; the low 7 bits hold the number
# of the signal that terminated it, if any.
die "testing framework failure: failed to execute sort -s"
  if ( ($rc == -1) || ($rc & 127) );
my $sort_exit_code = ($rc >> 8);
my $have_stable_sort = ($sort_exit_code==0);


# German tests: run datamash itself under the de_DE.utf8 locale.
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('de_DE.utf8') x 3;

# Each entry: test name, datamash arguments, piped input (IN_PIPE) and
# expected stdout (OUT).
my @Prufungen =
(
  # Check that the comma works as the decimal separator
  ['de1', 'sum 1',       {IN_PIPE=>"1,1\n"},           {OUT=>"1,1\n"}],
  ['de2', 'sum 1,2',     {IN_PIPE=>"1,1\t2,2\n"},      {OUT=>"1,1\t2,2\n"}],
  ['de3', 'count 1,2,3', {IN_PIPE=>"1,1\t2,2\t3,3\n"}, {OUT=>"1\t1\t1\n"}],

  # There is a bug where the bin operation does not respect
  # the locale's choice of decimal separator.
  # TODO: Be able to uncomment the following line.
  #['de4', 'bin:0,1 1',   {IN_PIPE=>"1,15\n"},          {OUT=>"1,1\n"}],

  # Comma as field separator is problematic for numeric operations
  ['de5',  '-t, cut 2,1',         {IN_PIPE=>"1,2\n"},   {OUT=>"2,1\n"}],
  ['de6',  '-t, unique 1,2',      {IN_PIPE=>"1,2\n"},   {OUT=>"1,2\n"}],
  ['de7',  '-t, count 1,2',       {IN_PIPE=>"1,2\n"},   {OUT=>"1,1\n"}],
  ['de8',  '-t, countunique 1,2', {IN_PIPE=>"1,2\n"},   {OUT=>"1,1\n"}],
  ['de9',  '-t, rmdup 1',         {IN_PIPE=>"1,2\n"},   {OUT=>"1,2\n"}],
  ['de10', '-t, rmdup 2',         {IN_PIPE=>"1,2\n"},   {OUT=>"1,2\n"}],
  ['de11', '-t, sum 1,2',         {IN_PIPE=>"1,2\n"},   {OUT=>"1,2\n"}],
  ['de12', '-t, sum 1,2,3',       {IN_PIPE=>"1,2,3\n"}, {OUT=>"1,2,3\n"}],
  ['de13', '-st, groupby 1 sum 2,3',
    {IN_PIPE=>"a,14,1\nb,1,14\na,2,1\n"}, {OUT=>"a,16,2\nb,1,14\n"}],

  # TODO: make the getnum operation locale-aware
  #['de14', 'getnum 1',   {IN_PIPE=>"bar-1,2\n"}, {OUT=>"1,2\n"}],
  #['de15', 'getnum:p 1', {IN_PIPE=>"bar-1,2\n"}, {OUT=>"1,2\n"}],
  #['de16', 'getnum:d 1', {IN_PIPE=>"bar-1,2\n"}, {OUT=>"-1,2\n"}],

);

# Optional knobs taken from the environment and forwarded to run_tests.
my $save_temps = $ENV{SAVE_TEMPS};
my $verbose = $ENV{VERBOSE};

my $fail = run_tests ($program_name, $prog, \@Prufungen, $save_temps, $verbose);

exit $fail;
103 | 


--------------------------------------------------------------------------------
/tests/datamash-sort-header.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | =pod
  3 |   Unit Tests for GNU Datamash - perform simple calculation on input data
  4 | 
  5 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  6 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  7 | 
  8 |    This file is part of GNU Datamash.
  9 | 
 10 |    GNU Datamash is free software: you can redistribute it and/or modify
 11 |    it under the terms of the GNU General Public License as published by
 12 |    the Free Software Foundation, either version 3 of the License, or
 13 |    (at your option) any later version.
 14 | 
 15 |    GNU Datamash is distributed in the hope that it will be useful,
 16 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 |    GNU General Public License for more details.
 19 | 
 20 |    You should have received a copy of the GNU General Public License
 21 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 22 | 
 23 |    Written by Assaf Gordon and Tim Rice.
 24 | =cut
 25 | use strict;
 26 | use warnings;
 27 | use List::Util qw/max/;
 28 | use Data::Dumper;
 29 | 
 30 | # Until a better way comes along to auto-use Coreutils Perl modules
 31 | # as in the coreutils' autotools system.
 32 | use Coreutils;
 33 | use CuSkip;
 34 | use CuTmpdir qw(datamash);
 35 | 
 36 | (my $program_name = $0) =~ s|.*/||;
 37 | my $prog_bin = 'datamash';
 38 | 
 39 | ## Cross-Compiling portability hack:
 40 | ##  under qemu/binfmt, argv[0] (which is used to report errors) will contain
 41 | ##  the full path of the binary, if the binary is on the $PATH.
 42 | ##  So we try to detect what is the actual returned value of the program
 43 | ##  in case of an error.
 44 | my $prog = `$prog_bin --foobar 2>&1 | head -n 1 | cut -f1 -d:`;
 45 | chomp $prog if $prog;
 46 | $prog = $prog_bin unless $prog;
 47 | 
 48 | # Turn off localization of executable's output.
 49 | @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
 50 | 
 51 | # An unsorted input with a header line
 52 | my $INFILE=<<'EOF';
 53 | x y z
 54 | A % 1
 55 | B ( 2
 56 | A & 3
 57 | B = 4
 58 | EOF
 59 | 
 60 | my @INFILE_lines = split /\n/, $INFILE, -1;
 61 | my $INFILE_NO_HEADER = join("\n", @INFILE_lines[1..$#INFILE_lines]);
 62 | 
 63 | # The expected output with different option combinations
 64 | my $exp_no_sort_no_header=<<'EOF';
 65 | x z
 66 | A 1
 67 | B 2
 68 | A 3
 69 | B 4
 70 | EOF
 71 | 
 72 | my $exp_no_sort_in_header=<<'EOF';
 73 | A 1
 74 | B 2
 75 | A 3
 76 | B 4
 77 | EOF
 78 | 
 79 | my $exp_sort_in_header=<<'EOF';
 80 | A 1,3
 81 | B 2,4
 82 | EOF
 83 | 
 84 | my $exp_no_sort_headers=<<'EOF';
 85 | GroupBy(x) unique(z)
 86 | A 1
 87 | B 2
 88 | A 3
 89 | B 4
 90 | EOF
 91 | 
 92 | my $exp_sort_headers=<<'EOF';
 93 | GroupBy(x) unique(z)
 94 | A 1,3
 95 | B 2,4
 96 | EOF
 97 | 
 98 | my $exp_sort_out_header=<<'EOF';
 99 | GroupBy(field-1) unique(field-3)
100 | A 1,3
101 | B 2,4
102 | EOF
103 | 
104 | my @Tests =
105 | (
106 |   # Sort and header option combinations (group by field 1, unique of field 3)
107 |   ['sh01',  '-t " " -g 1 unique 3',
108 |     {IN_PIPE=>$INFILE}, {OUT=>$exp_no_sort_no_header}],
109 |   ['sh02',  '-t " " -g 1 --header-in unique 3',
110 |     {IN_PIPE=>$INFILE}, {OUT=>$exp_no_sort_in_header}],
111 |   ['sh03',  '-t " " -g 1 --sort --header-in unique 3',
112 |     {IN_PIPE=>$INFILE}, {OUT=>$exp_sort_in_header}],
113 |   ['sh04',  '-t " " -g 1 --headers unique 3',
114 |     {IN_PIPE=>$INFILE}, {OUT=>$exp_no_sort_headers}],
115 |   ['sh05',  '-t " " -g 1 --sort --headers unique 3',
116 |     {IN_PIPE=>$INFILE}, {OUT=>$exp_sort_headers}],
117 |   ['sh06',  '-t " " -sH -g 1 unique 3',
118 |     {IN_PIPE=>$INFILE}, {OUT=>$exp_sort_headers}],
119 |   ['sh07',  '-t " " --sort --header-out -g 1 unique 3',
120 |     {IN_PIPE=>$INFILE_NO_HEADER}, {OUT=>$exp_sort_out_header}],
121 | 
122 |   # Check sort-piping with empty input - should always produce empty output
123 |   ['sh08',  '-t " " --sort unique 3',
124 |     {IN_PIPE=>""}, {OUT=>""}],
125 |   ['sh09',  '-t " " --sort --header-in unique 3',
126 |     {IN_PIPE=>""}, {OUT=>""}],
127 |   ['sh10',  '-t " " --sort --header-out unique 3',
128 |     {IN_PIPE=>""}, {OUT=>""}],
129 |   ['sh11',  '-t " " --sort --headers unique 3',
130 |     {IN_PIPE=>""}, {OUT=>""}],
131 | 
132 | );
133 | 
134 | my $save_temps = $ENV{SAVE_TEMPS};
135 | my $verbose = $ENV{VERBOSE};
136 | 
137 | my $fail = run_tests ($program_name, $prog_bin, \@Tests, $save_temps, $verbose);
138 | exit $fail;
139 | 


--------------------------------------------------------------------------------
/src/text-options.c:
--------------------------------------------------------------------------------
  1 | /* GNU Datamash - perform simple calculation on input data
  2 | 
  3 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  4 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  5 | 
  6 |    This file is part of GNU Datamash.
  7 | 
  8 |    GNU Datamash is free software: you can redistribute it and/or modify
  9 |    it under the terms of the GNU General Public License as published by
 10 |    the Free Software Foundation, either version 3 of the License, or
 11 |    (at your option) any later version.
 12 | 
 13 |    GNU Datamash is distributed in the hope that it will be useful,
 14 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |    GNU General Public License for more details.
 17 | 
 18 |    You should have received a copy of the GNU General Public License
 19 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 20 | */
 21 | 
 22 | /* Written by Assaf Gordon */
 23 | #include <config.h>
 24 | #include <float.h>
 25 | #include <ctype.h>
 26 | #include <stdbool.h>
 27 | 
 28 | #include "system.h"
 29 | 
 30 | #include "die.h"
 31 | #include "double-format.h"
 32 | #include "text-options.h"
 33 | 
 34 | /* The character marking end of line. Default to \n. */
 35 | char eolchar = '\n';
 36 | 
 37 | /* Tab character separating fields.  If TAB_WHITESPACE, then fields are
 38 |    separated by the empty string between a non-blank character and a blank
 39 |    character. */
 40 | int in_tab = '\t';
 41 | int out_tab= '\t';
 42 | 
 43 | /* Global case-sensitivity option. Defaults to 'true' . */
 44 | bool case_sensitive = true;
 45 | 
 46 | /* In the future: allow users to change this */
 47 | char numeric_output_format[MAX_NUMERIC_FORMAT_LEN + 1] = "%.14Lg";
 48 | 
 49 | /* number of bytes to allocate for output buffer */
 50 | int   numeric_output_bufsize = 200;
 51 | 
 52 | /* The character used to separate collapsed/uniqued strings */
 53 | char collapse_separator = ',';
 54 | 
 55 | /* Should NA/NaN/empty values be silently ignored? */
 56 | bool remove_na_values = false;
 57 | 
 58 | /* if true, 'transpose' and 'reverse' require every line to have
 59 |    the exact same number of fields. Otherwise, the program
 60 |    will fail with non-zero exit code. */
 61 | bool strict = true;
 62 | 
 63 | /* if 'strict' is false, lines with fewer-than-expected fields
 64 |    will be filled with this value */
 65 | const char* missing_field_filler = "N/A";
 66 | 
 67 | /* if true, skip comment lines (lines starting with optional whitespace
 68 |    followed by '#' or ';'). See line_record_is_comment.  */
 69 | bool skip_comments = false;
 70 | 
 71 | bool vnlog = false;
 72 | 
 73 | #define UCHAR_LIM (UCHAR_MAX + 1)
 74 | bool blanks[UCHAR_LIM];
 75 | 
 76 | /* Fill the global 'blanks' table with one entry per unsigned char value:
 77 |    true where isblank() reports a blank character, false otherwise.  */
 78 | void
 79 | init_blank_table (void)
 80 | {
 81 |   /* Comparing against zero normalizes isblank()'s int result to bool.  */
 82 |   for (size_t ch = 0; ch < UCHAR_LIM; ++ch)
 83 |     blanks[ch] = (isblank (ch) != 0);
 84 | }
 85 | 
 86 | 
 87 | /* Force generation of these inline'd symbols, needed to avoid
 88 |    "undefined reference" when compiling with coverage instrumentation.
 89 |    See: http://stackoverflow.com/a/16245669 */
 90 | void print_field_separator ();
 91 | void print_line_separator ();
 92 | 
 93 | 
 94 | 
 95 | /* Calculate the required size of the output buffer by formatting LDBL_MAX
 96 |    with the current format and adding 100 bytes on top of that length.  */
 97 | static void
 98 | finalize_numeric_output_buffer ()
 99 | {
100 |   char probe;   /* 1-byte target: only snprintf's return value is used.  */
101 |   numeric_output_bufsize
102 |     = snprintf (&probe, 1, numeric_output_format, LDBL_MAX) + 100;
103 | }
104 | 
105 | /* Switch the numeric output format to fixed-point "%.<N>Lf", N being the
106 |    base-10 integer in DIGITS (accepted range: 1 to 50).  A missing or
107 |    invalid value is reported through die (EXIT_FAILURE, ...).  */
108 | void
109 | set_numeric_output_precision (const char* digits)
110 | {
111 |   char *endp = NULL;
112 |   long int precision;
113 |   if (digits == NULL || *digits == '\0')
114 |     die (EXIT_FAILURE, 0, _("missing rounding digits value"));
115 | 
116 |   errno = 0;
117 |   precision = strtol (digits, &endp, 10);
118 |   if (errno != 0 || *endp != '\0' || precision < 1 || precision > 50)
119 |     die (EXIT_FAILURE, 0, _("invalid rounding digits value %s"),
120 |          quote (digits));
121 |   snprintf (numeric_output_format, sizeof (numeric_output_format),
122 |             "%%.%dLf", (int) precision);
123 |   finalize_numeric_output_buffer ();
124 | }
125 | 
126 | /* Install the validated FORMAT globally and resize the output buffer.  */
127 | void
128 | set_numeric_printf_format (const char* format)
129 | {
130 |   char *validated = validate_double_format (format);
131 |   snprintf (numeric_output_format, sizeof (numeric_output_format),
132 |             "%s", validated);
133 |   free (validated);
134 |   finalize_numeric_output_buffer ();
135 | }


--------------------------------------------------------------------------------
/tests/datamash-check-tabular.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | =pod
  3 |   Unit Tests for GNU Datamash - perform simple calculation on input data
  4 |   Tests for the 'check' operation mode (tabular format validation).
  5 | 
  6 | 
  7 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  8 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  9 | 
 10 |    This file is part of GNU Datamash.
 11 | 
 12 |    GNU Datamash is free software: you can redistribute it and/or modify
 13 |    it under the terms of the GNU General Public License as published by
 14 |    the Free Software Foundation, either version 3 of the License, or
 15 |    (at your option) any later version.
 16 | 
 17 |    GNU Datamash is distributed in the hope that it will be useful,
 18 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 |    GNU General Public License for more details.
 21 | 
 22 |    You should have received a copy of the GNU General Public License
 23 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 24 | 
 25 |    Written by Assaf Gordon.
 26 | =cut
 27 | use strict;
 28 | use warnings;
 29 | use List::Util qw/max/;
 30 | use Data::Dumper;
 31 | 
 32 | # Until a better way comes along to auto-use Coreutils Perl modules
 33 | # as in the coreutils' autotools system.
 34 | use Coreutils;
 35 | use CuSkip;
 36 | use CuTmpdir qw(datamash);
 37 | 
 38 | (my $program_name = $0) =~ s|.*/||;
 39 | my $prog_bin = 'datamash';
 40 | # Program name as the binary prints it (used in the expected ERR text below).
 41 | my $prog = `$prog_bin ---print-progname`;
 42 | $prog = $prog_bin unless $prog;   # fall back if the query produced nothing
 43 | # Turn off localization of executable's output.
 44 | @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
 45 | 
 46 | my $in1=<<'EOF';
 47 | a	x	1
 48 | b	z	6
 49 | c	x	7
 50 | EOF
 51 | 
 52 | # missing field on second line
 53 | my $in2=<<'EOF';
 54 | a	x	1
 55 | b	z
 56 | c	x	7
 57 | EOF
 58 | 
 59 | # Same as in2, with whitespace delimiters
 60 | my $in2_ws=<<"EOF";
 61 | a    x  \t  1
 62 |   b   \t  z
 63 | c x 7
 64 | EOF
 65 | 
 66 | # second line has 2 tab characters, thus 3 fields
 67 | # (the last field is empty).
 68 | # version 1.1.0 and before rejected such input.
 69 | my $in3=<<"EOF";
 70 | a	x	1
 71 | b	z\t
 72 | c	x	7
 73 | EOF
 74 | 
 75 | # Same as in3, with whitespace delimiters
 76 | my $in3_ws=<<"EOF";
 77 | a     x  \t  1
 78 | b \t  z  \t
 79 |    c\t\t\tx     \t  7
 80 | EOF
 81 | 
 82 | 
 83 | # one line
 84 | my $in4=<<'EOF';
 85 | a	x	1
 86 | EOF
 87 | 
 88 | # one field
 89 | my $in5=<<'EOF';
 90 | a
 91 | b
 92 | c
 93 | d
 94 | EOF
 95 | 
 96 | # one field, with bad input (fourth line has 0 fields)
 97 | my $in6=<<'EOF';
 98 | a
 99 | b
100 | c
101 | 
102 | e
103 | EOF
104 | 
105 | my @Tests =
106 | (
107 |   ['c1', 'check', {IN_PIPE=>$in1}, {OUT=>"3 lines, 3 fields\n"}],
108 | 
109 |   ['c2', 'check', {IN_PIPE=>$in4}, {OUT=>"1 line, 3 fields\n"}],
110 |   ['c3', 'check', {IN_PIPE=>$in5}, {OUT=>"4 lines, 1 field\n"}],
111 |   ['c4', 'check', {IN_PIPE=>$in3}, {OUT=>"3 lines, 3 fields\n"}],
112 |   ['c5', '-W check', {IN_PIPE=>$in3_ws}, {OUT=>"3 lines, 3 fields\n"}],
113 | 
114 |   # Check bad input:
115 |   # The first four lines will be something like:
116 |   #   'line X has N fields:'
117 |   #   '  [content of line X]'
118 |   #   'line Y has M fields:'
119 |   #   '  [content of line Y]'
120 |   # The ERR_SUBST will remove these messages, as they are highly variable
121 |   # and dependent on the input. Then only the last line of error message
122 |   # is checked.
123 |   ['e1', 'check', {IN_PIPE=>$in2}, {EXIT=>1},
124 |     {ERR_SUBST => 's/^(li|  ).*$//'},
125 |     {ERR => "\n\n\n\n$prog: check failed: line 2 has 2 fields " .
126 |                            "(previous line had 3)\n"}],
127 |   ['e1ws', '-W check', {IN_PIPE=>$in2_ws}, {EXIT=>1},
128 |     {ERR_SUBST => 's/^(li|  ).*$//'},
129 |     {ERR => "\n\n\n\n$prog: check failed: line 2 has 2 fields " .
130 |                            "(previous line had 3)\n"}],
131 | 
132 |   ['e2', 'check', {IN_PIPE=>$in6}, {EXIT=>1},
133 |     {ERR_SUBST => 's/^(li|  ).*$//'},
134 |     {ERR => "\n\n\n\n$prog: check failed: line 4 has 0 fields " .
135 |                            "(previous line had 1)\n"}],
136 |   ['e2ws', '-W check', {IN_PIPE=>$in6}, {EXIT=>1},
137 |     {ERR_SUBST => 's/^(li|  ).*$//'},
138 |     {ERR => "\n\n\n\n$prog: check failed: line 4 has 0 fields " .
139 |                            "(previous line had 1)\n"}],
140 | );
141 | 
142 | my $save_temps = $ENV{SAVE_TEMPS};
143 | my $verbose = $ENV{VERBOSE};
144 | 
145 | my $fail = run_tests ($program_name, $prog_bin, \@Tests, $save_temps, $verbose);
146 | exit $fail;
147 | 


--------------------------------------------------------------------------------
/m4/.gitignore:
--------------------------------------------------------------------------------
  1 | *~
  2 | /00gnulib.m4
  3 | /absolute-header.m4
  4 | /af_alg.m4
  5 | /alloca.m4
  6 | /arpa_inet_h.m4
  7 | /assert.m4
  8 | /base64.m4
  9 | /byteswap.m4
 10 | /calloc.m4
 11 | /ceill.m4
 12 | /ceil.m4
 13 | /check-math-lib.m4
 14 | /close.m4
 15 | /codeset.m4
 16 | /configmake.m4
 17 | /ctype_h.m4
 18 | /ctype.m4
 19 | /double-slash-root.m4
 20 | /dup2.m4
 21 | /eealloc.m4
 22 | /errno_h.m4
 23 | /error.m4
 24 | /expl.m4
 25 | /exp.m4
 26 | /exponentd.m4
 27 | /exponentf.m4
 28 | /exponentl.m4
 29 | /extensions.m4
 30 | /extern-inline.m4
 31 | /extern-inline.m4~
 32 | /fabsl.m4
 33 | /fabs.m4
 34 | /fcntl_h.m4
 35 | /fcntl.m4
 36 | /fcntl-o.m4
 37 | /fflush.m4
 38 | /flexmember.m4
 39 | /float_h.m4
 40 | /floorl.m4
 41 | /floor.m4
 42 | /fpending.m4
 43 | /fpieee.m4
 44 | /fpurge.m4
 45 | /freading.m4
 46 | /free.m4
 47 | /frexpl.m4
 48 | /frexp.m4
 49 | /fseek.m4
 50 | /fseeko.m4
 51 | /fstat.m4
 52 | /ftell.m4
 53 | /ftello.m4
 54 | /getdtablesize.m4
 55 | /getlocalename_l.m4
 56 | /getopt.m4
 57 | /getprogname.m4
 58 | /getrandom.m4
 59 | /gettext.m4
 60 | /glibc21.m4
 61 | /glibc2.m4
 62 | /gl-openssl.m4
 63 | /gnulib-cache.m4
 64 | /gnulib-common.m4
 65 | /gnulib-comp.m4
 66 | /gnulib-tool.m4
 67 | /host-cpu-c-abi.m4
 68 | /iconv_h.m4
 69 | /iconv.m4
 70 | /iconv_open.m4
 71 | /include_next.m4
 72 | /inet_pton.m4
 73 | /init-package-version.m4
 74 | /__inline.m4
 75 | /inline.m4
 76 | /intdiv0.m4
 77 | /intldir.m4
 78 | /intl.m4
 79 | /intlmacosx.m4
 80 | /intl-thread-locale.m4
 81 | /intmax.m4
 82 | /intmax_t.m4
 83 | /inttostr.m4
 84 | /inttypes_h.m4
 85 | /inttypes.m4
 86 | /inttypes-pri.m4
 87 | /isblank.m4
 88 | /isfinite.m4
 89 | /isinf.m4
 90 | /isnand.m4
 91 | /isnanf.m4
 92 | /isnanl.m4
 93 | /iswblank.m4
 94 | /iswdigit.m4
 95 | /iswxdigit.m4
 96 | /largefile.m4
 97 | /lcmessage.m4
 98 | /ldexpl.m4
 99 | /ldexp.m4
100 | /lib-ld.m4
101 | /lib-link.m4
102 | /lib-prefix.m4
103 | /libunistring-base.m4
104 | /limits-h.m4
105 | /localcharset.m4
106 | /localeconv.m4
107 | /locale-fr.m4
108 | /locale_h.m4
109 | /locale-ja.m4
110 | /localename.m4
111 | /locale-zh.m4
112 | /lock.m4
113 | /logl.m4
114 | /log.m4
115 | /longlong.m4
116 | /lseek.m4
117 | /malloca.m4
118 | /malloc.m4
119 | /mathfunc.m4
120 | /math_h.m4
121 | /mbchar.m4
122 | /mbiter.m4
123 | /mbrtowc.m4
124 | /mbsinit.m4
125 | /mbslen.m4
126 | /mbstate_t.m4
127 | /md5.m4
128 | /memchr.m4
129 | /minmax.m4
130 | /mmap-anon.m4
131 | /mode_t.m4
132 | /modfl.m4
133 | /modf.m4
134 | /msvc-inval.m4
135 | /msvc-nothrow.m4
136 | /multiarch.m4
137 | /netinet_in_h.m4
138 | /nls.m4
139 | /nocrash.m4
140 | /off_t.m4
141 | /open-cloexec.m4
142 | /open.m4
143 | /open-slash.m4
144 | /pathmax.m4
145 | /pclose.m4
146 | /pid_t.m4
147 | /po.m4
148 | /popen.m4
149 | /printf.m4
150 | /printf-posix.m4
151 | /progtest.m4
152 | /pthread_rwlock_rdlock.m4
153 | /quotearg.m4
154 | /random.m4
155 | /random_r.m4
156 | /reallocarray.m4
157 | /realloc.m4
158 | /roundl.m4
159 | /round.m4
160 | /setlocale.m4
161 | /setlocale_null.m4
162 | /sha1.m4
163 | /sha256.m4
164 | /sha512.m4
165 | /signbit.m4
166 | /size_max.m4
167 | /snprintf.m4
168 | /socklen.m4
169 | /sockpfaf.m4
170 | /sqrtl.m4
171 | /sqrt.m4
172 | /ssize_t.m4
173 | /stat.m4
174 | /stat-time.m4
175 | /stdalign.m4
176 | /stdarg.m4
177 | /stdbool.m4
178 | /stddef_h.m4
179 | /std-gnu11.m4
180 | /stdint_h.m4
181 | /stdint.m4
182 | /stdio_h.m4
183 | /stdlib_h.m4
184 | /stdnoreturn.m4
185 | /stpcpy.m4
186 | /strcasecmp.m4
187 | /strdup.m4
188 | /strerror.m4
189 | /string_h.m4
190 | /strings_h.m4
191 | /strncasecmp.m4
192 | /strndup.m4
193 | /strnlen.m4
194 | /strsep.m4
195 | /strtod.m4
196 | /strtold.m4
197 | /strtoll.m4
198 | /strtoull.m4
199 | /strtoumax.m4
200 | /sys_random_h.m4
201 | /sys_socket_h.m4
202 | /sys_stat_h.m4
203 | /sys_types_h.m4
204 | /sys_uio_h.m4
205 | /sys_wait_h.m4
206 | /threadlib.m4
207 | /time_h.m4
208 | /truncl.m4
209 | /trunc.m4
210 | /uintmax_t.m4
211 | /ungetc.m4
212 | /unistd_h.m4
213 | /unlocked-io.m4
214 | /vararrays.m4
215 | /vasnprintf.m4
216 | /version-etc.m4
217 | /visibility.m4
218 | /waitpid.m4
219 | /warnings.m4
220 | /warn-on-use.m4
221 | /wchar_h.m4
222 | /wchar_t.m4
223 | /wctype_h.m4
224 | /wcwidth.m4
225 | /wint_t.m4
226 | /xalloc.m4
227 | /xsize.m4
228 | /xstrndup.m4
229 | /xstrtol.m4
230 | /year2038.m4
231 | /zzgnulib.m4
232 | /fseterr.m4
233 | /gettext_h.m4
234 | /lstat.m4
235 | /stdckdint_h.m4
236 | /stringeq.m4
237 | /unitypes_h.m4
238 | /assert_h.m4
239 | /build-to-host.m4
240 | /c-bool.m4
241 | /c32rtomb.m4
242 | /error_h.m4
243 | /extensions-aix.m4
244 | /gnulib-i18n.m4
245 | /iswpunct.m4
246 | /locale-en.m4
247 | /mbrtoc32.m4
248 | /mempcpy.m4
249 | /musl.m4
250 | /off64_t.m4
251 | /once.m4
252 | /pthread-once.m4
253 | /pthread-spin.m4
254 | /pthread_h.m4
255 | /sched_h.m4
256 | /sys_cdefs_h.m4
257 | /uchar_h.m4
258 | /unicase_h.m4
259 | /unictype_h.m4
260 | /uninorm_h.m4
261 | 


--------------------------------------------------------------------------------
/bootstrap.conf:
--------------------------------------------------------------------------------
  1 | # Bootstrap configuration.
  2 | 
  3 | # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2017
  4 | # Free Software Foundation, Inc.
  5 | 
  6 | # Modifications for GNU Datamash are
  7 | # Copyright (C) 2014-2021 Assaf Gordon <assafgordon@gmail.com>
  8 | 
  9 | # This program is free software: you can redistribute it and/or modify
 10 | # it under the terms of the GNU General Public License as published by
 11 | # the Free Software Foundation; either version 3 of the License, or
 12 | # (at your option) any later version.
 13 | 
 14 | # This program is distributed in the hope that it will be useful,
 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17 | # GNU General Public License for more details.
 18 | 
 19 | # You should have received a copy of the GNU General Public License
 20 | # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 21 | #
 22 | 
 23 | # gnulib modules used by this package.
 24 | gnulib_modules="
 25 |     announce-gen
 26 |     assert
 27 |     base64
 28 |     calloc-gnu
 29 |     c-ctype
 30 |     ceill
 31 |     closeout
 32 |     configmake
 33 |     crypto/sha1
 34 |     crypto/sha256
 35 |     crypto/sha512
 36 |     crypto/md5
 37 |     dirname
 38 |     do-release-commit-and-tag
 39 |     dup2
 40 |     errno
 41 |     error
 42 |     expl
 43 |     extensions
 44 |     fabsl
 45 |     floorl
 46 |     fpucw
 47 |     gendocs
 48 |     getopt-gnu
 49 |     getrandom
 50 |     gettext-h
 51 |     gitlog-to-changelog
 52 |     git-version-gen
 53 |     gnupload
 54 |     gnu-web-doc-update
 55 |     hard-locale
 56 |     hash
 57 |     hash-pjw
 58 |     hash-pjw-bare
 59 |     ignore-value
 60 |     inet_pton
 61 |     inline
 62 |     inttypes
 63 |     intprops
 64 |     inttostr
 65 |     isblank
 66 |     isnanl
 67 |     linebuffer
 68 |     locale
 69 |     localeconv
 70 |     logl
 71 |     maintainer-makefile
 72 |     minmax
 73 |     modfl
 74 |     isnanl
 75 |     netinet_in
 76 |     pclose
 77 |     pmccabe2html
 78 |     popen
 79 |     progname
 80 |     propername
 81 |     random
 82 |     readme-release
 83 |     realloc-gnu
 84 |     roundl
 85 |     setlocale
 86 |     signbit
 87 |     sh-quote
 88 |     size_max
 89 |     snprintf
 90 |     sqrtl
 91 |     std-gnu11
 92 |     stdbool
 93 |     stdint
 94 |     stdnoreturn
 95 |     stpcpy
 96 |     strcase
 97 |     strdup-posix
 98 |     strsep
 99 |     strtold
100 |     strtoll
101 |     sys_random
102 |     sys_socket
103 |     unlocked-io
104 |     update-copyright
105 |     version-etc
106 |     warnings
107 |     waitpid
108 |     xalloc
109 |     xstrtol
110 |     xstrtol-error
111 |     xstrtoumax
112 | "
113 | 
114 | # Additional xgettext options to use.  Use "\\\newline" to break lines.
115 | XGETTEXT_OPTIONS=$XGETTEXT_OPTIONS'\\\
116 |  --from-code=UTF-8\\\
117 |  --flag=asprintf:2:c-format --flag=vasprintf:2:c-format\\\
118 |  --flag=asnprintf:3:c-format --flag=vasnprintf:3:c-format\\\
119 |  --flag=wrapf:1:c-format\\\
120 | '
121 | 
122 | # If "AM_GNU_GETTEXT(external" or "AM_GNU_GETTEXT([external]"
123 | # appears in configure.ac, exclude some unnecessary files.
124 | # Without grep's -E option (not portable enough, pre-configure),
125 | # the following test is ugly.  Also, this depends on the existence
126 | # of configure.ac, not the obsolescent-named configure.in.  But if
127 | # you're using this infrastructure, you should care about such things.
128 | 
129 | gettext_external=0
130 | grep '^[	 ]*AM_GNU_GETTEXT(external\>' configure.ac > /dev/null &&
131 |   gettext_external=1
132 | grep '^[	 ]*AM_GNU_GETTEXT(\[external\]' configure.ac > /dev/null &&
133 |   gettext_external=1
134 | 
135 | if test $gettext_external = 1; then
136 |   # Gettext supplies these files, but we don't need them since
137 |   # we don't have an intl subdirectory.
138 |   excluded_files='
139 |       m4/glibc2.m4
140 |       m4/intdiv0.m4
141 |       m4/lcmessage.m4
142 |       m4/lock.m4
143 |       m4/printf-posix.m4
144 |       m4/size_max.m4
145 |       m4/uintmax_t.m4
146 |       m4/ulonglong.m4
147 |       m4/visibility.m4
148 |       m4/xsize.m4
149 |   '
150 | fi
151 | 
152 | gnulib_tool_option_extras="--makefile-name=gnulib.mk --automake-subdir"
153 | 
154 | # Build prerequisites
155 | buildreq="\
156 | autoconf   2.69
157 | automake   1.11.1
158 | autopoint  0.19.4
159 | git        1.5.5
160 | gettext    0.19.4
161 | gperf      -
162 | gzip       -
163 | makeinfo   -
164 | perl       5.8
165 | pkg-config 0.28
166 | tar        -
167 | "
168 | 
169 | bootstrap_post_import_hook ()
170 | {
171 |   touch ChangeLog && return 0   # Automake requires that ChangeLog exist.
172 |   return 1
173 | }
174 | 
175 | # File that should exist in the top directory of a checked out hierarchy,
176 | # but not in a distribution tarball.
177 | checkout_only_file=HACKING.md
178 | 


--------------------------------------------------------------------------------
/tests/datamash-output-format.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | =pod
  3 |   Unit Tests for GNU Datamash - perform simple calculation on input data
  4 | 
  5 |    Copyright (C) 2018-2021 Assaf Gordon <assafgordon@gmail.com>
  6 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  7 | 
  8 |    This file is part of GNU Datamash.
  9 | 
 10 |    GNU Datamash is free software: you can redistribute it and/or modify
 11 |    it under the terms of the GNU General Public License as published by
 12 |    the Free Software Foundation, either version 3 of the License, or
 13 |    (at your option) any later version.
 14 | 
 15 |    GNU Datamash is distributed in the hope that it will be useful,
 16 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 |    GNU General Public License for more details.
 19 | 
 20 |    You should have received a copy of the GNU General Public License
 21 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 22 | 
 23 |    Written by Assaf Gordon.
 24 | =cut
 25 | use strict;
 26 | use warnings;
 27 | 
 28 | ##
 29 | ## This script tests output format options
 30 | ##
 31 | 
 32 | 
 33 | # Until a better way comes along to auto-use Coreutils Perl modules
 34 | # as in the coreutils' autotools system.
 35 | use Coreutils;
 36 | use CuSkip;
 37 | use CuTmpdir qw(datamash);
 38 | 
 39 | (my $program_name = $0) =~ s|.*/||;
 40 | my $prog = 'datamash';
 41 | 
 42 | # TODO: add localization tests with "grouping"
 43 | # Turn off localization of executable's output.
 44 | @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
 45 | 
 46 | my $in1=<<'EOF';
 47 | 1.000004
 48 | 0.000005
 49 | EOF
 50 | 
 51 | my @Tests =
 52 | (
 53 |   # Test Rounding
 54 |   ['r1', 'sum 1' ,  {IN_PIPE=>$in1},  {OUT => "1.000009\n"}],
 55 |   ['r2', '--round 1 sum 1' ,  {IN_PIPE=>$in1},  {OUT => "1.0\n"}],
 56 |   ['r3', '--round 3 sum 1' ,  {IN_PIPE=>$in1},  {OUT => "1.000\n"}],
 57 |   ['r4', '--round 5 sum 1' ,  {IN_PIPE=>$in1},  {OUT => "1.00001\n"}],
 58 |   ['r5', '--round 6 sum 1' ,  {IN_PIPE=>$in1},  {OUT => "1.000009\n"}],
 59 |   ['r6', '--round 7 sum 1' ,  {IN_PIPE=>$in1},  {OUT => "1.0000090\n"}],
 60 | 
 61 |   # Test short rounding option
 62 |   ['r7', '-R 7 sum 1',        {IN_PIPE=>$in1},  {OUT => "1.0000090\n"}],
 63 | 
 64 |   # Test multiple rounding options
 65 |   ['r8', '--round 3 -R 7 sum 1',   {IN_PIPE=>$in1},  {OUT => "1.0000090\n"}],
 66 |   ['r9', '--round 7 -R 3 sum 1',   {IN_PIPE=>$in1},  {OUT => "1.000\n"}],
 67 | 
 68 | 
 69 |   # Test Custom formats: %f
 70 |   ['f1', '--format "%07.3f" sum 1',  {IN_PIPE=>$in1},  {OUT => "001.000\n"}],
 71 |   ['f2', '--format "%.7f"   sum 1',  {IN_PIPE=>$in1},  {OUT => "1.0000090\n"}],
 72 |   ['f3', '--format "%10f"   sum 1',  {IN_PIPE=>$in1},  {OUT => "  1.000009\n"}],
 73 |   ['f4', '--format "%-10f"  sum 1',  {IN_PIPE=>$in1},  {OUT => "1.000009  \n"}],
 74 |   ['f5', '--format "%+10f"  sum 1',  {IN_PIPE=>$in1},  {OUT => " +1.000009\n"}],
 75 |   # Test %#f (alternate form: always show decimal point)
 76 |   ['f6', '--format "%.0f"   sum 1',  {IN_PIPE=>$in1},  {OUT => "1\n"}],
 77 |   ['f7', '--format "%#.0f"  sum 1',  {IN_PIPE=>$in1},  {OUT => "1.\n"}],
 78 | 
 79 |   # Test Custom formats: %g
 80 |   ['g1', '--format "%g"    sum 1',  {IN_PIPE=>$in1},  {OUT => "1.00001\n"}],
 81 |   ['g2', '--format "%10g"  sum 1',  {IN_PIPE=>$in1},  {OUT => "   1.00001\n"}],
 82 |   ['g3', '--format "%010g" sum 1',  {IN_PIPE=>$in1},  {OUT => "0001.00001\n"}],
 83 |   ['g4', '--format "%.10g" sum 1',  {IN_PIPE=>$in1},  {OUT => "1.000009\n"}],
 84 |   ['g5', '--format "%.3g"  sum 1',  {IN_PIPE=>$in1},  {OUT => "1\n"}],
 85 |   # Test %#g (alternate form: don't trim zero decimal digits)
 86 |   ['g6', '--format "%.4g"  sum 1',  {IN_PIPE=>$in1},  {OUT => "1\n"}],
 87 |   ['g7', '--format "%#.4g" sum 1',  {IN_PIPE=>$in1},  {OUT => "1.000\n"}],
 88 | 
 89 |   # Test Custom formats: %e
 90 |   ['e1', '--format "%e"    sum 1', {IN_PIPE=>$in1}, {OUT=>"1.000009e+00\n"}],
 91 |   ['e2', '--format "%.3e"  sum 1', {IN_PIPE=>$in1}, {OUT=>"1.000e+00\n"}],
 92 | 
 93 |   # Test Custom formats: %a
 94 |   # Disable the test for now. Valid output can differ (e.g. 0x8.000p-3 and
 95 |   # 0x1.000p0 ).
 96 |   # ['a1', '--format "%0.3a" sum 1', {IN_PIPE=>$in1}, {OUT=>"0x8.000p-3\n"}],
 97 | 
 98 | 
 99 |   # Custom formats can use lots of memory
100 |   ['m1', '--format "%04000.0f"   sum 1',  {IN_PIPE=>$in1},
101 |     {OUT => "0" x 3999 . "1\n"}],
102 | 
103 |   # due to binary floating representation, some decimal point digits won't be
104 |   # zero (e.g. 1.0000090000000000000000000000000523453254320000000... or
105 |   # 1.000008999999...).
106 |   # The OUT_SUBST replaces exactly 3995 digits (as expected from the format)
107 |   # with an "X".
108 |   ['m2', '--format "%.4000f"   sum 1',  {IN_PIPE=>$in1},
109 |     {OUT => "1.00000X\n"},
110 |     {OUT_SUBST => 's/^(1\.00000)([0-9]{3995})$/\1X/'}],
111 | );
112 | 
113 | 
114 | my $save_temps = $ENV{SAVE_TEMPS};
115 | my $verbose = $ENV{VERBOSE};
116 | 
117 | my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
118 | exit $fail;
119 | 


--------------------------------------------------------------------------------
/src/op-defs.h:
--------------------------------------------------------------------------------
  1 | /* GNU Datamash - perform simple calculation on input data
  2 | 
  3 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  4 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  5 | 
  6 |    This file is part of GNU Datamash.
  7 | 
  8 |    GNU Datamash is free software: you can redistribute it and/or modify
  9 |    it under the terms of the GNU General Public License as published by
 10 |    the Free Software Foundation, either version 3 of the License, or
 11 |    (at your option) any later version.
 12 | 
 13 |    GNU Datamash is distributed in the hope that it will be useful,
 14 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |    GNU General Public License for more details.
 17 | 
 18 |    You should have received a copy of the GNU General Public License
 19 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 20 | */
 21 | 
 22 | /* Written by Assaf Gordon */
 23 | #ifndef __OPERATION_DEFINITONS_H__
 24 | #define __OPERATION_DEFINITONS_H__
 25 | 
 26 | enum field_operation
 27 | {
 28 |   OP_INVALID = -1,
 29 |   OP_COUNT = 0,
 30 |   OP_SUM,
 31 |   OP_MIN,
 32 |   OP_MAX,
 33 |   OP_ABSMIN,
 34 |   OP_ABSMAX,
 35 |   OP_RANGE,
 36 |   OP_FIRST,
 37 |   OP_LAST,
 38 |   OP_RAND,
 39 |   OP_MEAN,
 40 |   OP_GEOMEAN,
 41 |   OP_HARMMEAN,
 42 |   OP_MS,
 43 |   OP_RMS,
 44 |   OP_MEDIAN,
 45 |   OP_QUARTILE_1,
 46 |   OP_QUARTILE_3,
 47 |   OP_IQR,       /* Inter-quartile range */
 48 |   OP_PERCENTILE,
 49 |   OP_PSTDEV,    /* Population Standard Deviation */
 50 |   OP_SSTDEV,    /* Sample Standard Deviation */
 51 |   OP_PVARIANCE, /* Population Variance */
 52 |   OP_SVARIANCE, /* Sample Variance */
 53 |   OP_MAD,       /* MAD - Median Absolute Deviation, with adjustment constant of
 54 |                    1.4826 for normal distribution */
 55 |   OP_MADRAW,    /* MAD (same as above), with constant=1 */
 56 |   OP_S_SKEWNESS,/* Sample Skewness */
 57 |   OP_P_SKEWNESS,/* Population Skewness */
 58 |   OP_S_EXCESS_KURTOSIS, /* Sample Excess Kurtosis */
 59 |   OP_P_EXCESS_KURTOSIS, /* Population Excess Kurtosis */
 60 |   OP_JARQUE_BERA,   /* Jarque-Bera test of normality */
 61 |   OP_DP_OMNIBUS,    /* D'Agostino-Pearson omnibus test of normality */
 62 |   OP_MODE,
 63 |   OP_ANTIMODE,
 64 |   OP_UNIQUE,        /* Collapse Unique string into comma separated values */
 65 |   OP_COLLAPSE,      /* Collapse strings into comma separated values */
 66 |   OP_COUNT_UNIQUE,  /* count number of unique values */
 67 |   OP_BASE64,        /* Encode Field to Base64 */
 68 |   OP_DEBASE64,      /* Decode Base64 field */
 69 |   OP_MD5,           /* Calculate MD5 of a field */
 70 |   OP_SHA1,          /* Calculate SHA1 of a field */
 71 |   OP_SHA224,        /* Calculate SHA224 of a field */
 72 |   OP_SHA256,        /* Calculate SHA256 of a field */
 73 |   OP_SHA384,        /* Calculate SHA384 of a field */
 74 |   OP_SHA512,        /* Calculate SHA512 of a field */
 75 |   OP_P_COVARIANCE,  /* Population Covariance */
 76 |   OP_S_COVARIANCE,  /* Sample Covariance */
 77 |   OP_P_PEARSON_COR, /* Pearson Correlation Coefficient (population) */
 78 |   OP_S_PEARSON_COR, /* Pearson Correlation Coefficient (sample) */
 79 |   OP_DOT_PRODUCT,   /* Scalar Product */
 80 |   OP_BIN_BUCKETS,   /* numeric binning operation */
 81 |   OP_STRBIN,        /* String hash/binning */
 82 |   OP_FLOOR,         /* Floor */
 83 |   OP_CEIL,          /* Ceiling */
 84 |   OP_ROUND,         /* Round */
 85 |   OP_TRUNCATE,      /* Truncate */
 86 |   OP_FRACTION,      /* Fraction */
 87 |   OP_TRIMMED_MEAN,  /* Trimmed Mean */
 88 |   OP_DIRNAME,       /* like dirname (1) */
 89 |   OP_BASENAME,      /* like basename (1) */
 90 |   OP_EXTNAME,       /* guess extension of file name */
 91 |   OP_BARENAME,      /* like basename without the guessed extension  */
 92 |   OP_GETNUM,        /* Extract a number from a string */
 93 |   OP_CUT            /* like cut (1) */
 94 | };
 95 | 
 96 | enum processing_mode
 97 | {
 98 |   MODE_INVALID = -1,  /* No matching mode (see get_processing_mode) */
 99 |   MODE_GROUPBY = 0,   /* Group By similar keys */
100 |   MODE_TRANSPOSE,     /* transpose */
101 |   MODE_REVERSE,       /* reverse fields in each line */
102 |   MODE_PER_LINE,      /* Operations on each line, no grouping */
103 |   MODE_REMOVE_DUPS,   /* Remove duplicated keys from a file */
104 |   MODE_CROSSTAB,      /* Cross tabulation (aka pivot tables) */
105 |   MODE_TABULAR_CHECK, /* Verify the file has tabular format */
106 |   MODE_NOOP           /* Do nothing. Used for testing and profiling */
107 | };
108 | 
109 | /* Given a text string, returns the matching operation, or OP_INVALID.
110 |    If 'mode' is not NULL, stores the implied processing mode
111 |    (e.g. sum=>MODE_GROUPBY,  md5=>MODE_PER_LINE). */
112 | enum field_operation
113 | get_field_operation (const char* s, enum processing_mode* /*out*/ mode);
114 | 
115 | const char*
116 | get_field_operation_name (enum field_operation op);
117 | 
118 | /* Given a text string,
119 |    returns the matching processing mode, or MODE_INVALID. */
120 | enum processing_mode
121 | get_processing_mode (const char* s);
122 | 
123 | const char*
124 | get_processing_mode_name (enum processing_mode m);
125 | 
126 | #endif
127 | 


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
  1 |   Copyright (C) 2005, 2006, 2007, 2009, 2010, 2011, 2012, 2013, 2014, 2015,
  2 |   2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Free Software Foundation, Inc.
  3 | 
  4 |   Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  5 |   Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  6 | 
  7 |   Copying and distribution of this file, with or without modification,
  8 |   are permitted in any medium without royalty provided the copyright
  9 |   notice and this notice are preserved.
 10 | 
 11 | GNU Datamash
 12 | ============
 13 | 
 14 | GNU Datamash is a command-line program which performs basic
 15 | numeric, textual and statistical operations on input textual data files.
 16 | 
 17 | It is designed to be portable and reliable, and aid researchers
 18 | to easily automate analysis pipelines, without writing code or even
 19 | short scripts.
 20 | 
 21 | Home page: https://www.gnu.org/software/datamash
 22 | 
 23 | 
 24 | Usage
 25 | =====
 26 | 
 27 | See `datamash --help` for basic usage information.
 28 | 
 29 | See `man datamash` for examples and operation details.
 30 | 
 31 | For the instruction manual, see `info datamash` or visit
 32 |   https://www.gnu.org/software/datamash/manual/
 33 | 
 34 | 
 35 | 
 36 | Examples
 37 | ========
 38 | 
 39 | What's the sum and mean of the values in field 1 ?
 40 | 
 41 |     $ seq 10 | datamash sum 1 mean 1
 42 |     55 5.5
 43 | 
 44 | Given a file with three columns (Name, College Major, Score),
 45 | what is the average, grouped by college major?
 46 | 
 47 |     $ cat scores.txt
 48 |     John       Life-Sciences    91
 49 |     Dilan      Health-Medicine  84
 50 |     Nathaniel  Arts             88
 51 |     Antonio    Engineering      56
 52 |     Kerris     Business         82
 53 |     ...
 54 | 
 55 | 
 56 |     # Sort input and group by column 2, calculate average on column 3:
 57 | 
 58 |     $ datamash --sort --group 2  mean 3 < scores.txt
 59 |     Arts             68.9474
 60 |     Business         87.3636
 61 |     Health-Medicine  90.6154
 62 |     Social-Sciences  60.2667
 63 |     Life-Sciences    55.3333
 64 |     Engineering      66.5385
 65 | 
 66 | See more examples at https://www.gnu.org/software/datamash/examples/
 67 | 
 68 | 
 69 | Download and Installation
 70 | =========================
 71 | 
 72 | Download the latest source code at https://www.gnu.org/software/datamash .
 73 | 
 74 | General installation commands:
 75 | 
 76 |     $ tar -xzf datamash-[VERSION].tar.gz
 77 |     $ cd datamash-[VERSION]
 78 |     $ ./configure
 79 |     $ make
 80 |     $ make check
 81 |     $ sudo make install
 82 | 
 83 | Also see INSTALL.
 84 | 
 85 | See Platform/OS-specific download instructions at
 86 |   https://www.gnu.org/software/datamash/download/
 87 | 
 88 | 
 89 | To build from latest git sources, see the HACKING.md file. This file is
 90 | available when cloning from git, but is not distributed in the tar archive.
 91 | To clone the git repository run
 92 |     git clone git://git.savannah.gnu.org/datamash.git
 93 | It is also available online at
 94 |     https://git.savannah.gnu.org/cgit/datamash.git/tree/HACKING.md
 95 | 
 96 | 
 97 | BASH Auto-completion
 98 | ====================
 99 | 
100 | The datamash package includes a bash auto-completion script.
101 | The installation location can be controlled using
102 | 
103 |     ./configure --with-bash-completion-dir=[no|local|global|PATH]
104 | 
105 | The options are:
106 | 
107 | * local  - install under the package's $PREFIX path.
108 |            typically `/usr/local/share/datamash/bash-completion.d/` ,
109 |            but can be changed with `./configure --prefix`.
110 |            This is the default.
111 | 
112 | * no     - do not install the bash completion script.
113 | 
114 | * [PATH] - install into the PATH specified on the command line, e.g.
115 |            `./configure --with-bash-completion-dir=/foo/bar/bash-completion.d/`
116 | 
117 | * global - install into the system's global bash-completion directory,
118 |            as reported by `pkg-config`. This will be the result of:
119 |            `pkg-config --variable=completionsdir bash-completion`
120 |            Which is commonly `/usr/share/bash-completion/completions`
121 |            or `/etc/bash.d`.
122 |            If `pkg-config` is not found or if `pkg-config` does not have
123 |            the config (.pc) file for the bash-completion package,
124 |            defaults to 'local'.
125 | 
126 | `local` is the default, and should be used particularly if installing under
127 | a non-default `--prefix` without root permissions.
128 | `global` should be used if you are installing to the default location
129 | (/usr/local) and have root permissions (e.g. `sudo make install`).
130 | A custom PATH or `global` should be used when packaging datamash for
131 | further distribution.
132 | 
133 | Questions and Bug Reports
134 | =========================
135 | 
136 | - Please send questions and bug reports to bug-datamash@gnu.org .
137 | - Searchable archive at https://lists.gnu.org/archive/html/bug-datamash .
138 | - Subscribe at https://lists.gnu.org/mailman/listinfo/bug-datamash .
139 | 
140 | 
141 | Copyright and License
142 | =====================
143 | Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
144 | 
145 | License: GPL Version 3 (or later). See COPYING.
146 | 
147 | For any copyright year range specified as YYYY-ZZZZ in this package
148 | note that the range specifies every single year in that closed interval.
149 | 


--------------------------------------------------------------------------------
/tests/datamash-tests-deprecated.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | =pod
  3 |   Unit Tests for GNU Datamash - perform simple calculation on input data
  4 | 
  5 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  6 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  7 | 
  8 |    This file is part of GNU Datamash.
  9 | 
 10 |    GNU Datamash is free software: you can redistribute it and/or modify
 11 |    it under the terms of the GNU General Public License as published by
 12 |    the Free Software Foundation, either version 3 of the License, or
 13 |    (at your option) any later version.
 14 | 
 15 |    GNU Datamash is distributed in the hope that it will be useful,
 16 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 |    GNU General Public License for more details.
 19 | 
 20 |    You should have received a copy of the GNU General Public License
 21 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 22 | 
 23 |    Written by Assaf Gordon.
 24 | =cut
 25 | use strict;
 26 | use warnings;
 27 | 
 28 | # Until a better way comes along to auto-use Coreutils Perl modules
 29 | # as in the coreutils' autotools system.
 30 | use Coreutils;
 31 | use CuSkip;
 32 | use CuTmpdir qw(datamash);
 33 | use MIME::Base64 ;
 34 | 
 35 | (my $program_name = $0) =~ s|.*/||;
 36 | my $prog_bin = 'datamash';
 37 | 
 38 | ## Cross-Compiling portability hack:
 39 | ##  under qemu/binfmt, argv[0] (which is used to report errors) will contain
 40 | ##  the full path of the binary, if the binary is on the $PATH.
 41 | ##  So we try to detect what is the actual returned value of the program
 42 | ##  in case of an error.
 43 | my $prog = `$prog_bin ---print-progname`;
 44 | $prog = $prog_bin unless $prog;
 45 | 
 46 | ## Portability hack: probe whether the system's sort supports stable
 47 | ## sorting ('-s'); a non-zero exit status means tests needing it must be
 48 | ## skipped.  system() returns -1 if sort could not run; low 7 bits = signal.
 49 | my $rc = system("sort -s < /dev/null > /dev/null 2>/dev/null");
 50 | die "testing framework failure: failed to execute sort -s"
 51 |   if ( ($rc == -1) || ($rc & 127) );
 52 | my $sort_exit_code = ($rc >> 8);
 53 | my $have_stable_sort = ($sort_exit_code==0);
 54 | 
 55 | 
 56 | # TODO: add localization tests with "grouping"
 57 | # Turn off localization of executable's output.
 58 | @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
 59 | 
 60 | my $in_g3=<<'EOF';
 61 | A 3 W
 62 | A 5 W
 63 | A 7 W
 64 | A 11 X
 65 | A 13 X
 66 | B 17 Y
 67 | B 19 Z
 68 | C 23 Z
 69 | EOF
 70 | 
 71 | my $in_g4=<<'EOF';
 72 | A 5
 73 | K 6
 74 | P 2
 75 | EOF
 76 | 
 77 | my $in_hdr1=<<'EOF';
 78 | x y z
 79 | A 1 10
 80 | A 2 10
 81 | A 3 10
 82 | A 4 10
 83 | A 4 10
 84 | B 5 10
 85 | B 6 20
 86 | B 7 30
 87 | C 8 11
 88 | C 9 22
 89 | C 1 33
 90 | C 2 44
 91 | EOF
 92 | 
 93 | my $in_hdr_only=<<'EOF';
 94 | X:Y:Z
 95 | EOF
 96 | 
 97 | my $full_deprecation = "$prog: Using -f/--full with non-linewise operations " .
 98 | "is deprecated and will be disabled in a future release.\n";
 99 | 
100 | my @Tests =
101 | (
102 |   # empty input = empty output, regardless of options
103 |   ['emp2dep', '--full count 2', {IN_PIPE=>""},{OUT=>""},
104 |     {ERR=>"$full_deprecation"}],
105 |   ['emp5dep', '--full --header-in count 2', {IN_PIPE=>""},{OUT=>""},
106 |     {ERR=>"$full_deprecation"}],
107 |   ['emp6dep', '--full --header-out count 2', {IN_PIPE=>""},{OUT=>""},
108 |     {ERR=>"$full_deprecation"}],
109 |   ['emp7dep', '--full --header-in --header-out count 2',
110 |     {IN_PIPE=>""},{OUT=>""},
111 |     {ERR=>"$full_deprecation"}],
112 |   ['emp8dep', '-g3,4 --full --header-in --header-out count 2',
113 |     {IN_PIPE=>""},{OUT=>""},
114 |     {ERR=>"$full_deprecation"}],
115 | 
116 |   # --full option - without grouping, returns the first line
117 |   ['fl1dep', '-t" " --full sum 2', {IN_PIPE=>$in_g3},
118 |     {OUT=>"A 3 W 98\n"},
119 |     {ERR=>"$full_deprecation"}],
120 |   # --full with grouping - print entire line of each group
121 |   ['fl2dep', '-t" " --full -g3 sum 2', {IN_PIPE=>$in_g3},
122 |     {OUT=>"A 3 W 15\nA 11 X 24\nB 17 Y 17\nB 19 Z 42\n"},
123 |     {ERR=>"$full_deprecation"}],
124 | 
125 |   # Input and output header, with full line
126 |   ['hdr3dep', '-t" " -g 1 --full --header-in --header-out count 2',
127 |     {IN_PIPE=>$in_hdr1},
128 |     {OUT=>"x y z count(y)\nA 1 10 5\nB 5 10 3\nC 8 11 4\n"},
129 |     {ERR=>"$full_deprecation"}],
130 | 
131 |   # Output Header with --full
132 |   ['hdr5dep', '-t" " -g 1 --full --header-out count 2', {IN_PIPE=>$in_g3},
133 |     {OUT=>"field-1 field-2 field-3 count(field-2)\n" .
134 |           "A 3 W 5\nB 17 Y 2\nC 23 Z 1\n"},
135 |     {ERR=>"$full_deprecation"}],
136 | 
137 |   # Input has only one header line (no data lines), and the user requested
138 |   # header-in and header-out => header line should be printed
139 |   ['hdr15dep', '-t: --full -H sum 1', {IN_PIPE=>$in_hdr_only},
140 |     {OUT=>"X:Y:Z:sum(X)\n"},
141 |     {ERR=>"$full_deprecation"}],
142 |   ['hdr17dep', '-t: --full -s -g1 -H sum 2', {IN_PIPE=>$in_hdr_only},
143 |     {OUT=>"X:Y:Z:sum(Y)\n"},
144 |     {ERR=>"$full_deprecation"}],
145 | 
146 |   # Test single line per group
147 |   ['sl2dep', '-t" " --full -g 1 mean 2', {IN_PIPE=>$in_g4},
148 |     {OUT=>"A 5 5\nK 6 6\nP 2 2\n"},
149 |     {ERR=>"$full_deprecation"}],
150 | );
151 | 
152 | my $save_temps = $ENV{SAVE_TEMPS};
153 | my $verbose = $ENV{VERBOSE};
154 | 
155 | my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
156 | exit $fail;
157 | 


--------------------------------------------------------------------------------
/doc/datamash-texinfo.css:
--------------------------------------------------------------------------------
  1 | /*
  2 | CSS for TexInfo/HTML files.
  3 | 
  4 | Copyright (C) 2015-2021 Assaf Gordon (assafgordon@gmail.com)
  5 | 
  6 | License:
  7 | GNU All Permissive License
  8 | https://www.gnu.org/prep/maintain/html_node/License-Notices-for-Other-Files.html
  9 | 
 10 |  Copying and distribution of this file, with or without modification,
 11 |  are permitted in any medium without royalty provided the copyright
 12 |  notice and this notice are preserved.  This file is offered as-is,
 13 |  without any warranty.
 14 | 
 15 | 
 16 | The used tags/classes were collected from a Texinfo-generated HTML using:
 17 | 
 18 |   cd coreutils
 19 |   makeinfo --html --no-split -o coreutils.html doc/coreutils.texi
 20 |   cat coreutils.html | sed 's/</\n</g' | sed 's;>.*;>;g' \
 21 |          | grep '^<' | grep 'class=' | sort -u \
 22 |          | perl -lane 'm/<(\w+) .*class="([-\w]+)"/ ; print $1, "\t", $2' \
 23 |          | sort -u
 24 | 
 25 | */
 26 | body {
 27 |   font-family: sans-serif;
 28 |   font-size: 16px;
 29 |   margin: 1em;
 30 | 
 31 |   overflow-x: hidden; /* Coupled with the div.header trick,
 32 | 			 this will extend the header lines
 33 | 			 across the entire page width without causing
 34 | 			 a horizontal scroll bar to appear. */
 35 | }
 36 | 
 37 | 
 38 | a {
 39 |   text-decoration: none;
 40 |   outline-style: none;
 41 |   color: blue;
 42 | }
 43 | a:visited {
 44 |   color: rgb(16,0,112);
 45 | }
 46 | a:hover {
 47 |   text-decoration: underline;
 48 | }
 49 | 
 50 | 
 51 | /*****************************************************
 52 |    Titles / Headers
 53 | ******************************************************/
 54 | 
 55 | /* @settitle:
 56 |    The title of the document at the top of the document/header */
 57 | h1.settitle {
 58 |   color: rgb(51,70,131);
 59 |   text-shadow: rgb(153,153,153) 1px 1px 0px;
 60 | }
 61 | 
 62 | /* The title at the beginning of the document, before the @menu */
 63 | h1.top {
 64 |   color: rgb(51,70,131);
 65 |   text-shadow: rgb(153,153,153) 1px 1px 0px;
 66 | }
 67 | 
 68 | /* @chapter */
 69 | h2.chapter {
 70 | }
 71 | 
 72 | h2.appendix { }
 73 | h2.unnumbered { }
 74 | 
 75 | /* @section */
 76 | h3.section {
 77 | }
 78 | /* @unnumberedsec */
 79 | h3.unnumberedsec {
 80 | }
 81 | /* @heading (seems to be only used in fdl.texi) */
 82 | h3.heading {
 83 | }
 84 | 
 85 | /* @subsection */
 86 | h4.subsection {
 87 | }
 88 | 
 89 | 
 90 | /**************************************************
 91 |   Short Contents (if @shortcontents command is used)
 92 | ***************************************************/
 93 | h2.shortcontents-heading { }
 94 | div.shortcontents { }
 95 | div.shortcontents ul { }
 96 | div.shortcontents ul li { }
 97 | 
 98 | 
 99 | /**************************************************
100 |   Contents (if @contents command is used)
101 | ***************************************************/
102 | h2.contents-heading { }
103 | div.contents { }
104 | div.contents ul { }
105 | div.contents ul li { }
106 | 
107 | 
108 | /* The @menu table */
109 | table.menu { }
110 | pre.menu-comment {}
111 | 
112 | 
113 | 
114 | /************************************
115 |   @example   and   @verbatim
116 | ************************************/
117 | div.example {
118 |   margin-left: 2em;
119 |   margin-right: 2em;
120 | }
121 | div.example pre.example {
122 |   /* Round Corners */
123 |   -webkit-border-radius: 3px;
124 |   -moz-border-radius: 3px;
125 |   border-radius: 3px;
126 |   border: 1px solid #c0c0c0;
127 | 
128 |   padding: 1ex;
129 |   background-color: #f3f3f3;
130 | }
131 | 
132 | /* Note: @verbatim is also rendered inside a 'div.example' */
133 | div.example pre.verbatim {
134 |   /* Round Corners */
135 |   -webkit-border-radius: 3px;
136 |   -moz-border-radius: 3px;
137 |   border-radius: 3px;
138 |   border: 1px solid #c0c0c0;
139 | 
140 |   padding: 1ex;
141 |   background-color: #f3f3f3;
142 | }
143 | 
144 | 
145 | 
146 | /************************************
147 |   @smallexample
148 | ************************************/
149 | div.smallexample {
150 | }
151 | div.smallexample pre.smallexample {
152 | }
153 | 
154 | /***********************************
155 |   @display
156 | ***********************************/
157 | div.display {
158 | }
159 | div.display pre.display {
160 | }
161 | 
162 | 
163 | 
164 | /**************************************
165 |   @footnote
166 | **************************************/
167 | div.footnote { }
168 | h4.footnotes-heading { }
169 | 
170 | /**************************************
171 | The header at the top of each page / section
172 | (the next/previous/top/up links)
173 | **************************************/
174 | div.header {
175 |    padding-top: 0.5ex;
176 |    padding-bottom: 0.5ex;
177 |    background-color: #ddddff;
178 | 
179 |    /* This will extend the background color of the header
180 |       bar to the entire width of the page (and beyond),
181 |       requires 'overflow-x: hidden' in the 'body'. */
182 |    padding-left: 3000px;
183 |    margin-left: -3000px;
184 |    padding-right: 3000px;
185 |    margin-right: -3000px;
186 | }
187 | 
188 | /* Disable any additional margins */
189 | div.header p {
190 |     margin: 0;
191 | }
192 | div.header p a {
193 |     color: blue;
194 | }
195 | 
196 | 
197 | /**************************************
198 |    @table is rendered as <dl> (definition list),
199 |    @item  is rendered as <dt> (definition term),
200 |    text is rendered as <dd> (definition description)
201 | **************************************/
202 | dl {
203 |    margin: 0 1em;
204 | }
205 | dl dt {
206 |    margin: 1em 0;
207 | }
208 | dl dd {
209 |    margin-left: 2em;
210 | }
211 | 
212 | /*******************************************************
213 |   Text Styles
214 | *******************************************************/
215 | 
216 | /* @var{} */
217 | var {
218 |   color: #CC0000;
219 | }
220 | 
221 | /* @samp{} */
222 | samp {
223 |   color: #6600CC;
224 | }
225 | 
226 | /* @env{} will result in <p><code>X</code></p> */
227 | p code {
228 |   color: #532c14;
229 | }
230 | 
231 | /* @option{} */
232 | span.nocodebreak {
233 |   color: #5D4C46;
234 | }
235 | 


--------------------------------------------------------------------------------
/src/key-compare.h:
--------------------------------------------------------------------------------
  1 | /* Key Comparison functions
  2 | 
  3 |    Copyright (C) 2014 Free Software Foundation, Inc.
  4 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  5 | 
  6 |    This program is free software: you can redistribute it and/or modify
  7 |    it under the terms of the GNU General Public License as published by
  8 |    the Free Software Foundation, either version 3 of the License, or
  9 |    (at your option) any later version.
 10 | 
 11 |    This program is distributed in the hope that it will be useful,
 12 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 13 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 14 |    GNU General Public License for more details.
 15 | 
 16 |    You should have received a copy of the GNU General Public License
 17 |    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 18 | 
 19 | #ifndef KEY_COMPARE_H
 20 | # define KEY_COMPARE_H
 21 | 
 22 | #define UCHAR_LIM (UCHAR_MAX + 1)
 23 | 
 24 | 
 25 | /* The representation of the decimal point in the current locale.  */
 26 | extern int decimal_point;
 27 | 
 28 | /* Thousands separator; if -1, then there isn't one.  */
 29 | extern int thousands_sep;
 30 | 
 31 | /* Nonzero if the corresponding locales are hard.  */
 32 | extern bool hard_LC_COLLATE;
 33 | #if HAVE_NL_LANGINFO
 34 | extern bool hard_LC_TIME;
 35 | #endif
 36 | 
 37 | #define NONZERO(x) ((x) != 0)
 38 | 
 39 | /* The kind of blanks for '-b' to skip in various options. */
 40 | enum blanktype { bl_start, bl_end, bl_both };
 41 | 
 42 | /* Lines are held in core as counted strings. */
 43 | struct line
 44 | {
 45 |   char *text;      /* Text of the line. */
 46 |   size_t length;    /* Length including final newline. */
 47 |   char *keybeg;      /* Start of first key. */
 48 |   char *keylim;      /* Limit of first key. */
 49 | };
 50 | 
 51 | /* Sort key.  */
 52 | struct keyfield
 53 | {
 54 |   size_t sword;      /* Zero-origin 'word' to start at. */
 55 |   size_t schar;      /* Additional characters to skip. */
 56 |   size_t eword;      /* Zero-origin last 'word' of key. */
 57 |   size_t echar;      /* Additional characters in field. */
 58 |   bool const *ignore;    /* Boolean array of characters to ignore. */
 59 |   char const *translate;  /* Translation applied to characters. */
 60 |   bool skipsblanks;    /* Skip leading blanks when finding start.  */
 61 |   bool skipeblanks;    /* Skip leading blanks when finding end.  */
 62 |   bool numeric;      /* Flag for numeric comparison.  Handle
 63 |                                    strings of digits with optional decimal
 64 |                                    point, but no exponential notation. */
 65 | #ifdef KEY_COMPARE_RANDOM
 66 |   bool random;      /* Sort by random hash of key.  */
 67 | #endif
 68 |   bool general_numeric;    /* Flag for general, numeric comparison.
 69 |                                    Handle numbers in exponential notation. */
 70 | #ifdef KEY_COMPARE_HUMAN_NUMERIC
 71 |   bool human_numeric;    /* Flag for sorting by human readable
 72 |                                    units with either SI xor IEC prefixes. */
 73 | #endif
 74 | #ifdef KEY_COMPARE_MONTH
 75 |   bool month;      /* Flag for comparison by month name. */
 76 | #endif
 77 | #ifdef KEY_COMPARE_REVERSE
 78 |   bool reverse;      /* Reverse the sense of comparison. */
 79 | #endif
 80 | #ifdef KEY_COMPARE_VERSION
 81 |   bool version;      /* sort by version number */
 82 | #endif
 83 | #ifdef KEY_COMPARE_DECORATION
 84 |   bool (*decorate_fn)(const char* in);
 85 |   const char* decorate_cmd;
 86 | #endif
 87 |   bool traditional_used;  /* Traditional key option format is used. */
 88 |   struct keyfield *next;  /* Next keyfield to try. */
 89 | };
 90 | 
 91 | /* If TAB has this value, blanks separate fields.  */
 92 | enum { TAB_DEFAULT = CHAR_MAX + 1 };
 93 | 
 94 | /* Tab character separating fields.  If TAB_DEFAULT, then fields are
 95 |    separated by the empty string between a non-blank character and a blank
 96 |    character. */
 97 | extern int tab;
 98 | 
 99 | /* List of key field comparisons to be tried.  */
100 | extern struct keyfield *keylist;
101 | 
102 | /* Return a pointer to the first character of the field specified
103 |    by KEY in LINE. */
104 | 
105 | char *
106 | begfield (struct line const *line, struct keyfield const *key);
107 | 
108 | /* Return the limit of (a pointer to the first character after) the field
109 |    in LINE specified by KEY. */
110 | 
111 | char *
112 | limfield (struct line const *line, struct keyfield const *key);
113 | 
114 | /* Insert a malloc'd copy of key KEY_ARG at the end of the key list.  */
115 | 
116 | extern struct keyfield*
117 | insertkey (struct keyfield *key_arg);
118 | 
119 | /* Report a bad field specification SPEC, with extra info MSGID.  */
120 | void badfieldspec (char const *, char const *)
121 |      ATTRIBUTE_NORETURN;
122 | 
123 | /* Parse the leading integer in STRING and store the resulting value
124 |    (which must fit into size_t) into *VAL.  Return the address of the
125 |    suffix after the integer.  If the value is too large, silently
126 |    substitute SIZE_MAX.  If MSGID is NULL, return NULL after
127 |    failure; otherwise, report MSGID and exit on failure.  */
128 | 
129 | char const *
130 | parse_field_count (char const *string, size_t *val, char const *msgid);
131 | 
132 | /* Set the ordering options for KEY specified in S.
133 |    Return the address of the first character in S that
134 |    is not a valid ordering option.
135 |    BLANKTYPE is the kind of blanks that 'b' should skip. */
136 | 
137 | char *
138 | set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype);
139 | 
140 | /* Initialize KEY.  */
141 | struct keyfield *
142 | key_init (struct keyfield *key);
143 | 
144 | /* print the key spec as a parameter */
145 | void
146 | debug_keylist (FILE* stream);
147 | 
148 | char*
149 | debug_keyfield (const struct keyfield *key);
150 | 
151 | 
152 | 
153 | /* Initializes 'common' key-comparison global variables:
154 |     thousands_sep
155 |     decimal_point
156 |     hard_LC_COLLATE
157 |     hard_LC_TIME
158 |     blanks, months, nonprintable tables (calls inittables).
159 | 
160 |     This function should be called once from main.
161 |  */
162 | void
163 | init_key_spec (void);
164 | 
165 | #endif /* KEY_COMPARE_H */
166 | 


--------------------------------------------------------------------------------
/src/field-ops.h:
--------------------------------------------------------------------------------
  1 | /* GNU Datamash - perform simple calculation on input data
  2 | 
  3 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  4 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  5 | 
  6 |    This file is part of GNU Datamash.
  7 | 
  8 |    GNU Datamash is free software: you can redistribute it and/or modify
  9 |    it under the terms of the GNU General Public License as published by
 10 |    the Free Software Foundation, either version 3 of the License, or
 11 |    (at your option) any later version.
 12 | 
 13 |    GNU Datamash is distributed in the hope that it will be useful,
 14 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |    GNU General Public License for more details.
 17 | 
 18 |    You should have received a copy of the GNU General Public License
 19 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 20 | */
 21 | 
 22 | /* Written by Assaf Gordon */
 23 | #ifndef __FIELD_OPS_H__
 24 | #define __FIELD_OPS_H__
 25 | 
 26 | /*
 27 |  Operations Module
 28 |  */
 29 | 
 30 | enum accumulation_type
 31 | {
 32 |   NUMERIC_SCALAR = 0,
 33 |   NUMERIC_VECTOR,
 34 |   STRING_SCALAR,
 35 |   STRING_VECTOR
 36 | };
 37 | 
 38 | enum operation_result_type
 39 | {
 40 |   NUMERIC_RESULT = 0,
 41 |   STRING_RESULT
 42 | };
 43 | 
 44 | enum operation_first_value
 45 | {
 46 |   AUTO_SET_FIRST = true,
 47 |   IGNORE_FIRST = false
 48 | };
 49 | 
 50 | enum FIELD_OP_COLLECT_RESULT
 51 | {
 52 |   FLOCR_OK = 0,
 53 |   FLOCR_OK_KEEP_LINE,
 54 |   FLOCR_OK_SKIPPED,
 55 |   FLOCR_INVALID_NUMBER,
 56 |   FLOCR_INVALID_BASE64
 57 | };
 58 | 
 59 | struct operation_data
 60 | {
 61 |   enum accumulation_type acc_type;
 62 |   enum operation_first_value auto_first;
 63 |   enum operation_result_type res_type;
 64 | };
 65 | 
 66 | /* Operation on a field */
 67 | struct fieldop
 68 | {
 69 |     /* operation 'class' information */
 70 |   enum field_operation op;
 71 |   enum accumulation_type acc_type;
 72 |   enum operation_result_type res_type;
 73 |   bool numeric;
 74 |   bool auto_first; /* if true, automatically set 'value' if 'first' */
 75 |   bool master;     /* if true, this field_op uses another as a slave */
 76 |   bool slave;      /* if true, not used directly, but referenced by
 77 |                       another field_op */
 78 |   size_t slave_idx;
 79 |   struct fieldop* slave_op;
 80 | 
 81 |   /* Instance information */
 82 |   size_t field; /* field number.  1 = first field in input file. */
 83 |   bool   field_by_name; /* if true, user gave field name (instead of number),
 84 |                            which needs to be resolved AFTER the header line
 85 |                            is loaded */
 86 |   char* field_name;
 87 | 
 88 |   union {
 89 |     long double bin_bucket_size;
 90 |     size_t strbin_bucket_size;
 91 |     size_t percentile;
 92 |     long double trimmed_mean;
 93 |     enum extract_number_type get_num_type;
 94 |   } params;
 95 | 
 96 |   /* Collected Data */
 97 |   bool first;   /* true if this is the first item in a new group */
 98 | 
 99 |   /* NUMERIC_SCALAR operations */
100 |   size_t count; /* number of items collected so far in a group */
101 |   long double value; /* for single-value operations (sum, min, max, absmin,
102 |                         absmax, mean) - this is the accumulated value */
103 | 
104 |   /* NUMERIC_VECTOR operations */
105 |   long double *values;     /* array for multi-valued ops (median,mode,stdev) */
106 |   size_t      num_values;  /* number of used values */
107 |   size_t      alloc_values;/* number of allocated values */
108 | 
109 |   /* String buffer for STRING_VECTOR operations */
110 |   char *str_buf;   /* points to the beginning of the buffer */
111 |   size_t str_buf_used; /* number of bytes used in the buffer */
112 |   size_t str_buf_alloc; /* number of bytes allocated in the buffer */
113 | 
114 |   /* Output buffer containing the final results of an operation,
115 |      set by 'summarize' functions.
116 |      also used for line operations (md5/sha1/256/512/base64). */
117 |   char *out_buf;
118 |   size_t out_buf_used;
119 |   size_t out_buf_alloc;
120 | };
121 | 
122 | /* Initializes a new field-op, using an *existing* (pre-allocated) struct. */
123 | void
124 | field_op_init (struct fieldop* /*in-out*/ op,
125 |                enum field_operation oper,
126 |                bool by_name, size_t num, const char* name);
127 | 
128 | /* Frees the internal structures in the field-op.
129 |    Does *not* free 'op' itself */
130 | void
131 | field_op_free (struct fieldop* op);
132 | 
133 | /* Add a value (from input) to the current field operation.
134 |    'str' does not need to be null-terminated.
135 | 
136 |   Returns a FIELD_OP_COLLECT_RESULT code; pass it to field_op_ok () to
137 |   test for success, or to field_op_collect_result_name () for an error text.
138 | */
139 | enum FIELD_OP_COLLECT_RESULT
140 | field_op_collect (struct fieldop *op, const char* str, size_t slen);
141 | 
142 | /* Evaluates to true/false depending if the value returned from
143 |    field_op_collect represents a successful operation. */
144 | #define field_op_ok(X) \
145 |   (((X)==FLOCR_OK)||((X)==FLOCR_OK_KEEP_LINE)||((X)==FLOCR_OK_SKIPPED))
146 | 
147 | /* If field_op_ok returned false, this function will return a textual
148 |    error message of the error. The returned value is a static string,
149 |    do not free it. */
150 | const char*
151 | field_op_collect_result_name (const enum FIELD_OP_COLLECT_RESULT flocr);
152 | 
153 | 
154 | /* Called after all values in a group are collected in a field-op,
155 |    to perform any (optional) finalizing steps
156 |    (e.g. in OP_MEAN, calculate the mean).
157 |    Result will be stored in op->out_buf. */
158 | void
159 | field_op_summarize (struct fieldop *op);
160 | 
161 | /* resets internal variables, should be called when starting a new
162 |    group of values. */
163 | void
164 | field_op_reset (struct fieldop *op);
165 | 
166 | /* Output precision, to be used with "printf ("%.*Lg",)" */
167 | extern int field_op_output_precision;
168 | 
169 | /* Helper function to print to stdout the 'empty value' of a numeric
170 |    operation (e.g. what's printed by 'OP_MEAN' with empty input).
171 |    Used in some of the tests. */
172 | void
173 | field_op_print_empty_value (enum field_operation mode);
174 | 
175 | #endif
176 | 


--------------------------------------------------------------------------------
/tests/datamash-tests-2-deprecated.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | =pod
  3 |   Unit Tests for GNU Datamash - perform simple calculation on input data
  4 | 
  5 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  6 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  7 | 
  8 |    This file is part of GNU Datamash.
  9 | 
 10 |    GNU Datamash is free software: you can redistribute it and/or modify
 11 |    it under the terms of the GNU General Public License as published by
 12 |    the Free Software Foundation, either version 3 of the License, or
 13 |    (at your option) any later version.
 14 | 
 15 |    GNU Datamash is distributed in the hope that it will be useful,
 16 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 |    GNU General Public License for more details.
 19 | 
 20 |    You should have received a copy of the GNU General Public License
 21 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 22 | 
 23 |    Written by Assaf Gordon.
 24 | =cut
 25 | use strict;
 26 | use warnings;
 27 | 
 28 | ##
 29 | ## This is a continuation of 'datamash-tests.pl'
 30 | ##   split into two files, as it was getting too large.
 31 | ##
 32 | 
 33 | # Until a better way comes along to auto-use Coreutils Perl modules
 34 | # as in the coreutils' autotools system.
 35 | use Coreutils;
 36 | use CuSkip;
 37 | use CuTmpdir qw(datamash);
 38 | use MIME::Base64 ;
 39 | 
 40 | (my $program_name = $0) =~ s|.*/||;
 41 | my $prog_bin = 'datamash';
 42 | 
 43 | ## Cross-Compiling portability hack:
 44 | ##  under qemu/binfmt, argv[0] (which is used to report errors) will contain
 45 | ##  the full path of the binary, if the binary is on the $PATH.
 46 | ##  So we try to detect what is the actual returned value of the program
 47 | ##  in case of an error.
 48 | my $prog = `$prog_bin ---print-progname`;
 49 | $prog = $prog_bin unless $prog;
 50 | 
 51 | ## Portability hack:
 52 | ## find the exact wording of 'nan' and inf (not-a-number).
 53 | ## It's lower case in GNU/Linux,FreeBSD,OpenBSD,
 54 | ## but is "NaN" on Illumos/OpenSolaris
 55 | my $nan = `$prog_bin ---print-nan`;
 56 | die "test infrastructure failed: can't determine 'nan' string" unless $nan;
 57 | my $inf = `$prog_bin ---print-inf`;
 58 | die "test infrastructure failed: can't determine 'inf' string" unless $inf;
 59 | 
 60 | ## Portability hack
 61 | ## Check if the system's sort supports stable sorting ('-s').
 62 | ## If it doesn't - skip some tests
 63 | my $rc = system("sort -s < /dev/null > /dev/null 2>/dev/null");
 64 | die "testing framework failure: failed to execute sort -s"
 65 |   if ( ($rc == -1) || ($rc & 127) );   # could not run, or killed by signal
 66 | my $sort_exit_code = ($rc >> 8);       # high byte holds the exit status
 67 | my $have_stable_sort = ($sort_exit_code==0);
 68 | 
 69 | 
 70 | # TODO: add localization tests with "grouping"
 71 | # Turn off localization of executable's output.
 72 | @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
 73 | 
 74 | 
 75 | # Test the selection operation (first/last/min/max) together with "FULL":
 76 | # group by column 1 ("A" or "B"), and operate on column 2 (numeric).
 77 | # Ensure the matching "iX" is displayed, despite not being part of the
 78 | # operation (example: if 'min(2)' is the operation, then "B 0" should be
 79 | # selected, and "i6" must be displayed with "--full" (because "i6" is on the
 80 | # same line as the min(2) value zero).
 81 | my $in_full1=<<'EOF';
 82 | A 4 i1
 83 | A 3 i2
 84 | A 5 i3
 85 | B 1 i4
 86 | B 8 i5
 87 | B 0 i6
 88 | B 3 i7
 89 | EOF
 90 | 
 91 | my $full_deprecation = "$prog: Using -f/--full with non-linewise operations " .
 92 | "is deprecated and will be disabled in a future release.\n";
 93 | 
 94 | my @Tests =
 95 | (
 96 |   # Test 'min' + --full
 97 |   # Test with "--full", "i2" and "i6" should be displayed
 98 |   ['slct2dep', '-t" " -f -g1 min 2', {IN_PIPE=>$in_full1},
 99 |     {OUT=>"A 3 i2 3\nB 0 i6 0\n"},
100 |     {ERR=>"$full_deprecation"}],
101 |   # --full with --sort => should not change results
102 |   ['slct3dep', '-s -t" " -f -g1 min 2', {IN_PIPE=>$in_full1},
103 |     {OUT=>"A 3 i2 3\nB 0 i6 0\n"},
104 |     {ERR=>"$full_deprecation"}],
105 | 
106 |   # Test 'max' + --full
107 |   # Test with "--full", "i3" and "i5" should be displayed
108 |   ['slct5dep', '-t" " -f -g1 max 2', {IN_PIPE=>$in_full1},
109 |     {OUT=>"A 5 i3 5\nB 8 i5 8\n"},
110 |     {ERR=>"$full_deprecation"}],
111 |   # --full with --sort => should not change results
112 |   ['slct6dep', '-s -t" " -f -g1 max 2', {IN_PIPE=>$in_full1},
113 |     {OUT=>"A 5 i3 5\nB 8 i5 8\n"},
114 |     {ERR=>"$full_deprecation"}],
115 | 
116 |   # Test 'first' + --full
117 |   # Test with "--full", "i1" and "i4" should be displayed
118 |   ['slct8dep', '-t" " -f -g1 first 2', {IN_PIPE=>$in_full1},
119 |     {OUT=>"A 4 i1 4\nB 1 i4 1\n"},
120 |     {ERR=>"$full_deprecation"}],
121 |   # more --full with --sort => see test 'sortslct1dep' below
122 | 
123 |   # Test 'last' + --full
124 |   # Test with "--full", "i3" and "i7" should be displayed
125 |   ['slct10dep', '-t" " -f -g1 last 2', {IN_PIPE=>$in_full1},
126 |     {OUT=>"A 5 i3 5\nB 3 i7 3\n"},
127 |     {ERR=>"$full_deprecation"}],
128 | );
129 | 
130 | if ($have_stable_sort) {
131 |   push @Tests, (
132 |   # Test 'first' + --full + --sort
133 |   # NOTE: This is subtle:
134 |   #       Sorting should be stable: only ordering the column which is used
135 |   #       for grouping (column 1 in this test). This means that the second
136 |   #       column (containing numbers) should NOT affect sorting, and the order
137 |   #       of the lines should not change. The results of this test
138 |   #       should be the same as 'slct8dep'. If the system doesn't have stable
139 |   #       'sort', then the order will change.
140 |   ['sortslct1dep', '-s -t" " -f -g1 first 2', {IN_PIPE=>$in_full1},
141 |     {OUT=>"A 4 i1 4\nB 1 i4 1\n"},
142 |     {ERR=>"$full_deprecation"}],
143 |   # Test 'last' + --full + --sort
144 |   # See note above regarding 'first' - applies to 'last' as well.
145 |   ['sortslct2dep', '-s -t" " -f -g1 last 2', {IN_PIPE=>$in_full1},
146 |     {OUT=>"A 5 i3 5\nB 3 i7 3\n"},
147 |     {ERR=>"$full_deprecation"}],
148 |   )
149 | }
150 | 
151 | my $save_temps = $ENV{SAVE_TEMPS};
152 | my $verbose = $ENV{VERBOSE};
153 | 
154 | my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
155 | exit $fail;
156 | 


--------------------------------------------------------------------------------
/src/decorate-functions.c:
--------------------------------------------------------------------------------
  1 | /* Decorate functions
  2 | 
  3 |    Copyright (C) 2020-2021 Assaf Gordon <assafgordon@gmail.com>
  4 | 
  5 |    This program is free software: you can redistribute it and/or modify
  6 |    it under the terms of the GNU General Public License as published by
  7 |    the Free Software Foundation, either version 3 of the License, or
  8 |    (at your option) any later version.
  9 | 
 10 |    This program is distributed in the hope that it will be useful,
 11 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |    GNU General Public License for more details.
 14 | 
 15 |    You should have received a copy of the GNU General Public License
 16 |    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 17 | 
 18 | #include <config.h>
 19 | #include <sys/types.h>
 20 | #include <sys/socket.h>
 21 | #include <netinet/in.h>
 22 | #include <arpa/inet.h>
 23 | #include <inttypes.h>
 24 | #include <intprops.h>
 25 | #include <stdbool.h>
 26 | #include <error.h>
 27 | 
 28 | #include "system.h"
 29 | #include "die.h"
 30 | #include "decorate-functions.h"
 31 | 
 32 | 
 33 | static bool
 34 | decorate_as_is (const char* in)
 35 | {
 36 |   fputs (in, stdout);   /* debugging aid: echo the field unchanged */
 37 |   return true;
 38 | }
 39 | 
 40 | static bool
 41 | decorate_strlen (const char* in)
 42 | {
 43 |   uintmax_t n_bytes = strlen (in);   /* field length in bytes */
 44 |   fprintf (stdout, "%0*"PRIuMAX, (int)INT_BUFSIZE_BOUND (n_bytes), n_bytes);
 45 |   return true;
 46 | }
 47 | 
 48 | static _GL_ATTRIBUTE_CONST int
 49 | roman_numeral_to_value (char c)
 50 | {
 51 |   /* Parallel tables: symbols[i] is worth values[i].  Only upper-case
 52 |      numerals are recognized.  */
 53 |   static const char symbols[] = "MDCLXVI";
 54 |   static const int values[] = { 1000, 500, 100, 50, 10, 5, 1 };
 55 | 
 56 |   for (size_t i = 0; symbols[i] != '\0'; ++i)
 57 |     if (symbols[i] == c)
 58 |       return values[i];
 59 | 
 60 |   /* Any other character (including NUL) is not a roman digit.  */
 61 |   return 0;
 62 | }
 63 | 
 64 | 
 65 | /* Naive implementation of Roman numerals conversion.
 66 |    Does not support alternative forms such as
 67 |     XIIX,IIXX for 18,
 68 |     IIC for 98.
 69 |    Prints the zero-padded decimal value of IN and returns true; reports
 70 |    an error and returns false if IN is empty or has an invalid digit.  */
 71 | static bool
 72 | decorate_roman_numerals (const char* in)
 73 | {
 74 |   intmax_t result = 0;
 75 |   intmax_t cur,last = 0;
 76 |   if (*in=='\0')
 77 |     {
 78 |       error (0, 0, _("invalid empty roman numeral"));
 79 |       return false;
 80 |     }
 81 |   /* Scan with a cursor so errors can quote the whole of IN.  */
 82 |   for (const char* p = in; *p; ++p)
 83 |     {
 84 |       cur = roman_numeral_to_value (*p);
 85 |       if (!cur)
 86 |         {
 87 |           error (0, 0, _("invalid roman numeral '%c' in %s"),  *p, quote (in));
 88 |           return false;
 89 |         }
 90 | 
 91 |       if (last)
 92 |         {
 93 |           if (last >= cur)
 94 |             result += last;   /* previous digit stands on its own */
 95 |           else
 96 |             {
 97 |               /* subtractive pair such as IV: count both digits now */
 98 |               result += (cur - last);
 99 |               cur = 0;
100 |             }
101 |         }
102 |       last = cur;
103 |     }
104 | 
105 |   result += last;
106 |   printf ("%0*"PRIiMAX, (int)INT_BUFSIZE_BOUND (result), result);
107 |   return true;
108 | }
109 | 
110 | static bool
111 | decorate_ipv4_inet_addr (const char* in)
112 | {
113 |   struct in_addr parsed;
114 | 
115 |   /* inet_aton() returns zero when IN is not a valid address; the table
116 |      entry describes its input as number-and-dots (incl. octal, hex).  */
117 |   const bool valid = (inet_aton (in, &parsed) != 0);
118 | 
119 |   if (valid)
120 |     {
121 |       /* Host byte order, 8 zero-padded upper-case hex digits.  */
122 |       printf ("%08X", ntohl (parsed.s_addr));
123 |       return true;
124 |     }
125 |   error (0, 0, _("invalid IPv4 address %s"), quote (in));
126 |   return false;
127 | }
128 | 
129 | 
130 | static bool
131 | decorate_ipv4_dot_decimal (const char* in)
132 | {
133 |   struct in_addr parsed;
134 | 
135 |   /* inet_pton() result: negative = hard failure (errno is passed to
136 |      die), zero = IN is not a valid address, positive = success.  */
137 |   const int rc = inet_pton (AF_INET, in, &parsed);
138 | 
139 |   if (rc < 0)
140 |     die (SORT_FAILURE, errno, _("inet_pton (AF_INET) failed"));
141 |   if (rc == 0)
142 |     {
143 |       error (0, 0, _("invalid dot-decimal IPv4 address %s"), quote (in));
144 |       return false;
145 |     }
146 | 
147 |   printf ("%08X", ntohl (parsed.s_addr));   /* host order, 8 hex digits */
148 |   return true;
149 | }
150 | 
151 | 
152 | static bool
153 | decorate_ipv6 (const char* in)
154 | {
155 |   struct in6_addr parsed;
156 | 
157 |   const int rc = inet_pton (AF_INET6, in, &parsed);
158 |   if (rc < 0)
159 |     die (SORT_FAILURE, errno, _("inet_pton (AF_INET6) failed"));
160 | 
161 |   if (rc == 0)
162 |     {
163 |       error (0, 0, _("invalid IPv6 address %s"), quote (in));
164 |       return false;
165 |     }
166 | 
167 |   /* A portable way to print IPv6 binary representation:
168 |      eight groups of two bytes (four hex digits), colon-separated.  */
169 |   const uint8_t *octet = parsed.s6_addr;
170 |   for (int group = 0; group < 8; ++group, octet += 2)
171 |     {
172 |       if (group > 0)
173 |         fputc (':', stdout);
174 |       printf ("%02X%02X", octet[0], octet[1]);
175 |     }
176 | 
177 |   return true;
178 | }
179 | 
180 | /* Prints IN (IPv6, or IPv4 widened with MAPPING) as 32 hex digits.  */
181 | static bool
182 | decorate_ipv6_ipv4 (const char* in, uint32_t mapping)
183 | {
184 |   struct in_addr adr4;
185 |   struct in6_addr adr6;
186 |   const int s4 = inet_pton (AF_INET, in, &adr4);
187 |   const int s6 = inet_pton (AF_INET6, in, &adr6);
188 | 
189 |   if (s4 < 0 && s6 < 0)
190 |     die (SORT_FAILURE, errno, _("inet_pton failed for AF_INET and AF_INET6"));
191 | 
192 |   if (!(s4 > 0 || s6 > 0))
193 |     {
194 |       error (0, 0, _("invalid IP address %s"), quote (in));
195 |       return false;
196 |     }
197 | 
198 |   /* Require s6 > 0: a failed (negative) s6 is non-zero too, and adr6
199 |      would then be printed unset although the IPv4 parse succeeded.  */
200 |   if (s6 > 0)
201 |     for (int i=0;i<16;++i)
202 |       printf ("%02X", adr6.s6_addr[i]);
203 |   else
204 |     printf ("%024X%08X", mapping, ntohl (adr4.s_addr));
205 | 
206 |   return true;
207 | }
208 | 
209 | /* IPv4 input is printed as an IPv4-mapped IPv6 address (prefix 0xFFFF).  */
210 | static bool
211 | decorate_ipv6_ipv4_mapped (const char* in)
212 | {
213 |   return decorate_ipv6_ipv4 (in, 0xFFFF);
214 | }
215 | 
216 | /* IPv4 input is printed as an IPv4-compatible IPv6 address (prefix 0).  */
217 | static bool
218 | decorate_ipv6_ipv4_compat (const char* in)
219 | {
220 |   return decorate_ipv6_ipv4 (in, 0);
221 | }
222 | 
223 | 
224 | /* Built-in conversions (name, description, function); NULL entry last.  */
225 | struct conversions_t builtin_conversions[] = {
226 |   { "as-is",      "copy as-is", decorate_as_is },     /* for debugging */
227 |   { "roman",      "roman numerals", decorate_roman_numerals },
228 |   { "strlen",     "length (in bytes) of the specified field", decorate_strlen },
229 |   { "ipv4",       "dotted-decimal IPv4 addresses", decorate_ipv4_dot_decimal },
230 |   { "ipv6",       "IPv6 addresses", decorate_ipv6 },
231 |   { "ipv4inet",   "number-and-dots IPv4 addresses (incl. octal, hex values)",
232 |     decorate_ipv4_inet_addr },
233 |   { "ipv6v4map",  "IPv6 and IPv4 (as IPv4-Mapped IPv6) addresses",
234 |     decorate_ipv6_ipv4_mapped},
235 |   { "ipv6v4comp", "IPv6 and IPv4 (as IPv4-Compatible IPv6) addresses",
236 |     decorate_ipv6_ipv4_compat},
237 |   { NULL,         NULL, 0 }
238 | };
239 | 


--------------------------------------------------------------------------------
/src/crosstab.c:
--------------------------------------------------------------------------------
  1 | /* GNU Datamash - perform simple calculation on input data
  2 | 
  3 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  4 | 
  5 |    This file is part of GNU Datamash.
  6 | 
  7 |    GNU Datamash is free software: you can redistribute it and/or modify
  8 |    it under the terms of the GNU General Public License as published by
  9 |    the Free Software Foundation, either version 3 of the License, or
 10 |    (at your option) any later version.
 11 | 
 12 |    GNU Datamash is distributed in the hope that it will be useful,
 13 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 |    GNU General Public License for more details.
 16 | 
 17 |    You should have received a copy of the GNU General Public License
 18 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 19 | */
 20 | 
 21 | /* Written by Assaf Gordon */
 22 | #include <config.h>
 23 | 
 24 | #include <stdlib.h>
 25 | #include <limits.h>
 26 | #include <string.h>
 27 | #include <assert.h>
 28 | 
 29 | #include "hash.h"
 30 | #include "hashcode-string2.h"
 31 | #include "xalloc.h"
 32 | 
 33 | #include "system.h"
 34 | #include "crosstab.h"
 35 | #include "utils.h"
 36 | #include "text-options.h"
 37 | 
 38 | /* Hash-table comparator: true if A and B are equal strings.  */
 39 | static bool _GL_ATTRIBUTE_PURE
 40 | str_comparator (const void* a, const void* b)
 41 | {
 42 |   assert (a!=NULL && b!=NULL);                   /* LCOV_EXCL_LINE */
 43 |   const char *lhs = a;
 44 |   const char *rhs = b;
 45 |   return (lhs == rhs) || STREQ (lhs, rhs);   /* same pointer: no strcmp */
 46 | }
 47 | 
 48 | static size_t _GL_ATTRIBUTE_PURE
 49 | hash_crosstab_data_cell (void const *x, size_t tablesize)
 50 | {
 51 |   /* Rotate-and-add hash over the row name followed by the column name,
 52 |      reduced modulo TABLESIZE.  */
 53 |   const struct crosstab_datacell *cell = x;
 54 |   const char *const names[2] = { cell->row_name, cell->col_name };
 55 |   size_t h = 0;
 56 | #define SIZE_BITS (sizeof (size_t) * CHAR_BIT)
 57 | 
 58 |   for (size_t i = 0; i < 2; ++i)
 59 |     for (const char *s = names[i]; *s; s++)
 60 |       h = *s + ((h << 9) | (h >> (SIZE_BITS - 9)));
 61 | 
 62 |   return h % tablesize;
 63 | }
 64 | 
 65 | static bool _GL_ATTRIBUTE_PURE
 66 | crosstab_datacell_comparator (const void* a, const void* b)
 67 | {
 68 |   assert (a!=NULL && b!=NULL);                   /* LCOV_EXCL_LINE */
 69 |   const struct crosstab_datacell *lhs = a;
 70 |   const struct crosstab_datacell *rhs = b;
 71 |   if (lhs == rhs)   /* same object: no need to compare names */
 72 |     return true;
 73 |   return (STREQ (lhs->row_name, rhs->row_name)   /* equal cells share */
 74 |           && STREQ (lhs->col_name, rhs->col_name)); /* both names */
 75 | }
 76 | 
 77 | /* New cell: ROW and COL pointers are stored as-is; DATA is duplicated.  */
 78 | static struct crosstab_datacell*
 79 | new_datacell (const char* row, const char* col, const char* data)
 80 | {
 81 |   struct crosstab_datacell *cell = XMALLOC (struct crosstab_datacell);
 82 |   cell->data = xstrdup (data);
 83 |   cell->col_name = col;
 84 |   cell->row_name = row;
 85 |   return cell;
 86 | }
 87 | 
 88 | static void
 89 | crosstab_datacell_free (void *a)
 90 | {
 91 |   struct crosstab_datacell *cell = a;
 92 |   /* syntax-check doesn't like casting the argument to free; free
 93 |      doesn't like const values passed to it. */
 94 |   void *owned_data = (void*)cell->data;
 95 |   /* The names are borrowed from the rows/columns tables: just clear them. */
 96 |   cell->row_name = cell->col_name = NULL;
 97 |   cell->data = NULL;
 98 |   free (owned_data);
 99 |   free (cell);
100 | }
101 | 
102 | /* Allocates a cross-tabulation with empty row, column and cell tables.
103 |    Release it with crosstab_free().  */
104 | struct crosstab*
105 | crosstab_init ()
106 | {
107 |   struct crosstab *tab = XMALLOC (struct crosstab);
108 | 
109 |   tab->rows = hash_initialize (1000, NULL, hash_pjw, str_comparator, free);
110 |   tab->columns = hash_initialize (1000, NULL, hash_pjw, str_comparator, free);
111 |   tab->data = hash_initialize (1000, NULL, hash_crosstab_data_cell,
112 |                                crosstab_datacell_comparator, crosstab_datacell_free);
113 |   return tab;
114 | }
115 | 
116 | void
117 | crosstab_free (struct crosstab* ct)
118 | {
119 |   assert (ct!=NULL);                             /* LCOV_EXCL_LINE */
120 |   hash_free (ct->rows);      /* entries are freed by the table's freer */
121 |   ct->rows = NULL;
122 |   hash_free (ct->columns);
123 |   ct->columns = NULL;
124 |   hash_free (ct->data);      /* cells go through crosstab_datacell_free */
125 |   ct->data = NULL;
126 |   free (ct);                 /* finally the container itself */
127 | }
128 | 
129 | /* Add new cross-tabulation result.
130 |    Dies via xalloc_die() if a hash insertion runs out of memory. */
131 | void
132 | crosstab_add_result (struct crosstab* ct,
133 |                       const char* row, const char* col, const char* data)
134 | {
135 |   const char* r = hash_lookup (ct->rows, row);
136 |   if (r==NULL && (r = hash_insert (ct->rows, xstrdup (row))) == NULL)
137 |     xalloc_die ();
138 |   const char* c = hash_lookup (ct->columns, col);
139 |   if (c==NULL && (c = hash_insert (ct->columns, xstrdup (col))) == NULL)
140 |     xalloc_die ();
141 | 
142 |   struct crosstab_datacell *ctdc = new_datacell (r,c,data);
143 |   struct crosstab_datacell *existing_ctdc = hash_insert (ct->data, ctdc);
144 |   if (existing_ctdc == NULL)
145 |     xalloc_die ();
146 |   if (ctdc != existing_ctdc)   /* cell already present: keep the first */
147 |     crosstab_datacell_free (ctdc);
148 | }
149 | 
150 | /* Print table: a header line with the sorted column names, then one line
151 |    per sorted row name; cells with no data print missing_field_filler.  */
152 | void
153 | crosstab_print (const struct crosstab* ct)
154 | {
155 |   const size_t n_rows = hash_get_n_entries (ct->rows);
156 |   const size_t n_cols = hash_get_n_entries (ct->columns);
157 |   char** row_names = XNMALLOC (n_rows,char*);
158 |   char** col_names = XNMALLOC (n_cols,char*);
159 | 
160 |   hash_get_entries (ct->rows, (void**)row_names, n_rows);
161 |   hash_get_entries (ct->columns, (void**)col_names, n_cols);
162 |   qsort (row_names, n_rows, sizeof (char*), cmpstringp);
163 |   qsort (col_names, n_cols, sizeof (char*), cmpstringp);
164 | 
165 |   /* Header: the leading separator leaves the row-name column empty.  */
166 |   for (size_t c = 0; c < n_cols; ++c)
167 |     {
168 |       print_field_separator ();
169 |       fputs (col_names[c], stdout);
170 |     }
171 |   print_line_separator ();
172 | 
173 |   /* Body: row name, then one cell per column.  */
174 |   for (size_t r = 0; r < n_rows; ++r)
175 |     {
176 |       fputs (row_names[r], stdout);
177 |       for (size_t c = 0; c < n_cols; ++c)
178 |         {
179 |           /* Stack-allocated lookup key; only the names are compared.  */
180 |           const struct crosstab_datacell key =
181 |             { .row_name = row_names[r], .col_name = col_names[c] };
182 |           const struct crosstab_datacell *hit = hash_lookup (ct->data, &key);
183 | 
184 |           print_field_separator ();
185 |           fputs (hit ? hit->data : missing_field_filler, stdout);
186 |         }
187 | 
188 |       print_line_separator ();
189 |     }
190 | 
191 |   free (col_names);
192 |   free (row_names);
193 | }
194 | /* vim: set cinoptions=>4,n-2,{2,^-2,:2,=2,g0,h2,p5,t0,+2,(0,u0,w1,m1: */
195 | /* vim: set shiftwidth=2: */
196 | /* vim: set tabstop=8: */
197 | /* vim: set expandtab: */
198 | 


--------------------------------------------------------------------------------
/tests/datamash-crosstab.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | =pod
  3 |   Unit Tests for GNU Datamash - perform simple calculation on input data
  4 |   Tests for 'crosstab' operation mode.
  5 | 
  6 | 
  7 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  8 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  9 | 
 10 |    This file is part of GNU Datamash.
 11 | 
 12 |    GNU Datamash is free software: you can redistribute it and/or modify
 13 |    it under the terms of the GNU General Public License as published by
 14 |    the Free Software Foundation, either version 3 of the License, or
 15 |    (at your option) any later version.
 16 | 
 17 |    GNU Datamash is distributed in the hope that it will be useful,
 18 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 |    GNU General Public License for more details.
 21 | 
 22 |    You should have received a copy of the GNU General Public License
 23 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 24 | 
 25 |    Written by Assaf Gordon.
 26 | =cut
 27 | use strict;
 28 | use warnings;
 29 | use List::Util qw/max/;
 30 | use Data::Dumper;
 31 | 
 32 | # Until a better way comes along to auto-use Coreutils Perl modules
 33 | # as in the coreutils' autotools system.
 34 | use Coreutils;
 35 | use CuSkip;
 36 | use CuTmpdir qw(datamash);
 37 | 
 38 | (my $program_name = $0) =~ s|.*/||;
 39 | my $prog_bin = 'datamash';
 40 | 
 41 | # Program name as printed in error messages; fall back to the binary name.
 42 | my $prog = `$prog_bin ---print-progname` || $prog_bin;
 43 | # Turn off localization of executable's output.
 44 | @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
 45 | 
 46 | my $in1=<<'EOF';
 47 | a	x	1
 48 | a	y	2
 49 | a	z	3
 50 | b	x	4
 51 | b	y	5
 52 | b	z	6
 53 | c	x	7
 54 | c	y	8
 55 | c	z	9
 56 | EOF
 57 | 
 58 | my $out1_first=<<'EOF';
 59 | 	x	y	z
 60 | a	1	2	3
 61 | b	4	5	6
 62 | c	7	8	9
 63 | EOF
 64 | 
 65 | my $out1_count=<<'EOF';
 66 | 	x	y	z
 67 | a	1	1	1
 68 | b	1	1	1
 69 | c	1	1	1
 70 | EOF
 71 | 
 72 | #unsorted input with duplicates
 73 | my $in2=<<'EOF';
 74 | a	x	1
 75 | a	y	2
 76 | a	x	3
 77 | EOF
 78 | 
 79 | # when using 'first' operation
 80 | my $out2_first=<<'EOF';
 81 | 	x	y
 82 | a	1	2
 83 | EOF
 84 | 
 85 | # when using 'last' operation without sorting the data
 86 | # this output is considered incorrect...
 87 | # TODO: perhaps warn the user about it (like join warns of unsorted input).
 88 | my $out2_last_unsorted=<<'EOF';
 89 | 	x	y
 90 | a	1	2
 91 | EOF
 92 | 
 93 | # when using 'last' operation with sorting the data,
 94 | # correct value is shown
 95 | my $out2_last_sorted=<<'EOF';
 96 | 	x	y
 97 | a	3	2
 98 | EOF
 99 | 
100 | my $out2_count_unsorted=<<'EOF';
101 | 	x	y
102 | a	1	1
103 | EOF
104 | 
105 | my $out2_count_sorted=<<'EOF';
106 | 	x	y
107 | a	2	1
108 | EOF
109 | 
110 | # using 'sum' without sorting the data.
111 | # this output is considered incorrect...
112 | # TODO: perhaps warn the user about it (like join warns of unsorted input).
113 | my $out2_sum_unsorted=<<'EOF';
114 | 	x	y
115 | a	1	2
116 | EOF
117 | 
118 | # using 'sum' with sorting the data
119 | my $out2_sum_sorted=<<'EOF';
120 | 	x	y
121 | a	4	2
122 | EOF
123 | 
124 | #input with missing values (b/y is missing)
125 | my $in3=<<'EOF';
126 | a	x	1
127 | a	y	2
128 | b	x	3
129 | EOF
130 | 
131 | # default filler is 'N/A'
132 | my $out3_na=<<'EOF';
133 | 	x	y
134 | a	1	2
135 | b	3	N/A
136 | EOF
137 | 
138 | # custom filler 'XX'
139 | my $out3_xx=<<'EOF';
140 | 	x	y
141 | a	1	2
142 | b	3	XX
143 | EOF
144 | 
145 | my $in4=<<'EOF';
146 | x y
147 | 1 0.5
148 | 2 1
149 | 3 1.5
150 | 4 2
151 | EOF
152 | 
153 | my $out4_hdr=<<'EOF';
154 | GroupBy(x)	GroupBy(y)	count(x)
155 | 	0.5	1	1.5	2
156 | 1	1	N/A	N/A	N/A
157 | 2	N/A	1	N/A	N/A
158 | 3	N/A	N/A	1	N/A
159 | 4	N/A	N/A	N/A	1
160 | EOF
161 | 
162 | my $out4_no_hdr=<<'EOF';
163 | 	0.5	1	1.5	2
164 | 1	1	N/A	N/A	N/A
165 | 2	N/A	1	N/A	N/A
166 | 3	N/A	N/A	1	N/A
167 | 4	N/A	N/A	N/A	1
168 | EOF
169 | 
170 | my @Tests =
171 | (
172 |   ['c1','crosstab 1,2 first 3', {IN_PIPE=>$in1}, {OUT=>$out1_first}],
173 |   ['c2','ct 1,2 first 3',       {IN_PIPE=>$in1}, {OUT=>$out1_first}],
174 |   ['c3','ct 1,2 count 1',       {IN_PIPE=>$in1}, {OUT=>$out1_count}],
175 |   ['c4','ct 1-2 count 1',       {IN_PIPE=>$in1}, {OUT=>$out1_count}],
176 | 
177 |   # Default operation is count
178 |   ['c5','ct 1,2',               {IN_PIPE=>$in1}, {OUT=>$out1_count}],
179 | 
180 |   # test unsorted input with duplicates
181 |   ['c10','ct 1,2 first 3',       {IN_PIPE=>$in2}, {OUT=>$out2_first}],
182 | 
183 |   ['c11','   ct 1,2 last 3',     {IN_PIPE=>$in2}, {OUT=>$out2_last_unsorted}],
184 |   ['c12','-s ct 1,2 last 3',     {IN_PIPE=>$in2}, {OUT=>$out2_last_sorted}],
185 | 
186 |   ['c13','   ct 1,2 sum 3',      {IN_PIPE=>$in2}, {OUT=>$out2_sum_unsorted}],
187 |   ['c14','-s ct 1,2 sum 3',      {IN_PIPE=>$in2}, {OUT=>$out2_sum_sorted}],
188 | 
189 |   # test default operation (count) on unsorted data
190 |   ['c15','   ct 1,2 count 3',    {IN_PIPE=>$in2}, {OUT=>$out2_count_unsorted}],
191 |   ['c16','-s ct 1,2 count 3',    {IN_PIPE=>$in2}, {OUT=>$out2_count_sorted}],
192 | 
193 |   # test headers
194 |   ['c17','-W --header-in --header-out ct x,y',
195 |     {IN_PIPE=>$in4}, {OUT=>$out4_hdr}],
196 |   ['c18','-W --header-in ct x,y',
197 |     {IN_PIPE=>$in4}, {OUT=>$out4_no_hdr}],
198 | 
199 |   # Test missing values
200 |   ['c30','ct 1,2 first 3',       {IN_PIPE=>$in3}, {OUT=>$out3_na}],
201 |   ['c31','--filler XX ct 1,2 first 3',       {IN_PIPE=>$in3}, {OUT=>$out3_xx}],
202 | 
203 |   # Test wrong usage
204 |   ['e1',  'ct',  {IN_PIPE=>""}, {EXIT=>1},
205 |     {ERR=>"$prog: missing field for operation 'crosstab'\n"}],
206 |   ['e2',  'ct 1',  {IN_PIPE=>""}, {EXIT=>1},
207 |     {ERR=>"$prog: crosstab requires exactly 2 fields, found 1\n"}],
208 |   ['e3',  'ct 1,2,3,4',  {IN_PIPE=>""}, {EXIT=>1},
209 |     {ERR=>"$prog: crosstab requires exactly 2 fields, found 4\n"}],
210 |   ['e4',  'ct 1,2 md5 4', {IN_PIPE=>""}, {EXIT=>1},
211 |     {ERR=>"$prog: conflicting operation found: expecting crosstab " .
212 |           "operations, but found line operation 'md5'\n"}],
213 |   ['e5',  'ct 1,2 sum 1,2', {IN_PIPE=>""}, {EXIT=>1},
214 |     {ERR=>"$prog: crosstab supports one operation, found 2\n"}],
215 |   ['e6',  'ct 1,2 min 2 max 2', {IN_PIPE=>""}, {EXIT=>1},
216 |     {ERR=>"$prog: crosstab supports one operation, found 2\n"}],
217 |   ['e7',  'ct 1:2', {IN_PIPE=>""}, {EXIT=>1},
218 |     {ERR=>"$prog: invalid field pair for operation 'crosstab'\n"}],
219 |   ['e8',  'ct 1-3', {IN_PIPE=>""}, {EXIT=>1},
220 |     {ERR=>"$prog: crosstab requires exactly 2 fields, found 3\n"}],
221 | );
222 | 
223 | my $save_temps = $ENV{SAVE_TEMPS};
224 | my $verbose = $ENV{VERBOSE};
225 | 
226 | my $fail = run_tests ($program_name, $prog_bin, \@Tests, $save_temps, $verbose);
227 | exit $fail;
228 | 


--------------------------------------------------------------------------------
/tests/datamash-check.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | =pod
  3 |   Unit Tests for GNU Datamash - perform simple calculation on input data
  4 |   Tests for 'check' operation mode
  5 | 
  6 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  7 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  8 | 
  9 |    This file is part of GNU Datamash.
 10 | 
 11 |    GNU Datamash is free software: you can redistribute it and/or modify
 12 |    it under the terms of the GNU General Public License as published by
 13 |    the Free Software Foundation, either version 3 of the License, or
 14 |    (at your option) any later version.
 15 | 
 16 |    GNU Datamash is distributed in the hope that it will be useful,
 17 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 19 |    GNU General Public License for more details.
 20 | 
 21 |    You should have received a copy of the GNU General Public License
 22 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 |    Written by Assaf Gordon.
 25 | =cut
 26 | use strict;
 27 | use warnings;
 28 | use List::Util qw/max/;
 29 | use Data::Dumper;
 30 | 
 31 | # Until a better way comes along to auto-use Coreutils Perl modules
 32 | # as in the coreutils' autotools system.
 33 | use Coreutils;
 34 | use CuSkip;
 35 | use CuTmpdir qw(datamash);
 36 | 
 37 | (my $program_name = $0) =~ s|.*/||;
 38 | my $prog_bin = 'datamash';
 39 | 
 40 | ## Cross-Compiling portability hack:
 41 | ##  under qemu/binfmt, argv[0] (which is used to report errors) will contain
 42 | ##  the full path of the binary, if the binary is on the $PATH.
 43 | ##  So we try to detect what is the actual returned value of the program
 44 | ##  in case of an error.
 45 | my $prog = `$prog_bin --foobar 2>&1 | head -n 1 | cut -f1 -d:`;
 46 | chomp $prog if $prog;
 47 | $prog = $prog_bin unless $prog;
 48 | 
 49 | # Turn off localization of executable's output.
 50 | @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
 51 | 
 52 | 
 53 | my $in1=<<'EOF';
 54 | A	1	!
 55 | B	2	@
 56 | C	3	#
 57 | D	4	$
 58 | E	5	%
 59 | EOF
 60 | 
 61 | my $in2=<<'EOF';
 62 | A	1
 63 | B
 64 | C	3
 65 | EOF
 66 | 
 67 | my $in3=<<'EOF';
 68 | A
 69 | EOF
 70 | 
 71 | my $in4=<<'EOF';
 72 | #comment
 73 | A
 74 | ;comment
 75 | 
 76 | B
 77 | EOF
 78 | 
 79 | 
 80 | my @Tests =
 81 | (
 82 |   # Simple check, no expected line/field counts
 83 |   ['c1',  'check', {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
 84 | 
 85 |   # Variations on command-line parsing
 86 |   ['c2',  'check 3 field',   {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
 87 |   ['c3',  'check 3 fields',  {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
 88 |   ['c4',  'check 3 col',     {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
 89 |   ['c5',  'check 3 columns', {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
 90 |   ['c6',  'check 3 column',  {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
 91 | 
 92 |   ['c7',  'check field 3',   {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
 93 |   ['c8',  'check fields 3',  {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
 94 |   ['c9',  'check col 3',     {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
 95 |   ['c10', 'check columns 3', {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
 96 |   ['c11', 'check column 3',  {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
 97 | 
 98 |   ['c12', 'check 5 lines',   {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
 99 |   ['c13', 'check 5 line',    {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
100 |   ['c14', 'check 5 rows',    {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
101 |   ['c15', 'check 5 row',     {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
102 | 
103 |   ['c16', 'check lines 5',   {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
104 |   ['c17', 'check line 5',    {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
105 |   ['c18', 'check row 5',     {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
106 |   ['c19', 'check rows 5',    {IN_PIPE=>$in1}, {OUT=>"5 lines, 3 fields\n"}],
107 | 
108 | 
109 |   # Duplicated options
110 |   ['e1', 'check rows 5 lines 6',  {IN_PIPE=>$in1}, {EXIT=>1},
111 |     {ERR=>"$prog: number of lines/rows already set in operation 'check'\n"}],
112 |   ['e2', 'check fields 6 fields 1',  {IN_PIPE=>$in1}, {EXIT=>1},
113 |     {ERR=>"$prog: number of fields/columns already set " .
114 |           "in operation 'check'\n"}],
115 | 
116 |   # Invalid values
117 |   ['e3', 'check 0 lines',  {IN_PIPE=>$in1}, {EXIT=>1},
118 |     {ERR=>"$prog: invalid value zero for lines/fields in operation 'check'\n"}],
119 |   ['e4', 'check 0 fields',  {IN_PIPE=>$in1}, {EXIT=>1},
120 |     {ERR=>"$prog: invalid value zero for lines/fields in operation 'check'\n"}],
121 | 
122 | 
123 | 
124 |   # Check lines
125 |   ['c40', 'check 4 lines',  {IN_PIPE=>$in1}, {EXIT=>1},
126 |     {ERR=>"$prog: check failed: input had 5 lines (expecting 4)\n"}],
127 |   ['c41', 'check 6 lines',  {IN_PIPE=>$in1}, {EXIT=>1},
128 |     {ERR=>"$prog: check failed: input had 5 lines (expecting 6)\n"}],
129 |   ['c42', 'check 6 lines',  {IN_PIPE=>""}, {EXIT=>1},
130 |     {ERR=>"$prog: check failed: input had 0 lines (expecting 6)\n"}],
131 | 
132 |   # Check fields
133 |   ['c60', 'check 2 fields',  {IN_PIPE=>$in1}, {EXIT=>1},
134 |     {ERR=>"line 1 (3 fields):\n" .
135 |           "  A\t1\t!\n" .
136 |           "$prog: check failed: line 1 has 3 fields (expecting 2)\n"}],
137 | 
138 | 
139 |   # Check matrix structure, no expected number of fields
140 |   ['c61', 'check',  {IN_PIPE=>$in2}, {EXIT=>1},
141 |     {ERR=>"line 1 (2 fields):\n" .
142 |           "  A\t1\n" .
143 |           "line 2 (1 fields):\n" .
144 |           "  B\n" .
145 |           "$prog: check failed: line 2 has 1 fields (previous line had 2)\n"}],
146 | 
147 |   # With expected number of fields
148 |   ['c62', 'check 2 fields',  {IN_PIPE=>$in2}, {EXIT=>1},
149 |     {ERR=>"line 2 (1 fields):\n" .
150 |           "  B\n" .
151 |           "$prog: check failed: line 2 has 1 fields (expecting 2)\n"}],
152 | 
153 |   # no special treatment for comments or empty lines by default
154 |   ['c63', 'check', {IN_PIPE=>$in4}, {EXIT=>1},
155 |     {ERR=>"line 3 (1 fields):\n" .
156 |           "  ;comment\n" .
157 |           "line 4 (0 fields):\n" .
158 |           "  \n" .
159 |           "$prog: check failed: line 4 has 0 fields (previous line had 1)\n"}],
160 |   # --skip-comments skips only comment lines, but not empty lines
161 |   ['c64', '--skip-comments check', {IN_PIPE=>$in4}, {EXIT=>1},
162 |     {ERR=>"line 1 (1 fields):\n" .
163 |           "  A\n" .
164 |           "line 2 (0 fields):\n" .
165 |           "  \n" .
166 |           "$prog: check failed: line 2 has 0 fields (previous line had 1)\n"}],
167 |   # --vnlog skips both comment and empty lines, but only '#' starts a comment
168 |   # (the first line is actually the vnlog header and requires the same number
169 |   #  of fields as the data lines, but this test is about ignoring empty lines)
170 |   ['c65', '--vnlog check', {IN_PIPE=>$in4}, {OUT=>"3 lines, 1 field\n"}],
171 | );
172 | 
173 | my $save_temps = $ENV{SAVE_TEMPS};
174 | my $verbose = $ENV{VERBOSE};
175 | 
176 | my $fail = run_tests ($program_name, $prog_bin, \@Tests, $save_temps, $verbose);
177 | exit $fail;
178 | 


--------------------------------------------------------------------------------
/tests/decorate-sort-tests.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | =pod
  3 |   Unit Tests for decorate
  4 | 
  5 |    Copyright (C) 2020-2021 Assaf Gordon <assafgordon@gmail.com>
  6 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  7 | 
  8 |    This file is part of GNU Datamash.
  9 | 
 10 |    GNU Datamash is free software: you can redistribute it and/or modify
 11 |    it under the terms of the GNU General Public License as published by
 12 |    the Free Software Foundation, either version 3 of the License, or
 13 |    (at your option) any later version.
 14 | 
 15 |    GNU Datamash is distributed in the hope that it will be useful,
 16 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 |    GNU General Public License for more details.
 19 | 
 20 |    You should have received a copy of the GNU General Public License
 21 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 22 | 
 23 |    Written by Assaf Gordon.
 24 | =cut
 25 | use strict;
 26 | use warnings;
 27 | 
 28 | # Until a better way comes along to auto-use Coreutils Perl modules
 29 | # as in the coreutils' autotools system.
 30 | use Coreutils;
 31 | use CuSkip;
 32 | use CuTmpdir qw(decorate);
 33 | use MIME::Base64 ;
 34 | 
 35 | (my $program_name = $0) =~ s|.*/||;
 36 | my $prog = 'decorate';
 37 | $prog .= " --sort-cmd=/usr/bin/sort" if $^O eq "netbsd";
 38 | 
 39 | # TODO: add localization tests with "grouping"
 40 | # Turn off localization of executable's output.
 41 | @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
 42 | 
 43 | 
 44 | my $in1=<<'EOF';
 45 | I   1.20.30.41
 46 | II  1.20.30.1
 47 | IV  1.10.30.14
 48 | M   1.2.10.3
 49 | M   192.168.43.1
 50 | II  1.20.30.41
 51 | D   192.168.17.8
 52 | C   1.2.10.3
 53 | L   192.168.17.10
 54 | EOF
 55 | 
 56 | 
 57 | my $out1_dec_roman=<<'EOF';
 58 | I   1.20.30.41
 59 | II  1.20.30.1
 60 | II  1.20.30.41
 61 | IV  1.10.30.14
 62 | L   192.168.17.10
 63 | C   1.2.10.3
 64 | D   192.168.17.8
 65 | M   1.2.10.3
 66 | M   192.168.43.1
 67 | EOF
 68 | 
 69 | my $out1_dec_ipv4=<<'EOF';
 70 | C   1.2.10.3
 71 | M   1.2.10.3
 72 | IV  1.10.30.14
 73 | II  1.20.30.1
 74 | I   1.20.30.41
 75 | II  1.20.30.41
 76 | D   192.168.17.8
 77 | L   192.168.17.10
 78 | M   192.168.43.1
 79 | EOF
 80 | 
 81 | my $out1_dec_ipv4_stable=<<'EOF';
 82 | M   1.2.10.3
 83 | C   1.2.10.3
 84 | IV  1.10.30.14
 85 | II  1.20.30.1
 86 | I   1.20.30.41
 87 | II  1.20.30.41
 88 | D   192.168.17.8
 89 | L   192.168.17.10
 90 | M   192.168.43.1
 91 | EOF
 92 | 
 93 | 
 94 | my $out1_dec_ipv4_rev=<<'EOF';
 95 | M   192.168.43.1
 96 | L   192.168.17.10
 97 | D   192.168.17.8
 98 | I   1.20.30.41
 99 | II  1.20.30.41
100 | II  1.20.30.1
101 | IV  1.10.30.14
102 | C   1.2.10.3
103 | M   1.2.10.3
104 | EOF
105 | 
106 | my $out1_dec_ipv4_rev_header1=<<'EOF';
107 | I   1.20.30.41
108 | M   192.168.43.1
109 | L   192.168.17.10
110 | D   192.168.17.8
111 | II  1.20.30.41
112 | II  1.20.30.1
113 | IV  1.10.30.14
114 | C   1.2.10.3
115 | M   1.2.10.3
116 | EOF
117 | 
118 | my $out1_dec_ipv4_rev_header2=<<'EOF';
119 | I   1.20.30.41
120 | II  1.20.30.1
121 | M   192.168.43.1
122 | L   192.168.17.10
123 | D   192.168.17.8
124 | II  1.20.30.41
125 | IV  1.10.30.14
126 | C   1.2.10.3
127 | M   1.2.10.3
128 | EOF
129 | 
130 | 
131 | my $out1_dec_roman_ipv4rev=<<'EOF';
132 | I   1.20.30.41
133 | II  1.20.30.41
134 | II  1.20.30.1
135 | IV  1.10.30.14
136 | L   192.168.17.10
137 | C   1.2.10.3
138 | D   192.168.17.8
139 | M   192.168.43.1
140 | M   1.2.10.3
141 | EOF
142 | 
143 | my $out1_dec_ipv4_romanrev=<<'EOF';
144 | M   1.2.10.3
145 | C   1.2.10.3
146 | IV  1.10.30.14
147 | II  1.20.30.1
148 | II  1.20.30.41
149 | I   1.20.30.41
150 | D   192.168.17.8
151 | L   192.168.17.10
152 | M   192.168.43.1
153 | EOF
154 | 
155 | 
156 | 
157 | my $out1_dec_roman_k2=<<'EOF';
158 | I   1.20.30.41
159 | II  1.20.30.1
160 | II  1.20.30.41
161 | IV  1.10.30.14
162 | L   192.168.17.10
163 | C   1.2.10.3
164 | D   192.168.17.8
165 | M   1.2.10.3
166 | M   192.168.43.1
167 | EOF
168 | 
169 | my $out1_dec_k2n_roman=<<'EOF';
170 | IV  1.10.30.14
171 | I   1.20.30.41
172 | II  1.20.30.1
173 | II  1.20.30.41
174 | C   1.2.10.3
175 | M   1.2.10.3
176 | L   192.168.17.10
177 | D   192.168.17.8
178 | M   192.168.43.1
179 | EOF
180 | 
181 | my $in2=<<'EOF';
182 | 203.0.113.47
183 | 192.0.2.33
184 | 203.0.113.0
185 | 192.0.2.3
186 | 0.0.0.0
187 | ::ffff:192.0.2.42
188 | 2001:db8:6:5:4:3:2:1
189 | 2001:Db8::
190 | ::192.0.2.41
191 | ::1
192 | EOF
193 | 
194 | my $out2_ipv6v4map=<<'EOF';
195 | ::1
196 | ::192.0.2.41
197 | 0.0.0.0
198 | 192.0.2.3
199 | 192.0.2.33
200 | ::ffff:192.0.2.42
201 | 203.0.113.0
202 | 203.0.113.47
203 | 2001:Db8::
204 | 2001:db8:6:5:4:3:2:1
205 | EOF
206 | 
207 | my $out2_ipv6v4comp=<<'EOF';
208 | 0.0.0.0
209 | ::1
210 | 192.0.2.3
211 | 192.0.2.33
212 | ::192.0.2.41
213 | 203.0.113.0
214 | 203.0.113.47
215 | ::ffff:192.0.2.42
216 | 2001:Db8::
217 | 2001:db8:6:5:4:3:2:1
218 | EOF
219 | 
220 | 
221 | my @Tests =
222 | (
223 |   ['s1', '-k1,1:roman', {IN_PIPE=>$in1}, {OUT => $out1_dec_roman}],
224 |   ['s2', '-k2,2:ipv4',  {IN_PIPE=>$in1}, {OUT => $out1_dec_ipv4}],
225 |   ['s3', '-k2,2:ipv4 --stable', {IN_PIPE=>$in1},
226 |     {OUT => $out1_dec_ipv4_stable}],
227 |   ['s4', '-k2,2r:ipv4',  {IN_PIPE=>$in1}, {OUT => $out1_dec_ipv4_rev}],
228 | 
229 |   ['s5', '-k1,1:roman -k2,2r:ipv4' , {IN_PIPE=>$in1},
230 |     {OUT => $out1_dec_roman_ipv4rev}],
231 |   ['s6', '-k2,2:ipv4 -k1r,1:roman' , {IN_PIPE=>$in1},
232 |     {OUT => $out1_dec_ipv4_romanrev}],
233 | 
234 |   ['s10', '-k1,1:roman -k2,2' , {IN_PIPE=>$in1},
235 |     {OUT => $out1_dec_roman_k2}],
236 |   ['s11', '-k2n,2 -k1,1:roman' , {IN_PIPE=>$in1},
237 |     {OUT => $out1_dec_k2n_roman}],
238 | 
239 |   ['s12', '-k1,1:ipv6v4map',  {IN_PIPE=>$in2}, {OUT => $out2_ipv6v4map}],
240 |   ['s13', '-k1,1:ipv6v4comp', {IN_PIPE=>$in2}, {OUT => $out2_ipv6v4comp}],
241 | 
242 | 
243 |   # Sort with header lines
244 |   ['sh1', '-H -k2,2r:ipv4', {IN_PIPE=>$in1}, {OUT=>$out1_dec_ipv4_rev_header1}],
245 |   ['sh2', '--header=2 -k2,2r:ipv4',  {IN_PIPE=>$in1},
246 |     {OUT => $out1_dec_ipv4_rev_header2}],
247 |   # More header lines than in the input
248 |   ['sh3', '--header=9 -k2,2r:ipv4',  {IN_PIPE=>$in1}, {OUT => $in1}],
249 |   ['sh4', '--header=10 -k2,2r:ipv4',  {IN_PIPE=>$in1}, {OUT => $in1}],
250 | 
251 | );
252 | 
# Re-run every passing test with the '---debug' option prepended, to make
# sure the additional diagnostics never alter the regular output.
my @debug_tests;
for my $test (@Tests)
  {
    # Only the hash elements of a test description carry its parameters.
    my @params = grep { ref $_ eq 'HASH' } @$test;

    # Tests which expect a non-zero exit code, or which already carry an
    # ERR_SUBST, are not duplicated (the debug variant must install its
    # own ERR_SUBST).  As in a sequential scan, the last defined EXIT wins.
    my @exit_codes = grep { defined } map { $_->{EXIT} } @params;
    my $expects_failure = @exit_codes && $exit_codes[-1];
    my $has_err_subst = grep { defined $_->{ERR_SUBST} } @params;
    next if $expects_failure || $has_err_subst;

    # Clone the test under a new name, with the debug option prepended and
    # an ERR_SUBST that discards the debug printouts before stderr is
    # compared.
    my ($name, $args, @rest) = @$test;
    push @debug_tests,
      ['dbg_' . $name, '---debug ' . $args, @rest,
       {ERR_SUBST => q!s/.*\n//m!}];
  }
push @Tests, @debug_tests;
280 | 
281 | 
282 | my $save_temps = $ENV{SAVE_TEMPS};
283 | my $verbose = $ENV{VERBOSE};
284 | 
285 | my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
286 | exit $fail;
287 | 


--------------------------------------------------------------------------------
/tests/datamash-transpose.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | =pod
  3 |   Unit Tests for GNU Datamash - perform simple calculation on input data
  4 |   Tests for 'transpose' and 'reverse' operation modes.
  5 | 
  6 | 
  7 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  8 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  9 | 
 10 |    This file is part of GNU Datamash.
 11 | 
 12 |    GNU Datamash is free software: you can redistribute it and/or modify
 13 |    it under the terms of the GNU General Public License as published by
 14 |    the Free Software Foundation, either version 3 of the License, or
 15 |    (at your option) any later version.
 16 | 
 17 |    GNU Datamash is distributed in the hope that it will be useful,
 18 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 |    GNU General Public License for more details.
 21 | 
 22 |    You should have received a copy of the GNU General Public License
 23 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 24 | 
 25 |    Written by Assaf Gordon.
 26 | =cut
 27 | use strict;
 28 | use warnings;
 29 | use List::Util qw/max/;
 30 | use Data::Dumper;
 31 | 
 32 | # Until a better way comes along to auto-use Coreutils Perl modules
 33 | # as in the coreutils' autotools system.
 34 | use Coreutils;
 35 | use CuSkip;
 36 | use CuTmpdir qw(datamash);
 37 | 
 38 | (my $program_name = $0) =~ s|.*/||;
 39 | my $prog_bin = 'datamash';
 40 | 
 41 | ## Cross-Compiling portability hack:
 42 | ##  under qemu/binfmt, argv[0] (which is used to report errors) will contain
 43 | ##  the full path of the binary, if the binary is on the $PATH.
 44 | ##  So we try to detect what is the actual returned value of the program
 45 | ##  in case of an error.
 46 | my $prog = `$prog_bin --foobar 2>&1 | head -n 1 | cut -f1 -d:`;
 47 | chomp $prog if $prog;
 48 | $prog = $prog_bin unless $prog;
 49 | 
 50 | # Turn off localization of executable's output.
 51 | @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
 52 | 
 53 | sub perl_reverse_fields
 54 | {
 55 | 	my $field_sep = shift;
 56 | 	my $input = shift;
 57 | 	return join ("\n",
 58 | 		map {
 59 | 		   join($field_sep,
 60 | 		          reverse(
 61 | 			           split /$field_sep/, $_
 62 | 			         )
 63 | 		       )
 64 | 		}
 65 | 		split /\n/, $input) . "\n";
 66 | }
 67 | 
 68 | sub perl_transpose
 69 | {
 70 | 	my $field_sep = shift;
 71 | 	my $filler = shift ;
 72 | 	my $input = shift;
 73 | 	my @lines = map { [ split /$field_sep/, $_ ] } split /\n/, $input;
 74 | 	my $max_field = max ( map{ scalar(@$_) } @lines );
 75 | 
 76 | 	my @output;
 77 | 	foreach my $i ( 0 .. ( $max_field - 1) ) {
 78 | 		my @new_line ;
 79 | 		foreach my $l (@lines) {
 80 | 			push @new_line,
 81 | 			     (scalar(@$l) > $i) ? $l->[$i] : $filler;
 82 | 		}
 83 | 
 84 | 		push @output, join($field_sep, @new_line);
 85 | 	}
 86 | 
 87 | 	return join("\n", @output) . "\n";
 88 | }
 89 | 
 90 | my $in1=<<'EOF';
 91 | A	1	!
 92 | B	2	@
 93 | C	3	#
 94 | D	4	$
 95 | E	5	%
 96 | EOF
 97 | 
 98 | my $in2 = $in1;
 99 | $in2 =~ s/\t/:/gms;
100 | 
101 | my $in3=<<'EOF';
102 | A	1
103 | B
104 | C	3
105 | EOF
106 | 
107 | my $in4=<<'EOF';
108 | A
109 | B
110 | C
111 | EOF
112 | 
113 | my $in5="A\tB\tC\tD\n";
114 | 
115 | my $in6="A\n";
116 | 
117 | my $in7="";
118 | 
119 | my $out1_rev = perl_reverse_fields ( "\t", $in1 );
120 | my $out2_rev = perl_reverse_fields ( ":",  $in2 );
121 | my $out3_rev = perl_reverse_fields ( "\t", $in3 );
122 | my $out4_rev = perl_reverse_fields ( "\t", $in4 );
123 | my $out5_rev = perl_reverse_fields ( "\t", $in5 );
124 | 
125 | my $out1_tr = perl_transpose ( "\t", "N/A", $in1 );
126 | my $out2_tr = perl_transpose ( ":", "N/A", $in2 ) ;
127 | my $out3_tr = perl_transpose ("\t", "N/A", $in3 ) ;
128 | my $out3_filler_tr = perl_transpose ("\t", "xxx", $in3 ) ;
129 | my $out4_tr = perl_transpose ("\t", "N/A", $in4 ) ;
130 | my $out5_tr = perl_transpose ("\t", "N/A", $in5 ) ;
131 | 
132 | my $in_hdr1=<<'EOF';
133 | X:Y
134 | 1:a
135 | 2:b
136 | EOF
137 | 
138 | 
139 | # Transposing with missing value in the last line
140 | # (bug in 1.1.0 would result in 'c' being silently dropped).
141 | my $in_missing1=<<'EOF';
142 | a	b	c
143 | 1	2
144 | EOF
145 | my $out_missing1=<<'EOF';
146 | a	1
147 | b	2
148 | c	N/A
149 | EOF
150 | 
151 | my @Tests =
152 | (
153 |   # Simple transpose and reverse
154 |   ['tr1',  'transpose', {IN_PIPE=>$in1}, {OUT=>$out1_tr}],
155 |   ['rev1', 'reverse',   {IN_PIPE=>$in1}, {OUT=>$out1_rev}],
156 | 
157 |   # non-tab delimiter
158 |   ['tr2',  '-t: transpose', {IN_PIPE=>$in2}, {OUT=>$out2_tr}],
159 |   ['rev2', '-t: reverse',   {IN_PIPE=>$in2}, {OUT=>$out2_rev}],
160 | 
161 |   # missing fields, strict mode
162 |   ['tr3',  'transpose', {IN_PIPE=>$in3}, {EXIT=>1},
163 |     {OUT_SUBST=>'s/.*//'},
164 |     {ERR=>"$prog: transpose input error: line 2 has 1 fields ".
165 | 	        "(previous lines had 2);\n" .
166 |           "see --help to disable strict mode\n"}],
167 |   ['rev3', 'reverse',   {IN_PIPE=>$in3}, {EXIT=>1},
168 |     {OUT_SUBST=>'s/.*//s'},
169 |     {ERR=>"$prog: reverse-field input error: line 2 has 1 fields ".
170 | 	        "(previous lines had 2);\n" .
171 |           "see --help to disable strict mode\n"}],
172 | 
173 |   # missing fields, non-strict mode
174 |   ['tr4',  '--no-strict transpose', {IN_PIPE=>$in3}, {OUT=>$out3_tr}],
175 |   ['rev4', '--no-strict reverse',   {IN_PIPE=>$in3}, {OUT=>$out3_rev}],
176 |   ['tr4.1', '--no-strict --filler xxx transpose',
177 |     {IN_PIPE=>$in3}, {OUT=>$out3_filler_tr}],
178 | 
179 | 
180 |   # Single column
181 |   ['tr5',  'transpose', {IN_PIPE=>$in4}, {OUT=>$out4_tr}],
182 |   ['rev5', 'reverse',   {IN_PIPE=>$in4}, {OUT=>$out4_rev}],
183 | 
184 |   # Single row
185 |   ['tr6',  'transpose', {IN_PIPE=>$in5}, {OUT=>$out5_tr}],
186 |   ['rev6', 'reverse',   {IN_PIPE=>$in5}, {OUT=>$out5_rev}],
187 | 
188 |   # Single field
189 |   ['tr7',  'transpose', {IN_PIPE=>$in6}, {OUT=>$in6}],
190 |   ['rev7', 'reverse',   {IN_PIPE=>$in6}, {OUT=>$in6}],
191 | 
192 |   # Empty input
193 |   ['tr8',  'transpose', {IN_PIPE=>$in7}, {OUT=>""}],
194 |   ['rev8', 'reverse',   {IN_PIPE=>$in7}, {OUT=>""}],
195 | 
196 |   # Extra operands
197 |   ['tr9',  'transpose aaa', {IN_PIPE=>''}, {EXIT=>1},
198 |     {ERR=>"$prog: extra operand 'aaa'\n"}],
199 |   ['rev9', 'reverse aaa', {IN_PIPE=>''}, {EXIT=>1},
200 |     {ERR=>"$prog: extra operand 'aaa'\n"}],
201 | 
202 |   # empty input
203 |   ['tr10',  'transpose', {IN_PIPE=>""}, {OUT=>""}],
204 |   ['rev10', 'reverse',   {IN_PIPE=>""}, {OUT=>""}],
205 | 
206 |   # Reverse with header combinations
207 |   ['rev-hdr1','-H reverse', {IN_PIPE=>""}, {OUT=>""}],
208 |   ['rev-hdr2','--header-in reverse', {IN_PIPE=>""}, {OUT=>""}],
209 |   ['rev-hdr3','-t: reverse', {IN_PIPE=>$in_hdr1},
210 |     {OUT=>"Y:X\na:1\nb:2\n"}],
211 |   ['rev-hdr4','-t: -H reverse', {IN_PIPE=>$in_hdr1},
212 |     {OUT=>"Y:X\na:1\nb:2\n"}],
213 |   # first line is header line, discard it (there's no --header-out).
214 |   ['rev-hdr5','-t: --header-in reverse', {IN_PIPE=>$in_hdr1},
215 |     {OUT=>"a:1\nb:2\n"}],
216 |   # Generate a new header, assuming the first line is NOT a header line.
217 |   ['rev-hdr6','-t: --header-out reverse', {IN_PIPE=>$in_hdr1},
218 |     {OUT=>"field-2:field-1\nY:X\na:1\nb:2\n"}],
219 | 
220 |   # bug uncovered by report in:
221 |   # http://lists.gnu.org/archive/html/bug-datamash/2016-09/msg00000.html
222 |   ['msg1', '--no-strict transpose', {IN_PIPE=>$in_missing1},
223 |     {OUT=>$out_missing1}],
224 | );
225 | 
226 | my $save_temps = $ENV{SAVE_TEMPS};
227 | my $verbose = $ENV{VERBOSE};
228 | 
229 | my $fail = run_tests ($program_name, $prog_bin, \@Tests, $save_temps, $verbose);
230 | exit $fail;
231 | 


--------------------------------------------------------------------------------
/tests/datamash-pair-tests.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | =pod
  3 |   Unit Tests for GNU Datamash - perform simple calculation on input data
  4 | 
  5 |    Copyright (C) 2013-2021 Assaf Gordon <assafgordon@gmail.com>
  6 |    Copyright (C) 2022-2025 Timothy Rice <trice@posteo.net>
  7 | 
  8 |    This file is part of GNU Datamash.
  9 | 
 10 |    GNU Datamash is free software: you can redistribute it and/or modify
 11 |    it under the terms of the GNU General Public License as published by
 12 |    the Free Software Foundation, either version 3 of the License, or
 13 |    (at your option) any later version.
 14 | 
 15 |    GNU Datamash is distributed in the hope that it will be useful,
 16 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 |    GNU General Public License for more details.
 19 | 
 20 |    You should have received a copy of the GNU General Public License
 21 |    along with GNU Datamash.  If not, see <https://www.gnu.org/licenses/>.
 22 | 
 23 |    Written by Assaf Gordon.
 24 | =cut
 25 | use strict;
 26 | use warnings;
 27 | 
 28 | # Until a better way comes along to auto-use Coreutils Perl modules
 29 | # as in the coreutils' autotools system.
 30 | use Coreutils;
 31 | use CuSkip;
 32 | use CuTmpdir qw(datamash);
 33 | use MIME::Base64 ;
 34 | 
 35 | (my $program_name = $0) =~ s|.*/||;
 36 | my $prog_bin = 'datamash';
 37 | 
 38 | ## Cross-Compiling portability hack:
 39 | ##  under qemu/binfmt, argv[0] (which is used to report errors) will contain
 40 | ##  the full path of the binary, if the binary is on the $PATH.
 41 | ##  So we try to detect what is the actual returned value of the program
 42 | ##  in case of an error.
 43 | my $prog = `$prog_bin ---print-progname`;
 44 | $prog = $prog_bin unless $prog;
 45 | 
 46 | # TODO: add localization tests with "grouping"
 47 | # Turn off localization of executable's output.
 48 | @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
 49 | 
 50 | ##
 51 | ## Portability hack:
 52 | ## find the exact wording of 'nan' and inf (not-a-number).
 53 | ## It's lower case in GNU/Linux,FreeBSD,OpenBSD,
 54 | ## but is "NaN" on Illumos/OpenSolaris
 55 | my $nan = `$prog ---print-nan`;
 56 | die "test infrastructure failed: can't determine 'nan' string" unless $nan;
 57 | my $inf = `$prog ---print-inf`;
 58 | die "test infrastructure failed: can't determine 'inf' string" unless $inf;
 59 | 
 60 | =pod
 61 | Equivalent R code
 62 | 
 63 |     pop.sd=function(x)(sqrt(var(x)*(length(x)-1)/length(x)))
 64 |     smp.sd=sd
 65 | 
 66 |     # alternatively, use the built-in covariance function:
 67 |     # smp.cov=cov
 68 |     smp.cov <- function(x,y) {
 69 |       stopifnot(identical(length(x), length(y)))
 70 |       sum((x - mean(x)) * (y - mean(y))) / (length(x) - 1)
 71 |     }
 72 |     pop.cov <- function(x,y) {
 73 |       stopifnot(identical(length(x), length(y)))
 74 |       sum((x - mean(x)) * (y - mean(y))) / (length(x) )
 75 |    }
 76 | 
 77 |    # alternatively, use the built-in correlation function:
 78 |    #  smp.pearsoncor=cor
 79 |    smp.pearsoncor=function(x,y) { smp.cov(x,y)/ ( smp.sd(x)*smp.sd(y) ) }
 80 |    pop.pearsoncor=function(x,y) { pop.cov(x,y)/ ( pop.sd(x)*pop.sd(y) ) }
 81 | 
 82 |    in1.x=c(-0.49,0.14,1.62,2.76,-0.46,3.28,-0.01,2.90,2.46,1.52)
 83 |    in1.y=c(-0.21,-0.16,1.86,1.81,0.39,4.17,0.38,1.90,2.69,0.78)
 84 | 
 85 |    in2.x = c(1.599,-1.011,-1.687,5.070,6.944,7.934,2.134,5.150,
 86 |              10.197,11.427,10.379,14.867,11.399,13.479,18.328,16.573,
 87 |              17.804,18.694,16.690,21.805)
 88 |    in2.y = seq(20)
 89 | 
 90 | =cut
 91 | 
 92 | my $in1=<<"EOF";
 93 | -0.49	-0.21
 94 | 0.14	-0.16
 95 | 1.62	1.86
 96 | 2.76	1.81
 97 | -0.46	0.39
 98 | 3.28	4.17
 99 | -0.01	0.38
100 | 2.90	1.90
101 | 2.46	2.69
102 | 1.52	0.78
103 | EOF
104 | 
105 | 
106 | my $out1_scov=<<'EOF';
107 | 1.802
108 | EOF
109 | 
110 | my $out1_pcov=<<'EOF';
111 | 1.622
112 | EOF
113 | 
114 | my $out1_pcov_hdr=<<'EOF';
115 | pcov(field-1,field-2)
116 | 1.622
117 | EOF
118 | 
119 | my $out1_dotprod_hdr=<<'EOF';
120 | dotprod(field-1,field-2)
121 | 34.896
122 | EOF
123 | 
124 | my $in2=<<'EOF';
125 | 1.599	1
126 | -1.011	2
127 | -1.687	3
128 | 5.070	4
129 | 6.944	5
130 | 7.934	6
131 | 2.134	7
132 | 5.150	8
133 | 10.197	9
134 | 11.427	10
135 | 10.379	11
136 | 14.867	12
137 | 11.399	13
138 | 13.479	14
139 | 18.328	15
140 | 16.573	16
141 | 17.804	17
142 | 18.694	18
143 | 16.690	19
144 | 21.805	20
145 | EOF
146 | 
147 | my $out2_p=<<'EOF';
148 | 0.944
149 | EOF
150 | 
151 | my $out2_s=<<'EOF';
152 | 0.944
153 | EOF
154 | 
155 | my $in3=<<'EOF';
156 | 1	2
157 | EOF
158 | 
159 | my $in4=<<'EOF';
160 | NA	NA
161 | EOF
162 | 
163 | my $in5=<<'EOF';
164 | 1	2
165 | 2	NA
166 | 3	6
167 | EOF
168 | 
169 | my $in6=<<'EOF';
170 | x y
171 | 1 0.5
172 | 2 1
173 | 3 1.5
174 | 4 2
175 | EOF
176 | 
177 | my $out6_pcov_hdr=<<'EOF';
178 | pcov(x,y)
179 | 0.625
180 | EOF
181 | 
182 | my $out6_scov_hdr=<<'EOF';
183 | scov(x,y)
184 | 0.833
185 | EOF
186 | 
187 | my $out6_ppears_hdr=<<'EOF';
188 | ppearson(x,y)
189 | 1
190 | EOF
191 | 
192 | my $out6_spears_hdr=<<'EOF';
193 | spearson(x,y)
194 | 1
195 | EOF
196 | 
197 | my $out6_dotprod_hdr=<<'EOF';
198 | dotprod(x,y)
199 | 15
200 | EOF
201 | 
202 | my @Tests =
203 | (
204 |   ['c1', 'scov 1:2', {IN_PIPE=>$in1}, {OUT=>$out1_scov}],
205 |   ['c2', 'pcov 1:2', {IN_PIPE=>$in1}, {OUT=>$out1_pcov}],
206 |   ['dp1', 'dotprod 1:2', {IN_PIPE=>$in1}, {OUT=>"34.896\n"}],
207 | 
208 |   # Pair with output headers - only one field and header should be printed
209 |   ['c3', '--header-out pcov 1:2', {IN_PIPE=>$in1}, {OUT=>$out1_pcov_hdr}],
210 |   ['c3_hin_p', '-W --header-in --header-out pcov x:y',
211 |     {IN_PIPE=>$in6}, {OUT=>$out6_pcov_hdr}],
212 |   ['c3_hin_s', '-W --header-in --header-out scov x:y',
213 |     {IN_PIPE=>$in6}, {OUT=>$out6_scov_hdr}],
214 | 
215 |   ['p1', 'ppearson 1:2', {IN_PIPE=>$in2}, {OUT=>$out2_p}],
216 |   ['p1_hin', '-W --header-in --header-out ppearson x:y',
217 |     {IN_PIPE=>$in6}, {OUT=>$out6_ppears_hdr}],
218 |   ['p2', 'spearson 1:2', {IN_PIPE=>$in2}, {OUT=>$out2_s}],
219 |   ['p2_hin', '-W --header-in --header-out spearson x:y',
220 |     {IN_PIPE=>$in6}, {OUT=>$out6_spears_hdr}],
221 | 
222 |   ['dp2', '--header-out dotprod 1:2',
223 |     {IN_PIPE=>$in1}, {OUT=>$out1_dotprod_hdr}],
224 |   ['dp3', '-W --header-in --header-out dotprod x:y',
225 |     {IN_PIPE=>$in6}, {OUT=>$out6_dotprod_hdr}],
226 | 
227 |   # Test operations on edge-cases of input (one item, no items,
228 |   # different number of items)
229 |   ['c4', 'scov 1:2',     {IN_PIPE=>$in3}, {OUT=>"$nan\n"}],
230 |   ['p4', 'spearson 1:2', {IN_PIPE=>$in3}, {OUT=>"$nan\n"}],
231 | 
232 |   ['c5', '--narm scov 1:2',     {IN_PIPE=>$in4}, {OUT=>"$nan\n"}],
233 |   ['p5', '--narm spearson 1:2', {IN_PIPE=>$in4}, {OUT=>"$nan\n"}],
234 |   ['dp5', '--narm dotprod 1:2', {IN_PIPE=>$in4}, {OUT=>"$nan\n"}],
235 | 
236 |   ['c6', '--narm scov 1:2',     {IN_PIPE=>$in5}, {EXIT=>1},
237 |     {ERR=>"$prog: input error for operation 'scov': " .
238 |           "fields 1,2 have different number of items\n"}],
239 |   ['p6', '--narm spearson 1:2', {IN_PIPE=>$in5}, {EXIT=>1},
240 |     {ERR=>"$prog: input error for operation 'spearson': " .
241 |           "fields 1,2 have different number of items\n"}],
242 |   ['dp6', '--narm dotprod 1:2', {IN_PIPE=>$in5}, {EXIT=>1},
243 |     {ERR=>"$prog: input error for operation 'dotprod': " .
244 |           "fields 1,2 have different number of items\n"}],
245 | );
246 | 
247 | my $save_temps = $ENV{SAVE_TEMPS};
248 | my $verbose = $ENV{VERBOSE};
249 | 
##
## For each test, trim the resulting value to maximum three digits
## after the decimal point.
##
## The substitution is stored as a string in an OUT_SUBST entry appended to
## every test.  The captured prefix is referenced as $1: inside the
## replacement part of s///, \1 is a sed-ism that perl warns about
## ("\1 better written as $1").
##
for my $t (@Tests) {
 push @{$t}, {OUT_SUBST=>'s/^(-?\d+\.\d{1,3})\d*/$1/'};
}
257 | 
258 | 
259 | my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
260 | exit $fail;
261 | 


--------------------------------------------------------------------------------