├── dev
    ├── tests
    │   ├── source
    │   │   ├── lorem_pdf.pdf
    │   │   ├── lorem_png.png
    │   │   ├── lorem_tif.tif
    │   │   └── lorem_searchable_pdf.pdf
    │   ├── shunit2
    │   │   ├── shunit2_standalone_test.sh
    │   │   ├── lib
    │   │   │   ├── shlib
    │   │   │   └── versions
    │   │   ├── gen_test_report.sh
    │   │   ├── shunit2_failures_test.sh
    │   │   ├── CODE_OF_CONDUCT.md
    │   │   ├── test_runner
    │   │   ├── shunit2_asserts_test.sh
    │   │   ├── shunit2_test_helpers
    │   │   ├── shunit2_macros_test.sh
    │   │   ├── shunit2_misc_test.sh
    │   │   ├── LICENSE
    │   │   └── README.md
    │   ├── conf
    │   │   ├── default.conf
    │   │   └── service.conf
    │   └── run_tests.sh
    ├── bootstrap.sh
    ├── merge.sh
    ├── common_install.sh
    └── n_pmocr.sh
├── .travis.yml
├── pmocr-srv@.service
├── LICENCE.TXT
├── pmocr-srv
├── README.md
├── CHANGELOG.md
└── default.conf


/dev/tests/source/lorem_pdf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deajan/pmOCR/HEAD/dev/tests/source/lorem_pdf.pdf


--------------------------------------------------------------------------------
/dev/tests/source/lorem_png.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deajan/pmOCR/HEAD/dev/tests/source/lorem_png.png


--------------------------------------------------------------------------------
/dev/tests/source/lorem_tif.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deajan/pmOCR/HEAD/dev/tests/source/lorem_tif.tif


--------------------------------------------------------------------------------
/dev/tests/source/lorem_searchable_pdf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deajan/pmOCR/HEAD/dev/tests/source/lorem_searchable_pdf.pdf


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language:
 2 |     bash
 3 | 
 4 | os:
 5 |     linux
 6 |     osx
 7 | 
 8 | before_install:
 9 |     - sudo apt-get update -y
10 |     - sudo apt-get install tesseract-ocr tesseract-ocr-osd tesseract-ocr-eng ghostscript inotify-tools poppler-utils -y
11 | 
12 | script:
13 |     TRAVIS_RUN=true dev/tests/run_tests.sh
14 | 
15 | 


--------------------------------------------------------------------------------
/pmocr-srv@.service:
--------------------------------------------------------------------------------
 1 | [Unit]
 2 | Description=pmocr - monitors a local directory and gives any new file to your favorite OCR engine
 3 | After=local-fs.target
 4 | Requires=local-fs.target
 5 | 
 6 | [Service]
 7 | Type=simple
 8 | ExecStart=/usr/local/bin/pmocr.sh --service --config=/etc/pmocr/%i
 9 | SuccessExitStatus=0 2
10 | 
11 | [Install]
12 | WantedBy=multi-user.target
13 | 


--------------------------------------------------------------------------------
/dev/tests/shunit2/shunit2_standalone_test.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | # vim:et:ft=sh:sts=2:sw=2
 3 | #
 4 | # shUnit2 unit test for standalone operation.
 5 | #
 6 | # Copyright 2010-2017 Kate Ward. All Rights Reserved.
 7 | # Released under the Apache 2.0 license.
 8 | #
 9 | # Author: kate.ward@forestent.com (Kate Ward)
10 | # https://github.com/kward/shunit2
11 | #
12 | # This unit test is purely to test that calling shunit2 directly, while passing
13 | # the name of a unit test script, works. When run, this script determines if it
14 | # is running as a standalone program, and calls main() if it is.
15 | #
16 | ### ShellCheck http://www.shellcheck.net/
17 | # $() are not fully portable (POSIX != portable).
18 | #   shellcheck disable=SC2006
19 | # Disable source following.
20 | #   shellcheck disable=SC1090,SC1091
21 | 
# Name this script was invoked as; used below to detect standalone execution.
ARGV0="`basename "$0"`"

# Load test helpers.
. ./shunit2_test_helpers

# Trivial always-passing test: the point of this file is the invocation path,
# not the assertion itself.
testStandalone() {
  assertTrue "${SHUNIT_TRUE}"
}

# Runs the shunit2 executable named by TH_SHUNIT (presumably set by the test
# helpers -- confirm there) with this script's name as its argument.
main() {
  ${TH_SHUNIT} "${ARGV0}"
}

# Are we running as a standalone?
# The comparison must use this file's actual name; with the former name
# ('shunit2_test_standalone.sh') the guard never matched and nothing ran.
if [ "${ARGV0}" = 'shunit2_standalone_test.sh' ]; then
  if [ $# -gt 0 ]; then main "$@"; else main; fi
fi
39 | 


--------------------------------------------------------------------------------
/dev/tests/shunit2/lib/shlib:
--------------------------------------------------------------------------------
 1 | # vim:et:ft=sh:sts=2:sw=2
 2 | #
 3 | # Copyright 2008 Kate Ward. All Rights Reserved.
 4 | # Released under the LGPL (GNU Lesser General Public License).
 5 | #
 6 | # Author: kate.ward@forestent.com (Kate Ward)
 7 | #
 8 | # Library of shell functions.
 9 | 
# Convert a relative path into its absolute equivalent.
#
# This function will automatically prepend the current working directory if the
# path is not already absolute. It then repeatedly removes "name/.." pairs and
# "/./" segments until the path no longer changes.
#
# Only a ".." that is a complete path component (followed by a slash or by the
# end of the path) is treated as a parent reference, so names that merely start
# with two dots (e.g. "..cache" or "...") are left untouched.
#
# Args:
#   shlib_path_: string: relative path
# Outputs:
#   string: absolute path
shlib_relToAbsPath()
{
  shlib_path_=$1

  # prepend current directory to relative paths
  case "${shlib_path_}" in
    /*) ;;
    *) shlib_path_="${PWD}/${shlib_path_}" ;;
  esac

  # clean up the path. each sed pass removes at most one parent reference per
  # expression, so the substitution is repeated until a fixed point is reached.
  shlib_old_=${shlib_path_}
  while true; do
    shlib_new_=$(printf '%s\n' "${shlib_old_}" \
        |sed 's/[^/]*\/\.\.\/\/*//;s/[^/]*\/\.\.$//;s/\/\.\//\//')
    [ "${shlib_old_}" = "${shlib_new_}" ] && break
    shlib_old_=${shlib_new_}
  done
  echo "${shlib_new_}"

  unset shlib_path_ shlib_old_ shlib_new_
}
40 | 


--------------------------------------------------------------------------------
/LICENCE.TXT:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2013-2016, Orsiris de Jong. ozy@netpower.fr
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 |     * Redistributions of source code must retain the above copyright
 7 |       notice, this list of conditions and the following disclaimer.
 8 |     * Redistributions in binary form must reproduce the above copyright
 9 |       notice, this list of conditions and the following disclaimer in the
10 |       documentation and/or other materials provided with the distribution.
11 |     * Neither the name of the author nor the
12 |       names of its contributors may be used to endorse or promote products
13 |       derived from this software without specific prior written permission.
14 | 
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | 


--------------------------------------------------------------------------------
/dev/bootstrap.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ## dev pre-processor bootstrap rev 2019052001
 4 | ## Yeah !!! A really tech sounding name... In fact it's just include emulation in bash
 5 | 
 6 | function Usage {
 7 | 	echo "$0 - Quick and dirty preprocessor for including ofunctions into programs"
 8 | 	echo "Creates and executes $0.tmp.sh"
 9 | 	echo "Usage:"
10 | 	echo ""
11 | 	echo "$0 --program=osync|obackup|pmocr [options to pass to program]"
12 | 	echo "Can also be run with BASHVERBOSE=yes environment variable in order  to prefix program with bash -x"
13 | }
14 | 
15 | 
# The pre-processor functions live in merge.sh, which must sit in the current
# directory together with the n_<program>.sh sources.
if [ ! -f "./merge.sh" ]; then
	echo "Please run bootstrap.sh from osync/dev directory."
	exit 1
fi

bootstrapProgram=""
opts=()
outputFileName="$0"

# Split the command line: --program=<name> selects what to bootstrap, every
# other argument is forwarded untouched to the generated script.
for i in "${@}"; do
	case "$i" in
		--program=*)
		# Strip only up to the first '=' so the value is kept whole
		bootstrapProgram="${i#*=}"
		;;
		*)
		opts+=("$i")
		;;
	esac
done

if [ "$bootstrapProgram" == "" ]; then
	Usage
	exit 128
else
	# Explicit ./ prefix: a bare name would make 'source' look in PATH first
	source "./merge.sh"

	__PREPROCESSOR_PROGRAM=$bootstrapProgram
	__PREPROCESSOR_PROGRAM_EXEC="n_$bootstrapProgram.sh"
	__PREPROCESSOR_Constants

	if [ ! -f "$__PREPROCESSOR_PROGRAM_EXEC" ]; then
		echo "Cannot find file $__PREPROCESSOR_PROGRAM executable [n_$bootstrapProgram.sh]."
		exit 1
	fi
fi

# Work on a copy so the original n_<program>.sh is never modified
if ! cp "$__PREPROCESSOR_PROGRAM_EXEC" "$outputFileName.tmp.sh"; then
	echo "Cannot copy original file [$__PREPROCESSOR_PROGRAM_EXEC] to [$outputFileName.tmp.sh]."
	exit 1
fi

# Inline every known ofunctions.sh subset into the copy
for subset in "${__PREPROCESSOR_SUBSETS[@]}"; do
	__PREPROCESSOR_MergeSubset "$subset" "${subset//SUBSET/SUBSET END}" "ofunctions.sh" "$outputFileName.tmp.sh"
done

if ! chmod +x "$outputFileName.tmp.sh"; then
	echo "Cannot make [$outputFileName.tmp.sh] executable."
	exit 1
fi

# Termux fix
if type termux-fix-shebang > /dev/null 2>&1; then
	termux-fix-shebang "$outputFileName.tmp.sh"
fi

# Run the generated script; its exit status becomes the status of this script
if [ "$BASHVERBOSE" == "yes" ]; then
	bash -x "$outputFileName.tmp.sh" "${opts[@]}"
else
	"$outputFileName.tmp.sh" "${opts[@]}"
fi
76 | 


--------------------------------------------------------------------------------
/dev/tests/shunit2/gen_test_report.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | # vim:et:ft=sh:sts=2:sw=2
 3 | #
 4 | # This script runs the provided unit tests and sends the output to the
 5 | # appropriate file.
 6 | #
 7 | # Copyright 2008-2017 Kate Ward. All Rights Reserved.
 8 | # Released under the Apache 2.0 license.
 9 | #
10 | # Author: kate.ward@forestent.com (Kate Ward)
11 | # https://github.com/kward/shunit2
12 | #
13 | # Source following.
14 | #   shellcheck disable=SC1090,SC1091
15 | # FLAGS variables are dynamically created.
16 | #   shellcheck disable=SC2154
17 | # Disagree with [ p ] && [ q ] vs [ p -a -q ] recommendation.
18 | #   shellcheck disable=SC2166
19 | 
20 | # Treat unset variables as an error.
21 | set -u
22 | 
# Abort the script with exit status 1.
# When arguments are given they are printed to stderr, prefixed with "error: ".
die() {
  if [ $# -gt 0 ]; then
    echo "error: $*" >&2
  fi
  exit 1
}
27 | 
BASE_DIR=$(dirname "$0")
LIB_DIR="${BASE_DIR}/lib"

### Load libraries.
. "${LIB_DIR}/shflags" || die 'unable to load shflags library'
. "${LIB_DIR}/shlib" || die 'unable to load shlib library'
. "${LIB_DIR}/versions" || die 'unable to load versions library'

# Redefining BASE_DIR now that we have the shlib functions. We need BASE_DIR so
# that we can properly load things, even in the event that this script is called
# from a different directory.
BASE_DIR=$(shlib_relToAbsPath "${BASE_DIR}")

# Define flags.
os_name=$(versions_osName |sed 's/ /_/g')
os_version=$(versions_osVersion)

DEFINE_boolean force false 'force overwrite' f
# TMPDIR is frequently unset; with 'set -u' in effect a bare ${TMPDIR} would
# abort the script, so fall back to /tmp.
DEFINE_string output_dir "${TMPDIR:-/tmp}" 'output dir' d
DEFINE_string output_file "${os_name}-${os_version}.txt" 'output file' o
DEFINE_string runner 'test_runner' 'unit test runner' r
DEFINE_boolean dry_run false "suppress logging to a file" n
50 | 
# Runs the unit test runner and, unless --dry_run is set, tees its output into
# the report file built from the output_dir and output_file flags.
#
# An existing report is only replaced when --force is given; otherwise the
# script exits with FLAGS_ERROR.
main() {
  # Determine output filename.
  # shellcheck disable=SC2154
  output="${FLAGS_output_dir:+${FLAGS_output_dir}/}${FLAGS_output_file}"
  output=$(shlib_relToAbsPath "${output}")

  # Dry run: just execute the runner, never touch the report file.
  if ! [ "${FLAGS_dry_run}" -eq "${FLAGS_FALSE}" ]; then
    ( "./${FLAGS_runner}" )
    return 0
  fi

  # Checks.
  if [ -f "${output}" ]; then
    if ! [ "${FLAGS_force}" -eq "${FLAGS_TRUE}" ]; then
      echo "not overwriting '${output}'" >&2
      exit "${FLAGS_ERROR}"
    fi
    rm -f "${output}"
  fi
  touch "${output}" 2>/dev/null || die "unable to write to '${output}'"

  # Run tests.
  ( "./${FLAGS_runner}" |tee "${output}" )

  echo >&2
  echo "Output written to '${output}'." >&2
}
84 | 
85 | FLAGS "$@" || exit $?
86 | [ "${FLAGS_help}" -eq "${FLAGS_FALSE}" ] || exit
87 | eval set -- "${FLAGS_ARGV}"
88 | main "${@:-}"
89 | 


--------------------------------------------------------------------------------
/dev/tests/shunit2/shunit2_failures_test.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | # vim:et:ft=sh:sts=2:sw=2
 3 | #
 4 | # shUnit2 unit test for failure functions
 5 | #
 6 | # Copyright 2008-2017 Kate Ward. All Rights Reserved.
 7 | # Released under the LGPL (GNU Lesser General Public License)
 8 | #
 9 | # Author: kate.ward@forestent.com (Kate Ward)
10 | # https://github.com/kward/shunit2
11 | #
12 | # Disable source following.
13 | #   shellcheck disable=SC1090,SC1091
14 | 
15 | # These variables will be overridden by the test helpers.
16 | stdoutF="${TMPDIR:-/tmp}/STDOUT"
17 | stderrF="${TMPDIR:-/tmp}/STDERR"
18 | 
19 | # Load test helpers.
20 | . ./shunit2_test_helpers
21 | 
# Checks fail() when called without arguments, with a message, and with a
# single bare-word argument.
#
# Every call runs in a subshell with stdout/stderr captured into ${stdoutF} and
# ${stderrF}; $? is read on the very next line, so no command may be inserted
# between a subshell and its th_assert* check. The th_assert* helpers are
# defined in the sourced test helpers (not visible in this file).
testFail() {
  ( fail >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithOutput 'fail' $? "${stdoutF}" "${stderrF}"

  ( fail "${MSG}" >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithOutput 'fail with msg' $? "${stdoutF}" "${stderrF}"

  ( fail arg1 >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithOutput 'too many arguments' $? "${stdoutF}" "${stderrF}"
}
32 | 
# Checks failNotEquals() with equal values, a leading message, differing
# values, empty values, and with too few / too many arguments.
#
# The first four cases are checked with th_assertFalseWithOutput, the two
# argument-count cases with th_assertFalseWithError (both helpers come from the
# sourced test helpers, not visible here). $? must be read on the line directly
# after each subshell.
testFailNotEquals() {
  ( failNotEquals 'x' 'x' >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithOutput 'same' $? "${stdoutF}" "${stderrF}"

  ( failNotEquals "${MSG}" 'x' 'x' >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithOutput 'same with msg' $? "${stdoutF}" "${stderrF}"

  ( failNotEquals 'x' 'y' >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithOutput 'not same' $? "${stdoutF}" "${stderrF}"

  ( failNotEquals '' '' >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithOutput 'null values' $? "${stdoutF}" "${stderrF}"

  # Argument-count errors.
  ( failNotEquals >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithError 'too few arguments' $? "${stdoutF}" "${stderrF}"

  ( failNotEquals arg1 arg2 arg3 arg4 >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithError 'too many arguments' $? "${stdoutF}" "${stderrF}"
}
52 | 
# Checks failSame() with equal values, a leading message, differing values,
# empty values, and with too few / too many arguments.
#
# Mirrors testFailNotEquals: value cases use th_assertFalseWithOutput, the
# argument-count cases use th_assertFalseWithError. $? must be read on the line
# directly after each subshell.
testFailSame() {
  ( failSame 'x' 'x' >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithOutput 'same' $? "${stdoutF}" "${stderrF}"

  ( failSame "${MSG}" 'x' 'x' >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithOutput 'same with msg' $? "${stdoutF}" "${stderrF}"

  ( failSame 'x' 'y' >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithOutput 'not same' $? "${stdoutF}" "${stderrF}"

  ( failSame '' '' >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithOutput 'null values' $? "${stdoutF}" "${stderrF}"

  # Argument-count errors.
  ( failSame >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithError 'too few arguments' $? "${stdoutF}" "${stderrF}"

  ( failSame arg1 arg2 arg3 arg4 >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithError 'too many arguments' $? "${stdoutF}" "${stderrF}"
}
72 | 
# One-time fixture: delegates to th_oneTimeSetUp from the sourced test helpers
# (not visible here) and defines MSG, the message passed to the fail* calls in
# the tests above.
oneTimeSetUp() {
  th_oneTimeSetUp

  MSG='This is a test message'
}
78 | 
79 | # Load and run shUnit2.
80 | # shellcheck disable=SC2034
81 | [ -n "${ZSH_VERSION:-}" ] && SHUNIT_PARENT=$0
82 | . "${TH_SHUNIT}"
83 | 


--------------------------------------------------------------------------------
/dev/tests/shunit2/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Covenant Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
 6 | 
 7 | ## Our Standards
 8 | 
 9 | Examples of behavior that contributes to creating a positive environment include:
10 | 
11 | * Using welcoming and inclusive language
12 | * Being respectful of differing viewpoints and experiences
13 | * Gracefully accepting constructive criticism
14 | * Focusing on what is best for the community
15 | * Showing empathy towards other community members
16 | 
17 | Examples of unacceptable behavior by participants include:
18 | 
19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances
20 | * Trolling, insulting/derogatory comments, and personal or political attacks
21 | * Public or private harassment
22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission
23 | * Other conduct which could reasonably be considered inappropriate in a professional setting
24 | 
25 | ## Our Responsibilities
26 | 
27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
28 | 
29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
30 | 
31 | ## Scope
32 | 
33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
34 | 
35 | ## Enforcement
36 | 
37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at kate.ward@forestent.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
38 | 
39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
40 | 
41 | ## Attribution
42 | 
43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
44 | 
45 | [homepage]: http://contributor-covenant.org
46 | [version]: http://contributor-covenant.org/version/1/4/
47 | 


--------------------------------------------------------------------------------
/pmocr-srv:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | #
  3 | # Automatic OCR Service
  4 | #
  5 | # chkconfig: 35 55 25
  6 | # description: monitors a local directory and gives any new file to your favorite OCR engine
  7 | # processname: /usr/local/bin/pmocr.sh
  8 | # pidfile: /var/run/pmocr
  9 | 
 10 | ### BEGIN INIT INFO
 11 | # Provides:             pmocr-srv
 12 | # Required-Start:       $local_fs $time
 13 | # Required-Stop:        $local_fs $time
 14 | # Default-Start:        2 3 4 5
 15 | # Default-Stop:         0 1 6
 16 | # Short-Description:    pmocr daemon
 17 | # Description:          OCR wrapper service
 18 | ### END INIT INFO
 19 | 
 20 | prog=pmocr
 21 | progexec=pmocr.sh
 22 | progpath=/usr/local/bin
 23 | confdir=/etc/$prog
 24 | pidfile=/var/run/$prog
 25 | SCRIPT_BUILD=2018122101
 26 | 
 27 | if [ ! -f $progpath/$progexec ] && [ ! -f $progexec ]
 28 | then
 29 | 	echo "Cannot find $prog executable in $progpath nor in local path."
 30 | 	exit 1
 31 | fi
 32 | 
 33 | if [ ! -w $(dirname $pidfile) ]
 34 | then
 35 | 	pidfile=./$prog
 36 | fi
 37 | 
 38 | start() {
 39 | 	if ! ls "$confdir/"*.conf > /dev/null 2>&1; then
 40 | 		echo "Cannot find any configuration files in $confdir."
 41 | 		exit 1
 42 | 	fi
 43 | 
 44 | 	errno=0
 45 | 
 46 | 	for cfgfile in "$confdir"/*.conf
 47 | 	do
 48 | 		if [ -f $progpath/$progexec ]
 49 | 		then
 50 | 			$progpath/$progexec --config=$cfgfile --service > /dev/null 2>&1 &
 51 | 		elif [ -f ./$progexec ]
 52 | 		then
 53 | 			./$progexec --config=$cfgfile --service > /dev/null 2>&1 &
 54 | 		else
 55 | 			echo "Cannot find $prog executable in $progpath"
 56 | 			exit 1
 57 | 		fi
 58 | 
 59 | 		pid=$!
 60 | 		retval=$?
 61 | 
 62 | 		if [ $? == 0 ]
 63 | 		then
 64 | 			echo $pid > "$pidfile-$(basename $cfgfile)"
 65 | 			echo "$prog successfully started for configuration file $cfgfile"
 66 | 		else
 67 | 			echo "Cannot start $prog for configuration file $cfgfile"
 68 | 			errno = 1
 69 | 		fi
 70 | 	done
 71 | 
 72 | 	exit $errno
 73 | }
 74 | 
 75 | stop() {
 76 | 	if [ ! -f $pidfile-* ]
 77 | 	then
 78 | 		echo "No running $prog instances found."
 79 | 		exit 1
 80 | 	fi
 81 | 
 82 | 	for pfile in $pidfile-*
 83 | 	do
 84 | 		if ps -p$(cat $pfile) > /dev/null 2>&1
 85 | 		then
 86 | 			kill -TERM $(cat $pfile)
 87 | 			if [ $? == 0 ]
 88 | 			then
 89 | 				rm -f $pfile
 90 | 				echo "$prog instance $(basename $pfile) stopped."
 91 | 			else
 92 | 				echo "Cannot stop $prog instance $(basename $pfile)"
 93 | 			fi
 94 | 		else
 95 | 			rm -f $pfile
 96 | 			echo "$prog instance $pfile (pid $(cat $pfile)) is dead but pidfile exists."
 97 | 		fi
 98 | 	done
 99 | }
100 | 
# Reports, for every "$pidfile-*" file, whether the recorded pid is alive.
#
# Globals read: prog, pidfile
# Exits: 0 when all recorded instances are running, 1 when no pid file exists
#        or at least one recorded instance is dead. This function always exits.
status() {
	local pfile pid
	local found=0
	local errno=0

	# Iterate over the glob and test each entry: a plain [ -f $pidfile-* ] breaks
	# as soon as more than one pid file exists.
	for pfile in "$pidfile"-*; do
		[ -f "$pfile" ] || continue
		found=1
		pid=$(cat "$pfile")

		if ps -p "$pid" > /dev/null 2>&1; then
			echo "$prog instance $(basename "$pfile") is running (pid $pid)"
		else
			echo "$prog instance $pfile (pid $pid) is dead but pidfile exists."
			errno=1
		fi
	done

	if [ "$found" -eq 0 ]; then
		echo "Cannot find any running $prog instance."
		exit 1
	fi

	exit $errno
}
123 | 
# Dispatch on the requested init action.
# stop() and status() terminate the script through 'exit', so whenever another
# step has to follow they are run in a subshell.
case "$1" in
	start)
	start
	;;
	stop)
	stop
	;;
	restart)
	# A failing stop (nothing was running) must not prevent the start
	( stop )
	start
	;;
	status)
	status
	;;
	condrestart|try-restart)
	# Only restart when status reports every recorded instance as running
	( status ) || exit 0
	( stop )
	start
	;;
	*)
	echo "Usage: $0 {start|stop|restart|status}"
	;;
esac

exit 0
148 | 


--------------------------------------------------------------------------------
/dev/tests/shunit2/test_runner:
--------------------------------------------------------------------------------
  1 | #! /bin/sh
  2 | # vim:et:ft=sh:sts=2:sw=2
  3 | #
  4 | # Unit test suite runner.
  5 | #
  6 | # Copyright 2008-2017 Kate Ward. All Rights Reserved.
  7 | # Released under the Apache 2.0 license.
  8 | #
  9 | # Author: kate.ward@forestent.com (Kate Ward)
 10 | # https://github.com/kward/shlib
 11 | #
 12 | # This script runs all the unit tests that can be found, and generates a nice
 13 | # report of the tests.
 14 | #
 15 | ### ShellCheck (http://www.shellcheck.net/)
 16 | # Disable source following.
 17 | #   shellcheck disable=SC1090,SC1091
 18 | # expr may be antiquated, but it is the only solution in some cases.
 19 | #   shellcheck disable=SC2003
 20 | # $() are not fully portable (POSIX != portable).
 21 | #   shellcheck disable=SC2006
 22 | 
 23 | # Return if test_runner already loaded.
 24 | [ -z "${RUNNER_LOADED:-}" ] || return 0
 25 | RUNNER_LOADED=0
 26 | 
 27 | RUNNER_ARGV0=`basename "$0"`
 28 | RUNNER_SHELLS='/bin/sh ash /bin/bash /bin/dash /bin/ksh /bin/pdksh /bin/zsh'
 29 | RUNNER_TEST_SUFFIX='_test.sh'
 30 | 
 31 | runner_warn() { echo "runner:WARN $*" >&2; }
 32 | runner_error() { echo "runner:ERROR $*" >&2; }
 33 | runner_fatal() { echo "runner:FATAL $*" >&2; exit 1; }
 34 | 
 35 | runner_usage() {
 36 |   echo "usage: ${RUNNER_ARGV0} [-e key=val ...] [-s shell(s)] [-t test(s)]"
 37 | }
 38 | 
 39 | _runner_tests() { echo ./*${RUNNER_TEST_SUFFIX} |sed 's#./##g'; }
 40 | _runner_testName() {
 41 |   # shellcheck disable=SC1117
 42 |   _runner_testName_=`expr "${1:-}" : "\(.*\)${RUNNER_TEST_SUFFIX}"`
 43 |   if [ -n "${_runner_testName_}" ]; then
 44 |     echo "${_runner_testName_}"
 45 |   else
 46 |     echo 'unknown'
 47 |   fi
 48 |   unset _runner_testName_
 49 | }
 50 | 
# Entry point of the test runner.
#
# Loads the versions library, parses the command line (-e key=val, -h,
# -s shells, -t tests), prints a system/report header and then executes every
# selected test script with every selected shell that is present.
#
# Exits with status 1 on a usage error or when no tests are found, and via
# runner_fatal when the versions library cannot be found or loaded.
main() {
  # Find and load versions library.
  for _runner_dir_ in . ${LIB_DIR:-lib}; do
    if [ -r "${_runner_dir_}/versions" ]; then
      _runner_lib_dir_="${_runner_dir_}"
      break
    fi
  done
  [ -n "${_runner_lib_dir_}" ] || runner_fatal 'Unable to find versions library.'
  . "${_runner_lib_dir_}/versions" || runner_fatal 'Unable to load versions library.'
  unset _runner_dir_ _runner_lib_dir_

  # Process command line flags.
  env=''
  while getopts 'e:hs:t:' opt; do
    case ${opt} in
      e)  # set an environment variable
        key=`expr "${OPTARG}" : '\([^=]*\)='`
        val=`expr "${OPTARG}" : '[^=]*=\(.*\)'`
        # shellcheck disable=SC2166
        if [ -z "${key}" -o -z "${val}" ]; then
          runner_usage
          exit 1
        fi
        # NOTE(review): key and val come straight from the command line and are
        # passed to eval; only use -e with trusted input.
        eval "${key}='${val}'"
        eval "export ${key}"
        # Remember the variable name so it can be echoed in the report header.
        env="${env:+${env} }${key}"
        ;;
      h) runner_usage; exit 0 ;;  # help output
      s) shells=${OPTARG} ;;  # list of shells to run
      t) tests=${OPTARG} ;;  # list of tests to run
      *) runner_usage; exit 1 ;;
    esac
  done
  shift "`expr ${OPTIND} - 1`"

  # Fill shells and/or tests.
  shells=${shells:-${RUNNER_SHELLS}}
  [ -z "${tests}" ] && tests=`_runner_tests`

  # Error checking.
  if [ -z "${tests}" ]; then
    runner_error 'no tests found to run; exiting'
    exit 1
  fi

  cat <<EOF
#------------------------------------------------------------------------------
# System data.
#

$ uname -mprsv
`uname -mprsv`

OS Name: `versions_osName`
OS Version: `versions_osVersion`

### Test run info.
shells: ${shells}
tests: ${tests}
EOF
# Echo every variable that was set with -e as key=value.
for key in ${env}; do
  eval "echo \"${key}=\$${key}\""
done

# Run tests.
for shell in ${shells}; do
  echo

  cat <<EOF

#------------------------------------------------------------------------------
# Running the test suite with ${shell}.
#
EOF

    # Check for existence of shell.
    # TRUE and FALSE are not set in this file; presumably they are provided by
    # the versions library sourced above -- confirm there.
    shell_bin=${shell}
    shell_name=''
    shell_present=${FALSE}
    case ${shell} in
      ash)
        # ash is run through busybox ("busybox ash").
        shell_bin=`which busybox |grep -v '^no busybox'`
        [ $? -eq "${TRUE}" -a -n "${shell_bin}" ] && shell_present="${TRUE}"
        shell_bin="${shell_bin} ash"
        shell_name=${shell}
        ;;
      *)
        [ -x "${shell_bin}" ] && shell_present="${TRUE}"
        shell_name=`basename "${shell}"`
        ;;
    esac
    if [ "${shell_present}" -eq "${FALSE}" ]; then
      runner_warn "unable to run tests with the ${shell_name} shell"
      continue
    fi

    shell_version=`versions_shellVersion "${shell}"`

    echo "shell name: ${shell_name}"
    echo "shell version: ${shell_version}"

    # Execute the tests.
    for t in ${tests}; do
      echo
      echo "--- Executing the '`_runner_testName "${t}"`' test suite. ---"
      # ${shell_bin} needs word splitting.
      #   shellcheck disable=SC2086
      ( exec ${shell_bin} "./${t}" 2>&1; )
    done
  done
}
163 | 
164 | # Execute main() if this is run in standalone mode (i.e. not from a unit test).
165 | [ -z "${SHUNIT_VERSION}" ] && main "$@"
166 | 


--------------------------------------------------------------------------------
/dev/merge.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | ## MERGE 2022022301
  4 | 
  5 | ## Merges ofunctions.sh and n_program.sh into program.sh
  6 | ## Adds installer
  7 | 
  8 | PROGRAM=merge
  9 | INSTANCE_ID=dev
 10 | 
 11 | function Usage {
 12 | 	echo "Merges ofunctions.sh and n_program.sh into debug_program.sh and ../program.sh"
 13 | 	echo "Usage"
 14 | 	echo "$0 osync|obackup|pmocr"
 15 | }
 16 | 
 17 | function __PREPROCESSOR_Merge {
 18 | 	local nPROGRAM="$1"
 19 | 
 20 | 	if [ -f "$nPROGRAM" ]; then
 21 | 		Logger "$nPROGRAM is not found in local path." "CRITICAL"
 22 | 		exit 1
 23 | 	fi
 24 | 
 25 | 	VERSION=$(grep "PROGRAM_VERSION=" n_$nPROGRAM.sh)
 26 | 	VERSION=${VERSION#*=}
 27 | 	__PREPROCESSOR_Constants
 28 | 
 29 | 	__PREPROCESSOR_Unexpand "n_$nPROGRAM.sh" "debug_$nPROGRAM.sh"
 30 | 
 31 | 	for subset in "${__PREPROCESSOR_SUBSETS[@]}"; do
 32 | 		__PREPROCESSOR_MergeSubset "$subset" "${subset//SUBSET/SUBSET END}" "ofunctions.sh" "debug_$nPROGRAM.sh"
 33 | 	done
 34 | 
 35 | 	__PREPROCESSOR_CleanDebug "debug_$nPROGRAM.sh" "../$nPROGRAM.sh"
 36 | }
 37 | 
 38 | function __PREPROCESSOR_Constants {
 39 | 	PARANOIA_DEBUG_LINE="#__WITH_PARANOIA_DEBUG"
 40 | 	PARANOIA_DEBUG_BEGIN="#__BEGIN_WITH_PARANOIA_DEBUG"
 41 | 	PARANOIA_DEBUG_END="#__END_WITH_PARANOIA_DEBUG"
 42 | 
 43 | 	__PREPROCESSOR_SUBSETS=(
 44 | 	'#### OFUNCTIONS FULL SUBSET ####'
 45 | 	'#### OFUNCTIONS MINI SUBSET ####'
 46 | 	'#### OFUNCTIONS MICRO SUBSET ####'
 47 | 	'#### PoorMansRandomGenerator SUBSET ####'
 48 | 	'#### _OFUNCTIONS_BOOTSTRAP SUBSET ####'
 49 | 	'#### RUN_DIR SUBSET ####'
 50 | 	'#### DEBUG SUBSET ####'
 51 | 	'#### TrapError SUBSET ####'
 52 | 	'#### RemoteLogger SUBSET ####'
 53 | 	'#### Logger SUBSET ####'
 54 | 	'#### GetLocalOS SUBSET ####'
 55 | 	'#### IsInteger SUBSET ####'
 56 | 	'#### UrlEncode SUBSET ####'
 57 | 	'#### HumanToNumeric SUBSET ####'
 58 | 	'#### ArrayContains SUBSET ####'
 59 | 	'#### VerComp SUBSET ####'
 60 | 	'#### GetConfFileValue SUBSET ####'
 61 | 	'#### SetConfFileValue SUBSET ####'
 62 | 	'#### CheckRFC822 SUBSET ####'
 63 | 	'#### CleanUp SUBSET ####'
 64 | 	'#### GenericTrapQuit SUBSET ####'
 65 | 	'#### FileMove SUBSET ####'
 66 |         '#### InotifyWaitPoller SUBSET ####'
 67 | 	)
 68 | }
 69 | 
 70 | function __PREPROCESSOR_Unexpand {
 71 | 	local source="${1}"
 72 | 	local destination="${2}"
 73 | 
 74 | 	unexpand "$source" > "$destination"
 75 | 	if [ $? != 0 ]; then
 76 | 		Logger "Cannot unexpand [$source] to [$destination]." "CRITICAL"
 77 | 		exit 1
 78 | 	fi
 79 | }
 80 | 
 81 | function __PREPROCESSOR_MergeSubset {
 82 | 	local subsetBegin="${1}"
 83 | 	local subsetEnd="${2}"
 84 | 	local subsetFile="${3}"
 85 | 	local mergedFile="${4}"
 86 | 
 87 | 	sed -n "/$subsetBegin/,/$subsetEnd/p" "$subsetFile" > "$subsetFile.$subsetBegin"
 88 | 	if [ $? != 0 ]; then
 89 | 		Logger "Cannot sed subset [$subsetBegin -- $subsetEnd] in [$subsetFile]." "CRTICIAL"
 90 | 		exit 1
 91 | 	fi
 92 | 	sed "/include $subsetBegin/r $subsetFile.$subsetBegin" "$mergedFile" | grep -v -E "$subsetBegin\$|$subsetEnd\$" > "$mergedFile.tmp"
 93 | 	if [ $? != 0 ]; then
 94 | 		Logger "Cannot add subset [$subsetBegin] to [$mergedFile]." "CRITICAL"
 95 | 		exit 1
 96 | 	fi
 97 | 	rm -f "$subsetFile.$subsetBegin"
 98 | 	if [ $? != 0 ]; then
 99 | 		Logger "Cannot remove temporary subset [$subsetFile.$subsetBegin]." "CRITICAL"
100 | 		exit 1
101 | 	fi
102 | 
103 | 	rm -f "$mergedFile"
104 | 	if [ $? != 0 ]; then
105 | 		Logger "Cannot remove merged original file [$mergedFile]." "CRITICAL"
106 | 		exit 1
107 | 	fi
108 | 
109 | 	mv "$mergedFile.tmp" "$mergedFile"
110 | 	if [ $? != 0 ]; then
111 | 		Logger "Cannot move merged tmp file to original [$mergedFile]." "CRITICAL"
112 | 		exit 1
113 | 	fi
114 | }
115 | 
# Produces a build without PARANOIA_DEBUG code and marks the files executable.
#
# Arguments: $1 - file to read
#            $2 - file to write; defaults to $1 (in-place clean-up)
# Globals:   PARANOIA_DEBUG_BEGIN / PARANOIA_DEBUG_END (read) - delimit ranges to delete
#            PARANOIA_DEBUG_LINE (read) - any line containing it is deleted
# Exits:     1, after logging a CRITICAL message, when a step fails
function __PREPROCESSOR_CleanDebug {
	local source="${1}"
	local destination="${2:-$source}"

	# The markers are expanded inside one quoted sed script so that they can
	# never be word-split. Unless pipefail is enabled elsewhere, the status
	# tested is grep's, which is also non-zero when nothing is left to print.
	if ! sed "/$PARANOIA_DEBUG_BEGIN/,/$PARANOIA_DEBUG_END/d" "$source" | grep -v "$PARANOIA_DEBUG_LINE" > "$destination.tmp"; then
		Logger "Cannot remove PARANOIA_DEBUG code from standard build." "CRITICAL"
		exit 1
	fi

	if ! mv -f "$destination.tmp" "$destination"; then
		Logger "Cannot move [$destination.tmp] to [$destination]." "CRITICAL"
		exit 1
	fi

	if ! chmod +x "$source"; then
		Logger "Cannot chmod [$source]." "CRITICAL"
		exit 1
	fi
	Logger "Prepared [$source]." "NOTICE"

	# Only a distinct destination needs its own chmod and log entry.
	if [ "$source" != "$destination" ]; then
		if ! chmod +x "$destination"; then
			Logger "Cannot chmod [$destination]." "CRITICAL"
			exit 1
		fi
		Logger "Prepared [$destination]." "NOTICE"
	fi
}
151 | 
# Generates ../install.sh (and ../<program>-batch.sh when common_batch.sh
# exists) from the common_* templates in the current directory.
#
# Arguments: $1 - program name; replaces every "[prgname]" in the templates
# Globals:   __PREPROCESSOR_SUBSETS (read) - subset begin markers to merge in
# Exits:     1, after logging a CRITICAL message, when a template cannot be assembled
function __PREPROCESSOR_CopyCommons {
	local nPROGRAM="$1"
	local subset

	sed "s/\[prgname\]/$nPROGRAM/g" common_install.sh > ../install.sh
	if [ $? != 0 ]; then
		Logger "Cannot assemble install." "CRITICAL"
		exit 1
	fi

	for subset in "${__PREPROCESSOR_SUBSETS[@]}"; do
		__PREPROCESSOR_MergeSubset "$subset" "${subset//SUBSET/SUBSET END}" "ofunctions.sh" "../install.sh"
	done

	__PREPROCESSOR_CleanDebug "../install.sh"

	# The batch runner is optional; it is only built when its template exists.
	if [ -f "common_batch.sh" ]; then
		# Quoted target: an unquoted redirect word is subject to word splitting.
		sed "s/\[prgname\]/$nPROGRAM/g" common_batch.sh > "../$nPROGRAM-batch.sh"
		if [ $? != 0 ]; then
			Logger "Cannot assemble batch runner." "CRITICAL"
			exit 1
		fi

		for subset in "${__PREPROCESSOR_SUBSETS[@]}"; do
			__PREPROCESSOR_MergeSubset "$subset" "${subset//SUBSET/SUBSET END}" "ofunctions.sh" "../$nPROGRAM-batch.sh"
		done

		__PREPROCESSOR_CleanDebug "../$nPROGRAM-batch.sh"
	fi
}
181 | 
# If sourced don't do anything
# Only act when this file is executed under its own name. "$0" is quoted so a
# checkout path containing spaces still yields "merge.sh".
if [ "$(basename "$0")" == "merge.sh" ]; then
	# Kept as a plain statement followed by a status check (not "if ! source")
	# so the sourced file is not evaluated in a condition context.
	source "./ofunctions.sh"
	if [ $? != 0 ]; then
		echo "Please run $0 in dev directory with ofunctions.sh"
		exit 1
	fi
	trap GenericTrapQuit TERM EXIT HUP QUIT

	# Every supported program goes through the same two build steps.
	case "$1" in
		osync|obackup|pmocr)
			__PREPROCESSOR_Merge "$1"
			__PREPROCESSOR_CopyCommons "$1"
			;;
		*)
			echo "No valid program given."
			Usage
			exit 1
			;;
	esac
fi
206 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## pmOCR (poor man's OCR tool)
  2 | 
  3 | [![Build Status](https://travis-ci.org/deajan/pmOCR.svg?branch=master)](https://travis-ci.org/deajan/pmOCR) [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) [![GitHub Release](https://img.shields.io/github/release/deajan/pmOCR.svg?label=Latest)](https://github.com/deajan/pmOCR/releases/latest)
  4 | 
  5 | ## This project has been archived !
  6 | It has been fun improving my bash skills since I began coding this back in 2015.  
  7 | I initially planned to produce a better, python based version of this, but then I found **OCRmyPDF** project, which already does a great job ;)  
  8 | See https://github.com/ocrmypdf/OCRmyPDF for more info.
  9 | 
 10 | If you're interested in Document Management Systems, also check out [paperless-ngx](https://github.com/paperless-ngx/paperless-ngx), which is fully open source and uses OCRmyPDF.  
 11 | 
 12 | Farewell, my old bash project.
 13 | 
 14 | ## pmOCR
 15 | 
 16 | A multicore batch & service wrapper script for Tesseract v3/v4/v5 (https://github.com/tesseract-ocr/) or ABBYY CLI OCR 11 FOR LINUX based on Finereader Engine 11 optical character recognition (www.ocr4linux.com).
 17 | 
 18 | Conversions support tiff/jpg/png/pdf/bmp to PDF, TXT and CSV (also DOCX and XLSX for Abbyy OCR). It can actually support any other format that your OCR engine can handle.
 19 | 
 20 | This wrapper can work both in batch and service mode.
 21 | 
 22 | In batch mode, it's used as commandline tool for processing multiple files at once, being able to output one or more formats.
 23 | 
 24 | In service mode, it will monitor directories and launch OCR conversions as soon as new files get into the directories.
 25 | Since v1.8.0, it can also monitor NFS / SMB mountpoints with new integrated inotifywait emulation poller.
 26 | 
 27 | pmOCR has the following options:
 28 | - Include current date into the output filename
 29 | - Ignore already OCRed PDF files based on font detection and / or file suffix
 30 | - Delete or move input file after successful conversion
 31 | 
 32 | ## Install it
 33 | 
 34 |     $ git clone https://github.com/deajan/pmOCR
 35 |     $ cd pmOCR
 36 |     $ ./install.sh
 37 | 
 38 | You will need pdffonts util (from poppler-utils package).
 39 | Optionally, you can install inotifywait (from inotify-tools package).
 40 | 
 41 | If you are using tesseract OCR, please install tesseract-osd and tesseract-[your language] (sometimes called tesseract-ocr-osd).
 42 | You will also need ImageMagick in order to be able to transform bitmap PDF documents to indexed PDFs.
 43 | 
 44 | ## Batch mode
 45 | 
 46 | Use pmocr to batch process all files in a given directory and its subdirectories.
 47 | 
 48 | Use --help for command line usage.
 49 | 
 50 | Example:
 51 | 
 52 |     $ pmocr.sh --batch --target=pdf --skip-txt-pdf --delete-input /some/path
 53 |     $ pmocr.sh --batch --target=pdf --target=csv --suffix=processed /some/path
 54 |     
 55 | If pmOCR wasn't installed, you may run it directly with a configuration file like:
 56 | 
 57 |     $ ./pmocr.sh --config=./default.conf --batch -p /some/path
 58 | 
 59 | ## OCR Configuration
 60 | 
 61 | pmOCR uses a default config stored in /etc/pmocr/default.conf
 62 | You may change its contents or clone it and have pmOCR use an alternative configuration with:
 63 | 
 64 |     $ pmocr.sh --config=/etc/pmocr/myConfig.conf --batch --target=csv /some/path
 65 | 
 66 | ## Service mode
 67 | 
 68 | Service mode monitors directories and their subdirectories and launches an OCR conversion whenever a new file is written.
 69 | Keep in mind that only file creations are monitored. File moves aren't.
 70 | 
 71 | pmocr is written to monitor up to 5 directories, each producing a different target format (PDF, DOCX, XLSX, TXT & CSV). Comment out a folder to disable its monitoring.
 72 | 
 73 | There's also an option to avoid passing PDFs to the OCR engine that already contain text.
 74 | 
 75 | After installation, please configure /etc/pmocr/default.conf in order to monitor the directories you need, and adjust your specific options.
 76 | 
 77 | Launch service (initV style)
 78 | service pmocr-srv start
 79 | 
 80 | Launch service (systemd style)
 81 | systemctl start pmocr-srv@default.service
 82 | 
 83 | Check service state (initV style)
 84 | service pmocr-srv status
 85 | 
 86 | Check service state (systemd style)
 87 | systemctl status pmocr-srv@default.service
 88 | 
 89 | ## Multiple service instances
 90 | 
 91 | In order to monitor multiple directories with different OCR settings, you need to duplicate /etc/pmocr/default.conf configuration file.
 92 | When launching pmOCR service with initV, each config file will create an instance.
 93 | With systemD, you have to launch a service for each config file. Example for configs /etc/pmocr/default.conf and /etc/pmocr/other.conf
 94 | 
 95 |     $ systemctl start pmocr-srv@default.conf
 96 |     $ systemctl start pmocr-srv@other.conf
 97 | 
 98 | ## Support for OCR engines
 99 | 
100 | Has been tested so far with:
101 | - ABBYY FineReader OCR Engine 11 CLI for Linux releases R2 (v 11.1.6.562411), R3 (v 11.1.9.622165) and R6 (v 11.1.14.707470)
102 | - Tesseract-ocr 3.0.4
103 | - Tesseract-ocr 4.0.0 and 4.0.12
104 | - Tesseract-ocr 5.0.0 and 5.0.1
105 | 
106 | Tesseract mode also uses ghostscript to convert PDF files to an intermediary TIFF format in order to process them.
107 | 
108 | It should virtually work with any engine as long as you adjust the parameters.
109 | 
110 | Parameters include any arguments to pass to the OCR program depending on the target format.
111 | 
112 | ## Support for OCR Preprocessors
113 | 
114 | ABBYY has an integrated preprocessor in order to enhance recognition quality whereas Tesseract relies on external tools.
115 | pmOCR can use a preprocessor like ImageMagick to deskew / clear noise / render white background and remove black borders. 
116 | ImageMagick preprocessor is configured, and enabled by default to be used with Tesseract.
117 | 
118 | ## Tesseract caveats
119 | 
120 | When no OSD / language data is installed, tesseract will still process documents, but the quality may suffer.
121 | While pmocr will warn you about this, the conversion still happens.
122 | Please make sure to install all necessary addons for tesseract.
123 | 
124 | ## Troubleshooting
125 | 
126 | Please check /var/log/pmocr.log or ./pmocr.log file for errors.
127 | 
128 | Filenames containing special characters should work, nevertheless, if your file doesn't get converted, try to rename it and copy it again to the monitored directory or batch process it again.
129 | 
130 | By default, files that fail to be processed get the suffix '_OCR_ERR' + date added to their filename.
131 | In order to reprocess those files, the suffix has to be removed with the following command
132 | 
133 |     $ find /monitor/path -iname "*_OCR_ERR.*" -print0 | xargs -0 -I {} sh -c 'export file="{}"; mv "$file" "${file//_OCR_ERR/}"'
134 | 
135 | If using tesseract to create searchable PDF files, please make sure to have version 3.03 or better installed.
136 | 


--------------------------------------------------------------------------------
/dev/tests/conf/default.conf:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | ###### pmOCR - batch & service wrapper for OCR tools
  4 | ###### (C) 2014-2018 by Orsiris de Jong (www.netpower.fr)
  5 | ###### pmOCR v1.5.4 - 1.6.0 config file 2018122101
  6 | CONFIG_FILE_REVISION=1
  7 | 
  8 | ## ---------- GENERAL OPTIONS
  9 | 
 10 | ## Instance identification
 11 | INSTANCE_ID=MyOCRServer
 12 | 
 13 | ## List of allowed extensions for input files
 14 | FILES_TO_PROCES="\(pdf\|tif\|tiff\|png\|jpg\|jpeg\|bmp\|pcx\|dcx\)"
 15 | 
 16 | ## Number of OCR subprocesses to start simultaneously. Should not exceed the number of CPU cores for best performance.
 17 | NUMBER_OF_PROCESSES=4
 18 | 
 19 | ## The output file user and group ownership may be copied from input file (works only if executed as root).
 20 | PRESERVE_OWNERSHIP=no
 21 | ## Output file permissions. Defaults to 644 (works only if executed as root).
 22 | FILE_PERMISSIONS=
 23 | 
 24 | ## OCR Engine, adjust *_OCR_ENGINE_ARGS to fit your needs, especially for language settings
 25 | 
 26 | # Acceptable values are abbyyocr11, tesseract3
 27 | OCR_ENGINE=tesseract3
 28 | 
 29 | ## ---------- OCR Engine arguments
 30 | 
 31 | 	# AbbyyOCR11 Engine Arguments
 32 | 	#############################
 33 | 
 34 | ## -lpp = load predefined profile / TextExtraction_Accuracy = name of the predefined profile / -adb = Detect barcodes / -ido = Detect and rotate image orientation / -adtop = Detect text embedded in images
 35 | ## -rl = List of languages for the document (French,English,Spanish) / recc = Enhanced character confidence
 36 | ##### PDF related arguments : -pfs = PDF Export preset (balanced) / -pacm = PDF/A standards (pdfa-3a) / ptem = Specifies the mode of export of recognized text into PDF (PDF/A) format.
 37 | ##### DOCX related arguments : -dheb = Highlights uncertainly recognized characters with the background color when exporting to DOCX format (color defined by deb parameter).
 38 | ##### -deb 0xFFFF00 (yellow highlights)
 39 | ##### XLSX related arguments : -xlto = only export text from table / -xlrf = remove formatting from text / -xllrm = This option allows setting the mode of retaining the original document tables' layout in the output XLSX file (Default, ExactDocument, ExactLines)
 40 | 
 41 | ## Full path to OCR engine
 42 | 
 43 | ABBYY_OCR_ENGINE_EXEC=/usr/local/bin/abbyyocr11
 44 | 
 45 | # Quality may be set to Balanced, MaxSpeed, MaxQuality, MinSize
 46 | ABBYY_PDF_QUALITY=Balanced
 47 | ABBYY_PDF_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -pfs $ABBYY_PDF_QUALITY -pacm Pdfa_3a -ptem ImageOnText -f pdf'
 48 | ABBYY_WORD_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -f docx'
 49 | ABBYY_EXCEL_OCR_ENGINE_ARGS=' -lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -rpihp -xlrf -xllrm ExactLines -f xlsx'
 50 | ABBYY_TEXT_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -trl -f TextUnicodeDefaults'
 51 | ABBYY_CSV_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -trl -f TextUnicodeDefaults'
 52 | ABBYY_OCR_ENGINE_INPUT_ARG='-if'
 53 | ABBYY_OCR_ENGINE_OUTPUT_ARG='-of'
 54 | 
 55 | 
 56 | 	# tesseract 3.x Engine Arguments
 57 | 	################################
 58 | 
 59 | ## Full path to OCR engine
 60 | 
 61 | TESSERACT_OCR_ENGINE_EXEC=/usr/bin/tesseract
 62 | TESSERACT_PDF_OCR_ENGINE_ARGS='pdf'
 63 | TESSERACT_TEXT_OCR_ENGINE_ARGS=''
 64 | TESSERACT_CSV_OCR_ENGINE_ARGS=''
 65 | TESSERACT_OCR_ENGINE_INPUT_ARG='-l eng' # Language setting
 66 | TESSERACT_OCR_ENGINE_OUTPUT_ARG=
 67 | ## tesseract 3 intermediary transformation of PDF to TIFF
 68 | TESSERACT_PDF_TO_TIFF_EXEC=/usr/bin/gs
 69 | TESSERACT_PDF_TO_TIFF_OPTS=' -q -dNOPAUSE -r300x300 -sDEVICE=tiff32nc -sCompression=lzw -dBATCH -sOUTPUTFILE='
 70 | 
 71 | 	# Preprocessor Arguments (only for tesseract)
 72 | 	#############################################
 73 | 
 74 | ## Optional preprocessor to correct scanned images (don't use this for abbyy11 which already contains its own preprocessor)
 75 | ## Uncomment OCR_PREPROCESSOR_EXEC lines to use it
 76 | ## See http://www.imagemagick.org/discourse-server/viewtopic.php?t=22226 for examples
 77 | 
 78 | #OCR_PREPROCESSOR_EXEC=/usr/bin/convert
 79 | OCR_PREPROCESSOR_ARGS='-respect-parenthesis \( -compress LZW -density 300 -bordercolor black -border 1 -trim +repage -fill white -draw "color 0,0 floodfill" -alpha off -shave 1x1 \) \( -bordercolor black -border 2 -fill white -draw "color 0,0 floodfill" -alpha off -shave 0x1 -deskew 40 +repage \)'
 80 | OCR_PREPROCESSOR_INPUT_ARG=''
 81 | OCR_PREPROCESSOR_OUTPUT_ARG=''
 82 | 
 83 | #######################################################################
 84 | ### THE FOLLOWING PARAMETERS ARE USED WHEN pmOCR IS RUN AS SERVICE ####
 85 | ###     YOU MAY SET THEM IN COMMAND LINE WHEN USING BATCH MODE     ####
 86 | #######################################################################
 87 | 
 88 | ## List of alert mails separated by spaces
 89 | DESTINATION_MAILS="infrastructure@example.com"
 90 | 
 91 | ## Optional change of mail body encoding (using iconv)
 92 | ## By default, all mails are sent in UTF-8 format without header (because of maximum compatibility of all platforms)
 93 | ## You may specify an optional encoding here (like "ISO-8859-1" or whatever iconv can handle)
 94 | MAIL_BODY_CHARSET=""
 95 | 
 96 | ## Directories to monitor (Leave variables empty in order to disable specific monitoring).
 97 | ## As of today, Tesseract only handles PDF, TXT and CSV
 98 | PDF_MONITOR_DIR="/storage/service_ocr/PDF"
 99 | WORD_MONITOR_DIR="/storage/service_ocr/WORD"
100 | EXCEL_MONITOR_DIR="/storage/service_ocr/EXCEL"
101 | TEXT_MONITOR_DIR="/storage/service_ocr/TEXT"
102 | CSV_MONITOR_DIR="/storage/service_ocr/CSV"
103 | 
104 | PDF_EXTENSION=".pdf"
105 | WORD_EXTENSION=".docx"
106 | EXCEL_EXTENSION=".xlsx"
107 | TEXT_EXTENSION=".txt"
108 | CSV_EXTENSION=".csv"
109 | 
110 | ## Move original file after successful processing into a path that will be ignored by the monitor.
111 | ## Enabling this setting automatically disables DELETE_ORIGINAL and FILENAME_SUFFIX values.
112 | #MOVE_ORIGINAL_ON_SUCCESS="/storage/service_ocr/done"
113 | 
114 | ## Move failed to process file into a path that will be ignored by the monitor.
115 | ## Enabling this setting automatically disables FAILED_FILENAME_SUFFIX value.
116 | #MOVE_ORIGINAL_ON_FAILURE="/storage/service_ocr/failed"
117 | 
118 | ## Adds an optional following suffix to OCRed files (ex: input.tiff becomes input_OCR.pdf). Any file containing this suffix will be ignored. Can be left empty.
119 | FILENAME_SUFFIX="_OCR"
120 | 
121 | ## Add the following suffix to failed files in order to prevent them from being processed in a loop. Can be left empty.
122 | FAILED_FILENAME_SUFFIX="_OCR_ERR"
123 | 
124 | ## Delete original file upon successful processing.
125 | DELETE_ORIGINAL=no
126 | 
127 | # Alternative check if PDFs are already OCRed (checks if a pdf contains a font). This will prevent images integrated in already indexed PDFs to get OCRed.
128 | CHECK_PDF=no
129 | 
130 | ## Add some extra info to the filename. Example here adds a pseudo ISO 8601 timestamp after a dot (pseudo because the colon sign would render the filename quite weird).
131 | ## Keep variables between singlequotes if you want them to expand at runtime. Leave this variable empty if you don't want to add anything (is also added to moved files).
132 | FILENAME_ADDITION='.$(date --utc +"%Y-%m-%dT%H-%M-%SZ")'
133 | 
134 | ## Max time before triggering a forced OCR run when no file actions are detected
135 | MAX_TIME=3600
136 | 


--------------------------------------------------------------------------------
/dev/tests/shunit2/shunit2_asserts_test.sh:
--------------------------------------------------------------------------------
  1 | #! /bin/sh
  2 | # vim:et:ft=sh:sts=2:sw=2
  3 | #
  4 | # shunit2 unit test for assert functions.
  5 | #
  6 | # Copyright 2008-2017 Kate Ward. All Rights Reserved.
  7 | # Released under the Apache 2.0 license.
  8 | #
  9 | # Author: kate.ward@forestent.com (Kate Ward)
 10 | # https://github.com/kward/shunit2
 11 | #
 12 | # Disable source following.
 13 | #   shellcheck disable=SC1090,SC1091
 14 | 
 15 | # These variables will be overridden by the test helpers.
 16 | stdoutF="${TMPDIR:-/tmp}/STDOUT"
 17 | stderrF="${TMPDIR:-/tmp}/STDERR"
 18 | 
 19 | # Load test helpers.
 20 | . ./shunit2_test_helpers
 21 | 
 22 | commonEqualsSame() {
 23 |   fn=$1
 24 | 
 25 |   ( ${fn} 'x' 'x' >"${stdoutF}" 2>"${stderrF}" )
 26 |   th_assertTrueWithNoOutput 'equal' $? "${stdoutF}" "${stderrF}"
 27 | 
 28 |   ( ${fn} "${MSG}" 'x' 'x' >"${stdoutF}" 2>"${stderrF}" )
 29 |   th_assertTrueWithNoOutput 'equal; with msg' $? "${stdoutF}" "${stderrF}"
 30 | 
 31 |   ( ${fn} 'abc def' 'abc def' >"${stdoutF}" 2>"${stderrF}" )
 32 |   th_assertTrueWithNoOutput 'equal with spaces' $? "${stdoutF}" "${stderrF}"
 33 | 
 34 |   ( ${fn} 'x' 'y' >"${stdoutF}" 2>"${stderrF}" )
 35 |   th_assertFalseWithOutput 'not equal' $? "${stdoutF}" "${stderrF}"
 36 | 
 37 |   ( ${fn} '' '' >"${stdoutF}" 2>"${stderrF}" )
 38 |   th_assertTrueWithNoOutput 'null values' $? "${stdoutF}" "${stderrF}"
 39 | 
 40 |   ( ${fn} arg1 >"${stdoutF}" 2>"${stderrF}" )
 41 |   th_assertFalseWithError 'too few arguments' $? "${stdoutF}" "${stderrF}"
 42 | 
 43 |   ( ${fn} arg1 arg2 arg3 arg4 >"${stdoutF}" 2>"${stderrF}" )
 44 |   th_assertFalseWithError 'too many arguments' $? "${stdoutF}" "${stderrF}"
 45 | }
 46 | 
 47 | commonNotEqualsSame() {
 48 |   fn=$1
 49 | 
 50 |   ( ${fn} 'x' 'y' >"${stdoutF}" 2>"${stderrF}" )
 51 |   th_assertTrueWithNoOutput 'not same' $? "${stdoutF}" "${stderrF}"
 52 | 
 53 |   ( ${fn} "${MSG}" 'x' 'y' >"${stdoutF}" 2>"${stderrF}" )
 54 |   th_assertTrueWithNoOutput 'not same, with msg' $? "${stdoutF}" "${stderrF}"
 55 | 
 56 |   ( ${fn} 'x' 'x' >"${stdoutF}" 2>"${stderrF}" )
 57 |   th_assertFalseWithOutput 'same' $? "${stdoutF}" "${stderrF}"
 58 | 
 59 |   ( ${fn} '' '' >"${stdoutF}" 2>"${stderrF}" )
 60 |   th_assertFalseWithOutput 'null values' $? "${stdoutF}" "${stderrF}"
 61 | 
 62 |   ( ${fn} arg1 >"${stdoutF}" 2>"${stderrF}" )
 63 |   th_assertFalseWithError 'too few arguments' $? "${stdoutF}" "${stderrF}"
 64 | 
 65 |   ( ${fn} arg1 arg2 arg3 arg4 >"${stdoutF}" 2>"${stderrF}" )
 66 |   th_assertFalseWithError 'too many arguments' $? "${stdoutF}" "${stderrF}"
 67 | }
 68 | 
 69 | testAssertEquals() {
 70 |   commonEqualsSame 'assertEquals'
 71 | }
 72 | 
 73 | testAssertNotEquals() {
 74 |   commonNotEqualsSame 'assertNotEquals'
 75 | }
 76 | 
 77 | testAssertSame() {
 78 |   commonEqualsSame 'assertSame'
 79 | }
 80 | 
 81 | testAssertNotSame() {
 82 |   commonNotEqualsSame 'assertNotSame'
 83 | }
 84 | 
 85 | testAssertNull() {
 86 |   ( assertNull '' >"${stdoutF}" 2>"${stderrF}" )
 87 |   th_assertTrueWithNoOutput 'null' $? "${stdoutF}" "${stderrF}"
 88 | 
 89 |   ( assertNull "${MSG}" '' >"${stdoutF}" 2>"${stderrF}" )
 90 |   th_assertTrueWithNoOutput 'null, with msg' $? "${stdoutF}" "${stderrF}"
 91 | 
 92 |   ( assertNull 'x' >"${stdoutF}" 2>"${stderrF}" )
 93 |   th_assertFalseWithOutput 'not null' $? "${stdoutF}" "${stderrF}"
 94 | 
 95 |   ( assertNull >"${stdoutF}" 2>"${stderrF}" )
 96 |   th_assertFalseWithError 'too few arguments' $? "${stdoutF}" "${stderrF}"
 97 | 
 98 |   ( assertNull arg1 arg2 arg3 >"${stdoutF}" 2>"${stderrF}" )
 99 |   th_assertFalseWithError 'too many arguments' $? "${stdoutF}" "${stderrF}"
100 | }
101 | 
# Exercises assertNotNull with plain values, values containing shell
# metacharacters (double quote, single quote, dollar, backtick), an empty value
# and too many arguments. Each call runs in a subshell with its output captured
# in ${stdoutF}/${stderrF}; status and files go to a th_* helper.
testAssertNotNull() {
  ( assertNotNull 'x' ) >"$stdoutF" 2>"$stderrF"
  th_assertTrueWithNoOutput 'not null' "$?" "$stdoutF" "$stderrF"

  ( assertNotNull "$MSG" 'x' ) >"$stdoutF" 2>"$stderrF"
  th_assertTrueWithNoOutput 'not null, with msg' "$?" "$stdoutF" "$stderrF"

  ( assertNotNull 'x"b' ) >"$stdoutF" 2>"$stderrF"
  th_assertTrueWithNoOutput 'not null, with double-quote' "$?" "$stdoutF" "$stderrF"

  ( assertNotNull "x'b" ) >"$stdoutF" 2>"$stderrF"
  th_assertTrueWithNoOutput 'not null, with single-quote' "$?" "$stdoutF" "$stderrF"

  # shellcheck disable=SC2016
  ( assertNotNull 'x$b' ) >"$stdoutF" 2>"$stderrF"
  th_assertTrueWithNoOutput 'not null, with dollar' "$?" "$stdoutF" "$stderrF"

  ( assertNotNull 'x`b' ) >"$stdoutF" 2>"$stderrF"
  th_assertTrueWithNoOutput 'not null, with backtick' "$?" "$stdoutF" "$stderrF"

  ( assertNotNull '' ) >"$stdoutF" 2>"$stderrF"
  th_assertFalseWithOutput 'null' "$?" "$stdoutF" "$stderrF"

  # There is no test for too few arguments as $1 might actually be null.

  ( assertNotNull arg1 arg2 arg3 ) >"$stdoutF" 2>"$stderrF"
  th_assertFalseWithError 'too many arguments' "$?" "$stdoutF" "$stderrF"
}
135 | 
# Exercises assertTrue with numeric statuses, condition strings, an empty value
# and wrong argument counts. Each call runs in a subshell with its output
# captured in ${stdoutF}/${stderrF}; status and files go to a th_* helper.
testAssertTrue() {
  ( assertTrue 0 ) >"$stdoutF" 2>"$stderrF"
  th_assertTrueWithNoOutput 'true' "$?" "$stdoutF" "$stderrF"

  ( assertTrue "$MSG" 0 ) >"$stdoutF" 2>"$stderrF"
  th_assertTrueWithNoOutput 'true, with msg' "$?" "$stdoutF" "$stderrF"

  ( assertTrue '[ 0 -eq 0 ]' ) >"$stdoutF" 2>"$stderrF"
  th_assertTrueWithNoOutput 'true condition' "$?" "$stdoutF" "$stderrF"

  ( assertTrue 1 ) >"$stdoutF" 2>"$stderrF"
  th_assertFalseWithOutput 'false' "$?" "$stdoutF" "$stderrF"

  ( assertTrue '[ 0 -eq 1 ]' ) >"$stdoutF" 2>"$stderrF"
  th_assertFalseWithOutput 'false condition' "$?" "$stdoutF" "$stderrF"

  ( assertTrue '' ) >"$stdoutF" 2>"$stderrF"
  th_assertFalseWithOutput 'null' "$?" "$stdoutF" "$stderrF"

  ( assertTrue ) >"$stdoutF" 2>"$stderrF"
  th_assertFalseWithError 'too few arguments' "$?" "$stdoutF" "$stderrF"

  ( assertTrue arg1 arg2 arg3 ) >"$stdoutF" 2>"$stderrF"
  th_assertFalseWithError 'too many arguments' "$?" "$stdoutF" "$stderrF"
}
161 | 
# Exercises assertFalse with numeric statuses, condition strings, an empty
# value and wrong argument counts. Each call runs in a subshell with its output
# captured in ${stdoutF}/${stderrF}; the subshell's exit status and both files
# are then handed to a th_* helper (defined outside this file) together with a
# label identifying the scenario.
testAssertFalse() {
  ( assertFalse 1 >"${stdoutF}" 2>"${stderrF}" )
  th_assertTrueWithNoOutput 'false' $? "${stdoutF}" "${stderrF}"

  ( assertFalse "${MSG}" 1 >"${stdoutF}" 2>"${stderrF}" )
  th_assertTrueWithNoOutput 'false, with msg' $? "${stdoutF}" "${stderrF}"

  ( assertFalse '[ 0 -eq 1 ]' >"${stdoutF}" 2>"${stderrF}" )
  th_assertTrueWithNoOutput 'false condition' $? "${stdoutF}" "${stderrF}"

  ( assertFalse 0 >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithOutput 'true' $? "${stdoutF}" "${stderrF}"

  ( assertFalse '[ 0 -eq 0 ]' >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithOutput 'true condition' $? "${stdoutF}" "${stderrF}"

  # Empty value: labelled 'null' (as in testAssertTrue) so a failure here can
  # be told apart from the 'true condition' scenario above.
  ( assertFalse '' >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithOutput 'null' $? "${stdoutF}" "${stderrF}"

  ( assertFalse >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithError 'too few arguments' $? "${stdoutF}" "${stderrF}"

  ( assertFalse arg1 arg2 arg3 >"${stdoutF}" 2>"${stderrF}" )
  th_assertFalseWithError 'too many arguments' $? "${stdoutF}" "${stderrF}"
}
187 | 
# One-time fixture: runs the shared helper set-up, then defines the message
# used by the "with msg" scenarios.
oneTimeSetUp() {
  th_oneTimeSetUp

  MSG="This is a test message"
}
193 | 
# Load and run shunit2.
# When running under zsh (ZSH_VERSION is set), record this script's path in
# SHUNIT_PARENT before sourcing the file named by TH_SHUNIT.
# shellcheck disable=SC2034
if [ -n "${ZSH_VERSION:-}" ]; then
  SHUNIT_PARENT=$0
fi
. "${TH_SHUNIT}"
198 | 


--------------------------------------------------------------------------------
/dev/tests/conf/service.conf:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | ###### pmOCR - batch & service wrapper for OCR tools
  4 | ###### (C) 2014-2018 by Orsiris de Jong (www.netpower.fr)
  5 | ###### pmOCR v1.5.4 - 1.6.0 config file 2018122101
  6 | CONFIG_FILE_REVISION=1
  7 | 
  8 | ## ---------- GENERAL OPTIONS
  9 | 
 10 | ## Instance identification
 11 | INSTANCE_ID=MyOCRServer
 12 | 
 13 | ## List of allowed extensions for input files
 14 | FILES_TO_PROCES="\(pdf\|tif\|tiff\|png\|jpg\|jpeg\|bmp\|pcx\|dcx\)"
 15 | 
 16 | ## Number of OCR subprocesses to start simultaneously. Should not exceed the number of CPU cores for best performance.
 17 | NUMBER_OF_PROCESSES=4
 18 | 
 19 | ## The output file user and group ownership may be copied from input file (works only if executed as root).
 20 | PRESERVE_OWNERSHIP=no
 21 | ## Output file permissions. Defaults to 644 (works only if executed as root).
 22 | FILE_PERMISSIONS=
 23 | 
 24 | ## OCR Engine, adjust *_OCR_ENGINE_ARGS to fit your needs, especially for language settings
 25 | 
 26 | # Acceptable values are abbyyocr11, tesseract3
 27 | OCR_ENGINE=tesseract3
 28 | 
 29 | ## ---------- OCR Engine arguments
 30 | 
 31 | 	# AbbyyOCR11 Engine Arguments
 32 | 	#############################
 33 | 
 34 | ## -lpp = load predefined profile / TextExtraction_Accuracy = name of the predefined profile / -adb = Detect barcodes / -ido = Detect and rotate image orientation / -adtop = Detect text embedded in images
 35 | ## -rl = List of languages for the document (French,English,Spanish) / recc = Enhanced character confidence
 36 | ##### PDF related arguments : -pfs = PDF Export preset (balanced) / -pacm = PDF/A standards (pdfa-3a) / ptem = Specifies the mode of export of recognized text into PDF (PDF/A) format.
 37 | ##### DOCX related arguments : -dheb = Highlights uncertainly recognized characters with the background color when exporting to DOCX format (color defined by deb parameter).
 38 | ##### -deb 0xFFFF00 (yellow highlights)
 39 | ##### XLSX related arguments : -xlto = only export text from table / -xlrf = remove formatting from text / -xllrm = This option allows setting the mode of retaining the original document tables' layout in the output XLSX file (Default, ExactDocument, ExactLines)
 40 | 
 41 | ## Full path to OCR engine
 42 | 
 43 | ABBYY_OCR_ENGINE_EXEC=/usr/local/bin/abbyyocr11
 44 | 
 45 | # Quality may be set to Balanced, MaxSpeed, MaxQuality, MinSize
 46 | ABBYY_PDF_QUALITY=Balanced
 47 | ABBYY_PDF_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -pfs $ABBYY_PDF_QUALITY -pacm Pdfa_3a -ptem ImageOnText -f pdf'
 48 | ABBYY_WORD_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -f docx'
 49 | ABBYY_EXCEL_OCR_ENGINE_ARGS=' -lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -rpihp -xlrf -xllrm ExactLines -f xlsx'
 50 | ABBYY_TEXT_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -trl -f TextUnicodeDefaults'
 51 | ABBYY_CSV_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -trl -f TextUnicodeDefaults'
 52 | ABBYY_OCR_ENGINE_INPUT_ARG='-if'
 53 | ABBYY_OCR_ENGINE_OUTPUT_ARG='-of'
 54 | 
 55 | 
 56 | 	# tesseract 3.x Engine Arguments
 57 | 	################################
 58 | 
 59 | ## Full path to OCR engine
 60 | 
 61 | TESSERACT_OCR_ENGINE_EXEC=/usr/bin/tesseract
 62 | TESSERACT_PDF_OCR_ENGINE_ARGS='pdf'
 63 | TESSERACT_TEXT_OCR_ENGINE_ARGS=''
 64 | TESSERACT_CSV_OCR_ENGINE_ARGS=''
 65 | TESSERACT_OCR_ENGINE_INPUT_ARG='-l eng' # Language setting
 66 | TESSERACT_OCR_ENGINE_OUTPUT_ARG=
 67 | ## tesseract 3 intermediary transformation of PDF to TIFF
 68 | TESSERACT_PDF_TO_TIFF_EXEC=/usr/bin/gs
 69 | TESSERACT_PDF_TO_TIFF_OPTS=' -q -dNOPAUSE -r300x300 -sDEVICE=tiff32nc -sCompression=lzw -dBATCH -sOUTPUTFILE='
 70 | 
 71 | 	# Preprocessor Arguments (only for tesseract)
 72 | 	#############################################
 73 | 
 74 | ## Optional preprocessor to correct scanned images (don't use this for abbyyocr11, which already contains its own preprocessor)
 75 | ## Uncomment OCR_PREPROCESSOR_EXEC lines to use it
 76 | ## See http://www.imagemagick.org/discourse-server/viewtopic.php?t=22226 for examples
 77 | 
 78 | #OCR_PREPROCESSOR_EXEC=/usr/bin/convert
 79 | OCR_PREPROCESSOR_ARGS='-respect-parenthesis \( -compress LZW -density 300 -bordercolor black -border 1 -trim +repage -fill white -draw "color 0,0 floodfill" -alpha off -shave 1x1 \) \( -bordercolor black -border 2 -fill white -draw "color 0,0 floodfill" -alpha off -shave 0x1 -deskew 40 +repage \)'
 80 | OCR_PREPROCESSOR_INPUT_ARG=''
 81 | OCR_PREPROCESSOR_OUTPUT_ARG=''
 82 | 
 83 | #######################################################################
 84 | ### THE FOLLOWING PARAMETERS ARE USED WHEN pmOCR IS RUN AS SERVICE ####
 85 | ###     YOU MAY SET THEM IN COMMAND LINE WHEN USING BATCH MODE     ####
 86 | #######################################################################
 87 | 
 88 | ## List of alert mails separated by spaces
 89 | DESTINATION_MAILS="infrastructure@example.com"
 90 | 
 91 | ## Optional change of mail body encoding (using iconv)
 92 | ## By default, all mails are sent in UTF-8 format without header (because of maximum compatibility of all platforms)
 93 | ## You may specify an optional encoding here (like "ISO-8859-1" or whatever iconv can handle)
 94 | MAIL_BODY_CHARSET=""
 95 | 
 96 | ## Directories to monitor (Leave variables empty in order to disable specific monitoring).
 97 | ## As of today, Tesseract only handles PDF, TXT and CSV
 98 | PDF_MONITOR_DIR="${HOME}/pmocr-tests/service/PDF"
 99 | #WORD_MONITOR_DIR="/storage/service_ocr/WORD"
100 | #EXCEL_MONITOR_DIR="/storage/service_ocr/EXCEL"
101 | TEXT_MONITOR_DIR="${HOME}/pmocr-tests/service/TEXT"
102 | CSV_MONITOR_DIR="${HOME}/pmocr-tests/service/CSV"
103 | 
104 | PDF_EXTENSION=".pdf"
105 | WORD_EXTENSION=".docx"
106 | EXCEL_EXTENSION=".xlsx"
107 | TEXT_EXTENSION=".txt"
108 | CSV_EXTENSION=".csv"
109 | 
110 | ## Move original file after successful processing into a path that will be ignored by the monitor.
111 | ## Enabling this setting automatically disables DELETE_ORIGINAL and FILENAME_SUFFIX values.
112 | #MOVE_ORIGINAL_ON_SUCCESS="/storage/service_ocr/done"
113 | 
114 | ## Move failed to process file into a path that will be ignored by the monitor.
115 | ## Enabling this setting automatically disables FAILED_FILENAME_SUFFIX value.
116 | #MOVE_ORIGINAL_ON_FAILURE="/storage/service_ocr/failed"
117 | 
118 | ## Adds an optional following suffix to OCRed files (ex: input.tiff becomes input_OCR.pdf). Any file containing this suffix will be ignored. Can be left empty.
119 | FILENAME_SUFFIX="_OCR"
120 | 
121 | ## Add the following suffix to failed files in order to prevent them from being processed in a loop. Can be left empty.
122 | FAILED_FILENAME_SUFFIX="_OCR_ERR"
123 | 
124 | ## Delete original file upon successful processing.
125 | DELETE_ORIGINAL=no
126 | 
127 | # Alternative check if PDFs are already OCRed (checks if a pdf contains a font). This will prevent images integrated in already indexed PDFs to get OCRed.
128 | CHECK_PDF=yes
129 | 
130 | ## Add some extra info to the filename. Example here adds a pseudo ISO 8601 timestamp after a dot (pseudo because the colon sign would render the filename quite weird).
131 | ## Keep variables between singlequotes if you want them to expand at runtime. Leave this variable empty if you don't want to add anything (is also added to moved files).
132 | FILENAME_ADDITION='.$(date --utc +"%Y-%m-%dT%H-%M-%SZ")'
133 | 
134 | ## Max time before triggering a forced OCR run when no file actions are detected
135 | MAX_TIME=3600
136 | MOVE_ORIGINAL_ON_SUCCESS=
137 | MOVE_ORIGINAL_ON_FAILURE=
138 | 


--------------------------------------------------------------------------------
/dev/tests/shunit2/shunit2_test_helpers:
--------------------------------------------------------------------------------
  1 | # vim:et:ft=sh:sts=2:sw=2
  2 | #
  3 | # shUnit2 unit test common functions
  4 | #
  5 | # Copyright 2008 Kate Ward. All Rights Reserved.
  6 | # Released under the Apache 2.0 license.
  7 | #
  8 | # Author: kate.ward@forestent.com (Kate Ward)
  9 | # https://github.com/kward/shunit2
 10 | #
 11 | ### ShellCheck (http://www.shellcheck.net/)
 12 | # Commands are purposely escaped so they can be mocked outside shUnit2.
 13 | #   shellcheck disable=SC1001,SC1012
 14 | # expr may be antiquated, but it is the only solution in some cases.
 15 | #   shellcheck disable=SC2003
 16 | # $() are not fully portable (POSIX != portable).
 17 | #   shellcheck disable=SC2006
 18 | 
 19 | # Treat unset variables as an error when performing parameter expansion.
 20 | set -u
 21 | 
 22 | # Set shwordsplit for zsh.
 23 | \[ -n "${ZSH_VERSION:-}" ] && setopt shwordsplit
 24 | 
 25 | #
 26 | # Constants.
 27 | #
 28 | 
 29 | # Path to shUnit2 library. Can be overridden by setting SHUNIT_INC.
 30 | TH_SHUNIT=${SHUNIT_INC:-./shunit2}; export TH_SHUNIT
 31 | 
 32 | # Configure debugging. Set the DEBUG environment variable to any
 33 | # non-empty value to enable debug output, or TRACE to enable trace
 34 | # output.
 35 | TRACE=${TRACE:+'th_trace '}
 36 | \[ -n "${TRACE}" ] && DEBUG=1
 37 | \[ -z "${TRACE}" ] && TRACE=':'
 38 | 
 39 | DEBUG=${DEBUG:+'th_debug '}
 40 | \[ -z "${DEBUG}" ] && DEBUG=':'
 41 | 
 42 | #
 43 | # Variables.
 44 | #
 45 | 
 46 | th_RANDOM=0
 47 | 
 48 | #
 49 | # Functions.
 50 | #
 51 | 
 52 | # Logging functions.
 53 | th_trace() { echo "${MY_NAME}:TRACE $*" >&2; }
 54 | th_debug() { echo "${MY_NAME}:DEBUG $*" >&2; }
 55 | th_info() { echo "${MY_NAME}:INFO $*" >&2; }
 56 | th_warn() { echo "${MY_NAME}:WARN $*" >&2; }
 57 | th_error() { echo "${MY_NAME}:ERROR $*" >&2; }
 58 | th_fatal() { echo "${MY_NAME}:FATAL $*" >&2; }
 59 | 
 60 | # Output subtest name.
 61 | th_subtest() { echo " $*" >&2; }
 62 | 
 63 | th_oneTimeSetUp() {
 64 |   # These files will be cleaned up automatically by shUnit2.
 65 |   stdoutF="${SHUNIT_TMPDIR}/stdout"
 66 |   stderrF="${SHUNIT_TMPDIR}/stderr"
 67 |   returnF="${SHUNIT_TMPDIR}/return"
 68 |   expectedF="${SHUNIT_TMPDIR}/expected"
 69 |   export stdoutF stderrF returnF expectedF
 70 | }
 71 | 
 72 | # Generate a random number.
 73 | th_generateRandom() {
 74 |   tfgr_random=${th_RANDOM}
 75 | 
 76 |   while \[ "${tfgr_random}" = "${th_RANDOM}" ]; do
 77 |     # shellcheck disable=SC2039
 78 |     if \[ -n "${RANDOM:-}" ]; then
 79 |       # $RANDOM works
 80 |       # shellcheck disable=SC2039
 81 |       tfgr_random=${RANDOM}${RANDOM}${RANDOM}$$
 82 |     elif \[ -r '/dev/urandom' ]; then
 83 |       tfgr_random=`od -vAn -N4 -tu4 </dev/urandom |sed 's/^[^0-9]*//'`
 84 |     else
 85 |       tfgr_date=`date '+%H%M%S'`
 86 |       tfgr_random=`expr "${tfgr_date}" \* $$`
 87 |       unset tfgr_date
 88 |     fi
 89 |     \[ "${tfgr_random}" = "${th_RANDOM}" ] && sleep 1
 90 |   done
 91 | 
 92 |   th_RANDOM=${tfgr_random}
 93 |   unset tfgr_random
 94 | }
 95 | 
 96 | # This section returns the data section from the specified section of a file. A
 97 | # data section is defined by a [header], one or more lines of data, and then a
 98 | # blank line.
 99 | th_getDataSect() {
100 |   th_sgrep "\\[$1\\]" "$2" |sed '1d'
101 | }
102 | 
# Grep a section from one or more files (or stdin when no file is given). A
# section is a group of lines preceded and followed by blank lines; every
# section containing a line that matches the pattern is printed.
#
# Args:
#   $1: string: sed regular expression; it is interpolated between '/'
#       delimiters, so it must not contain an unescaped '/'
#   $2..: string: files to search (optional)
th_sgrep() {
  th_pattern_=$1
  shift

  # First sed: collect each section in the hold space and emit it only if it
  # matches. Every emitted section starts with an empty line, so the second
  # sed drops the first output line.
  # The file list is quoted so names containing whitespace stay intact.
  sed -e '/./{H;$!d;}' -e "x;/${th_pattern_}/"'!d;' "$@" |sed '1d'

  unset th_pattern_
}
114 | 
# Custom assert for a subtest that must succeed silently: the return value
# must be true (0) and both captured output files must be empty. When the
# return value differs from SHUNIT_TRUE, the captured STDERR is printed.
#
# Args:
#  th_test_: string: name of the subtest
#  th_rtrn_: integer: the return value of the subtest performed
#  th_stdout_: string: filename where stdout was redirected to
#  th_stderr_: string: filename where stderr was redirected to
th_assertTrueWithNoOutput() {
  th_test_=$1 th_rtrn_=$2 th_stdout_=$3 th_stderr_=$4

  assertTrue "${th_test_}; expected return value of zero" "${th_rtrn_}"
  if \[ "${th_rtrn_}" -ne "${SHUNIT_TRUE}" ]; then
    \cat "${th_stderr_}"
  fi

  assertFalse "${th_test_}; expected no output to STDOUT" "[ -s '${th_stdout_}' ]"
  assertFalse "${th_test_}; expected no output to STDERR" "[ -s '${th_stderr_}' ]"

  unset th_test_ th_rtrn_ th_stdout_ th_stderr_
}
139 | 
# Custom assert for a subtest that must fail while reporting on STDOUT only:
# non-zero return value, a non-empty STDOUT capture and an empty STDERR
# capture. If the captures do not look like that, both are handed to
# _th_showOutput.
#
# Args:
#  th_test_: string: name of the subtest
#  th_rtrn_: integer: the return value of the subtest performed
#  th_stdout_: string: filename where stdout was redirected to
#  th_stderr_: string: filename where stderr was redirected to
th_assertFalseWithOutput() {
  th_test_=$1 th_rtrn_=$2 th_stdout_=$3 th_stderr_=$4

  assertFalse "${th_test_}; expected non-zero return value" "${th_rtrn_}"
  assertTrue "${th_test_}; expected output to STDOUT" "[ -s '${th_stdout_}' ]"
  assertFalse "${th_test_}; expected no output to STDERR" "[ -s '${th_stderr_}' ]"

  # Unexpected shape: STDOUT is empty or STDERR is not.
  if \[ ! -s "${th_stdout_}" ] || \[ -s "${th_stderr_}" ]; then
    _th_showOutput "${SHUNIT_FALSE}" "${th_stdout_}" "${th_stderr_}"
  fi

  unset th_test_ th_rtrn_ th_stdout_ th_stderr_
}
165 | 
# Custom assert for a subtest that must fail while reporting on STDERR only:
# non-zero return value, an empty STDOUT capture and a non-empty STDERR
# capture. If the captures do not look like that, both are handed to
# _th_showOutput.
#
# Args:
#  th_test_: string: name of the subtest
#  th_rtrn_: integer: the return value of the subtest performed
#  th_stdout_: string: filename where stdout was redirected to
#  th_stderr_: string: filename where stderr was redirected to
th_assertFalseWithError() {
  th_test_=$1 th_rtrn_=$2 th_stdout_=$3 th_stderr_=$4

  assertFalse "${th_test_}; expected non-zero return value" "${th_rtrn_}"
  assertFalse "${th_test_}; expected no output to STDOUT" "[ -s '${th_stdout_}' ]"
  assertTrue "${th_test_}; expected output to STDERR" "[ -s '${th_stderr_}' ]"

  # Unexpected shape: STDOUT is not empty or STDERR is.
  if \[ -s "${th_stdout_}" ] || \[ ! -s "${th_stderr_}" ]; then
    _th_showOutput "${SHUNIT_FALSE}" "${th_stdout_}" "${th_stderr_}"
  fi

  unset th_test_ th_rtrn_ th_stdout_ th_stderr_
}
190 | 
# Some shells, zsh on Solaris in particular, return immediately from a
# sub-shell when a non-zero return value is encountered. To properly catch
# these values, they are written to the file named by returnF; an empty file
# is treated as an error.

# Truncate the return-value file.
th_clearReturn() {
  cp /dev/null "${returnF}"
}

# Load the recorded return value into the exported variable th_return, using
# SHUNIT_ERROR when the file is missing or empty.
th_queryReturn() {
  th_return=${SHUNIT_ERROR}
  if \[ -s "${returnF}" ]; then
    th_return=`\cat "${returnF}"`
  fi
  export th_return
}
203 | 
# Providing external and internal calls to the showOutput helper function.
th_showOutput() { _th_showOutput "$@"; }

# Print the captured STDOUT/STDERR files of a subtest to stderr.
#
# Output is produced only when isSkipping returns SHUNIT_FALSE and the given
# return value differs from SHUNIT_TRUE. A file is printed only if its name
# is non-empty and the file has content.
#
# Args:
#   _th_return_: integer: the return value of the subtest performed
#   _th_stdout_: string: filename where stdout was redirected to (optional)
#   _th_stderr_: string: filename where stderr was redirected to (optional)
_th_showOutput() {
  _th_return_=$1
  # Defaulted so that omitting the optional file names does not abort the
  # shell under `set -u`.
  _th_stdout_=${2:-}
  _th_stderr_=${3:-}

  isSkipping
  _th_skipping_=$?

  # Separate `[` invocations joined by &&/|| replace the obsolescent -a/-o
  # operators, which are ambiguous for some operand values.
  if \[ "${_th_skipping_}" -eq "${SHUNIT_FALSE}" ] \
      && \[ "${_th_return_}" != "${SHUNIT_TRUE}" ]; then
    if \[ -n "${_th_stdout_}" ] && \[ -s "${_th_stdout_}" ]; then
      echo '>>> STDOUT' >&2
      \cat "${_th_stdout_}" >&2
    fi
    if \[ -n "${_th_stderr_}" ] && \[ -s "${_th_stderr_}" ]; then
      echo '>>> STDERR' >&2
      \cat "${_th_stderr_}" >&2
    fi
    if \[ -n "${_th_stdout_}" ] || \[ -n "${_th_stderr_}" ]; then
      echo '<<< end output' >&2
    fi
  fi

  unset _th_return_ _th_stdout_ _th_stderr_ _th_skipping_
}
228 | 
229 | #
230 | # Main.
231 | #
232 | 
233 | ${TRACE} 'trace output enabled'
234 | ${DEBUG} 'debug output enabled'
235 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | RECENT CHANGES
  2 | --------------
  3 | 
  4 | 26 Feb 2022: v1.8.1
  5 | 
  6 | - Added a configurable poller interval
  7 | - Added service recovery when directory to monitor is not writable
  8 | - Fixed upgrades with newer configuration files
  9 | - Fixed sporadic errors with preprocessed images being detected by poller
 10 | 
 11 | 23 Feb 2022: v1.8.0
 12 | 
 13 | - Added internal inotifywait emulation that can deal with events on NFS / SMB shares where inotify events won't happen
 14 | - Greatly sped up OCR by bypassing checks on unmodified files
 15 | - Speed up OCR_Dispatch by checking already OCRed PDFs before launching OCR function
 16 | - Inclusions and exclusions are now case insensitive in order to make sure we play right with Windows rules too
 17 | 
 18 | 29 Dec 2021: v1.7.0 (never released)
 19 | 
 20 | - Tested Tesseract 5.X engine
 21 | - Improved optional preprocessor commandline
 22 | 	- Added antialiasing
 23 | 	- Added text sharpening
 24 | - Removed earlier ghostscript dependency
 25 | - Fixed installer message when no wget is present
 26 | - Updated ofunctions
 27 | 
 28 | 11 Jul 2019: v1.6.1
 29 | 
 30 | - Tested Tesseract 4.x engine
 31 | 	- Renamed "tesseract3" engine to "tesseract" since we work with 3.02+ / 4.x
 32 | 	- Added TESSERACT_OPTIONAL_ARGS in config file
 33 | - Improved handling of open files being deferred for later OCR
 34 | - Fixed automatic service shutdown in RHEL 6/7 (automatic /tmp directory cleanup removing service file)
 35 | - Updated ofunctions
 36 | 	- Moved from yes/no parameters to bash booleans
 37 | 	- Compatibility with older config files is preserved
 38 | 	- Better cleanup
 39 | - Fixed installer typos
 40 | 
 41 | 21 Dec 2018: v1.6.0
 42 | 
 43 | - Simplified config file syntax for OCR_ENGINE selection
 44 | - Added config file revision check
 45 | - Fixed logs not writing correctly in service mode and batch mode (OCR_Dispatch and lower function Logger doesn't work in)
 46 | - Fixed --no-text argument
 47 | - Added --failed-suffix and --no-failed-suffix batch options
 48 | - Skipping files currently being written to (workaround for slow file transfers), leaving them for next run
 49 | - Add nanoseconds to filename if output file already exists on move
 50 | - More clear preflight error messages
 51 | - Updated ofunctions
 52 | 	- RFC822 email compliance checks
 53 | 	- New more complete ExecTasks function to replace ParallelExec
 54 | 	- Fix log sending with double compressed extensions
 55 | 	- Minor fixes
 56 | - Fixed return code for initV style service file
 57 | - Upgraded shunit2 test framework to v2.1.8pre (git commit 07bb329)
 58 | 
 59 | 21 Apr 2017: v1.5.7
 60 | 
 61 | - Fixed a bug cleaning the SERVICE_MONITOR file after each run
 62 | 
 63 | 20 Apr 2017: v1.5.6
 64 | 
 65 | - Added tesseract version preflight checks
 66 | - Added unit test framework (basic functionality so far)
 67 | 	- Added batch tests
 68 | 		- File suffixes & no suffixes
 69 | 		- File text / date additions
 70 | 		- Skip searchable pdf tests
 71 | 		- Delete original upon successful processing
 72 | 	- Added service tests
 73 | 		- Basic PDF / TXT / CSV tests
 74 | 		- File moves on success & failure
 75 | - Fixed SERVICE-MONITOR file (run file) created in root
 76 | - Fixed CSV transformation didn't work
 77 | - Fixed a low severity security issue where log & run files are world readable
 78 | - Fixed some installer strings
 79 | - Tmp files are now cleaned on the fly after each dispatch
 80 | 
 81 | 13 Mar 2017: v1.5.4
 82 | 
 83 | - Support for moving files after processing
 84 | 	- Failing to move files will automatically rename them
 85 | - Better installer with --remove support
 86 | - Mail alerts can now be encoded differently than UTF-8
 87 | - Updated ofunctions from obackup / osync
 88 | 
 89 | 06 Feb 2017: v1.5.2
 90 | 
 91 | - Service improvements
 92 | 	- A forced run is done every MAX_WAIT seconds
 93 | 	- OCR is run on service start
 94 | 	- Moved files now also trigger an OCR run
 95 | - Prevent overwriting multiple failed files with same source filename
 96 | - Updated ofunctions from osync & obackup projects allowing to address multiple issues
 97 | 	- Improved mail function
 98 | 	- Improved ParallelExec function
 99 | 	- Improved logging functionality
100 | 
101 | 21 Oct 2016: v1.5
102 | 
103 | - Added ownership preservation option
104 | - Added optional file permission mask to replace default new file permissions
105 | - Added the possibility to use an image preprocessor (ImageMagick is preconfigured but not enabled by default)
106 | - Corrected an issue where a failed service run may end up in an infinite loop by adding a failed OCR file suffix
107 | - Made a workaround for Tesseract throwing an error when OSD data is missing but not exiting with a failure code
108 | - Fixed intermediary PDF2TIFF transformation used with Tesseract
109 | - Fixed --suffix option was ignored
110 | - Recoded service execution asynchronously
111 | 	- Fixed a bug where a file added while the OCR process was already running was not processed until another file was added
112 | - Changed Unix process signals to be POSIX compliant
113 | - Fixed file suffix exclusion also excluded files that contained the suffix anywhere in the filename
114 | - Enhanced parallel execution for huge file sets
115 | - Improved cpu usage on idle
116 | - Changed the way pmocr works
117 | 	- Split pmocr.sh config into separate config files so updates don't overwrite current config anymore
118 | 	- Updated service files to run multiple instances
119 | 	- Updated install script to handle config files
120 | - Added parallel execution for multicore systems
121 | - Improved tesseract 3 support
122 | 	- Added text output format
123 | 	- Added csv output format (with csv hack)
124 | 	- Remove intermediary txt files produced by tesseract
125 | - Improved logging
126 | - Improved code compliance
127 | - Various minor fixes from ofunctions updates
128 | 
129 | 15 Aug 2016: v1.4.2
130 | - Removed keep logging statement from WaitForTaskCompletion function
131 | - Fixed rare bug where the original PDF file was deleted without a successful transformation
132 | - Removed NO_DELETE_SUFFIX that is not used anymore
133 | - More debug logs
134 | - Updated ofunctions from other projects
135 | 
136 | 06 Aug 2016: v1.4.1
137 | - Fixed mail alerts not sent
138 | - Improved debugging and logging
139 | - Merged dev builder with other projects
140 | - Cleaned code (a bit)
141 | 
142 | 04 Aug 2016: v1.4
143 | - Merged more recent common function set
144 | - Improved logging
145 | - Improved installer
146 | - Added a systemd unit file
147 | - Added pdf2tiff intermediary transformation for tesseract3 to support pdf input (thanks to mhelff, https://github.com/mhelff)
148 | - Set pdf conversion as default choice in batch mode
149 | - Added preflight checks for tesseract3 engine
150 | - Refactored code that became totally unreadable for human being :)
151 | - Improved sub process terminate code
152 | - Improved daemon logging
153 | - Improved mail alert support in daemon mode
154 | 
155 | 03 Mar 2016: v1.3
156 | - Merged function codebase with osync and obackup
157 | - Fixed file extension should not change when DELETE_ORIGINAL=no
158 | - Added a suffix to original files for recognition
159 | - Fixed detection of PDFs already containing text (pdffonts should output more than 2 lines if embedded fonts are found)
160 | - Added minimal email alerts
161 | - Ported some code from osync/obackup
162 | - Added LSB info to init script for Debian based distros
163 | - Check for service directories before launching service
164 | - Added better KillChilds function on exit in service mode
165 | - Changed code to be code style V2 compliant
166 | - Added support for tesseract 3.x
167 | - Added options to suppress suffix and text in batch process
168 | 
169 | 31 Aug 2015: v1.2
170 | - Added all input file formats that abbyyocr11 supports
171 | - Fixed find command to allow case insensitive input extensions
172 | - Minor improvements in logging, and code readability
173 | - Added full commandline batch mode
174 | - Added option to delete input file after successful processing
175 | - Added option to suppress OCRed filename suffix
176 | - New option to avoid passing PDFs already containing text to the OCR engine
177 | - New option to add a trivial value to the output filename (like a date)
178 | 
179 | 23 Aug 2015: v1.04
180 | - Fixed multiple problems with spaces in filenames and exclusion patterns
181 | - Minor fixes for logging
182 | - Renamed all pmOCR instances to pmocr
183 | 


--------------------------------------------------------------------------------
/default.conf:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | ###### pmOCR - batch & service wrapper for OCR tools
  4 | ###### (C) 2014-2022 by Orsiris de Jong (www.netpower.fr)
  5 | ###### pmOCR v1.5.4 - 1.8.2 config file 2022050801
  6 | CONFIG_FILE_REVISION=1
  7 | 
  8 | ## ---------- GENERAL OPTIONS
  9 | 
 10 | ## Instance identification
 11 | INSTANCE_ID=MyOCRServer
 12 | 
 13 | ## List of allowed extensions for input files
 14 | FILES_TO_PROCESS="\(pdf\|tif\|tiff\|png\|jpg\|jpeg\|bmp\|pcx\|dcx\)"
 15 | 
 16 | ## Number of OCR subprocesses to start simultaneously. Should not exceed the number of CPU cores for best performance.
 17 | NUMBER_OF_PROCESSES=4
 18 | 
 19 | ## The output file user and group ownership may be copied from input file (works only if executed as root).
 20 | PRESERVE_OWNERSHIP=no
 21 | ## Output file permissions. Defaults to 644 (works only if executed as root).
 22 | FILE_PERMISSIONS=
 23 | 
 24 | ## OCR Engine, adjust *_OCR_ENGINE_ARGS to fit your needs, especially for language settings
 25 | 
 26 | # Acceptable values are abbyyocr11, tesseract (tesseract 3.x, 4.x or 5.x)
 27 | OCR_ENGINE=tesseract
 28 | 
 29 | # File detection strategy:
 30 | # true: use inotifywait (works when mountpoint is local)
 31 | # false: use integrated inotifywait emulation which does work even on SMB/NFS shares, but takes more resources since it's poller based (poller interval is measured in seconds)
 32 | INOTIFYWAIT_SUPPORT=false
 33 | INOTIFY_POLLER_INTERVAL=30
 34 | 
 35 | ## ---------- OCR Engine arguments
 36 | 
 37 | 	# AbbyyOCR11 Engine Arguments
 38 | 	#############################
 39 | 
 40 | ## -lpp = load predefined profile / TextExtraction_Accuracy = name of the predefined profile / -adb = Detect barcodes / -ido = Detect and rotate image orientation / -adtop = Detect text embedded in images
 41 | ## -rl = List of languages for the document (French,English,Spanish) / recc = Enhanced character confidence
 42 | ##### PDF related arguments : -pfs = PDF Export preset (balanced) / -pacm = PDF/A standards (pdfa-3a) / ptem = Specifies the mode of export of recognized text into PDF (PDF/A) format.
 43 | ##### DOCX related arguments : -dheb = Highlights uncertainly recognized characters with a background color when exporting to DOCX format (color defined by the -deb parameter).
 44 | ##### -deb 0xFFFF00 (yellow highlights)
 45 | ##### XLSX related arguments : -xlto = only export text from table / -xlrf = remove formatting from text / -xllrm = sets how the original document tables' layout is retained in the output XLSX file (Default, ExactDocument, ExactLines)
 46 | 
 47 | ## Full path to OCR engine
 48 | 
 49 | ABBYY_OCR_ENGINE_EXEC=/usr/local/bin/abbyyocr11
 50 | 
 51 | # Quality may be set to Balanced, MaxSpeed, MaxQuality, MinSize
 52 | ABBYY_PDF_QUALITY=Balanced
 53 | ABBYY_PDF_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -pfs $ABBYY_PDF_QUALITY -pacm Pdfa_3a -ptem ImageOnText -f pdf'
 54 | ABBYY_WORD_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -f docx'
 55 | ABBYY_EXCEL_OCR_ENGINE_ARGS=' -lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -rpihp -xlrf -xllrm ExactLines -f xlsx'
 56 | ABBYY_TEXT_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -trl -f TextUnicodeDefaults'
 57 | ABBYY_CSV_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -trl -f TextUnicodeDefaults'
 58 | ABBYY_OCR_ENGINE_INPUT_ARG='-if'
 59 | ABBYY_OCR_ENGINE_OUTPUT_ARG='-of'
 60 | 
 61 | 
 62 | 	# tesseract Engine Arguments
 63 | 	################################
 64 | 
 65 | ## Working resolution for tesseract preprocessor and intermediary transformations
 66 | ## Should be equal to the highest resolution of scanned documents. Good values are 300-600, but 600 is quite CPU hungry
 67 | RESOLUTION=600
 68 | 
 69 | ## Full path to OCR engine
 70 | TESSERACT_OCR_ENGINE_EXEC=/usr/bin/tesseract
 71 | TESSERACT_PDF_OCR_ENGINE_ARGS='pdf'
 72 | TESSERACT_TEXT_OCR_ENGINE_ARGS=''
 73 | TESSERACT_CSV_OCR_ENGINE_ARGS=''
 74 | TESSERACT_OCR_ENGINE_INPUT_ARG='-l eng' # Language setting
 75 | TESSERACT_OCR_ENGINE_OUTPUT_ARG=''
 76 | ## tesseract intermediary transformation of PDF to TIFF
 77 | TESSERACT_PDF_TO_TIFF_EXEC=/usr/bin/convert
 78 | TESSERACT_PDF_TO_TIFF_OPTS='-density '${RESOLUTION}' -compress lzw'
 79 | # Elder ghostscript conversion
 80 | #TESSERACT_PDF_TO_TIFF_EXEC=/usr/bin/gs
 81 | #TESSERACT_PDF_TO_TIFF_OPTS=' -q -dNOPAUSE -r'${RESOLUTION}'x'${RESOLUTION}' -sDEVICE=tiff32nc -sCompression=lzw -dBATCH -sOUTPUTFILE='
 82 | 
 83 | ## Tesseract optional arguments
 84 | ## Example for Tesseract 4.x/5.x OCR LSTM engine selection (see tesseract --help-extra)
 85 | # oem 0 is the legacy engine, which as of the tesseract 5.0.0 release with github/tesseract/tessdata traineddata gives better results
 86 | # oem 1 is the LSTM engine
 87 | TESSERACT_OPTIONAL_ARGS='--oem 0'
 88 | 
 89 | 	# Preprocessor Arguments (only for tesseract)
 90 | 	#############################################
 91 | 
 92 | ## Optional preprocessor to correct scanned images (don't use this for abbyyocr11, which already contains its own preprocessor)
 93 | ## Uncomment OCR_PREPROCESSOR_EXEC lines to use it
 94 | ## See http://www.imagemagick.org/discourse-server/viewtopic.php?t=22226 for examples
 95 | 
 96 | OCR_PREPROCESSOR_EXEC=/usr/bin/convert
 97 | OCR_PREPROCESSOR_ARGS='-units PixelsPerInch -respect-parenthesis \( -compress lzw -density '${RESOLUTION}' -bordercolor black -border 1 -trim +repage -fill white -draw "color 0,0 floodfill" -alpha off -shave 1x1 \) \( -bordercolor black -border 2 -fill white -draw "color 0,0 floodfill" -alpha off -shave 0x1 -deskew 40 +repage \) -antialias -sharpen 0x3'
 98 | OCR_PREPROCESSOR_INPUT_ARG=''
 99 | OCR_PREPROCESSOR_OUTPUT_ARG=''
100 | 
101 | #######################################################################
102 | ### THE FOLLOWING PARAMETERS ARE USED WHEN pmOCR IS RUN AS SERVICE ####
103 | ###     YOU MAY SET THEM IN COMMAND LINE WHEN USING BATCH MODE     ####
104 | #######################################################################
105 | 
106 | ## List of alert mails separated by spaces
107 | DESTINATION_MAILS="infrastructure@example.com"
108 | 
109 | ## Optional change of mail body encoding (using iconv)
110 | ## By default, all mails are sent in UTF-8 format without header (because of maximum compatibility of all platforms)
111 | ## You may specify an optional encoding here (like "ISO-8859-1" or whatever iconv can handle)
112 | MAIL_BODY_CHARSET=""
113 | 
114 | ## Directories to monitor (Leave variables empty in order to disable specific monitoring).
115 | ## As of today, Tesseract only handles PDF, TXT and CSV
116 | PDF_MONITOR_DIR="/storage/service_ocr/PDF"
117 | WORD_MONITOR_DIR="/storage/service_ocr/WORD"
118 | EXCEL_MONITOR_DIR="/storage/service_ocr/EXCEL"
119 | TEXT_MONITOR_DIR="/storage/service_ocr/TEXT"
120 | CSV_MONITOR_DIR="/storage/service_ocr/CSV"
121 | 
122 | PDF_EXTENSION=".pdf"
123 | WORD_EXTENSION=".docx"
124 | EXCEL_EXTENSION=".xlsx"
125 | TEXT_EXTENSION=".txt"
126 | CSV_EXTENSION=".csv"
127 | 
128 | ## Move original file after successful processing into a path that will be ignored by the monitor.
129 | ## Enabling this setting by removing comment automatically disables DELETE_ORIGINAL and FILENAME_SUFFIX values.
130 | #MOVE_ORIGINAL_ON_SUCCESS="/storage/service_ocr/done"
131 | 
132 | ## Move failed to process file into a path that will be ignored by the monitor.
133 | ## Enabling this setting by removing comment automatically disables FAILED_FILENAME_SUFFIX value.
134 | #MOVE_ORIGINAL_ON_FAILURE="/storage/service_ocr/failed"
135 | 
136 | ## Adds an optional following suffix to OCRed files (ex: input.tiff becomes input_OCR.pdf). Any file containing this suffix will be ignored. Can be left empty.
137 | FILENAME_SUFFIX="_OCR"
138 | 
139 | ## Add the following suffix to failed files in order to prevent them from being processed in a loop. Can be left empty.
140 | FAILED_FILENAME_SUFFIX="_OCR_ERR"
141 | 
142 | ## Delete original file upon successful processing (has no effect if MOVE_ORIGINAL_ON_SUCCESS is set) (true/false)
143 | DELETE_ORIGINAL=false
144 | 
145 | # Alternative check if PDFs are already OCRed (checks if a pdf contains a font). This will prevent images integrated in already indexed PDFs to get OCRed. (true/false)
146 | CHECK_PDF=true
147 | 
148 | ## Add some extra info to the filename. Example here adds a pseudo ISO 8601 timestamp after a dot (pseudo because the colon sign would render the filename quite weird).
149 | ## Keep variables between singlequotes if you want them to expand at runtime. Leave this variable empty if you don't want to add anything (is also added to moved files).
150 | FILENAME_ADDITION='.$(date --utc +"%Y-%m-%dT%H-%M-%SZ")'
151 | 
152 | ## Max time before triggering a forced OCR run when no file actions are detected
153 | MAX_TIME=3600
154 | 


--------------------------------------------------------------------------------
/dev/tests/shunit2/shunit2_macros_test.sh:
--------------------------------------------------------------------------------
  1 | #! /bin/sh
  2 | # vim:et:ft=sh:sts=2:sw=2
  3 | #
  4 | # shunit2 unit test for macros.
  5 | #
  6 | # Copyright 2008-2017 Kate Ward. All Rights Reserved.
  7 | # Released under the Apache 2.0 license.
  8 | #
  9 | # Author: kate.ward@forestent.com (Kate Ward)
 10 | # https://github.com/kward/shunit2
 11 | #
 12 | ### ShellCheck http://www.shellcheck.net/
 13 | # Disable source following.
 14 | #   shellcheck disable=SC1090,SC1091
 15 | # Presence of LINENO variable is checked.
 16 | #   shellcheck disable=SC2039
 17 | 
 18 | # These variables will be overridden by the test helpers.
 19 | stdoutF="${TMPDIR:-/tmp}/STDOUT"
 20 | stderrF="${TMPDIR:-/tmp}/STDERR"
 21 | 
 22 | # Load test helpers.
 23 | . ./shunit2_test_helpers
 24 | 
 25 | testAssertEquals() {
 26 |   # Start skipping if LINENO not available.
 27 |   [ -z "${LINENO:-}" ] && startSkipping
 28 | 
 29 |   ( ${_ASSERT_EQUALS_} 'x' 'y' >"${stdoutF}" 2>"${stderrF}" )
 30 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
 31 |   rtrn=$?
 32 |   assertTrue '_ASSERT_EQUALS_ failure' ${rtrn}
 33 |   [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2
 34 | 
 35 |   ( ${_ASSERT_EQUALS_} '"some msg"' 'x' 'y' >"${stdoutF}" 2>"${stderrF}" )
 36 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
 37 |   rtrn=$?
 38 |   assertTrue '_ASSERT_EQUALS_ w/ msg failure' ${rtrn}
 39 |   [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2
 40 | }
 41 | 
 42 | testAssertNotEquals() {  # Expects _ASSERT_NOT_EQUALS_ to print an 'ASSERT:[<lineno>]' line for equal values.
 43 |   # Start skipping if LINENO not available.
 44 |   [ -z "${LINENO:-}" ] && startSkipping
 45 | 
 46 |   ( ${_ASSERT_NOT_EQUALS_} 'x' 'x' >"${stdoutF}" 2>"${stderrF}" )  # Subshell; output captured to files.
 47 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
 48 |   rtrn=$?
 49 |   assertTrue '_ASSERT_NOT_EQUALS_ failure' ${rtrn}
 50 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # Show captured stderr only on a miss.
 51 | 
 52 |   ( ${_ASSERT_NOT_EQUALS_} '"some msg"' 'x' 'x' >"${stdoutF}" 2>"${stderrF}" )  # Same, with a message.
 53 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
 54 |   rtrn=$?
 55 |   assertTrue '_ASSERT_NOT_EQUALS_ w/ msg failure' ${rtrn}
 56 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # '||' form: function exits 0 on success.
 57 | }
 58 | 
 59 | testSame() {  # Expects _ASSERT_SAME_ to print an 'ASSERT:[<lineno>]' line for differing values.
 60 |   # Start skipping if LINENO not available.
 61 |   [ -z "${LINENO:-}" ] && startSkipping
 62 | 
 63 |   ( ${_ASSERT_SAME_} 'x' 'y' >"${stdoutF}" 2>"${stderrF}" )  # Subshell; output captured to files.
 64 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
 65 |   rtrn=$?
 66 |   assertTrue '_ASSERT_SAME_ failure' ${rtrn}
 67 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # Show captured stderr only on a miss.
 68 | 
 69 |   ( ${_ASSERT_SAME_} '"some msg"' 'x' 'y' >"${stdoutF}" 2>"${stderrF}" )  # Same, with a message.
 70 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
 71 |   rtrn=$?
 72 |   assertTrue '_ASSERT_SAME_ w/ msg failure' ${rtrn}
 73 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # '||' form: function exits 0 on success.
 74 | }
 75 | 
 76 | testNotSame() {  # Expects _ASSERT_NOT_SAME_ to print an 'ASSERT:[<lineno>]' line for identical values.
 77 |   # Start skipping if LINENO not available.
 78 |   [ -z "${LINENO:-}" ] && startSkipping
 79 | 
 80 |   ( ${_ASSERT_NOT_SAME_} 'x' 'x' >"${stdoutF}" 2>"${stderrF}" )  # Subshell; output captured to files.
 81 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
 82 |   rtrn=$?
 83 |   assertTrue '_ASSERT_NOT_SAME_ failure' ${rtrn}
 84 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # Show captured stderr only on a miss.
 85 | 
 86 |   ( ${_ASSERT_NOT_SAME_} '"some msg"' 'x' 'x' >"${stdoutF}" 2>"${stderrF}" )  # Same, with a message.
 87 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
 88 |   rtrn=$?
 89 |   assertTrue '_ASSERT_NOT_SAME_ w/ msg failure' ${rtrn}
 90 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # '||' form: function exits 0 on success.
 91 | }
 92 | 
 93 | testNull() {  # Expects _ASSERT_NULL_ to print an 'ASSERT:[<lineno>]' line for a non-empty value.
 94 |   # Start skipping if LINENO not available.
 95 |   [ -z "${LINENO:-}" ] && startSkipping
 96 | 
 97 |   ( ${_ASSERT_NULL_} 'x' >"${stdoutF}" 2>"${stderrF}" )  # Subshell; output captured to files.
 98 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
 99 |   rtrn=$?
100 |   assertTrue '_ASSERT_NULL_ failure' ${rtrn}
101 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # Show captured stderr only on a miss.
102 | 
103 |   ( ${_ASSERT_NULL_} '"some msg"' 'x' >"${stdoutF}" 2>"${stderrF}" )  # Same, with a message.
104 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
105 |   rtrn=$?
106 |   assertTrue '_ASSERT_NULL_ w/ msg failure' ${rtrn}
107 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # '||' form: function exits 0 on success.
108 | }
109 | 
110 | testNotNull()  # Expects _ASSERT_NOT_NULL_ to print an 'ASSERT:[<lineno>]' line for an empty value.
111 | {
112 |   # Start skipping if LINENO not available.
113 |   [ -z "${LINENO:-}" ] && startSkipping
114 | 
115 |   ( ${_ASSERT_NOT_NULL_} '' >"${stdoutF}" 2>"${stderrF}" )  # Subshell; output captured to files.
116 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
117 |   rtrn=$?
118 |   assertTrue '_ASSERT_NOT_NULL_ failure' ${rtrn}
119 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # Show captured stderr only on a miss.
120 | 
121 |   ( ${_ASSERT_NOT_NULL_} '"some msg"' '""' >"${stdoutF}" 2>"${stderrF}" )  # Same, with a message.
122 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
123 |   rtrn=$?
124 |   assertTrue '_ASSERT_NOT_NULL_ w/ msg failure' ${rtrn}
125 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stdoutF}" "${stderrF}" >&2  # '||' form: exits 0 on success.
126 | }
127 | 
128 | testAssertTrue() {  # Expects _ASSERT_TRUE_ to print an 'ASSERT:[<lineno>]' line when given SHUNIT_FALSE.
129 |   # Start skipping if LINENO not available.
130 |   [ -z "${LINENO:-}" ] && startSkipping
131 | 
132 |   ( ${_ASSERT_TRUE_} "${SHUNIT_FALSE}" >"${stdoutF}" 2>"${stderrF}" )  # Subshell; output captured to files.
133 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
134 |   rtrn=$?
135 |   assertTrue '_ASSERT_TRUE_ failure' ${rtrn}
136 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # Show captured stderr only on a miss.
137 | 
138 |   ( ${_ASSERT_TRUE_} '"some msg"' "${SHUNIT_FALSE}" >"${stdoutF}" 2>"${stderrF}" )  # Same, with a message.
139 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
140 |   rtrn=$?
141 |   assertTrue '_ASSERT_TRUE_ w/ msg failure' ${rtrn}
142 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # '||' form: function exits 0 on success.
143 | }
144 | 
145 | testAssertFalse() {  # Expects _ASSERT_FALSE_ to print an 'ASSERT:[<lineno>]' line when given SHUNIT_TRUE.
146 |   # Start skipping if LINENO not available.
147 |   [ -z "${LINENO:-}" ] && startSkipping
148 | 
149 |   ( ${_ASSERT_FALSE_} "${SHUNIT_TRUE}" >"${stdoutF}" 2>"${stderrF}" )  # Subshell; output captured to files.
150 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
151 |   rtrn=$?
152 |   assertTrue '_ASSERT_FALSE_ failure' ${rtrn}
153 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # Show captured stderr only on a miss.
154 | 
155 |   ( ${_ASSERT_FALSE_} '"some msg"' "${SHUNIT_TRUE}" >"${stdoutF}" 2>"${stderrF}" )  # Same, with a message.
156 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
157 |   rtrn=$?
158 |   assertTrue '_ASSERT_FALSE_ w/ msg failure' ${rtrn}
159 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # '||' form: function exits 0 on success.
160 | }
161 | 
162 | testFail() {  # Expects _FAIL_ to print an 'ASSERT:[<lineno>]' line, with and without a message.
163 |   # Start skipping if LINENO not available.
164 |   [ -z "${LINENO:-}" ] && startSkipping
165 | 
166 |   ( ${_FAIL_} >"${stdoutF}" 2>"${stderrF}" )  # Subshell; output captured to files.
167 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
168 |   rtrn=$?
169 |   assertTrue '_FAIL_ failure' ${rtrn}
170 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # Show captured stderr only on a miss.
171 | 
172 |   ( ${_FAIL_} '"some msg"' >"${stdoutF}" 2>"${stderrF}" )  # Same, with a message.
173 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
174 |   rtrn=$?
175 |   assertTrue '_FAIL_ w/ msg failure' ${rtrn}
176 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # '||' form: function exits 0 on success.
177 | }
178 | 
179 | testFailNotEquals()  # Expects _FAIL_NOT_EQUALS_ to print an 'ASSERT:[<lineno>]' line.
180 | {
181 |   # Start skipping if LINENO not available.
182 |   [ -z "${LINENO:-}" ] && startSkipping
183 | 
184 |   ( ${_FAIL_NOT_EQUALS_} 'x' 'y' >"${stdoutF}" 2>"${stderrF}" )  # Subshell; output captured to files.
185 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
186 |   rtrn=$?
187 |   assertTrue '_FAIL_NOT_EQUALS_ failure' ${rtrn}
188 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # Show captured stderr only on a miss.
189 | 
190 |   ( ${_FAIL_NOT_EQUALS_} '"some msg"' 'x' 'y' >"${stdoutF}" 2>"${stderrF}" )  # Same, with a message.
191 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
192 |   rtrn=$?
193 |   assertTrue '_FAIL_NOT_EQUALS_ w/ msg failure' ${rtrn}
194 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # '||' form: function exits 0 on success.
195 | }
196 | 
197 | testFailSame() {  # Expects _FAIL_SAME_ to print an 'ASSERT:[<lineno>]' line.
198 |   # Start skipping if LINENO not available.
199 |   [ -z "${LINENO:-}" ] && startSkipping
200 | 
201 |   ( ${_FAIL_SAME_} 'x' 'x' >"${stdoutF}" 2>"${stderrF}" )  # Subshell; output captured to files.
202 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
203 |   rtrn=$?
204 |   assertTrue '_FAIL_SAME_ failure' ${rtrn}
205 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # Show captured stderr only on a miss.
206 | 
207 |   ( ${_FAIL_SAME_} '"some msg"' 'x' 'x' >"${stdoutF}" 2>"${stderrF}" )  # Same, with a message.
208 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
209 |   rtrn=$?
210 |   assertTrue '_FAIL_SAME_ w/ msg failure' ${rtrn}
211 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # '||' form: function exits 0 on success.
212 | }
213 | 
214 | testFailNotSame() {  # Expects _FAIL_NOT_SAME_ to print an 'ASSERT:[<lineno>]' line.
215 |   # Start skipping if LINENO not available.
216 |   [ -z "${LINENO:-}" ] && startSkipping
217 | 
218 |   ( ${_FAIL_NOT_SAME_} 'x' 'y' >"${stdoutF}" 2>"${stderrF}" )  # Subshell; output captured to files.
219 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
220 |   rtrn=$?
221 |   assertTrue '_FAIL_NOT_SAME_ failure' ${rtrn}
222 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # Show captured stderr only on a miss.
223 | 
224 |   ( ${_FAIL_NOT_SAME_} '"some msg"' 'x' 'y' >"${stdoutF}" 2>"${stderrF}" )  # Same, with a message.
225 |   grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null
226 |   rtrn=$?
227 |   assertTrue '_FAIL_NOT_SAME_ w/ msg failure' ${rtrn}
228 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # '||' form: function exits 0 on success.
229 | }
230 | 
231 | oneTimeSetUp() {  # Suite-level setup hook; delegates to the shared helper.
232 |   th_oneTimeSetUp  # Defined outside this file (presumably in shunit2_test_helpers - confirm).
233 | }
234 | 
235 | # Disable output coloring as it breaks the tests.
236 | SHUNIT_COLOR='none'; export SHUNIT_COLOR  # Exported so child shells see the same setting.
237 | 
238 | # Load and run shUnit2.
239 | # shellcheck disable=SC2034
240 | [ -n "${ZSH_VERSION:-}" ] && SHUNIT_PARENT="$0"  # Only set when running under zsh.
241 | . "${TH_SHUNIT}"  # TH_SHUNIT is not set in this file; presumably provided by the test helpers.
242 | 


--------------------------------------------------------------------------------
/dev/tests/shunit2/lib/versions:
--------------------------------------------------------------------------------
  1 | #! /bin/sh
  2 | # vim:et:ft=sh:sts=2:sw=2
  3 | #
  4 | # Versions determines the versions of all installed shells.
  5 | #
  6 | # Copyright 2008-2018 Kate Ward. All Rights Reserved.
  7 | # Released under the Apache 2.0 License.
  8 | #
  9 | # Author: kate.ward@forestent.com (Kate Ward)
 10 | # https://github.com/kward/shlib
 11 | #
 12 | # This library provides reusable functions that determine actual names and
 13 | # versions of installed shells and the OS. The library can also be run as a
 14 | # script if set executable.
 15 | #
 16 | # Disable checks that aren't fully portable (POSIX != portable).
 17 | # shellcheck disable=SC2006
 18 | 
 19 | ARGV0=`basename "$0"`  # Script name; compared against 'versions' at the bottom of the file.
 20 | LSB_RELEASE='/etc/lsb-release'  # Read by versions_osVersion() on Linux.
 21 | VERSIONS_SHELLS='ash /bin/bash /bin/dash /bin/ksh /bin/pdksh /bin/zsh /bin/sh /usr/xpg4/bin/sh /sbin/sh'  # Shells reported by versions_main().
 22 | 
 23 | true; TRUE=$?  # Exit status of the 'true' command.
 24 | false; FALSE=$?  # Exit status of the 'false' command.
 25 | ERROR=2
 26 | 
 27 | UNAME_R=`uname -r`  # Kernel/OS release string.
 28 | UNAME_S=`uname -s`  # Kernel/OS name; selects the branch in versions_osName/versions_osVersion.
 29 | 
 30 | __versions_haveStrings=${ERROR}  # ERROR means "not probed yet"; see _versions_have_strings().
 31 | 
 32 | versions_osName() {  # Prints an OS name derived from UNAME_S, or 'unrecognized' if no branch matches.
 33 |   os_name_='unrecognized'
 34 |   os_system_=${UNAME_S}
 35 |   os_release_=${UNAME_R}
 36 |   case ${os_system_} in
 37 |     CYGWIN_NT-*) os_name_='Cygwin' ;;
 38 |     Darwin)
 39 |       os_name_=`/usr/bin/sw_vers -productName`
 40 |       os_version_=`versions_osVersion`  # Runs in a subshell, so its 'unset' does not touch our variables.
 41 |       case ${os_version_} in
 42 |         10.4|10.4.[0-9]*) os_name_='Mac OS X Tiger' ;;
 43 |         10.5|10.5.[0-9]*) os_name_='Mac OS X Leopard' ;;
 44 |         10.6|10.6.[0-9]*) os_name_='Mac OS X Snow Leopard' ;;
 45 |         10.7|10.7.[0-9]*) os_name_='Mac OS X Lion' ;;
 46 |         10.8|10.8.[0-9]*) os_name_='Mac OS X Mountain Lion' ;;
 47 |         10.9|10.9.[0-9]*) os_name_='Mac OS X Mavericks' ;;
 48 |         10.10|10.10.[0-9]*) os_name_='Mac OS X Yosemite' ;;
 49 |         10.11|10.11.[0-9]*) os_name_='Mac OS X El Capitan' ;;
 50 |         10.12|10.12.[0-9]*) os_name_='macOS Sierra' ;;
 51 |         10.13|10.13.[0-9]*) os_name_='macOS High Sierra' ;;
 52 |         *) os_name_='macOS' ;;  # Any version not listed above.
 53 |       esac
 54 |       ;;
 55 |     FreeBSD) os_name_='FreeBSD' ;;
 56 |     Linux) os_name_='Linux' ;;
 57 |     SunOS)
 58 |       os_name_='SunOS'
 59 |       if [ -r '/etc/release' ]; then  # Refine to OpenSolaris/Solaris when the release file is readable.
 60 |         if grep 'OpenSolaris' /etc/release >/dev/null; then
 61 |           os_name_='OpenSolaris'
 62 |         else
 63 |           os_name_='Solaris'
 64 |         fi
 65 |       fi
 66 |       ;;
 67 |   esac
 68 | 
 69 |   echo "${os_name_}"  # Quoted so the name is printed verbatim (no word splitting or globbing).
 70 |   unset os_name_ os_system_ os_release_ os_version_
 71 | }
 72 | 
 73 | versions_osVersion() {  # Prints the OS version for the system named by UNAME_S ('unrecognized' by default).
 74 |   os_version_='unrecognized'
 75 |   os_system_=${UNAME_S}
 76 |   os_release_=${UNAME_R}
 77 |   case ${os_system_} in
 78 |     CYGWIN_NT-*)
 79 |       os_version_=`expr "${os_release_}" : '\([0-9]*\.[0-9]*\.[0-9]*\).*'`  # x.y.z; y may be multi-digit.
 80 |       ;;
 81 |     Darwin)
 82 |       os_version_=`/usr/bin/sw_vers -productVersion`
 83 |       ;;
 84 |     FreeBSD)
 85 |       os_version_=`expr "${os_release_}" : '\([0-9]*\.[0-9]*\)-.*'`  # Keeps 'x.y' before the first '-'.
 86 |       ;;
 87 |     Linux)
 88 |       if [ -r '/etc/os-release' ]; then  # First readable release file wins.
 89 |         os_version_=`awk -F= '$1~/PRETTY_NAME/{print $2}' /etc/os-release \
 90 |           |sed 's/"//g'`
 91 |       elif [ -r '/etc/redhat-release' ]; then
 92 |         os_version_=`cat /etc/redhat-release`
 93 |       elif [ -r '/etc/SuSE-release' ]; then
 94 |         os_version_=`head -n 1 /etc/SuSE-release`
 95 |       elif [ -r "${LSB_RELEASE}" ]; then
 96 |         if grep -q 'DISTRIB_ID=Ubuntu' "${LSB_RELEASE}"; then
 97 |           # shellcheck disable=SC2002
 98 |           os_version_=`cat "${LSB_RELEASE}" \
 99 |             |awk -F= '$1~/DISTRIB_DESCRIPTION/{print $2}' \
100 |             |sed 's/"//g;s/ /-/g'`
101 |         fi
102 |       fi
103 |       ;;
104 |     SunOS)
105 |       if [ -r '/etc/release' ]; then
106 |         if grep 'OpenSolaris' /etc/release >/dev/null; then  # OpenSolaris
107 |           os_version_=`grep 'OpenSolaris' /etc/release |awk '{print $2"("$3")"}'`
108 |         else  # Solaris
109 |           major_=`echo "${os_release_}" |sed 's/[0-9]*\.\([0-9]*\)/\1/'`  # Part after the first dot.
110 |           minor_=`grep Solaris /etc/release |sed 's/[^u]*\(u[0-9]*\).*/\1/'`  # The 'u<N>' token.
111 |           os_version_="${major_}${minor_}"
112 |         fi
113 |       fi
114 |       ;;
115 |   esac
116 | 
117 |   echo "${os_version_}"
118 |   unset os_release_ os_system_ os_version_ major_ minor_
119 | }
120 | 
121 | versions_shellVersion() {  # Prints the version of shell $1, or 'not installed', 'unknown' or 'invalid'.
122 |   shell_=$1
123 | 
124 |   shell_present_=${FALSE}
125 |   case "${shell_}" in
126 |     ash) [ -x '/bin/busybox' ] && shell_present_=${TRUE} ;;  # ash is looked up via BusyBox.
127 |     *) [ -x "${shell_}" ] && shell_present_=${TRUE} ;;
128 |   esac
129 |   if [ ${shell_present_} -eq ${FALSE} ]; then
130 |     echo 'not installed'
131 |     unset shell_ shell_present_; return ${FALSE}  # Clean up temporaries on the early exit too.
132 |   fi
133 | 
134 |   version_=''
135 |   case ${shell_} in
136 |     /sbin/sh) ;; # SunOS
137 |     /usr/xpg4/bin/sh)
138 |       version_=`versions_shell_xpg4 "${shell_}"`
139 |       ;; # SunOS
140 |     */sh)
141 |       # This could be one of any number of shells. Try until one fits.
142 |       version_=''
143 |       [ -z "${version_}" ] && version_=`versions_shell_bash "${shell_}"`
144 |       # dash cannot be self determined yet
145 |       [ -z "${version_}" ] && version_=`versions_shell_ksh "${shell_}"`
146 |       # pdksh is covered in versions_shell_ksh()
147 |       [ -z "${version_}" ] && version_=`versions_shell_xpg4 "${shell_}"`
148 |       [ -z "${version_}" ] && version_=`versions_shell_zsh "${shell_}"`
149 |       ;;
150 |     ash) version_=`versions_shell_ash "${shell_}"` ;;
151 |     */bash) version_=`versions_shell_bash "${shell_}"` ;;
152 |     */dash)
153 |       # Assuming Ubuntu Linux until somebody comes up with a better test. The
154 |       # following test will return an empty string if dash is not installed.
155 |       version_=`versions_shell_dash`
156 |       ;;
157 |     */ksh) version_=`versions_shell_ksh "${shell_}"` ;;
158 |     */pdksh) version_=`versions_shell_pdksh "${shell_}"` ;;
159 |     */zsh) version_=`versions_shell_zsh "${shell_}"` ;;
160 |     *) version_='invalid' ;;  # Name matched none of the known shell patterns.
161 |   esac
162 | 
163 |   echo "${version_:-unknown}"  # An empty probe result is reported as 'unknown'.
164 |   unset shell_ shell_present_ version_
165 | }
166 | 
167 | # The ash shell is included in BusyBox. Prints the BusyBox version taken from the first help line.
168 | versions_shell_ash() {
169 |   busybox --help |head -n 1 |sed 's/BusyBox v\([0-9.]*\) .*/\1/'  # 'head -n 1': POSIX form of 'head -1'.
170 | }
171 | 
172 | versions_shell_bash() {  # Prints the bash version of shell $1; prints nothing if the output lacks 'GNU bash'.
173 |   "$1" --version : 2>&1 |grep 'GNU bash' |sed 's/.*version \([^ ]*\).*/\1/'  # $1 quoted: one path word.
174 | }
175 | 
176 | versions_shell_dash() {  # Prints the dash package version reported by dpkg; prints nothing without dpkg.
177 |   eval dpkg >/dev/null 2>&1  # Probe run; only the exit status is of interest.
178 |   if [ $? -eq 127 ]; then return; fi  # 127: dpkg was not found.
179 | 
180 |   dpkg -l |awk '/ dash /{print $3}'
181 | }
182 | 
183 | versions_shell_ksh() {  # Prints the version of the ksh-like shell at path $1 (possibly an empty line).
184 |   versions_shell_=$1
185 |   versions_version_=''
186 | 
187 |   # Try a few different ways to figure out the version.
188 |   versions_version_=`"${versions_shell_}" --version : 2>&1`  # Path quoted: treated as one word.
189 |   # shellcheck disable=SC2181
190 |   if [ $? -eq 0 ]; then  # $? is the status of the command substitution above.
191 |     versions_version_=`echo "${versions_version_}" \
192 |       |sed 's/.*\([0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]\).*/\1/'`
193 |   else
194 |     versions_version_=''
195 |   fi
196 |   if [ -z "${versions_version_}" ]; then  # Fallback 1: look for a 'Version' string in the binary.
197 |     _versions_have_strings
198 |     versions_version_=`strings "${versions_shell_}" 2>&1 \
199 |       |grep Version \
200 |       |sed 's/^.*Version \(.*\)$/\1/;s/ s+ \$$//;s/ /-/g'`
201 |   fi
202 |   if [ -z "${versions_version_}" ]; then  # Fallback 2: treat the binary as pdksh.
203 |     versions_version_=`versions_shell_pdksh "${versions_shell_}"`
204 |   fi
205 | 
206 |   echo "${versions_version_}"
207 |   unset versions_shell_ versions_version_
208 | }
209 | 
210 | versions_shell_pdksh() {
211 |   _versions_have_strings
212 |   strings "$1" 2>&1 \
213 |   |grep 'PD KSH' \
214 |   |sed -e 's/.*PD KSH \(.*\)/\1/;s/ /-/g'
215 | }
216 | 
217 | versions_shell_xpg4() {
218 |   _versions_have_strings
219 |   strings "$1" 2>&1 \
220 |   |grep 'Version' \
221 |   |sed -e 's/^@(#)Version //'
222 | }
223 | 
224 | versions_shell_zsh() {  # Prints the zsh version of the shell at path $1 (possibly an empty line).
225 |   versions_shell_=$1
226 | 
227 |   # Try a few different ways to figure out the version.
228 |   # shellcheck disable=SC2016
229 |   versions_version_=`echo 'echo ${ZSH_VERSION}' |"${versions_shell_}"`  # Ask the shell itself first.
230 |   if [ -z "${versions_version_}" ]; then
231 |     versions_version_=`"${versions_shell_}" --version : 2>&1`  # Path quoted: treated as one word.
232 |     # shellcheck disable=SC2181
233 |     if [ $? -eq 0 ]; then  # $? is the status of the command substitution above.
234 |       versions_version_=`echo "${versions_version_}" |awk '{print $2}'`  # Second field of the output.
235 |     else
236 |       versions_version_=''
237 |     fi
238 |   fi
239 | 
240 |   echo "${versions_version_}"
241 |   unset versions_shell_ versions_version_
242 | }
243 | 
244 | # Determine if the 'strings' binary is installed; the result is cached in __versions_haveStrings.
245 | _versions_have_strings() {
246 |   [ ${__versions_haveStrings} -ne ${ERROR} ] && return  # Already probed (value is no longer ERROR).
247 |   if eval strings /dev/null >/dev/null 2>&1; then  # Probe by running strings on an empty input.
248 |     __versions_haveStrings=${TRUE}
249 |     return
250 |   fi
251 | 
252 |   echo 'WARN: strings not installed. try installing binutils?' >&2  # Warning only; callers still proceed.
253 |   __versions_haveStrings=${FALSE}
254 | }
255 | 
256 | versions_main() {  # Prints the OS name/version, then one line per shell listed in VERSIONS_SHELLS.
257 |   # Treat unset variables as an error.
258 |   set -u
259 | 
260 |   os_name=`versions_osName`
261 |   os_version=`versions_osVersion`
262 |   echo "os: ${os_name} version: ${os_version}"
263 | 
264 |   for shell in ${VERSIONS_SHELLS}; do  # Unquoted on purpose: the list is split on whitespace.
265 |     shell_version=`versions_shellVersion "${shell}"`  # Runs in a subshell; its status is not checked.
266 |     echo "shell: ${shell} version: ${shell_version}"
267 |   done
268 | }
269 | 
270 | if [ "${ARGV0}" = 'versions' ]; then  # Run main only when invoked under the name 'versions'.
271 |   versions_main "$@"
272 | fi
273 | 


--------------------------------------------------------------------------------
/dev/tests/shunit2/shunit2_misc_test.sh:
--------------------------------------------------------------------------------
  1 | #! /bin/sh
  2 | # vim:et:ft=sh:sts=2:sw=2
  3 | #
  4 | # shUnit2 unit tests of miscellaneous things
  5 | #
  6 | # Copyright 2008-2018 Kate Ward. All Rights Reserved.
  7 | # Released under the Apache 2.0 license.
  8 | #
  9 | # Author: kate.ward@forestent.com (Kate Ward)
 10 | # https://github.com/kward/shunit2
 11 | #
 12 | ### ShellCheck http://www.shellcheck.net/
 13 | # $() are not fully portable (POSIX != portable).
 14 | #   shellcheck disable=SC2006
 15 | # Disable source following.
 16 | #   shellcheck disable=SC1090,SC1091
 17 | # Not wanting to escape single quotes.
 18 | #   shellcheck disable=SC1003
 19 | 
 20 | # These variables will be overridden by the test helpers.
 21 | stdoutF="${TMPDIR:-/tmp}/STDOUT"  # Default capture file for stdout of the code under test.
 22 | stderrF="${TMPDIR:-/tmp}/STDERR"  # Default capture file for stderr of the code under test.
 23 | 
 24 | # Load test helpers.
 25 | . ./shunit2_test_helpers  # Relative path: the script must be run from its own directory.
 26 | 
 27 | # Note: the test script is prefixed with '#' chars so that shUnit2 does not
 28 | # incorrectly interpret the embedded functions as real functions.
 29 | testUnboundVariable() {  # A generated 'set -u' script that hits an unset $1 must be reported as FAILED.
 30 |   unittestF="${SHUNIT_TMPDIR}/unittest"  # Generated script; sed strips one leading '#' per line.
 31 |   sed 's/^#//' >"${unittestF}" <<EOF
 32 | ## Treat unset variables as an error when performing parameter expansion.
 33 | #set -u
 34 | #
 35 | #boom() { x=\$1; }  # This function goes boom if no parameters are passed!
 36 | #test_boom() {
 37 | #  assertEquals 1 1
 38 | #  boom  # No parameter given
 39 | #  assertEquals 0 \$?
 40 | #}
 41 | #SHUNIT_COLOR='none'
 42 | #. ${TH_SHUNIT}
 43 | EOF
 44 |   ( exec "${SHUNIT_SHELL:-sh}" "${unittestF}" >"${stdoutF}" 2>"${stderrF}" )  # Run it in a child shell.
 45 |   assertFalse 'expected a non-zero exit value' $?
 46 |   grep '^ASSERT:Unknown failure' "${stdoutF}" >/dev/null
 47 |   assertTrue 'assert message was not generated' $?
 48 |   grep '^Ran [0-9]* test' "${stdoutF}" >/dev/null
 49 |   assertTrue 'test count message was not generated' $?
 50 |   grep '^FAILED' "${stdoutF}" >/dev/null
 51 |   assertTrue 'failure message was not generated' $?
 52 | }
 53 | 
 54 | # Regression test: assertEquals must not repeat its message argument in the output.
 55 | # https://github.com/kward/shunit2/issues/7
 56 | testIssue7() {
 57 |   # Disable coloring so 'ASSERT:' lines can be matched correctly.
 58 |   _shunit_configureColor 'none'
 59 | 
 60 |   ( assertEquals 'Some message.' 1 2 >"${stdoutF}" 2>"${stderrF}" )  # Deliberately failing assertion.
 61 |   diff "${stdoutF}" - >/dev/null <<EOF
 62 | ASSERT:Some message. expected:<1> but was:<2>
 63 | EOF
 64 |   rtrn=$?  # 0 only if stdout is exactly the single expected line.
 65 |   assertEquals "${SHUNIT_TRUE}" "${rtrn}"
 66 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2  # Show captured stderr on mismatch.
 67 | }
 68 | 
 69 | # Support prefixes on test output.
 70 | # https://github.com/kward/shunit2/issues/29
 71 | testIssue29() {  # With SHUNIT_TEST_PREFIX='--- ', the test name line must start with that prefix.
 72 |   unittestF="${SHUNIT_TMPDIR}/unittest"  # Generated script; sed strips one leading '#' per line.
 73 |   sed 's/^#//' >"${unittestF}" <<EOF
 74 | ## Support test prefixes.
 75 | #test_assert() { assertTrue ${SHUNIT_TRUE}; }
 76 | #SHUNIT_COLOR='none'
 77 | #SHUNIT_TEST_PREFIX='--- '
 78 | #. ${TH_SHUNIT}
 79 | EOF
 80 |   ( exec "${SHUNIT_SHELL:-sh}" "${unittestF}" >"${stdoutF}" 2>"${stderrF}" )  # Run it in a child shell.
 81 |   grep '^--- test_assert' "${stdoutF}" >/dev/null
 82 |   rtrn=$?
 83 |   assertEquals "${SHUNIT_TRUE}" "${rtrn}"
 84 |   [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stdoutF}" >&2  # Show captured stdout on mismatch.
 85 | }
 86 | 
 87 | # shUnit2 should not exit with 0 when it has syntax errors.
 88 | # https://github.com/kward/shunit2/issues/69
 89 | testIssue69() {  # Each assert* called without arguments must make the generated suite print FAILED.
 90 |   unittestF="${SHUNIT_TMPDIR}/unittest"
 91 | 
 92 |   for t in Equals NotEquals Null NotNull Same NotSame True False; do
 93 |     assert="assert${t}"  # Name of the assert function under test; expanded into the heredoc below.
 94 |     sed 's/^#//' >"${unittestF}" <<EOF
 95 | ## Asserts with invalid argument counts should be counted as failures.
 96 | #test_assert() { ${assert}; }
 97 | #SHUNIT_COLOR='none'
 98 | #. ${TH_SHUNIT}
 99 | EOF
100 |     ( exec "${SHUNIT_SHELL:-sh}" "${unittestF}" >"${stdoutF}" 2>"${stderrF}" )  # Run it in a child shell.
101 |     grep '^FAILED' "${stdoutF}" >/dev/null
102 |     assertTrue "failure message for ${assert} was not generated" $?
103 |   done
104 | }
105 | 
106 | # Ensure that test fails if setup/teardown functions fail.
107 | testIssue77() {  # For each hook, a generated suite whose hook returns SHUNIT_FALSE must print FAILED.
108 |   unittestF="${SHUNIT_TMPDIR}/unittest"
109 |   for func in oneTimeSetUp setUp tearDown oneTimeTearDown; do
110 |     sed 's/^#//' >"${unittestF}" <<EOF
111 | ## Environment failure should end test.
112 | #${func}() { return ${SHUNIT_FALSE}; }
113 | #test_true() { assertTrue ${SHUNIT_TRUE}; }
114 | #SHUNIT_COLOR='none'
115 | #. ${TH_SHUNIT}
116 | EOF
117 |     ( exec "${SHUNIT_SHELL:-sh}" "${unittestF}" >"${stdoutF}" 2>"${stderrF}" )  # Run it in a child shell.
118 |     grep '^FAILED' "${stdoutF}" >/dev/null
119 |     assertTrue "failure of ${func}() did not end test" $?
120 |   done
121 | }
122 | 
123 | # Ensure a test failure is recorded for code containing syntax errors.
124 | # https://github.com/kward/shunit2/issues/84
125 | testIssue84() {
126 |   unittestF="${SHUNIT_TMPDIR}/unittest"  # Generated script; sed strips one leading '#' per line.
127 |   sed 's/^#//' >"${unittestF}" <<\EOF
128 | ## Function with syntax error.
129 | #syntax_error() { ${!#3442} -334 a$@2[1]; }
130 | #test_syntax_error() {
131 | #  syntax_error
132 | #  assertTrue ${SHUNIT_TRUE}
133 | #}
134 | #SHUNIT_COLOR='none'
135 | #SHUNIT_TEST_PREFIX='--- '
136 | #. ${TH_SHUNIT}
137 | EOF
138 |   ( exec "${SHUNIT_SHELL:-sh}" "${unittestF}" >"${stdoutF}" 2>"${stderrF}" )  # Run it in a child shell.
139 |   grep '^FAILED' "${stdoutF}" >/dev/null
140 |   assertTrue 'failure message for syntax error was not generated' $?  # No stale ${assert} in the text.
141 | }
142 | 
143 | testPrepForSourcing() {  # Expected outputs of _shunit_prepForSourcing for three path forms.
144 |   assertEquals '/abc' "`_shunit_prepForSourcing '/abc'`"  # Absolute path: expected unchanged.
145 |   assertEquals './abc' "`_shunit_prepForSourcing './abc'`"  # Already './'-prefixed: expected unchanged.
146 |   assertEquals './abc' "`_shunit_prepForSourcing 'abc'`"  # Bare name: expected to gain a './' prefix.
147 | }
148 | 
149 | testEscapeCharInStr() {  # Table-driven check of _shunit_escapeCharInStr; columns: desc char str want.
150 |   while read -r desc char str want; do  # -r keeps the backslashes in the table literal.
151 |     got=`_shunit_escapeCharInStr "${char}" "${str}"`
152 |     assertEquals "${desc}" "${want}" "${got}"
153 |   done <<'EOF'
154 | backslash      \ ''       ''
155 | backslash_pre  \ \def     \\def
156 | backslash_mid  \ abc\def  abc\\def
157 | backslash_post \ abc\     abc\\
158 | quote          " ''       ''
159 | quote_pre      " "def     \"def
160 | quote_mid      " abc"def  abc\"def
161 | quote_post     " abc"     abc\"
162 | string         $ ''       ''
163 | string_pre     $ $def     \$def
164 | string_mid     $ abc$def  abc\$def
165 | string_post    $ abc$     abc\$
166 | EOF
167 | 
168 |   # TODO(20170924:kward) fix or remove.
169 | #  actual=`_shunit_escapeCharInStr "'" ''`
170 | #  assertEquals '' "${actual}"
171 | #  assertEquals "abc\\'" `_shunit_escapeCharInStr "'" "abc'"`
172 | #  assertEquals "abc\\'def" `_shunit_escapeCharInStr "'" "abc'def"`
173 | #  assertEquals "\\'def" `_shunit_escapeCharInStr "'" "'def"`
174 | 
175 | #  # Must put the backtick in a variable so the shell doesn't misinterpret it
176 | #  # while inside a backticked sequence (e.g. `echo '`'` would fail).
177 | #  backtick='`'
178 | #  actual=`_shunit_escapeCharInStr ${backtick} ''`
179 | #  assertEquals '' "${actual}"
180 | #  assertEquals '\`abc' \
181 | #      `_shunit_escapeCharInStr "${backtick}" ${backtick}'abc'`
182 | #  assertEquals 'abc\`' \
183 | #      `_shunit_escapeCharInStr "${backtick}" 'abc'${backtick}`
184 | #  assertEquals 'abc\`def' \
185 | #      `_shunit_escapeCharInStr "${backtick}" 'abc'${backtick}'def'`
186 | }
187 | 
188 | testEscapeCharInStr_specialChars() {  # Escaping '\' in the string '/' must leave the '/' untouched.
189 |   # Make sure our forward slash doesn't upset sed.
190 |   assertEquals '/' "`_shunit_escapeCharInStr '\' '/'`"
191 | 
192 |   # Some shells escape these differently.
193 |   # TODO(20170924:kward) fix or remove.
194 |   #assertEquals '\\a' `_shunit_escapeCharInStr '\' '\a'`
195 |   #assertEquals '\\b' `_shunit_escapeCharInStr '\' '\b'`
196 | }
197 | 
198 | # Test the various ways of declaring functions.
199 | #
200 | # Prefixing (then stripping) with comment symbol so these functions aren't
201 | # treated as real functions by shUnit2.
202 | testExtractTestFunctions() {
203 |   f="${SHUNIT_TMPDIR}/extract_test_functions"  # Sample script to scan; sed strips one leading '#' per line.
204 |   sed 's/^#//' <<EOF >"${f}"
205 | ## Function on a single line.
206 | #testABC() { echo 'ABC'; }
207 | ## Multi-line function with '{' on next line.
208 | #test_def()
209 | # {
210 | #  echo 'def'
211 | #}
212 | ## Multi-line function with '{' on first line.
213 | #testG3 () {
214 | #  echo 'G3'
215 | #}
216 | ## Function with numerical values in name.
217 | #function test4() { echo '4'; }
218 | ## Leading space in front of function.
219 | #	test5() { echo '5'; }
220 | ## Function with '_' chars in name.
221 | #some_test_function() { echo 'some func'; }
222 | ## Function that sets variables.
223 | #func_with_test_vars() {
224 | #  testVariable=1234
225 | #}
226 | EOF
227 | 
228 |   actual=`_shunit_extractTestFunctions "${f}"`
229 |   assertEquals 'testABC test_def testG3 test4 test5' "${actual}"  # Only names starting with 'test' are expected.
230 | }
231 | 
232 | # Test that certain external commands sometimes "stubbed" by users
233 | # are escaped. See Issue #54.
234 | testProtectedCommands() {  # Scans the shunit2 source (TH_SHUNIT) for calls lacking a 'command' prefix.
235 |   for c in mkdir rm cat chmod; do
236 |     grep "^[^#]*${c} " "${TH_SHUNIT}" | grep -qv "command ${c}"  # Status 0 if an unprotected use exists.
237 |     assertFalse "external call to ${c} not protected somewhere" $?
238 |   done
239 |   grep '^[^#]*[^ ]  *\[' "${TH_SHUNIT}" | grep -qv 'command \['  # Same check for the '[' builtin.
240 |   assertFalse "call to [ ... ] not protected somewhere" $?
241 |   grep '^[^#]*  *\.' "${TH_SHUNIT}" | grep -qv 'command \.'  # Same check for the '.' (source) builtin.
242 |   assertFalse "call to . not protected somewhere" $?
243 | }
244 | 
245 | setUp() {  # Per-test setup: empties the capture files and restores the default color setting.
246 |   for f in "${stdoutF}" "${stderrF}"; do
247 |     cp /dev/null "${f}"  # Truncates (or creates) the capture file.
248 |   done
249 | 
250 |   # Reconfigure coloring as some tests override default behavior.
251 |   _shunit_configureColor "${SHUNIT_COLOR_DEFAULT}"  # Value saved by oneTimeSetUp().
252 | }
253 | 
254 | oneTimeSetUp() {  # Suite-level setup: remembers the color setting, then runs the shared helper setup.
255 |   SHUNIT_COLOR_DEFAULT="${SHUNIT_COLOR}"  # Restored before each test by setUp().
256 |   th_oneTimeSetUp  # Defined outside this file (presumably in shunit2_test_helpers - confirm).
257 | }
258 | 
259 | # Load and run shUnit2.
260 | # shellcheck disable=SC2034
261 | [ -n "${ZSH_VERSION:-}" ] && SHUNIT_PARENT=$0  # Only set when running under zsh.
262 | . "${TH_SHUNIT}"  # TH_SHUNIT is not set in this file; presumably provided by the test helpers.
263 | 


--------------------------------------------------------------------------------
/dev/tests/shunit2/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/dev/common_install.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

## Installer script suitable for osync / obackup / pmocr

# Template placeholder rather than a usable value; presumably replaced with the
# real program name when the final installer is generated -- TODO confirm
# against dev/merge.sh.
PROGRAM=[prgname]

# Read the "PROGRAM_VERSION=..." line from <program>.sh in the current working
# directory, then keep only what follows the first '='.
PROGRAM_VERSION=$(grep "PROGRAM_VERSION=" $PROGRAM.sh)
PROGRAM_VERSION=${PROGRAM_VERSION#*=}
PROGRAM_BINARY=$PROGRAM".sh"
PROGRAM_BATCH=$PROGRAM"-batch.sh"
SSH_FILTER="ssh_filter.sh"

SCRIPT_BUILD=2020112901
INSTANCE_ID="installer-$SCRIPT_BUILD"

## osync / obackup / pmocr / zsnap install script
## Tested on RHEL / CentOS 6 & 7, Fedora 23, Debian 7 & 8, Mint 17 and FreeBSD 8, 10 and 11
## Please adapt this to fit your distro needs

# NOTE(review): the "include" lines in this file are not commands defined here;
# they look like template directives that get expanded with the named function
# subsets at build time -- verify against dev/merge.sh.
include #### OFUNCTIONS MICRO SUBSET ####

# Get current install.sh path from http://stackoverflow.com/a/246128/2635443
SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Defaults; each can be overridden later by a command line switch.
_LOGGER_SILENT=false
_STATS=1
ACTION="install"
FAKEROOT=""

## Default log file
# FAKEROOT is still empty here (the command line is parsed further down), so
# the first test checks the real /var/log. Fallbacks: $HOME, then the current
# directory.
if [ -w "$FAKEROOT/var/log" ]; then
	LOG_FILE="$FAKEROOT/var/log/$PROGRAM-install.log"
elif ([ "$HOME" != "" ] && [ -w "$HOME" ]); then
	LOG_FILE="$HOME/$PROGRAM-install.log"
else
	LOG_FILE="./$PROGRAM-install.log"
fi

include #### UrlEncode SUBSET ####
include #### GetLocalOS SUBSET ####
include #### GetConfFileValue SUBSET ####
include #### CleanUp SUBSET ####
include #### GenericTrapQuit SUBSET ####
 45 | function SetLocalOSSettings {
 46 | 	USER=root
 47 | 	DO_INIT=true
 48 | 
 49 | 	# LOCAL_OS and LOCAL_OS_FULL are global variables set at GetLocalOS
 50 | 
 51 | 	case $LOCAL_OS in
 52 | 		*"BSD"*)
 53 | 		GROUP=wheel
 54 | 		;;
 55 | 		*"MacOSX"*)
 56 | 		GROUP=admin
 57 | 		DO_INIT=false
 58 | 		;;
 59 | 		*"Cygwin"*|*"Android"*|*"msys"*|*"BusyBox"*)
 60 | 		USER=""
 61 | 		GROUP=""
 62 | 		DO_INIT=false
 63 | 		;;
 64 | 		*)
 65 | 		GROUP=root
 66 | 		;;
 67 | 	esac
 68 | 
 69 | 	if [ "$LOCAL_OS" == "Android" ] || [ "$LOCAL_OS" == "BusyBox" ]; then
 70 | 		Logger "Cannot be installed on [$LOCAL_OS]. Please use $PROGRAM.sh directly." "CRITICAL"
 71 | 		exit 1
 72 | 	fi
 73 | 
 74 | 	if ([ "$USER" != "" ] && [ "$(whoami)" != "$USER" ] && [ "$FAKEROOT" == "" ]); then
 75 | 		Logger "Must be run as $USER." "CRITICAL"
 76 | 		exit 1
 77 | 	fi
 78 | 
 79 | 	OS=$(UrlEncode "$LOCAL_OS_FULL")
 80 | }
 81 | 
 82 | function GetInit {
 83 | 	if [ -f /sbin/openrc-run ]; then
 84 | 		init="openrc"
 85 | 		Logger "Detected openrc." "NOTICE"
 86 | 	elif [ -f /sbin/init ]; then
 87 | 		if file /sbin/init | grep systemd > /dev/null; then
 88 | 			init="systemd"
 89 | 			Logger "Detected systemd." "NOTICE"
 90 | 		else
 91 | 			init="initV"
 92 | 			Logger "Detected initV." "NOTICE"
 93 | 		fi
 94 | 	else
 95 | 		Logger "Can't detect initV, systemd or openRC. Service files won't be installed. You can still run $PROGRAM manually or via cron." "WARN"
 96 | 		init="none"
 97 | 	fi
 98 | }
 99 | 
# Creates a directory (including parents) if it does not exist yet and
# optionally sets its ownership.
#
# Arguments:
#   $1 - directory path
#   $2 - optional umask applied while creating the directory (only used when
#        IsInteger reports it as an integer)
#   $3 - optional owner
#   $4 - optional group (only used together with an owner)
# Exits 1 when the directory cannot be created or chown fails.
function CreateDir {
	local dir="${1}"
	local dirMask="${2}"
	local dirUser="${3}"
	local dirGroup="${4}"

	# Kept local so the caller's / global userGroup is not clobbered
	local userGroup=""

	if [ ! -d "$dir" ]; then
		# Subshell keeps the temporary umask from leaking into the rest of the script
		(
		if [ "$(IsInteger "$dirMask")" -eq 1 ]; then
			umask "$dirMask"
		fi
		mkdir -p "$dir"
		)
		if [ $? == 0 ]; then
			Logger "Created directory [$dir]." "NOTICE"
		else
			Logger "Cannot create directory [$dir]." "CRITICAL"
			exit 1
		fi
	fi

	if [ "$dirUser" != "" ]; then
		userGroup="$dirUser"
		if [ "$dirGroup" != "" ]; then
			userGroup="$userGroup"":$dirGroup"
		fi
		chown "$userGroup" "$dir"
		if [ $? != 0 ]; then
			Logger "Could not set directory ownership on [$dir] to [$userGroup]." "CRITICAL"
			exit 1
		else
			Logger "Set file ownership on [$dir] to [$userGroup]." "NOTICE"
		fi
	fi
}
135 | 
# Copies a single file and optionally sets its mode and ownership.
#
# Arguments:
#   $1 - source directory
#   $2 - destination directory
#   $3 - source file name
#   $4 - destination file name (defaults to the source file name)
#   $5 - optional file mode (applied only when IsInteger accepts it)
#   $6 - optional owner
#   $7 - optional group (only used together with an owner)
#   $8 - "true" to overwrite an existing destination; otherwise (default
#        "false") an existing destination is kept and the copy is written
#        next to it as "<destination file name>.new"
# Exits 1 when the copy, chmod or chown fails.
function CopyFile {
	local sourcePath="${1}"
	local destPath="${2}"
	local sourceFileName="${3}"
	local destFileName="${4}"
	local fileMod="${5}"
	local fileUser="${6}"
	local fileGroup="${7}"
	local overwrite="${8:-false}"

	local userGroup=""

	if [ "$destFileName" == "" ]; then
		destFileName="$sourceFileName"
	fi

	if [ -f "$destPath/$destFileName" ] && [ "$overwrite" == false ]; then
		# Derive the fallback name from the destination, not the source, so a
		# renamed copy still lands next to the file it would have replaced.
		destFileName="$destFileName.new"
		Logger "Copying [$sourceFileName] to [$destPath/$destFileName]." "NOTICE"
	fi

	if ! cp "$sourcePath/$sourceFileName" "$destPath/$destFileName"; then
		Logger "Cannot copy [$sourcePath/$sourceFileName] to [$destPath/$destFileName]. Make sure to run install script in the directory containing all other files." "CRITICAL"
		# Report the directory that was actually written to
		Logger "Also make sure you have permissions to write to [$destPath]." "ERROR"
		exit 1
	fi
	Logger "Copied [$sourcePath/$sourceFileName] to [$destPath/$destFileName]." "NOTICE"

	if [ "$(IsInteger "$fileMod")" -eq 1 ]; then
		if ! chmod "$fileMod" "$destPath/$destFileName"; then
			Logger "Cannot set file permissions of [$destPath/$destFileName] to [$fileMod]." "CRITICAL"
			exit 1
		fi
		Logger "Set file permissions to [$fileMod] on [$destPath/$destFileName]." "NOTICE"
	elif [ "$fileMod" != "" ]; then
		Logger "Bogus filemod [$fileMod] for [$destPath] given." "WARN"
	fi

	if [ "$fileUser" != "" ]; then
		userGroup="$fileUser"

		if [ "$fileGroup" != "" ]; then
			userGroup="$userGroup"":$fileGroup"
		fi

		if ! chown "$userGroup" "$destPath/$destFileName"; then
			Logger "Could not set file ownership on [$destPath/$destFileName] to [$userGroup]." "CRITICAL"
			exit 1
		fi
		Logger "Set file ownership on [$destPath/$destFileName] to [$userGroup]." "NOTICE"
	fi
}
193 | 
# Copies every known example / default configuration file that is present in
# SCRIPT_PATH into CONF_DIR. Existing files are never overwritten (CopyFile is
# called with overwrite=false).
#
# Globals read: SCRIPT_PATH, CONF_DIR
function CopyExampleFiles {
	# Both helpers are local so they do not leak into the rest of the installer
	local file
	local exampleFiles=(
		"sync.conf.example"		# osync
		"host_backup.conf.example"	# obackup
		"exclude.list.example"		# osync & obackup
		"snapshot.conf.example"		# zsnap
		"default.conf"			# pmocr
	)

	for file in "${exampleFiles[@]}"; do
		if [ -f "$SCRIPT_PATH/$file" ]; then
			CopyFile "$SCRIPT_PATH" "$CONF_DIR" "$file" "$file" "" "" "" false
		fi
	done
}
208 | 
# Copies the program executable(s) into BIN_DIR with mode 755, overwriting any
# previous version. osync and obackup additionally ship a batch script and the
# ssh filter.
#
# Globals read: PROGRAM, PROGRAM_BINARY, PROGRAM_BATCH, SSH_FILTER, USER,
#               GROUP, FAKEROOT, SCRIPT_PATH, BIN_DIR
function CopyProgram {
	local file
	local user=""
	local group=""
	local binFiles=("$PROGRAM_BINARY")

	if [ "$PROGRAM" == "osync" ] || [ "$PROGRAM" == "obackup" ]; then
		binFiles+=("$PROGRAM_BATCH" "$SSH_FILTER")
	fi

	# Ownership is only applied when no fake root is in use
	if [ "$FAKEROOT" == "" ]; then
		user="$USER"
		group="$GROUP"
	fi

	for file in "${binFiles[@]}"; do
		CopyFile "$SCRIPT_PATH" "$BIN_DIR" "$file" "$file" 755 "$user" "$group" true
	done
}
231 | 
# Installs the service file(s) matching the init system stored in "init"
# (systemd, initV or openrc) and logs how to activate them. Nothing is
# installed when the matching service file is missing from SCRIPT_PATH.
#
# Globals read: init, SCRIPT_PATH, CONF_DIR, SERVICE_NAME, SERVICE_DIR_*,
#               SERVICE_FILE_*, TARGET_HELPER_SERVICE_NAME,
#               TARGET_HELPER_SERVICE_FILE_*
function CopyServiceFiles {
	if [ "$init" == "systemd" ] && [ -f "$SCRIPT_PATH/$SERVICE_FILE_SYSTEMD_SYSTEM" ]; then
		CreateDir "$SERVICE_DIR_SYSTEMD_SYSTEM"
		CopyFile "$SCRIPT_PATH" "$SERVICE_DIR_SYSTEMD_SYSTEM" "$SERVICE_FILE_SYSTEMD_SYSTEM" "$SERVICE_FILE_SYSTEMD_SYSTEM" "" "" "" true
		if [ -f "$SCRIPT_PATH/$SERVICE_FILE_SYSTEMD_USER" ]; then
			CreateDir "$SERVICE_DIR_SYSTEMD_USER"
			CopyFile "$SCRIPT_PATH" "$SERVICE_DIR_SYSTEMD_USER" "$SERVICE_FILE_SYSTEMD_USER" "$SERVICE_FILE_SYSTEMD_USER" "" "" "" true
		fi

		# Optional target helper service
		if [ -f "$SCRIPT_PATH/$TARGET_HELPER_SERVICE_FILE_SYSTEMD_SYSTEM" ]; then
			CopyFile "$SCRIPT_PATH" "$SERVICE_DIR_SYSTEMD_SYSTEM" "$TARGET_HELPER_SERVICE_FILE_SYSTEMD_SYSTEM" "$TARGET_HELPER_SERVICE_FILE_SYSTEMD_SYSTEM" "" "" "" true
			Logger "Created optional service [$TARGET_HELPER_SERVICE_NAME] with same specifications as below." "NOTICE"
		fi
		if [ -f "$SCRIPT_PATH/$TARGET_HELPER_SERVICE_FILE_SYSTEMD_USER" ]; then
			CopyFile "$SCRIPT_PATH" "$SERVICE_DIR_SYSTEMD_USER" "$TARGET_HELPER_SERVICE_FILE_SYSTEMD_USER" "$TARGET_HELPER_SERVICE_FILE_SYSTEMD_USER" "" "" "" true
		fi

		Logger "Created [$SERVICE_NAME] service in [$SERVICE_DIR_SYSTEMD_SYSTEM] and [$SERVICE_DIR_SYSTEMD_USER]." "NOTICE"
		# The service name is expanded here so the hint can be used as shown
		Logger "Can be activated with [systemctl start $SERVICE_NAME@instance.conf] where instance.conf is the name of the config file in $CONF_DIR." "NOTICE"
		Logger "Can be enabled on boot with [systemctl enable $SERVICE_NAME@instance.conf]." "NOTICE"
		Logger "In userland, active with [systemctl --user start $SERVICE_NAME@instance.conf]." "NOTICE"
	elif [ "$init" == "initV" ] && [ -f "$SCRIPT_PATH/$SERVICE_FILE_INIT" ] && [ -d "$SERVICE_DIR_INIT" ]; then
		#CreateDir "$SERVICE_DIR_INIT"
		CopyFile "$SCRIPT_PATH" "$SERVICE_DIR_INIT" "$SERVICE_FILE_INIT" "$SERVICE_FILE_INIT" "755" "" "" true
		if [ -f "$SCRIPT_PATH/$TARGET_HELPER_SERVICE_FILE_INIT" ]; then
			CopyFile "$SCRIPT_PATH" "$SERVICE_DIR_INIT" "$TARGET_HELPER_SERVICE_FILE_INIT" "$TARGET_HELPER_SERVICE_FILE_INIT" "755" "" "" true
			Logger "Created optional service [$TARGET_HELPER_SERVICE_NAME] with same specifications as below." "NOTICE"
		fi
		Logger "Created [$SERVICE_NAME] service in [$SERVICE_DIR_INIT]." "NOTICE"
		Logger "Can be activated with [service $SERVICE_FILE_INIT start]." "NOTICE"
		Logger "Can be enabled on boot with [chkconfig $SERVICE_FILE_INIT on]." "NOTICE"
	elif [ "$init" == "openrc" ] && [ -f "$SCRIPT_PATH/$SERVICE_FILE_OPENRC" ] && [ -d "$SERVICE_DIR_OPENRC" ]; then
		# Rename service to usual service file
		CopyFile "$SCRIPT_PATH" "$SERVICE_DIR_OPENRC" "$SERVICE_FILE_OPENRC" "$SERVICE_FILE_INIT" "755" "" "" true
		# The helper is looked up in SCRIPT_PATH, like every other service file
		if [ -f "$SCRIPT_PATH/$TARGET_HELPER_SERVICE_FILE_OPENRC" ]; then
			CopyFile "$SCRIPT_PATH" "$SERVICE_DIR_OPENRC" "$TARGET_HELPER_SERVICE_FILE_OPENRC" "$TARGET_HELPER_SERVICE_FILE_OPENRC" "755" "" "" true
			Logger "Created optional service [$TARGET_HELPER_SERVICE_NAME] with same specifications as below." "NOTICE"
		fi
		Logger "Created [$SERVICE_NAME] service in [$SERVICE_DIR_OPENRC]." "NOTICE"
		# Point at the configured directory instead of a hard-coded osync path
		Logger "Can be activated with [rc-update add $SERVICE_NAME.instance] where instance is a configuration file found in $CONF_DIR." "NOTICE"
	else
		Logger "Cannot properly find how to deal with init on this system. Skipping service file installation." "NOTICE"
	fi
}
277 | 
# Reports the installation by requesting STATS_LINK, trying wget first and
# curl second.
#
# Globals read: STATS_LINK
# Returns: 0 as soon as one of the tools succeeds, 1 otherwise.
function Statistics {
	if type wget > /dev/null 2>&1; then
		if wget -qO- "$STATS_LINK" > /dev/null 2>&1; then
			return 0
		fi
	fi

	if type curl > /dev/null 2>&1; then
		# -f makes curl fail on HTTP error responses, so an error page is not
		# reported as a successful submission (wget already fails on those)
		if curl -f "$STATS_LINK" -o /dev/null > /dev/null 2>&1; then
			return 0
		fi
	fi

	Logger "Neiter wget nor curl could be used for. Cannot run statistics. Use the provided link please." "WARN"
	return 1
}
296 | 
# Deletes a regular file and logs the outcome; a missing file is only noted.
#
# Arguments:
#   $1 - path of the file to remove
function RemoveFile {
	local target="${1}"

	# Nothing to do when the path is not an existing regular file
	if [ ! -f "$target" ]; then
		Logger "File [$target] not found. Skipping." "NOTICE"
		return
	fi

	if rm -f "$target"; then
		Logger "Removed file [$target]." "NOTICE"
	else
		Logger "Could not remove file [$target]." "ERROR"
	fi
}
311 | 
# Removes the installed executables and service files. Configuration files in
# CONF_DIR are deliberately left in place.
#
# Globals read: PROGRAM, PROGRAM_BINARY, PROGRAM_BATCH, SSH_FILTER, BIN_DIR,
#               CONF_DIR, SERVICE_DIR_*, SERVICE_FILE_*,
#               TARGET_HELPER_SERVICE_FILE_*
function RemoveAll {
	RemoveFile "$BIN_DIR/$PROGRAM_BINARY"

	if [ "$PROGRAM" == "osync" ] || [ "$PROGRAM" == "obackup" ]; then
		RemoveFile "$BIN_DIR/$PROGRAM_BATCH"
	fi

	if [ ! -f "$BIN_DIR/osync.sh" ] && [ ! -f "$BIN_DIR/obackup.sh" ]; then		# Check if any other program requiring ssh filter is present before removal
		RemoveFile "$BIN_DIR/$SSH_FILTER"
	else
		Logger "Skipping removal of [$BIN_DIR/$SSH_FILTER] because other programs present that need it." "NOTICE"
	fi
	RemoveFile "$SERVICE_DIR_SYSTEMD_SYSTEM/$SERVICE_FILE_SYSTEMD_SYSTEM"
	RemoveFile "$SERVICE_DIR_SYSTEMD_USER/$SERVICE_FILE_SYSTEMD_USER"
	RemoveFile "$SERVICE_DIR_INIT/$SERVICE_FILE_INIT"

	# Target helper services live in the same directories as the main service
	# files, under their own file names. Programs without a helper leave these
	# names empty, in which case there is nothing to remove.
	if [ "$TARGET_HELPER_SERVICE_FILE_SYSTEMD_SYSTEM" != "" ]; then
		RemoveFile "$SERVICE_DIR_SYSTEMD_SYSTEM/$TARGET_HELPER_SERVICE_FILE_SYSTEMD_SYSTEM"
	fi
	if [ "$TARGET_HELPER_SERVICE_FILE_SYSTEMD_USER" != "" ]; then
		RemoveFile "$SERVICE_DIR_SYSTEMD_USER/$TARGET_HELPER_SERVICE_FILE_SYSTEMD_USER"
	fi
	if [ "$TARGET_HELPER_SERVICE_FILE_INIT" != "" ]; then
		RemoveFile "$SERVICE_DIR_INIT/$TARGET_HELPER_SERVICE_FILE_INIT"
	fi
	if [ "$TARGET_HELPER_SERVICE_FILE_OPENRC" != "" ]; then
		RemoveFile "$SERVICE_DIR_OPENRC/$TARGET_HELPER_SERVICE_FILE_OPENRC"
	fi

	Logger "Skipping configuration files in [$CONF_DIR]. You may remove this directory manually." "NOTICE"
}
334 | 
# Prints the command line help to stdout and terminates the script with
# exit code 127.
function Usage {
	printf '%s\n' \
		"Installs $PROGRAM into $BIN_DIR" \
		"options:" \
		"--silent		Will log and bypass user interaction." \
		"--no-stats	Used with --silent in order to refuse sending anonymous install stats." \
		"--remove          Remove the program." \
		"--prefix=/path    Use prefix to install path."
	exit 127
}
344 | 
345 | ############################## Script entry point
346 | 
# Parses the installer's command line switches.
#
# Arguments: the script's positional parameters ("$@")
# Globals written: FAKEROOT (--prefix), _LOGGER_SILENT (--silent),
#                  _STATS (--no-stats), ACTION (--remove)
# Help switches and unknown options end in Usage, which exits the script.
function GetCommandlineArguments {
	local arg

	for arg in "$@"; do
		case "$arg" in
			--prefix=*)
			# Strip only up to the first '=' so a prefix path that itself
			# contains '=' is kept intact
			FAKEROOT="${arg#*=}"
			;;
			--silent)
			_LOGGER_SILENT=true
			;;
			--no-stats)
			_STATS=0
			;;
			--remove)
			ACTION="uninstall"
			;;
			# '?' is escaped: unescaped it is a glob matching any character
			--help|-h|-\?)
			Usage
			;;
			*)
			Logger "Unknown option '$arg'" "ERROR"
			# Usage terminates the script, nothing runs after it
			Usage
			;;
		esac
	done
}
373 | 
# Parse the command line first: every path below is rooted at FAKEROOT, which
# is set by --prefix.
GetCommandlineArguments "$@"

# Installation targets
CONF_DIR=$FAKEROOT/etc/$PROGRAM
BIN_DIR="$FAKEROOT/usr/local/bin"
SERVICE_DIR_INIT=$FAKEROOT/etc/init.d
# Should be /usr/lib/systemd/system, but /lib/systemd/system exists on debian & rhel / fedora
SERVICE_DIR_SYSTEMD_SYSTEM=$FAKEROOT/lib/systemd/system
SERVICE_DIR_SYSTEMD_USER=$FAKEROOT/etc/systemd/user
# OpenRC uses the same directory as initV
SERVICE_DIR_OPENRC=$FAKEROOT/etc/init.d

# Only osync and pmocr define a service name here; only osync additionally
# defines the target helper service and its file names.
if [ "$PROGRAM" == "osync" ]; then
	SERVICE_NAME="osync-srv"
	TARGET_HELPER_SERVICE_NAME="osync-target-helper-srv"

	TARGET_HELPER_SERVICE_FILE_INIT="$TARGET_HELPER_SERVICE_NAME"
	TARGET_HELPER_SERVICE_FILE_SYSTEMD_SYSTEM="$TARGET_HELPER_SERVICE_NAME@.service"
	TARGET_HELPER_SERVICE_FILE_SYSTEMD_USER="$TARGET_HELPER_SERVICE_NAME@.service.user"
	TARGET_HELPER_SERVICE_FILE_OPENRC="$TARGET_HELPER_SERVICE_NAME-openrc"
elif [ "$PROGRAM" == "pmocr" ]; then
	SERVICE_NAME="pmocr-srv"
fi

# Service file names per init system. For any other PROGRAM value SERVICE_NAME
# is not assigned above, so these expand from an unset variable.
SERVICE_FILE_INIT="$SERVICE_NAME"
SERVICE_FILE_SYSTEMD_SYSTEM="$SERVICE_NAME@.service"
SERVICE_FILE_SYSTEMD_USER="$SERVICE_NAME@.service.user"
SERVICE_FILE_OPENRC="$SERVICE_NAME-openrc"
400 | 
## Generic code

# GenericTrapQuit (from the included function subset) handles normal exit as
# well as TERM / HUP / QUIT.
trap GenericTrapQuit TERM EXIT HUP QUIT

# LOG_FILE may live under $HOME and therefore contain spaces, so every
# expansion is quoted to keep the directory test on a single word.
if [ ! -w "$(dirname "$LOG_FILE")" ]; then
	echo "Cannot write to log [$(dirname "$LOG_FILE")]."
else
	Logger "Script begin, logging to [$LOG_FILE]." "DEBUG"
fi

# Set default umask
umask 0022

GetLocalOS
SetLocalOSSettings
# On Mac OS this always produces a warning which causes the installer to fail with exit code 2
# Since we know it won't work anyway, and that's fine, just skip this step
if $DO_INIT; then
	GetInit
fi

# Built after SetLocalOSSettings because it embeds the url-encoded OS string
STATS_LINK="http://instcount.netpower.fr?program=$PROGRAM&version=$PROGRAM_VERSION&os=$OS&action=$ACTION"

if [ "$ACTION" == "uninstall" ]; then
	RemoveAll
	Logger "$PROGRAM uninstalled." "NOTICE"
else
	CreateDir "$CONF_DIR"
	CreateDir "$BIN_DIR"
	CopyExampleFiles
	CopyProgram
	# Only osync and pmocr have service files to install
	if [ "$PROGRAM" == "osync" ] || [ "$PROGRAM" == "pmocr" ]; then
		CopyServiceFiles
	fi
	Logger "$PROGRAM installed. Use with $BIN_DIR/$PROGRAM_BINARY" "NOTICE"
	if [ "$PROGRAM" == "osync" ] || [ "$PROGRAM" == "obackup" ]; then
		echo ""
		Logger "If connecting remotely, consider setup ssh filter to enhance security." "NOTICE"
		echo ""
	fi
fi

# Statistics are sent without asking in silent mode (unless --no-stats was
# given); interactive runs ask first and treat anything but n/N as consent.
if [ "$_STATS" -eq 1 ]; then
	if [ "$_LOGGER_SILENT" == true ]; then
		Statistics
	else
		Logger "In order to make usage statistics, the script would like to connect to $STATS_LINK" "NOTICE"
		read -r -p "No data except those in the url will be send. Allow [Y/n] " response
		case $response in
			[nN])
			exit
			;;
			*)
			Statistics
			exit $?
			;;
		esac
	fi
fi
460 | 


--------------------------------------------------------------------------------
/dev/tests/run_tests.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | # pmocr test suite 2018032501
  4 | 
  5 | PMOCR_DIR="$(pwd)"
  6 | PMOCR_DIR=${PMOCR_DIR%%/dev*}
  7 | DEV_DIR="$PMOCR_DIR/dev"
  8 | TESTS_DIR="$DEV_DIR/tests"
  9 | SOURCE_DIR="$TESTS_DIR/source"
 10 | CONF_DIR="$TESTS_DIR/conf"
 11 | 
 12 | TMP_FILE="$DEV_DIR/tmp"
 13 | 
 14 | BATCH_CONF="default.conf"
 15 | SERVICE_CONF="service.conf"
 16 | 
 17 | PMOCR_EXECUTABLE="pmocr.sh"
 18 | PMOCR_DEV_EXECUTABLE="dev/n_pmocr.sh"
 19 | 
 20 | PMOCR_TESTS_DIR="${HOME}/pmocr-tests"
 21 | 
 22 | BATCH_DIR="batch"
 23 | SERVICE_DIR="service"
 24 | SUCCEED_DIR="succesful"
 25 | FAILURE_DIR="failed"
 26 | 
 27 | PDF_DIR="PDF"
 28 | TXT_DIR="TEXT"
 29 | CSV_DIR="CSV"
 30 | 
 31 | SOURCE_FILE_1="lorem_tif.tif"
 32 | SOURCE_FILE_2="lorem_png.png"
 33 | SOURCE_FILE_3="lorem_pdf.pdf"
 34 | SOURCE_FILE_4="lorem_searchable_pdf.pdf"
 35 | 
 36 | # Force killing remaining services on aborted test runs
 37 | 
 38 | #trap TrapQuit TERM EXIT HUP QUIT
 39 | 
 40 | function TrapQuit {
 41 |         local result
 42 | 
 43 |         if [ -f "$SERVICE_MONITOR_FILE" ]; then
 44 |                 rm -f "$SERVICE_MONITOR_FILE"
 45 |         fi
 46 | 
 47 |         CleanUp
 48 |         KillChilds $$ > /dev/null 2>&1
 49 |         result=$?
 50 |         if [ $result -eq 0 ]; then
 51 |                 Logger "Service $PROGRAM stopped instance [$INSTANCE_ID] with pid [$$]." "NOTICE"
 52 |         else
 53 |             	Logger "Service $PROGRAM couldn't properly stop instance [$INSTANCE_ID] with pid [$$]." "ERROR"
 54 |         fi
 55 | 	exit $?
 56 | }
 57 | 
 58 | function PrepareLocalDirs () {
 59 | 	# Remote dirs are the same as local dirs, so no problem here
 60 | 	if [ -d "$PMOCR_TESTS_DIR" ]; then
 61 | 		rm -rf "$PMOCR_TESTS_DIR"
 62 | 	fi
 63 | 	mkdir -p "$PMOCR_TESTS_DIR"
 64 | 	mkdir "$PMOCR_TESTS_DIR/$BATCH_DIR"
 65 | 	mkdir "$PMOCR_TESTS_DIR/$SERVICE_DIR"
 66 | 	mkdir "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR"
 67 | 	mkdir "$PMOCR_TESTS_DIR/$SERVICE_DIR/$TXT_DIR"
 68 | 	mkdir "$PMOCR_TESTS_DIR/$SERVICE_DIR/$CSV_DIR"
 69 | 	mkdir "$PMOCR_TESTS_DIR/$SUCCEED_DIR"
 70 | 	mkdir "$PMOCR_TESTS_DIR/$FAILURE_DIR"
 71 | }
 72 | 
 73 | function CopyTestFiles () {
 74 | 	cp "$SOURCE_DIR/$SOURCE_FILE_1" "$PMOCR_TESTS_DIR/$BATCH_DIR"
 75 | 	cp "$SOURCE_DIR/$SOURCE_FILE_2" "$PMOCR_TESTS_DIR/$BATCH_DIR"
 76 | 	cp "$SOURCE_DIR/$SOURCE_FILE_3" "$PMOCR_TESTS_DIR/$BATCH_DIR"
 77 | 	cp "$SOURCE_DIR/$SOURCE_FILE_4" "$PMOCR_TESTS_DIR/$BATCH_DIR"
 78 | 
 79 | 	cp "$SOURCE_DIR/$SOURCE_FILE_1" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR"
 80 | 	cp "$SOURCE_DIR/$SOURCE_FILE_2" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR"
 81 | 	cp "$SOURCE_DIR/$SOURCE_FILE_3" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR"
 82 | 	cp "$SOURCE_DIR/$SOURCE_FILE_4" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR"
 83 | 
 84 | 	cp "$SOURCE_DIR/$SOURCE_FILE_1" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$TXT_DIR"
 85 | 	cp "$SOURCE_DIR/$SOURCE_FILE_2" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$TXT_DIR"
 86 | 	cp "$SOURCE_DIR/$SOURCE_FILE_3" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$TXT_DIR"
 87 | 	cp "$SOURCE_DIR/$SOURCE_FILE_4" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$TXT_DIR"
 88 | 
 89 | 	cp "$SOURCE_DIR/$SOURCE_FILE_1" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$CSV_DIR"
 90 | 	cp "$SOURCE_DIR/$SOURCE_FILE_2" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$CSV_DIR"
 91 | 	cp "$SOURCE_DIR/$SOURCE_FILE_3" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$CSV_DIR"
 92 | 	cp "$SOURCE_DIR/$SOURCE_FILE_4" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$CSV_DIR"
 93 | }
 94 | 
 95 | function oneTimeSetUp () {
 96 | 	START_TIME=$SECONDS
 97 | 
 98 | 	source "$DEV_DIR/ofunctions.sh"
 99 | 
100 | 	# set default umask
101 | 	umask 0022
102 | 
103 | 	GetLocalOS
104 | 
105 | 	echo "Running on $LOCAL_OS_FULL"
106 | 
107 | 	echo ""
108 | 	echo -e "Running tesseract\n$(tesseract -v)"
109 | 
110 | 	#TODO: Assuming that macos has the same syntax than bsd here
111 |         if [ "$LOCAL_OS" == "msys" ] || [ "$LOCAL_OS" == "Cygwin" ]; then
112 |                 SUDO_CMD=""
113 |         elif [ "$LOCAL_OS" == "BSD" ] || [ "$LOCAL_OS" == "MacOSX" ]; then
114 |                 SUDO_CMD=""
115 |         else
116 |                 SUDO_CMD="sudo"
117 |         fi
118 | 
119 | 	# Getting tesseract version
120 |         TESSERACT_VERSION=$(tesseract -v 2>&1 | head -n 1 | awk '{print $2}')
121 | 	echo "Running with tesseract $TESSERACT_VERSION"
122 | 
123 | 	# Clean run and log files
124 | 	if [ -f /var/log/pmocr.log ]; then
125 | 		rm -f /var/log/pmocr.log
126 | 	fi
127 | 
128 | 	rm -f /tmp/pmocr.*
129 | 
130 | 	SetConfFileValue "$CONF_DIR/$SERVICE_CONF" "MOVE_ORIGINAL_ON_SUCCESS" ""
131 | 	SetConfFileValue "$CONF_DIR/$SERVICE_CONF" "MOVE_ORIGINAL_ON_FAILURE" ""
132 | }
133 | 
# One-off teardown for the suite: uninstalls the program through install.sh
# in the repository root and prints the total run time.
#
# Globals read:    PMOCR_DIR, SUDO_CMD, START_TIME
# Globals written: ELAPSED_TIME
function oneTimeTearDown () {

	#TODO: uncomment this when dev is done
	#rm -rf "$PMOCR_TESTS_DIR"

	# install.sh lives in the repository root of this project; the previously
	# used OSYNC_DIR is not defined anywhere in this suite
	cd "$PMOCR_DIR"
	$SUDO_CMD ./install.sh --remove --no-stats
	assertEquals "Uninstall failed" "0" $?


	ELAPSED_TIME=$(($SECONDS - $START_TIME))
	echo "It took $ELAPSED_TIME seconds to run these tests."
}
147 | 
148 | #function setUp () {
149 | #}
150 | 
151 | # This test has to be done everytime in order for main executable to be fresh
# Rebuilds the program from the dev sources and installs it, asserting that
# both steps exit with status 0.
function test_Merge () {
	local stepStatus

	# Step 1: regenerate the executable from within the dev directory
	cd "$DEV_DIR"
	./merge.sh pmocr
	stepStatus=$?
	assertEquals "Merging code" "0" "$stepStatus"

	# Step 2: install the result from the repository root
	cd "$PMOCR_DIR"
	$SUDO_CMD ./install.sh --no-stats
	stepStatus=$?
	assertEquals "Install failed" "0" "$stepStatus"

	# Overwrite standard config file with tesseract one
	#$SUDO_CMD cp -f "$CONF_DIR/$BATCH_CONF" /etc/default/default.conf
}
164 | 
function test_batch () {
	# Runs pmocr in batch mode once for every combination of output format and
	# extra command line option, then checks the files found in the batch
	# directory after each run.
	#
	# Reads: PMOCR_DIR, PMOCR_EXECUTABLE, PMOCR_TESTS_DIR, BATCH_DIR, CONF_DIR,
	#        BATCH_CONF, TESSERACT_VERSION, SOURCE_FILE_1 .. SOURCE_FILE_4
	# Calls: VerComp, PrepareLocalDirs, CopyTestFiles, WildcardFileExists
	local outputFile
	local sourceFile
	local sourceFiles
	local batchParm
	local batchOutputFormat
	local otherParm
	local parm
	local i

	cd "$PMOCR_DIR"

	# Every check below is applied to each source file, so iterate instead of
	# repeating the same stanza four times.
	sourceFiles=("$SOURCE_FILE_1" "$SOURCE_FILE_2" "$SOURCE_FILE_3" "$SOURCE_FILE_4")

	# Testing batch output for formats pdf, txt and csv
	# Don't test for pdf output if tesseract version is lower than 3.03
	if [ "$(VerComp "$TESSERACT_VERSION" "3.03")" -lt 2 ]; then
		batchParm=(-p -t -c)
		batchOutputFormat=(pdf txt csv)
	else
		batchParm=(-t -c)
		batchOutputFormat=(txt csv)
	fi

	for i in "${!batchParm[@]}"; do

		# The first entry is a single space, standing for "no extra option"
		otherParm=(' ' -k -d --suffix=TESTSUFFIX --no-suffix --text=TESTTEXT)
		for parm in "${otherParm[@]}"; do

			PrepareLocalDirs
			CopyTestFiles

			echo "Running batch run with parameters ${batchParm[$i]} ${parm}"
			# ${parm} is intentionally unquoted so the ' ' placeholder expands to no argument
			"./$PMOCR_EXECUTABLE" --batch ${batchParm[$i]} ${parm} --config="$CONF_DIR/$BATCH_CONF" "$PMOCR_TESTS_DIR/$BATCH_DIR"
			assertEquals "Batch run with parameter ${batchParm[$i]} ${parm}" "0" $?

			case "$parm" in
			# Standard run with default options
			" ")
				# One transformed file per source file should be present
				for sourceFile in "${sourceFiles[@]}"; do
					outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${sourceFile%%.*}*_OCR.${batchOutputFormat[$i]}"
					[ "$(WildcardFileExists "$outputFile")" -eq 1 ]
					assertEquals "Missing batch output file [$outputFile]" "0" $?
				done

				# Original files should be renamed with _OCR, each keeping its own extension
				for sourceFile in "${sourceFiles[@]}"; do
					outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${sourceFile%%.*}_OCR.${sourceFile##*.}"
					[ -f "$outputFile" ]
					assertEquals "Missing batch output file [$outputFile]" "0" $?
				done
				;;

			# Run with skip already searchable PDFs
			"-k")
				for sourceFile in "${sourceFiles[@]:0:3}"; do
					outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${sourceFile%%.*}_OCR.${sourceFile##*.}"
					[ "$(WildcardFileExists "$outputFile")" -eq 1 ]
					assertEquals "Missing batch output file for searchable PDF test [$outputFile]" "0" $?
				done

				# SOURCE_FILE_4 is the one expected to be skipped, so no _OCR rename for it
				outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_4%%.*}_OCR.${SOURCE_FILE_4##*.}"
				[ "$(WildcardFileExists "$outputFile")" -eq 0 ]
				assertEquals "Searchable PDF test file should not be present [$outputFile]" "0" $?
				;;

			# Run and delete originals on success
			"-d")
				# Look in the batch directory: a bare file name would be resolved
				# against the current directory and the check would always pass.
				for sourceFile in "${sourceFiles[@]}"; do
					[ ! -f "$PMOCR_TESTS_DIR/$BATCH_DIR/$sourceFile" ]
					assertEquals "Original file [$sourceFile] not deleted" "0" $?
				done
				;;

			# Replace _OCR with another suffix
			"--suffix=TESTSUFFIX")
				for sourceFile in "${sourceFiles[@]}"; do
					outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${sourceFile%%.*}*TESTSUFFIX.${sourceFile##*.}"
					[ "$(WildcardFileExists "$outputFile")" -eq 1 ]
					assertEquals "Missing batch output file [$outputFile]" "0" $?
				done
				;;

			# Remove suffixes
			"--no-suffix")
				# Output names are expected to be <name>.<timestamp>Z.<format>;
				# the assertion checks grep's exit status (0 when a line matched).
				for sourceFile in "${sourceFiles[@]}"; do
					find "$PMOCR_TESTS_DIR/$BATCH_DIR" | grep -E "${sourceFile%%.*}\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}-[0-9]{2}-[0-9]{2}Z\.${batchOutputFormat[$i]}"
					assertEquals "Bogus batch output file without suffix" "0" $?
				done
				;;

			# Add another text
			"--text=TESTTEXT")
				for sourceFile in "${sourceFiles[@]}"; do
					outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${sourceFile%%.*}TESTTEXT_OCR.${batchOutputFormat[$i]}"
					[ -f "$outputFile" ]
					assertEquals "Missing batch output file [$outputFile]" "0" $?
				done
				;;
			esac
		done
	done
}
315 | 
function test_StandardService () {
	# Starts pmocr in service mode in the background, waits a fixed time, then
	# counts the timestamped *_OCR output files found in each service directory.
	#
	# Reads: PMOCR_DIR, PMOCR_EXECUTABLE, CONF_DIR, SERVICE_CONF, PMOCR_TESTS_DIR,
	#        SERVICE_DIR, PDF_DIR, TXT_DIR, CSV_DIR, TESSERACT_VERSION
	# Calls: PrepareLocalDirs, CopyTestFiles, VerComp, KillChilds
	local pid
	local numberFiles
	local timestampRegex

	# ".YYYY-MM-DDTHH-MM-SSZ" as found in output file names. No leading '*'
	# (unspecified at the start of an extended regex) and no stray backslashes.
	timestampRegex='\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}-[0-9]{2}-[0-9]{2}Z'

	cd "$PMOCR_DIR"

	PrepareLocalDirs
	CopyTestFiles

	"./$PMOCR_EXECUTABLE" --service --config="$CONF_DIR/$SERVICE_CONF" &
	pid=$!

	if [ ! $pid -ne 0 ]; then
		# $? is 0 here because the test above succeeded, so asserting that it
		# equals 1 deliberately records a failure.
		assertEquals "Instance not launched, pid [$pid]" "1" $?
	else
		echo "Launched instance with pid [$pid]."
	fi

	# Trivial wait time for pmocr to process files
	sleep 60

	# Don't test PDF output on tesseract <= 3.02
	if [ "$(VerComp "$TESSERACT_VERSION" "3.03")" -ne 2 ]; then
		numberFiles=$(find "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR" -type f | grep -Ec "${timestampRegex}_OCR\.pdf")
		[ "$numberFiles" -eq 3 ]
		assertEquals "Service run pdf transformed files found number invalid [$numberFiles]" "0" $?
	fi

	numberFiles=$(find "$PMOCR_TESTS_DIR/$SERVICE_DIR/$TXT_DIR" -type f | grep -Ec "${timestampRegex}_OCR\.txt")
	[ "$numberFiles" -eq 3 ]
	assertEquals "Service run txt transformed files found number invalid [$numberFiles]" "0" $?

	numberFiles=$(find "$PMOCR_TESTS_DIR/$SERVICE_DIR/$CSV_DIR" -type f | grep -Ec "${timestampRegex}_OCR\.csv")
	[ "$numberFiles" -eq 3 ]
	assertEquals "Service run csv transformed files found number invalid [$numberFiles]" "0" $?

	# Ask the service to stop, give it a moment, then clean up via KillChilds
	kill -TERM "$pid" && sleep 5
	KillChilds "$pid"
}
356 | 
function test_MovedFilesService () {
	# Checks that the service moves original files to the configured directories.
	# Phase 1: with a working OCR engine, originals should land in SUCCEED_DIR.
	# Phase 2: with the OCR engine binary renamed away, originals should land
	#          in FAILURE_DIR.
	# Both MOVE_ORIGINAL_* settings are cleared again at the end.
	#
	# Reads:  PMOCR_DIR, PMOCR_EXECUTABLE, CONF_DIR, SERVICE_CONF, PMOCR_TESTS_DIR,
	#         SERVICE_DIR, PDF_DIR, SUCCEED_DIR, FAILURE_DIR, SOURCE_DIR,
	#         SOURCE_FILE_1 .. SOURCE_FILE_4, TESSERACT_VERSION, SUDO_CMD
	# Writes: OCR_ENGINE_EXEC
	# Calls:  SetConfFileValue, GetConfFileValue, PrepareLocalDirs, CopyTestFiles,
	#         VerComp, KillChilds
	local pid
	local numberFiles
	local timestampRegex

	# ".YYYY-MM-DDTHH-MM-SSZ" as found in moved file names. No leading '*'
	# (unspecified at the start of an extended regex).
	timestampRegex='\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}-[0-9]{2}-[0-9]{2}Z'

	SetConfFileValue "$CONF_DIR/$SERVICE_CONF" "MOVE_ORIGINAL_ON_SUCCESS" "$PMOCR_TESTS_DIR/$SUCCEED_DIR"
	SetConfFileValue "$CONF_DIR/$SERVICE_CONF" "MOVE_ORIGINAL_ON_FAILURE" "$PMOCR_TESTS_DIR/$FAILURE_DIR"

	cd "$PMOCR_DIR"

	PrepareLocalDirs
	CopyTestFiles

	"./$PMOCR_EXECUTABLE" --service --config="$CONF_DIR/$SERVICE_CONF" &
	pid=$!

	if [ ! $pid -ne 0 ]; then
		# $? is 0 here because the test above succeeded, so asserting that it
		# equals 1 deliberately records a failure.
		assertEquals "Instance not launched, pid [$pid]" "1" $?
	else
		echo "Launched instance with pid [$pid]."
	fi

	# Trivial wait time for pmocr to process files
	sleep 90

	# Don't test PDF output on tesseract <= 3.02
	if [ "$(VerComp "$TESSERACT_VERSION" "3.03")" -ne 2 ]; then
		numberFiles=$(find "$PMOCR_TESTS_DIR/$SUCCEED_DIR" -type f | grep -Ec "${timestampRegex}\.pdf")
		[ "$numberFiles" -eq 3 ]
		assertEquals "Service run moved pdf original files found number invalid [$numberFiles]" "0" $?
	fi

	numberFiles=$(find "$PMOCR_TESTS_DIR/$SUCCEED_DIR" -type f | grep -Ec "${timestampRegex}\.png")
	[ "$numberFiles" -eq 3 ]
	assertEquals "Service run moved png original files found number invalid [$numberFiles]" "0" $?

	numberFiles=$(find "$PMOCR_TESTS_DIR/$SUCCEED_DIR" -type f | grep -Ec "${timestampRegex}\.tif")
	[ "$numberFiles" -eq 3 ]
	assertEquals "Service run moved tif original files found number invalid [$numberFiles]" "0" $?

	kill -TERM "$pid" && sleep 5
	KillChilds "$pid"

	PrepareLocalDirs
	"./$PMOCR_EXECUTABLE" --service --config="$CONF_DIR/$SERVICE_CONF" &
	pid=$!

	# The bracket test fails (status 1) for any non-zero pid, which is the expected case
	[ ! $pid -ne 0 ]
	assertEquals "Instance not launched, pid [$pid]" "1" $?

	# Make sure next transformations will fail in order to move originals to failed dir
	sleep 2
	OCR_ENGINE_EXEC=$(GetConfFileValue "$CONF_DIR/$SERVICE_CONF" "TESSERACT_OCR_ENGINE_EXEC")
	$SUDO_CMD mv "$OCR_ENGINE_EXEC" "${OCR_ENGINE_EXEC}-alt"

	#CopyTestFiles
	# Only copy the source files into the PDF service directory in order to not have doubles
	cp "$SOURCE_DIR/$SOURCE_FILE_1" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR"
	cp "$SOURCE_DIR/$SOURCE_FILE_2" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR"
	cp "$SOURCE_DIR/$SOURCE_FILE_3" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR"
	cp "$SOURCE_DIR/$SOURCE_FILE_4" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR"

	# Trivial wait time for pmocr to process files
	sleep 60

	# Test for failed files presence (3 files only)
	numberFiles=$(find "$PMOCR_TESTS_DIR/$FAILURE_DIR" -type f | grep -Ec "${timestampRegex}\.(pdf|tif|png)")
	[ "$numberFiles" -eq 3 ]
	assertEquals "Service run failed original files found number invalid [$numberFiles]" "0" $?

	# Put the OCR engine binary back under its original name
	$SUDO_CMD mv "${OCR_ENGINE_EXEC}-alt" "$OCR_ENGINE_EXEC"

	kill -TERM "$pid" && sleep 5
	KillChilds "$pid"

	SetConfFileValue "$CONF_DIR/$SERVICE_CONF" "MOVE_ORIGINAL_ON_SUCCESS" ""
	SetConfFileValue "$CONF_DIR/$SERVICE_CONF" "MOVE_ORIGINAL_ON_FAILURE" ""
}
438 | 
function test_WaitForTaskCompletion () {
	# Checks the exit code of WaitForTaskCompletion (ofunctions v2) for several
	# pairs of background sleep jobs. The pids are passed as one ';' separated list.
	# NOTE(review): the 2nd and 3rd arguments look like soft (warning) and hard
	# (kill) time limits in seconds - confirm against WaitForTaskCompletion.
	local pids
	local exitCode

	# Scenario 1: standard wait
	sleep 1 &
	pids=$!
	sleep 2 &
	pids+=";$!"
	WaitForTaskCompletion $pids 0 0 $SLEEP_TIME $KEEP_LOGGING true true false "${FUNCNAME[0]}"
	exitCode=$?
	assertEquals "WaitForTaskCompletion test 1" "0" "$exitCode"

	# Scenario 2: standard wait with warning
	sleep 2 &
	pids=$!
	sleep 5 &
	pids+=";$!"
	WaitForTaskCompletion $pids 3 0 $SLEEP_TIME $KEEP_LOGGING true true false "${FUNCNAME[0]}"
	exitCode=$?
	assertEquals "WaitForTaskCompletion test 2" "0" "$exitCode"

	# Scenario 3: both pids are killed
	sleep 5 &
	pids=$!
	sleep 5 &
	pids+=";$!"
	WaitForTaskCompletion $pids 0 2 $SLEEP_TIME $KEEP_LOGGING true true false "${FUNCNAME[0]}"
	exitCode=$?
	assertEquals "WaitForTaskCompletion test 3" "2" "$exitCode"

	# Scenario 4: one of the two pids is killed
	sleep 2 &
	pids=$!
	sleep 10 &
	pids+=";$!"
	WaitForTaskCompletion $pids 0 3 $SLEEP_TIME $KEEP_LOGGING true true false "${FUNCNAME[0]}"
	exitCode=$?
	assertEquals "WaitForTaskCompletion test 4" "1" "$exitCode"

	# Scenario 5: count since script begin, the following should output two
	# warnings and both pids should get killed
	sleep 20 &
	pids=$!
	sleep 20 &
	pids+=";$!"
	WaitForTaskCompletion $pids 3 5 $SLEEP_TIME $KEEP_LOGGING false true false "${FUNCNAME[0]}"
	exitCode=$?
	assertEquals "WaitForTaskCompletion test 5" "2" "$exitCode"
}
488 | 
function test_ParallelExec () {
	# Checks the exit code of ParallelExec (ofunctions v2) when the commands are
	# given as a ';' separated string, as a file with one command per line, and
	# with the full list of time control arguments. "du" on a non-existent path
	# is used as the failing command.
	local exitCode

	# Commands passed as a ';' separated string

	cmd="sleep 2;sleep 2;sleep 2;sleep 2"
	ParallelExec 4 "$cmd"
	exitCode=$?
	assertEquals "ParallelExec test 1" "0" "$exitCode"

	cmd="sleep 2;du /none;sleep 2"
	ParallelExec 2 "$cmd"
	exitCode=$?
	assertEquals "ParallelExec test 2" "1" "$exitCode"

	cmd="sleep 4;du /none;sleep 3;du /none;sleep 2"
	ParallelExec 3 "$cmd"
	exitCode=$?
	assertEquals "ParallelExec test 3" "2" "$exitCode"

	# Commands read from $TMP_FILE, one per line

	printf '%s\n' "sleep 2" "sleep 2" "sleep 2" "sleep 2" > "$TMP_FILE"
	ParallelExec 4 "$TMP_FILE" true
	exitCode=$?
	assertEquals "ParallelExec test 4" "0" "$exitCode"

	printf '%s\n' "sleep 2" "du /nome" "sleep 2" > "$TMP_FILE"
	ParallelExec 2 "$TMP_FILE" true
	exitCode=$?
	assertEquals "ParallelExec test 5" "1" "$exitCode"

	printf '%s\n' "sleep 4" "du /none" "sleep 3" "du /none" "sleep 2" > "$TMP_FILE"
	ParallelExec 3 "$TMP_FILE" true
	exitCode=$?
	assertEquals "ParallelExec test 6" "2" "$exitCode"

	# Full argument list, in order:
	# ParallelExec $numberOfProcesses $commandsArg $readFromFile $softTime $HardTime $sleepTime $keepLogging $counting $Spinner $noError $callerName

	cmd="sleep 5;sleep 5;sleep 5;sleep 5;sleep 5"
	ParallelExec 4 "$cmd" false 1 0 .05 3600 true true false "${FUNCNAME[0]}"
	exitCode=$?
	assertEquals "ParallelExec full test 1" "0" "$exitCode"

	cmd="sleep 2;du /none;sleep 2;sleep 2;sleep 4"
	ParallelExec 2 "$cmd" false 0 0 .1 2 true false false "${FUNCNAME[0]}"
	exitCode=$?
	assertEquals "ParallelExec full test 2" "1" "$exitCode"

	cmd="sleep 4;du /none;sleep 3;du /none;sleep 2"
	ParallelExec 3 "$cmd" false 1 2 .05 7000 true true false "${FUNCNAME[0]}"
	exitCode=$?
	assertNotEquals "ParallelExec full test 3" "0" "$exitCode"

}
545 | 
546 | #function test_outputLogs () {
547 | #	echo ""
548 | #	echo "Log output:"
549 | #	echo ""
550 | #	cat ${HOME}/pmocr.log
551 | #}
552 | 
553 | . "$TESTS_DIR/shunit2/shunit2"
554 | 


--------------------------------------------------------------------------------
/dev/tests/shunit2/README.md:
--------------------------------------------------------------------------------
  1 | # shUnit2
  2 | 
shUnit2 is a [xUnit](http://en.wikipedia.org/wiki/XUnit) unit test framework for Bourne based shell scripts, and it is designed to work in a similar manner to [JUnit](http://www.junit.org), [PyUnit](http://pyunit.sourceforge.net), etc. If you have ever had the desire to write a unit test for a shell script, shUnit2 can do the job.
  4 | 
  5 | [![Travis CI](https://img.shields.io/travis/kward/shunit2.svg)](https://travis-ci.org/kward/shunit2)
  6 | 
  7 | ## Table of Contents
  8 | * [Introduction](#introduction)
  9 |   * [Credits / Contributors](#credits-contributors)
 10 |   * [Feedback](#feedback)
 11 | * [Quickstart](#quickstart)
 12 | * [Function Reference](#function-reference)
 13 |   * [General Info](#general-info)
 14 |   * [Asserts](#asserts)
 15 |   * [Failures](#failures)
 16 |   * [Setup/Teardown](#setup-teardown)
 17 |   * [Skipping](#skipping)
 18 |   * [Suites](#suites)
 19 | * [Advanced Usage](#advanced-usage)
 20 |   * [Some constants you can use](#some-constants-you-can-use)
 21 |   * [Error Handling](#error-handling)
 22 |   * [Including Line Numbers in Asserts (Macros)](#including-line-numbers-in-asserts-macros)
 23 |   * [Test Skipping](#test-skipping)
 24 | * [Appendix](#appendix)
 25 |   * [Getting help](#getting-help)
 26 |   * [Zsh](#zsh)
 27 | 
 28 | ---
 29 | ## <a name="introduction"></a> Introduction
 30 | shUnit2 was originally developed to provide a consistent testing solution for [log4sh][log4sh], a shell based logging framework similar to [log4j](http://logging.apache.org). During the development of that product, a repeated problem of having things work just fine under one shell (`/bin/bash` on Linux to be specific), and then not working under another shell (`/bin/sh` on Solaris) kept coming up. Although several simple tests were run, they were not adequate and did not catch some corner cases. The decision was finally made to write a proper unit test framework after multiple brown-bag releases were made. _Research was done to look for an existing product that met the testing requirements, but no adequate product was found._
 31 | 
 32 | Tested Operating Systems (varies over time)
 33 | 
 34 | * Cygwin
 35 | * FreeBSD (user supported)
 36 | * Linux (Gentoo, Ubuntu)
 37 | * Mac OS X
 38 | * Solaris 8, 9, 10 (inc. OpenSolaris)
 39 | 
 40 | Tested Shells
 41 | 
 42 | * Bourne Shell (__sh__)
 43 | * BASH - GNU Bourne Again SHell (__bash__)
 44 | * DASH (__dash__)
 45 | * Korn Shell (__ksh__)
 46 | * pdksh - Public Domain Korn Shell (__pdksh__)
 47 | * zsh - Zsh (__zsh__) (since 2.1.2) _please see the Zsh shell errata for more information_
 48 | 
 49 | See the appropriate Release Notes for this release (`doc/RELEASE_NOTES-X.X.X.txt`) for the list of actual versions tested.
 50 | 
 51 | ### <a name="credits-contributors"></a> Credits / Contributors
 52 | A list of contributors to shUnit2 can be found in `doc/contributors.md`. Many thanks go out to all those who have contributed to make this a better tool.
 53 | 
 54 | shUnit2 is the original product of many hours of work by Kate Ward, the primary author of the code. For related software, check out https://github.com/kward.
 55 | 
 56 | ### <a name="feedback"></a> Feedback
 57 | Feedback is most certainly welcome for this document. Send your additions, comments and criticisms to the shunit2-users@google.com mailing list.
 58 | 
 59 | ---
 60 | 
 61 | ## <a name="quickstart"></a> Quickstart
 62 | This section will give a very quick start to running unit tests with shUnit2. More information is located in later sections.
 63 | 
 64 | Here is a quick sample script to show how easy it is to write a unit test in shell. _Note: the script as it stands expects that you are running it from the "examples" directory._
 65 | 
 66 | ```sh
 67 | #! /bin/sh
 68 | # file: examples/equality_test.sh
 69 | 
 70 | testEquality() {
 71 |   assertEquals 1 1
 72 | }
 73 | 
 74 | # Load shUnit2.
 75 | . ./shunit2
 76 | ```
 77 | 
 78 | Running the unit test should give results similar to the following.
 79 | 
 80 | ```console
 81 | $ cd examples
 82 | $ ./equality_test.sh
 83 | testEquality
 84 | 
 85 | Ran 1 test.
 86 | 
 87 | OK
 88 | ```
 89 | 
 90 | W00t! You've just run your first successful unit test. So, what just happened? Quite a bit really, and it all happened simply by sourcing the `shunit2` library. The basic functionality for the script above goes like this:
 91 | 
 92 | * When shUnit2 is sourced, it will walk through any functions defined whose name starts with the string `test`, and add those to an internal list of tests to execute. Once a list of test functions to be run has been determined, shunit2 will go to work.
* Before any tests are executed, shUnit2 again looks for a function, this time one named `oneTimeSetUp()`. If it exists, it will be run. This function is normally used to set up the environment for all tests to be run. Things like creating directories for output or setting environment variables are good to place here. Just so you know, you can also declare a corresponding function named `oneTimeTearDown()` that does the same thing, but once all the tests have been completed. It is good for removing temporary directories, etc.
 94 | * shUnit2 is now ready to run tests. Before doing so though, it again looks for another function that might be declared, one named `setUp()`. If the function exists, it will be run before each test. It is good for resetting the environment so that each test starts with a clean slate. **At this stage, the first test is finally run.** The success of the test is recorded for a report that will be generated later. After the test is run, shUnit2 looks for a final function that might be declared, one named `tearDown()`. If it exists, it will be run after each test. It is a good place for cleaning up after each test, maybe doing things like removing files that were created, or removing directories. This set of steps, `setUp() > test() > tearDown()`, is repeated for all of the available tests.
 95 | * Once all the work is done, shUnit2 will generate the nice report you saw above. A summary of all the successes and failures will be given so that you know how well your code is doing.
 96 | 
 97 | We should now try adding a test that fails. Change your unit test to look like this.
 98 | 
 99 | ```sh
100 | #! /bin/sh
101 | # file: examples/party_test.sh
102 | 
103 | testEquality() {
104 |   assertEquals 1 1
105 | }
106 | 
107 | testPartyLikeItIs1999() {
108 |   year=`date '+%Y'`
109 |   assertEquals "It's not 1999 :-(" '1999' "${year}"
110 | }
111 | 
112 | # Load shUnit2.
113 | . ./shunit2
114 | ```
115 | 
116 | So, what did you get? I guess it told you that this isn't 1999. Bummer, eh? Hopefully, you noticed a couple of things that were different about the second test. First, we added an optional message that the user will see if the assert fails. Second, we did comparisons of strings instead of integers as in the first test. It doesn't matter whether you are testing for equality of strings or integers. Both work equally well with shUnit2.
117 | 
118 | Hopefully, this is enough to get you started with unit testing. If you want a ton more examples, take a look at the tests provided with [log4sh][log4sh] or [shFlags][shflags]. Both provide excellent examples of more advanced usage. shUnit2 was after all written to meet the unit testing need that [log4sh][log4sh] had.
119 | 
120 | ---
121 | 
122 | ## <a name="function-reference"></a> Function Reference
123 | 
124 | ### <a name="general-info"></a> General Info
125 | 
Any string values passed should be properly quoted -- they must be surrounded by single-quote (`'`) or double-quote (`"`) characters -- so that the shell will properly parse them.
127 | 
128 | ### <a name="asserts"></a> Asserts
129 | 
130 | `assertEquals [message] expected actual`
131 | 
132 | Asserts that _expected_ and _actual_ are equal to one another. The _expected_ and _actual_ values can be either strings or integer values as both will be treated as strings. The _message_ is optional, and must be quoted.
133 | 
134 | `assertNotEquals [message] unexpected actual`
135 | 
Asserts that _unexpected_ and _actual_ are not equal to one another. The _unexpected_ and _actual_ values can be either strings or integer values as both will be treated as strings. The _message_ is optional, and must be quoted.
137 | 
138 | `assertSame [message] expected actual`
139 | 
140 | This function is functionally equivalent to `assertEquals`.
141 | 
142 | `assertNotSame [message] unexpected actual`
143 | 
144 | This function is functionally equivalent to `assertNotEquals`.
145 | 
146 | `assertNull [message] value`
147 | 
148 | Asserts that _value_ is _null_, or in shell terms, a zero-length string. The _value_ must be a string as an integer value does not translate into a zero-length string. The _message_ is optional, and must be quoted.
149 | 
150 | `assertNotNull [message] value`
151 | 
Asserts that _value_ is _not null_, or in shell terms, a non-empty string. The _value_ may be a string or an integer as the latter will be parsed as a non-empty string value. The _message_ is optional, and must be quoted.
153 | 
154 | `assertTrue [message] condition`
155 | 
156 | Asserts that a given shell test _condition_ is _true_. The condition can be as simple as a shell _true_ value (the value `0` -- equivalent to `${SHUNIT_TRUE}`), or a more sophisticated shell conditional expression. The _message_ is optional, and must be quoted.
157 | 
158 | A sophisticated shell conditional expression is equivalent to what the __if__ or __while__ shell built-ins would use (more specifically, what the __test__ command would use). Testing for example whether some value is greater than another value can be done this way.
159 | 
160 | `assertTrue "[ 34 -gt 23 ]"`
161 | 
162 | Testing for the ability to read a file can also be done. This particular test will fail.
163 | 
`assertTrue 'test failed' "[ -r '/some/non-existent/file' ]"`
165 | 
166 | As the expressions are standard shell __test__ expressions, it is possible to string multiple expressions together with `-a` and `-o` in the standard fashion. This test will succeed as the entire expression evaluates to _true_.
167 | 
168 | `assertTrue 'test failed' '[ 1 -eq 1 -a 2 -eq 2 ]'`
169 | 
170 | _One word of warning: be very careful with your quoting as shell is not the most forgiving of bad quoting, and things will fail in strange ways._
171 | 
172 | `assertFalse [message] condition`
173 | 
174 | Asserts that a given shell test _condition_ is _false_. The condition can be as simple as a shell _false_ value (the value `1` -- equivalent to `${SHUNIT_FALSE}`), or a more sophisticated shell conditional expression. The _message_ is optional, and must be quoted.
175 | 
176 | _For examples of more sophisticated expressions, see `assertTrue`._
177 | 
178 | ### <a name="failures"></a> Failures
179 | 
180 | Just to clarify, failures __do not__ test the various arguments against one another. Failures simply fail, optionally with a message, and that is all they do. If you need to test arguments against one another, use asserts.
181 | 
182 | If all failures do is fail, why might one use them? There are times when you may have some very complicated logic that you need to test, and the simple asserts provided are simply not adequate. You can do your own validation of the code, use an `assertTrue ${SHUNIT_TRUE}` if your own tests succeeded, and use a failure to record a failure.
183 | 
184 | `fail [message]`
185 | 
186 | Fails the test immediately. The _message_ is optional, and must be quoted.
187 | 
188 | `failNotEquals [message] unexpected actual`
189 | 
190 | Fails the test immediately, reporting that the _unexpected_ and _actual_ values are not equal to one another. The _message_ is optional, and must be quoted.
191 | 
192 | _Note: no actual comparison of unexpected and actual is done._
193 | 
194 | `failSame [message] expected actual`
195 | 
196 | Fails the test immediately, reporting that the _expected_ and _actual_ values are the same. The _message_ is optional, and must be quoted.
197 | 
198 | _Note: no actual comparison of expected and actual is done._
199 | 
200 | `failNotSame [message] expected actual`
201 | 
202 | Fails the test immediately, reporting that the _expected_ and _actual_ values are not the same. The _message_ is optional, and must be quoted.
203 | 
204 | _Note: no actual comparison of expected and actual is done._
205 | 
206 | ### <a name="setup-teardown"></a> Setup/Teardown
207 | 
208 | `oneTimeSetUp`
209 | 
This function can be optionally overridden by the user in their test suite.
211 | 
212 | If this function exists, it will be called once before any tests are run. It is useful to prepare a common environment for all tests.
213 | 
214 | `oneTimeTearDown`
215 | 
This function can be optionally overridden by the user in their test suite.
217 | 
218 | If this function exists, it will be called once after all tests are completed. It is useful to clean up the environment after all tests.
219 | 
220 | `setUp`
221 | 
This function can be optionally overridden by the user in their test suite.
223 | 
224 | If this function exists, it will be called before each test is run. It is useful to reset the environment before each test.
225 | 
226 | `tearDown`
227 | 
This function can be optionally overridden by the user in their test suite.
229 | 
230 | If this function exists, it will be called after each test completes. It is useful to clean up the environment after each test.
231 | 
232 | ### <a name="skipping"></a> Skipping
233 | 
234 | `startSkipping`
235 | 
236 | This function forces the remaining _assert_ and _fail_ functions to be "skipped", i.e. they will have no effect. Each function skipped will be recorded so that the total of asserts and fails will not be altered.
237 | 
238 | `endSkipping`
239 | 
240 | This function returns calls to the _assert_ and _fail_ functions to their default behavior, i.e. they will be called.
241 | 
242 | `isSkipping`
243 | 
244 | This function returns the current state of skipping. It can be compared against `${SHUNIT_TRUE}` or `${SHUNIT_FALSE}` if desired.
245 | 
246 | ### <a name="suites"></a> Suites
247 | 
248 | The default behavior of shUnit2 is that all tests will be found dynamically. If you have a specific set of tests you want to run, or you don't want to use the standard naming scheme of prefixing your tests with `test`, these functions are for you. Most users will never use them though.
249 | 
250 | `suite`
251 | 
252 | This function can be optionally overridden by the user in their test suite.
253 | 
254 | If this function exists, it will be called when `shunit2` is sourced. If it does not exist, shUnit2 will search the parent script for all functions beginning with the word `test`, and they will be added dynamically to the test suite.
255 | 
256 | `suite_addTest name`
257 | 
258 | This function adds a function named _name_ to the list of tests scheduled for execution as part of this test suite. This function should only be called from within the `suite()` function.
259 | 
260 | ---
261 | 
262 | ## <a name="advanced-usage"></a> Advanced Usage
263 | 
264 | ### <a name="some-constants-you-can-use"></a> Some constants you can use
265 | 
266 | There are several constants provided by shUnit2 as variables that might be of use to you.
267 | 
268 | *Predefined*
269 | 
270 | | Constant        | Value |
271 | | --------------- | ----- |
272 | | SHUNIT\_TRUE    | Standard shell `true` value (the integer value 0). |
273 | | SHUNIT\_FALSE   | Standard shell `false` value (the integer value 1). |
274 | | SHUNIT\_ERROR   | The integer value 2. |
275 | | SHUNIT\_TMPDIR  | Path to temporary directory that will be automatically cleaned up upon exit of shUnit2. |
276 | | SHUNIT\_VERSION | The version of shUnit2 you are running. |
277 | 
278 | *User defined*
279 | 
280 | | Constant          | Value |
281 | | ----------------- | ----- |
282 | | SHUNIT\_CMD\_EXPR | Override which `expr` command is used. By default `expr` is used, except on BSD systems where `gexpr` is used. |
283 | | SHUNIT\_COLOR     | Enable colorized output. Options are 'auto', 'always', or 'never', with 'auto' being the default. |
284 | | SHUNIT\_PARENT    | The filename of the shell script containing the tests. This is needed specifically for Zsh support. |
285 | | SHUNIT\_TEST\_PREFIX | Define this variable to add a prefix in front of each test name that is output in the test report. |
286 | 
287 | ### <a name="error-handling"></a> Error handling
288 | 
289 | The constant values `SHUNIT_TRUE`, `SHUNIT_FALSE`, and `SHUNIT_ERROR` are returned from nearly every function to indicate the success or failure of the function. Additionally the variable `flags_error` is filled with a detailed error message if any function returns with a `SHUNIT_ERROR` value.
290 | 
291 | ### <a name="including-line-numbers-in-asserts-macros"></a> Including Line Numbers in Asserts (Macros)
292 | 
293 | If you include lots of assert statements in an individual test function, it can become difficult to determine exactly which assert was thrown unless your messages are unique. To help somewhat, line numbers can be included in the assert messages. To enable this, a special shell "macro" must be used rather than the standard assert calls. _Shell doesn't actually have macros; the name is used here as the operation is similar to a standard macro._
294 | 
295 | For example, to include line numbers for an `assertEquals()` function call, replace the `assertEquals()` with `${_ASSERT_EQUALS_}`.
296 | 
297 | _**Example** -- Asserts with and without line numbers_
298 | ```sh
299 | #! /bin/sh
300 | # file: examples/lineno_test.sh
301 | 
302 | testLineNo() {
303 |   # This assert will have line numbers included (e.g. "ASSERT:[123] ...").
304 |   echo "ae: ${_ASSERT_EQUALS_}"
305 |   ${_ASSERT_EQUALS_} 'not equal' 1 2
306 | 
307 |   # This assert will not have line numbers included (e.g. "ASSERT: ...").
308 |   assertEquals 'not equal' 1 2
309 | }
310 | 
311 | # Load shUnit2.
312 | . ./shunit2
313 | ```
314 | 
315 | Notes:
316 | 
317 | 1. Due to how shell parses command-line arguments, all strings used with macros should be quoted twice. Namely, single-quotes must be converted to single-double-quotes, and vice-versa. If the string being passed is absolutely for sure not empty, the extra quoting is not necessary.<br/><br/>Normal `assertEquals` call.<br/>`assertEquals 'some message' 'x' ''`<br/><br/>Macro `_ASSERT_EQUALS_` call. Note the extra quoting around the _message_ and the _null_ value.<br/>`_ASSERT_EQUALS_ '"some message"' 'x' '""'`
318 | 
319 | 1. Line numbers are not supported in all shells. If a shell does not support them, no errors will be thrown. Supported shells include: __bash__ (>=3.0), __ksh__, __pdksh__, and __zsh__.
320 | 
321 | ### <a name="test-skipping"></a> Test Skipping
322 | 
323 | There are times where the test code you have written is just not applicable to the system you are running on. This section describes how to skip these tests but maintain the total test count.
324 | 
325 | Probably the easiest example would be shell code that is meant to run under the __bash__ shell, but the unit test is running under the Bourne shell. There are things that just won't work. The following test code demonstrates two sample functions, one that will be run under any shell, and another that will run only under the __bash__ shell.
326 | 
327 | _**Example** -- math include_
328 | ```sh
329 | # file: examples/math.inc.
330 | 
331 | add_generic() {
332 |   num_a=$1
333 |   num_b=$2
334 | 
335 |   expr $1 + $2
336 | }
337 | 
338 | add_bash() {
339 |   num_a=$1
340 |   num_b=$2
341 | 
342 |   echo $(($1 + $2))
343 | }
344 | ```
345 | 
346 | And here is a corresponding unit test that correctly skips the `add_bash()` function when the unit test is not running under the __bash__ shell.
347 | 
348 | _**Example** -- math unit test_
349 | ```sh
350 | #! /bin/sh
351 | # file: examples/math_test.sh
352 | 
353 | testAdding() {
354 |   result=`add_generic 1 2`
355 |   assertEquals \
356 |       "the result of '${result}' was wrong" \
357 |       3 "${result}"
358 | 
359 |   # Disable non-generic tests.
360 |   [ -z "${BASH_VERSION:-}" ] && startSkipping
361 | 
362 |   result=`add_bash 1 2`
363 |   assertEquals \
364 |       "the result of '${result}' was wrong" \
365 |       3 "${result}"
366 | }
367 | 
368 | oneTimeSetUp() {
369 |   # Load include to test.
370 |   . ./math.inc
371 | }
372 | 
373 | # Load and run shUnit2.
374 | . ./shunit2
375 | ```
376 | 
377 | Running the above test under the __bash__ shell will result in the following output.
378 | 
379 | ```console
380 | $ /bin/bash math_test.sh
381 | testAdding
382 | 
383 | Ran 1 test.
384 | 
385 | OK
386 | ```
387 | 
388 | But, running the test under any other Unix shell will result in the following output.
389 | 
390 | ```console
391 | $ /bin/ksh math_test.sh
392 | testAdding
393 | 
394 | Ran 1 test.
395 | 
396 | OK (skipped=1)
397 | ```
398 | 
399 | As you can see, the total number of tests has not changed, but the report indicates that some tests were skipped.
400 | 
401 | Skipping can be controlled with the following functions: `startSkipping()`, `endSkipping()`, and `isSkipping()`. Once skipping is enabled, it will remain enabled until the end of the current test function call, after which skipping is disabled.
402 | 
403 | ---
404 | 
405 | ## <a name="appendix"></a> Appendix
406 | 
407 | ### <a name="getting-help"></a> Getting Help
408 | 
409 | For help, please send requests to either the shunit2-users@googlegroups.com mailing list (archives available on the web at http://groups.google.com/group/shunit2-users) or directly to Kate Ward <kate dot ward at forestent dot com>.
410 | 
411 | ### <a name="zsh"></a> Zsh
412 | 
413 | For compatibility with Zsh, there is one requirement that must be met -- the `shwordsplit` option must be set. There are three ways to accomplish this.
414 | 
415 | 1. In the unit-test script, add the following shell code snippet before sourcing the `shunit2` library.
416 | 
417 | ```sh
418 | setopt shwordsplit
419 | ```
420 | 
421 | 1. When invoking __zsh__ from either the command-line or as a script with `#!`, add the `-y` parameter.
422 | 
423 | ```sh
424 | #! /bin/zsh -y
425 | ```
426 | 
427 | 1. When invoking __zsh__ from the command-line, add `-o shwordsplit --` as parameters before the script name.
428 | 
429 | ```console
430 | $ zsh -o shwordsplit -- some_script
431 | ```
432 | 
433 | [log4sh]: https://github.com/kward/log4sh
434 | [shflags]: https://github.com/kward/shflags
435 | 


--------------------------------------------------------------------------------
/dev/n_pmocr.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Program identity and version metadata
PROGRAM="pmocr" # Automatic OCR service that monitors a directory and launches a OCR instance as soon as a document arrives
AUTHOR="(C) 2015-2022 by Orsiris de Jong"
CONTACT="http://www.netpower.fr - ozy@netpower.fr"
PROGRAM_VERSION=1.8.2
PROGRAM_BUILD=2022050901

# NOTE(review): presumably compared against the revision declared in the configuration file - confirm against the config loader
CONFIG_FILE_REVISION_REQUIRED=1

### Tested Abbyy OCR versions 11 (discontinued by Abbyy, support is deprecated)
# Printed by CheckEnvironment when the installed tesseract version is rejected
TESTED_TESSERACT_VERSIONS="3.04, 4.1.2, 5.0.1"

## Debug parameter for service
# Only applies the default when the caller did not already provide _DEBUG
if [ "$_DEBUG" == "" ]; then
	_DEBUG=no
fi

_LOGGER_PREFIX="date"
# Handed over to ExecTasks by OCR_Dispatch
KEEP_LOGGING=0
DEFAULT_CONFIG_FILE="/etc/pmocr/default.conf"

# Set default wait time before forced run
# MAX_WAIT is used as timeout for inotifywait / InotifyWaitPoller in OCR_service; a caller provided value wins
if [ "$MAX_WAIT" == "" ]; then
	MAX_WAIT=86400 # One day in seconds
fi

# NOTE(review): the following lines look like placeholders that the build tooling (dev/merge.sh ?) replaces
# with the corresponding function subsets - confirm before editing them
include #### OFUNCTIONS MINI SUBSET ####
include #### VerComp SUBSET ####
include #### GetConfFileValue SUBSET ####
include #### InotifyWaitPoller SUBSET ####
 32 | 
 33 | # Change all booleans with "yes" or "no" to true / false for v2 config syntax compatibility
 34 | function UpdateBooleans {
 35 |         local update
 36 |         local booleans
 37 | 
 38 |         declare -a booleans=(DELETE_ORIGINAL CHECK_PDF)
 39 | 
 40 |         for i in "${booleans[@]}"; do
 41 |                 update="if [ \"\$$i\" == \"yes\" ]; then $i=true; fi; if [ \"\$$i\" == \"no\" ]; then $i=false; fi"
 42 |                 eval "$update"
 43 |         done
 44 | }
 45 | 
 46 | 
 47 | function CheckEnvironment {
 48 | 	if [ "$OCR_ENGINE_EXEC" != "" ]; then
 49 | 		if ! type "$OCR_ENGINE_EXEC" > /dev/null 2>&1; then
 50 | 			Logger "OCR engine executable [$OCR_ENGINE_EXEC] not present. Please adjust in your pmocr configuration file." "CRITICAL"
 51 | 			exit 1
 52 | 		fi
 53 | 	else
 54 | 		Logger "No OCR engine selected. Please configure it in [$CONFIG_FILE]." "CRITICAL"
 55 | 		exit 1
 56 | 	fi
 57 | 
 58 | 	if [ "$OCR_PREPROCESSOR_EXEC" != "" ]; then
 59 | 		if ! type "$OCR_PREPROCESSOR_EXEC" > /dev/null 2>&1; then
 60 | 			Logger "OCR preprocessor executable [$OCR_PREPROCESSOR_EXEC] not present. Please adjust in your pmocr configuration file." "CRITICAL"
 61 | 			exit 1
 62 | 		fi
 63 | 	fi
 64 | 
 65 | 	if [ "$_SERVICE_RUN" == true ]; then
 66 | 		if [ "$USE_INOTIFYWAIT" == true ]; then
 67 | 			if ! type inotifywait > /dev/null 2>&1; then
 68 | 				Logger "inotifywait not present (see inotify-tools package ?)." "CRITICAL"
 69 | 				exit 1
 70 | 			fi
 71 | 		fi
 72 | 
 73 | 		if ! type pgrep > /dev/null 2>&1; then
 74 | 			Logger "pgrep not present." "CRITICAL"
 75 | 			exit 1
 76 | 		fi
 77 | 
 78 | 		if ! type lsof > /dev/null 2>&1; then
 79 | 			Logger "lsof not present." "CRITICAL"
 80 | 			exit 1
 81 | 		fi
 82 | 
 83 | 		if [ "$PDF_MONITOR_DIR" != "" ]; then
 84 | 			if [ ! -w "$PDF_MONITOR_DIR" ]; then
 85 | 				Logger "Directory [$PDF_MONITOR_DIR] not writable." "ERROR"
 86 | 			fi
 87 | 		fi
 88 | 
 89 | 		if [ "$WORD_MONITOR_DIR" != "" ]; then
 90 | 			if [ ! -w "$WORD_MONITOR_DIR" ]; then
 91 | 				Logger "Directory [$WORD_MONITOR_DIR] not writable." "ERROR"
 92 | 			fi
 93 | 		fi
 94 | 
 95 | 		if [ "$EXCEL_MONITOR_DIR" != "" ]; then
 96 | 			if [ ! -w "$EXCEL_MONITOR_DIR" ]; then
 97 | 				Logger "Directory [$EXCEL_MONITOR_DIR] not writable." "ERROR"
 98 | 			fi
 99 | 		fi
100 | 
101 | 		if [ "$TEXT_MONITOR_DIR" != "" ]; then
102 | 			if [ ! -w "$TEXT_MONITOR_DIR" ]; then
103 | 				Logger "Directory [$TEXT_MONITOR_DIR] not writable." "ERROR"
104 | 			fi
105 | 		fi
106 | 
107 | 		if [ "$CSV_MONITOR_DIR" != "" ]; then
108 | 			if [ ! -w "$CSV_MONITOR_DIR" ]; then
109 | 				Logger "Directory [$CSV_MONITOR_DIR] not writable." "ERROR"
110 | 			fi
111 | 		fi
112 | 	fi
113 | 
114 | 	if [ "$CHECK_PDF" == true ]; then
115 | 		if ! type pdffonts > /dev/null 2>&1; then
116 | 			Logger "pdffonts not present (see poppler-utils package ?)." "CRITICAL"
117 | 			exit 1
118 | 		fi
119 | 	fi
120 | 
121 | 	if [ "$OCR_ENGINE" == "tesseract" ] || [ "$OCR_ENGINE" == "tesseract3" ]; then
122 | 		if ! type "$PDF_TO_TIFF_EXEC" > /dev/null 2>&1; then
123 | 			Logger "PDF to TIFF conversion executable [$PDF_TO_TIFF_EXEC] not present. Please install ImageMagick (for convert) or ghostscript (for gs)." "CRITICAL"
124 | 			exit 1
125 | 		fi
126 | 
127 | 		TESSERACT_VERSION=$(tesseract -v 2>&1 | head -n 1 | awk '{print $2}')
128 | 		if [ $(VerComp "$TESSERACT_VERSION" "3.00") -gt 1 ]; then
129 | 			Logger "Tesseract version [$TESSERACT_VERSION] is not supported. Please use version 3.x or better." "CRITICAL"
130 | 			Logger "Known working tesseract versions are $TESTED_TESSERACT_VERSIONS." "CRITICAL"
131 | 			exit 1
132 | 		fi
133 | 	fi
134 | }
135 | 
# Shutdown handler: drops the service monitor file, kills the child processes of this script,
# logs whether that worked and finally exits with the status returned by CleanUp.
function TrapQuit {
	local killStatus

	# Remove the monitor file if it is still around
	[ -f "$SERVICE_MONITOR_FILE" ] && rm -f "$SERVICE_MONITOR_FILE" > /dev/null 2>&1

	KillChilds $$ > /dev/null 2>&1
	killStatus=$?

	case $killStatus in
		0)
			Logger "$PROGRAM stopped instance [$INSTANCE_ID] with pid [$$]." "NOTICE"
			;;
		*)
			Logger "$PROGRAM couldn't properly stop instance [$INSTANCE_ID] with pid [$$]." "ERROR"
			;;
	esac

	CleanUp
	exit $?
}
153 | 
# Copies the engine specific settings (TESSERACT_* or ABBYY_*) into the generic OCR_* / *_OCR_ENGINE_ARGS
# variables used by the rest of the script, depending on OCR_ENGINE.
# Logs a CRITICAL message and exits with status 1 when OCR_ENGINE holds an unknown value.
function SetOCREngineOptions {
	__CheckArguments 0 $# "$@"		#__WITH_PARANOIA_DEBUG

	case "$OCR_ENGINE" in
		tesseract3|tesseract)
			OCR_ENGINE_EXEC="$TESSERACT_OCR_ENGINE_EXEC"
			OCR_ENGINE_INPUT_ARG="$TESSERACT_OCR_ENGINE_INPUT_ARG"
			OCR_ENGINE_OUTPUT_ARG="$TESSERACT_OCR_ENGINE_OUTPUT_ARG"

			PDF_OCR_ENGINE_ARGS="$TESSERACT_PDF_OCR_ENGINE_ARGS"
			TEXT_OCR_ENGINE_ARGS="$TESSERACT_TEXT_OCR_ENGINE_ARGS"
			CSV_OCR_ENGINE_ARGS="$TESSERACT_CSV_OCR_ENGINE_ARGS"

			# Tesseract additionally needs a PDF to TIFF converter
			PDF_TO_TIFF_EXEC="$TESSERACT_PDF_TO_TIFF_EXEC"
			PDF_TO_TIFF_OPTS="$TESSERACT_PDF_TO_TIFF_OPTS"
			;;
		abbyyocr11)
			OCR_ENGINE_EXEC="$ABBYY_OCR_ENGINE_EXEC"
			OCR_ENGINE_INPUT_ARG="$ABBYY_OCR_ENGINE_INPUT_ARG"
			OCR_ENGINE_OUTPUT_ARG="$ABBYY_OCR_ENGINE_OUTPUT_ARG"

			PDF_OCR_ENGINE_ARGS="$ABBYY_PDF_OCR_ENGINE_ARGS"
			WORD_OCR_ENGINE_ARGS="$ABBYY_WORD_OCR_ENGINE_ARGS"
			EXCEL_OCR_ENGINE_ARGS="$ABBYY_EXCEL_OCR_ENGINE_ARGS"
			TEXT_OCR_ENGINE_ARGS="$ABBYY_TEXT_OCR_ENGINE_ARGS"
			CSV_OCR_ENGINE_ARGS="$ABBYY_CSV_OCR_ENGINE_ARGS"
			;;
		*)
			Logger "Bogus OCR_ENGINE selected." "CRITICAL"
			exit 1
			;;
	esac
}
183 | 
# Runs the complete OCR pipeline for one file: optional PDF to TIFF conversion, optional preprocessing,
# the OCR engine itself, then post-processing (CSV hack, ownership / permissions, moving, deleting or
# renaming the original file).
#
# Arguments:
#   $1 inputFileName  full path of the file to OCR
#   $2 fileExtension  filename extension of the output file
#   $3 ocrEngineArgs  OCR engine arguments specific to the requested output (optional)
#   $4 csvHack        true to turn the text output into CSV (optional, defaults to false)
#
# This function never returns, it always calls exit: 0 when no alert was raised, otherwise SendAlert is
# called and the OCR result code is used as exit status (which can still be 0 when only a post-processing
# step failed). NOTE(review): presumably executed by ExecTasks in a process of its own - confirm.
#
# NOTE(review): the conversion / preprocessor / engine command lines are built as strings and run through
# eval. File names dropped into a monitored directory end up in those strings, so names containing quotes,
# backticks or $(...) are evaluated by the shell. Left as is because the configured *_ARGS values rely on
# being re-parsed by eval; should be reworked with care.
function OCR {
	local inputFileName="$1" 		# Contains full path of file to OCR
	local fileExtension="$2" 		# Filename extension of output file
	local ocrEngineArgs="$3" 		# OCR engine specific arguments
	local csvHack="${4:-false}" 		# CSV Hack boolean

	__CheckArguments 2-4 $# "$@"		#__WITH_PARANOIA_DEBUG

	local tmpFilePreprocessor
	local tmpFileIntermediary
	local renamedFileName
	local outputFileName
	local fileToProcess

	local cmd
	local subcmd
	# Pessimistic default: when no engine ran at all (input file vanished, bogus engine) the file has to
	# take the failure path instead of tripping the result test with an empty value
	local result=1

	local alert=false

	# Temporary log files receiving the output of the external commands
	local runLog="$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID.$TSTAMP"
	local errLog="$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.error.$SCRIPT_PID.$TSTAMP"

	if [ "$_SILENT" != true ]; then
		Logger "Processing file [$inputFileName]." "ALWAYS"
	fi

	# Expand $FILENAME_ADDITION
	eval "outputFileName=\"${inputFileName%.*}$FILENAME_ADDITION$FILENAME_SUFFIX\""

	# Add check to see whether outputFileName already exists, if so, add a random timestamp
	if [ -f "$outputFileName" ] || [ -f "$outputFileName$fileExtension" ]; then
		outputFileName="$outputFileName$(date '+%N')"
	fi

	# Perform intermediary transformation of input pdf file to tiff if OCR_ENGINE is tesseract and no preprocessor is set
	if ([ "$OCR_ENGINE" == "tesseract3" ] || [ "$OCR_ENGINE" == "tesseract" ]) && [[ "$inputFileName" == *.[pP][dD][fF] ]] && [ "$OCR_PREPROCESSOR_EXEC" == "" ]; then
		tmpFileIntermediary="${inputFileName%.*}.__pmOCR_intermediary_.tif"
		subcmd="MAGICK_THREAD_LIMIT=$NUMBER_OF_PROCESSES $PDF_TO_TIFF_EXEC $PDF_TO_TIFF_OPTS \"$inputFileName\" \"$tmpFileIntermediary\" > \"$runLog\""
		Logger "Executing: $subcmd" "DEBUG"
		eval "$subcmd"
		result=$?
		if [ "$result" -ne 0 ]; then
			Logger "$PDF_TO_TIFF_EXEC intermediary transformation failed." "ERROR"
			Logger "Truncated output:\n$(head -c16384 "$runLog")" "DEBUG"
			alert=true
		else
			fileToProcess="$tmpFileIntermediary"
		fi
	else
		fileToProcess="$inputFileName"
	fi

	# Run OCR Preprocessor
	if [ -f "$fileToProcess" ] && [ "$OCR_PREPROCESSOR_EXEC" != "" ]; then
		tmpFilePreprocessor="${fileToProcess%.*}.__pmOCR_preprocessed_.tif"
		subcmd="MAGICK_THREAD_LIMIT=$NUMBER_OF_PROCESSES $OCR_PREPROCESSOR_EXEC $OCR_PREPROCESSOR_ARGS $OCR_PREPROCESSOR_INPUT_ARGS\"$fileToProcess\" $OCR_PREPROCESSOR_OUTPUT_ARG\"$tmpFilePreprocessor\" > \"$runLog\""
		# This is never logged on screen since we use a subshell, but will be logged to logfile
		Logger "Executing $subcmd" "DEBUG"
		eval "$subcmd"
		result=$?
		if [ "$result" -ne 0 ]; then
			# The original file is still handed to the OCR engine in this case
			Logger "$OCR_PREPROCESSOR_EXEC preprocesser failed." "ERROR"
			Logger "Truncated output\n$(head -c16384 "$runLog")" "DEBUG"
			alert=true
		else
			fileToProcess="$tmpFilePreprocessor"
		fi
	fi

	if [ -f "$fileToProcess" ]; then
		# Run Abbyy OCR
		if [ "$OCR_ENGINE" == "abbyyocr11" ]; then
			cmd="$OCR_ENGINE_EXEC $OCR_ENGINE_INPUT_ARG \"$fileToProcess\" $ocrEngineArgs $OCR_ENGINE_OUTPUT_ARG \"$outputFileName$fileExtension\" > \"$runLog\" 2>&1"
			#TODO: THIS IS NEVER LOGGED
			Logger "Executing: $cmd" "DEBUG"
			eval "$cmd"
			result=$?

		# Run Tesseract OCR + Intermediary transformation
		elif [ "$OCR_ENGINE" == "tesseract3" ] || [ "$OCR_ENGINE" == "tesseract" ]; then
			# Empty tmp log file first
			echo "" > "$runLog"
			cmd="$OCR_ENGINE_EXEC $TESSERACT_OPTIONAL_ARGS $OCR_ENGINE_INPUT_ARG \"$fileToProcess\" $OCR_ENGINE_OUTPUT_ARG \"$outputFileName\" $ocrEngineArgs > \"$runLog\" 2> \"$errLog\""
			#TODO: THIS IS NEVER LOGGED
			Logger "Executing: $cmd" "DEBUG"
			eval "$cmd"
			result=$?

			# Workaround for tesseract complaining about missing OSD data but still processing file without changing exit code
			# Tesseract may also return 0 exit code with error "read_params_file: Can't open pdf"
			if [ "$result" -eq 0 ] && grep -i "error" "$errLog"; then
				result=9999
				Logger "Tesseract produced errors while transforming the document." "WARN"
				Logger "Truncated output\n$(head -c16384 "$runLog")" "NOTICE"
				Logger "Truncated output\n$(head -c16384 "$errLog")" "NOTICE"
				alert=true
			fi

			# Fix for tesseract pdf output also outputs txt format
			if [ "$fileExtension" == ".pdf" ] && [ -f "$outputFileName$TEXT_EXTENSION" ]; then
				rm -f "$outputFileName$TEXT_EXTENSION" > /dev/null 2>&1
				if [ $? != 0 ]; then
					Logger "Cannot remove temporary txt file [$outputFileName$TEXT_EXTENSION]." "WARN"
					alert=true
				fi
			fi
		else
			Logger "Bogus ocr engine [$OCR_ENGINE]. Please edit file [$(basename "$0")] and set [OCR_ENGINE] value." "ERROR"
			# Nothing was OCRed, make sure a successful preprocessor run is not mistaken for an OCR success
			result=1
		fi
	fi

	# Remove temporary files
	if [ -f "$tmpFileIntermediary" ]; then
		rm -f "$tmpFileIntermediary" > /dev/null 2>&1
		if [ $? != 0 ]; then
			Logger "Cannot remove temporary file [$tmpFileIntermediary]." "WARN"
			alert=true
		fi
	fi
	if [ -f "$tmpFilePreprocessor" ]; then
		rm -f "$tmpFilePreprocessor" > /dev/null 2>&1
		if [ $? != 0 ]; then
			Logger "Cannot remove temporary file [$tmpFilePreprocessor]." "WARN"
			alert=true
		fi
	fi

	if [ "$result" != 0 ]; then
		Logger "Could not process file [$inputFileName] (OCR error code $result). See logs." "ERROR"
		Logger "Truncated OCR Engine Output:\n$(head -c16384 "$runLog")" "ERROR"
		alert=true

		if [ "$MOVE_ORIGINAL_ON_FAILURE" != "" ]; then
			if [ ! -w "$MOVE_ORIGINAL_ON_FAILURE" ]; then
				Logger "Cannot write to folder [$MOVE_ORIGINAL_ON_FAILURE]. Will not move file [$inputFileName]." "WARN"
			else
				eval "renamedFileName=\"${inputFileName%.*}$FILENAME_ADDITION.${inputFileName##*.}\""
				mv "$inputFileName" "$MOVE_ORIGINAL_ON_FAILURE/$(basename "$renamedFileName")"
				if [ $? != 0 ]; then
					Logger "Cannot move [$inputFileName] to [$MOVE_ORIGINAL_ON_FAILURE/$(basename "$renamedFileName")]. Will rename it." "WARN"
					alert=true
				fi
			fi
		fi

		if [ -f "$inputFileName" ]; then
			# Add error suffix so failed files won't be run again and create a loop
			renamedFileName="${inputFileName%.*}$FAILED_FILENAME_SUFFIX.${inputFileName##*.}"
			if [ "$inputFileName" != "$renamedFileName" ]; then
				Logger "Renaming file [$inputFileName] to [$renamedFileName] in order to exclude it from next run." "WARN"
				mv "$inputFileName" "$renamedFileName"
				if [ $? != 0 ]; then
					Logger "Cannot move [$inputFileName] to [$renamedFileName]." "WARN"
					alert=true
				fi
			fi
		fi
	else
		# Convert runs of consecutive spaces to semi colon (hack to transform txt output to CSV)
		if [ "$csvHack" == true ]; then
			Logger "Applying CSV hack" "DEBUG"
			if [ "$OCR_ENGINE" == "abbyyocr11" ]; then
				sed -i.tmp 's/   */;/g' "$outputFileName$fileExtension"
				if [ $? == 0 ]; then
					rm -f "$outputFileName$fileExtension.tmp" > /dev/null 2>&1
					if [ $? != 0 ]; then
						Logger "Cannot delete temporary file [$outputFileName$fileExtension.tmp]." "WARN"
						alert=true
					fi
				else
					Logger "Cannot use csvhack on [$outputFileName$fileExtension]." "WARN"
					alert=true
				fi
			fi

			if [ "$OCR_ENGINE" == "tesseract3" ] || [ "$OCR_ENGINE" == "tesseract" ]; then
				sed 's/   */;/g' "$outputFileName$TEXT_EXTENSION" > "$outputFileName$CSV_EXTENSION"
				if [ $? == 0 ]; then
					rm -f "$outputFileName$TEXT_EXTENSION" > /dev/null 2>&1
					if [ $? != 0 ]; then
						Logger "Cannot delete temporary file [$outputFileName$TEXT_EXTENSION]." "WARN"
						alert=true
					fi
				else
					Logger "Cannot use csvhack on [$outputFileName$TEXT_EXTENSION]." "WARN"
					alert=true
				fi
			fi
		fi

		# Apply permissions and ownership
		if [ "$PRESERVE_OWNERSHIP" == true ]; then
			chown --reference "$inputFileName" "$outputFileName$fileExtension"
			if [ $? != 0 ]; then
				Logger "Cannot chown [$outputFileName$fileExtension] with reference from [$inputFileName]." "WARN"
				alert=true
			fi
		fi
		if [ "$(IsInteger "$FILE_PERMISSIONS")" -eq 1 ]; then
			chmod $FILE_PERMISSIONS "$outputFileName$fileExtension"
			if [ $? != 0 ]; then
				Logger "Cannot mod [$outputFileName$fileExtension] with [$FILE_PERMISSIONS]." "WARN"
				alert=true
			fi
		elif [ "$PRESERVE_OWNERSHIP" == true ]; then
			chmod --reference "$inputFileName" "$outputFileName$fileExtension"
			if [ $? != 0 ]; then
				Logger "Cannot chmod [$outputFileName$fileExtension] with reference from [$inputFileName]." "WARN"
				alert=true
			fi
		fi

		if [ "$MOVE_ORIGINAL_ON_SUCCESS" != "" ]; then
			if [ ! -w "$MOVE_ORIGINAL_ON_SUCCESS" ]; then
				Logger "Cannot write to folder [$MOVE_ORIGINAL_ON_SUCCESS]. Will not move file [$inputFileName]." "WARN"
				alert=true
			else
				eval "renamedFileName=\"${inputFileName%.*}$FILENAME_ADDITION.${inputFileName##*.}\""
				mv "$inputFileName" "$MOVE_ORIGINAL_ON_SUCCESS/$(basename "$renamedFileName")"
				if [ $? != 0 ]; then
					Logger "Cannot move [$inputFileName] to [$MOVE_ORIGINAL_ON_SUCCESS/$(basename "$renamedFileName")]." "WARN"
					alert=true
				fi
			fi
		elif [ "$DELETE_ORIGINAL" == true ]; then
			Logger "Deleting file [$inputFileName]." "DEBUG"
			rm -f "$inputFileName" > /dev/null 2>&1
			if [ $? != 0 ]; then
				Logger "Cannot delete [$inputFileName]." "WARN"
				alert=true
			fi
		fi

		# Original still in place (not moved, not deleted): tag it with the suffix so it is not picked up again
		if [ -f "$inputFileName" ]; then
			renamedFileName="${inputFileName%.*}$FILENAME_SUFFIX.${inputFileName##*.}"
			Logger "Renaming file [$inputFileName] to [$renamedFileName]." "DEBUG"
			mv "$inputFileName" "$renamedFileName"
			if [ $? != 0 ]; then
				Logger "Cannot move [$inputFileName] to [$renamedFileName]." "WARN"
				alert=true
			fi
		fi

		if [ "$_SILENT" != true ]; then
			Logger "Processed file [$inputFileName]." "ALWAYS"
		fi
	fi

	if [ "$alert" == true ]; then
		SendAlert
		exit "$result"
	else
		exit 0
	fi
}
441 | 
# Builds the list of files to OCR for one monitored directory and hands that list over to ExecTasks.
#
# The candidate files come from the NUL separated InotifyWaitPoller result file (EVENT_LOG_FILE) when it
# exists, else from a find run over the directory. Files that already contain text (CHECK_PDF) are skipped,
# files reported in use by lsof are deferred (service mode re-triggers a dispatch via USR1).
#
# Arguments:
#   $1 directoryToProcess  directory to scan
#   $2 fileExtension       output file extension, passed on to OCR
#   $3 ocrEngineArgs       engine arguments for this transformation, passed on to OCR (optional)
#   $4 csvHack             CSV transformation flag, passed on to OCR (optional)
# Returns: the exit status of ExecTasks
#
# NOTE(review): file names are written between double quotes into the task file; names containing double
# quotes, backticks or $ will not survive if ExecTasks evaluates those lines - confirm against ExecTasks.
function OCR_Dispatch {
	local directoryToProcess="$1" 		#(contains some path)
	local fileExtension="$2" 		#(extension of the output files, passed on to OCR)
	local ocrEngineArgs="$3" 		#(transformation specific arguments)
	local csvHack="$4" 			#(CSV transformation flag)

	__CheckArguments 2-4 $# "$@"		#__WITH_PARANOIA_DEBUG

	local findExcludes
	local moveSuccessExclude
	local moveFailureExclude
	local failedFindExcludes
	local taskFile
	local file
	local retval

	# File receiving one OCR call per file to process
	taskFile="$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID.$TSTAMP"

	## CHECK find excludes
	if [ "$FILENAME_SUFFIX" != "" ]; then
		findExcludes="*$FILENAME_SUFFIX.*"
	else
		findExcludes=""
	fi

	if [ -d "$MOVE_ORIGINAL_ON_SUCCESS" ]; then
		moveSuccessExclude="$MOVE_ORIGINAL_ON_SUCCESS/*"
	fi

	if [ -d "$MOVE_ORIGINAL_ON_FAILURE" ]; then
		moveFailureExclude="$MOVE_ORIGINAL_ON_FAILURE/*"
	fi

	if [ "$FAILED_FILENAME_SUFFIX" != "" ]; then
		failedFindExcludes="*$FAILED_FILENAME_SUFFIX.*"
	else
		failedFindExcludes=""
	fi

	# Always start from an empty task file
	if [ -f "$taskFile" ]; then
		rm -f "$taskFile" > /dev/null 2>&1
	fi

	touch "$taskFile"
	if [ -f "$EVENT_LOG_FILE" ]; then
		Logger "OCR dispatch launched via poller result method." "DEBUG"
	else
		Logger "OCR dispatch launched via find method." "DEBUG"
	fi

	while IFS= read -r -d $'\0' file; do
		[ "$file" == "./" ] && continue
		# pdffonts prints a two line header, so three lines or more mean at least one font is embedded
		if [ "$CHECK_PDF" == true ] && [ "$(pdffonts "$file" 2> /dev/null | wc -l)" -ge 3 ]; then
			Logger "Skipping file [$file] already containing text." "VERBOSE"
			continue
		fi

		# Check if file is currently being written to (mitigates slow transfer files being processed before transfer is finished)
		if ! lsof -f -- "$file" > /dev/null 2>&1; then
			if [ "$_BATCH_RUN" == true ]; then
				Logger "Preparing to process [$file]." "NOTICE"
			fi
			echo "OCR \"$file\" \"$fileExtension\" \"$ocrEngineArgs\" \"$csvHack\"" >> "$taskFile"
		else
			if [ "$_BATCH_RUN" == true ]; then
				Logger "Cannot process file [$file] currently in use." "ALWAYS"
			else
				Logger "Deferring file [$file] currently being written to." "ALWAYS"
				kill -USR1 "$SCRIPT_PID"
			fi
		fi
	# if InotifyWaitPoller result file exists, prefer it to find directive
	# Fallback to full file traversal if no file exists
	# A real if / else is used so that a failing cat or rm can never trigger the find run on top of the
	# poller results, which would queue the same files twice
	done < <(
		if [ -f "$EVENT_LOG_FILE" ]; then
			cat "$EVENT_LOG_FILE" && rm -f "$EVENT_LOG_FILE"
		else
			find "$directoryToProcess" -type f -iregex ".*\.$FILES_TO_PROCESS" ! -regex ".*\.__pmOCR_preprocessed_\..*" ! -name "$findExcludes" -and ! -wholename "$moveSuccessExclude" -and ! -wholename "$moveFailureExclude" -and ! -name "$failedFindExcludes" -print0
		fi
	)

	ExecTasks "$taskFile" "${FUNCNAME[0]}" true 0 0 3600 0 true .05 $KEEP_LOGGING false false false $NUMBER_OF_PROCESSES
	retval=$?
	if [ $retval -ne 0 ]; then
		Logger "Failed OCR_Dispatch run." "ERROR"
	fi
	[ -f "$taskFile" ] && rm -f "$taskFile" > /dev/null 2>&1
	return $retval
}
525 | 
# Run OCR_Dispatch once, if a new request comes when a run is active, run it again once
#
# DISPATCH_NEEDED counts the pending requests (capped at 2), DISPATCH_RUNS tells whether a run is active.
# Every run walks through the PDF, WORD, EXCEL, TEXT and CSV monitor directories in that order and
# dispatches those which are configured; only the CSV run asks for the CSV hack.
function DispatchRunner {
	local dispatchKind
	local dispatchDirVar
	local dispatchExtVar
	local dispatchArgsVar
	local dispatchCsvHack

	if [ $DISPATCH_NEEDED -lt 2 ]; then
		DISPATCH_NEEDED=$((DISPATCH_NEEDED+1))
	fi

	while [ $DISPATCH_NEEDED -gt 0 ] && [ $DISPATCH_RUNS == false ]; do
		DISPATCH_RUNS=true

		for dispatchKind in PDF WORD EXCEL TEXT CSV; do
			# Names of the three settings belonging to this kind, read through indirect expansion
			dispatchDirVar="${dispatchKind}_MONITOR_DIR"
			dispatchExtVar="${dispatchKind}_EXTENSION"
			dispatchArgsVar="${dispatchKind}_OCR_ENGINE_ARGS"

			# Kinds without monitor directory are not in use
			[ "${!dispatchDirVar}" != "" ] || continue

			if [ "$dispatchKind" == "CSV" ]; then
				dispatchCsvHack=true
			else
				dispatchCsvHack=false
			fi

			OCR_Dispatch "${!dispatchDirVar}" "${!dispatchExtVar}" "${!dispatchArgsVar}" $dispatchCsvHack
		done

		DISPATCH_NEEDED=$((DISPATCH_NEEDED-1))
		DISPATCH_RUNS=false
	done
}
557 | 
# Monitoring loop for one directory: as long as SERVICE_MONITOR_FILE exists, waits for changes in the
# directory (inotifywait or InotifyWaitPoller, depending on USE_INOTIFYWAIT) and sends USR1 to SCRIPT_PID
# after every wait (and once right after start) so that a dispatch run gets triggered.
#
# Arguments:
#   $1 directoryToProcess  directory to monitor
#   $2 fileExtension       extension of the generated files, used to exclude them from monitoring
#
# While the directory is not writable, the check is repeated every hour.
function OCR_service {
	## Function arguments
	local directoryToProcess="${1}" 	#(contains some path)
	local fileExtension="${2}" 		#(filename endings to exclude from processing)

	__CheckArguments 2 $# "$@"		#__WITH_PARANOIA_DEBUG

	local cmd
	local dirAvailable=true
	local justStarted=true
	local moveSuccessExclude
	local moveFailureExclude
	# Formerly leaked into the global scope
	local moveSuccessExcludePoller
	local moveFailureExcludePoller

	if [ -d "$MOVE_ORIGINAL_ON_SUCCESS" ]; then
		moveSuccessExclude="--exclude \"$MOVE_ORIGINAL_ON_SUCCESS\""
		moveSuccessExcludePoller="$MOVE_ORIGINAL_ON_SUCCESS/.*"
	fi

	if [ -d "$MOVE_ORIGINAL_ON_FAILURE" ]; then
		moveFailureExclude="--exclude \"$MOVE_ORIGINAL_ON_FAILURE\""
		moveFailureExcludePoller="$MOVE_ORIGINAL_ON_FAILURE/.*"
	fi

	Logger "Starting $PROGRAM instance [$INSTANCE_ID] for directory [$directoryToProcess], converting to [$fileExtension]." "ALWAYS"
	while [ -f "$SERVICE_MONITOR_FILE" ];do
		while [ ! -w "$directoryToProcess" ]; do
			Logger "Directory [$directoryToProcess] is not writable. Trying again in an hour." "ERROR"
			sleep 3600
			dirAvailable=false
		done

		if [ "$dirAvailable" == false ]; then
			Logger "Directory [$directoryToProcess] is available again. Resuming monitoring." "ERROR"
			dirAvailable=true
		fi

		# Have a first run on start
		if [ "$justStarted" == true ]; then
			kill -USR1 "$SCRIPT_PID"
			justStarted=false
		fi
		Logger "Looking for changes in [$directoryToProcess]" "NOTICE"
		# If file modifications occur, send a signal so DispatchRunner is run
		if [ "$USE_INOTIFYWAIT" == true ]; then
			# NOTE(review): some inotifywait versions only honour the last --exclude option given - confirm
			# against the inotify-tools version in use
			cmd="inotifywait --exclude \"(.*)$FILENAME_SUFFIX$fileExtension\" --exclude \"(.*)$FAILED_FILENAME_SUFFIX$fileExtension\" $moveSuccessExclude $moveFailureExclude  -qq -r -e create,move \"$directoryToProcess\" --timeout $MAX_WAIT"
			# Quoted on purpose: an unquoted expansion would be word split and globbed before eval gets it,
			# collapsing consecutive spaces in paths and expanding the (.*) patterns against the current directory
			eval "$cmd"
		else
			Logger "Running InotifyWaitPoller process" "VERBOSE"
			# InotifyWaitPoller paths includes excludes recursive monitor_mode event_log_file events timeout
			InotifyWaitPoller "$directoryToProcess" ".*\.$FILES_TO_PROCESS" ".*$FILENAME_SUFFIX$fileExtension;.*$FAILED_FILENAME_SUFFIX;.*\.__pmOCR_preprocessed_\..*;$fileExtension;$moveSuccessExcludePoller;$moveFailureExcludePoller" true false "$EVENT_LOG_FILE" "CREATE,MODIFY,MOVED_TO" $MAX_WAIT $INOTIFY_POLLER_INTERVAL
		fi
		Logger "Changes detected in [$directoryToProcess]" "NOTICE"
		kill -USR1 "$SCRIPT_PID"
		# Update SERVICE_MONITOR_FILE to prevent automatic old file cleanup in /tmp directory (happens in RHEL 6/7)
		echo "$SCRIPT_PID" > "$SERVICE_MONITOR_FILE"
	done
}
615 | 
# Prints the command line help to stdout, then terminates the script with exit code 128.
# Reads PROGRAM, PROGRAM_VERSION, PROGRAM_BUILD, AUTHOR and CONTACT for the header.
function Usage {
	echo ""
	echo "$PROGRAM $PROGRAM_VERSION $PROGRAM_BUILD"
	echo "$AUTHOR"
	echo "$CONTACT"
	echo ""
	echo "You may adjust file default config in /etc/pmocr/default.conf according to your OCR needs (language, ocr engine, etc)."
	echo ""
	echo "$PROGRAM can be launched as a directory monitoring service using \"service $PROGRAM-srv start\" or \"systemctl start $PROGRAM-srv\" or in batch processing mode"
	echo "Batch mode usage:"
	echo "$PROGRAM.sh --batch [options] /path/to/folder"
	echo ""
	echo "[OPTIONS]"
	echo "--config=/path/to/config  Use an alternative OCR config file."
	echo "-p, --target=PDF          Creates a PDF document (default)"
	echo "-w, --target=DOCX         Creates a WORD document"
	echo "-e, --target=XLSX         Creates an EXCEL document"
	echo "-t, --target=TXT          Creates a text file"
	echo "-c, --target=CSV          Creates a CSV file"
	echo "(multiple targets can be set)"
	echo ""
	echo "-k, --skip-txt-pdf        Skips PDF files already containing indexable text"
	echo "-d, --delete-input        Deletes input file after processing ( preventing them to be processed again)"
	echo "--suffix=...              Adds a given suffix to the output filename (in order to not process them again, ex: pdf to pdf conversion)."
	echo "                          By default, the suffix is '_OCR'"
	echo "--no-suffix               Won't add any suffix to the output filename"
	echo "--failed-suffix=...       Adds a given suffix to failed files (in order not to process them again. Defaults to '_OCR_ERR')"
	echo "--no-failed-suffix        Won't add any suffix to failed conversion filenames"
	# The dollar sign is escaped so the example is shown literally instead of being executed here
	echo "--text=...                Adds a given text / variable to the output filename (ex: --text='\$(date +%Y)')."
	echo "                          By default, the text is the conversion date in pseudo ISO format."
	echo "--no-text                 Won't add any text to the output filename"
	echo "-s, --silent              Will not output anything to stdout except errors"
	echo "-v, --verbose             Verbose output"
	echo "--service                 Run as service"
	echo ""
	exit 128
}
653 | 
#### SCRIPT ENTRY POINT ####

# Run TrapQuit whenever the script exits
trap TrapQuit EXIT

# Default values of the command line switches (overwritten by GetCommandlineArguments)
_SILENT=false
skip_txt_pdf=false
delete_input=false
suffix=""
no_suffix=false
failed_suffix=""
no_failed_suffix=false
no_text=false
_BATCH_RUN=false
_SERVICE_RUN=false

# Requested output formats, none selected yet
pdf=false
docx=false
xlsx=false
txt=false
csv=false

# Value handed to InotifyWaitPoller as its polling interval
INOTIFY_POLLER_INTERVAL=30
676 | 
# Parses the script command line and stores the result in global variables.
#
# Arguments: the script arguments ("$@")
# Globals written: CONFIG_FILE, _BATCH_RUN, _SERVICE_RUN, _SILENT, _LOGGER_VERBOSE,
#   pdf, docx, xlsx, txt, csv, skip_txt_pdf, delete_input, suffix, no_suffix,
#   failed_suffix, no_failed_suffix, text, no_text
# Arguments that match no pattern (for instance the batch path) are ignored here.
# Calls Usage for help / version requests.
function GetCommandlineArguments {
	local i

	for i in "$@"
	do
		case "$i" in
			--config=*)
			# ${i#*=} removes everything up to the FIRST '=' so values may contain '=' themselves
			CONFIG_FILE="${i#*=}"
			;;
			--batch)
			_BATCH_RUN=true
			;;
			--service)
			_SERVICE_RUN=true
			;;
			--silent|-s)
			_SILENT=true
			;;
			--verbose|-v)
			_LOGGER_VERBOSE=true
			;;
			-p|--target=PDF|--target=pdf)
			pdf=true
			;;
			-w|--target=DOCX|--target=docx)
			docx=true
			;;
			-e|--target=XLSX|--target=xlsx)
			xlsx=true
			;;
			-t|--target=TXT|--target=txt)
			txt=true
			;;
			-c|--target=CSV|--target=csv)
			csv=true
			;;
			-k|--skip-txt-pdf)
			skip_txt_pdf=true
			;;
			-d|--delete-input)
			delete_input=true
			;;
			--suffix=*)
			suffix="${i#*=}"
			;;
			--no-suffix)
			no_suffix=true
			;;
			--failed-suffix=*)
			failed_suffix="${i#*=}"
			;;
			--no-failed-suffix)
			no_failed_suffix=true
			;;
			--text=*)
			text="${i#*=}"
			;;
			--no-text)
			no_text=true
			;;
			# '-?' is an unquoted glob: it also matches any other unhandled single character option.
			# '-v' is not listed here since it is already consumed by --verbose above.
			--help|-h|--version|-?)
			Usage
			;;
		esac
	done
}
741 | 
# First pass over the arguments (this is where a --config=... value gets picked up)
GetCommandlineArguments "$@"

# Load the config file given on the command line, or the default one when none was given
LoadConfigFile "${CONFIG_FILE:-$DEFAULT_CONFIG_FILE}" $CONFIG_FILE_REVISION_REQUIRED

# Older config files used the misspelled FILES_TO_PROCES setting, keep honouring it
if [ -z "$FILES_TO_PROCESS" ] && [ -n "$FILES_TO_PROCES" ]; then
	FILES_TO_PROCESS="$FILES_TO_PROCES"
fi

# Second pass over the arguments, so runtime arguments win over values set by the config file
GetCommandlineArguments "$@"

UpdateBooleans
SetOCREngineOptions
760 | 
# Pick the log file: an explicit LOGFILE wins, otherwise try /var/log, then $HOME,
# and finally fall back to the current directory
if [ -n "$LOGFILE" ]; then
	LOG_FILE="$LOGFILE"
elif [ -w /var/log ]; then
	LOG_FILE="/var/log/$PROGRAM.$INSTANCE_ID.log"
elif [ -n "$HOME" ] && [ -w "$HOME" ]; then
	LOG_FILE="$HOME/$PROGRAM.$INSTANCE_ID.log"
else
	LOG_FILE="./$PROGRAM.$INSTANCE_ID.log"
fi

# Only call Logger when the directory holding the log file is writable
if [ -w "$(dirname "$LOG_FILE")" ]; then
	Logger "Script begin, logging to [$LOG_FILE]." "DEBUG"
else
	echo "Cannot write to log [$(dirname "$LOG_FILE")]."
fi
777 | 
# This file must not be cleaned with CleanUp function, hence its naming scheme is different
SERVICE_MONITOR_FILE="$RUN_DIR/$PROGRAM.$INSTANCE_ID.$SCRIPT_PID.$TSTAMP.SERVICE-MONITOR.run"

# Fall back to PDF output when no conversion format was requested at all
case "$pdf:$docx:$xlsx:$txt:$csv" in
	false:false:false:false:false)
	pdf=true
	;;
esac

# Suffixes used when neither the config file nor the command line provided any
: "${FILENAME_SUFFIX:=_OCR}"
: "${FAILED_FILENAME_SUFFIX:=_OCR_ERR}"
793 | 
# In batch mode, command line switches take precedence over the loaded configuration
if [ "$_BATCH_RUN" == true ]; then
	if [ "$skip_txt_pdf" == true ]; then
		CHECK_PDF=true
	fi

	# An explicit --suffix value wins over --no-suffix
	if [ -n "$suffix" ]; then
		FILENAME_SUFFIX="$suffix"
	elif [ "$no_suffix" == true ]; then
		FILENAME_SUFFIX=""
	fi

	# An explicit --failed-suffix value wins over --no-failed-suffix
	if [ -n "$failed_suffix" ]; then
		FAILED_FILENAME_SUFFIX="$failed_suffix"
	elif [ "$no_failed_suffix" == true ]; then
		FAILED_FILENAME_SUFFIX=""
	fi

	# --no-text wins over an explicit --text value
	if [ "$no_text" == true ]; then
		FILENAME_ADDITION=""
	elif [ -n "$text" ]; then
		FILENAME_ADDITION="$text"
	fi

	if [ "$delete_input" == true ]; then
		DELETE_ORIGINAL=true
	fi
fi
828 | 
CheckEnvironment

if [ "$_SERVICE_RUN" == true ]; then
	# Service mode: one background OCR_service per configured monitor directory,
	# each of them sends USR1 to this process, which then runs DispatchRunner
	trap DispatchRunner USR1
	trap TrapQuit TERM EXIT HUP QUIT

	EVENT_LOG_FILE="$RUN_DIR/$PROGRAM.eventLog.$SCRIPT_PID.$TSTAMP"
	if ! echo "$SCRIPT_PID" > "$SERVICE_MONITOR_FILE"; then
		Logger "Cannot write service file [$SERVICE_MONITOR_FILE]." "CRITICAL"
		exit 1
	fi

	# Unless verbose output was requested, restrict service logging to errors
	if [ "$_LOGGER_VERBOSE" == false ]; then
		_LOGGER_ERR_ONLY=true
	fi

	# Global variable for DispatchRunner function
	DISPATCH_NEEDED=0
	DISPATCH_RUNS=false

	Logger "Service $PROGRAM instance [$INSTANCE_ID] pid [$$] started as [$LOCAL_USER] on [$LOCAL_HOST] using $OCR_ENGINE." "ALWAYS"

	if [ -n "$PDF_MONITOR_DIR" ]; then
		OCR_service "$PDF_MONITOR_DIR" "$PDF_EXTENSION" &
	fi

	if [ -n "$WORD_MONITOR_DIR" ]; then
		OCR_service "$WORD_MONITOR_DIR" "$WORD_EXTENSION" &
	fi

	if [ -n "$EXCEL_MONITOR_DIR" ]; then
		OCR_service "$EXCEL_MONITOR_DIR" "$EXCEL_EXTENSION" &
	fi

	if [ -n "$TEXT_MONITOR_DIR" ]; then
		OCR_service "$TEXT_MONITOR_DIR" "$TEXT_EXTENSION" &
	fi

	if [ -n "$CSV_MONITOR_DIR" ]; then
		OCR_service "$CSV_MONITOR_DIR" "$CSV_EXTENSION" &
	fi

	# Keep running until trap function quits
	# Keep the sleep value low so main script will execute USR1 trapped function
	while true; do
		sleep 1
	done

elif [ "$_BATCH_RUN" == true ]; then

	# Batch mode: the last argument should be the path to process
	batchPath="${@: -1}"
	if [ ! -d "$batchPath" ]; then
		Logger "Missing path." "ERROR"
		Usage
	fi

	Logger "Running $PROGRAM $PROGRAM_VERSION as batch" "NOTICE"

	if [ "$pdf" == true ]; then
		# Refuse PDF creation when VerComp reports 0 or 2 for the tesseract version against 3.02
		case "$OCR_ENGINE" in
			tesseract3|tesseract)
			result=$(VerComp "$TESSERACT_VERSION" "3.02")
			if [ "$result" -eq 2 ] || [ "$result" -eq 0 ]; then
				Logger "Tesseract version $TESSERACT_VERSION is not supported to create searchable PDFs. Please use 3.03 or better." "CRITICAL"
				exit 1
			fi
			;;
		esac

		Logger "Beginning PDF OCR recognition of files in [$batchPath] using $OCR_ENGINE." "NOTICE"
		OCR_Dispatch "$batchPath" "$PDF_EXTENSION" "$PDF_OCR_ENGINE_ARGS" false
		Logger "Batch ended." "NOTICE"
	fi

	if [ "$docx" == true ]; then
		Logger "Beginning DOCX OCR recognition of files in [$batchPath] using $OCR_ENGINE." "NOTICE"
		OCR_Dispatch "$batchPath" "$WORD_EXTENSION" "$WORD_OCR_ENGINE_ARGS" false
		Logger "Batch ended." "NOTICE"
	fi

	if [ "$xlsx" == true ]; then
		Logger "Beginning XLSX OCR recognition of files in [$batchPath] using $OCR_ENGINE." "NOTICE"
		OCR_Dispatch "$batchPath" "$EXCEL_EXTENSION" "$EXCEL_OCR_ENGINE_ARGS" false
		Logger "batch ended." "NOTICE"
	fi

	if [ "$txt" == true ]; then
		Logger "Beginning TEXT OCR recognition of files in [$batchPath] using $OCR_ENGINE." "NOTICE"
		OCR_Dispatch "$batchPath" "$TEXT_EXTENSION" "$TEXT_OCR_ENGINE_ARGS" false
		Logger "batch ended." "NOTICE"
	fi

	if [ "$csv" == true ]; then
		# CSV is the only target dispatched with a last argument of true
		Logger "Beginning CSV OCR recognition of files in [$batchPath] using $OCR_ENGINE." "NOTICE"
		OCR_Dispatch "$batchPath" "$CSV_EXTENSION" "$CSV_OCR_ENGINE_ARGS" true
		Logger "Batch ended." "NOTICE"
	fi

else
	# Neither --service nor --batch was requested
	Logger "$PROGRAM must be run as a system service (using service file or --service argument) or in batch mode with --batch parameter." "ERROR"
	Usage
fi


--------------------------------------------------------------------------------