├── dev ├── tests │ ├── source │ │ ├── lorem_pdf.pdf │ │ ├── lorem_png.png │ │ ├── lorem_tif.tif │ │ └── lorem_searchable_pdf.pdf │ ├── shunit2 │ │ ├── shunit2_standalone_test.sh │ │ ├── lib │ │ │ ├── shlib │ │ │ └── versions │ │ ├── gen_test_report.sh │ │ ├── shunit2_failures_test.sh │ │ ├── CODE_OF_CONDUCT.md │ │ ├── test_runner │ │ ├── shunit2_asserts_test.sh │ │ ├── shunit2_test_helpers │ │ ├── shunit2_macros_test.sh │ │ ├── shunit2_misc_test.sh │ │ ├── LICENSE │ │ └── README.md │ ├── conf │ │ ├── default.conf │ │ └── service.conf │ └── run_tests.sh ├── bootstrap.sh ├── merge.sh ├── common_install.sh └── n_pmocr.sh ├── .travis.yml ├── pmocr-srv@.service ├── LICENCE.TXT ├── pmocr-srv ├── README.md ├── CHANGELOG.md └── default.conf /dev/tests/source/lorem_pdf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deajan/pmOCR/HEAD/dev/tests/source/lorem_pdf.pdf -------------------------------------------------------------------------------- /dev/tests/source/lorem_png.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deajan/pmOCR/HEAD/dev/tests/source/lorem_png.png -------------------------------------------------------------------------------- /dev/tests/source/lorem_tif.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deajan/pmOCR/HEAD/dev/tests/source/lorem_tif.tif -------------------------------------------------------------------------------- /dev/tests/source/lorem_searchable_pdf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deajan/pmOCR/HEAD/dev/tests/source/lorem_searchable_pdf.pdf -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: 2 | bash 3 | 
4 | os: 5 | linux 6 | osx 7 | 8 | before_install: 9 | - sudo apt-get update -y 10 | - sudo apt-get install tesseract-ocr tesseract-ocr-osd tesseract-ocr-eng ghostscript inotify-tools poppler-utils -y 11 | 12 | script: 13 | TRAVIS_RUN=true dev/tests/run_tests.sh 14 | 15 | -------------------------------------------------------------------------------- /pmocr-srv@.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=pmocr - monitors a local directory and gives any new file to your favorite OCR engine 3 | After=local-fs.target 4 | Requires=local-fs.target 5 | 6 | [Service] 7 | Type=simple 8 | ExecStart=/usr/local/bin/pmocr.sh --service --config=/etc/pmocr/%i 9 | SuccessExitStatus=0 2 10 | 11 | [Install] 12 | WantedBy=multi-user.target 13 | -------------------------------------------------------------------------------- /dev/tests/shunit2/shunit2_standalone_test.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # vim:et:ft=sh:sts=2:sw=2 3 | # 4 | # shUnit2 unit test for standalone operation. 5 | # 6 | # Copyright 2010-2017 Kate Ward. All Rights Reserved. 7 | # Released under the Apache 2.0 license. 8 | # 9 | # Author: kate.ward@forestent.com (Kate Ward) 10 | # https://github.com/kward/shunit2 11 | # 12 | # This unit test is purely to test that calling shunit2 directly, while passing 13 | # the name of a unit test script, works. When run, this script determines if it 14 | # is running as a standalone program, and calls main() if it is. 15 | # 16 | ### ShellCheck http://www.shellcheck.net/ 17 | # $() are not fully portable (POSIX != portable). 18 | # shellcheck disable=SC2006 19 | # Disable source following. 20 | # shellcheck disable=SC1090,SC1091 21 | 22 | ARGV0="`basename "$0"`" 23 | 24 | # Load test helpers. 25 | . 
./shunit2_test_helpers 26 | 27 | testStandalone() { 28 | assertTrue "${SHUNIT_TRUE}" 29 | } 30 | 31 | main() { 32 | ${TH_SHUNIT} "${ARGV0}" 33 | } 34 | 35 | # Are we running as a standalone? 36 | if [ "${ARGV0}" = 'shunit2_standalone_test.sh' ]; then 37 | if [ $# -gt 0 ]; then main "$@"; else main; fi 38 | fi 39 | -------------------------------------------------------------------------------- /dev/tests/shunit2/lib/shlib: -------------------------------------------------------------------------------- 1 | # vim:et:ft=sh:sts=2:sw=2 2 | # 3 | # Copyright 2008 Kate Ward. All Rights Reserved. 4 | # Released under the LGPL (GNU Lesser General Public License). 5 | # 6 | # Author: kate.ward@forestent.com (Kate Ward) 7 | # 8 | # Library of shell functions. 9 | 10 | # Convert a relative path into its absolute equivalent. 11 | # 12 | # This function will automatically prepend the current working directory if the 13 | # path is not already absolute. It then removes all parent references (../) to 14 | # reconstruct the proper absolute path. 15 | # 16 | # Args: 17 | # shlib_path_: string: relative path 18 | # Outputs: 19 | # string: absolute path 20 | shlib_relToAbsPath() 21 | { 22 | shlib_path_=$1 23 | 24 | # prepend current directory to relative paths 25 | echo "${shlib_path_}" |grep '^/' >/dev/null 2>&1 \ 26 | || shlib_path_="${PWD}/${shlib_path_}" 27 | 28 | # clean up the path. 
if all seds supported true regular expressions, then 29 | # this is what it would be: 30 | shlib_old_=${shlib_path_} 31 | while true; do 32 | shlib_new_=`echo "${shlib_old_}" |sed 's/[^/]*\/\.\.\/*//;s/\/\.\//\//'` 33 | [ "${shlib_old_}" = "${shlib_new_}" ] && break 34 | shlib_old_=${shlib_new_} 35 | done 36 | echo "${shlib_new_}" 37 | 38 | unset shlib_path_ shlib_old_ shlib_new_ 39 | } 40 | -------------------------------------------------------------------------------- /LICENCE.TXT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013-2016, Orsiris de Jong. ozy@netpower.fr 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the author nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /dev/bootstrap.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ## dev pre-processor bootstrap rev 2019052001 4 | ## Yeah !!! A really tech sounding name... In fact it's just include emulation in bash 5 | 6 | function Usage { 7 | echo "$0 - Quick and dirty preprocessor for including ofunctions into programs" 8 | echo "Creates and executes $0.tmp.sh" 9 | echo "Usage:" 10 | echo "" 11 | echo "$0 --program=osync|obackup|pmocr [options to pass to program]" 12 | echo "Can also be run with BASHVERBOSE=yes environment variable in order to prefix program with bash -x" 13 | } 14 | 15 | 16 | if [ ! -f "./merge.sh" ]; then 17 | echo "Please run bootstrap.sh from the dev directory." 18 | exit 1 19 | fi 20 | 21 | bootstrapProgram="" 22 | opts=() 23 | outputFileName="$0" 24 | 25 | for i in "${@}"; do 26 | case "$i" in 27 | --program=*) 28 | bootstrapProgram="${i##*=}" 29 | ;; 30 | *) 31 | opts+=("$i") 32 | ;; 33 | esac 34 | done 35 | 36 | if [ "$bootstrapProgram" == "" ]; then 37 | Usage 38 | exit 128 39 | else 40 | source "merge.sh" 41 | 42 | __PREPROCESSOR_PROGRAM=$bootstrapProgram 43 | __PREPROCESSOR_PROGRAM_EXEC="n_$bootstrapProgram.sh" 44 | __PREPROCESSOR_Constants 45 | 46 | if [ ! 
-f "$__PREPROCESSOR_PROGRAM_EXEC" ]; then 47 | echo "Cannot find file $__PREPROCESSOR_PROGRAM executable [n_$bootstrapProgram.sh]." 48 | exit 1 49 | fi 50 | fi 51 | 52 | cp "$__PREPROCESSOR_PROGRAM_EXEC" "$outputFileName.tmp.sh" 53 | if [ $? != 0 ]; then 54 | echo "Cannot copy original file [$__PREPROCESSOR_PROGRAM_EXEC] to [$outputFileName.tmp.sh]." 55 | exit 1 56 | fi 57 | for subset in "${__PREPROCESSOR_SUBSETS[@]}"; do 58 | __PREPROCESSOR_MergeSubset "$subset" "${subset//SUBSET/SUBSET END}" "ofunctions.sh" "$outputFileName.tmp.sh" 59 | done 60 | chmod +x "$outputFileName.tmp.sh" 61 | if [ $? != 0 ]; then 62 | echo "Cannot make [$outputFileName] executable." 63 | exit 1 64 | fi 65 | 66 | # Termux fix 67 | if type termux-fix-shebang > /dev/null 2>&1; then 68 | termux-fix-shebang "$outputFileName.tmp.sh" 69 | fi 70 | 71 | if [ "$BASHVERBOSE" == "yes" ]; then 72 | bash -x "$outputFileName.tmp.sh" "${opts[@]}" 73 | else 74 | "$outputFileName.tmp.sh" "${opts[@]}" 75 | fi 76 | -------------------------------------------------------------------------------- /dev/tests/shunit2/gen_test_report.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # vim:et:ft=sh:sts=2:sw=2 3 | # 4 | # This script runs the provided unit tests and sends the output to the 5 | # appropriate file. 6 | # 7 | # Copyright 2008-2017 Kate Ward. All Rights Reserved. 8 | # Released under the Apache 2.0 license. 9 | # 10 | # Author: kate.ward@forestent.com (Kate Ward) 11 | # https://github.com/kward/shunit2 12 | # 13 | # Source following. 14 | # shellcheck disable=SC1090,SC1091 15 | # FLAGS variables are dynamically created. 16 | # shellcheck disable=SC2154 17 | # Disagree with [ p ] && [ q ] vs [ p -a -q ] recommendation. 18 | # shellcheck disable=SC2166 19 | 20 | # Treat unset variables as an error. 
21 | set -u 22 | 23 | die() { 24 | [ $# -gt 0 ] && echo "error: $*" >&2 25 | exit 1 26 | } 27 | 28 | BASE_DIR=$(dirname "$0") 29 | LIB_DIR="${BASE_DIR}/lib" 30 | 31 | ### Load libraries. 32 | . "${LIB_DIR}/shflags" || die 'unable to load shflags library' 33 | . "${LIB_DIR}/shlib" || die 'unable to load shlib library' 34 | . "${LIB_DIR}/versions" || die 'unable to load versions library' 35 | 36 | # Redefining BASE_DIR now that we have the shlib functions. We need BASE_DIR so 37 | # that we can properly load things, even in the event that this script is called 38 | # from a different directory. 39 | BASE_DIR=$(shlib_relToAbsPath "${BASE_DIR}") 40 | 41 | # Define flags. 42 | os_name=$(versions_osName |sed 's/ /_/g') 43 | os_version=$(versions_osVersion) 44 | 45 | DEFINE_boolean force false 'force overwrite' f 46 | DEFINE_string output_dir "${TMPDIR}" 'output dir' d 47 | DEFINE_string output_file "${os_name}-${os_version}.txt" 'output file' o 48 | DEFINE_string runner 'test_runner' 'unit test runner' r 49 | DEFINE_boolean dry_run false "suppress logging to a file" n 50 | 51 | main() { 52 | # Determine output filename. 53 | # shellcheck disable=SC2154 54 | output="${FLAGS_output_dir:+${FLAGS_output_dir}/}${FLAGS_output_file}" 55 | output=$(shlib_relToAbsPath "${output}") 56 | 57 | # Checks. 58 | if [ "${FLAGS_dry_run}" -eq "${FLAGS_FALSE}" -a -f "${output}" ]; then 59 | if [ "${FLAGS_force}" -eq "${FLAGS_TRUE}" ]; then 60 | rm -f "${output}" 61 | else 62 | echo "not overwriting '${output}'" >&2 63 | exit "${FLAGS_ERROR}" 64 | fi 65 | fi 66 | if [ "${FLAGS_dry_run}" -eq "${FLAGS_FALSE}" ]; then 67 | touch "${output}" 2>/dev/null || die "unable to write to '${output}'" 68 | fi 69 | 70 | # Run tests. 71 | ( 72 | if [ "${FLAGS_dry_run}" -eq "${FLAGS_FALSE}" ]; then 73 | "./${FLAGS_runner}" |tee "${output}" 74 | else 75 | "./${FLAGS_runner}" 76 | fi 77 | ) 78 | 79 | if [ "${FLAGS_dry_run}" -eq "${FLAGS_FALSE}" ]; then 80 | echo >&2 81 | echo "Output written to '${output}'." 
>&2 82 | fi 83 | } 84 | 85 | FLAGS "$@" || exit $? 86 | [ "${FLAGS_help}" -eq "${FLAGS_FALSE}" ] || exit 87 | eval set -- "${FLAGS_ARGV}" 88 | main "${@:-}" 89 | -------------------------------------------------------------------------------- /dev/tests/shunit2/shunit2_failures_test.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # vim:et:ft=sh:sts=2:sw=2 3 | # 4 | # shUnit2 unit test for failure functions 5 | # 6 | # Copyright 2008-2017 Kate Ward. All Rights Reserved. 7 | # Released under the LGPL (GNU Lesser General Public License) 8 | # 9 | # Author: kate.ward@forestent.com (Kate Ward) 10 | # https://github.com/kward/shunit2 11 | # 12 | # Disable source following. 13 | # shellcheck disable=SC1090,SC1091 14 | 15 | # These variables will be overridden by the test helpers. 16 | stdoutF="${TMPDIR:-/tmp}/STDOUT" 17 | stderrF="${TMPDIR:-/tmp}/STDERR" 18 | 19 | # Load test helpers. 20 | . ./shunit2_test_helpers 21 | 22 | testFail() { 23 | ( fail >"${stdoutF}" 2>"${stderrF}" ) 24 | th_assertFalseWithOutput 'fail' $? "${stdoutF}" "${stderrF}" 25 | 26 | ( fail "${MSG}" >"${stdoutF}" 2>"${stderrF}" ) 27 | th_assertFalseWithOutput 'fail with msg' $? "${stdoutF}" "${stderrF}" 28 | 29 | ( fail arg1 >"${stdoutF}" 2>"${stderrF}" ) 30 | th_assertFalseWithOutput 'too many arguments' $? "${stdoutF}" "${stderrF}" 31 | } 32 | 33 | testFailNotEquals() { 34 | ( failNotEquals 'x' 'x' >"${stdoutF}" 2>"${stderrF}" ) 35 | th_assertFalseWithOutput 'same' $? "${stdoutF}" "${stderrF}" 36 | 37 | ( failNotEquals "${MSG}" 'x' 'x' >"${stdoutF}" 2>"${stderrF}" ) 38 | th_assertFalseWithOutput 'same with msg' $? "${stdoutF}" "${stderrF}" 39 | 40 | ( failNotEquals 'x' 'y' >"${stdoutF}" 2>"${stderrF}" ) 41 | th_assertFalseWithOutput 'not same' $? "${stdoutF}" "${stderrF}" 42 | 43 | ( failNotEquals '' '' >"${stdoutF}" 2>"${stderrF}" ) 44 | th_assertFalseWithOutput 'null values' $? 
"${stdoutF}" "${stderrF}" 45 | 46 | ( failNotEquals >"${stdoutF}" 2>"${stderrF}" ) 47 | th_assertFalseWithError 'too few arguments' $? "${stdoutF}" "${stderrF}" 48 | 49 | ( failNotEquals arg1 arg2 arg3 arg4 >"${stdoutF}" 2>"${stderrF}" ) 50 | th_assertFalseWithError 'too many arguments' $? "${stdoutF}" "${stderrF}" 51 | } 52 | 53 | testFailSame() { 54 | ( failSame 'x' 'x' >"${stdoutF}" 2>"${stderrF}" ) 55 | th_assertFalseWithOutput 'same' $? "${stdoutF}" "${stderrF}" 56 | 57 | ( failSame "${MSG}" 'x' 'x' >"${stdoutF}" 2>"${stderrF}" ) 58 | th_assertFalseWithOutput 'same with msg' $? "${stdoutF}" "${stderrF}" 59 | 60 | ( failSame 'x' 'y' >"${stdoutF}" 2>"${stderrF}" ) 61 | th_assertFalseWithOutput 'not same' $? "${stdoutF}" "${stderrF}" 62 | 63 | ( failSame '' '' >"${stdoutF}" 2>"${stderrF}" ) 64 | th_assertFalseWithOutput 'null values' $? "${stdoutF}" "${stderrF}" 65 | 66 | ( failSame >"${stdoutF}" 2>"${stderrF}" ) 67 | th_assertFalseWithError 'too few arguments' $? "${stdoutF}" "${stderrF}" 68 | 69 | ( failSame arg1 arg2 arg3 arg4 >"${stdoutF}" 2>"${stderrF}" ) 70 | th_assertFalseWithError 'too many arguments' $? "${stdoutF}" "${stderrF}" 71 | } 72 | 73 | oneTimeSetUp() { 74 | th_oneTimeSetUp 75 | 76 | MSG='This is a test message' 77 | } 78 | 79 | # Load and run shUnit2. 80 | # shellcheck disable=SC2034 81 | [ -n "${ZSH_VERSION:-}" ] && SHUNIT_PARENT=$0 82 | . 
"${TH_SHUNIT}" 83 | -------------------------------------------------------------------------------- /dev/tests/shunit2/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 
28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at kate.ward@forestent.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /pmocr-srv: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Automatic OCR Service 4 | # 5 | # chkconfig: 35 55 25 6 | # description: monitors a local directory and gives any new file to your favorite OCR engine 7 | # processname: /usr/local/bin/pmocr.sh 8 | # pidfile: /var/run/pmocr 9 | 10 | ### BEGIN INIT INFO 11 | # Provides: pmocr-srv 12 | # Required-Start: $local_fs $time 13 | # Required-Stop: $local_fs $time 14 | # Default-Start: 2 3 4 5 15 | # Default-Stop: 0 1 6 16 | # Short-Description: pmocr daemon 17 | # Description: OCR wrapper service 18 | ### END INIT INFO 19 | 20 | prog=pmocr 21 | progexec=pmocr.sh 22 | progpath=/usr/local/bin 23 | confdir=/etc/$prog 24 | pidfile=/var/run/$prog 25 | SCRIPT_BUILD=2018122101 26 | 27 | if [ ! -f $progpath/$progexec ] && [ ! -f $progexec ] 28 | then 29 | echo "Cannot find $prog executable in $progpath nor in local path." 30 | exit 1 31 | fi 32 | 33 | if [ ! -w $(dirname $pidfile) ] 34 | then 35 | pidfile=./$prog 36 | fi 37 | 38 | start() { 39 | if ! ls "$confdir/"*.conf > /dev/null 2>&1; then 40 | echo "Cannot find any configuration files in $confdir." 41 | exit 1 42 | fi 43 | 44 | errno=0 45 | 46 | for cfgfile in "$confdir"/*.conf 47 | do 48 | if [ -f $progpath/$progexec ] 49 | then 50 | $progpath/$progexec --config=$cfgfile --service > /dev/null 2>&1 & 51 | elif [ -f ./$progexec ] 52 | then 53 | ./$progexec --config=$cfgfile --service > /dev/null 2>&1 & 54 | else 55 | echo "Cannot find $prog executable in $progpath" 56 | exit 1 57 | fi 58 | 59 | pid=$! 60 | retval=$? 
61 | 62 | if [ $? == 0 ] 63 | then 64 | echo $pid > "$pidfile-$(basename $cfgfile)" 65 | echo "$prog successfully started for configuration file $cfgfile" 66 | else 67 | echo "Cannot start $prog for configuration file $cfgfile" 68 | errno = 1 69 | fi 70 | done 71 | 72 | exit $errno 73 | } 74 | 75 | stop() { 76 | if [ ! -f $pidfile-* ] 77 | then 78 | echo "No running $prog instances found." 79 | exit 1 80 | fi 81 | 82 | for pfile in $pidfile-* 83 | do 84 | if ps -p$(cat $pfile) > /dev/null 2>&1 85 | then 86 | kill -TERM $(cat $pfile) 87 | if [ $? == 0 ] 88 | then 89 | rm -f $pfile 90 | echo "$prog instance $(basename $pfile) stopped." 91 | else 92 | echo "Cannot stop $prog instance $(basename $pfile)" 93 | fi 94 | else 95 | rm -f $pfile 96 | echo "$prog instance $pfile (pid $(cat $pfile)) is dead but pidfile exists." 97 | fi 98 | done 99 | } 100 | 101 | status() { 102 | if [ ! -f $pidfile-* ] 103 | then 104 | echo "Cannot find any running $prog instance." 105 | exit 1 106 | fi 107 | 108 | errno=0 109 | 110 | for pfile in $pidfile-* 111 | do 112 | if ps -p$(cat $pfile) > /dev/null 2>&1 113 | then 114 | echo "$prog instance $(basename $pfile) is running (pid $(cat $pfile))" 115 | else 116 | echo "$prog instance $pfile (pid $(cat $pfile)) is dead but pidfile exists." 117 | errno=1 118 | fi 119 | done 120 | 121 | exit $errno 122 | } 123 | 124 | case "$1" in 125 | start) 126 | start 127 | ;; 128 | stop) 129 | stop 130 | ;; 131 | restart) 132 | stop 133 | start 134 | ;; 135 | status) 136 | status 137 | ;; 138 | condrestart|try-restart) 139 | status || exit 0 140 | restart 141 | ;; 142 | *) 143 | echo "Usage: $0 {start|stop|restart|status}" 144 | ;; 145 | esac 146 | 147 | exit 0 148 | -------------------------------------------------------------------------------- /dev/tests/shunit2/test_runner: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # vim:et:ft=sh:sts=2:sw=2 3 | # 4 | # Unit test suite runner. 
5 | # 6 | # Copyright 2008-2017 Kate Ward. All Rights Reserved. 7 | # Released under the Apache 2.0 license. 8 | # 9 | # Author: kate.ward@forestent.com (Kate Ward) 10 | # https://github.com/kward/shlib 11 | # 12 | # This script runs all the unit tests that can be found, and generates a nice 13 | # report of the tests. 14 | # 15 | ### ShellCheck (http://www.shellcheck.net/) 16 | # Disable source following. 17 | # shellcheck disable=SC1090,SC1091 18 | # expr may be antiquated, but it is the only solution in some cases. 19 | # shellcheck disable=SC2003 20 | # $() are not fully portable (POSIX != portable). 21 | # shellcheck disable=SC2006 22 | 23 | # Return if test_runner already loaded. 24 | [ -z "${RUNNER_LOADED:-}" ] || return 0 25 | RUNNER_LOADED=0 26 | 27 | RUNNER_ARGV0=`basename "$0"` 28 | RUNNER_SHELLS='/bin/sh ash /bin/bash /bin/dash /bin/ksh /bin/pdksh /bin/zsh' 29 | RUNNER_TEST_SUFFIX='_test.sh' 30 | 31 | runner_warn() { echo "runner:WARN $*" >&2; } 32 | runner_error() { echo "runner:ERROR $*" >&2; } 33 | runner_fatal() { echo "runner:FATAL $*" >&2; exit 1; } 34 | 35 | runner_usage() { 36 | echo "usage: ${RUNNER_ARGV0} [-e key=val ...] [-s shell(s)] [-t test(s)]" 37 | } 38 | 39 | _runner_tests() { echo ./*${RUNNER_TEST_SUFFIX} |sed 's#./##g'; } 40 | _runner_testName() { 41 | # shellcheck disable=SC1117 42 | _runner_testName_=`expr "${1:-}" : "\(.*\)${RUNNER_TEST_SUFFIX}"` 43 | if [ -n "${_runner_testName_}" ]; then 44 | echo "${_runner_testName_}" 45 | else 46 | echo 'unknown' 47 | fi 48 | unset _runner_testName_ 49 | } 50 | 51 | main() { 52 | # Find and load versions library. 53 | for _runner_dir_ in . ${LIB_DIR:-lib}; do 54 | if [ -r "${_runner_dir_}/versions" ]; then 55 | _runner_lib_dir_="${_runner_dir_}" 56 | break 57 | fi 58 | done 59 | [ -n "${_runner_lib_dir_}" ] || runner_fatal 'Unable to find versions library.' 60 | . "${_runner_lib_dir_}/versions" || runner_fatal 'Unable to load versions library.' 
61 | unset _runner_dir_ _runner_lib_dir_ 62 | 63 | # Process command line flags. 64 | env='' 65 | while getopts 'e:hs:t:' opt; do 66 | case ${opt} in 67 | e) # set an environment variable 68 | key=`expr "${OPTARG}" : '\([^=]*\)='` 69 | val=`expr "${OPTARG}" : '[^=]*=\(.*\)'` 70 | # shellcheck disable=SC2166 71 | if [ -z "${key}" -o -z "${val}" ]; then 72 | runner_usage 73 | exit 1 74 | fi 75 | eval "${key}='${val}'" 76 | eval "export ${key}" 77 | env="${env:+${env} }${key}" 78 | ;; 79 | h) runner_usage; exit 0 ;; # help output 80 | s) shells=${OPTARG} ;; # list of shells to run 81 | t) tests=${OPTARG} ;; # list of tests to run 82 | *) runner_usage; exit 1 ;; 83 | esac 84 | done 85 | shift "`expr ${OPTIND} - 1`" 86 | 87 | # Fill shells and/or tests. 88 | shells=${shells:-${RUNNER_SHELLS}} 89 | [ -z "${tests}" ] && tests=`_runner_tests` 90 | 91 | # Error checking. 92 | if [ -z "${tests}" ]; then 93 | runner_error 'no tests found to run; exiting' 94 | exit 1 95 | fi 96 | 97 | cat <&1; ) 160 | done 161 | done 162 | } 163 | 164 | # Execute main() if this is run in standalone mode (i.e. not from a unit test). 165 | [ -z "${SHUNIT_VERSION}" ] && main "$@" 166 | -------------------------------------------------------------------------------- /dev/merge.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ## MERGE 2022022301 4 | 5 | ## Merges ofunctions.sh and n_program.sh into program.sh 6 | ## Adds installer 7 | 8 | PROGRAM=merge 9 | INSTANCE_ID=dev 10 | 11 | function Usage { 12 | echo "Merges ofunctions.sh and n_program.sh into debug_program.sh and ../program.sh" 13 | echo "Usage" 14 | echo "$0 osync|obackup|pmocr" 15 | } 16 | 17 | function __PREPROCESSOR_Merge { 18 | local nPROGRAM="$1" 19 | 20 | if [ ! -f "n_$nPROGRAM.sh" ]; then 21 | Logger "n_$nPROGRAM.sh is not found in local path." 
"CRITICAL" 22 | exit 1 23 | fi 24 | 25 | VERSION=$(grep "PROGRAM_VERSION=" n_$nPROGRAM.sh) 26 | VERSION=${VERSION#*=} 27 | __PREPROCESSOR_Constants 28 | 29 | __PREPROCESSOR_Unexpand "n_$nPROGRAM.sh" "debug_$nPROGRAM.sh" 30 | 31 | for subset in "${__PREPROCESSOR_SUBSETS[@]}"; do 32 | __PREPROCESSOR_MergeSubset "$subset" "${subset//SUBSET/SUBSET END}" "ofunctions.sh" "debug_$nPROGRAM.sh" 33 | done 34 | 35 | __PREPROCESSOR_CleanDebug "debug_$nPROGRAM.sh" "../$nPROGRAM.sh" 36 | } 37 | 38 | function __PREPROCESSOR_Constants { 39 | PARANOIA_DEBUG_LINE="#__WITH_PARANOIA_DEBUG" 40 | PARANOIA_DEBUG_BEGIN="#__BEGIN_WITH_PARANOIA_DEBUG" 41 | PARANOIA_DEBUG_END="#__END_WITH_PARANOIA_DEBUG" 42 | 43 | __PREPROCESSOR_SUBSETS=( 44 | '#### OFUNCTIONS FULL SUBSET ####' 45 | '#### OFUNCTIONS MINI SUBSET ####' 46 | '#### OFUNCTIONS MICRO SUBSET ####' 47 | '#### PoorMansRandomGenerator SUBSET ####' 48 | '#### _OFUNCTIONS_BOOTSTRAP SUBSET ####' 49 | '#### RUN_DIR SUBSET ####' 50 | '#### DEBUG SUBSET ####' 51 | '#### TrapError SUBSET ####' 52 | '#### RemoteLogger SUBSET ####' 53 | '#### Logger SUBSET ####' 54 | '#### GetLocalOS SUBSET ####' 55 | '#### IsInteger SUBSET ####' 56 | '#### UrlEncode SUBSET ####' 57 | '#### HumanToNumeric SUBSET ####' 58 | '#### ArrayContains SUBSET ####' 59 | '#### VerComp SUBSET ####' 60 | '#### GetConfFileValue SUBSET ####' 61 | '#### SetConfFileValue SUBSET ####' 62 | '#### CheckRFC822 SUBSET ####' 63 | '#### CleanUp SUBSET ####' 64 | '#### GenericTrapQuit SUBSET ####' 65 | '#### FileMove SUBSET ####' 66 | '#### InotifyWaitPoller SUBSET ####' 67 | ) 68 | } 69 | 70 | function __PREPROCESSOR_Unexpand { 71 | local source="${1}" 72 | local destination="${2}" 73 | 74 | unexpand "$source" > "$destination" 75 | if [ $? != 0 ]; then 76 | Logger "Cannot unexpand [$source] to [$destination]." 
"CRITICAL" 77 | exit 1 78 | fi 79 | } 80 | 81 | function __PREPROCESSOR_MergeSubset { 82 | local subsetBegin="${1}" 83 | local subsetEnd="${2}" 84 | local subsetFile="${3}" 85 | local mergedFile="${4}" 86 | 87 | sed -n "/$subsetBegin/,/$subsetEnd/p" "$subsetFile" > "$subsetFile.$subsetBegin" 88 | if [ $? != 0 ]; then 89 | Logger "Cannot sed subset [$subsetBegin -- $subsetEnd] in [$subsetFile]." "CRITICAL" 90 | exit 1 91 | fi 92 | sed "/include $subsetBegin/r $subsetFile.$subsetBegin" "$mergedFile" | grep -v -E "$subsetBegin\$|$subsetEnd\$" > "$mergedFile.tmp" 93 | if [ $? != 0 ]; then 94 | Logger "Cannot add subset [$subsetBegin] to [$mergedFile]." "CRITICAL" 95 | exit 1 96 | fi 97 | rm -f "$subsetFile.$subsetBegin" 98 | if [ $? != 0 ]; then 99 | Logger "Cannot remove temporary subset [$subsetFile.$subsetBegin]." "CRITICAL" 100 | exit 1 101 | fi 102 | 103 | rm -f "$mergedFile" 104 | if [ $? != 0 ]; then 105 | Logger "Cannot remove merged original file [$mergedFile]." "CRITICAL" 106 | exit 1 107 | fi 108 | 109 | mv "$mergedFile.tmp" "$mergedFile" 110 | if [ $? != 0 ]; then 111 | Logger "Cannot move merged tmp file to original [$mergedFile]." "CRITICAL" 112 | exit 1 113 | fi 114 | } 115 | 116 | function __PREPROCESSOR_CleanDebug { 117 | local source="${1}" 118 | local destination="${2:-$source}" 119 | 120 | sed '/'$PARANOIA_DEBUG_BEGIN'/,/'$PARANOIA_DEBUG_END'/d' "$source" | grep -v "$PARANOIA_DEBUG_LINE" > "$destination.tmp" 121 | if [ $? != 0 ]; then 122 | Logger "Cannot remove PARANOIA_DEBUG code from standard build." "CRITICAL" 123 | exit 1 124 | else 125 | mv -f "$destination.tmp" "$destination" 126 | if [ $? -ne 0 ]; then 127 | Logger "Cannot move [$destination.tmp] to [$destination]." "CRITICAL" 128 | exit 1 129 | fi 130 | fi 131 | 132 | chmod +x "$source" 133 | if [ $? != 0 ]; then 134 | Logger "Cannot chmod [$source]." "CRITICAL" 135 | exit 1 136 | else 137 | Logger "Prepared [$source]." 
"NOTICE" 138 | fi 139 | 140 | if [ "$source" != "$destination" ]; then 141 | 142 | chmod +x "$destination" 143 | if [ $? != 0 ]; then 144 | Logger "Cannot chmod [$destination]." "CRITICAL" 145 | exit 1 146 | else 147 | Logger "Prepared [$destination]." "NOTICE" 148 | fi 149 | fi 150 | } 151 | 152 | function __PREPROCESSOR_CopyCommons { 153 | local nPROGRAM="$1" 154 | 155 | sed "s/\[prgname\]/$nPROGRAM/g" common_install.sh > ../install.sh 156 | if [ $? != 0 ]; then 157 | Logger "Cannot assemble install." "CRITICAL" 158 | exit 1 159 | fi 160 | 161 | for subset in "${__PREPROCESSOR_SUBSETS[@]}"; do 162 | __PREPROCESSOR_MergeSubset "$subset" "${subset//SUBSET/SUBSET END}" "ofunctions.sh" "../install.sh" 163 | done 164 | 165 | __PREPROCESSOR_CleanDebug "../install.sh" 166 | 167 | if [ -f "common_batch.sh" ]; then 168 | sed "s/\[prgname\]/$nPROGRAM/g" common_batch.sh > ../$nPROGRAM-batch.sh 169 | if [ $? != 0 ]; then 170 | Logger "Cannot assemble batch runner." "CRITICAL" 171 | exit 1 172 | fi 173 | 174 | for subset in "${__PREPROCESSOR_SUBSETS[@]}"; do 175 | __PREPROCESSOR_MergeSubset "$subset" "${subset//SUBSET/SUBSET END}" "ofunctions.sh" "../$nPROGRAM-batch.sh" 176 | done 177 | 178 | __PREPROCESSOR_CleanDebug "../$nPROGRAM-batch.sh" 179 | fi 180 | } 181 | 182 | # If sourced don't do anything 183 | if [ "$(basename $0)" == "merge.sh" ]; then 184 | source "./ofunctions.sh" 185 | if [ $? != 0 ]; then 186 | echo "Please run $0 in dev directory with ofunctions.sh" 187 | exit 1 188 | fi 189 | trap GenericTrapQuit TERM EXIT HUP QUIT 190 | 191 | if [ "$1" == "osync" ]; then 192 | __PREPROCESSOR_Merge osync 193 | __PREPROCESSOR_CopyCommons osync 194 | elif [ "$1" == "obackup" ]; then 195 | __PREPROCESSOR_Merge obackup 196 | __PREPROCESSOR_CopyCommons obackup 197 | elif [ "$1" == "pmocr" ]; then 198 | __PREPROCESSOR_Merge pmocr 199 | __PREPROCESSOR_CopyCommons pmocr 200 | else 201 | echo "No valid program given." 
202 | Usage 203 | exit 1 204 | fi 205 | fi 206 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## pmOCR (poor man's OCR tool) 2 | 3 | [![Build Status](https://travis-ci.org/deajan/pmOCR.svg?branch=master)](https://travis-ci.org/deajan/pmOCR) [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) [![GitHub Release](https://img.shields.io/github/release/deajan/pmOCR.svg?label=Latest)](https://github.com/deajan/pmOCR/releases/latest) 4 | 5 | ## This project has been archived ! 6 | It has been fun improving my bash skills since I began coding this around 2015. 7 | I initially planned to produce a better, python based version of this, but then I found the **OCRmyPDF** project, which already does a great job ;) 8 | See https://github.com/ocrmypdf/OCRmyPDF for more info. 9 | 10 | If you're interested in Document Management Systems, also check out [paperless-ngx](https://github.com/paperless-ngx/paperless-ngx), which is fully open source and uses OCRmyPDF. 11 | 12 | Farewell, my old bash project. 13 | 14 | ## pmOCR 15 | 16 | A multicore batch & service wrapper script for Tesseract v3/v4/v5 (https://github.com/tesseract-ocr/) or ABBYY CLI OCR 11 FOR LINUX based on Finereader Engine 11 optical character recognition (www.ocr4linux.com). 17 | 18 | Conversions support tiff/jpg/png/pdf/bmp to PDF, TXT and CSV (also DOCX and XLSX for Abbyy OCR). It can actually support any other format that your OCR engine can handle. 19 | 20 | This wrapper can work both in batch and service mode. 21 | 22 | In batch mode, it's used as a command-line tool for processing multiple files at once, being able to output one or more formats. 23 | 24 | In service mode, it will monitor directories and launch OCR conversions as soon as new files get into the directories. 
25 | Since v1.8.0, it can also monitor NFS / SMB mountpoints with the new integrated inotifywait emulation poller. 26 | 27 | pmOCR has the following options: 28 | - Include current date into the output filename 29 | - Ignore already OCRed PDF files based on font detection and / or file suffix 30 | - Delete or move input file after successful conversion 31 | 32 | ## Install it 33 | 34 | $ git clone https://github.com/deajan/pmOCR 35 | $ cd pmOCR 36 | $ ./install.sh 37 | 38 | You will need pdffonts util (from poppler-utils package). 39 | Optionally, you can install inotifywait (from inotify-tools package). 40 | 41 | If you are using tesseract OCR, please install tesseract-osd and tesseract-[your language] (sometimes called tesseract-ocr-osd). 42 | You will also need ImageMagick in order to be able to transform bitmap PDF documents to indexed PDFs. 43 | 44 | ## Batch mode 45 | 46 | Use pmocr to batch process all files in a given directory and its subdirectories. 47 | 48 | Use --help for command line usage. 49 | 50 | Example: 51 | 52 | $ pmocr.sh --batch --target=pdf --skip-txt-pdf --delete-input /some/path 53 | $ pmocr.sh --batch --target=pdf --target=csv --suffix=processed /some/path 54 | 55 | If pmOCR wasn't installed, you may run it directly with a configuration file like: 56 | 57 | $ ./pmocr.sh --config=./default.conf --batch -p /some/path 58 | 59 | ## OCR Configuration 60 | 61 | pmOCR uses a default config stored in /etc/pmocr/default.conf 62 | You may change its contents or clone it and have pmOCR use an alternative configuration with: 63 | 64 | $ pmocr.sh --config=/etc/pmocr/myConfig.conf --batch --target=csv /some/path 65 | 66 | ## Service mode 67 | 68 | Service mode monitors directories and their subdirectories and launches an OCR conversion whenever a new file is written. 69 | Keep in mind that only file creations are monitored. File moves aren't. 
70 | 71 | pmocr is written to monitor up to 5 directories, each producing a different target format (PDF, DOCX, XLSX, TXT & CSV). Comment out a folder to disable its monitoring. 72 | 73 | There's also an option to avoid passing PDFs that already contain text to the OCR engine. 74 | 75 | After installation, please configure /etc/pmocr/default.conf in order to monitor the directories you need, and adjust your specific options. 76 | 77 | Launch service (initV style) 78 | service pmocr-srv start 79 | 80 | Launch service (systemd style) 81 | systemctl start pmocr-srv@default.service 82 | 83 | Check service state (initV style) 84 | service pmocr-srv status 85 | 86 | Check service state (systemd style) 87 | systemctl status pmocr-srv@default.service 88 | 89 | ## Multiple service instances 90 | 91 | In order to monitor multiple directories with different OCR settings, you need to duplicate the /etc/pmocr/default.conf configuration file. 92 | When launching pmOCR service with initV, each config file will create an instance. 93 | With systemD, you have to launch a service for each config file. Example for configs /etc/pmocr/default.conf and /etc/pmocr/other.conf 94 | 95 | $ systemctl start pmocr-srv@default.conf 96 | $ systemctl start pmocr-srv@other.conf 97 | 98 | ## Support for OCR engines 99 | 100 | Has been tested so far with: 101 | - ABBYY FineReader OCR Engine 11 CLI for Linux releases R2 (v 11.1.6.562411), R3 (v 11.1.9.622165) and R6 (v 11.1.14.707470) 102 | - Tesseract-ocr 3.0.4 103 | - Tesseract-ocr 4.0.0 and 4.0.12 104 | - Tesseract-ocr 5.0.0 and 5.0.1 105 | 106 | Tesseract mode also uses ghostscript to convert PDF files to an intermediary TIFF format in order to process them. 107 | 108 | It should work with virtually any engine as long as you adjust the parameters. 109 | 110 | Parameters include any arguments to pass to the OCR program depending on the target format. 
111 | 112 | ## Support for OCR Preprocessors 113 | 114 | ABBYY has an integrated preprocessor in order to enhance recognition quality whereas Tesseract relies on external tools. 115 | pmOCR can use a preprocessor like ImageMagick to deskew / clear noise / render white background and remove black borders. 116 | ImageMagick preprocessor is configured, and enabled by default to be used with Tesseract. 117 | 118 | ## Tesseract caveats 119 | 120 | When no OSD / language data is installed, tesseract will still process documents, but the quality may suffer. 121 | While pmocr will warn you about this, the conversion still happens. 122 | Please make sure to install all necessary addons for tesseract. 123 | 124 | ## Troubleshooting 125 | 126 | Please check /var/log/pmocr.log or ./pmocr.log file for errors. 127 | 128 | Filenames containing special characters should work, nevertheless, if your file doesn't get converted, try to rename it and copy it again to the monitored directory or batch process it again. 129 | 130 | By default, files that fail to be processed will get a suffix '_OCR_ERR' + date added to the filename. 131 | In order to reprocess those files, the suffix has to be removed with the following command 132 | 133 | $ find /monitor/path -iname "*_OCR_ERR.*" -print0 | xargs -0 -I {} sh -c 'export file="{}"; mv "$file" "${file//_OCR_ERR/}"' 134 | 135 | If using tesseract to create searchable PDF files, please make sure to have version 3.03 or better installed. 
136 | -------------------------------------------------------------------------------- /dev/tests/conf/default.conf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ###### pmOCR - batch & service wrapper for OCR tools 4 | ###### (C) 2014-2018 by Orsiris de Jong (www.netpower.fr) 5 | ###### pmOCR v1.5.4 - 1.6.0 config file 2018122101 6 | CONFIG_FILE_REVISION=1 7 | 8 | ## ---------- GENERAL OPTIONS 9 | 10 | ## Instance identification 11 | INSTANCE_ID=MyOCRServer 12 | 13 | ## List of allowed extensions for input files 14 | FILES_TO_PROCES="\(pdf\|tif\|tiff\|png\|jpg\|jpeg\|bmp\|pcx\|dcx\)" 15 | 16 | ## Number of OCR subprocesses to start simultaneously. Should not exceed the number of CPU cores for best performance. 17 | NUMBER_OF_PROCESSES=4 18 | 19 | ## The output file user and group ownership may be copied from input file (works only if executed as root). 20 | PRESERVE_OWNERSHIP=no 21 | ## Output file permissions. Defaults to 644 (works only if executed as root). 22 | FILE_PERMISSIONS= 23 | 24 | ## OCR Engine, adjust *_OCR_ENGINE_ARGS to fit your needs, especially for language settings 25 | 26 | # Acceptable values are abbyyocr11, tesseract3 27 | OCR_ENGINE=tesseract3 28 | 29 | ## ---------- OCR Engine arguments 30 | 31 | # AbbyyOCR11 Engine Arguments 32 | ############################# 33 | 34 | ## lpp = load predefinied profil / TextExtraction_Acuraccy = name of the predefinied profile / -adb = Detect barcodes / -ido = Detect and rotate image orientation / -adtop = Detect text embedded in images 35 | ## -rl = List of languages for the document (French,English,Spanish) / recc = Enhanced character confidence 36 | ##### PDF related arguments : -pfs = PDF Export preset (balanced) / -pacm = PDF/A standards (pdfa-3a) / ptem = Specifies the mode of export of recognized text into PDF (PDF/A) format. 
37 | ##### DOCX related arguments :-dheb = Highlights uncertainly recognized characters with the background color when exporting to DOCX format (color definied by deb parameter). 38 | ##### -deb 0xFFFF00 (yellow highlights) 39 | ##### XLSX related arguments : -xlto = only export text from table / -xlrf = remove formating from text / -xllrm = This option allows setting the mode of retaining the original document tables' layout in the output XLSX file (Default, ExactDocument, ExactLines) 40 | 41 | ## Full path to OCR engine 42 | 43 | ABBYY_OCR_ENGINE_EXEC=/usr/local/bin/abbyyocr11 44 | 45 | # Quality may be set to Balanced, MaxSpeed, MaxQuality, MinSize 46 | ABBYY_PDF_QUALITY=Balanced 47 | ABBYY_PDF_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -pfs $ABBYY_PDF_QUALITY -pacm Pdfa_3a -ptem ImageOnText -f pdf' 48 | ABBYY_WORD_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -f docx' 49 | ABBYY_EXCEL_OCR_ENGINE_ARGS=' -lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -rpihp -xlrf -xllrm ExactLines -f xlsx' 50 | ABBYY_TEXT_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -trl -f TextUnicodeDefaults' 51 | ABBYY_CSV_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -trl -f TextUnicodeDefaults' 52 | ABBYY_OCR_ENGINE_INPUT_ARG='-if' 53 | ABBYY_OCR_ENGINE_OUTPUT_ARG='-of' 54 | 55 | 56 | # tesseract 3.x Engine Arguments 57 | ################################ 58 | 59 | ## Full path to OCR engine 60 | 61 | TESSERACT_OCR_ENGINE_EXEC=/usr/bin/tesseract 62 | TESSERACT_PDF_OCR_ENGINE_ARGS='pdf' 63 | TESSERACT_TEXT_OCR_ENGINE_ARGS='' 64 | TESSERACT_CSV_OCR_ENGINE_ARGS='' 65 | TESSERACT_OCR_ENGINE_INPUT_ARG='-l eng' # Language setting 66 | TESSERACT_OCR_ENGINE_OUTPUT_ARG= 67 | ## tesseract 3 intermediary transformation of PDF to TIFF 68 | 
TESSERACT_PDF_TO_TIFF_EXEC=/usr/bin/gs 69 | TESSERACT_PDF_TO_TIFF_OPTS=' -q -dNOPAUSE -r300x300 -sDEVICE=tiff32nc -sCompression=lzw -dBATCH -sOUTPUTFILE=' 70 | 71 | # Preprocessor Arguments (only for tesseract) 72 | ############################################# 73 | 74 | ## Optional preprocessor to correct scanned images (don't use this for abbyy11 which already contains it's own preprocessor) 75 | ## Uncomment OCR_PREPROCESSOR_EXEC lines to use it 76 | ## See http://www.imagemagick.org/discourse-server/viewtopic.php?t=22226 for examples 77 | 78 | #OCR_PREPROCESSOR_EXEC=/usr/bin/convert 79 | OCR_PREPROCESSOR_ARGS='-respect-parenthesis \( -compress LZW -density 300 -bordercolor black -border 1 -trim +repage -fill white -draw "color 0,0 floodfill" -alpha off -shave 1x1 \) \( -bordercolor black -border 2 -fill white -draw "color 0,0 floodfill" -alpha off -shave 0x1 -deskew 40 +repage \)' 80 | OCR_PREPROCESSOR_INPUT_ARG='' 81 | OCR_PREPROCESSOR_OUTPUT_ARG='' 82 | 83 | ####################################################################### 84 | ### THE FOLLOWING PARAMETERS ARE USED WHEN pmOCR IS RUN AS SERVICE #### 85 | ### YOU MAY SET THEM IN COMMAND LINE WHEN USING BATCH MODE #### 86 | ####################################################################### 87 | 88 | ## List of alert mails separated by spaces 89 | DESTINATION_MAILS="infrastructure@example.com" 90 | 91 | ## Optional change of mail body encoding (using iconv) 92 | ## By default, all mails are sent in UTF-8 format without header (because of maximum compatibility of all platforms) 93 | ## You may specify an optional encoding here (like "ISO-8859-1" or whatever iconv can handle) 94 | MAIL_BODY_CHARSET="" 95 | 96 | ## Directories to monitor (Leave variables empty in order to disable specific monitoring). 
97 | ## As of today, Tesseract only handles PDF, TXT and CSV 98 | PDF_MONITOR_DIR="/storage/service_ocr/PDF" 99 | WORD_MONITOR_DIR="/storage/service_ocr/WORD" 100 | EXCEL_MONITOR_DIR="/storage/service_ocr/EXCEL" 101 | TEXT_MONITOR_DIR="/storage/service_ocr/TEXT" 102 | CSV_MONITOR_DIR="/storage/service_ocr/CSV" 103 | 104 | PDF_EXTENSION=".pdf" 105 | WORD_EXTENSION=".docx" 106 | EXCEL_EXTENSION=".xlsx" 107 | TEXT_EXTENSION=".txt" 108 | CSV_EXTENSION=".csv" 109 | 110 | ## Move original file after successful processing into a path that will be ignored by the monitor. 111 | ## Enabling this setting automatically disables DELETE_ORIGINAL and FILENAME_SUFFIX values. 112 | #MOVE_ORIGINAL_ON_SUCCESS="/storage/service_ocr/done" 113 | 114 | ## Move failed to process file into a path that will be ignored by the monitor. 115 | ## Enabling this setting automatically disables FAILED_FILENAME_SUFFIX value. 116 | #MOVE_ORIGINAL_ON_FAILURE="/storage/service_ocr/failed" 117 | 118 | ## Adds an optional following suffix to OCRed files (ex: input.tiff becomes input_OCR.pdf). Any file containing this suffix will be ignored. Can be left empty. 119 | FILENAME_SUFFIX="_OCR" 120 | 121 | ## Add the following suffix to failed files in order to prevent them from being processed in a loop. Can be left empty. 122 | FAILED_FILENAME_SUFFIX="_OCR_ERR" 123 | 124 | ## Delete original file upon successful processing. 125 | DELETE_ORIGINAL=no 126 | 127 | # Alternative check if PDFs are already OCRed (checks if a pdf contains a font). This will prevent images integrated in already indexed PDFs to get OCRed. 128 | CHECK_PDF=no 129 | 130 | ## Add some extra info to the filename. Example here adds a pseudo ISO 8601 timestamp after a dot (pseudo because the colon sign would render the filename quite weird). 131 | ## Keep variables between singlequotes if you want them to expand at runtime. Leave this variable empty if you don't want to add anything (is also added to moved files). 
132 | FILENAME_ADDITION='.$(date --utc +"%Y-%m-%dT%H-%M-%SZ")' 133 | 134 | ## Max time before triggering a forced OCR run when no file actions are detected 135 | MAX_TIME=3600 136 | -------------------------------------------------------------------------------- /dev/tests/shunit2/shunit2_asserts_test.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # vim:et:ft=sh:sts=2:sw=2 3 | # 4 | # shunit2 unit test for assert functions. 5 | # 6 | # Copyright 2008-2017 Kate Ward. All Rights Reserved. 7 | # Released under the Apache 2.0 license. 8 | # 9 | # Author: kate.ward@forestent.com (Kate Ward) 10 | # https://github.com/kward/shunit2 11 | # 12 | # Disable source following. 13 | # shellcheck disable=SC1090,SC1091 14 | 15 | # These variables will be overridden by the test helpers. 16 | stdoutF="${TMPDIR:-/tmp}/STDOUT" 17 | stderrF="${TMPDIR:-/tmp}/STDERR" 18 | 19 | # Load test helpers. 20 | . ./shunit2_test_helpers 21 | 22 | commonEqualsSame() { 23 | fn=$1 24 | 25 | ( ${fn} 'x' 'x' >"${stdoutF}" 2>"${stderrF}" ) 26 | th_assertTrueWithNoOutput 'equal' $? "${stdoutF}" "${stderrF}" 27 | 28 | ( ${fn} "${MSG}" 'x' 'x' >"${stdoutF}" 2>"${stderrF}" ) 29 | th_assertTrueWithNoOutput 'equal; with msg' $? "${stdoutF}" "${stderrF}" 30 | 31 | ( ${fn} 'abc def' 'abc def' >"${stdoutF}" 2>"${stderrF}" ) 32 | th_assertTrueWithNoOutput 'equal with spaces' $? "${stdoutF}" "${stderrF}" 33 | 34 | ( ${fn} 'x' 'y' >"${stdoutF}" 2>"${stderrF}" ) 35 | th_assertFalseWithOutput 'not equal' $? "${stdoutF}" "${stderrF}" 36 | 37 | ( ${fn} '' '' >"${stdoutF}" 2>"${stderrF}" ) 38 | th_assertTrueWithNoOutput 'null values' $? "${stdoutF}" "${stderrF}" 39 | 40 | ( ${fn} arg1 >"${stdoutF}" 2>"${stderrF}" ) 41 | th_assertFalseWithError 'too few arguments' $? "${stdoutF}" "${stderrF}" 42 | 43 | ( ${fn} arg1 arg2 arg3 arg4 >"${stdoutF}" 2>"${stderrF}" ) 44 | th_assertFalseWithError 'too many arguments' $? 
"${stdoutF}" "${stderrF}" 45 | } 46 | 47 | commonNotEqualsSame() { 48 | fn=$1 49 | 50 | ( ${fn} 'x' 'y' >"${stdoutF}" 2>"${stderrF}" ) 51 | th_assertTrueWithNoOutput 'not same' $? "${stdoutF}" "${stderrF}" 52 | 53 | ( ${fn} "${MSG}" 'x' 'y' >"${stdoutF}" 2>"${stderrF}" ) 54 | th_assertTrueWithNoOutput 'not same, with msg' $? "${stdoutF}" "${stderrF}" 55 | 56 | ( ${fn} 'x' 'x' >"${stdoutF}" 2>"${stderrF}" ) 57 | th_assertFalseWithOutput 'same' $? "${stdoutF}" "${stderrF}" 58 | 59 | ( ${fn} '' '' >"${stdoutF}" 2>"${stderrF}" ) 60 | th_assertFalseWithOutput 'null values' $? "${stdoutF}" "${stderrF}" 61 | 62 | ( ${fn} arg1 >"${stdoutF}" 2>"${stderrF}" ) 63 | th_assertFalseWithError 'too few arguments' $? "${stdoutF}" "${stderrF}" 64 | 65 | ( ${fn} arg1 arg2 arg3 arg4 >"${stdoutF}" 2>"${stderrF}" ) 66 | th_assertFalseWithError 'too many arguments' $? "${stdoutF}" "${stderrF}" 67 | } 68 | 69 | testAssertEquals() { 70 | commonEqualsSame 'assertEquals' 71 | } 72 | 73 | testAssertNotEquals() { 74 | commonNotEqualsSame 'assertNotEquals' 75 | } 76 | 77 | testAssertSame() { 78 | commonEqualsSame 'assertSame' 79 | } 80 | 81 | testAssertNotSame() { 82 | commonNotEqualsSame 'assertNotSame' 83 | } 84 | 85 | testAssertNull() { 86 | ( assertNull '' >"${stdoutF}" 2>"${stderrF}" ) 87 | th_assertTrueWithNoOutput 'null' $? "${stdoutF}" "${stderrF}" 88 | 89 | ( assertNull "${MSG}" '' >"${stdoutF}" 2>"${stderrF}" ) 90 | th_assertTrueWithNoOutput 'null, with msg' $? "${stdoutF}" "${stderrF}" 91 | 92 | ( assertNull 'x' >"${stdoutF}" 2>"${stderrF}" ) 93 | th_assertFalseWithOutput 'not null' $? "${stdoutF}" "${stderrF}" 94 | 95 | ( assertNull >"${stdoutF}" 2>"${stderrF}" ) 96 | th_assertFalseWithError 'too few arguments' $? "${stdoutF}" "${stderrF}" 97 | 98 | ( assertNull arg1 arg2 arg3 >"${stdoutF}" 2>"${stderrF}" ) 99 | th_assertFalseWithError 'too many arguments' $? 
"${stdoutF}" "${stderrF}" 100 | } 101 | 102 | testAssertNotNull() 103 | { 104 | ( assertNotNull 'x' >"${stdoutF}" 2>"${stderrF}" ) 105 | th_assertTrueWithNoOutput 'not null' $? "${stdoutF}" "${stderrF}" 106 | 107 | ( assertNotNull "${MSG}" 'x' >"${stdoutF}" 2>"${stderrF}" ) 108 | th_assertTrueWithNoOutput 'not null, with msg' $? "${stdoutF}" "${stderrF}" 109 | 110 | ( assertNotNull 'x"b' >"${stdoutF}" 2>"${stderrF}" ) 111 | th_assertTrueWithNoOutput 'not null, with double-quote' $? \ 112 | "${stdoutF}" "${stderrF}" 113 | 114 | ( assertNotNull "x'b" >"${stdoutF}" 2>"${stderrF}" ) 115 | th_assertTrueWithNoOutput 'not null, with single-quote' $? \ 116 | "${stdoutF}" "${stderrF}" 117 | 118 | # shellcheck disable=SC2016 119 | ( assertNotNull 'x$b' >"${stdoutF}" 2>"${stderrF}" ) 120 | th_assertTrueWithNoOutput 'not null, with dollar' $? \ 121 | "${stdoutF}" "${stderrF}" 122 | 123 | ( assertNotNull 'x`b' >"${stdoutF}" 2>"${stderrF}" ) 124 | th_assertTrueWithNoOutput 'not null, with backtick' $? \ 125 | "${stdoutF}" "${stderrF}" 126 | 127 | ( assertNotNull '' >"${stdoutF}" 2>"${stderrF}" ) 128 | th_assertFalseWithOutput 'null' $? "${stdoutF}" "${stderrF}" 129 | 130 | # There is no test for too few arguments as $1 might actually be null. 131 | 132 | ( assertNotNull arg1 arg2 arg3 >"${stdoutF}" 2>"${stderrF}" ) 133 | th_assertFalseWithError 'too many arguments' $? "${stdoutF}" "${stderrF}" 134 | } 135 | 136 | testAssertTrue() { 137 | ( assertTrue 0 >"${stdoutF}" 2>"${stderrF}" ) 138 | th_assertTrueWithNoOutput 'true' $? "${stdoutF}" "${stderrF}" 139 | 140 | ( assertTrue "${MSG}" 0 >"${stdoutF}" 2>"${stderrF}" ) 141 | th_assertTrueWithNoOutput 'true, with msg' $? "${stdoutF}" "${stderrF}" 142 | 143 | ( assertTrue '[ 0 -eq 0 ]' >"${stdoutF}" 2>"${stderrF}" ) 144 | th_assertTrueWithNoOutput 'true condition' $? "${stdoutF}" "${stderrF}" 145 | 146 | ( assertTrue 1 >"${stdoutF}" 2>"${stderrF}" ) 147 | th_assertFalseWithOutput 'false' $? 
"${stdoutF}" "${stderrF}" 148 | 149 | ( assertTrue '[ 0 -eq 1 ]' >"${stdoutF}" 2>"${stderrF}" ) 150 | th_assertFalseWithOutput 'false condition' $? "${stdoutF}" "${stderrF}" 151 | 152 | ( assertTrue '' >"${stdoutF}" 2>"${stderrF}" ) 153 | th_assertFalseWithOutput 'null' $? "${stdoutF}" "${stderrF}" 154 | 155 | ( assertTrue >"${stdoutF}" 2>"${stderrF}" ) 156 | th_assertFalseWithError 'too few arguments' $? "${stdoutF}" "${stderrF}" 157 | 158 | ( assertTrue arg1 arg2 arg3 >"${stdoutF}" 2>"${stderrF}" ) 159 | th_assertFalseWithError 'too many arguments' $? "${stdoutF}" "${stderrF}" 160 | } 161 | 162 | testAssertFalse() { 163 | ( assertFalse 1 >"${stdoutF}" 2>"${stderrF}" ) 164 | th_assertTrueWithNoOutput 'false' $? "${stdoutF}" "${stderrF}" 165 | 166 | ( assertFalse "${MSG}" 1 >"${stdoutF}" 2>"${stderrF}" ) 167 | th_assertTrueWithNoOutput 'false, with msg' $? "${stdoutF}" "${stderrF}" 168 | 169 | ( assertFalse '[ 0 -eq 1 ]' >"${stdoutF}" 2>"${stderrF}" ) 170 | th_assertTrueWithNoOutput 'false condition' $? "${stdoutF}" "${stderrF}" 171 | 172 | ( assertFalse 0 >"${stdoutF}" 2>"${stderrF}" ) 173 | th_assertFalseWithOutput 'true' $? "${stdoutF}" "${stderrF}" 174 | 175 | ( assertFalse '[ 0 -eq 0 ]' >"${stdoutF}" 2>"${stderrF}" ) 176 | th_assertFalseWithOutput 'true condition' $? "${stdoutF}" "${stderrF}" 177 | 178 | ( assertFalse '' >"${stdoutF}" 2>"${stderrF}" ) 179 | th_assertFalseWithOutput 'true condition' $? "${stdoutF}" "${stderrF}" 180 | 181 | ( assertFalse >"${stdoutF}" 2>"${stderrF}" ) 182 | th_assertFalseWithError 'too few arguments' $? "${stdoutF}" "${stderrF}" 183 | 184 | ( assertFalse arg1 arg2 arg3 >"${stdoutF}" 2>"${stderrF}" ) 185 | th_assertFalseWithError 'too many arguments' $? "${stdoutF}" "${stderrF}" 186 | } 187 | 188 | oneTimeSetUp() { 189 | th_oneTimeSetUp 190 | 191 | MSG='This is a test message' 192 | } 193 | 194 | # Load and run shunit2. 195 | # shellcheck disable=SC2034 196 | [ -n "${ZSH_VERSION:-}" ] && SHUNIT_PARENT=$0 197 | . 
"${TH_SHUNIT}" 198 | -------------------------------------------------------------------------------- /dev/tests/conf/service.conf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ###### pmOCR - batch & service wrapper for OCR tools 4 | ###### (C) 2014-2018 by Orsiris de Jong (www.netpower.fr) 5 | ###### pmOCR v1.5.4 - 1.6.0 config file 2018122101 6 | CONFIG_FILE_REVISION=1 7 | 8 | ## ---------- GENERAL OPTIONS 9 | 10 | ## Instance identification 11 | INSTANCE_ID=MyOCRServer 12 | 13 | ## List of allowed extensions for input files 14 | FILES_TO_PROCES="\(pdf\|tif\|tiff\|png\|jpg\|jpeg\|bmp\|pcx\|dcx\)" 15 | 16 | ## Number of OCR subprocesses to start simultaneously. Should not exceed the number of CPU cores for best performance. 17 | NUMBER_OF_PROCESSES=4 18 | 19 | ## The output file user and group ownership may be copied from input file (works only if executed as root). 20 | PRESERVE_OWNERSHIP=no 21 | ## Output file permissions. Defaults to 644 (works only if executed as root). 22 | FILE_PERMISSIONS= 23 | 24 | ## OCR Engine, adjust *_OCR_ENGINE_ARGS to fit your needs, especially for language settings 25 | 26 | # Acceptable values are abbyyocr11, tesseract3 27 | OCR_ENGINE=tesseract3 28 | 29 | ## ---------- OCR Engine arguments 30 | 31 | # AbbyyOCR11 Engine Arguments 32 | ############################# 33 | 34 | ## lpp = load predefinied profil / TextExtraction_Acuraccy = name of the predefinied profile / -adb = Detect barcodes / -ido = Detect and rotate image orientation / -adtop = Detect text embedded in images 35 | ## -rl = List of languages for the document (French,English,Spanish) / recc = Enhanced character confidence 36 | ##### PDF related arguments : -pfs = PDF Export preset (balanced) / -pacm = PDF/A standards (pdfa-3a) / ptem = Specifies the mode of export of recognized text into PDF (PDF/A) format. 
37 | ##### DOCX related arguments :-dheb = Highlights uncertainly recognized characters with the background color when exporting to DOCX format (color definied by deb parameter). 38 | ##### -deb 0xFFFF00 (yellow highlights) 39 | ##### XLSX related arguments : -xlto = only export text from table / -xlrf = remove formating from text / -xllrm = This option allows setting the mode of retaining the original document tables' layout in the output XLSX file (Default, ExactDocument, ExactLines) 40 | 41 | ## Full path to OCR engine 42 | 43 | ABBYY_OCR_ENGINE_EXEC=/usr/local/bin/abbyyocr11 44 | 45 | # Quality may be set to Balanced, MaxSpeed, MaxQuality, MinSize 46 | ABBYY_PDF_QUALITY=Balanced 47 | ABBYY_PDF_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -pfs $ABBYY_PDF_QUALITY -pacm Pdfa_3a -ptem ImageOnText -f pdf' 48 | ABBYY_WORD_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -f docx' 49 | ABBYY_EXCEL_OCR_ENGINE_ARGS=' -lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -rpihp -xlrf -xllrm ExactLines -f xlsx' 50 | ABBYY_TEXT_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -trl -f TextUnicodeDefaults' 51 | ABBYY_CSV_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -trl -f TextUnicodeDefaults' 52 | ABBYY_OCR_ENGINE_INPUT_ARG='-if' 53 | ABBYY_OCR_ENGINE_OUTPUT_ARG='-of' 54 | 55 | 56 | # tesseract 3.x Engine Arguments 57 | ################################ 58 | 59 | ## Full path to OCR engine 60 | 61 | TESSERACT_OCR_ENGINE_EXEC=/usr/bin/tesseract 62 | TESSERACT_PDF_OCR_ENGINE_ARGS='pdf' 63 | TESSERACT_TEXT_OCR_ENGINE_ARGS='' 64 | TESSERACT_CSV_OCR_ENGINE_ARGS='' 65 | TESSERACT_OCR_ENGINE_INPUT_ARG='-l eng' # Language setting 66 | TESSERACT_OCR_ENGINE_OUTPUT_ARG= 67 | ## tesseract 3 intermediary transformation of PDF to TIFF 68 | 
TESSERACT_PDF_TO_TIFF_EXEC=/usr/bin/gs 69 | TESSERACT_PDF_TO_TIFF_OPTS=' -q -dNOPAUSE -r300x300 -sDEVICE=tiff32nc -sCompression=lzw -dBATCH -sOUTPUTFILE=' 70 | 71 | # Preprocessor Arguments (only for tesseract) 72 | ############################################# 73 | 74 | ## Optional preprocessor to correct scanned images (don't use this for abbyy11 which already contains it's own preprocessor) 75 | ## Uncomment OCR_PREPROCESSOR_EXEC lines to use it 76 | ## See http://www.imagemagick.org/discourse-server/viewtopic.php?t=22226 for examples 77 | 78 | #OCR_PREPROCESSOR_EXEC=/usr/bin/convert 79 | OCR_PREPROCESSOR_ARGS='-respect-parenthesis \( -compress LZW -density 300 -bordercolor black -border 1 -trim +repage -fill white -draw "color 0,0 floodfill" -alpha off -shave 1x1 \) \( -bordercolor black -border 2 -fill white -draw "color 0,0 floodfill" -alpha off -shave 0x1 -deskew 40 +repage \)' 80 | OCR_PREPROCESSOR_INPUT_ARG='' 81 | OCR_PREPROCESSOR_OUTPUT_ARG='' 82 | 83 | ####################################################################### 84 | ### THE FOLLOWING PARAMETERS ARE USED WHEN pmOCR IS RUN AS SERVICE #### 85 | ### YOU MAY SET THEM IN COMMAND LINE WHEN USING BATCH MODE #### 86 | ####################################################################### 87 | 88 | ## List of alert mails separated by spaces 89 | DESTINATION_MAILS="infrastructure@example.com" 90 | 91 | ## Optional change of mail body encoding (using iconv) 92 | ## By default, all mails are sent in UTF-8 format without header (because of maximum compatibility of all platforms) 93 | ## You may specify an optional encoding here (like "ISO-8859-1" or whatever iconv can handle) 94 | MAIL_BODY_CHARSET="" 95 | 96 | ## Directories to monitor (Leave variables empty in order to disable specific monitoring). 
97 | ## As of today, Tesseract only handles PDF, TXT and CSV 98 | PDF_MONITOR_DIR="${HOME}/pmocr-tests/service/PDF" 99 | #WORD_MONITOR_DIR="/storage/service_ocr/WORD" 100 | #EXCEL_MONITOR_DIR="/storage/service_ocr/EXCEL" 101 | TEXT_MONITOR_DIR="${HOME}/pmocr-tests/service/TEXT" 102 | CSV_MONITOR_DIR="${HOME}/pmocr-tests/service/CSV" 103 | 104 | PDF_EXTENSION=".pdf" 105 | WORD_EXTENSION=".docx" 106 | EXCEL_EXTENSION=".xlsx" 107 | TEXT_EXTENSION=".txt" 108 | CSV_EXTENSION=".csv" 109 | 110 | ## Move original file after successful processing into a path that will be ignored by the monitor. 111 | ## Enabling this setting automatically disables DELETE_ORIGINAL and FILENAME_SUFFIX values. 112 | #MOVE_ORIGINAL_ON_SUCCESS="/storage/service_ocr/done" 113 | 114 | ## Move failed to process file into a path that will be ignored by the monitor. 115 | ## Enabling this setting automatically disables FAILED_FILENAME_SUFFIX value. 116 | #MOVE_ORIGINAL_ON_FAILURE="/storage/service_ocr/failed" 117 | 118 | ## Adds an optional following suffix to OCRed files (ex: input.tiff becomes input_OCR.pdf). Any file containing this suffix will be ignored. Can be left empty. 119 | FILENAME_SUFFIX="_OCR" 120 | 121 | ## Add the following suffix to failed files in order to prevent them from being processed in a loop. Can be left empty. 122 | FAILED_FILENAME_SUFFIX="_OCR_ERR" 123 | 124 | ## Delete original file upon successful processing. 125 | DELETE_ORIGINAL=no 126 | 127 | # Alternative check if PDFs are already OCRed (checks if a pdf contains a font). This will prevent images integrated in already indexed PDFs to get OCRed. 128 | CHECK_PDF=yes 129 | 130 | ## Add some extra info to the filename. Example here adds a pseudo ISO 8601 timestamp after a dot (pseudo because the colon sign would render the filename quite weird). 131 | ## Keep variables between singlequotes if you want them to expand at runtime. Leave this variable empty if you don't want to add anything (is also added to moved files). 
132 | FILENAME_ADDITION='.$(date --utc +"%Y-%m-%dT%H-%M-%SZ")' 133 | 134 | ## Max time before triggering a forced OCR run when no file actions are detected 135 | MAX_TIME=3600 136 | MOVE_ORIGINAL_ON_SUCCESS= 137 | MOVE_ORIGINAL_ON_FAILURE= 138 | -------------------------------------------------------------------------------- /dev/tests/shunit2/shunit2_test_helpers: -------------------------------------------------------------------------------- 1 | # vim:et:ft=sh:sts=2:sw=2 2 | # 3 | # shUnit2 unit test common functions 4 | # 5 | # Copyright 2008 Kate Ward. All Rights Reserved. 6 | # Released under the Apache 2.0 license. 7 | # 8 | # Author: kate.ward@forestent.com (Kate Ward) 9 | # https://github.com/kward/shunit2 10 | # 11 | ### ShellCheck (http://www.shellcheck.net/) 12 | # Commands are purposely escaped so they can be mocked outside shUnit2. 13 | # shellcheck disable=SC1001,SC1012 14 | # expr may be antiquated, but it is the only solution in some cases. 15 | # shellcheck disable=SC2003 16 | # $() are not fully portable (POSIX != portable). 17 | # shellcheck disable=SC2006 18 | 19 | # Treat unset variables as an error when performing parameter expansion. 20 | set -u 21 | 22 | # Set shwordsplit for zsh. 23 | \[ -n "${ZSH_VERSION:-}" ] && setopt shwordsplit 24 | 25 | # 26 | # Constants. 27 | # 28 | 29 | # Path to shUnit2 library. Can be overridden by setting SHUNIT_INC. 30 | TH_SHUNIT=${SHUNIT_INC:-./shunit2}; export TH_SHUNIT 31 | 32 | # Configure debugging. Set the DEBUG environment variable to any 33 | # non-empty value to enable debug output, or TRACE to enable trace 34 | # output. 35 | TRACE=${TRACE:+'th_trace '} 36 | \[ -n "${TRACE}" ] && DEBUG=1 37 | \[ -z "${TRACE}" ] && TRACE=':' 38 | 39 | DEBUG=${DEBUG:+'th_debug '} 40 | \[ -z "${DEBUG}" ] && DEBUG=':' 41 | 42 | # 43 | # Variables. 44 | # 45 | 46 | th_RANDOM=0 47 | 48 | # 49 | # Functions. 50 | # 51 | 52 | # Logging functions. 
53 | th_trace() { echo "${MY_NAME}:TRACE $*" >&2; } 54 | th_debug() { echo "${MY_NAME}:DEBUG $*" >&2; } 55 | th_info() { echo "${MY_NAME}:INFO $*" >&2; } 56 | th_warn() { echo "${MY_NAME}:WARN $*" >&2; } 57 | th_error() { echo "${MY_NAME}:ERROR $*" >&2; } 58 | th_fatal() { echo "${MY_NAME}:FATAL $*" >&2; } 59 | 60 | # Output subtest name. 61 | th_subtest() { echo " $*" >&2; } 62 | 63 | th_oneTimeSetUp() { 64 | # These files will be cleaned up automatically by shUnit2. 65 | stdoutF="${SHUNIT_TMPDIR}/stdout" 66 | stderrF="${SHUNIT_TMPDIR}/stderr" 67 | returnF="${SHUNIT_TMPDIR}/return" 68 | expectedF="${SHUNIT_TMPDIR}/expected" 69 | export stdoutF stderrF returnF expectedF 70 | } 71 | 72 | # Generate a random number. 73 | th_generateRandom() { 74 | tfgr_random=${th_RANDOM} 75 | 76 | while \[ "${tfgr_random}" = "${th_RANDOM}" ]; do 77 | # shellcheck disable=SC2039 78 | if \[ -n "${RANDOM:-}" ]; then 79 | # $RANDOM works 80 | # shellcheck disable=SC2039 81 | tfgr_random=${RANDOM}${RANDOM}${RANDOM}$$ 82 | elif \[ -r '/dev/urandom' ]; then 83 | tfgr_random=`od -vAn -N4 -tu4 >> STDOUT' >&2 215 | \cat "${_th_stdout_}" >&2 216 | fi 217 | if \[ -n "${_th_stderr_}" -a -s "${_th_stderr_}" ]; then 218 | echo '>>> STDERR' >&2 219 | \cat "${_th_stderr_}" >&2 220 | fi 221 | if \[ -n "${_th_stdout_}" -o -n "${_th_stderr_}" ]; then 222 | echo '<<< end output' >&2 223 | fi 224 | fi 225 | 226 | unset _th_return_ _th_stdout_ _th_stderr_ 227 | } 228 | 229 | # 230 | # Main. 
231 | # 232 | 233 | ${TRACE} 'trace output enabled' 234 | ${DEBUG} 'debug output enabled' 235 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | RECENT CHANGES 2 | -------------- 3 | 4 | 26 Feb 2022: v1.8.1 5 | 6 | - Added a configurable poller interval 7 | - Added service recovery when directory to monitor is not writable 8 | - Fixed upgrades with newer configuration files 9 | - Fixed sporadic errors with preprocessed images being detected by poller 10 | 11 | 23 Feb 2022: v1.8.0 12 | 13 | - Added internal inotifywait emulation that can deal with events on NFS / SMB shares where inotify events won't happen 14 | - Highly speed up OCR by bypassing checks on non modified files 15 | - Speed up OCR_Dispatch by checking already OCRed PDFs before launching OCR function 16 | - Inclusions and exclusions are now case insensitive in order to make sure we play right with Windows rules too 17 | 18 | 29 Dec 2021: v1.7.0 (never released) 19 | 20 | - Tested Tesseract 5.X engine 21 | - Improved optional preprocessor commandline 22 | - Added antialiasing 23 | - Added text sharpening 24 | - Removed earlier ghostscript dependency 25 | - Fixed installer message when no wget is present 26 | - Updated ofunctions 27 | 28 | 11 Jul 2019: v1.6.1 29 | 30 | - Tested Tesseract 4.x engine 31 | - Renamed "tesseract3" engine to "tesseract" since we work with 3.02+ / 4.x 32 | - Added TESSERACT_OPTIONAL_ARGS in config file 33 | - Improved handling of open files being deferred for later OCR 34 | - Fixed automatic service shutdown in RHEL 6/7 (automatic /tmp directory cleanup removing service file) 35 | - Updated ofunctions 36 | - Moved from yes/no parameters to bash booleans 37 | - Compatibility with elder config is preserved 38 | - Better cleanup 39 | - Fixed installer typos 40 | 41 | 21 Dec 2018: v1.6.0 42 | 43 | - Simplified config file syntax for OCR_ENGINE selection 
44 | - Added config file revision check 45 | - Fixed logs not writing correctly in service mode and batch mode (OCR_Dispatch and lower function Logger doesn't work in) 46 | - Fixed --no-text argument 47 | - Added --failed-suffix and --no-failed-suffix batch options 48 | - Skipping files currently being written to (workaround for slow file transfers), leaving them for next run 49 | - Add nanoseconds to filename if output file already exists on move 50 | - Clearer preflight error messages 51 | - Updated ofunctions 52 | - RFC822 email compliance checks 53 | - New more complete ExecTasks function to replace ParallelExec 54 | - Fix log sending with double compressed extensions 55 | - Minor fixes 56 | - Fixed return code for initV style service file 57 | - Upgraded shunit2 test framework to v2.1.8pre (git commit 07bb329) 58 | 59 | 21 Apr 2017: v1.5.7 60 | 61 | - Fixed a bug cleaning the SERVICE_MONITOR file after each run 62 | 63 | 20 Apr 2017: v1.5.6 64 | 65 | - Added tesseract version preflight checks 66 | - Added unit test framework (basic functionality for now) 67 | - Added batch tests 68 | - File suffixes & no suffixes 69 | - File text / date additions 70 | - Skip searchable pdf tests 71 | - Delete original upon successful processing 72 | - Added service tests 73 | - Basic PDF / TXT / CSV tests 74 | - File moves on success & failure 75 | - Fixed SERVICE-MONITOR file (run file) created in root 76 | - Fixed CSV transformation that didn't work 77 | - Fixed a low severity security issue where log & run files are world readable 78 | - Fixed some installer strings 79 | - Tmp files are now cleaned on the fly after each dispatch 80 | 81 | 13 Mar 2017: v1.5.4 82 | 83 | - Support for moving files after processing 84 | - Failing to move files will automatically rename them 85 | - Better installer with --remove support 86 | - Mail alerts can now be encoded differently than UTF-8 87 | - Updated ofunctions from obackup / osync 88 | 89 | 06 Feb 2017: v1.5.2 90 | 91 | - Service
improvements 92 | - A forced run is done every MAX_WAIT seconds 93 | - OCR is run on service start 94 | - Moved files now also trigger an OCR run 95 | - Prevent overwriting multiple failed files with same source filename 96 | - Updated ofunctions from osync & obackup projects allowing to address multiple issues 97 | - Improved mail function 98 | - Improved ParallelExec function 99 | - Improved logging functionality 100 | 101 | 21 Oct 2016: v1.5 102 | 103 | - Added ownership preservation option 104 | - Added optional file permission mask to replace default new file permissions 105 | - Added the possibility to use an image preprocessor (ImageMagick is preconfigured but not enabled by default) 106 | - Corrected an issue where a failed service run may end up in an infinite loop by adding a failed OCR file suffix 107 | - Made a workaround for Tesseract throwing an error when OSD data is missing but not exiting with a failure code 108 | - Fixed intermediary PDF2TIFF transformation used with Tesseract 109 | - Fixed --suffix option being ignored 110 | - Recoded service execution asynchronously 111 | - Fixed a bug when a file is added while the OCR process is already running, the file won't be processed until another file is added 112 | - Changed unix process signals to be POSIX compliant 113 | - Fixed file suffix exclusion also excluded files that contained the suffix anywhere in the filename 114 | - Enhanced parallel execution for huge file sets 115 | - Improved cpu usage on idle 116 | - Changed the way pmocr works 117 | - Split pmocr.sh config into separate config files so updates don't overwrite current config anymore 118 | - Updated service files to run multiple instances 119 | - Updated install script to handle config files 120 | - Added parallel execution for multicore systems 121 | - Improved tesseract 3 support 122 | - Added text output format 123 | - Added csv output format (with csv hack) 124 | - Remove intermediary txt files produced by tesseract 125 | -
Improved logging 126 | - Improved code compliance 127 | - Various minor fixes from ofunctions updates 128 | 129 | 15 Aug 2016: v1.4.2 130 | - Removed keep logging statement from WaitForTaskCompletion function 131 | - Fixed rare bug where original PDF file gets deleted without a successful transformation 132 | - Removed NO_DELETE_SUFFIX that is not used anymore 133 | - More debug logs 134 | - Updated ofunctions from other projects 135 | 136 | 06 Aug 2016: v1.4.1 137 | - Fixed mail alerts not sent 138 | - Improved debugging and logging 139 | - Merged dev builder with other projects 140 | - Cleaned code (a bit) 141 | 142 | 04 Aug 2016: v1.4 143 | - Merged more recent common function set 144 | - Improved logging 145 | - Improved installer 146 | - Added a systemd unit file 147 | - Added pdf2tiff intermediary transformation for tesseract3 to support pdf input (thanks to mhelff, https://github.com/mhelff) 148 | - Set pdf conversion as default choice in batch mode 149 | - Added preflight checks for tesseract3 engine 150 | - Refactored code that became totally unreadable for human beings :) 151 | - Improved sub process terminate code 152 | - Improved daemon logging 153 | - Improved mail alert support in daemon mode 154 | 155 | 03 Mar 2016: v1.3 156 | - Merged function codebase with osync and obackup 157 | - Fixed file extension should not change when DELETE_ORIGINAL=no 158 | - Added a suffix to original files for recognition 159 | - Fixed detection of PDFs already containing text (pdffonts should output more than 2 lines if embedded fonts are found) 160 | - Added minimal email alerts 161 | - Ported some code from osync/obackup 162 | - Added LSB info to init script for Debian based distros 163 | - Check for service directories before launching service 164 | - Added better KillChilds function on exit in service mode 165 | - Changed code to be code style V2 compliant 166 | - Added support for tesseract 3.x 167 | - Added options to suppress suffix and text in batch process 168 | 169
| 31 Aug 2015: v1.2 170 | - Added all input file formats that abbyyocr11 supports 171 | - Fixed find command to allow case insensitive input extensions 172 | - Minor improvements in logging, and code readability 173 | - Added full commandline batch mode 174 | - Added option to delete input file after successful processing 175 | - Added option to suppress OCRed filename suffix 176 | - New option to avoid passing PDFs already containing text to the OCR engine 177 | - New option to add a trivial value to the output filename (like a date) 178 | 179 | 23 Aug 2015: v1.04 180 | - Fixed multiple problems with spaces in filenames and exclusion patterns 181 | - Minor fixes for logging 182 | - Renamed all pmOCR instances to pmocr 183 | -------------------------------------------------------------------------------- /default.conf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ###### pmOCR - batch & service wrapper for OCR tools 4 | ###### (C) 2014-2022 by Orsiris de Jong (www.netpower.fr) 5 | ###### pmOCR v1.5.4 - 1.8.2 config file 2022050801 6 | CONFIG_FILE_REVISION=1 7 | 8 | ## ---------- GENERAL OPTIONS 9 | 10 | ## Instance identification 11 | INSTANCE_ID=MyOCRServer 12 | 13 | ## List of allowed extensions for input files 14 | FILES_TO_PROCESS="\(pdf\|tif\|tiff\|png\|jpg\|jpeg\|bmp\|pcx\|dcx\)" 15 | 16 | ## Number of OCR subprocesses to start simultaneously. Should not exceed the number of CPU cores for best performance. 17 | NUMBER_OF_PROCESSES=4 18 | 19 | ## The output file user and group ownership may be copied from input file (works only if executed as root). 20 | PRESERVE_OWNERSHIP=no 21 | ## Output file permissions. Defaults to 644 (works only if executed as root). 
22 | FILE_PERMISSIONS= 23 | 24 | ## OCR Engine, adjust *_OCR_ENGINE_ARGS to fit your needs, especially for language settings 25 | 26 | # Acceptable values are abbyyocr11, tesseract (tesseract 3.x, 4.x or 5.x) 27 | OCR_ENGINE=tesseract 28 | 29 | # File detection strategy: 30 | # true: use inotifywait (works when mountpoint is local) 31 | # false: use integrated inotifywait emulation which does work even on SMB/NFS shares, but takes more resources since it's poller based (poller interval is measured in seconds) 32 | INOTIFYWAIT_SUPPORT=false 33 | INOTIFY_POLLER_INTERVAL=30 34 | 35 | ## ---------- OCR Engine arguments 36 | 37 | # AbbyyOCR11 Engine Arguments 38 | ############################# 39 | 40 | ## -lpp = load predefined profile / TextExtraction_Accuracy = name of the predefined profile / -adb = Detect barcodes / -ido = Detect and rotate image orientation / -adtop = Detect text embedded in images 41 | ## -rl = List of languages for the document (French,English,Spanish) / -recc = Enhanced character confidence 42 | ##### PDF related arguments : -pfs = PDF Export preset (balanced) / -pacm = PDF/A standards (pdfa-3a) / -ptem = Specifies the mode of export of recognized text into PDF (PDF/A) format. 43 | ##### DOCX related arguments : -dheb = Highlights uncertainly recognized characters with the background color when exporting to DOCX format (color defined by the -deb parameter).
44 | ##### -deb 0xFFFF00 (yellow highlights) 45 | ##### XLSX related arguments : -xlto = only export text from table / -xlrf = remove formating from text / -xllrm = This option allows setting the mode of retaining the original document tables' layout in the output XLSX file (Default, ExactDocument, ExactLines) 46 | 47 | ## Full path to OCR engine 48 | 49 | ABBYY_OCR_ENGINE_EXEC=/usr/local/bin/abbyyocr11 50 | 51 | # Quality may be set to Balanced, MaxSpeed, MaxQuality, MinSize 52 | ABBYY_PDF_QUALITY=Balanced 53 | ABBYY_PDF_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -pfs $ABBYY_PDF_QUALITY -pacm Pdfa_3a -ptem ImageOnText -f pdf' 54 | ABBYY_WORD_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -f docx' 55 | ABBYY_EXCEL_OCR_ENGINE_ARGS=' -lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -rpihp -xlrf -xllrm ExactLines -f xlsx' 56 | ABBYY_TEXT_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -trl -f TextUnicodeDefaults' 57 | ABBYY_CSV_OCR_ENGINE_ARGS='-lpp TextExtraction_Accuracy -adb -ido -adtop -rl French,English,Spanish -recc -trl -f TextUnicodeDefaults' 58 | ABBYY_OCR_ENGINE_INPUT_ARG='-if' 59 | ABBYY_OCR_ENGINE_OUTPUT_ARG='-of' 60 | 61 | 62 | # tesseract Engine Arguments 63 | ################################ 64 | 65 | ## Working resolution for tesseract preprocessor and intermediary transformations 66 | ## Should be equal to the highest resolution of scanned documents. 
Good values are 300-600, but 600 is quite CPU hungry 67 | RESOLUTION=600 68 | 69 | ## Full path to OCR engine 70 | TESSERACT_OCR_ENGINE_EXEC=/usr/bin/tesseract 71 | TESSERACT_PDF_OCR_ENGINE_ARGS='pdf' 72 | TESSERACT_TEXT_OCR_ENGINE_ARGS='' 73 | TESSERACT_CSV_OCR_ENGINE_ARGS='' 74 | TESSERACT_OCR_ENGINE_INPUT_ARG='-l eng' # Language setting 75 | TESSERACT_OCR_ENGINE_OUTPUT_ARG='' 76 | ## tesseract intermediary transformation of PDF to TIFF 77 | TESSERACT_PDF_TO_TIFF_EXEC=/usr/bin/convert 78 | TESSERACT_PDF_TO_TIFF_OPTS='-density '${RESOLUTION}' -compress lzw' 79 | # Older Ghostscript conversion 80 | #TESSERACT_PDF_TO_TIFF_EXEC=/usr/bin/gs 81 | #TESSERACT_PDF_TO_TIFF_OPTS=' -q -dNOPAUSE -r'${RESOLUTION}'x'${RESOLUTION}' -sDEVICE=tiff32nc -sCompression=lzw -dBATCH -sOUTPUTFILE=' 82 | 83 | ## Tesseract optional arguments 84 | ## Example for Tesseract 4.x/5.x OCR LSTM engine selection (see tesseract --help-extra) 85 | # oem 0 is legacy engine, which as of tesseract 5.0.0 release with github/tesseract/tessdata traineddata gives better results 86 | # oem 1 is LSTM engine 87 | TESSERACT_OPTIONAL_ARGS='--oem 0' 88 | 89 | # Preprocessor Arguments (only for tesseract) 90 | ############################################# 91 | 92 | ## Optional preprocessor to correct scanned images (don't use this for abbyy11 which already contains its own preprocessor) 93 | ## Uncomment OCR_PREPROCESSOR_EXEC lines to use it 94 | ## See http://www.imagemagick.org/discourse-server/viewtopic.php?t=22226 for examples 95 | 96 | OCR_PREPROCESSOR_EXEC=/usr/bin/convert 97 | OCR_PREPROCESSOR_ARGS='-units PixelsPerInch -respect-parenthesis \( -compress lzw -density '${RESOLUTION}' -bordercolor black -border 1 -trim +repage -fill white -draw "color 0,0 floodfill" -alpha off -shave 1x1 \) \( -bordercolor black -border 2 -fill white -draw "color 0,0 floodfill" -alpha off -shave 0x1 -deskew 40 +repage \) -antialias -sharpen 0x3' 98 | OCR_PREPROCESSOR_INPUT_ARG='' 99 | OCR_PREPROCESSOR_OUTPUT_ARG='' 100 | 101
| ####################################################################### 102 | ### THE FOLLOWING PARAMETERS ARE USED WHEN pmOCR IS RUN AS SERVICE #### 103 | ### YOU MAY SET THEM IN COMMAND LINE WHEN USING BATCH MODE #### 104 | ####################################################################### 105 | 106 | ## List of alert mails separated by spaces 107 | DESTINATION_MAILS="infrastructure@example.com" 108 | 109 | ## Optional change of mail body encoding (using iconv) 110 | ## By default, all mails are sent in UTF-8 format without header (because of maximum compatibility of all platforms) 111 | ## You may specify an optional encoding here (like "ISO-8859-1" or whatever iconv can handle) 112 | MAIL_BODY_CHARSET="" 113 | 114 | ## Directories to monitor (Leave variables empty in order to disable specific monitoring). 115 | ## As of today, Tesseract only handles PDF, TXT and CSV 116 | PDF_MONITOR_DIR="/storage/service_ocr/PDF" 117 | WORD_MONITOR_DIR="/storage/service_ocr/WORD" 118 | EXCEL_MONITOR_DIR="/storage/service_ocr/EXCEL" 119 | TEXT_MONITOR_DIR="/storage/service_ocr/TEXT" 120 | CSV_MONITOR_DIR="/storage/service_ocr/CSV" 121 | 122 | PDF_EXTENSION=".pdf" 123 | WORD_EXTENSION=".docx" 124 | EXCEL_EXTENSION=".xlsx" 125 | TEXT_EXTENSION=".txt" 126 | CSV_EXTENSION=".csv" 127 | 128 | ## Move original file after successful processing into a path that will be ignored by the monitor. 129 | ## Enabling this setting by removing comment automatically disables DELETE_ORIGINAL and FILENAME_SUFFIX values. 130 | #MOVE_ORIGINAL_ON_SUCCESS="/storage/service_ocr/done" 131 | 132 | ## Move failed to process file into a path that will be ignored by the monitor. 133 | ## Enabling this setting by removing comment automatically disables FAILED_FILENAME_SUFFIX value. 134 | #MOVE_ORIGINAL_ON_FAILURE="/storage/service_ocr/failed" 135 | 136 | ## Adds an optional following suffix to OCRed files (ex: input.tiff becomes input_OCR.pdf). Any file containing this suffix will be ignored. 
Can be left empty. 137 | FILENAME_SUFFIX="_OCR" 138 | 139 | ## Add the following suffix to failed files in order to prevent them from being processed in a loop. Can be left empty. 140 | FAILED_FILENAME_SUFFIX="_OCR_ERR" 141 | 142 | ## Delete original file upon successful processing (has no effect if MOVE_ORIGINAL_ON_SUCCESS is set) (true/false) 143 | DELETE_ORIGINAL=false 144 | 145 | # Alternative check if PDFs are already OCRed (checks if a pdf contains a font). This will prevent images integrated in already indexed PDFs to get OCRed. (true/false) 146 | CHECK_PDF=true 147 | 148 | ## Add some extra info to the filename. Example here adds a pseudo ISO 8601 timestamp after a dot (pseudo because the colon sign would render the filename quite weird). 149 | ## Keep variables between singlequotes if you want them to expand at runtime. Leave this variable empty if you don't want to add anything (is also added to moved files). 150 | FILENAME_ADDITION='.$(date --utc +"%Y-%m-%dT%H-%M-%SZ")' 151 | 152 | ## Max time before triggering a forced OCR run when no file actions are detected 153 | MAX_TIME=3600 154 | -------------------------------------------------------------------------------- /dev/tests/shunit2/shunit2_macros_test.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # vim:et:ft=sh:sts=2:sw=2 3 | # 4 | # shunit2 unit test for macros. 5 | # 6 | # Copyright 2008-2017 Kate Ward. All Rights Reserved. 7 | # Released under the Apache 2.0 license. 8 | # 9 | # Author: kate.ward@forestent.com (Kate Ward) 10 | # https://github.com/kward/shunit2 11 | # 12 | ### ShellCheck http://www.shellcheck.net/ 13 | # Disable source following. 14 | # shellcheck disable=SC1090,SC1091 15 | # Presence of LINENO variable is checked. 16 | # shellcheck disable=SC2039 17 | 18 | # These variables will be overridden by the test helpers. 19 | stdoutF="${TMPDIR:-/tmp}/STDOUT" 20 | stderrF="${TMPDIR:-/tmp}/STDERR" 21 | 22 | # Load test helpers. 
23 | . ./shunit2_test_helpers 24 | 25 | testAssertEquals() { 26 | # Start skipping if LINENO not available. 27 | [ -z "${LINENO:-}" ] && startSkipping 28 | 29 | ( ${_ASSERT_EQUALS_} 'x' 'y' >"${stdoutF}" 2>"${stderrF}" ) 30 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 31 | rtrn=$? 32 | assertTrue '_ASSERT_EQUALS_ failure' ${rtrn} 33 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 34 | 35 | ( ${_ASSERT_EQUALS_} '"some msg"' 'x' 'y' >"${stdoutF}" 2>"${stderrF}" ) 36 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 37 | rtrn=$? 38 | assertTrue '_ASSERT_EQUALS_ w/ msg failure' ${rtrn} 39 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 40 | } 41 | 42 | testAssertNotEquals() { 43 | # Start skipping if LINENO not available. 44 | [ -z "${LINENO:-}" ] && startSkipping 45 | 46 | ( ${_ASSERT_NOT_EQUALS_} 'x' 'x' >"${stdoutF}" 2>"${stderrF}" ) 47 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 48 | rtrn=$? 49 | assertTrue '_ASSERT_NOT_EQUALS_ failure' ${rtrn} 50 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 51 | 52 | ( ${_ASSERT_NOT_EQUALS_} '"some msg"' 'x' 'x' >"${stdoutF}" 2>"${stderrF}" ) 53 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 54 | rtrn=$? 55 | assertTrue '_ASSERT_NOT_EQUALS_ w/ msg failure' ${rtrn} 56 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 57 | } 58 | 59 | testSame() { 60 | # Start skipping if LINENO not available. 61 | [ -z "${LINENO:-}" ] && startSkipping 62 | 63 | ( ${_ASSERT_SAME_} 'x' 'y' >"${stdoutF}" 2>"${stderrF}" ) 64 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 65 | rtrn=$? 66 | assertTrue '_ASSERT_SAME_ failure' ${rtrn} 67 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 68 | 69 | ( ${_ASSERT_SAME_} '"some msg"' 'x' 'y' >"${stdoutF}" 2>"${stderrF}" ) 70 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 71 | rtrn=$? 
72 | assertTrue '_ASSERT_SAME_ w/ msg failure' ${rtrn} 73 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 74 | } 75 | 76 | testNotSame() { 77 | # Start skipping if LINENO not available. 78 | [ -z "${LINENO:-}" ] && startSkipping 79 | 80 | ( ${_ASSERT_NOT_SAME_} 'x' 'x' >"${stdoutF}" 2>"${stderrF}" ) 81 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 82 | rtrn=$? 83 | assertTrue '_ASSERT_NOT_SAME_ failure' ${rtrn} 84 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 85 | 86 | ( ${_ASSERT_NOT_SAME_} '"some msg"' 'x' 'x' >"${stdoutF}" 2>"${stderrF}" ) 87 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 88 | rtrn=$? 89 | assertTrue '_ASSERT_NOT_SAME_ w/ msg failure' ${rtrn} 90 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 91 | } 92 | 93 | testNull() { 94 | # Start skipping if LINENO not available. 95 | [ -z "${LINENO:-}" ] && startSkipping 96 | 97 | ( ${_ASSERT_NULL_} 'x' >"${stdoutF}" 2>"${stderrF}" ) 98 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 99 | rtrn=$? 100 | assertTrue '_ASSERT_NULL_ failure' ${rtrn} 101 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 102 | 103 | ( ${_ASSERT_NULL_} '"some msg"' 'x' >"${stdoutF}" 2>"${stderrF}" ) 104 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 105 | rtrn=$? 106 | assertTrue '_ASSERT_NULL_ w/ msg failure' ${rtrn} 107 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 108 | } 109 | 110 | testNotNull() 111 | { 112 | # start skipping if LINENO not available 113 | [ -z "${LINENO:-}" ] && startSkipping 114 | 115 | ( ${_ASSERT_NOT_NULL_} '' >"${stdoutF}" 2>"${stderrF}" ) 116 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 117 | rtrn=$? 118 | assertTrue '_ASSERT_NOT_NULL_ failure' ${rtrn} 119 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 120 | 121 | ( ${_ASSERT_NOT_NULL_} '"some msg"' '""' >"${stdoutF}" 2>"${stderrF}" ) 122 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 123 | rtrn=$? 
124 | assertTrue '_ASSERT_NOT_NULL_ w/ msg failure' ${rtrn} 125 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stdoutF}" "${stderrF}" >&2 126 | } 127 | 128 | testAssertTrue() { 129 | # Start skipping if LINENO not available. 130 | [ -z "${LINENO:-}" ] && startSkipping 131 | 132 | ( ${_ASSERT_TRUE_} "${SHUNIT_FALSE}" >"${stdoutF}" 2>"${stderrF}" ) 133 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 134 | rtrn=$? 135 | assertTrue '_ASSERT_TRUE_ failure' ${rtrn} 136 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 137 | 138 | ( ${_ASSERT_TRUE_} '"some msg"' "${SHUNIT_FALSE}" >"${stdoutF}" 2>"${stderrF}" ) 139 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 140 | rtrn=$? 141 | assertTrue '_ASSERT_TRUE_ w/ msg failure' ${rtrn} 142 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 143 | } 144 | 145 | testAssertFalse() { 146 | # Start skipping if LINENO not available. 147 | [ -z "${LINENO:-}" ] && startSkipping 148 | 149 | ( ${_ASSERT_FALSE_} "${SHUNIT_TRUE}" >"${stdoutF}" 2>"${stderrF}" ) 150 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 151 | rtrn=$? 152 | assertTrue '_ASSERT_FALSE_ failure' ${rtrn} 153 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 154 | 155 | ( ${_ASSERT_FALSE_} '"some msg"' "${SHUNIT_TRUE}" >"${stdoutF}" 2>"${stderrF}" ) 156 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 157 | rtrn=$? 158 | assertTrue '_ASSERT_FALSE_ w/ msg failure' ${rtrn} 159 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 160 | } 161 | 162 | testFail() { 163 | # Start skipping if LINENO not available. 164 | [ -z "${LINENO:-}" ] && startSkipping 165 | 166 | ( ${_FAIL_} >"${stdoutF}" 2>"${stderrF}" ) 167 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 168 | rtrn=$? 
169 | assertTrue '_FAIL_ failure' ${rtrn} 170 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 171 | 172 | ( ${_FAIL_} '"some msg"' >"${stdoutF}" 2>"${stderrF}" ) 173 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 174 | rtrn=$? 175 | assertTrue '_FAIL_ w/ msg failure' ${rtrn} 176 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 177 | } 178 | 179 | testFailNotEquals() 180 | { 181 | # start skipping if LINENO not available 182 | [ -z "${LINENO:-}" ] && startSkipping 183 | 184 | ( ${_FAIL_NOT_EQUALS_} 'x' 'y' >"${stdoutF}" 2>"${stderrF}" ) 185 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 186 | rtrn=$? 187 | assertTrue '_FAIL_NOT_EQUALS_ failure' ${rtrn} 188 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 189 | 190 | ( ${_FAIL_NOT_EQUALS_} '"some msg"' 'x' 'y' >"${stdoutF}" 2>"${stderrF}" ) 191 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 192 | rtrn=$? 193 | assertTrue '_FAIL_NOT_EQUALS_ w/ msg failure' ${rtrn} 194 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 195 | } 196 | 197 | testFailSame() { 198 | # Start skipping if LINENO not available. 199 | [ -z "${LINENO:-}" ] && startSkipping 200 | 201 | ( ${_FAIL_SAME_} 'x' 'x' >"${stdoutF}" 2>"${stderrF}" ) 202 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 203 | rtrn=$? 204 | assertTrue '_FAIL_SAME_ failure' ${rtrn} 205 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 206 | 207 | ( ${_FAIL_SAME_} '"some msg"' 'x' 'x' >"${stdoutF}" 2>"${stderrF}" ) 208 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 209 | rtrn=$? 210 | assertTrue '_FAIL_SAME_ w/ msg failure' ${rtrn} 211 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 212 | } 213 | 214 | testFailNotSame() { 215 | # Start skipping if LINENO not available. 216 | [ -z "${LINENO:-}" ] && startSkipping 217 | 218 | ( ${_FAIL_NOT_SAME_} 'x' 'y' >"${stdoutF}" 2>"${stderrF}" ) 219 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 220 | rtrn=$? 
221 | assertTrue '_FAIL_NOT_SAME_ failure' ${rtrn} 222 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 223 | 224 | ( ${_FAIL_NOT_SAME_} '"some msg"' 'x' 'y' >"${stdoutF}" 2>"${stderrF}" ) 225 | grep '^ASSERT:\[[0-9]*\] *' "${stdoutF}" >/dev/null 226 | rtrn=$? 227 | assertTrue '_FAIL_NOT_SAME_ w/ msg failure' ${rtrn} 228 | [ "${rtrn}" -ne "${SHUNIT_TRUE}" ] && cat "${stderrF}" >&2 229 | } 230 | 231 | oneTimeSetUp() { 232 | th_oneTimeSetUp 233 | } 234 | 235 | # Disable output coloring as it breaks the tests. 236 | SHUNIT_COLOR='none'; export SHUNIT_COLOR 237 | 238 | # Load and run shUnit2. 239 | # shellcheck disable=SC2034 240 | [ -n "${ZSH_VERSION:-}" ] && SHUNIT_PARENT="$0" 241 | . "${TH_SHUNIT}" 242 | -------------------------------------------------------------------------------- /dev/tests/shunit2/lib/versions: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # vim:et:ft=sh:sts=2:sw=2 3 | # 4 | # Versions determines the versions of all installed shells. 5 | # 6 | # Copyright 2008-2018 Kate Ward. All Rights Reserved. 7 | # Released under the Apache 2.0 License. 8 | # 9 | # Author: kate.ward@forestent.com (Kate Ward) 10 | # https://github.com/kward/shlib 11 | # 12 | # This library provides reusable functions that determine actual names and 13 | # versions of installed shells and the OS. The library can also be run as a 14 | # script if set executable. 15 | # 16 | # Disable checks that aren't fully portable (POSIX != portable). 17 | # shellcheck disable=SC2006 18 | 19 | ARGV0=`basename "$0"` 20 | LSB_RELEASE='/etc/lsb-release' 21 | VERSIONS_SHELLS='ash /bin/bash /bin/dash /bin/ksh /bin/pdksh /bin/zsh /bin/sh /usr/xpg4/bin/sh /sbin/sh' 22 | 23 | true; TRUE=$? 24 | false; FALSE=$? 
25 | ERROR=2 26 | 27 | UNAME_R=`uname -r` 28 | UNAME_S=`uname -s` 29 | 30 | __versions_haveStrings=${ERROR} 31 | 32 | versions_osName() { 33 | os_name_='unrecognized' 34 | os_system_=${UNAME_S} 35 | os_release_=${UNAME_R} 36 | case ${os_system_} in 37 | CYGWIN_NT-*) os_name_='Cygwin' ;; 38 | Darwin) 39 | os_name_=`/usr/bin/sw_vers -productName` 40 | os_version_=`versions_osVersion` 41 | case ${os_version_} in 42 | 10.4|10.4.[0-9]*) os_name_='Mac OS X Tiger' ;; 43 | 10.5|10.5.[0-9]*) os_name_='Mac OS X Leopard' ;; 44 | 10.6|10.6.[0-9]*) os_name_='Mac OS X Snow Leopard' ;; 45 | 10.7|10.7.[0-9]*) os_name_='Mac OS X Lion' ;; 46 | 10.8|10.8.[0-9]*) os_name_='Mac OS X Mountain Lion' ;; 47 | 10.9|10.9.[0-9]*) os_name_='Mac OS X Mavericks' ;; 48 | 10.10|10.10.[0-9]*) os_name_='Mac OS X Yosemite' ;; 49 | 10.11|10.11.[0-9]*) os_name_='Mac OS X El Capitan' ;; 50 | 10.12|10.12.[0-9]*) os_name_='macOS Sierra' ;; 51 | 10.13|10.13.[0-9]*) os_name_='macOS High Sierra' ;; 52 | *) os_name_='macOS' ;; 53 | esac 54 | ;; 55 | FreeBSD) os_name_='FreeBSD' ;; 56 | Linux) os_name_='Linux' ;; 57 | SunOS) 58 | os_name_='SunOS' 59 | if [ -r '/etc/release' ]; then 60 | if grep 'OpenSolaris' /etc/release >/dev/null; then 61 | os_name_='OpenSolaris' 62 | else 63 | os_name_='Solaris' 64 | fi 65 | fi 66 | ;; 67 | esac 68 | 69 | echo ${os_name_} 70 | unset os_name_ os_system_ os_release_ os_version_ 71 | } 72 | 73 | versions_osVersion() { 74 | os_version_='unrecognized' 75 | os_system_=${UNAME_S} 76 | os_release_=${UNAME_R} 77 | case ${os_system_} in 78 | CYGWIN_NT-*) 79 | os_version_=`expr "${os_release_}" : '\([0-9]*\.[0-9]\.[0-9]*\).*'` 80 | ;; 81 | Darwin) 82 | os_version_=`/usr/bin/sw_vers -productVersion` 83 | ;; 84 | FreeBSD) 85 | os_version_=`expr "${os_release_}" : '\([0-9]*\.[0-9]*\)-.*'` 86 | ;; 87 | Linux) 88 | if [ -r '/etc/os-release' ]; then 89 | os_version_=`awk -F= '$1~/PRETTY_NAME/{print $2}' /etc/os-release \ 90 | |sed 's/"//g'` 91 | elif [ -r '/etc/redhat-release' ]; then 92 | 
os_version_=`cat /etc/redhat-release` 93 | elif [ -r '/etc/SuSE-release' ]; then 94 | os_version_=`head -n 1 /etc/SuSE-release` 95 | elif [ -r "${LSB_RELEASE}" ]; then 96 | if grep -q 'DISTRIB_ID=Ubuntu' "${LSB_RELEASE}"; then 97 | # shellcheck disable=SC2002 98 | os_version_=`cat "${LSB_RELEASE}" \ 99 | |awk -F= '$1~/DISTRIB_DESCRIPTION/{print $2}' \ 100 | |sed 's/"//g;s/ /-/g'` 101 | fi 102 | fi 103 | ;; 104 | SunOS) 105 | if [ -r '/etc/release' ]; then 106 | if grep 'OpenSolaris' /etc/release >/dev/null; then # OpenSolaris 107 | os_version_=`grep 'OpenSolaris' /etc/release |awk '{print $2"("$3")"}'` 108 | else # Solaris 109 | major_=`echo "${os_release_}" |sed 's/[0-9]*\.\([0-9]*\)/\1/'` 110 | minor_=`grep Solaris /etc/release |sed 's/[^u]*\(u[0-9]*\).*/\1/'` 111 | os_version_="${major_}${minor_}" 112 | fi 113 | fi 114 | ;; 115 | esac 116 | 117 | echo "${os_version_}" 118 | unset os_release_ os_system_ os_version_ major_ minor_ 119 | } 120 | 121 | versions_shellVersion() { 122 | shell_=$1 123 | 124 | shell_present_=${FALSE} 125 | case "${shell_}" in 126 | ash) [ -x '/bin/busybox' ] && shell_present_=${TRUE} ;; 127 | *) [ -x "${shell_}" ] && shell_present_=${TRUE} ;; 128 | esac 129 | if [ ${shell_present_} -eq ${FALSE} ]; then 130 | echo 'not installed' 131 | return ${FALSE} 132 | fi 133 | 134 | version_='' 135 | case ${shell_} in 136 | /sbin/sh) ;; # SunOS 137 | /usr/xpg4/bin/sh) 138 | version_=`versions_shell_xpg4 "${shell_}"` 139 | ;; # SunOS 140 | */sh) 141 | # This could be one of any number of shells. Try until one fits. 
142 | version_='' 143 | [ -z "${version_}" ] && version_=`versions_shell_bash "${shell_}"` 144 | # dash cannot be self determined yet 145 | [ -z "${version_}" ] && version_=`versions_shell_ksh "${shell_}"` 146 | # pdksh is covered in versions_shell_ksh() 147 | [ -z "${version_}" ] && version_=`versions_shell_xpg4 "${shell_}"` 148 | [ -z "${version_}" ] && version_=`versions_shell_zsh "${shell_}"` 149 | ;; 150 | ash) version_=`versions_shell_ash "${shell_}"` ;; 151 | */bash) version_=`versions_shell_bash "${shell_}"` ;; 152 | */dash) 153 | # Assuming Ubuntu Linux until somebody comes up with a better test. The 154 | # following test will return an empty string if dash is not installed. 155 | version_=`versions_shell_dash` 156 | ;; 157 | */ksh) version_=`versions_shell_ksh "${shell_}"` ;; 158 | */pdksh) version_=`versions_shell_pdksh "${shell_}"` ;; 159 | */zsh) version_=`versions_shell_zsh "${shell_}"` ;; 160 | *) version_='invalid' 161 | esac 162 | 163 | echo "${version_:-unknown}" 164 | unset shell_ version_ 165 | } 166 | 167 | # The ash shell is included in BusyBox. 168 | versions_shell_ash() { 169 | busybox --help |head -1 |sed 's/BusyBox v\([0-9.]*\) .*/\1/' 170 | } 171 | 172 | versions_shell_bash() { 173 | $1 --version : 2>&1 |grep 'GNU bash' |sed 's/.*version \([^ ]*\).*/\1/' 174 | } 175 | 176 | versions_shell_dash() { 177 | eval dpkg >/dev/null 2>&1 178 | [ $? -eq 127 ] && return # Return if dpkg not found. 179 | 180 | dpkg -l |grep ' dash ' |awk '{print $3}' 181 | } 182 | 183 | versions_shell_ksh() { 184 | versions_shell_=$1 185 | versions_version_='' 186 | 187 | # Try a few different ways to figure out the version. 188 | versions_version_=`${versions_shell_} --version : 2>&1` 189 | # shellcheck disable=SC2181 190 | if [ $? 
-eq 0 ]; then 191 | versions_version_=`echo "${versions_version_}" \ 192 | |sed 's/.*\([0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]\).*/\1/'` 193 | else 194 | versions_version_='' 195 | fi 196 | if [ -z "${versions_version_}" ]; then 197 | _versions_have_strings 198 | versions_version_=`strings "${versions_shell_}" 2>&1 \ 199 | |grep Version \ 200 | |sed 's/^.*Version \(.*\)$/\1/;s/ s+ \$$//;s/ /-/g'` 201 | fi 202 | if [ -z "${versions_version_}" ]; then 203 | versions_version_=`versions_shell_pdksh "${versions_shell_}"` 204 | fi 205 | 206 | echo "${versions_version_}" 207 | unset versions_shell_ versions_version_ 208 | } 209 | 210 | versions_shell_pdksh() { 211 | _versions_have_strings 212 | strings "$1" 2>&1 \ 213 | |grep 'PD KSH' \ 214 | |sed -e 's/.*PD KSH \(.*\)/\1/;s/ /-/g' 215 | } 216 | 217 | versions_shell_xpg4() { 218 | _versions_have_strings 219 | strings "$1" 2>&1 \ 220 | |grep 'Version' \ 221 | |sed -e 's/^@(#)Version //' 222 | } 223 | 224 | versions_shell_zsh() { 225 | versions_shell_=$1 226 | 227 | # Try a few different ways to figure out the version. 228 | # shellcheck disable=SC2016 229 | versions_version_=`echo 'echo ${ZSH_VERSION}' |${versions_shell_}` 230 | if [ -z "${versions_version_}" ]; then 231 | versions_version_=`${versions_shell_} --version : 2>&1` 232 | # shellcheck disable=SC2181 233 | if [ $? -eq 0 ]; then 234 | versions_version_=`echo "${versions_version_}" |awk '{print $2}'` 235 | else 236 | versions_version_='' 237 | fi 238 | fi 239 | 240 | echo "${versions_version_}" 241 | unset versions_shell_ versions_version_ 242 | } 243 | 244 | # Determine if the 'strings' binary installed. 245 | _versions_have_strings() { 246 | [ ${__versions_haveStrings} -ne ${ERROR} ] && return 247 | if eval strings /dev/null >/dev/null 2>&1; then 248 | __versions_haveStrings=${TRUE} 249 | return 250 | fi 251 | 252 | echo 'WARN: strings not installed. try installing binutils?' 
>&2 253 | __versions_haveStrings=${FALSE} 254 | } 255 | 256 | versions_main() { 257 | # Treat unset variables as an error. 258 | set -u 259 | 260 | os_name=`versions_osName` 261 | os_version=`versions_osVersion` 262 | echo "os: ${os_name} version: ${os_version}" 263 | 264 | for shell in ${VERSIONS_SHELLS}; do 265 | shell_version=`versions_shellVersion "${shell}"` 266 | echo "shell: ${shell} version: ${shell_version}" 267 | done 268 | } 269 | 270 | if [ "${ARGV0}" = 'versions' ]; then 271 | versions_main "$@" 272 | fi 273 | -------------------------------------------------------------------------------- /dev/tests/shunit2/shunit2_misc_test.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # vim:et:ft=sh:sts=2:sw=2 3 | # 4 | # shUnit2 unit tests of miscellaneous things 5 | # 6 | # Copyright 2008-2018 Kate Ward. All Rights Reserved. 7 | # Released under the Apache 2.0 license. 8 | # 9 | # Author: kate.ward@forestent.com (Kate Ward) 10 | # https://github.com/kward/shunit2 11 | # 12 | ### ShellCheck http://www.shellcheck.net/ 13 | # $() are not fully portable (POSIX != portable). 14 | # shellcheck disable=SC2006 15 | # Disable source following. 16 | # shellcheck disable=SC1090,SC1091 17 | # Not wanting to escape single quotes. 18 | # shellcheck disable=SC1003 19 | 20 | # These variables will be overridden by the test helpers. 21 | stdoutF="${TMPDIR:-/tmp}/STDOUT" 22 | stderrF="${TMPDIR:-/tmp}/STDERR" 23 | 24 | # Load test helpers. 25 | . ./shunit2_test_helpers 26 | 27 | # Note: the test script is prefixed with '#' chars so that shUnit2 does not 28 | # incorrectly interpret the embedded functions as real functions. 29 | testUnboundVariable() { 30 | unittestF="${SHUNIT_TMPDIR}/unittest" 31 | sed 's/^#//' >"${unittestF}" <"${stdoutF}" 2>"${stderrF}" ) 45 | assertFalse 'expected a non-zero exit value' $? 46 | grep '^ASSERT:Unknown failure' "${stdoutF}" >/dev/null 47 | assertTrue 'assert message was not generated' $? 
48 | grep '^Ran [0-9]* test' "${stdoutF}" >/dev/null 49 | assertTrue 'test count message was not generated' $? 50 | grep '^FAILED' "${stdoutF}" >/dev/null 51 | assertTrue 'failure message was not generated' $? 52 | } 53 | 54 | # assertEquals repeats message argument. 55 | # https://github.com/kward/shunit2/issues/7 56 | testIssue7() { 57 | # Disable coloring so 'ASSERT:' lines can be matched correctly. 58 | _shunit_configureColor 'none' 59 | 60 | ( assertEquals 'Some message.' 1 2 >"${stdoutF}" 2>"${stderrF}" ) 61 | diff "${stdoutF}" - >/dev/null < but was:<2> 63 | EOF 64 | rtrn=$? 65 | assertEquals "${SHUNIT_TRUE}" "${rtrn}" 66 | [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stderrF}" >&2 67 | } 68 | 69 | # Support prefixes on test output. 70 | # https://github.com/kward/shunit2/issues/29 71 | testIssue29() { 72 | unittestF="${SHUNIT_TMPDIR}/unittest" 73 | sed 's/^#//' >"${unittestF}" <"${stdoutF}" 2>"${stderrF}" ) 81 | grep '^--- test_assert' "${stdoutF}" >/dev/null 82 | rtrn=$? 83 | assertEquals "${SHUNIT_TRUE}" "${rtrn}" 84 | [ "${rtrn}" -eq "${SHUNIT_TRUE}" ] || cat "${stdoutF}" >&2 85 | } 86 | 87 | # shUnit2 should not exit with 0 when it has syntax errors. 88 | # https://github.com/kward/shunit2/issues/69 89 | testIssue69() { 90 | unittestF="${SHUNIT_TMPDIR}/unittest" 91 | 92 | for t in Equals NotEquals Null NotNull Same NotSame True False; do 93 | assert="assert${t}" 94 | sed 's/^#//' >"${unittestF}" <"${stdoutF}" 2>"${stderrF}" ) 101 | grep '^FAILED' "${stdoutF}" >/dev/null 102 | assertTrue "failure message for ${assert} was not generated" $? 103 | done 104 | } 105 | 106 | # Ensure that test fails if setup/teardown functions fail. 107 | testIssue77() { 108 | unittestF="${SHUNIT_TMPDIR}/unittest" 109 | for func in oneTimeSetUp setUp tearDown oneTimeTearDown; do 110 | sed 's/^#//' >"${unittestF}" <"${stdoutF}" 2>"${stderrF}" ) 118 | grep '^FAILED' "${stdoutF}" >/dev/null 119 | assertTrue "failure of ${func}() did not end test" $? 
120 | done 121 | } 122 | 123 | # Ensure a test failure is recorded for code containing syntax errors. 124 | # https://github.com/kward/shunit2/issues/84 125 | testIssue84() { 126 | unittestF="${SHUNIT_TMPDIR}/unittest" 127 | sed 's/^#//' >"${unittestF}" <<\EOF 128 | ## Function with syntax error. 129 | #syntax_error() { ${!#3442} -334 a$@2[1]; } 130 | #test_syntax_error() { 131 | # syntax_error 132 | # assertTrue ${SHUNIT_TRUE} 133 | #} 134 | #SHUNIT_COLOR='none' 135 | #SHUNIT_TEST_PREFIX='--- ' 136 | #. ${TH_SHUNIT} 137 | EOF 138 | ( exec "${SHUNIT_SHELL:-sh}" "${unittestF}" >"${stdoutF}" 2>"${stderrF}" ) 139 | grep '^FAILED' "${stdoutF}" >/dev/null 140 | assertTrue "failure message for ${assert} was not generated" $? 141 | } 142 | 143 | testPrepForSourcing() { 144 | assertEquals '/abc' "`_shunit_prepForSourcing '/abc'`" 145 | assertEquals './abc' "`_shunit_prepForSourcing './abc'`" 146 | assertEquals './abc' "`_shunit_prepForSourcing 'abc'`" 147 | } 148 | 149 | testEscapeCharInStr() { 150 | while read -r desc char str want; do 151 | got=`_shunit_escapeCharInStr "${char}" "${str}"` 152 | assertEquals "${desc}" "${want}" "${got}" 153 | done <<'EOF' 154 | backslash \ '' '' 155 | backslash_pre \ \def \\def 156 | backslash_mid \ abc\def abc\\def 157 | backslash_post \ abc\ abc\\ 158 | quote " '' '' 159 | quote_pre " "def \"def 160 | quote_mid " abc"def abc\"def 161 | quote_post " abc" abc\" 162 | string $ '' '' 163 | string_pre $ $def \$def 164 | string_mid $ abc$def abc\$def 165 | string_post $ abc$ abc\$ 166 | EOF 167 | 168 | # TODO(20170924:kward) fix or remove. 
169 | # actual=`_shunit_escapeCharInStr "'" ''` 170 | # assertEquals '' "${actual}" 171 | # assertEquals "abc\\'" `_shunit_escapeCharInStr "'" "abc'"` 172 | # assertEquals "abc\\'def" `_shunit_escapeCharInStr "'" "abc'def"` 173 | # assertEquals "\\'def" `_shunit_escapeCharInStr "'" "'def"` 174 | 175 | # # Must put the backtick in a variable so the shell doesn't misinterpret it 176 | # # while inside a backticked sequence (e.g. `echo '`'` would fail). 177 | # backtick='`' 178 | # actual=`_shunit_escapeCharInStr ${backtick} ''` 179 | # assertEquals '' "${actual}" 180 | # assertEquals '\`abc' \ 181 | # `_shunit_escapeCharInStr "${backtick}" ${backtick}'abc'` 182 | # assertEquals 'abc\`' \ 183 | # `_shunit_escapeCharInStr "${backtick}" 'abc'${backtick}` 184 | # assertEquals 'abc\`def' \ 185 | # `_shunit_escapeCharInStr "${backtick}" 'abc'${backtick}'def'` 186 | } 187 | 188 | testEscapeCharInStr_specialChars() { 189 | # Make sure our forward slash doesn't upset sed. 190 | assertEquals '/' "`_shunit_escapeCharInStr '\' '/'`" 191 | 192 | # Some shells escape these differently. 193 | # TODO(20170924:kward) fix or remove. 194 | #assertEquals '\\a' `_shunit_escapeCharInStr '\' '\a'` 195 | #assertEquals '\\b' `_shunit_escapeCharInStr '\' '\b'` 196 | } 197 | 198 | # Test the various ways of declaring functions. 199 | # 200 | # Prefixing (then stripping) with comment symbol so these functions aren't 201 | # treated as real functions by shUnit2. 202 | testExtractTestFunctions() { 203 | f="${SHUNIT_TMPDIR}/extract_test_functions" 204 | sed 's/^#//' <"${f}" 205 | ## Function on a single line. 206 | #testABC() { echo 'ABC'; } 207 | ## Multi-line function with '{' on next line. 208 | #test_def() 209 | # { 210 | # echo 'def' 211 | #} 212 | ## Multi-line function with '{' on first line. 213 | #testG3 () { 214 | # echo 'G3' 215 | #} 216 | ## Function with numerical values in name. 217 | #function test4() { echo '4'; } 218 | ## Leading space in front of function. 
219 | # test5() { echo '5'; } 220 | ## Function with '_' chars in name. 221 | #some_test_function() { echo 'some func'; } 222 | ## Function that sets variables. 223 | #func_with_test_vars() { 224 | # testVariable=1234 225 | #} 226 | EOF 227 | 228 | actual=`_shunit_extractTestFunctions "${f}"` 229 | assertEquals 'testABC test_def testG3 test4 test5' "${actual}" 230 | } 231 | 232 | # Test that certain external commands sometimes "stubbed" by users 233 | # are escaped. See Issue #54. 234 | testProtectedCommands() { 235 | for c in mkdir rm cat chmod; do 236 | grep "^[^#]*${c} " "${TH_SHUNIT}" | grep -qv "command ${c}" 237 | assertFalse "external call to ${c} not protected somewhere" $? 238 | done 239 | grep '^[^#]*[^ ] *\[' "${TH_SHUNIT}" | grep -qv 'command \[' 240 | assertFalse "call to [ ... ] not protected somewhere" $? 241 | grep '^[^#]* *\.' "${TH_SHUNIT}" | grep -qv 'command \.' 242 | assertFalse "call to . not protected somewhere" $? 243 | } 244 | 245 | setUp() { 246 | for f in "${stdoutF}" "${stderrF}"; do 247 | cp /dev/null "${f}" 248 | done 249 | 250 | # Reconfigure coloring as some tests override default behavior. 251 | _shunit_configureColor "${SHUNIT_COLOR_DEFAULT}" 252 | } 253 | 254 | oneTimeSetUp() { 255 | SHUNIT_COLOR_DEFAULT="${SHUNIT_COLOR}" 256 | th_oneTimeSetUp 257 | } 258 | 259 | # Load and run shUnit2. 260 | # shellcheck disable=SC2034 261 | [ -n "${ZSH_VERSION:-}" ] && SHUNIT_PARENT=$0 262 | . "${TH_SHUNIT}" 263 | -------------------------------------------------------------------------------- /dev/tests/shunit2/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 |
--------------------------------------------------------------------------------
/dev/common_install.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

## Installer script suitable for osync / obackup / pmocr

PROGRAM=[prgname]

PROGRAM_VERSION=$(grep "PROGRAM_VERSION=" $PROGRAM.sh)
PROGRAM_VERSION=${PROGRAM_VERSION#*=}
PROGRAM_BINARY=$PROGRAM".sh"
PROGRAM_BATCH=$PROGRAM"-batch.sh"
SSH_FILTER="ssh_filter.sh"

SCRIPT_BUILD=2020112901
INSTANCE_ID="installer-$SCRIPT_BUILD"

## osync / obackup / pmocr / zsnap install script
## Tested on RHEL / CentOS 6 & 7, Fedora 23, Debian 7 & 8, Mint 17 and FreeBSD 8, 10 and 11
## Please adapt this to fit your distro needs

include #### OFUNCTIONS MICRO SUBSET ####

# Get current install.sh path from http://stackoverflow.com/a/246128/2635443
SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

_LOGGER_SILENT=false
_STATS=1
ACTION="install"
FAKEROOT=""

## Default log file
## First writable location wins: system log dir, then $HOME, then the current directory
if [ -w "$FAKEROOT/var/log" ]; then
	LOG_FILE="$FAKEROOT/var/log/$PROGRAM-install.log"
elif [ "$HOME" != "" ] && [ -w "$HOME" ]; then
	LOG_FILE="$HOME/$PROGRAM-install.log"
else
	LOG_FILE="./$PROGRAM-install.log"
fi

include #### UrlEncode SUBSET ####
include #### GetLocalOS SUBSET ####
include #### GetConfFileValue SUBSET ####
include #### CleanUp SUBSET ####
include #### GenericTrapQuit SUBSET ####

## Chooses the owner / group of installed files and whether service files get installed
## Reads LOCAL_OS, LOCAL_OS_FULL and FAKEROOT; sets USER, GROUP, DO_INIT and OS
## Exits 1 on unsupported systems or when not run as the required user
function SetLocalOSSettings {
	USER=root
	DO_INIT=true

	# LOCAL_OS and LOCAL_OS_FULL are global variables set at GetLocalOS

	case "$LOCAL_OS" in
		*"BSD"*)
		GROUP=wheel
		;;
		*"MacOSX"*)
		GROUP=admin
		DO_INIT=false
		;;
		*"Cygwin"*|*"Android"*|*"msys"*|*"BusyBox"*)
		USER=""
		GROUP=""
		DO_INIT=false
		;;
		*)
		GROUP=root
		;;
	esac

	if [ "$LOCAL_OS" == "Android" ] || [ "$LOCAL_OS" == "BusyBox" ]; then
		Logger "Cannot be installed on [$LOCAL_OS]. Please use $PROGRAM.sh directly." "CRITICAL"
		exit 1
	fi

	# The user check is skipped when installing into a fake root
	if [ "$USER" != "" ] && [ "$(whoami)" != "$USER" ] && [ "$FAKEROOT" == "" ]; then
		Logger "Must be run as $USER." "CRITICAL"
		exit 1
	fi

	OS=$(UrlEncode "$LOCAL_OS_FULL")
}

## Detects the init system and stores openrc / systemd / initV / none in the global init
function GetInit {
	if [ -f /sbin/openrc-run ]; then
		init="openrc"
		Logger "Detected openrc." "NOTICE"
	elif [ -f /sbin/init ]; then
		if file /sbin/init | grep systemd > /dev/null; then
			init="systemd"
			Logger "Detected systemd." "NOTICE"
		else
			init="initV"
			Logger "Detected initV." "NOTICE"
		fi
	else
		Logger "Can't detect initV, systemd or openRC. Service files won't be installed. You can still run $PROGRAM manually or via cron." "WARN"
		init="none"
	fi
}

## Creates a directory (with parents) if missing and optionally sets its ownership
## $1 directory path
## $2 umask applied while creating (optional, only used when it is an integer)
## $3 owner (optional)
## $4 group (optional, only used when an owner is given)
## Exits 1 when the directory cannot be created or chown fails
function CreateDir {
	local dir="${1}"
	local dirMask="${2}"
	local dirUser="${3}"
	local dirGroup="${4}"

	# Was an implicit global; kept local like in CopyFile
	local userGroup=""

	if [ ! -d "$dir" ]; then
		# Subshell so the umask change does not outlive the mkdir
		(
			if [ "$(IsInteger "$dirMask")" -eq 1 ]; then
				umask "$dirMask"
			fi
			mkdir -p "$dir"
		)
		if [ $? == 0 ]; then
			Logger "Created directory [$dir]." "NOTICE"
		else
			Logger "Cannot create directory [$dir]." "CRITICAL"
			exit 1
		fi
	fi

	if [ "$dirUser" != "" ]; then
		userGroup="$dirUser"
		if [ "$dirGroup" != "" ]; then
			userGroup="$userGroup"":$dirGroup"
		fi
		chown "$userGroup" "$dir"
		if [ $? != 0 ]; then
			Logger "Could not set directory ownership on [$dir] to [$userGroup]." "CRITICAL"
			exit 1
		else
			Logger "Set file ownership on [$dir] to [$userGroup]." "NOTICE"
		fi
	fi
}

## Copies a file and optionally sets its mode and ownership
## $1 source directory
## $2 destination directory
## $3 source file name
## $4 destination file name (defaults to the source file name)
## $5 file mode (optional, must be an integer)
## $6 owner (optional)
## $7 group (optional, only used when an owner is given)
## $8 overwrite (true / false, defaults to false); when false and the destination exists, the copy is written as <source file name>.new
## Exits 1 when the copy, chmod or chown fails
function CopyFile {
	local sourcePath="${1}"
	local destPath="${2}"
	local sourceFileName="${3}"
	local destFileName="${4}"
	local fileMod="${5}"
	local fileUser="${6}"
	local fileGroup="${7}"
	local overwrite="${8:-false}"

	local userGroup=""

	if [ "$destFileName" == "" ]; then
		destFileName="$sourceFileName"
	fi

	if [ -f "$destPath/$destFileName" ] && [ "$overwrite" == false ]; then
		destFileName="$sourceFileName.new"
		Logger "Copying [$sourceFileName] to [$destPath/$destFileName]." "NOTICE"
	fi

	cp "$sourcePath/$sourceFileName" "$destPath/$destFileName"
	if [ $? != 0 ]; then
		Logger "Cannot copy [$sourcePath/$sourceFileName] to [$destPath/$destFileName]. Make sure to run install script in the directory containing all other files." "CRITICAL"
		# Name the directory actually written to; this function also serves the config and service directories
		Logger "Also make sure you have permissions to write to [$destPath]." "ERROR"
		exit 1
	else
		Logger "Copied [$sourcePath/$sourceFileName] to [$destPath/$destFileName]." "NOTICE"
		if [ "$(IsInteger "$fileMod")" -eq 1 ]; then
			chmod "$fileMod" "$destPath/$destFileName"
			if [ $? != 0 ]; then
				Logger "Cannot set file permissions of [$destPath/$destFileName] to [$fileMod]." "CRITICAL"
				exit 1
			else
				Logger "Set file permissions to [$fileMod] on [$destPath/$destFileName]." "NOTICE"
			fi
		elif [ "$fileMod" != "" ]; then
			Logger "Bogus filemod [$fileMod] for [$destPath] given." "WARN"
		fi

		if [ "$fileUser" != "" ]; then
			userGroup="$fileUser"

			if [ "$fileGroup" != "" ]; then
				userGroup="$userGroup"":$fileGroup"
			fi

			chown "$userGroup" "$destPath/$destFileName"
			if [ $? != 0 ]; then
				Logger "Could not set file ownership on [$destPath/$destFileName] to [$userGroup]." "CRITICAL"
				exit 1
			else
				Logger "Set file ownership on [$destPath/$destFileName] to [$userGroup]." "NOTICE"
			fi
		fi
	fi
}

## Copies every example configuration file found next to the installer into CONF_DIR
## Existing files are never overwritten (CopyFile writes a .new copy instead)
function CopyExampleFiles {
	local file

	exampleFiles=()
	exampleFiles[0]="sync.conf.example"		# osync
	exampleFiles[1]="host_backup.conf.example"	# obackup
	exampleFiles[2]="exclude.list.example"		# osync & obackup
	exampleFiles[3]="snapshot.conf.example"		# zsnap
	exampleFiles[4]="default.conf"			# pmocr

	for file in "${exampleFiles[@]}"; do
		if [ -f "$SCRIPT_PATH/$file" ]; then
			CopyFile "$SCRIPT_PATH" "$CONF_DIR" "$file" "$file" "" "" "" false
		fi
	done
}

## Copies the program (plus batch and ssh filter scripts for osync / obackup) into BIN_DIR with mode 755
## Ownership is only set when not installing into a fake root
function CopyProgram {
	local file

	binFiles=()
	binFiles[0]="$PROGRAM_BINARY"
	if [ "$PROGRAM" == "osync" ] || [ "$PROGRAM" == "obackup" ]; then
		binFiles[1]="$PROGRAM_BATCH"
		binFiles[2]="$SSH_FILTER"
	fi

	local user=""
	local group=""

	if [ "$USER" != "" ] && [ "$FAKEROOT" == "" ]; then
		user="$USER"
	fi
	if [ "$GROUP" != "" ] && [ "$FAKEROOT" == "" ]; then
		group="$GROUP"
	fi

	for file in "${binFiles[@]}"; do
		CopyFile "$SCRIPT_PATH" "$BIN_DIR" "$file" "$file" 755 "$user" "$group" true
	done
}

# Installs the service definition matching the detected init system ($init).
# Relies on the CreateDir, CopyFile and Logger helpers defined earlier in the installer.
# Globals read: init, SCRIPT_PATH, CONF_DIR, SERVICE_NAME, SERVICE_DIR_*, SERVICE_FILE_*,
#               TARGET_HELPER_SERVICE_NAME, TARGET_HELPER_SERVICE_FILE_*
function CopyServiceFiles {
    if [ "$init" == "systemd" ] && [ -f "$SCRIPT_PATH/$SERVICE_FILE_SYSTEMD_SYSTEM" ]; then
        CreateDir "$SERVICE_DIR_SYSTEMD_SYSTEM"
        CopyFile "$SCRIPT_PATH" "$SERVICE_DIR_SYSTEMD_SYSTEM" "$SERVICE_FILE_SYSTEMD_SYSTEM" "$SERVICE_FILE_SYSTEMD_SYSTEM" "" "" "" true
        if [ -f "$SCRIPT_PATH/$SERVICE_FILE_SYSTEMD_USER" ]; then
            CreateDir "$SERVICE_DIR_SYSTEMD_USER"
            CopyFile "$SCRIPT_PATH" "$SERVICE_DIR_SYSTEMD_USER" "$SERVICE_FILE_SYSTEMD_USER" "$SERVICE_FILE_SYSTEMD_USER" "" "" "" true
        fi

        if [ -f "$SCRIPT_PATH/$TARGET_HELPER_SERVICE_FILE_SYSTEMD_SYSTEM" ]; then
            CopyFile "$SCRIPT_PATH" "$SERVICE_DIR_SYSTEMD_SYSTEM" "$TARGET_HELPER_SERVICE_FILE_SYSTEMD_SYSTEM" "$TARGET_HELPER_SERVICE_FILE_SYSTEMD_SYSTEM" "" "" "" true
            Logger "Created optional service [$TARGET_HELPER_SERVICE_NAME] with same specifications as below." "NOTICE"
        fi
        if [ -f "$SCRIPT_PATH/$TARGET_HELPER_SERVICE_FILE_SYSTEMD_USER" ]; then
            CopyFile "$SCRIPT_PATH" "$SERVICE_DIR_SYSTEMD_USER" "$TARGET_HELPER_SERVICE_FILE_SYSTEMD_USER" "$TARGET_HELPER_SERVICE_FILE_SYSTEMD_USER" "" "" "" true
        fi

        Logger "Created [$SERVICE_NAME] service in [$SERVICE_DIR_SYSTEMD_SYSTEM] and [$SERVICE_DIR_SYSTEMD_USER]." "NOTICE"
        # The service name used to be printed literally because of a missing '$'
        Logger "Can be activated with [systemctl start $SERVICE_NAME@instance.conf] where instance.conf is the name of the config file in $CONF_DIR." "NOTICE"
        Logger "Can be enabled on boot with [systemctl enable $SERVICE_NAME@instance.conf]." "NOTICE"
        Logger "In userland, active with [systemctl --user start $SERVICE_NAME@instance.conf]." "NOTICE"
    elif [ "$init" == "initV" ] && [ -f "$SCRIPT_PATH/$SERVICE_FILE_INIT" ] && [ -d "$SERVICE_DIR_INIT" ]; then
        #CreateDir "$SERVICE_DIR_INIT"
        CopyFile "$SCRIPT_PATH" "$SERVICE_DIR_INIT" "$SERVICE_FILE_INIT" "$SERVICE_FILE_INIT" "755" "" "" true
        if [ -f "$SCRIPT_PATH/$TARGET_HELPER_SERVICE_FILE_INIT" ]; then
            CopyFile "$SCRIPT_PATH" "$SERVICE_DIR_INIT" "$TARGET_HELPER_SERVICE_FILE_INIT" "$TARGET_HELPER_SERVICE_FILE_INIT" "755" "" "" true
            Logger "Created optional service [$TARGET_HELPER_SERVICE_NAME] with same specifications as below." "NOTICE"
        fi
        Logger "Created [$SERVICE_NAME] service in [$SERVICE_DIR_INIT]." "NOTICE"
        Logger "Can be activated with [service $SERVICE_FILE_INIT start]." "NOTICE"
        Logger "Can be enabled on boot with [chkconfig $SERVICE_FILE_INIT on]." "NOTICE"
    elif [ "$init" == "openrc" ] && [ -f "$SCRIPT_PATH/$SERVICE_FILE_OPENRC" ] && [ -d "$SERVICE_DIR_OPENRC" ]; then
        # Rename service to usual service file
        CopyFile "$SCRIPT_PATH" "$SERVICE_DIR_OPENRC" "$SERVICE_FILE_OPENRC" "$SERVICE_FILE_INIT" "755" "" "" true
        # Was "$SCRPT_PATH" (undefined), which made this test always false
        if [ -f "$SCRIPT_PATH/$TARGET_HELPER_SERVICE_FILE_OPENRC" ]; then
            CopyFile "$SCRIPT_PATH" "$SERVICE_DIR_OPENRC" "$TARGET_HELPER_SERVICE_FILE_OPENRC" "$TARGET_HELPER_SERVICE_FILE_OPENRC" "755" "" "" true
            Logger "Created optional service [$TARGET_HELPER_SERVICE_NAME] with same specifications as below." "NOTICE"
        fi
        Logger "Created [$SERVICE_NAME] service in [$SERVICE_DIR_OPENRC]." "NOTICE"
        Logger "Can be activated with [rc-update add $SERVICE_NAME.instance] where instance is a configuration file found in $CONF_DIR." "NOTICE"
    else
        Logger "Cannot properly find how to deal with init on this system. Skipping service file installation." "NOTICE"
    fi
}

# Pings $STATS_LINK with wget, falling back to curl.
# Returns 0 as soon as one of them succeeds, 1 when neither could be used.
function Statistics {
    if type wget > /dev/null 2>&1; then
        if wget -qO- "$STATS_LINK" > /dev/null 2>&1; then
            return 0
        fi
    fi

    if type curl > /dev/null 2>&1; then
        if curl "$STATS_LINK" -o /dev/null > /dev/null 2>&1; then
            return 0
        fi
    fi

    Logger "Neither wget nor curl could be used. Cannot run statistics. Use the provided link please." "WARN"
    return 1
}

# Removes the regular file given as $1 and logs the outcome; missing files are only reported.
function RemoveFile {
    local file="${1}"

    if [ -f "$file" ]; then
        if rm -f -- "$file"; then
            Logger "Removed file [$file]." "NOTICE"
        else
            Logger "Could not remove file [$file]." "ERROR"
        fi
    else
        Logger "File [$file] not found. Skipping." "NOTICE"
    fi
}

# Uninstalls binaries and service files. Configuration files in $CONF_DIR are kept.
function RemoveAll {
    RemoveFile "$BIN_DIR/$PROGRAM_BINARY"

    if [ "$PROGRAM" == "osync" ] || [ "$PROGRAM" == "obackup" ]; then
        RemoveFile "$BIN_DIR/$PROGRAM_BATCH"
    fi

    if [ ! -f "$BIN_DIR/osync.sh" ] && [ ! -f "$BIN_DIR/obackup.sh" ]; then # Check if any other program requiring ssh filter is present before removal
        RemoveFile "$BIN_DIR/$SSH_FILTER"
    else
        Logger "Skipping removal of [$BIN_DIR/$SSH_FILTER] because other programs present that need it." "NOTICE"
    fi
    RemoveFile "$SERVICE_DIR_SYSTEMD_SYSTEM/$SERVICE_FILE_SYSTEMD_SYSTEM"
    RemoveFile "$SERVICE_DIR_SYSTEMD_USER/$SERVICE_FILE_SYSTEMD_USER"
    RemoveFile "$SERVICE_DIR_INIT/$SERVICE_FILE_INIT"

    # Helper services are installed by CopyServiceFiles into the same directories as the
    # main service, under their own file names. Only programs shipping a helper define them.
    if [ "$TARGET_HELPER_SERVICE_NAME" != "" ]; then
        RemoveFile "$SERVICE_DIR_SYSTEMD_SYSTEM/$TARGET_HELPER_SERVICE_FILE_SYSTEMD_SYSTEM"
        RemoveFile "$SERVICE_DIR_SYSTEMD_USER/$TARGET_HELPER_SERVICE_FILE_SYSTEMD_USER"
        RemoveFile "$SERVICE_DIR_INIT/$TARGET_HELPER_SERVICE_FILE_INIT"
        RemoveFile "$SERVICE_DIR_OPENRC/$TARGET_HELPER_SERVICE_FILE_OPENRC"
    fi

    Logger "Skipping configuration files in [$CONF_DIR]. You may remove this directory manually." "NOTICE"
}

# Prints the command line help and exits with code 127.
function Usage {
    echo "Installs $PROGRAM into $BIN_DIR"
    echo "options:"
    echo "--silent Will log and bypass user interaction."
    echo "--no-stats Used with --silent in order to refuse sending anonymous install stats."
    echo "--remove Remove the program."
    echo "--prefix=/path Use prefix to install path."
    exit 127
}

############################## Script entry point

# Parses the installer options.
# Globals written: FAKEROOT, _LOGGER_SILENT, _STATS, ACTION
function GetCommandlineArguments {
    local i

    for i in "$@"; do
        case "$i" in
            --prefix=*)
            # Strip up to the first '=' only, so that paths containing '=' are kept intact
            FAKEROOT="${i#*=}"
            ;;
            --silent)
            _LOGGER_SILENT=true
            ;;
            --no-stats)
            _STATS=0
            ;;
            --remove)
            ACTION="uninstall"
            ;;
            # '?' must be escaped, an unescaped '-?' matches any two character option
            --help|-h|-\?)
            Usage
            ;;
            *)
            Logger "Unknown option '$i'" "ERROR"
            Usage
            ;;
        esac
    done
}

GetCommandlineArguments "$@"

CONF_DIR="$FAKEROOT/etc/$PROGRAM"
BIN_DIR="$FAKEROOT/usr/local/bin"
SERVICE_DIR_INIT="$FAKEROOT/etc/init.d"
# Should be /usr/lib/systemd/system, but /lib/systemd/system exists on debian & rhel / fedora
SERVICE_DIR_SYSTEMD_SYSTEM="$FAKEROOT/lib/systemd/system"
SERVICE_DIR_SYSTEMD_USER="$FAKEROOT/etc/systemd/user"
SERVICE_DIR_OPENRC="$FAKEROOT/etc/init.d"

if [ "$PROGRAM" == "osync" ]; then
    SERVICE_NAME="osync-srv"
    TARGET_HELPER_SERVICE_NAME="osync-target-helper-srv"

    TARGET_HELPER_SERVICE_FILE_INIT="$TARGET_HELPER_SERVICE_NAME"
    TARGET_HELPER_SERVICE_FILE_SYSTEMD_SYSTEM="$TARGET_HELPER_SERVICE_NAME@.service"
    TARGET_HELPER_SERVICE_FILE_SYSTEMD_USER="$TARGET_HELPER_SERVICE_NAME@.service.user"
    TARGET_HELPER_SERVICE_FILE_OPENRC="$TARGET_HELPER_SERVICE_NAME-openrc"
elif [ "$PROGRAM" == "pmocr" ]; then
    SERVICE_NAME="pmocr-srv"
fi

SERVICE_FILE_INIT="$SERVICE_NAME"
SERVICE_FILE_SYSTEMD_SYSTEM="$SERVICE_NAME@.service"
SERVICE_FILE_SYSTEMD_USER="$SERVICE_NAME@.service.user"
SERVICE_FILE_OPENRC="$SERVICE_NAME-openrc"

## Generic code

trap GenericTrapQuit TERM EXIT HUP QUIT

| 405 | if [ ! -w "$(dirname $LOG_FILE)" ]; then 406 | echo "Cannot write to log [$(dirname $LOG_FILE)]." 407 | else 408 | Logger "Script begin, logging to [$LOG_FILE]." "DEBUG" 409 | fi 410 | 411 | # Set default umask 412 | umask 0022 413 | 414 | GetLocalOS 415 | SetLocalOSSettings 416 | # On Mac OS this always produces a warning which causes the installer to fail with exit code 2 417 | # Since we know it won't work anyway, and that's fine, just skip this step 418 | if $DO_INIT; then 419 | GetInit 420 | fi 421 | 422 | STATS_LINK="http://instcount.netpower.fr?program=$PROGRAM&version=$PROGRAM_VERSION&os=$OS&action=$ACTION" 423 | 424 | if [ "$ACTION" == "uninstall" ]; then 425 | RemoveAll 426 | Logger "$PROGRAM uninstalled." "NOTICE" 427 | else 428 | CreateDir "$CONF_DIR" 429 | CreateDir "$BIN_DIR" 430 | CopyExampleFiles 431 | CopyProgram 432 | if [ "$PROGRAM" == "osync" ] || [ "$PROGRAM" == "pmocr" ]; then 433 | CopyServiceFiles 434 | fi 435 | Logger "$PROGRAM installed. Use with $BIN_DIR/$PROGRAM_BINARY" "NOTICE" 436 | if [ "$PROGRAM" == "osync" ] || [ "$PROGRAM" == "obackup" ]; then 437 | echo "" 438 | Logger "If connecting remotely, consider setup ssh filter to enhance security." "NOTICE" 439 | echo "" 440 | fi 441 | fi 442 | 443 | if [ $_STATS -eq 1 ]; then 444 | if [ $_LOGGER_SILENT == true ]; then 445 | Statistics 446 | else 447 | Logger "In order to make usage statistics, the script would like to connect to $STATS_LINK" "NOTICE" 448 | read -r -p "No data except those in the url will be send. Allow [Y/n] " response 449 | case $response in 450 | [nN]) 451 | exit 452 | ;; 453 | *) 454 | Statistics 455 | exit $? 
456 | ;; 457 | esac 458 | fi 459 | fi 460 | -------------------------------------------------------------------------------- /dev/tests/run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # pmocr test suite 2018032501 4 | 5 | PMOCR_DIR="$(pwd)" 6 | PMOCR_DIR=${PMOCR_DIR%%/dev*} 7 | DEV_DIR="$PMOCR_DIR/dev" 8 | TESTS_DIR="$DEV_DIR/tests" 9 | SOURCE_DIR="$TESTS_DIR/source" 10 | CONF_DIR="$TESTS_DIR/conf" 11 | 12 | TMP_FILE="$DEV_DIR/tmp" 13 | 14 | BATCH_CONF="default.conf" 15 | SERVICE_CONF="service.conf" 16 | 17 | PMOCR_EXECUTABLE="pmocr.sh" 18 | PMOCR_DEV_EXECUTABLE="dev/n_pmocr.sh" 19 | 20 | PMOCR_TESTS_DIR="${HOME}/pmocr-tests" 21 | 22 | BATCH_DIR="batch" 23 | SERVICE_DIR="service" 24 | SUCCEED_DIR="succesful" 25 | FAILURE_DIR="failed" 26 | 27 | PDF_DIR="PDF" 28 | TXT_DIR="TEXT" 29 | CSV_DIR="CSV" 30 | 31 | SOURCE_FILE_1="lorem_tif.tif" 32 | SOURCE_FILE_2="lorem_png.png" 33 | SOURCE_FILE_3="lorem_pdf.pdf" 34 | SOURCE_FILE_4="lorem_searchable_pdf.pdf" 35 | 36 | # Force killing remaining services on aborted test runs 37 | 38 | #trap TrapQuit TERM EXIT HUP QUIT 39 | 40 | function TrapQuit { 41 | local result 42 | 43 | if [ -f "$SERVICE_MONITOR_FILE" ]; then 44 | rm -f "$SERVICE_MONITOR_FILE" 45 | fi 46 | 47 | CleanUp 48 | KillChilds $$ > /dev/null 2>&1 49 | result=$? 50 | if [ $result -eq 0 ]; then 51 | Logger "Service $PROGRAM stopped instance [$INSTANCE_ID] with pid [$$]." "NOTICE" 52 | else 53 | Logger "Service $PROGRAM couldn't properly stop instance [$INSTANCE_ID] with pid [$$]." "ERROR" 54 | fi 55 | exit $? 
56 | } 57 | 58 | function PrepareLocalDirs () { 59 | # Remote dirs are the same as local dirs, so no problem here 60 | if [ -d "$PMOCR_TESTS_DIR" ]; then 61 | rm -rf "$PMOCR_TESTS_DIR" 62 | fi 63 | mkdir -p "$PMOCR_TESTS_DIR" 64 | mkdir "$PMOCR_TESTS_DIR/$BATCH_DIR" 65 | mkdir "$PMOCR_TESTS_DIR/$SERVICE_DIR" 66 | mkdir "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR" 67 | mkdir "$PMOCR_TESTS_DIR/$SERVICE_DIR/$TXT_DIR" 68 | mkdir "$PMOCR_TESTS_DIR/$SERVICE_DIR/$CSV_DIR" 69 | mkdir "$PMOCR_TESTS_DIR/$SUCCEED_DIR" 70 | mkdir "$PMOCR_TESTS_DIR/$FAILURE_DIR" 71 | } 72 | 73 | function CopyTestFiles () { 74 | cp "$SOURCE_DIR/$SOURCE_FILE_1" "$PMOCR_TESTS_DIR/$BATCH_DIR" 75 | cp "$SOURCE_DIR/$SOURCE_FILE_2" "$PMOCR_TESTS_DIR/$BATCH_DIR" 76 | cp "$SOURCE_DIR/$SOURCE_FILE_3" "$PMOCR_TESTS_DIR/$BATCH_DIR" 77 | cp "$SOURCE_DIR/$SOURCE_FILE_4" "$PMOCR_TESTS_DIR/$BATCH_DIR" 78 | 79 | cp "$SOURCE_DIR/$SOURCE_FILE_1" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR" 80 | cp "$SOURCE_DIR/$SOURCE_FILE_2" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR" 81 | cp "$SOURCE_DIR/$SOURCE_FILE_3" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR" 82 | cp "$SOURCE_DIR/$SOURCE_FILE_4" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR" 83 | 84 | cp "$SOURCE_DIR/$SOURCE_FILE_1" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$TXT_DIR" 85 | cp "$SOURCE_DIR/$SOURCE_FILE_2" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$TXT_DIR" 86 | cp "$SOURCE_DIR/$SOURCE_FILE_3" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$TXT_DIR" 87 | cp "$SOURCE_DIR/$SOURCE_FILE_4" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$TXT_DIR" 88 | 89 | cp "$SOURCE_DIR/$SOURCE_FILE_1" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$CSV_DIR" 90 | cp "$SOURCE_DIR/$SOURCE_FILE_2" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$CSV_DIR" 91 | cp "$SOURCE_DIR/$SOURCE_FILE_3" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$CSV_DIR" 92 | cp "$SOURCE_DIR/$SOURCE_FILE_4" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$CSV_DIR" 93 | } 94 | 95 | function oneTimeSetUp () { 96 | START_TIME=$SECONDS 97 | 98 | source "$DEV_DIR/ofunctions.sh" 99 | 100 | # set default umask 101 | umask 0022 102 | 103 | GetLocalOS 
104 | 105 | echo "Running on $LOCAL_OS_FULL" 106 | 107 | echo "" 108 | echo -e "Running tesseract\n$(tesseract -v)" 109 | 110 | #TODO: Assuming that macos has the same syntax than bsd here 111 | if [ "$LOCAL_OS" == "msys" ] || [ "$LOCAL_OS" == "Cygwin" ]; then 112 | SUDO_CMD="" 113 | elif [ "$LOCAL_OS" == "BSD" ] || [ "$LOCAL_OS" == "MacOSX" ]; then 114 | SUDO_CMD="" 115 | else 116 | SUDO_CMD="sudo" 117 | fi 118 | 119 | # Getting tesseract version 120 | TESSERACT_VERSION=$(tesseract -v 2>&1 | head -n 1 | awk '{print $2}') 121 | echo "Running with tesseract $TESSERACT_VERSION" 122 | 123 | # Clean run and log files 124 | if [ -f /var/log/pmocr.log ]; then 125 | rm -f /var/log/pmocr.log 126 | fi 127 | 128 | rm -f /tmp/pmocr.* 129 | 130 | SetConfFileValue "$CONF_DIR/$SERVICE_CONF" "MOVE_ORIGINAL_ON_SUCCESS" "" 131 | SetConfFileValue "$CONF_DIR/$SERVICE_CONF" "MOVE_ORIGINAL_ON_FAILURE" "" 132 | } 133 | 134 | function oneTimeTearDown () { 135 | 136 | #TODO: uncomment this when dev is done 137 | #rm -rf "$PMOCR_TESTS_DIR" 138 | 139 | cd "$OSYNC_DIR" 140 | $SUDO_CMD ./install.sh --remove --no-stats 141 | assertEquals "Uninstall failed" "0" $? 142 | 143 | 144 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 145 | echo "It took $ELAPSED_TIME seconds to run these tests." 146 | } 147 | 148 | #function setUp () { 149 | #} 150 | 151 | # This test has to be done everytime in order for main executable to be fresh 152 | function test_Merge () { 153 | cd "$DEV_DIR" 154 | ./merge.sh pmocr 155 | assertEquals "Merging code" "0" $? 156 | 157 | cd "$PMOCR_DIR" 158 | $SUDO_CMD ./install.sh --no-stats 159 | assertEquals "Install failed" "0" $? 
160 | 161 | # Overwrite standard config file with tesseract one 162 | #$SUDO_CMD cp -f "$CONF_DIR/$BATCH_CONF" /etc/default/default.conf 163 | } 164 | 165 | function test_batch () { 166 | local outputFile 167 | 168 | cd "$PMOCR_DIR" 169 | 170 | # Testing batch output for formats pdf, txt and csv 171 | # Don't test for pdf output if tesseract version is lower than 3.03 172 | if [ $(VerComp "$TESSERACT_VERSION" "3.03") -lt 2 ]; then 173 | batchParm=(-p -t -c) 174 | batchOutputFormat=(pdf txt csv) 175 | else 176 | batchParm=(-t -c) 177 | batchOutputFormat=(txt csv) 178 | fi 179 | 180 | for i in $(seq 0 $((${#batchParm[@]}-1))); do 181 | 182 | otherParm=(' ' -k -d --suffix=TESTSUFFIX --no-suffix --text=TESTTEXT) 183 | for parm in "${otherParm[@]}"; do 184 | 185 | PrepareLocalDirs 186 | CopyTestFiles 187 | 188 | echo "Running batch run with parameters ${batchParm[$i]} ${parm}" 189 | ./$PMOCR_EXECUTABLE --batch ${batchParm[$i]} ${parm} --config="$CONF_DIR/$BATCH_CONF" "$PMOCR_TESTS_DIR/$BATCH_DIR" 190 | assertEquals "Batch run with parameter ${batchParm[$i]} ${parm}" "0" $? 191 | 192 | 193 | # Standard run with default options 194 | if [ "$parm" == " " ]; then 195 | # Two transformed files should be present 196 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_1%%.*}*_OCR.${batchOutputFormat[$i]}" 197 | [ $(WildcardFileExists "$outputFile") -eq 1 ] 198 | assertEquals "Missing batch output file [$outputFile]" "0" $? 199 | 200 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_2%%.*}*_OCR.${batchOutputFormat[$i]}" 201 | [ $(WildcardFileExists "$outputFile") -eq 1 ] 202 | assertEquals "Missing batch output file [$outputFile]" "0" $? 203 | 204 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_3%%.*}*_OCR.${batchOutputFormat[$i]}" 205 | [ $(WildcardFileExists "$outputFile") -eq 1 ] 206 | assertEquals "Missing batch output file [$outputFile]" "0" $? 
207 | 208 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_4%%.*}*_OCR.${batchOutputFormat[$i]}" 209 | [ $(WildcardFileExists "$outputFile") -eq 1 ] 210 | assertEquals "Missing batch output file [$outputFile]" "0" $? 211 | 212 | # Original files should be renamed with _OCR 213 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_1%%.*}_OCR.${SOURCE_FILE_1##*.}" 214 | [ -f "$outputFile" ] 215 | assertEquals "Missing batch output file [$outputFile]" "0" $? 216 | 217 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_2%%.*}_OCR.${SOURCE_FILE_2##*.}" 218 | [ -f "$outputFile" ] 219 | assertEquals "Missing batch output file [$outputFile]" "0" $? 220 | 221 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_3%%.*}_OCR.${SOURCE_FILE_3##*.}" 222 | [ -f "$outputFile" ] 223 | assertEquals "Missing batch output file [$outputFile]" "0" $? 224 | 225 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_4%%.*}_OCR.${SOURCE_FILE_3##*.}" 226 | [ -f "$outputFile" ] 227 | assertEquals "Missing batch output file [$outputFile]" "0" $? 228 | 229 | # Run with skip already searchable PDFs 230 | elif [ "$parm" == "-k" ]; then 231 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_1%%.*}_OCR.${SOURCE_FILE_1##*.}" 232 | [ $(WildcardFileExists "$outputFile") -eq 1 ] 233 | assertEquals "Missing batch output file for searchable PDF test [$outputFile]" "0" $? 234 | 235 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_2%%.*}_OCR.${SOURCE_FILE_2##*.}" 236 | [ $(WildcardFileExists "$outputFile") -eq 1 ] 237 | assertEquals "Missing batch output file for searchable PDF test [$outputFile]" "0" $? 238 | 239 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_3%%.*}_OCR.${SOURCE_FILE_3##*.}" 240 | [ $(WildcardFileExists "$outputFile") -eq 1 ] 241 | assertEquals "Missing batch output file for searchable PDF test [$outputFile]" "0" $? 
242 | 243 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_4%%.*}_OCR.${SOURCE_FILE_4##*.}" 244 | [ $(WildcardFileExists "$outputFile") -eq 0 ] 245 | assertEquals "Searchable PDF test file should not be present [$outputFile]" "0" $? 246 | 247 | # Run and delete originals on success 248 | elif [ "$parm" == "-d" ]; then 249 | [ ! -f "$SOURCE_FILE_1" ] 250 | assertEquals "Original file [$SOURCE_FILE_1] not deleted" "0" $? 251 | 252 | [ ! -f "$SOURCE_FILE_2" ] 253 | assertEquals "Original file [$SOURCE_FILE_2] not deleted" "0" $? 254 | 255 | [ ! -f "$SOURCE_FILE_3" ] 256 | assertEquals "Original file [$SOURCE_FILE_3] not deleted" "0" $? 257 | 258 | [ ! -f "$SOURCE_FILE_4" ] 259 | assertEquals "Original file [$SOURCE_FILE_4] not deleted" "0" $? 260 | 261 | # Replace _OCR with another suffix 262 | elif [ "$parm" == "--suffix=TESTSUFFIX" ]; then 263 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_1%%.*}*TESTSUFFIX.${SOURCE_FILE_1##*.}" 264 | [ $(WildcardFileExists "$outputFile") -eq 1 ] 265 | assertEquals "Missing batch output file [$outputFile]" "0" $? 266 | 267 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_2%%.*}*TESTSUFFIX.${SOURCE_FILE_2##*.}" 268 | [ $(WildcardFileExists "$outputFile") -eq 1 ] 269 | assertEquals "Missing batch output file [$outputFile]" "0" $? 270 | 271 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_3%%.*}*TESTSUFFIX.${SOURCE_FILE_3##*.}" 272 | [ $(WildcardFileExists "$outputFile") -eq 1 ] 273 | assertEquals "Missing batch output file [$outputFile]" "0" $? 274 | 275 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_4%%.*}*TESTSUFFIX.${SOURCE_FILE_4##*.}" 276 | [ $(WildcardFileExists "$outputFile") -eq 1 ] 277 | assertEquals "Missing batch output file [$outputFile]" "0" $? 
278 | 279 | # Remove suffixes 280 | elif [ "$parm" == "--no-suffix" ]; then 281 | find "$PMOCR_TESTS_DIR/$BATCH_DIR" | egrep "${SOURCE_FILE_1%%.*}\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}-[0-9]{2}-[0-9]{2}Z\.${batchOutputFormat[$i]}" 282 | assertEquals "Bogus batch output file without suffix" "0" $? 283 | 284 | find "$PMOCR_TESTS_DIR/$BATCH_DIR" | egrep "${SOURCE_FILE_2%%.*}\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}-[0-9]{2}-[0-9]{2}Z\.${batchOutputFormat[$i]}" 285 | assertEquals "Bogus batch output file without suffix" "0" $? 286 | 287 | find "$PMOCR_TESTS_DIR/$BATCH_DIR" | egrep "${SOURCE_FILE_3%%.*}\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}-[0-9]{2}-[0-9]{2}Z\.${batchOutputFormat[$i]}" 288 | assertEquals "Bogus batch output file without suffix" "0" $? 289 | 290 | find "$PMOCR_TESTS_DIR/$BATCH_DIR" | egrep "${SOURCE_FILE_4%%.*}\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}-[0-9]{2}-[0-9]{2}Z\.${batchOutputFormat[$i]}" 291 | assertEquals "Bogus batch output file without suffix" "0" $? 292 | 293 | # Add another text 294 | elif [ "$parm" == "--text=TESTTEXT" ]; then 295 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_1%%.*}TESTTEXT_OCR.${batchOutputFormat[$i]}" 296 | [ -f "$outputFile" ] 297 | assertEquals "Missing batch output file [$outputFile]" "0" $? 298 | 299 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_2%%.*}TESTTEXT_OCR.${batchOutputFormat[$i]}" 300 | [ -f "$outputFile" ] 301 | assertEquals "Missing batch output file [$outputFile]" "0" $? 302 | 303 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_3%%.*}TESTTEXT_OCR.${batchOutputFormat[$i]}" 304 | [ -f "$outputFile" ] 305 | assertEquals "Missing batch output file [$outputFile]" "0" $? 306 | 307 | outputFile="$PMOCR_TESTS_DIR/$BATCH_DIR/${SOURCE_FILE_4%%.*}TESTTEXT_OCR.${batchOutputFormat[$i]}" 308 | [ -f "$outputFile" ] 309 | assertEquals "Missing batch output file [$outputFile]" "0" $? 
310 | 311 | fi 312 | done 313 | done 314 | } 315 | 316 | function test_StandardService () { 317 | local pid 318 | local numberFiles 319 | 320 | cd "$PMOCR_DIR" 321 | 322 | PrepareLocalDirs 323 | CopyTestFiles 324 | 325 | ./$PMOCR_EXECUTABLE --service --config="$CONF_DIR/$SERVICE_CONF" & 326 | pid=$! 327 | 328 | 329 | if [ ! $pid -ne 0 ]; then 330 | assertEquals "Instance not launched, pid [$pid]" "1" $? 331 | else 332 | echo "Launched instance with pid [$pid]." 333 | fi 334 | 335 | # Trivial wait time for pmocr to process files 336 | sleep 60 337 | 338 | # Don't test PDF output on tesseract <= 3.02 339 | if [ $(VerComp "$TESSERACT_VERSION" "3.03") -ne 2 ]; then 340 | numberFiles=$(find "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR" -type f | egrep "*\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}-[0-9]{2}-[0-9]{2}Z\_OCR.pdf" | wc -l) 341 | [ $numberFiles -eq 3 ] 342 | assertEquals "Service run pdf transformed files found number invalid [$numberFiles]" "0" $? 343 | fi 344 | 345 | numberFiles=$(find "$PMOCR_TESTS_DIR/$SERVICE_DIR/$TXT_DIR" -type f | egrep "*\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}-[0-9]{2}-[0-9]{2}Z\_OCR.txt" | wc -l) 346 | [ $numberFiles -eq 3 ] 347 | assertEquals "Service run txt transformed files found number invalid [$numberFiles]" "0" $? 348 | 349 | numberFiles=$(find "$PMOCR_TESTS_DIR/$SERVICE_DIR/$CSV_DIR" -type f | egrep "*\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}-[0-9]{2}-[0-9]{2}Z\_OCR.csv" | wc -l) 350 | [ $numberFiles -eq 3 ] 351 | assertEquals "Service run csv transformed files found number invalid [$numberFiles]" "0" $? 
352 | 353 | kill -TERM $pid && sleep 5 354 | KillChilds $pid 355 | } 356 | 357 | function test_MovedFilesService () { 358 | local pid 359 | local numberFiles 360 | 361 | SetConfFileValue "$CONF_DIR/$SERVICE_CONF" "MOVE_ORIGINAL_ON_SUCCESS" "$PMOCR_TESTS_DIR/$SUCCEED_DIR" 362 | SetConfFileValue "$CONF_DIR/$SERVICE_CONF" "MOVE_ORIGINAL_ON_FAILURE" "$PMOCR_TESTS_DIR/$FAILURE_DIR" 363 | 364 | cd "$PMOCR_DIR" 365 | 366 | PrepareLocalDirs 367 | CopyTestFiles 368 | 369 | ./$PMOCR_EXECUTABLE --service --config="$CONF_DIR/$SERVICE_CONF" & 370 | pid=$! 371 | 372 | if [ ! $pid -ne 0 ]; then 373 | assertEquals "Instance not launched, pid [$pid]" "1" $? 374 | else 375 | echo "Launched instance with pid [$pid]." 376 | fi 377 | 378 | # Trivial wait time for pmocr to process files 379 | sleep 90 380 | 381 | # Don't test PDF output on tesseract <= 3.02 382 | if [ $(VerComp "$TESSERACT_VERSION" "3.03") -ne 2 ]; then 383 | numberFiles=$(find "$PMOCR_TESTS_DIR/$SUCCEED_DIR" -type f | egrep "*\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}-[0-9]{2}-[0-9]{2}Z\.pdf" | wc -l) 384 | [ $numberFiles -eq 3 ] 385 | assertEquals "Service run pdf transformed files found number invalid [$numberFiles]" "0" $? 386 | fi 387 | 388 | numberFiles=$(find "$PMOCR_TESTS_DIR/$SUCCEED_DIR" -type f | egrep "*\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}-[0-9]{2}-[0-9]{2}Z\.png" | wc -l) 389 | [ $numberFiles -eq 3 ] 390 | assertEquals "Service run txt transformed files found number invalid [$numberFiles]" "0" $? 391 | 392 | numberFiles=$(find "$PMOCR_TESTS_DIR/$SUCCEED_DIR" -type f | egrep "*\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}-[0-9]{2}-[0-9]{2}Z\.tif" | wc -l) 393 | [ $numberFiles -eq 3 ] 394 | assertEquals "Service run csv transformed files found number invalid [$numberFiles]" "0" $? 395 | 396 | 397 | kill -TERM $pid && sleep 5 398 | KillChilds $pid 399 | 400 | PrepareLocalDirs 401 | ./$PMOCR_EXECUTABLE --service --config="$CONF_DIR/$SERVICE_CONF" & 402 | pid=$! 403 | 404 | [ ! 
$pid -ne 0 ] 405 | assertEquals "Instance not launched, pid [$pid]" "1" $? 406 | 407 | # Make sure next transformations will fail in order to move originals to failed dir 408 | sleep 2 409 | OCR_ENGINE_EXEC=$(GetConfFileValue "$CONF_DIR/$SERVICE_CONF" "TESSERACT_OCR_ENGINE_EXEC") 410 | $SUDO_CMD mv $OCR_ENGINE_EXEC $OCR_ENGINE_EXEC"-alt" 411 | 412 | #CopyTestFiles 413 | # Only copy PDF files in order to not have doubles 414 | cp "$SOURCE_DIR/$SOURCE_FILE_1" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR" 415 | cp "$SOURCE_DIR/$SOURCE_FILE_2" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR" 416 | cp "$SOURCE_DIR/$SOURCE_FILE_3" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR" 417 | cp "$SOURCE_DIR/$SOURCE_FILE_4" "$PMOCR_TESTS_DIR/$SERVICE_DIR/$PDF_DIR" 418 | 419 | 420 | 421 | # Trivial wait time for pmocr to process files 422 | sleep 60 423 | 424 | # Test for failed files presence (3 files only) 425 | numberFiles=$(find "$PMOCR_TESTS_DIR/$FAILURE_DIR" -type f | egrep "*\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}-[0-9]{2}-[0-9]{2}Z\.(pdf|tif|png)" | wc -l) 426 | [ $numberFiles -eq 3 ] 427 | assertEquals "Service run pdf transformed files found number invalid [$numberFiles]" "0" $? 428 | 429 | # Rename OCR engine to make it great again 430 | $SUDO_CMD mv $OCR_ENGINE_EXEC"-alt" $OCR_ENGINE_EXEC 431 | 432 | kill -TERM $pid && sleep 5 433 | KillChilds $pid 434 | 435 | SetConfFileValue "$CONF_DIR/$SERVICE_CONF" "MOVE_ORIGINAL_ON_SUCCESS" "" 436 | SetConfFileValue "$CONF_DIR/$SERVICE_CONF" "MOVE_ORIGINAL_ON_FAILURE" "" 437 | } 438 | 439 | function test_WaitForTaskCompletion () { 440 | local pids 441 | 442 | # Tests if wait for task completion works correctly with ofunctions v2 443 | 444 | # Standard wait 445 | sleep 1 & 446 | pids="$!" 447 | sleep 2 & 448 | pids="$pids;$!" 449 | WaitForTaskCompletion $pids 0 0 $SLEEP_TIME $KEEP_LOGGING true true false ${FUNCNAME[0]} 450 | assertEquals "WaitForTaskCompletion test 1" "0" $? 451 | 452 | # Standard wait with warning 453 | sleep 2 & 454 | pids="$!" 
455 | sleep 5 & 456 | pids="$pids;$!" 457 | 458 | WaitForTaskCompletion $pids 3 0 $SLEEP_TIME $KEEP_LOGGING true true false ${FUNCNAME[0]} 459 | assertEquals "WaitForTaskCompletion test 2" "0" $? 460 | 461 | # Both pids are killed 462 | sleep 5 & 463 | pids="$!" 464 | sleep 5 & 465 | pids="$pids;$!" 466 | 467 | WaitForTaskCompletion $pids 0 2 $SLEEP_TIME $KEEP_LOGGING true true false ${FUNCNAME[0]} 468 | assertEquals "WaitForTaskCompletion test 3" "2" $? 469 | 470 | # One of two pids are killed 471 | sleep 2 & 472 | pids="$!" 473 | sleep 10 & 474 | pids="$pids;$!" 475 | 476 | WaitForTaskCompletion $pids 0 3 $SLEEP_TIME $KEEP_LOGGING true true false ${FUNCNAME[0]} 477 | assertEquals "WaitForTaskCompletion test 4" "1" $? 478 | 479 | # Count since script begin, the following should output two warnings and both pids should get killed 480 | sleep 20 & 481 | pids="$!" 482 | sleep 20 & 483 | pids="$pids;$!" 484 | 485 | WaitForTaskCompletion $pids 3 5 $SLEEP_TIME $KEEP_LOGGING false true false ${FUNCNAME[0]} 486 | assertEquals "WaitForTaskCompletion test 5" "2" $? 487 | } 488 | 489 | function test_ParallelExec () { 490 | # work with ofunction v2 491 | 492 | # Test if parallelExec works correctly in array mode 493 | 494 | cmd="sleep 2;sleep 2;sleep 2;sleep 2" 495 | ParallelExec 4 "$cmd" 496 | assertEquals "ParallelExec test 1" "0" $? 497 | 498 | cmd="sleep 2;du /none;sleep 2" 499 | ParallelExec 2 "$cmd" 500 | assertEquals "ParallelExec test 2" "1" $? 501 | 502 | cmd="sleep 4;du /none;sleep 3;du /none;sleep 2" 503 | ParallelExec 3 "$cmd" 504 | assertEquals "ParallelExec test 3" "2" $? 505 | 506 | # Test if parallelExec works correctly in file mode 507 | 508 | echo "sleep 2" > "$TMP_FILE" 509 | echo "sleep 2" >> "$TMP_FILE" 510 | echo "sleep 2" >> "$TMP_FILE" 511 | echo "sleep 2" >> "$TMP_FILE" 512 | ParallelExec 4 "$TMP_FILE" true 513 | assertEquals "ParallelExec test 4" "0" $? 
514 | 515 | echo "sleep 2" > "$TMP_FILE" 516 | echo "du /nome" >> "$TMP_FILE" 517 | echo "sleep 2" >> "$TMP_FILE" 518 | ParallelExec 2 "$TMP_FILE" true 519 | assertEquals "ParallelExec test 5" "1" $? 520 | 521 | echo "sleep 4" > "$TMP_FILE" 522 | echo "du /none" >> "$TMP_FILE" 523 | echo "sleep 3" >> "$TMP_FILE" 524 | echo "du /none" >> "$TMP_FILE" 525 | echo "sleep 2" >> "$TMP_FILE" 526 | ParallelExec 3 "$TMP_FILE" true 527 | assertEquals "ParallelExec test 6" "2" $? 528 | 529 | #function ParallelExec $numberOfProcesses $commandsArg $readFromFile $softTime $HardTime $sleepTime $keepLogging $counting $Spinner $noError $callerName 530 | # Test if parallelExec works correctly in array mode with full time control 531 | 532 | cmd="sleep 5;sleep 5;sleep 5;sleep 5;sleep 5" 533 | ParallelExec 4 "$cmd" false 1 0 .05 3600 true true false ${FUNCNAME[0]} 534 | assertEquals "ParallelExec full test 1" "0" $? 535 | 536 | cmd="sleep 2;du /none;sleep 2;sleep 2;sleep 4" 537 | ParallelExec 2 "$cmd" false 0 0 .1 2 true false false ${FUNCNAME[0]} 538 | assertEquals "ParallelExec full test 2" "1" $? 539 | 540 | cmd="sleep 4;du /none;sleep 3;du /none;sleep 2" 541 | ParallelExec 3 "$cmd" false 1 2 .05 7000 true true false ${FUNCNAME[0]} 542 | assertNotEquals "ParallelExec full test 3" "0" $? 543 | 544 | } 545 | 546 | #function test_outputLogs () { 547 | # echo "" 548 | # echo "Log output:" 549 | # echo "" 550 | # cat ${HOME}/pmocr.log 551 | #} 552 | 553 | . "$TESTS_DIR/shunit2/shunit2" 554 | -------------------------------------------------------------------------------- /dev/tests/shunit2/README.md: -------------------------------------------------------------------------------- 1 | # shUnit2 2 | 3 | shUnit2 is a [xUnit](http://en.wikipedia.org/wiki/XUnit) unit test framework for Bourne based shell scripts, and it is designed to work in a similar manner to [JUnit](http://www.junit.org), [PyUnit](http://pyunit.sourceforge.net), etc.. 
If you have ever had the desire to write a unit test for a shell script, shUnit2 can do the job. 4 | 5 | [![Travis CI](https://img.shields.io/travis/kward/shunit2.svg)](https://travis-ci.org/kward/shunit2) 6 | 7 | ## Table of Contents 8 | * [Introduction](#introduction) 9 | * [Credits / Contributors](#credits-contributors) 10 | * [Feedback](#feedback) 11 | * [Quickstart](#quickstart) 12 | * [Function Reference](#function-reference) 13 | * [General Info](#general-info) 14 | * [Asserts](#asserts) 15 | * [Failures](#failures) 16 | * [Setup/Teardown](#setup-teardown) 17 | * [Skipping](#skipping) 18 | * [Suites](#suites) 19 | * [Advanced Usage](#advanced-usage) 20 | * [Some constants you can use](#some-constants-you-can-use) 21 | * [Error Handling](#error-handling) 22 | * [Including Line Numbers in Asserts (Macros)](#including-line-numbers-in-asserts-macros) 23 | * [Test Skipping](#test-skipping) 24 | * [Appendix](#appendix) 25 | * [Getting help](#getting-help) 26 | * [Zsh](#zsh) 27 | 28 | --- 29 | ## Introduction 30 | shUnit2 was originally developed to provide a consistent testing solution for [log4sh][log4sh], a shell based logging framework similar to [log4j](http://logging.apache.org). During the development of that product, a repeated problem of having things work just fine under one shell (`/bin/bash` on Linux to be specific), and then not working under another shell (`/bin/sh` on Solaris) kept coming up. Although several simple tests were run, they were not adequate and did not catch some corner cases. The decision was finally made to write a proper unit test framework after multiple brown-bag releases were made. _Research was done to look for an existing product that met the testing requirements, but no adequate product was found._ 31 | 32 | Tested Operating Systems (varies over time) 33 | 34 | * Cygwin 35 | * FreeBSD (user supported) 36 | * Linux (Gentoo, Ubuntu) 37 | * Mac OS X 38 | * Solaris 8, 9, 10 (inc. 
OpenSolaris) 39 | 40 | Tested Shells 41 | 42 | * Bourne Shell (__sh__) 43 | * BASH - GNU Bourne Again SHell (__bash__) 44 | * DASH (__dash__) 45 | * Korn Shell (__ksh__) 46 | * pdksh - Public Domain Korn Shell (__pdksh__) 47 | * zsh - Zsh (__zsh__) (since 2.1.2) _please see the Zsh shell errata for more information_ 48 | 49 | See the appropriate Release Notes for this release (`doc/RELEASE_NOTES-X.X.X.txt`) for the list of actual versions tested. 50 | 51 | ### Credits / Contributors 52 | A list of contributors to shUnit2 can be found in `doc/contributors.md`. Many thanks go out to all those who have contributed to make this a better tool. 53 | 54 | shUnit2 is the original product of many hours of work by Kate Ward, the primary author of the code. For related software, check out https://github.com/kward. 55 | 56 | ### Feedback 57 | Feedback is most certainly welcome for this document. Send your additions, comments and criticisms to the shunit2-users@google.com mailing list. 58 | 59 | --- 60 | 61 | ## Quickstart 62 | This section will give a very quick start to running unit tests with shUnit2. More information is located in later sections. 63 | 64 | Here is a quick sample script to show how easy it is to write a unit test in shell. _Note: the script as it stands expects that you are running it from the "examples" directory._ 65 | 66 | ```sh 67 | #! /bin/sh 68 | # file: examples/equality_test.sh 69 | 70 | testEquality() { 71 | assertEquals 1 1 72 | } 73 | 74 | # Load shUnit2. 75 | . ./shunit2 76 | ``` 77 | 78 | Running the unit test should give results similar to the following. 79 | 80 | ```console 81 | $ cd examples 82 | $ ./equality_test.sh 83 | testEquality 84 | 85 | Ran 1 test. 86 | 87 | OK 88 | ``` 89 | 90 | W00t! You've just run your first successful unit test. So, what just happened? Quite a bit really, and it all happened simply by sourcing the `shunit2` library. 
The basic functionality for the script above goes like this: 91 | 92 | * When shUnit2 is sourced, it will walk through any functions defined whose name starts with the string `test`, and add those to an internal list of tests to execute. Once a list of test functions to be run has been determined, shunit2 will go to work. 93 | * Before any tests are executed, shUnit2 again looks for a function, this time one named `oneTimeSetUp()`. If it exists, it will be run. This function is normally used to setup the environment for all tests to be run. Things like creating directories for output or setting environment variables are good to place here. Just so you know, you can also declare a corresponding function named `oneTimeTearDown()` function that does the same thing, but once all the tests have been completed. It is good for removing temporary directories, etc. 94 | * shUnit2 is now ready to run tests. Before doing so though, it again looks for another function that might be declared, one named `setUp()`. If the function exists, it will be run before each test. It is good for resetting the environment so that each test starts with a clean slate. **At this stage, the first test is finally run.** The success of the test is recorded for a report that will be generated later. After the test is run, shUnit2 looks for a final function that might be declared, one named `tearDown()`. If it exists, it will be run after each test. It is a good place for cleaning up after each test, maybe doing things like removing files that were created, or removing directories. This set of steps, `setUp() > test() > tearDown()`, is repeated for all of the available tests. 95 | * Once all the work is done, shUnit2 will generate the nice report you saw above. A summary of all the successes and failures will be given so that you know how well your code is doing. 96 | 97 | We should now try adding a test that fails. Change your unit test to look like this. 98 | 99 | ```sh 100 | #! 
/bin/sh 101 | # file: examples/party_test.sh 102 | 103 | testEquality() { 104 | assertEquals 1 1 105 | } 106 | 107 | testPartyLikeItIs1999() { 108 | year=`date '+%Y'` 109 | assertEquals "It's not 1999 :-(" '1999' "${year}" 110 | } 111 | 112 | # Load shUnit2. 113 | . ./shunit2 114 | ``` 115 | 116 | So, what did you get? I guess it told you that this isn't 1999. Bummer, eh? Hopefully, you noticed a couple of things that were different about the second test. First, we added an optional message that the user will see if the assert fails. Second, we did comparisons of strings instead of integers as in the first test. It doesn't matter whether you are testing for equality of strings or integers. Both work equally well with shUnit2. 117 | 118 | Hopefully, this is enough to get you started with unit testing. If you want a ton more examples, take a look at the tests provided with [log4sh][log4sh] or [shFlags][shflags]. Both provide excellent examples of more advanced usage. shUnit2 was after all written to meet the unit testing need that [log4sh][log4sh] had. 119 | 120 | --- 121 | 122 | ## Function Reference 123 | 124 | ### General Info 125 | 126 | Any string values passed should be properly quoted -- they must be surrounded by single-quote (`'`) or double-quote (`"`) characters -- so that the shell will properly parse them. 127 | 128 | ### Asserts 129 | 130 | `assertEquals [message] expected actual` 131 | 132 | Asserts that _expected_ and _actual_ are equal to one another. The _expected_ and _actual_ values can be either strings or integer values as both will be treated as strings. The _message_ is optional, and must be quoted. 133 | 134 | `assertNotEquals [message] unexpected actual` 135 | 136 | Asserts that _unexpected_ and _actual_ are not equal to one another. The _unexpected_ and _actual_ values can be either strings or integer values as both will be treated as strings. The _message_ is optional, and must be quoted. 
137 | 138 | `assertSame [message] expected actual` 139 | 140 | This function is functionally equivalent to `assertEquals`. 141 | 142 | `assertNotSame [message] unexpected actual` 143 | 144 | This function is functionally equivalent to `assertNotEquals`. 145 | 146 | `assertNull [message] value` 147 | 148 | Asserts that _value_ is _null_, or in shell terms, a zero-length string. The _value_ must be a string as an integer value does not translate into a zero-length string. The _message_ is optional, and must be quoted. 149 | 150 | `assertNotNull [message] value` 151 | 152 | Asserts that _value_ is _not null_, or in shell terms, a non-empty string. The _value_ may be a string or an integer as the later will be parsed as a non-empty string value. The _message_ is optional, and must be quoted. 153 | 154 | `assertTrue [message] condition` 155 | 156 | Asserts that a given shell test _condition_ is _true_. The condition can be as simple as a shell _true_ value (the value `0` -- equivalent to `${SHUNIT_TRUE}`), or a more sophisticated shell conditional expression. The _message_ is optional, and must be quoted. 157 | 158 | A sophisticated shell conditional expression is equivalent to what the __if__ or __while__ shell built-ins would use (more specifically, what the __test__ command would use). Testing for example whether some value is greater than another value can be done this way. 159 | 160 | `assertTrue "[ 34 -gt 23 ]"` 161 | 162 | Testing for the ability to read a file can also be done. This particular test will fail. 163 | 164 | `assertTrue 'test failed' "[ -r /some/non-existant/file' ]"` 165 | 166 | As the expressions are standard shell __test__ expressions, it is possible to string multiple expressions together with `-a` and `-o` in the standard fashion. This test will succeed as the entire expression evaluates to _true_. 
167 | 168 | `assertTrue 'test failed' '[ 1 -eq 1 -a 2 -eq 2 ]'` 169 | 170 | _One word of warning: be very careful with your quoting as shell is not the most forgiving of bad quoting, and things will fail in strange ways._ 171 | 172 | `assertFalse [message] condition` 173 | 174 | Asserts that a given shell test _condition_ is _false_. The condition can be as simple as a shell _false_ value (the value `1` -- equivalent to `${SHUNIT_FALSE}`), or a more sophisticated shell conditional expression. The _message_ is optional, and must be quoted. 175 | 176 | _For examples of more sophisticated expressions, see `assertTrue`._ 177 | 178 | ### Failures 179 | 180 | Just to clarify, failures __do not__ test the various arguments against one another. Failures simply fail, optionally with a message, and that is all they do. If you need to test arguments against one another, use asserts. 181 | 182 | If all failures do is fail, why might one use them? There are times when you may have some very complicated logic that you need to test, and the simple asserts provided are simply not adequate. You can do your own validation of the code, use an `assertTrue ${SHUNIT_TRUE}` if your own tests succeeded, and use a failure to record a failure. 183 | 184 | `fail [message]` 185 | 186 | Fails the test immediately. The _message_ is optional, and must be quoted. 187 | 188 | `failNotEquals [message] unexpected actual` 189 | 190 | Fails the test immediately, reporting that the _unexpected_ and _actual_ values are not equal to one another. The _message_ is optional, and must be quoted. 191 | 192 | _Note: no actual comparison of unexpected and actual is done._ 193 | 194 | `failSame [message] expected actual` 195 | 196 | Fails the test immediately, reporting that the _expected_ and _actual_ values are the same. The _message_ is optional, and must be quoted. 
197 | 198 | _Note: no actual comparison of expected and actual is done._ 199 | 200 | `failNotSame [message] expected actual` 201 | 202 | Fails the test immediately, reporting that the _expected_ and _actual_ values are not the same. The _message_ is optional, and must be quoted. 203 | 204 | _Note: no actual comparison of expected and actual is done._ 205 | 206 | ### Setup/Teardown 207 | 208 | `oneTimeSetUp` 209 | 210 | This function can be optionally overridden by the user in their test suite. 211 | 212 | If this function exists, it will be called once before any tests are run. It is useful to prepare a common environment for all tests. 213 | 214 | `oneTimeTearDown` 215 | 216 | This function can be optionally overridden by the user in their test suite. 217 | 218 | If this function exists, it will be called once after all tests are completed. It is useful to clean up the environment after all tests. 219 | 220 | `setUp` 221 | 222 | This function can be optionally overridden by the user in their test suite. 223 | 224 | If this function exists, it will be called before each test is run. It is useful to reset the environment before each test. 225 | 226 | `tearDown` 227 | 228 | This function can be optionally overridden by the user in their test suite. 229 | 230 | If this function exists, it will be called after each test completes. It is useful to clean up the environment after each test. 231 | 232 | ### Skipping 233 | 234 | `startSkipping` 235 | 236 | This function forces the remaining _assert_ and _fail_ functions to be "skipped", i.e. they will have no effect. Each function skipped will be recorded so that the total of asserts and fails will not be altered. 237 | 238 | `endSkipping` 239 | 240 | This function returns calls to the _assert_ and _fail_ functions to their default behavior, i.e. they will be called. 241 | 242 | `isSkipping` 243 | 244 | This function returns the current state of skipping. 
It can be compared against `${SHUNIT_TRUE}` or `${SHUNIT_FALSE}` if desired. 245 | 246 | ### Suites 247 | 248 | The default behavior of shUnit2 is that all tests will be found dynamically. If you have a specific set of tests you want to run, or you don't want to use the standard naming scheme of prefixing your tests with `test`, these functions are for you. Most users will never use them though. 249 | 250 | `suite` 251 | 252 | This function can be optionally overridden by the user in their test suite. 253 | 254 | If this function exists, it will be called when `shunit2` is sourced. If it does not exist, shUnit2 will search the parent script for all functions beginning with the word `test`, and they will be added dynamically to the test suite. 255 | 256 | `suite_addTest name` 257 | 258 | This function adds a function named _name_ to the list of tests scheduled for execution as part of this test suite. This function should only be called from within the `suite()` function. 259 | 260 | --- 261 | 262 | ## Advanced Usage 263 | 264 | ### Some constants you can use 265 | 266 | There are several constants provided by shUnit2 as variables that might be of use to you. 267 | 268 | *Predefined* 269 | 270 | | Constant | Value | 271 | | --------------- | ----- | 272 | | SHUNIT\_TRUE | Standard shell `true` value (the integer value 0). | 273 | | SHUNIT\_FALSE | Standard shell `false` value (the integer value 1). | 274 | | SHUNIT\_ERROR | The integer value 2. | 275 | | SHUNIT\_TMPDIR | Path to temporary directory that will be automatically cleaned up upon exit of shUnit2. | 276 | | SHUNIT\_VERSION | The version of shUnit2 you are running. | 277 | 278 | *User defined* 279 | 280 | | Constant | Value | 281 | | ----------------- | ----- | 282 | | SHUNIT\_CMD\_EXPR | Override which `expr` command is used. By default `expr` is used, except on BSD systems where `gexpr` is used. | 283 | | SHUNIT\_COLOR | Enable colorized output. 
Options are 'auto', 'always', or 'never', with 'auto' being the default. | 284 | | SHUNIT\_PARENT | The filename of the shell script containing the tests. This is needed specifically for Zsh support. | 285 | | SHUNIT\_TEST\_PREFIX | Define this variable to add a prefix in front of each test name that is output in the test report. | 286 | 287 | ### Error handling 288 | 289 | The constants values `SHUNIT_TRUE`, `SHUNIT_FALSE`, and `SHUNIT_ERROR` are returned from nearly every function to indicate the success or failure of the function. Additionally the variable `flags_error` is filled with a detailed error message if any function returns with a `SHUNIT_ERROR` value. 290 | 291 | ### Including Line Numbers in Asserts (Macros) 292 | 293 | If you include lots of assert statements in an individual test function, it can become difficult to determine exactly which assert was thrown unless your messages are unique. To help somewhat, line numbers can be included in the assert messages. To enable this, a special shell "macro" must be used rather than the standard assert calls. _Shell doesn't actually have macros; the name is used here as the operation is similar to a standard macro._ 294 | 295 | For example, to include line numbers for a `assertEquals()` function call, replace the `assertEquals()` with `${_ASSERT_EQUALS_}`. 296 | 297 | _**Example** -- Asserts with and without line numbers_ 298 | ```sh 299 | #! /bin/sh 300 | # file: examples/lineno_test.sh 301 | 302 | testLineNo() { 303 | # This assert will have line numbers included (e.g. "ASSERT:[123] ..."). 304 | echo "ae: ${_ASSERT_EQUALS_}" 305 | ${_ASSERT_EQUALS_} 'not equal' 1 2 306 | 307 | # This assert will not have line numbers included (e.g. "ASSERT: ..."). 308 | assertEquals 'not equal' 1 2 309 | } 310 | 311 | # Load shUnit2. 312 | . ./shunit2 313 | ``` 314 | 315 | Notes: 316 | 317 | 1. Due to how shell parses command-line arguments, all strings used with macros should be quoted twice. 
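Namely, a single-quoted string must additionally be wrapped in double quotes inside the single quotes (`'"..."'`), and a double-quoted string must additionally be wrapped in single quotes inside the double quotes (`"'...'"`). If the string being passed is guaranteed to be non-empty, the extra quoting is not necessary.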

Normal `assertEquals` call.
`assertEquals 'some message' 'x' ''`

Macro `_ASSERT_EQUALS_` call. Note the extra quoting around the _message_ and the _null_ value.
`_ASSERT_EQUALS_ '"some message"' 'x' '""'` 318 | 319 | 1. Line numbers are not supported in all shells. If a shell does not support them, no errors will be thrown. Supported shells include: __bash__ (>=3.0), __ksh__, __pdksh__, and __zsh__. 320 | 321 | ### Test Skipping 322 | 323 | There are times where the test code you have written is just not applicable to the system you are running on. This section describes how to skip these tests but maintain the total test count. 324 | 325 | Probably the easiest example would be shell code that is meant to run under the __bash__ shell, but the unit test is running under the Bourne shell. There are things that just won't work. The following test code demonstrates two sample functions, one that will be run under any shell, and the another that will run only under the __bash__ shell. 326 | 327 | _**Example** -- math include_ 328 | ```sh 329 | # file: examples/math.inc. 330 | 331 | add_generic() { 332 | num_a=$1 333 | num_b=$2 334 | 335 | expr $1 + $2 336 | } 337 | 338 | add_bash() { 339 | num_a=$1 340 | num_b=$2 341 | 342 | echo $(($1 + $2)) 343 | } 344 | ``` 345 | 346 | And here is a corresponding unit test that correctly skips the `add_bash()` function when the unit test is not running under the __bash__ shell. 347 | 348 | _**Example** -- math unit test_ 349 | ```sh 350 | #! /bin/sh 351 | # file: examples/math_test.sh 352 | 353 | testAdding() { 354 | result=`add_generic 1 2` 355 | assertEquals \ 356 | "the result of '${result}' was wrong" \ 357 | 3 "${result}" 358 | 359 | # Disable non-generic tests. 360 | [ -z "${BASH_VERSION:-}" ] && startSkipping 361 | 362 | result=`add_bash 1 2` 363 | assertEquals \ 364 | "the result of '${result}' was wrong" \ 365 | 3 "${result}" 366 | } 367 | 368 | oneTimeSetUp() { 369 | # Load include to test. 370 | . ./math.inc 371 | } 372 | 373 | # Load and run shUnit2. 374 | . ./shunit2 375 | ``` 376 | 377 | Running the above test under the __bash__ shell will result in the following output. 
378 | 379 | ```console 380 | $ /bin/bash math_test.sh 381 | testAdding 382 | 383 | Ran 1 test. 384 | 385 | OK 386 | ``` 387 | 388 | But, running the test under any other Unix shell will result in the following output. 389 | 390 | ```console 391 | $ /bin/ksh math_test.sh 392 | testAdding 393 | 394 | Ran 1 test. 395 | 396 | OK (skipped=1) 397 | ``` 398 | 399 | As you can see, the total number of tests has not changed, but the report indicates that some tests were skipped. 400 | 401 | Skipping can be controlled with the following functions: `startSkipping()`, `endSkipping()`, and `isSkipping()`. Once skipping is enabled, it will remain enabled until the end of the current test function call, after which skipping is disabled. 402 | 403 | --- 404 | 405 | ## Appendix 406 | 407 | ### Getting Help 408 | 409 | For help, please send requests to either the shunit2-users@googlegroups.com mailing list (archives available on the web at http://groups.google.com/group/shunit2-users) or directly to Kate Ward . 410 | 411 | ### Zsh 412 | 413 | For compatibility with Zsh, there is one requirement that must be met -- the `shwordsplit` option must be set. There are three ways to accomplish this. 414 | 415 | 1. In the unit-test script, add the following shell code snippet before sourcing the `shunit2` library. 416 | 417 | ```sh 418 | setopt shwordsplit 419 | ``` 420 | 421 | 1. When invoking __zsh__ from either the command-line or as a script with `#!`, add the `-y` parameter. 422 | 423 | ```sh 424 | #! /bin/zsh -y 425 | ``` 426 | 427 | 1. When invoking __zsh__ from the command-line, add `-o shwordsplit --` as parameters before the script name. 
# Change all booleans with "yes" or "no" to true / false for v2 config syntax compatibility.
# Globals: DELETE_ORIGINAL and CHECK_PDF are rewritten in place when they hold "yes" or "no";
#          any other value (including unset) is left untouched.
function UpdateBooleans {
    local name
    local -a booleans=(DELETE_ORIGINAL CHECK_PDF)

    for name in "${booleans[@]}"; do
        # ${!name} reads the variable whose name is stored in $name, printf -v writes it back.
        # This replaces the former eval-built statement and keeps the loop variable local.
        case "${!name}" in
            yes)
                printf -v "$name" '%s' true
                ;;
            no)
                printf -v "$name" '%s' false
                ;;
        esac
    done
}


# Verifies that everything the current configuration needs is available.
# Globals read: OCR_ENGINE_EXEC, OCR_PREPROCESSOR_EXEC, CONFIG_FILE, _SERVICE_RUN, USE_INOTIFYWAIT,
#               PDF/WORD/EXCEL/TEXT/CSV_MONITOR_DIR, CHECK_PDF, OCR_ENGINE, PDF_TO_TIFF_EXEC,
#               TESTED_TESSERACT_VERSIONS
# Globals written: TESSERACT_VERSION (only when a tesseract engine is selected)
# Exits with status 1 (after a CRITICAL log entry) when a mandatory tool is missing.
# Non-writable monitor directories are only logged as ERROR and do not stop the program.
function CheckEnvironment {
    local monitorDirVar
    local monitorDir
    local versionCheck

    if [ "$OCR_ENGINE_EXEC" != "" ]; then
        if ! type "$OCR_ENGINE_EXEC" > /dev/null 2>&1; then
            Logger "OCR engine executable [$OCR_ENGINE_EXEC] not present. Please adjust in your pmocr configuration file." "CRITICAL"
            exit 1
        fi
    else
        Logger "No OCR engine selected. Please configure it in [$CONFIG_FILE]." "CRITICAL"
        exit 1
    fi

    if [ "$OCR_PREPROCESSOR_EXEC" != "" ]; then
        if ! type "$OCR_PREPROCESSOR_EXEC" > /dev/null 2>&1; then
            Logger "OCR preprocessor executable [$OCR_PREPROCESSOR_EXEC] not present. Please adjust in your pmocr configuration file." "CRITICAL"
            exit 1
        fi
    fi

    if [ "$_SERVICE_RUN" == true ]; then
        if [ "$USE_INOTIFYWAIT" == true ]; then
            if ! type inotifywait > /dev/null 2>&1; then
                Logger "inotifywait not present (see inotify-tools package ?)." "CRITICAL"
                exit 1
            fi
        fi

        if ! type pgrep > /dev/null 2>&1; then
            Logger "pgrep not present." "CRITICAL"
            exit 1
        fi

        if ! type lsof > /dev/null 2>&1; then
            Logger "lsof not present." "CRITICAL"
            exit 1
        fi

        # Every configured monitor directory gets the same writability check
        for monitorDirVar in PDF_MONITOR_DIR WORD_MONITOR_DIR EXCEL_MONITOR_DIR TEXT_MONITOR_DIR CSV_MONITOR_DIR; do
            monitorDir="${!monitorDirVar}"
            if [ "$monitorDir" != "" ] && [ ! -w "$monitorDir" ]; then
                Logger "Directory [$monitorDir] not writable." "ERROR"
            fi
        done
    fi

    if [ "$CHECK_PDF" == true ]; then
        if ! type pdffonts > /dev/null 2>&1; then
            Logger "pdffonts not present (see poppler-utils package ?)." "CRITICAL"
            exit 1
        fi
    fi

    if [ "$OCR_ENGINE" == "tesseract" ] || [ "$OCR_ENGINE" == "tesseract3" ]; then
        if ! type "$PDF_TO_TIFF_EXEC" > /dev/null 2>&1; then
            Logger "PDF to TIFF conversion executable [$PDF_TO_TIFF_EXEC] not present. Please install ImageMagick (for convert) or ghostscript (for gs)." "CRITICAL"
            exit 1
        fi

        # Ask the configured engine binary (validated above) for its version instead of whatever
        # "tesseract" happens to be first in PATH, so a custom engine path is probed correctly.
        TESSERACT_VERSION=$("$OCR_ENGINE_EXEC" -v 2>&1 | head -n 1 | awk '{print $2}')
        # VerComp is defined elsewhere; a result greater than 1 is treated here as
        # "older than 3.00" (presumably 2 means first argument is lower - verify against VerComp).
        versionCheck=$(VerComp "$TESSERACT_VERSION" "3.00")
        if [ "${versionCheck:-0}" -gt 1 ]; then
            Logger "Tesseract version [$TESSERACT_VERSION] is not supported. Please use version 3.x or better." "CRITICAL"
            Logger "Known working tesseract versions are $TESTED_TESSERACT_VERSIONS." "CRITICAL"
            exit 1
        fi
    fi
}

# Shutdown handler: removes the service monitor file, stops child processes, logs the outcome,
# runs CleanUp and exits.
# NOTE(review): the exit status is the one returned by CleanUp, not the KillChilds result -
# kept as is, confirm this is intended.
function TrapQuit {
    local result

    if [ -f "$SERVICE_MONITOR_FILE" ]; then
        rm -f "$SERVICE_MONITOR_FILE" > /dev/null 2>&1
    fi

    KillChilds $$ > /dev/null 2>&1
    result=$?
    if [ $result -eq 0 ]; then
        Logger "$PROGRAM stopped instance [$INSTANCE_ID] with pid [$$]." "NOTICE"
    else
        Logger "$PROGRAM couldn't properly stop instance [$INSTANCE_ID] with pid [$$]." "ERROR"
    fi
    CleanUp
    exit $?
}
152 | } 153 | 154 | function SetOCREngineOptions { 155 | __CheckArguments 0 $# "$@" #__WITH_PARANOIA_DEBUG 156 | 157 | if [ "$OCR_ENGINE" == "tesseract3" ] || [ "$OCR_ENGINE" == "tesseract" ]; then 158 | OCR_ENGINE_EXEC="$TESSERACT_OCR_ENGINE_EXEC" 159 | PDF_OCR_ENGINE_ARGS="$TESSERACT_PDF_OCR_ENGINE_ARGS" 160 | TEXT_OCR_ENGINE_ARGS="$TESSERACT_TEXT_OCR_ENGINE_ARGS" 161 | CSV_OCR_ENGINE_ARGS="$TESSERACT_CSV_OCR_ENGINE_ARGS" 162 | OCR_ENGINE_INPUT_ARG="$TESSERACT_OCR_ENGINE_INPUT_ARG" 163 | OCR_ENGINE_OUTPUT_ARG="$TESSERACT_OCR_ENGINE_OUTPUT_ARG" 164 | 165 | PDF_TO_TIFF_EXEC="$TESSERACT_PDF_TO_TIFF_EXEC" 166 | PDF_TO_TIFF_OPTS="$TESSERACT_PDF_TO_TIFF_OPTS" 167 | 168 | elif [ "$OCR_ENGINE" == "abbyyocr11" ]; then 169 | OCR_ENGINE_EXEC="$ABBYY_OCR_ENGINE_EXEC" 170 | PDF_OCR_ENGINE_ARGS="$ABBYY_PDF_OCR_ENGINE_ARGS" 171 | WORD_OCR_ENGINE_ARGS="$ABBYY_WORD_OCR_ENGINE_ARGS" 172 | EXCEL_OCR_ENGINE_ARGS="$ABBYY_EXCEL_OCR_ENGINE_ARGS" 173 | TEXT_OCR_ENGINE_ARGS="$ABBYY_TEXT_OCR_ENGINE_ARGS" 174 | CSV_OCR_ENGINE_ARGS="$ABBYY_CSV_OCR_ENGINE_ARGS" 175 | OCR_ENGINE_INPUT_ARG="$ABBYY_OCR_ENGINE_INPUT_ARG" 176 | OCR_ENGINE_OUTPUT_ARG="$ABBYY_OCR_ENGINE_OUTPUT_ARG" 177 | 178 | else 179 | Logger "Bogus OCR_ENGINE selected." "CRITICAL" 180 | exit 1 181 | fi 182 | } 183 | 184 | function OCR { 185 | local inputFileName="$1" # Contains full path of file to OCR 186 | local fileExtension="$2" # Filename extension of output file 187 | local ocrEngineArgs="$3" # OCR engine specific arguments 188 | local csvHack="${4:-false}" # CSV Hack boolean 189 | 190 | __CheckArguments 2-4 $# "$@" #__WITH_PARANOIA_DEBUG 191 | 192 | local findExcludes 193 | local tmpFilePreprocessor 194 | local tmpFileIntermediary 195 | local renamedFileName 196 | local outputFileName 197 | 198 | local currentTSTAMP 199 | 200 | local cmd 201 | local subcmd 202 | local result 203 | 204 | local alert=false 205 | if [ $_SILENT != true ]; then 206 | Logger "Processing file [$inputFileName]." 
"ALWAYS" 207 | fi 208 | 209 | 210 | # Expand $FILENAME_ADDITION 211 | eval "outputFileName=\"${inputFileName%.*}$FILENAME_ADDITION$FILENAME_SUFFIX\"" 212 | 213 | # Add check to see whether outputFileName already exists, if so, add a random timestamp 214 | if [ -f "$outputFileName" ] || [ -f "$outputFileName$fileExtension" ]; then 215 | outputFileName="$outputFileName$(date '+%N')" 216 | fi 217 | 218 | 219 | # Perform intermediary transformation of input pdf file to tiff if OCR_ENGINE is tesseract and no preprocessor is set 220 | if ([ "$OCR_ENGINE" == "tesseract3" ] || [ "$OCR_ENGINE" == "tesseract" ]) && [[ "$inputFileName" == *.[pP][dD][fF] ]] && [ "$OCR_PREPROCESSOR_EXEC" == "" ]; then 221 | tmpFileIntermediary="${inputFileName%.*}.__pmOCR_intermediary_.tif" 222 | subcmd="MAGICK_THREAD_LIMIT=$NUMBER_OF_PROCESSES $PDF_TO_TIFF_EXEC $PDF_TO_TIFF_OPTS \"$inputFileName\" \"$tmpFileIntermediary\" > \"$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID.$TSTAMP\"" 223 | Logger "Executing: $subcmd" "DEBUG" 224 | eval "$subcmd" 225 | result=$? 226 | if [ $result -ne 0 ]; then 227 | Logger "$PDF_TO_TIFF_EXEC intermediary transformation failed." 
"ERROR" 228 | Logger "Truncated output:\n$(head -c16384 "$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID.$TSTAMP")" "DEBUG" 229 | alert=true 230 | else 231 | fileToProcess="$tmpFileIntermediary" 232 | fi 233 | else 234 | fileToProcess="$inputFileName" 235 | fi 236 | 237 | # Run OCR Preprocessor 238 | if [ -f "$fileToProcess" ] && [ "$OCR_PREPROCESSOR_EXEC" != "" ]; then 239 | tmpFilePreprocessor="${fileToProcess%.*}.__pmOCR_preprocessed_.tif" 240 | subcmd="MAGICK_THREAD_LIMIT=$NUMBER_OF_PROCESSES $OCR_PREPROCESSOR_EXEC $OCR_PREPROCESSOR_ARGS $OCR_PREPROCESSOR_INPUT_ARGS\"$fileToProcess\" $OCR_PREPROCESSOR_OUTPUT_ARG\"$tmpFilePreprocessor\" > \"$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID.$TSTAMP\"" 241 | # This is never logged on screen since we use a subshell, but will be logged to logfile 242 | Logger "Executing $subcmd" "DEBUG" 243 | eval "$subcmd" 244 | result=$? 245 | if [ $result -ne 0 ]; then 246 | Logger "$OCR_PREPROCESSOR_EXEC preprocesser failed." "ERROR" 247 | Logger "Truncated output\n$(head -c16384 "$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID.$TSTAMP")" "DEBUG" 248 | alert=true 249 | else 250 | fileToProcess="$tmpFilePreprocessor" 251 | fi 252 | fi 253 | 254 | if [ -f "$fileToProcess" ]; then 255 | # Run Abbyy OCR 256 | if [ "$OCR_ENGINE" == "abbyyocr11" ]; then 257 | cmd="$OCR_ENGINE_EXEC $OCR_ENGINE_INPUT_ARG \"$fileToProcess\" $ocrEngineArgs $OCR_ENGINE_OUTPUT_ARG \"$outputFileName$fileExtension\" > \"$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID.$TSTAMP\" 2>&1" 258 | #TODO: THIS IS NEVER LOGGED 259 | Logger "Executing: $cmd" "DEBUG" 260 | eval "$cmd" 261 | result=$? 
262 | 263 | # Run Tesseract OCR + Intermediary transformation 264 | elif [ "$OCR_ENGINE" == "tesseract3" ] || [ "$OCR_ENGINE" == "tesseract" ]; then 265 | # Empty tmp log file first 266 | echo "" > "$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID.$TSTAMP" 267 | cmd="$OCR_ENGINE_EXEC $TESSERACT_OPTIONAL_ARGS $OCR_ENGINE_INPUT_ARG \"$fileToProcess\" $OCR_ENGINE_OUTPUT_ARG \"$outputFileName\" $ocrEngineArgs > \"$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID.$TSTAMP\" 2> \"$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.error.$SCRIPT_PID.$TSTAMP\"" 268 | #TODO: THIS IS NEVER LOGGED 269 | Logger "Executing: $cmd" "DEBUG" 270 | eval "$cmd" 271 | result=$? 272 | 273 | # Workaround for tesseract complaining about missing OSD data but still processing file without changing exit code 274 | # Tesseract may also return 0 exit code with error "read_params_file: Can't open pdf" 275 | if [ $result -eq 0 ] && grep -i "error" "$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.error.$SCRIPT_PID.$TSTAMP"; then 276 | result=9999 277 | Logger "Tesseract produced errors while transforming the document." "WARN" 278 | Logger "Truncated output\n$(head -c16384 "$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID.$TSTAMP")" "NOTICE" 279 | Logger "Truncated output\n$(head -c16384 "$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.error.$SCRIPT_PID.$TSTAMP")" "NOTICE" 280 | alert=true 281 | fi 282 | 283 | # Fix for tesseract pdf output also outputs txt format 284 | if [ "$fileExtension" == ".pdf" ] && [ -f "$outputFileName$TEXT_EXTENSION" ]; then 285 | rm -f "$outputFileName$TEXT_EXTENSION" > /dev/null 2>&1 286 | if [ $? != 0 ]; then 287 | Logger "Cannot remove temporary txt file [$outputFileName$TEXT_EXTENSION]." "WARN" 288 | alert=true 289 | fi 290 | fi 291 | else 292 | Logger "Bogus ocr engine [$OCR_ENGINE]. Please edit file [$(basename "$0")] and set [OCR_ENGINE] value." "ERROR" 293 | fi 294 | fi 295 | 296 | # Remove temporary files 297 | if [ -f "$tmpFileIntermediary" ]; then 298 | rm -f "$tmpFileIntermediary" > /dev/null 2>&1 299 | if [ $? 
!= 0 ]; then 300 | Logger "Cannot remove temporary file [$tmpFileIntermediary]." " WARN" 301 | alert=true 302 | fi 303 | fi 304 | if [ -f "$tmpFilePreprocessor" ]; then 305 | rm -f "$tmpFilePreprocessor" > /dev/null 2>&1 306 | if [ $? != 0 ]; then 307 | Logger "Cannot remove temporary file [$tmpFilePreprocessor]." " WARN" 308 | alert=true 309 | fi 310 | fi 311 | 312 | if [ $result != 0 ]; then 313 | Logger "Could not process file [$inputFileName] (OCR error code $result). See logs." "ERROR" 314 | Logger "Truncated OCR Engine Output:\n$(head -c16384 "$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID.$TSTAMP")" "ERROR" 315 | alert=true 316 | 317 | if [ "$MOVE_ORIGINAL_ON_FAILURE" != "" ]; then 318 | if [ ! -w "$MOVE_ORIGINAL_ON_FAILURE" ]; then 319 | Logger "Cannot write to folder [$MOVE_ORIGINAL_ON_FAILURE]. Will not move file [$inputFileName]." "WARN" 320 | else 321 | eval "renamedFileName=\"${inputFileName%.*}$FILENAME_ADDITION.${inputFileName##*.}\"" 322 | mv "$inputFileName" "$MOVE_ORIGINAL_ON_FAILURE/$(basename "$renamedFileName")" 323 | if [ $? != 0 ]; then 324 | Logger "Cannot move [$inputFileName] to [$MOVE_ORIGINAL_ON_FAILURE/$(basename "$renamedFileName")]. Will rename it." "WARN" 325 | alert=true 326 | fi 327 | fi 328 | fi 329 | 330 | if [ -f "$inputFileName" ]; then 331 | # Add error suffix so failed files won't be run again and create a loop 332 | # Add $TSAMP in order to avoid overwriting older files 333 | renamedFileName="${inputFileName%.*}$FAILED_FILENAME_SUFFIX.${inputFileName##*.}" 334 | if [ "$inputFileName" != "$renamedFileName" ]; then 335 | Logger "Renaming file [$inputFileName] to [$renamedFileName] in order to exclude it from next run." "WARN" 336 | mv "$inputFileName" "$renamedFileName" 337 | if [ $? != 0 ]; then 338 | Logger "Cannot move [$inputFileName] to [$renamedFileName]." 
"WARN" 339 | alert=true 340 | fi 341 | fi 342 | fi 343 | else 344 | # Convert 4 spaces or more to semi colon (hack to transform txt output to CSV) 345 | if [ $csvHack == true ]; then 346 | Logger "Applying CSV hack" "DEBUG" 347 | if [ "$OCR_ENGINE" == "abbyyocr11" ]; then 348 | sed -i.tmp 's/ */;/g' "$outputFileName$fileExtension" 349 | if [ $? == 0 ]; then 350 | rm -f "$outputFileName$fileExtension.tmp" > /dev/null 2>&1 351 | if [ $? != 0 ]; then 352 | Logger "Cannot delete temporary file [$outputFileName$fileExtension.tmp]." "WARN" 353 | alert=true 354 | fi 355 | else 356 | Logger "Cannot use csvhack on [$outputFileName$fileExtension]." "WARN" 357 | alert=true 358 | fi 359 | fi 360 | 361 | if [ "$OCR_ENGINE" == "tesseract3" ] || [ "$OCR_ENGINE" == "tesseract" ]; then 362 | sed 's/ */;/g' "$outputFileName$TEXT_EXTENSION" > "$outputFileName$CSV_EXTENSION" 363 | if [ $? == 0 ]; then 364 | rm -f "$outputFileName$TEXT_EXTENSION" > /dev/null 2>&1 365 | if [ $? != 0 ]; then 366 | Logger "Cannot delete temporary file [$outputFileName$TEXT_EXTENSION]." "WARN" 367 | alert=true 368 | fi 369 | else 370 | Logger "Cannot use csvhack on [$outputFileName$TEXT_EXTENSION]." "WARN" 371 | alert=true 372 | fi 373 | fi 374 | fi 375 | 376 | # Apply permissions and ownership 377 | if [ "$PRESERVE_OWNERSHIP" == true ]; then 378 | chown --reference "$inputFileName" "$outputFileName$fileExtension" 379 | if [ $? != 0 ]; then 380 | Logger "Cannot chown [$outputfileName$fileExtension] with reference from [$inputFileName]." "WARN" 381 | alert=true 382 | fi 383 | fi 384 | if [ $(IsInteger "$FILE_PERMISSIONS") -eq 1 ]; then 385 | chmod $FILE_PERMISSIONS "$outputFileName$fileExtension" 386 | if [ $? != 0 ]; then 387 | Logger "Cannot mod [$outputfileName$fileExtension] with [$FILE_PERMISSIONS]." "WARN" 388 | alert=true 389 | fi 390 | elif [ "$PRESERVE_OWNERSHIP" == true ]; then 391 | chmod --reference "$inputFileName" "$outputFileName$fileExtension" 392 | if [ $? 
!= 0 ]; then
				Logger "Cannot chmod [$outputFileName$fileExtension] with reference from [$inputFileName]." "WARN"
				alert=true
			fi
		fi

		if [ "$MOVE_ORIGINAL_ON_SUCCESS" != "" ]; then
			if [ ! -w "$MOVE_ORIGINAL_ON_SUCCESS" ]; then
				Logger "Cannot write to folder [$MOVE_ORIGINAL_ON_SUCCESS]. Will not move file [$inputFileName]." "WARN"
				alert=true
			else
				# NOTE(review): eval is used so FILENAME_ADDITION gets expanded, but it also re-evaluates the file name itself - confirm file names are trusted
				eval "renamedFileName=\"${inputFileName%.*}$FILENAME_ADDITION.${inputFileName##*.}\""
				mv "$inputFileName" "$MOVE_ORIGINAL_ON_SUCCESS/$(basename "$renamedFileName")"
				if [ $? != 0 ]; then
					Logger "Cannot move [$inputFileName] to [$MOVE_ORIGINAL_ON_SUCCESS/$(basename "$renamedFileName")]." "WARN"
					alert=true
				fi
			fi
		elif [ "$DELETE_ORIGINAL" == true ]; then
			Logger "Deleting file [$inputFileName]." "DEBUG"
			rm -f "$inputFileName" > /dev/null 2>&1
			if [ $? != 0 ]; then
				Logger "Cannot delete [$inputFileName]." "WARN"
				alert=true
			fi
		fi

		if [ -f "$inputFileName" ]; then
			renamedFileName="${inputFileName%.*}$FILENAME_SUFFIX.${inputFileName##*.}"
			Logger "Renaming file [$inputFileName] to [$renamedFileName]." "DEBUG"
			mv "$inputFileName" "$renamedFileName"
			if [ $? != 0 ]; then
				Logger "Cannot move [$inputFileName] to [$renamedFileName]." "WARN"
				alert=true
			fi
		fi

		if [ $_SILENT != true ]; then
			Logger "Processed file [$inputFileName]." "ALWAYS"
		fi
	fi

	if [ $alert == true ]; then
		SendAlert
		exit $result
	else
		exit 0
	fi
}

## Builds the list of OCR jobs for one directory and one target format, then runs it through ExecTasks.
##
## Arguments:
##   $1  directory to scan for input files
##   $2  file extension of the target format
##   $3  OCR engine arguments for that target format (optional)
##   $4  CSV transformation flag, true or false (optional)
## Globals read:
##   FILENAME_SUFFIX, FAILED_FILENAME_SUFFIX, MOVE_ORIGINAL_ON_SUCCESS, MOVE_ORIGINAL_ON_FAILURE,
##   FILES_TO_PROCESS, EVENT_LOG_FILE, CHECK_PDF, _BATCH_RUN, RUN_DIR, PROGRAM, SCRIPT_PID, TSTAMP,
##   KEEP_LOGGING, NUMBER_OF_PROCESSES
## Returns:
##   exit code of ExecTasks
function OCR_Dispatch {
	local directoryToProcess="$1"	#(contains some path)
	local fileExtension="$2"	#(file extension of the target format)
	local ocrEngineArgs="$3"	#(transformation specific arguments)
	local csvHack="$4"		#(CSV transformation flag)

	__CheckArguments 2-4 $# "$@"	#__WITH_PARANOIA_DEBUG

	local findExcludes=""
	local moveSuccessExclude
	local moveFailureExclude
	local failedFindExcludes=""
	local file
	local retval
	# One job (a full OCR command line) per line; consumed by ExecTasks below
	local taskFile="$RUN_DIR/$PROGRAM.${FUNCNAME[0]}.$SCRIPT_PID.$TSTAMP"

	## CHECK find excludes
	# Files already carrying the success / failure suffix must not be processed again
	if [ "$FILENAME_SUFFIX" != "" ]; then
		findExcludes="*$FILENAME_SUFFIX.*"
	fi

	if [ -d "$MOVE_ORIGINAL_ON_SUCCESS" ]; then
		moveSuccessExclude="$MOVE_ORIGINAL_ON_SUCCESS/*"
	fi

	if [ -d "$MOVE_ORIGINAL_ON_FAILURE" ]; then
		moveFailureExclude="$MOVE_ORIGINAL_ON_FAILURE/*"
	fi

	if [ "$FAILED_FILENAME_SUFFIX" != "" ]; then
		failedFindExcludes="*$FAILED_FILENAME_SUFFIX.*"
	fi

	# Always start from an empty job list
	if [ -f "$taskFile" ]; then
		rm -f "$taskFile" > /dev/null 2>&1
	fi
	touch "$taskFile"

	if [ -f "$EVENT_LOG_FILE" ]; then
		Logger "OCR dispatch launched via poller result method." "DEBUG"
	else
		Logger "OCR dispatch launched via find method." "DEBUG"
	fi

	while IFS= read -r -d $'\0' file; do
		[ "$file" == "./" ] && continue
		# pdffonts printing three lines or more means at least one font is listed, ie the PDF already has text
		if [ "$CHECK_PDF" == true ] && [ "$(pdffonts "$file" 2> /dev/null | wc -l)" -ge 3 ]; then
			Logger "Skipping file [$file] already containing text." "VERBOSE"
			continue
		fi

		# Check if file is currently being written to (mitigates slow transfer files being processed before transfer is finished)
		if ! lsof -f -- "$file" > /dev/null 2>&1; then
			if [ "$_BATCH_RUN" == true ]; then
				Logger "Preparing to process [$file]." "NOTICE"
			fi
			# NOTE(review): the job line is built by plain string interpolation; file names containing double quotes
			# or dollar signs presumably break (or get interpreted) when the line is executed - confirm in ExecTasks
			printf '%s\n' "OCR \"$file\" \"$fileExtension\" \"$ocrEngineArgs\" \"$csvHack\"" >> "$taskFile"
		else
			if [ "$_BATCH_RUN" == true ]; then
				Logger "Cannot process file [$file] currently in use." "ALWAYS"
			else
				Logger "Deferring file [$file] currently being written to." "ALWAYS"
				# Ask the main process for another dispatch round so the file gets picked up later
				kill -USR1 "$SCRIPT_PID"
			fi
		fi
	done < <(
		# If InotifyWaitPoller result file exists, prefer it to find directive
		# Fallback to full file traversal if no file exists (or if it cannot be read)
		# A failing rm does not trigger the fallback anymore, which used to list every file twice
		if [ -f "$EVENT_LOG_FILE" ] && cat "$EVENT_LOG_FILE"; then
			rm -f "$EVENT_LOG_FILE"
		else
			find "$directoryToProcess" -type f -iregex ".*\.$FILES_TO_PROCESS" ! -regex ".*\.__pmOCR_preprocessed_\..*" ! -name "$findExcludes" -and ! -wholename "$moveSuccessExclude" -and ! -wholename "$moveFailureExclude" -and ! -name "$failedFindExcludes" -print0
		fi
	)

	ExecTasks "$taskFile" "${FUNCNAME[0]}" true 0 0 3600 0 true .05 $KEEP_LOGGING false false false $NUMBER_OF_PROCESSES
	retval=$?
	if [ "$retval" -ne 0 ]; then
		Logger "Failed OCR_Dispatch run." "ERROR"
	fi
	[ -f "$taskFile" ] && rm -f "$taskFile" > /dev/null 2>&1
	return $retval
}

## USR1 signal handler (installed with trap in service mode)
## Run OCR_Dispatch once, if a new request comes when a run is active, run it again once
## DISPATCH_NEEDED is capped at 2: the run in progress plus one pending run
## DISPATCH_RUNS prevents a nested handler call from starting a second concurrent loop
function DispatchRunner {
	if [ "$DISPATCH_NEEDED" -lt 2 ]; then
		DISPATCH_NEEDED=$((DISPATCH_NEEDED+1))
	fi

	while [ "$DISPATCH_NEEDED" -gt 0 ] && [ "$DISPATCH_RUNS" == false ]; do
		DISPATCH_RUNS=true
		if [ "$PDF_MONITOR_DIR" != "" ]; then
			OCR_Dispatch "$PDF_MONITOR_DIR" "$PDF_EXTENSION" "$PDF_OCR_ENGINE_ARGS" false
		fi

		if [ "$WORD_MONITOR_DIR" != "" ]; then
			OCR_Dispatch "$WORD_MONITOR_DIR" "$WORD_EXTENSION" "$WORD_OCR_ENGINE_ARGS" false
		fi

		if [ "$EXCEL_MONITOR_DIR" != "" ]; then
			OCR_Dispatch "$EXCEL_MONITOR_DIR" "$EXCEL_EXTENSION" "$EXCEL_OCR_ENGINE_ARGS" false
		fi

		if [ "$TEXT_MONITOR_DIR" != "" ]; then
			OCR_Dispatch "$TEXT_MONITOR_DIR" "$TEXT_EXTENSION" "$TEXT_OCR_ENGINE_ARGS" false
		fi

		# CSV is the only target format using the CSV transformation flag
		if [ "$CSV_MONITOR_DIR" != "" ]; then
			OCR_Dispatch "$CSV_MONITOR_DIR" "$CSV_EXTENSION" "$CSV_OCR_ENGINE_ARGS" true
		fi
		DISPATCH_NEEDED=$((DISPATCH_NEEDED-1))
		DISPATCH_RUNS=false
	done
}

## Monitors one directory and sends USR1 to the main process whenever a watch round ends
## Loops as long as SERVICE_MONITOR_FILE exists
##
## Arguments:
##   $1  directory to monitor
##   $2  file extension of the target format (used to build the watch excludes)
function OCR_service {
	## Function arguments
	local directoryToProcess="${1}"	#(contains some path)
	local fileExtension="${2}"	#(file extension of the target format)

	__CheckArguments 2 $# "$@"	#__WITH_PARANOIA_DEBUG

	local dirAvailable=true
	local justStarted=true
	local moveSuccessExcludePoller
	local moveFailureExcludePoller
	# inotifywait excludes are kept in an array so paths holding spaces, quotes or dollar signs stay literal (no eval needed)
	local -a inotifyExcludes=(
		--exclude "(.*)$FILENAME_SUFFIX$fileExtension"
		--exclude "(.*)$FAILED_FILENAME_SUFFIX$fileExtension"
	)

	if [ -d "$MOVE_ORIGINAL_ON_SUCCESS" ]; then
		inotifyExcludes+=(--exclude "$MOVE_ORIGINAL_ON_SUCCESS")
		moveSuccessExcludePoller="$MOVE_ORIGINAL_ON_SUCCESS/.*"
	fi

	if [ -d "$MOVE_ORIGINAL_ON_FAILURE" ]; then
		inotifyExcludes+=(--exclude "$MOVE_ORIGINAL_ON_FAILURE")
		moveFailureExcludePoller="$MOVE_ORIGINAL_ON_FAILURE/.*"
	fi

	Logger "Starting $PROGRAM instance [$INSTANCE_ID] for directory [$directoryToProcess], converting to [$fileExtension]." "ALWAYS"
	while [ -f "$SERVICE_MONITOR_FILE" ]; do
		while [ ! -w "$directoryToProcess" ]; do
			Logger "Directory [$directoryToProcess] is not writable. Trying again in an hour." "ERROR"
			sleep 3600
			dirAvailable=false
		done

		if [ "$dirAvailable" == false ]; then
			Logger "Directory [$directoryToProcess] is available again. Resuming monitoring." "ERROR"
			dirAvailable=true
		fi

		# Have a first run on start
		if [ "$justStarted" == true ]; then
			kill -USR1 "$SCRIPT_PID"
			justStarted=false
		fi
		Logger "Looking for changes in [$directoryToProcess]" "NOTICE"
		# If file modifications occur, send a signal so DispatchRunner is run
		if [ "$USE_INOTIFYWAIT" == true ]; then
			inotifywait "${inotifyExcludes[@]}" -qq -r -e create,move "$directoryToProcess" --timeout "$MAX_WAIT"
		else
			Logger "Running InotifyWaitPoller process" "VERBOSE"
			# InotifyWaitPoller paths includes excludes recursive monitor_mode event_log_file events timeout
			InotifyWaitPoller "$directoryToProcess" ".*\.$FILES_TO_PROCESS" ".*$FILENAME_SUFFIX$fileExtension;.*$FAILED_FILENAME_SUFFIX;.*\.__pmOCR_preprocessed_\..*;$fileExtension;$moveSuccessExcludePoller;$moveFailureExcludePoller" true false "$EVENT_LOG_FILE" "CREATE,MODIFY,MOVED_TO" $MAX_WAIT $INOTIFY_POLLER_INTERVAL
		fi
		Logger "Changes detected in [$directoryToProcess]" "NOTICE"
		kill -USR1 "$SCRIPT_PID"
		# Update SERVICE_MONITOR_FILE to prevent automatic old file cleanup in /tmp directory (happens in RHEL 6/7)
		echo "$SCRIPT_PID" > "$SERVICE_MONITOR_FILE"
	done
}

## Prints the command line help, then exits with code 128
function Usage {
	echo ""
	echo "$PROGRAM $PROGRAM_VERSION $PROGRAM_BUILD"
	echo "$AUTHOR"
	echo "$CONTACT"
	echo ""
	echo "You may adjust file default config in /etc/pmocr/default.conf according to your OCR needs (language, ocr engine, etc)."
	echo ""
	echo "$PROGRAM can be launched as a directory monitoring service using \"service $PROGRAM-srv start\" or \"systemctl start $PROGRAM-srv\" or in batch processing mode"
	echo "Batch mode usage:"
	echo "$PROGRAM.sh --batch [options] /path/to/folder"
	echo ""
	echo "[OPTIONS]"
	echo "--config=/path/to/config Use an alternative OCR config file."
	echo "-p, --target=PDF Creates a PDF document (default)"
	echo "-w, --target=DOCX Creates a WORD document"
	echo "-e, --target=XLSX Creates an EXCEL document"
	echo "-t, --target=TXT Creates a text file"
	echo "-c, --target=CSV Creates a CSV file"
	echo "(multiple targets can be set)"
	echo ""
	echo "-k, --skip-txt-pdf Skips PDF files already containing indexable text"
	echo "-d, --delete-input Deletes input file after processing ( preventing them to be processed again)"
	echo "--suffix=... Adds a given suffix to the output filename (in order to not process them again, ex: pdf to pdf conversion)."
	echo " By default, the suffix is '_OCR'"
	echo "--no-suffix Won't add any suffix to the output filename"
	echo "--failed-suffix=... Adds a given suffix to failed files (in order not to process them again). Defaults to '_OCR_ERR'"
	echo "--no-failed-suffix Won't add any suffix to failed conversion filenames"
	# The dollar sign is escaped so the example is printed literally instead of being executed
	echo "--text=... Adds a given text / variable to the output filename (ex: --text='\$(date +%Y)')."
	echo " By default, the text is the conversion date in pseudo ISO format."
	echo "--no-text Won't add any text to the output filename"
	echo "-s, --silent Will not output anything to stdout except errors"
	echo "-v, --verbose Verbose output"
	echo "--service Run as service"
	echo ""
	exit 128
}

#### SCRIPT ENTRY POINT ####

trap TrapQuit EXIT

# Command line defaults, need to be set before the first GetCommandlineArguments call
_SILENT=false
skip_txt_pdf=false
delete_input=false
suffix=""
no_suffix=false
failed_suffix=""
no_failed_suffix=false
text=""
no_text=false
_BATCH_RUN=false
_SERVICE_RUN=false

pdf=false
docx=false
xlsx=false
txt=false
csv=false

INOTIFY_POLLER_INTERVAL=30

## Parses the command line arguments into the global option variables
## Unknown long options are silently ignored
function GetCommandlineArguments {
	local i

	for i in "$@"
	do
		case "$i" in
			# Option values are extracted with #*= (shortest match) so values containing an equal sign are kept whole
			--config=*)
			CONFIG_FILE="${i#*=}"
			;;
			--batch)
			_BATCH_RUN=true
			;;
			--service)
			_SERVICE_RUN=true
			;;
			--silent|-s)
			_SILENT=true
			;;
			--verbose|-v)
			_LOGGER_VERBOSE=true
			;;
			-p|--target=PDF|--target=pdf)
			pdf=true
			;;
			-w|--target=DOCX|--target=docx)
			docx=true
			;;
			-e|--target=XLSX|--target=xlsx)
			xlsx=true
			;;
			-t|--target=TXT|--target=txt)
			txt=true
			;;
			-c|--target=CSV|--target=csv)
			csv=true
			;;
			-k|--skip-txt-pdf)
			skip_txt_pdf=true
			;;
			-d|--delete-input)
			delete_input=true
			;;
			--suffix=*)
			suffix="${i#*=}"
			;;
			--no-suffix)
			no_suffix=true
			;;
			--failed-suffix=*)
			failed_suffix="${i#*=}"
			;;
			--no-failed-suffix)
			no_failed_suffix=true
			;;
			--text=*)
			text="${i#*=}"
			;;
			--no-text)
			no_text=true
			;;
			# -v is already taken by --verbose above
			# The unquoted -? is a glob pattern: any other two character option ends up here too
			--help|-h|--version|-?)
			Usage
			;;
		esac
	done
}

GetCommandlineArguments "${@}"

if [ "$CONFIG_FILE" != "" ]; then
	LoadConfigFile "$CONFIG_FILE" $CONFIG_FILE_REVISION_REQUIRED
else
	LoadConfigFile "$DEFAULT_CONFIG_FILE" $CONFIG_FILE_REVISION_REQUIRED
fi

# Keep compat with earlier typo in config file
if [ "$FILES_TO_PROCESS" == "" ] && [ "$FILES_TO_PROCES" != "" ]; then
	FILES_TO_PROCESS="$FILES_TO_PROCES"
fi

# Reload GetCommandlineArguments in order to allow override config values with runtime arguments
GetCommandlineArguments "${@}"

UpdateBooleans
SetOCREngineOptions

# Log file location: config value first, then /var/log, then home directory, then current directory
if [ "$LOGFILE" == "" ]; then
	if [ -w /var/log ]; then
		LOG_FILE="/var/log/$PROGRAM.$INSTANCE_ID.log"
	elif [ "$HOME" != "" ] && [ -w "$HOME" ]; then
		LOG_FILE="$HOME/$PROGRAM.$INSTANCE_ID.log"
	else
		LOG_FILE="./$PROGRAM.$INSTANCE_ID.log"
	fi
else
	LOG_FILE="$LOGFILE"
fi
if [ ! -w "$(dirname "$LOG_FILE")" ]; then
	echo "Cannot write to log [$(dirname "$LOG_FILE")]."
else
	Logger "Script begin, logging to [$LOG_FILE]." "DEBUG"
fi

# This file must not be cleaned with CleanUp function, hence its naming scheme is different
SERVICE_MONITOR_FILE="$RUN_DIR/$PROGRAM.$INSTANCE_ID.$SCRIPT_PID.$TSTAMP.SERVICE-MONITOR.run"

# Set default conversion format
if [ "$pdf" == false ] && [ "$docx" == false ] && [ "$xlsx" == false ] && [ "$txt" == false ] && [ "$csv" == false ]; then
	pdf=true
fi

# Add default values
if [ "$FILENAME_SUFFIX" == "" ]; then
	FILENAME_SUFFIX="_OCR"
fi
if [ "$FAILED_FILENAME_SUFFIX" == "" ]; then
	FAILED_FILENAME_SUFFIX="_OCR_ERR"
fi

# Commandline arguments override default config
if [ "$_BATCH_RUN" == true ]; then
	if [ "$skip_txt_pdf" == true ]; then
		CHECK_PDF=true
	fi

	if [ "$no_suffix" == true ]; then
		FILENAME_SUFFIX=""
	fi

	if [ "$suffix" != "" ]; then
		FILENAME_SUFFIX="$suffix"
	fi

	if [ "$no_failed_suffix" == true ]; then
		FAILED_FILENAME_SUFFIX=""
	fi

	if [ "$failed_suffix" != "" ]; then
		FAILED_FILENAME_SUFFIX="$failed_suffix"
	fi

	if [ "$text" != "" ]; then
		FILENAME_ADDITION="$text"
	fi

	if [ "$no_text" == true ]; then
		FILENAME_ADDITION=""
	fi

	if [ "$delete_input" == true ]; then
		DELETE_ORIGINAL=true
	fi
fi

CheckEnvironment

if [ "$_SERVICE_RUN" == true ]; then
	trap DispatchRunner USR1
	trap TrapQuit TERM EXIT HUP QUIT

	EVENT_LOG_FILE="$RUN_DIR/$PROGRAM.eventLog.$SCRIPT_PID.$TSTAMP"
	if ! echo "$SCRIPT_PID" > "$SERVICE_MONITOR_FILE"; then
		Logger "Cannot write service file [$SERVICE_MONITOR_FILE]." "CRITICAL"
		exit 1
	fi

	if [ "$_LOGGER_VERBOSE" == false ]; then
		_LOGGER_ERR_ONLY=true
	fi

	# Global variable for DispatchRunner function
	DISPATCH_NEEDED=0
	DISPATCH_RUNS=false

	Logger "Service $PROGRAM instance [$INSTANCE_ID] pid [$$] started as [$LOCAL_USER] on [$LOCAL_HOST] using $OCR_ENGINE." "ALWAYS"

	# One background monitor per configured directory
	if [ "$PDF_MONITOR_DIR" != "" ]; then
		OCR_service "$PDF_MONITOR_DIR" "$PDF_EXTENSION" &
	fi

	if [ "$WORD_MONITOR_DIR" != "" ]; then
		OCR_service "$WORD_MONITOR_DIR" "$WORD_EXTENSION" &
	fi

	if [ "$EXCEL_MONITOR_DIR" != "" ]; then
		OCR_service "$EXCEL_MONITOR_DIR" "$EXCEL_EXTENSION" &
	fi

	if [ "$TEXT_MONITOR_DIR" != "" ]; then
		OCR_service "$TEXT_MONITOR_DIR" "$TEXT_EXTENSION" &
	fi

	if [ "$CSV_MONITOR_DIR" != "" ]; then
		OCR_service "$CSV_MONITOR_DIR" "$CSV_EXTENSION" &
	fi

	# Keep running until trap function quits
	while true
	do
		# Keep low value so main script will execute USR1 trapped function
		sleep 1
	done

elif [ "$_BATCH_RUN" == true ]; then

	# Get last argument that should be a path
	batchPath="${@: -1}"
	if [ ! -d "$batchPath" ]; then
		Logger "Missing path." "ERROR"
		Usage
	fi

	Logger "Running $PROGRAM $PROGRAM_VERSION as batch" "NOTICE"

	if [ "$pdf" == true ]; then
		if [ "$OCR_ENGINE" == "tesseract3" ] || [ "$OCR_ENGINE" == "tesseract" ]; then
			result=$(VerComp "$TESSERACT_VERSION" "3.02")
			if [ "$result" -eq 2 ] || [ "$result" -eq 0 ]; then
				Logger "Tesseract version $TESSERACT_VERSION is not supported to create searchable PDFs. Please use 3.03 or better." "CRITICAL"
				exit 1
			fi
		fi

		Logger "Beginning PDF OCR recognition of files in [$batchPath] using $OCR_ENGINE." "NOTICE"
		OCR_Dispatch "$batchPath" "$PDF_EXTENSION" "$PDF_OCR_ENGINE_ARGS" false
		Logger "Batch ended." "NOTICE"
	fi

	if [ "$docx" == true ]; then
		Logger "Beginning DOCX OCR recognition of files in [$batchPath] using $OCR_ENGINE." "NOTICE"
		OCR_Dispatch "$batchPath" "$WORD_EXTENSION" "$WORD_OCR_ENGINE_ARGS" false
		Logger "Batch ended." "NOTICE"
	fi

	if [ "$xlsx" == true ]; then
		Logger "Beginning XLSX OCR recognition of files in [$batchPath] using $OCR_ENGINE." "NOTICE"
		OCR_Dispatch "$batchPath" "$EXCEL_EXTENSION" "$EXCEL_OCR_ENGINE_ARGS" false
		Logger "batch ended." "NOTICE"
	fi

	if [ "$txt" == true ]; then
		Logger "Beginning TEXT OCR recognition of files in [$batchPath] using $OCR_ENGINE." "NOTICE"
		OCR_Dispatch "$batchPath" "$TEXT_EXTENSION" "$TEXT_OCR_ENGINE_ARGS" false
		Logger "batch ended." "NOTICE"
	fi

	if [ "$csv" == true ]; then
		Logger "Beginning CSV OCR recognition of files in [$batchPath] using $OCR_ENGINE." "NOTICE"
		OCR_Dispatch "$batchPath" "$CSV_EXTENSION" "$CSV_OCR_ENGINE_ARGS" true
		Logger "Batch ended." "NOTICE"
	fi

else
	Logger "$PROGRAM must be run as a system service (using service file or --service argument) or in batch mode with --batch parameter." "ERROR"
	Usage
fi
--------------------------------------------------------------------------------