├── ACKNOWLEDGEMENTS ├── LICENSE ├── NOTICE ├── README.md ├── check_connection_qty ├── check_database_connection ├── check_database_size ├── check_db2diag ├── check_diff_db_conf ├── check_diff_db_ddl ├── check_diff_db_sec ├── check_diff_os ├── check_hadr_status ├── check_instance_memory ├── check_instance_up ├── check_io_cleaners ├── check_last_backup ├── check_lock_wait ├── check_log_consumption ├── check_log_usage ├── check_memory_usage ├── check_mon_scripts_running ├── check_on_cluster ├── check_open_files ├── check_tablespace_size ├── check_utilities └── template /ACKNOWLEDGEMENTS: -------------------------------------------------------------------------------- 1 | Many of these scripts are based on the published Nagios plugins written by 2 | Felipe Alkain de Souza at: 3 | http://www.dbatodba.com/db2/scripts-db2/nagios-plugins 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 
23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Monitor DB2 with Nagios 2 | Copyright 2013 Andres Gomez Casanova 3 | https://github.com/angoca/monitor-db2-with-nagios 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
16 | 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | monitor-db2-with-nagios 2 | ======================= 3 | 4 | Welcome to the monitor-db2-with-nagios project! 5 | 6 | Here you will find the sources, the wiki, and a bug tracker. 7 | 8 | This project aims to provide a set of open source tools to monitor DB2. The 9 | monitoring is limited to checking that a value is within a range. The output of 10 | the scripts allows you to create graphs and see the behavior of the monitored 11 | elements. 12 | 13 | Each script is autonomous: there are no dependencies between files, 14 | and any modification of the behaviour will only affect that script. 15 | 16 | The template file is provided to create new scripts based on it. It was written 17 | in a way that you just need to fill the TODO with what you want to monitor. 18 | 19 | For more information about how to use these scripts, please visit the Wiki: 20 | [https://github.com/angoca/monitor-db2-with-nagios/wiki] 21 | 22 | If you have seen a problem or you have any comments, please feel free to open 23 | an issue and tell us your issue: 24 | [https://github.com/angoca/monitor-db2-with-nagios/issues] 25 | 26 | References: 27 | 28 | * Nagios plug-in development guidelines. http://nagiosplug.sourceforge.net/developer-guidelines.html 29 | * Nagios Plugin API. http://nagios.sourceforge.net/docs/3_0/pluginapi.html 30 | * Nagios Plugins. http://nagios.sourceforge.net/docs/3_0/plugins.html 31 | 32 | -------------------------------------------------------------------------------- /check_database_connection: -------------------------------------------------------------------------------- 1 | #!
/usr/bin/env bash 2 | 3 | ############################################################################### 4 | # Monitor DB2 with Nagios 5 | # Copyright 2013,2014,2015 Andres Gomez Casanova 6 | # https://github.com/angoca/monitor-db2-with-nagios 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | ############################################################################### 20 | 21 | # This script tries to connect to the database, to see if it is available. 22 | # 23 | # This script was based on the plugins written by Felipe Alkain de Souza. 24 | # http://www.dbatodba.com/db2/scripts-db2/nagios-plugins 25 | # 26 | # In some UNIX the long parameters are not available. 27 | # 28 | # In order to be used in Nagios, you have to configure the following. 
29 | # 30 | # # 'check_database_connection' command definition 31 | # define command { 32 | # command_name check_database_connection 33 | # command_line $USER1$/check_by_ssh -H $HOSTADDRESS$ -l nagios -C "scripts/check_database_connection -i '$ARG1$' -d '$ARG2$'" 34 | # } 35 | # define service{ 36 | # host_name db2server 37 | # service_description Database connectivity 38 | # check_command check_database_connection!/home/db2inst1!sample 39 | # use generic-service 40 | # } 41 | # 42 | # When using NRPE: 43 | # 44 | # In the database server: 45 | # 46 | # command[check_database_connection]=/home/nagios/scripts/check_database_connection -i /home/db2inst1 -d sample 47 | # 48 | # Then modify the path according to your script location, instance home dir, 49 | # database and thresholds. 50 | # 51 | # In the Nagios server: 52 | # 53 | # define service{ 54 | # host_name db2server 55 | # service_description Database connectivity 56 | # check_command check_nrpe_1arg!check_database_connection 57 | # use generic-service 58 | # } 59 | # 60 | # It is necessary to execute this script with a user with connect privilege. 61 | # 62 | # Parameters: 63 | # -d | --database : Database name. 64 | # -h | --help : Show the help of this script. 65 | # -i | --instance : Home directory of the instance. Usually it is 66 | # /home/db2inst1. 67 | # -K | --mk : Change the output for Check_MK. 68 | # -T | --trace : Trace mode. Writes output in /tmp. 69 | # -v | --verbose : Execute the program in verbose mode. 70 | # -V | --version : Show the current version of this script. 71 | # 72 | # In AIX, the long name options are not supported. 73 | # 74 | # The exit codes are the standard for Nagios. 75 | # 76 | # 0 The plugin was able to check the service and it appeared to be functioning 77 | # properly. 78 | # 1 The plugin was able to check the service, but it appeared to be above some 79 | # "warning" threshold or did not appear to be working properly. 
# 2 The plugin detected that either the service was not running or it was above
#   some "critical" threshold.
# 3 Invalid command line arguments were supplied to the plugin or low-level
#   failures internal to the plugin (such as unable to fork, or open a tcp
#   socket) that prevent it from performing the specified operation.
#   Higher-level errors (such as name resolution errors, socket timeouts, etc)
#   are outside of the control of plugins and should generally NOT be reported
#   as UNKNOWN states.
#
# Author: Andres Gomez Casanova
# Version: v1.2 2015-10-15

# Flag for debugging.
#set -xv

# Locale to print messages in English. Prevent language problems.
export LANG=en_US

#######################################
# Prints the author and the version of this script.
# Arguments: none
# Outputs:   two lines on stdout (author, version and date)
#######################################
function print_revision {
  echo "Andres Gomez Casanova - AngocA"
  echo "v1.2 2015-10-15"
}

#######################################
# Prints the one-paragraph usage synopsis.
# Arguments: $1 - name of the script, shown after "Usage:"
# Outputs:   the synopsis on stdout
#######################################
function print_usage {
  # The delimiter is unquoted on purpose: ${1} must be expanded.
  /bin/cat <<__EOT
Usage: ${1} { -i instanceHomeDirectory -d databaseName [-K]
              | -h | -V } [-T][-v]
__EOT
}

#######################################
# Prints the complete help: revision, usage synopsis and option list.
# Arguments: $1 - name of the script, forwarded to print_usage
# Outputs:   the help text on stdout
#######################################
function print_help {
  print_revision
  # Quoted so that a script name containing blanks is forwarded as one word.
  print_usage "${1}"
  # Max 80 chars width.
  /bin/cat <<__EOT
-------------------------------------------------------------------------------
This script checks the connectivity to a database.
-d | --database STRING
   Database name.
-h | --help
   Shows the current documentation.
-i | --instance STRING
   Instance home directory. It is usually /home/db2inst1.
-K | --mk
   Changes the output to be compatible with Check_MK.
-T | --trace
   Trace mode: writes date and output in /tmp.
-v | --verbose
   Executes the script in verbose mode (multiple times).
-V | --version
   Shows the current version of this script.

In AIX, the long name options are not supported.
__EOT
}

# Variable to control the flow execution.
# Prevent Spaghetti code.
# Every step below is guarded by this flag: once a step fails it is set to
# false and the remaining steps are skipped until the output section.
CONTINUE=true

# Nagios return codes
OK=0
WARNING=1
CRITICAL=2
UNKNOWN=3
# This is the returned code.
RETURN=${UNKNOWN}

# Nagios Output
# Text output 80 chars | Optional Perf Data Line 1
# Long text Line 1
# Long text Line 2 | Optional Perf Data Line 2
# Optional Perf Data Line 3
OUTPUT=
PERFORMANCE=
LONG_OUTPUT=
LONG_PERFORMANCE=
PERF_MK="-"

APPL_NAME=$(basename "${0}")

echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Started ${APPL_NAME} ${*}" >> "/tmp/${APPL_NAME}.log"

# Checks the lock file does not exist.
# The lock file is the way the command was called with its parameters
# without spaces.
COPY_ARGS=("${@}")
LOCKED=false
LOCK_FILE=
for VALUE in "${COPY_ARGS[@]}" ; do
  LOCK_FILE="${LOCK_FILE}${VALUE}"
done
# Removes the characters that are not safe in a file name.
LOCK_FILE=${LOCK_FILE//\//}
LOCK_FILE=${LOCK_FILE//\\/}
LOCK_FILE=${LOCK_FILE//\:/}
LOCK_FILE=${LOCK_FILE//\*/}
LOCK_FILE=${LOCK_FILE//\|/}
LOCK_FILE="/tmp/${APPL_NAME}${LOCK_FILE}.lock"
if [[ ! -r ${LOCK_FILE} ]] ; then
  echo $$ > "${LOCK_FILE}"
  LOCKED=true
else
  # If it exists, then check if the process is running.
  # An empty lock file is considered stale.
  LOCK_PID=$(cat "${LOCK_FILE}")
  EXIST=
  if [[ -n ${LOCK_PID} ]] ; then
    EXIST=$(ps -ef | grep "${LOCK_PID}" | grep "${APPL_NAME}")
  fi
  # If process is not running, delete it.
  if [[ ${EXIST} == "" ]] ; then
    rm "${LOCK_FILE}"
    if [[ ! -r ${LOCK_FILE} ]] ; then
      echo $$ > "${LOCK_FILE}"
      LOCKED=true
    else
      OUTPUT="The lock file cannot be replaced: ${LOCK_FILE}"
      CONTINUE=false
      RETURN=${UNKNOWN}
    fi
  else
    OUTPUT="An instance of the script with the same parameters is already running."
    CONTINUE=false
    RETURN=${UNKNOWN}
  fi
fi

if [[ ${#} -eq 0 ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Checks the operating system. getopt works different in AIX than in Linux.
OS=$(uname)

if [[ "${OS:0:5}" == "Linux" ]] ; then
  # The following requires GNU getopt. See the following discussion.
  # http://stackoverflow.com/questions/402377

  TEMP=$(getopt -o d:hi:KTvV --long database:,help,instance:,mk,trace,verbose,version \
    -n "${APPL_NAME}" -- "${@}")
elif [[ "${OS:0:3}" == "AIX" ]] ; then
  # Traditional getopt: it relies on word splitting, hence no quotes.
  # -V is a flag without argument, so no colon follows it.
  set -- $(getopt d:hi:KTvV ${*})
elif [[ "${OS:0:6}" == "Darwin" || "${OS:0:5}" == "SunOS" || "${OS:0:5}" == "HP-UX" ]] ; then
  echo "This plugin is not yet supported in your platform."
  echo "Please create a ticket in GitHub if you want to enable your current platform."
  echo "https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
elif [[ "${OS:0:6}" == "CYGWIN" ]] ; then
  echo "This plugin is not supported in Cygwin"
  RETURN=${UNKNOWN}
  CONTINUE=false
else
  echo "The platform is unknown: ${OS}"
  echo "Please create a ticket in GitHub: https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# ${?} is the status of the last command of the branch executed above (the
# getopt call in Linux). Nothing can be inserted between the fi and this test.
if [[ ${?} -ne 0 ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true ]] ; then
  if [[ "${OS:0:5}" == "Linux" ]] ; then
    # Note the quotes around ${TEMP}: they are essential!
    eval set -- "${TEMP}"
  fi
  HELP=false
  VERSION=false
  CHECK_MK=false
  # Verbosity level
  VERBOSE=0
  # Trace activated
  TRACE=false
  LOG=/tmp/${APPL_NAME}.log
  INSTANCE_HOME=
  DATABASE_NAME=
  # The option values are kept up to the first blank. ${2} is not quoted on
  # purpose, in order to discard the leading blanks before cutting.
  if [[ "${OS:0:5}" == "Linux" ]] ; then
    while true; do
      case "${1}" in
        -d | --database ) DATABASE_NAME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -h | --help ) HELP=true ; shift ;;
        -i | --instance ) INSTANCE_HOME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -K | --mk ) CHECK_MK=true ; shift ;;
        -T | --trace ) TRACE=true ; shift ;;
        -v | --verbose ) VERBOSE=$(( VERBOSE + 1 )) ; shift ;;
        -V | --version ) VERSION=true ; shift ;;
        -- ) shift ; break ;;
        * ) break ;;
      esac
    done
  elif [[ "${OS:0:3}" == "AIX" ]] ; then
    while [[ "${1}" != "--" ]] ; do
      case "${1}" in
        -d) DATABASE_NAME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -h) HELP=true ; shift ;;
        -i) INSTANCE_HOME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -K) CHECK_MK=true ; shift ;;
        -T) TRACE=true ; shift ;;
        -v) VERBOSE=$(( VERBOSE + 1 )) ; shift ;;
        -V) VERSION=true ; shift ;;
        * ) break ;;
      esac
    done
  fi
fi

if [[ ${TRACE} == true ]] ; then
  echo ">>>>>" >> "${LOG}"
  date >> "${LOG}"
  echo "Instance at ${INSTANCE_HOME}" >> "${LOG}"
  echo "PID ${$}" >> "${LOG}"
fi

ECHO="help:${HELP}, version:${VERSION}, verbose:${VERBOSE}"
ECHO="${ECHO}, directory:${INSTANCE_HOME}, database:${DATABASE_NAME}"
ECHO="${ECHO}, check_mk:${CHECK_MK}"

if [[ ${VERBOSE} -ge 2 ]] ; then
  echo "${ECHO}"
fi

if [[ ${TRACE} == true ]] ; then
  echo "PARAMS:${ECHO}" >> "${LOG}"
fi

if [[ ${CONTINUE} == true && ${HELP} == true ]] ; then
  print_help "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${VERSION} == true ]] ; then
  print_revision "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${INSTANCE_HOME} == "" ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${DATABASE_NAME} == "" ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true ]] ; then
  if [[ -d ${INSTANCE_HOME} && -e ${INSTANCE_HOME}/sqllib/db2profile ]] ; then
    # Load the DB2 profile.
    . "${INSTANCE_HOME}/sqllib/db2profile"
    INSTANCE_NAME=$(db2 get instance | awk '/instance/ {print $7}')
  else
    OUTPUT="Instance directory is invalid."
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

# Checks that the database is catalogued in the instance.
# The commands are kept in a variable to show them in verbose mode; they are
# executed by word splitting, thus they are not quoted.
if [[ ${CONTINUE} == true ]] ; then
  COMMAND_DATABASE="db2 list db directory"
  if [[ ${VERBOSE} -ge 2 ]] ; then
    echo "COMMAND: ${COMMAND_DATABASE}"
  fi
  DATABASE=$(${COMMAND_DATABASE})
  if [[ ${TRACE} == true ]] ; then
    echo "RESULT:'${DATABASE}'" >> "${LOG}"
  fi
  DATABASE=$(printf '%s\n' "${DATABASE}" | awk '/Database alias/ {print $4}' | grep -iw "${DATABASE_NAME}")
  if [[ ${VERBOSE} -ge 3 ]] ; then
    echo "RESULT:'${DATABASE}'"
  fi
  if [[ ${DATABASE} == "" ]] ; then
    OUTPUT="The database ${DATABASE_NAME} is not catalogued."
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

if [[ ${CONTINUE} == true ]] ; then
  # Checks if the database is active (informative only, it does not change
  # the returned code).
  COMMAND_ACTIVE="db2 list active databases"
  if [[ ${VERBOSE} -ge 2 ]] ; then
    echo "COMMAND: ${COMMAND_ACTIVE}"
  fi
  ACTIVE=$(${COMMAND_ACTIVE})
  if [[ ${TRACE} == true ]] ; then
    echo "RESULT:'${ACTIVE}'" >> "${LOG}"
  fi
  ACTIVE=$(printf '%s\n' "${ACTIVE}" | awk '/Database name/ {print $4}' | grep -iw "${DATABASE_NAME}")
  if [[ ${VERBOSE} -ge 3 ]] ; then
    echo "RESULT:'${ACTIVE}'"
  fi

  if [[ ${ACTIVE} == "" ]] ; then
    OUTPUT_ACTIVE="The database is not active. "
    LONG_OUTPUT="${OUTPUT_ACTIVE}"
    LONG_PERFORMANCE_1="'Database_Active'=0.2;0.5"
  else
    OUTPUT_ACTIVE="The database is active. "
    LONG_OUTPUT="${OUTPUT_ACTIVE}"
    LONG_PERFORMANCE_1="'Database_Active'=0.8;0.5"
  fi

  # Tries the connection; -a shows the SQLCA, from which the sqlcode is taken.
  COMMAND_CONNECTABLE="db2 -a connect to ${DATABASE_NAME}"
  if [[ ${VERBOSE} -ge 2 ]] ; then
    echo "COMMAND: ${COMMAND_CONNECTABLE}"
  fi
  CONNECTABLE=$(${COMMAND_CONNECTABLE})
  if [[ ${TRACE} == true ]] ; then
    echo "RESULT:'${CONNECTABLE}'" >> "${LOG}"
  fi
  CONNECTABLE=$(printf '%s\n' "${CONNECTABLE}" | awk '/sqlcode/ {print $7}')
  if [[ ${VERBOSE} -ge 3 ]] ; then
    echo "RESULT:'${CONNECTABLE}'"
  fi

  # An empty sqlcode means that the command did not return a SQLCA. It cannot
  # be taken as a successful connection (an empty value is equal to 0 in an
  # arithmetic comparison).
  if [[ -n ${CONNECTABLE} && ${CONNECTABLE} -eq 0 ]] ; then
    OUTPUT="OK Connection to database ${DATABASE_NAME}. ${OUTPUT_ACTIVE}"
    RETURN=${OK}
    PERFORMANCE="'Connectable_Database'=0.9;0.6;0.3"
  elif [[ -n ${CONNECTABLE} && ${CONNECTABLE} -eq -20157 ]] ; then
    OUTPUT="The database is in quiesce mode. ${OUTPUT_ACTIVE}"
    RETURN=${WARNING}
    PERFORMANCE="'Connectable_Database'=0.4;0.6;0.3"
  else
    OUTPUT="A connection to database ${DATABASE_NAME} was not successful. ${OUTPUT_ACTIVE}"
    LONG_OUTPUT="${LONG_OUTPUT} ${CONNECTABLE}"
    RETURN=${CRITICAL}
    PERFORMANCE="'Connectable_Database'=0.1;0.6;0.3"
  fi

  # Check for HADR Window replay
  LONG_PERFORMANCE_2=
  COMMAND_ROLE="db2 get db cfg for ${DATABASE_NAME}"
  if [[ ${VERBOSE} -ge 2 ]] ; then
    echo "COMMAND: ${COMMAND_ROLE}"
  fi
  ROLE=$(${COMMAND_ROLE})
  if [[ ${TRACE} == true ]] ; then
    echo "RESULT:'${ROLE}'" >> "${LOG}"
  fi
  ROLE=$(printf '%s\n' "${ROLE}" | awk '/HADR database role/ {print $5}')
  if [[ ${VERBOSE} -ge 3 ]] ; then
    echo "RESULT:'${ROLE}'"
  fi
  if [[ ${ROLE} == "STANDBY" ]] ; then
    # The monitored database is queried, not a fixed one.
    COMMAND_REPLAY="db2pd -db ${DATABASE_NAME} -hadr"
    if [[ ${VERBOSE} -ge 2 ]] ; then
      echo "COMMAND: ${COMMAND_REPLAY}"
    fi
    REPLAY=$(${COMMAND_REPLAY})
    if [[ ${TRACE} == true ]] ; then
      echo "RESULT:'${REPLAY}'" >> "${LOG}"
    fi
    REPLAY=$(printf '%s\n' "${REPLAY}" | awk '/^Active/ {print "active"}')
    if [[ ${VERBOSE} -ge 3 ]] ; then
      echo "RESULT:'${REPLAY}'"
    fi
    if [[ ${REPLAY} == "active" ]] ; then
      LONG_PERFORMANCE_2="HADR-replay=0.3;0.5"
    else
      LONG_PERFORMANCE_2="HADR-replay=0.7;0.5"
    fi
  fi
  LONG_PERFORMANCE="${LONG_PERFORMANCE_1} ${LONG_PERFORMANCE_2}"
  if [[ ${LONG_PERFORMANCE_2} == "" ]] ; then
    PERF_MK="${PERFORMANCE}|${LONG_PERFORMANCE_1}"
  else
    PERF_MK="${PERFORMANCE}|${LONG_PERFORMANCE_1}|${LONG_PERFORMANCE_2}"
  fi
fi

# Prints the output.
if [[ ${OUTPUT} == "" ]] ; then
  OUTPUT="Note: The test was not executed."
fi
# Builds the output.
if [[ ${CHECK_MK} == true ]] ; then
  echo "${RETURN} databaseConnection-${INSTANCE_NAME}-${DATABASE_NAME} ${PERF_MK} ${OUTPUT}"
else
  echo -e "${OUTPUT}|${PERFORMANCE}\n${LONG_OUTPUT}|${LONG_PERFORMANCE}"
fi
# Returns the error code.
464 | if [[ ${VERBOSE} -ge 2 ]] ; then 465 | echo "Return code: ${RETURN}" 466 | fi 467 | if [[ ${TRACE} == true ]] ; then 468 | echo -e "OUTPUT:${OUTPUT}\nPERF:${PERFORMANCE}\nLONG_OUT:${LONG_OUTPUT}\nLONGPERF:${LONG_PERFORMANCE}\nRET_CODE:${RETURN}" >> ${LOG} 469 | date >> ${LOG} 470 | echo -e "<<<<<\n" >> ${LOG} 471 | fi 472 | 473 | if [[ ${LOCKED} == true && -r ${LOCK_FILE} ]] ; then 474 | rm ${LOCK_FILE} 475 | fi 476 | 477 | echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Ended ${APPL_NAME} ${COPY_ARGS[@]}" >> /tmp/${APPL_NAME}.log 478 | 479 | exit ${RETURN} 480 | 481 | -------------------------------------------------------------------------------- /check_diff_db_ddl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | ############################################################################### 4 | # Monitor DB2 with Nagios 5 | # Copyright 2013,2014,2015,2016 Andres Gomez Casanova 6 | # https://github.com/angoca/monitor-db2-with-nagios 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | ############################################################################### 20 | 21 | # Checks modifications in the structure of the objects - DDL with the db2look 22 | # command. 23 | # 24 | # This script uses git. If using the default directory, then each time the server 25 | # is restarted the git history is lost.
26 | # 27 | # The db2look command could fail if its packages are not bound. From the sqllib/bnd directory: 28 | # db2 "bind db2lkfun.bnd BLOCKING ALL GRANT PUBLIC sqlerror continue" 29 | # db2 "bind db2look.bnd BLOCKING ALL GRANT PUBLIC sqlerror continue" 30 | # db2 grant execute on function SYSPROC.MON_GET_CONTAINER to user nagios 31 | # 32 | # Based on this blog written by Ian Bjorhovde: 33 | # http://www.idug.org/p/bl/et/blogaid=571 34 | # 35 | # # 'check_diff_db_ddl' command definition 36 | # define command { 37 | # command_name check_diff_db_ddl 38 | # command_line $USER1$/check_by_ssh -H $HOSTADDRESS$ -l nagios -t 20 -C "scripts/check_diff_db_ddl -i '$ARG1$' -d '$ARG2$'" 39 | # } 40 | # define service{ 41 | # host_name db2server 42 | # service_description Database DDL 43 | # check_command check_diff_db_ddl!/home/db2inst1!sample 44 | # use generic-service 45 | # } 46 | # 47 | # When using NRPE: 48 | # 49 | # In the database server: 50 | # 51 | # command[check_diff_db_ddl]=/home/nagios/scripts/check_diff_db_ddl -i /home/db2inst1 -d sample 52 | # 53 | # Then modify the path according to your script location, instance home dir, 54 | # database, refresh rate and thresholds. 55 | # 56 | # In the Nagios server: 57 | # 58 | # define service{ 59 | # host_name db2server 60 | # service_description Database DDL 61 | # check_command check_nrpe_1arg!check_diff_db_ddl 62 | # use generic-service 63 | # } 64 | # 65 | # Parameters: 66 | # -d | --database : Database name. 67 | # -D | --directory : Directory to create the history (Default is /tmp) 68 | # -h | --help : Show the help of this script. 69 | # -i | --instance : Home directory of the instance. Usually it is 70 | # /home/db2inst1. 71 | # -K | --mk : Change the output for Check_MK. 72 | # -T | --trace : Trace mode. Writes output in /tmp. 73 | # -v | --verbose : Execute the program in verbose mode. 74 | # -V | --version : Show the current version of this script.
#
# In AIX, the long name options are not supported.
#
# The exit codes are the standard for Nagios.
#
# 0 The plugin was able to check the service and it appeared to be functioning
#   properly.
# 1 The plugin was able to check the service, but it appeared to be above some
#   "warning" threshold or did not appear to be working properly.
# 2 The plugin detected that either the service was not running or it was above
#   some "critical" threshold.
# 3 Invalid command line arguments were supplied to the plugin or low-level
#   failures internal to the plugin (such as unable to fork, or open a tcp
#   socket) that prevent it from performing the specified operation.
#   Higher-level errors (such as name resolution errors, socket timeouts, etc)
#   are outside of the control of plugins and should generally NOT be reported
#   as UNKNOWN states.
#
# Author: Andres Gomez Casanova
# Version: v1.0 2016-12-19

# Flag for debugging.
#set -xv

# Locale to print messages in English. Prevent language problems.
export LANG=en_US

# Prints the author and the version of this script.
function print_revision {
  echo "Andres Gomez Casanova - AngocA"
  echo "v1.0 2016-12-19"
}

# Prints the one-paragraph usage.
#   ${1} - name of the script, shown in the usage line.
function print_usage {
  /bin/cat <<__EOT
Usage: ${1} { -i instanceHomeDirectory -d databaseName [-K][-D directory] | -h | -V }
        [-T][-v]
__EOT
}

# Prints the version, the usage and the description of every option.
#   ${1} - name of the script, forwarded to print_usage.
function print_help {
  print_revision
  print_usage "${1}"
  # Max 80 chars width.
  /bin/cat <<__EOT
-------------------------------------------------------------------------------
Checks changes in the database structure (DDL).
-d | --database STRING
   Database name.
-D | --directory STRING
   Directory where the history is created. By default is /tmp
-h | --help
   Shows the current documentation.
-i | --instance STRING
   Instance home directory. It is usually /home/db2inst1.
-K | --mk
   Changes the output to be compatible with Check_MK.
-T | --trace
   Trace mode: writes date and output in /tmp.
-v | --verbose
   Executes the script in verbose mode (multiple times).
-V | --version
   Shows the current version of this script.

In AIX, the long name options are not supported.
__EOT
}

# Prints the first whitespace-separated word of ${1} (nothing when it is
# empty). Option values are reduced to one word, as they are interpolated in
# paths and commands later on.
function first_word {
  local FIRST
  read -r FIRST _ <<< "${1}"
  printf '%s' "${FIRST}"
}

# Variable to control the flow execution. Prevent Spaghetti code.
CONTINUE=true

# Nagios return codes
OK=0
WARNING=1
CRITICAL=2
UNKNOWN=3
# This is the returned code.
RETURN=${UNKNOWN}

# Nagios Output
# Text output 80 chars | Optional Perf Data Line 1
# Long text Line 1
# Long text Line 2 | Optional Perf Data Line 2
# Optional Perf Data Line 3
OUTPUT=
PERFORMANCE=
LONG_OUTPUT=
LONG_PERFORMANCE=
PERF_MK="-"

APPL_NAME=$(basename "${0}")

echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Started ${APPL_NAME} $*" >> "/tmp/${APPL_NAME}.log"

# Checks the lock file does not exist.
# The lock file is the way the command was called with its parameters
# without spaces.
COPY_ARGS=("${@}")
LOCK_FILE=
for VALUE in "${COPY_ARGS[@]}" ; do
  LOCK_FILE="${LOCK_FILE}${VALUE}"
done
# Drops the characters that cannot be part of a file name: / \ : * |
LOCK_FILE=${LOCK_FILE//\//}
LOCK_FILE=${LOCK_FILE//\\/}
LOCK_FILE=${LOCK_FILE//\:/}
LOCK_FILE=${LOCK_FILE//\*/}
LOCK_FILE=${LOCK_FILE//\|/}
LOCK_FILE="/tmp/${APPL_NAME}${LOCK_FILE}.lock"
LOCKED=false
if [[ ! -r ${LOCK_FILE} ]] ; then
  echo "${$}" > "${LOCK_FILE}"
  LOCKED=true
else
  # If it exists, then check if the process that wrote it is still running.
  # The process is looked up by its exact PID: grepping the PID in the whole
  # process list also matches any line that merely contains those digits.
  LOCK_PID=$(cat "${LOCK_FILE}" 2> /dev/null)
  EXIST=
  if [[ ${LOCK_PID} =~ ^[0-9]+$ ]] ; then
    EXIST=$(ps -p "${LOCK_PID}" -o args= 2> /dev/null | grep -F -- "${APPL_NAME}")
  fi
  # If process is not running, the lock is stale: replace it.
  if [[ ${EXIST} == "" ]] ; then
    rm -f -- "${LOCK_FILE}"
    if [[ ! -r ${LOCK_FILE} ]] ; then
      echo "${$}" > "${LOCK_FILE}"
      LOCKED=true
    else
      OUTPUT="The lock file cannot be replaced: ${LOCK_FILE}"
      CONTINUE=false
      RETURN=${UNKNOWN}
    fi
  else
    OUTPUT="An instance of the script with the same parameters is already running."
    CONTINUE=false
    RETURN=${UNKNOWN}
  fi
fi

if [[ ${#} -eq 0 ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Checks the operating system. getopt works different in AIX than in Linux.
OS=$(uname)

# Exit status of getopt, captured right after the call. It cannot be read
# from ${?} once the whole if/elif chain is over.
GETOPT_STATUS=0
if [[ "${OS:0:5}" == "Linux" ]] ; then
  # The following requires GNU getopt. See the following discussion.
  # http://stackoverflow.com/questions/402377
  TEMP=$(getopt -o c:d:D:hi:KTvVw: \
    --long critical:,database:,directory:,help,instance:,mk,trace,verbose,version,warning: \
    -n "${APPL_NAME}" -- "${@}")
  GETOPT_STATUS=${?}
elif [[ "${OS:0:3}" == "AIX" ]] ; then
  # The traditional getopt only works with word splitting, so these
  # expansions are unquoted on purpose.
  # shellcheck disable=SC2048,SC2086
  TEMP=$(getopt c:d:D:hi:KTvVw: ${*})
  GETOPT_STATUS=${?}
  # shellcheck disable=SC2086
  set -- ${TEMP}
elif [[ "${OS:0:6}" == "Darwin" || "${OS:0:5}" == "SunOS" || "${OS:0:5}" == "HP-UX" ]] ; then
  echo "This plugin is not yet supported in your platform."
  echo "Please create a ticket in GitHub if you want to enable your current platform."
  echo "https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
elif [[ "${OS:0:6}" == "CYGWIN" ]] ; then
  echo "This plugin is not supported in Cygwin"
  RETURN=${UNKNOWN}
  CONTINUE=false
else
  echo "The platform is unknown: ${OS}"
  echo "Please create a ticket in GitHub: https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${GETOPT_STATUS} -ne 0 ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true ]] ; then
  if [[ "${OS:0:5}" == "Linux" ]] ; then
    # Note the quotes around ${TEMP}: they are essential!
    eval set -- "${TEMP}"
  fi
  HELP=false
  VERSION=false
  CHECK_MK=false
  # Verbosity level
  VERBOSE=0
  # Trace activated
  TRACE=false
  LOG=/tmp/${APPL_NAME}.log
  INSTANCE_HOME=
  DATABASE_NAME=
  HISTORY_DIRECTORY=/tmp
  # The thresholds are accepted and validated, but this check does not use
  # them to compute the state.
  WARNING_THRES=1
  CRITICAL_THRES=2
  if [[ "${OS:0:5}" == "Linux" ]] ; then
    while true ; do
      case "${1}" in
        -c | --critical ) CRITICAL_THRES=$(first_word "${2}") ; shift 2 ;;
        -d | --database ) DATABASE_NAME=$(first_word "${2}") ; shift 2 ;;
        -D | --directory ) HISTORY_DIRECTORY=$(first_word "${2}") ; shift 2 ;;
        -h | --help ) HELP=true ; shift ;;
        -i | --instance ) INSTANCE_HOME=$(first_word "${2}") ; shift 2 ;;
        -K | --mk ) CHECK_MK=true ; shift ;;
        -T | --trace ) TRACE=true ; shift ;;
        -v | --verbose ) VERBOSE=$(( VERBOSE + 1 )) ; shift ;;
        -V | --version ) VERSION=true ; shift ;;
        -w | --warning ) WARNING_THRES=$(first_word "${2}") ; shift 2 ;;
        -- ) shift ; break ;;
        * ) break ;;
      esac
    done
  elif [[ "${OS:0:3}" == "AIX" ]] ; then
    while [[ ${1} != -- ]] ; do
      case "${1}" in
        -c) CRITICAL_THRES=$(first_word "${2}") ; shift 2 ;;
        -d) DATABASE_NAME=$(first_word "${2}") ; shift 2 ;;
        -D) HISTORY_DIRECTORY=$(first_word "${2}") ; shift 2 ;;
        -h) HELP=true ; shift ;;
        -i) INSTANCE_HOME=$(first_word "${2}") ; shift 2 ;;
        -K) CHECK_MK=true ; shift ;;
        -T) TRACE=true ; shift ;;
        -v) VERBOSE=$(( VERBOSE + 1 )) ; shift ;;
        -V) VERSION=true ; shift ;;
        -w) WARNING_THRES=$(first_word "${2}") ; shift 2 ;;
        * ) break ;;
      esac
    done
  fi
fi

if [[ ${TRACE} == true ]] ; then
  {
    echo ">>>>>"
    date
    echo "Instance at ${INSTANCE_HOME}"
    echo "PID ${$}"
  } >> "${LOG}"
fi

ECHO="help:${HELP}, version:${VERSION}, verbose:${VERBOSE}"
ECHO="${ECHO}, directory:${INSTANCE_HOME}, database:${DATABASE_NAME}"
ECHO="${ECHO}, warning:${WARNING_THRES}, critical:${CRITICAL_THRES}"
ECHO="${ECHO}, check_mk:${CHECK_MK}"

if [[ ${VERBOSE} -ge 2 ]] ; then
  echo "${ECHO}"
fi

if [[ ${TRACE} == true ]] ; then
  echo "PARAMS:${ECHO}" >> "${LOG}"
fi

if [[ ${CONTINUE} == true && ${HELP} == true ]] ; then
  print_help "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${VERSION} == true ]] ; then
  print_revision
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${INSTANCE_HOME} == "" ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${DATABASE_NAME} == "" ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true ]] ; then
  # Both thresholds have to be positive integers, warning below critical.
  # The numeric comparison only runs once both values are known to be digits.
  THRESHOLDS_OK=true
  if [[ ! ${WARNING_THRES} =~ ^[0-9]+$ || ! ${CRITICAL_THRES} =~ ^[0-9]+$ ]] ; then
    THRESHOLDS_OK=false
  elif [[ ${WARNING_THRES} -le 0 || ${CRITICAL_THRES} -le 0 \
    || ${WARNING_THRES} -ge ${CRITICAL_THRES} ]] ; then
    THRESHOLDS_OK=false
  fi
  if [[ ${THRESHOLDS_OK} == false ]] ; then
    print_usage "${APPL_NAME}"
    echo "Warning threshold should be less than critical threshold."
    echo "Threshold should be greater than 0."
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

if [[ ${CONTINUE} == true ]] ; then
  if [[ -d ${INSTANCE_HOME} && -e ${INSTANCE_HOME}/sqllib/db2profile ]] ; then
    # Load the DB2 profile.
    # shellcheck disable=SC1090,SC1091
    . "${INSTANCE_HOME}/sqllib/db2profile"
    INSTANCE_NAME=$(db2 get instance | awk '/instance/ {print $7}')
  else
    OUTPUT="Instance directory is invalid."
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

if [[ ${CONTINUE} == true ]] ; then
  COMMAND_DATABASE=(db2 list db directory)
  if [[ ${VERBOSE} -ge 2 ]] ; then
    echo "COMMAND: ${COMMAND_DATABASE[*]}"
  fi
  DATABASE=$("${COMMAND_DATABASE[@]}")
  if [[ ${TRACE} == true ]] ; then
    echo "RESULT:'${DATABASE}'" >> "${LOG}"
  fi
  DATABASE=$(printf '%s\n' "${DATABASE}" | awk '/Database alias/ {print $4}' \
    | grep -iw -- "${DATABASE_NAME}")
  if [[ ${VERBOSE} -ge 3 ]] ; then
    echo "RESULT:'${DATABASE}'"
  fi
  if [[ ${DATABASE} == "" ]] ; then
    OUTPUT="The database ${DATABASE_NAME} is not catalogued."
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

FIRST_EXEC=false
if [[ ${CONTINUE} == true ]] ; then
  # Defines the directory that holds the history of this instance/database.
  DIR=${HISTORY_DIRECTORY}/${APPL_NAME}/${INSTANCE_HOME//\//}_${DATABASE_NAME}

  # Creates directory
  if [[ ! -d ${DIR} ]] ; then
    if [[ ${VERBOSE} -ge 1 ]] ; then
      echo "Directory ${DIR} does not exist"
    fi
    mkdir -p -- "${DIR}"
  elif [[ ${VERBOSE} -ge 1 ]] ; then
    echo "Directory does exist: ${DIR}"
  fi

  # Every git command below works on the current directory, so the check must
  # stop here when the directory cannot be entered.
  if cd -- "${DIR}" 2> /dev/null ; then
    # Absolute path: a relative -D value would not resolve after the cd.
    DIR=$(pwd)
    FILE1=${DIR}/ddl.log
  else
    OUTPUT="The history directory is not accessible: ${DIR}"
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

# The first execution is the one that has to create the repository.
if [[ ${CONTINUE} == true && ! -d ${DIR}/.git ]] ; then
  if git init > /dev/null ; then
    FIRST_EXEC=true
  else
    OUTPUT="The git repository could not be created in ${DIR}"
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

if [[ ${CONTINUE} == true ]] ; then
  # -f: the file does not exist in the first execution.
  rm -f -- "${FILE1}"
  # Redirects the DDL to the file.
  db2look -e -d "${DATABASE_NAME}" -l -o "${FILE1}" 2> /dev/null
  if [[ -r ${FILE1} ]] ; then
    # Drops the first two lines of the output (presumably a header that
    # changes on every run - TODO confirm) and the sequence restart values,
    # which change without any DDL being executed.
    tail -n +3 "${FILE1}" | grep -v -E "ALTER SEQUENCE .* RESTART WITH" > "${FILE1}_tmp"
    mv -- "${FILE1}_tmp" "${FILE1}"
  else
    OUTPUT="The file was not generated"
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

if [[ ${CONTINUE} == true ]] ; then

  if [[ ${FIRST_EXEC} == true ]] ; then
    if [[ ${VERBOSE} -ge 1 ]] ; then
      echo "Adding files"
    fi
    # USER and USERNAME are frequently unset when the plugin is started by a
    # daemon, and git refuses to commit without an identity.
    GIT_USER=${USER:-$(id -un)}
    git add -- "${FILE1}" > /dev/null
    git config user.email "${GIT_USER}@${HOSTNAME:-$(hostname)}" > /dev/null
    git config user.name "Nagios User - ${USERNAME:-${GIT_USER}}" > /dev/null
    if git commit -m "Initial commit" -- "${FILE1}" > /dev/null ; then
      OUTPUT="First execution, nothing to compare"
      PERFORMANCE="'Changes'=0"
      RETURN=${OK}
    else
      OUTPUT="The initial commit could not be created in ${DIR}"
      RETURN=${UNKNOWN}
    fi
  else
    if [[ ${VERBOSE} -ge 1 ]] ; then
      echo "Looking for differences"
    fi
    # Gets and sets changes in Git
    DIFF_FILE1=$(git diff -- "${FILE1}")

    if [[ "${DIFF_FILE1}" == "" ]] ; then
      OUTPUT="No changes in the configuration"
      PERFORMANCE="'Changes'=1"
      RETURN=${OK}
    else
      # The new state becomes the baseline of the next execution.
      git commit -m "Changes in DDL $(date +'%Y-%m-%d-%H.%M.%S')" -- "${FILE1}" > /dev/null
      if [[ ${VERBOSE} -ge 1 ]] ; then
        echo "There were changes in the configuration"
      fi
      if [[ ${VERBOSE} -ge 2 ]] ; then
        # Quoted: the DDL may contain glob characters.
        printf '%s\n' "${DIFF_FILE1}"
      fi

      OUTPUT="${OUTPUT}Changes in DDL - "
      PERFORMANCE="'Changes'=2"
      RETURN=${WARNING}
    fi
  fi
  PERF_MK=${PERFORMANCE}
fi

# Prints the output.
if [[ ${OUTPUT} == "" ]] ; then
  OUTPUT="Note: The test was not executed."
fi
# Builds the output.
if [[ ${CHECK_MK} == true ]] ; then
  # NOTE(review): TYPE is never assigned in this script, and the service name
  # still says "Conf" although this check is about DDL. It is left untouched
  # because renaming it creates a new service in Check_MK.
  echo "${RETURN} diffDBConf${TYPE}-${INSTANCE_NAME}-${DATABASE_NAME} ${PERF_MK} ${OUTPUT}"
else
  printf '%s|%s\n%s|%s\n' "${OUTPUT}" "${PERFORMANCE}" "${LONG_OUTPUT}" "${LONG_PERFORMANCE}"
fi
# Returns the error code.
if [[ ${VERBOSE} -ge 2 ]] ; then
  echo "Return code: ${RETURN}"
fi
if [[ ${TRACE} == true ]] ; then
  {
    printf 'OUTPUT:%s\nPERF:%s\nLONG_OUT:%s\nLONGPERF:%s\nRET_CODE:%s\n' \
      "${OUTPUT}" "${PERFORMANCE}" "${LONG_OUTPUT}" "${LONG_PERFORMANCE}" "${RETURN}"
    date
    printf '<<<<<\n\n'
  } >> "${LOG}"
fi

# Only the execution that created the lock is allowed to remove it.
if [[ ${LOCKED} == true && -r ${LOCK_FILE} ]] ; then
  rm -f -- "${LOCK_FILE}"
fi

echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Ended ${APPL_NAME} ${COPY_ARGS[*]}" >> "/tmp/${APPL_NAME}.log"

exit "${RETURN}"

--------------------------------------------------------------------------------
/check_diff_os:
--------------------------------------------------------------------------------
#! /usr/bin/env bash

###############################################################################
# Monitor DB2 with Nagios
# Copyright 2013,2014,2015 Andres Gomez Casanova
# https://github.com/angoca/monitor-db2-with-nagios
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | ############################################################################### 20 | 21 | # Checks modifications in the OS configuration, including security and some 22 | # configuration elements related to db2. 23 | # 24 | # This script uses git. If using the default directory, then each time the server 25 | # is restarted the git history is lost. 26 | # 27 | # Based on this blog written by Ian Bjorhovde: 28 | # http://www.idug.org/p/bl/et/blogaid=571 29 | # 30 | # # 'check_diff_os' command definition 31 | # define command { 32 | # command_name check_diff_os 33 | # command_line $USER1$/check_by_ssh -H $HOSTADDRESS$ -l nagios -t 20 -C "scripts/check_diff_os" 34 | # } 35 | # define service{ 36 | # host_name db2server 37 | # service_description OS configuration and security 38 | # check_command check_diff_os 39 | # use generic-service 40 | # } 41 | # 42 | # When using NRPE: 43 | # 44 | # In the database server: 45 | # 46 | # command[check_diff_os]=/home/nagios/scripts/check_diff_os 47 | # 48 | # Then modify the path according to your script location and refresh rate.
49 | # 50 | # In the Nagios server: 51 | # 52 | # define service{ 53 | # host_name db2server 54 | # service_description OS configuration and security 55 | # check_command check_nrpe_1arg!check_diff_os 56 | # use generic-service 57 | # } 58 | # 59 | # Parameters: 60 | # -D | --directory : Directory to create the history (Default is /tmp) 61 | # -h | --help : Show the help of this script. 62 | # -K | --mk : Change the output for Check_MK. 63 | # -T | --trace : Trace mode. Writes output in /tmp. 64 | # -v | --verbose : Execute the program in verbose mode. 65 | # -V | --version : Show the current version of this script. 66 | # 67 | # In AIX, the long name options are not supported. 68 | # 69 | # The exit codes are the standard for Nagios. 70 | # 71 | # 0 The plugin was able to check the service and it appeared to be functioning 72 | # properly. 73 | # 1 The plugin was able to check the service, but it appeared to be above some 74 | # "warning" threshold or did not appear to be working properly. 75 | # 2 The plugin detected that either the service was not running or it was above 76 | # some "critical" threshold. 77 | # 3 Invalid command line arguments were supplied to the plugin or low-level 78 | # failures internal to the plugin (such as unable to fork, or open a tcp 79 | # socket) that prevent it from performing the specified operation. 80 | # Higher-level errors (such as name resolution errors, socket timeouts, etc) 81 | # are outside of the control of plugins and should generally NOT be reported 82 | # as UNKNOWN states. 83 | # 84 | # Author: Andres Gomez Casanova 85 | # Version: v1.0 2016-12-20 86 | 87 | # Flag for debugging. 88 | #set -xv 89 | 90 | # Locale to print messages in English. Prevent language problems. 91 | export LANG=en_US 92 | 93 | # Version of this script. 
function print_revision {
  # Prints the author and the version of this script. The date is the one
  # declared in the header of the file.
  echo "Andres Gomez Casanova - AngocA"
  echo "v1.0 2016-12-20"
}

# Prints the one-paragraph usage.
#   ${1} - name of the script, shown in the usage line.
function print_usage {
  /bin/cat <<__EOT
Usage: ${1} { [-K][-D directory] | -h | -V }
        [-T][-v]
__EOT
}

# Prints the version, the usage and the description of every option.
#   ${1} - name of the script, forwarded to print_usage.
function print_help {
  print_revision
  print_usage "${1}"
  # Max 80 chars width.
  /bin/cat <<__EOT
-------------------------------------------------------------------------------
Checks changes in the OS configuration and security.
-D | --directory STRING
   Directory where the history is created. By default is /tmp
-h | --help
   Shows the current documentation.
-K | --mk
   Changes the output to be compatible with Check_MK.
-T | --trace
   Trace mode: writes date and output in /tmp.
-v | --verbose
   Executes the script in verbose mode (multiple times).
-V | --version
   Shows the current version of this script.

In AIX, the long name options are not supported.
__EOT
}

# Variable to control the flow execution. Prevent Spaghetti code.
CONTINUE=true

# Nagios return codes
OK=0
WARNING=1
CRITICAL=2
UNKNOWN=3
# This is the returned code. It stays UNKNOWN until a check sets a state.
RETURN=${UNKNOWN}

# Nagios Output
# Text output 80 chars | Optional Perf Data Line 1
# Long text Line 1
# Long text Line 2 | Optional Perf Data Line 2
# Optional Perf Data Line 3
OUTPUT=
PERFORMANCE=
LONG_OUTPUT=
LONG_PERFORMANCE=
PERF_MK="-"

# Quoted, so a path with spaces is still reduced to the script name.
APPL_NAME=$(basename "${0}")

# Execution journal: one line at the start and one at the end of every run.
echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Started ${APPL_NAME} $*" >> "/tmp/${APPL_NAME}.log"

# Checks the lock file does not exist.
# The lock file is the way the command was called with its parameters
# without spaces.
COPY_ARGS=("${@}")
LOCK_FILE=
for VALUE in "${COPY_ARGS[@]}" ; do
  LOCK_FILE="${LOCK_FILE}${VALUE}"
done
# Drops the characters that cannot be part of a file name: / \ : * |
LOCK_FILE=${LOCK_FILE//\//}
LOCK_FILE=${LOCK_FILE//\\/}
LOCK_FILE=${LOCK_FILE//\:/}
LOCK_FILE=${LOCK_FILE//\*/}
LOCK_FILE=${LOCK_FILE//\|/}
LOCK_FILE="/tmp/${APPL_NAME}${LOCK_FILE}.lock"
LOCKED=false
if [[ ! -r ${LOCK_FILE} ]] ; then
  echo "${$}" > "${LOCK_FILE}"
  LOCKED=true
else
  # If it exists, then check if the process that wrote it is still running.
  # The process is looked up by its exact PID: grepping the PID in the whole
  # process list also matches any line that merely contains those digits.
  LOCK_PID=$(cat "${LOCK_FILE}" 2> /dev/null)
  EXIST=
  if [[ ${LOCK_PID} =~ ^[0-9]+$ ]] ; then
    EXIST=$(ps -p "${LOCK_PID}" -o args= 2> /dev/null | grep -F -- "${APPL_NAME}")
  fi
  # If process is not running, the lock is stale: replace it.
  if [[ ${EXIST} == "" ]] ; then
    rm -f -- "${LOCK_FILE}"
    if [[ ! -r ${LOCK_FILE} ]] ; then
      echo "${$}" > "${LOCK_FILE}"
      LOCKED=true
    else
      OUTPUT="The lock file cannot be replaced: ${LOCK_FILE}"
      CONTINUE=false
      RETURN=${UNKNOWN}
    fi
  else
    OUTPUT="An instance of the script with the same parameters is already running."
    CONTINUE=false
    RETURN=${UNKNOWN}
  fi
fi

# Prints the first whitespace-separated word of ${1} (nothing when it is
# empty). Option values are reduced to one word, as they are interpolated in
# paths later on.
function first_word {
  local FIRST
  read -r FIRST _ <<< "${1}"
  printf '%s' "${FIRST}"
}

# Copies a system file into the history, or leaves an empty snapshot when the
# source is not readable (for example /etc/sysctl.conf outside Linux).
#   ${1} - file to read.
#   ${2} - snapshot to write.
function snapshot_file {
  if [[ -r ${1} ]] ; then
    cat "${1}" > "${2}"
  else
    : > "${2}"
  fi
}

# Checks the operating system. getopt works different in AIX than in Linux.
OS=$(uname)

# Exit status of getopt, captured right after the call. It cannot be read
# from ${?} once the whole if/elif chain is over.
GETOPT_STATUS=0
if [[ "${OS:0:5}" == "Linux" ]] ; then
  # The following requires GNU getopt. See the following discussion.
  # http://stackoverflow.com/questions/402377

  # TODO Add the arguments here. One char in -o, multiple char in -long.
  TEMP=$(getopt -o D:hKTvV --long directory:,help,mk,trace,verbose,version \
    -n "${APPL_NAME}" -- "${@}")
  GETOPT_STATUS=${?}
elif [[ "${OS:0:3}" == "AIX" ]] ; then
  # The traditional getopt only works with word splitting, so these
  # expansions are unquoted on purpose.
  # shellcheck disable=SC2048,SC2086
  TEMP=$(getopt D:hKTvV ${*})
  GETOPT_STATUS=${?}
  # shellcheck disable=SC2086
  set -- ${TEMP}
elif [[ "${OS:0:6}" == "Darwin" || "${OS:0:5}" == "SunOS" || "${OS:0:5}" == "HP-UX" ]] ; then
  echo "This plugin is not yet supported in your platform."
  echo "Please create a ticket in GitHub if you want to enable your current platform."
  echo "https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
elif [[ "${OS:0:6}" == "CYGWIN" ]] ; then
  echo "This plugin is not supported in Cygwin"
  RETURN=${UNKNOWN}
  CONTINUE=false
else
  echo "The platform is unknown: ${OS}"
  echo "Please create a ticket in GitHub: https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${GETOPT_STATUS} -ne 0 ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true ]] ; then
  if [[ "${OS:0:5}" == "Linux" ]] ; then
    # Note the quotes around ${TEMP}: they are essential!
    eval set -- "${TEMP}"
  fi
  HELP=false
  VERSION=false
  CHECK_MK=false
  # Verbosity level
  VERBOSE=0
  # Trace activated
  TRACE=false
  LOG=/tmp/${APPL_NAME}.log
  HISTORY_DIRECTORY=/tmp
  if [[ "${OS:0:5}" == "Linux" ]] ; then
    while true ; do
      case "${1}" in
        -D | --directory ) HISTORY_DIRECTORY=$(first_word "${2}") ; shift 2 ;;
        -h | --help ) HELP=true ; shift ;;
        -K | --mk ) CHECK_MK=true ; shift ;;
        -T | --trace ) TRACE=true ; shift ;;
        -v | --verbose ) VERBOSE=$(( VERBOSE + 1 )) ; shift ;;
        -V | --version ) VERSION=true ; shift ;;
        -- ) shift ; break ;;
        * ) break ;;
      esac
    done
  elif [[ "${OS:0:3}" == "AIX" ]] ; then
    while [[ ${1} != -- ]] ; do
      case "${1}" in
        -D) HISTORY_DIRECTORY=$(first_word "${2}") ; shift 2 ;;
        -h) HELP=true ; shift ;;
        -K) CHECK_MK=true ; shift ;;
        -T) TRACE=true ; shift ;;
        -v) VERBOSE=$(( VERBOSE + 1 )) ; shift ;;
        -V) VERSION=true ; shift ;;
        * ) break ;;
      esac
    done
  fi
fi

if [[ ${TRACE} == true ]] ; then
  {
    echo ">>>>>"
    date
    # This script has no instance; the history directory is what identifies
    # the execution.
    echo "History at ${HISTORY_DIRECTORY}"
    echo "PID ${$}"
  } >> "${LOG}"
fi

ECHO="help:${HELP}, version:${VERSION}, verbose:${VERBOSE}"
ECHO="${ECHO}, check_mk:${CHECK_MK}, directory:${HISTORY_DIRECTORY}"

if [[ ${VERBOSE} -ge 2 ]] ; then
  echo "${ECHO}"
fi

if [[ ${TRACE} == true ]] ; then
  echo "PARAMS:${ECHO}" >> "${LOG}"
fi

if [[ ${CONTINUE} == true && ${HELP} == true ]] ; then
  print_help "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${VERSION} == true ]] ; then
  print_revision
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

FIRST_EXEC=false
if [[ ${CONTINUE} == true ]] ; then
  # Defines the directory that holds the history.
  DIR=${HISTORY_DIRECTORY}/${APPL_NAME}

  # Creates directory
  if [[ ! -d ${DIR} ]] ; then
    if [[ ${VERBOSE} -ge 1 ]] ; then
      echo "Directory ${DIR} does not exist"
    fi
    mkdir -p -- "${DIR}"
  elif [[ ${VERBOSE} -ge 1 ]] ; then
    echo "Directory does exist: ${DIR}"
  fi

  # Every git command below works on the current directory, so the check must
  # stop here when the directory cannot be entered.
  if cd -- "${DIR}" 2> /dev/null ; then
    # Absolute path: a relative -D value would not resolve after the cd.
    DIR=$(pwd)
    FILE1=${DIR}/sysctl.log
    FILE2=${DIR}/passwd.log
    FILE3=${DIR}/group.log
  else
    OUTPUT="The history directory is not accessible: ${DIR}"
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

# The first execution is the one that has to create the repository.
if [[ ${CONTINUE} == true && ! -d ${DIR}/.git ]] ; then
  if git init > /dev/null ; then
    FIRST_EXEC=true
  else
    OUTPUT="The git repository could not be created in ${DIR}"
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

if [[ ${CONTINUE} == true ]] ; then
  # Redirects the os configuration to the files
  snapshot_file /etc/sysctl.conf "${FILE1}"
  snapshot_file /etc/passwd "${FILE2}"
  snapshot_file /etc/group "${FILE3}"

  if [[ ${FIRST_EXEC} == true ]] ; then
    if [[ ${VERBOSE} -ge 1 ]] ; then
      echo "Adding files"
    fi
    # USER and USERNAME are frequently unset when the plugin is started by a
    # daemon, and git refuses to commit without an identity.
    GIT_USER=${USER:-$(id -un)}
    git add -- "${FILE1}" "${FILE2}" "${FILE3}" > /dev/null
    git config user.email "${GIT_USER}@${HOSTNAME:-$(hostname)}" > /dev/null
    git config user.name "Nagios User - ${USERNAME:-${GIT_USER}}" > /dev/null
    if git commit -m "Initial commit" -- "${FILE1}" "${FILE2}" "${FILE3}" > /dev/null ; then
      OUTPUT="First execution, nothing to compare"
      PERFORMANCE="'Changes'=0"
      RETURN=${OK}
    else
      OUTPUT="The initial commit could not be created in ${DIR}"
      RETURN=${UNKNOWN}
    fi
  else
    if [[ ${VERBOSE} -ge 1 ]] ; then
      echo "Looking for differences"
    fi
    # Gets the changes from Git
    DIFF_FILE1=$(git diff -- "${FILE1}")
    DIFF_FILE2=$(git diff -- "${FILE2}")
    DIFF_FILE3=$(git diff -- "${FILE3}")

    if [[ "${DIFF_FILE1}" == "" && "${DIFF_FILE2}" == "" \
      && "${DIFF_FILE3}" == "" ]] ; then
      OUTPUT="No changes in the configuration"
      PERFORMANCE="'Changes'=1"
      RETURN=${OK}
    else
      if [[ ${VERBOSE} -ge 1 ]] ; then
        echo "There were changes in the configuration"
      fi
      if [[ ${VERBOSE} -ge 2 ]] ; then
        # Quoted: the files may contain glob characters.
        printf '%s\n' "${DIFF_FILE1}" "${DIFF_FILE2}" "${DIFF_FILE3}"
      fi

      # 1 means "compared, nothing changed"; each changed file adds one. Every
      # changed file is committed, so it becomes the baseline of the next run.
      PERFORMANCE_VALUE=1
      STAMP=$(date +'%Y-%m-%d-%H.%M.%S')
      if [[ "${DIFF_FILE1}" != "" ]] ; then
        git commit -m "Changes in kernel ${STAMP}" -- "${FILE1}" > /dev/null
        OUTPUT="${OUTPUT}Changes in the kernel - "
        PERFORMANCE_VALUE=$(( PERFORMANCE_VALUE + 1 ))
        RETURN=${WARNING}
      fi
      if [[ "${DIFF_FILE2}" != "" ]] ; then
        git commit -m "Changes in passwd file ${STAMP}" -- "${FILE2}" > /dev/null
        OUTPUT="${OUTPUT}Changes in the passwd file - "
        PERFORMANCE_VALUE=$(( PERFORMANCE_VALUE + 1 ))
        RETURN=${WARNING}
      fi
      if [[ "${DIFF_FILE3}" != "" ]] ; then
        git commit -m "Changes in group file ${STAMP}" -- "${FILE3}" > /dev/null
        OUTPUT="${OUTPUT}Changes in the group file - "
        PERFORMANCE_VALUE=$(( PERFORMANCE_VALUE + 1 ))
        RETURN=${WARNING}
      fi
      PERFORMANCE="'Changes'=${PERFORMANCE_VALUE}"
    fi
  fi
  PERF_MK=${PERFORMANCE}
fi

# Prints the output.
if [[ ${OUTPUT} == "" ]] ; then
  OUTPUT="Note: The test was not executed."
fi
# Builds the output.
if [[ ${CHECK_MK} == true ]] ; then
  # NOTE(review): TYPE is never assigned in this script, so it expands to
  # nothing unless it comes from the environment.
  echo "${RETURN} diffOs${TYPE} ${PERF_MK} ${OUTPUT}"
else
  printf '%s|%s\n%s|%s\n' "${OUTPUT}" "${PERFORMANCE}" "${LONG_OUTPUT}" "${LONG_PERFORMANCE}"
fi
# Returns the error code.
if [[ ${VERBOSE} -ge 2 ]] ; then
  echo "Return code: ${RETURN}"
fi
if [[ ${TRACE} == true ]] ; then
  {
    printf 'OUTPUT:%s\nPERF:%s\nLONG_OUT:%s\nLONGPERF:%s\nRET_CODE:%s\n' \
      "${OUTPUT}" "${PERFORMANCE}" "${LONG_OUTPUT}" "${LONG_PERFORMANCE}" "${RETURN}"
    date
    printf '<<<<<\n\n'
  } >> "${LOG}"
fi

# Only the execution that created the lock is allowed to remove it.
if [[ ${LOCKED} == true && -r ${LOCK_FILE} ]] ; then
  rm -f -- "${LOCK_FILE}"
fi

echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Ended ${APPL_NAME} ${COPY_ARGS[*]}" >> "/tmp/${APPL_NAME}.log"

exit "${RETURN}"

--------------------------------------------------------------------------------
/check_hadr_status:
--------------------------------------------------------------------------------
#!
/usr/bin/env bash 2 | 3 | ############################################################################### 4 | # Monitor DB2 with Nagios 5 | # Copyright 2013 Andres Gomez Casanova 6 | # https://github.com/angoca/monitor-db2-with-nagios 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | ############################################################################### 20 | 21 | # Checks the HADR status by looking if both databases are connected. 22 | # 23 | # Here comes the documentation 24 | # 25 | # # 'check_hadr_status' command definition 26 | # define command { 27 | # command_name check_hadr_status 28 | # command_line $USER1$/check_by_ssh -H $HOSTADDRESS$ -l nagios -t 20 -C "scripts/check_hadr_status -i '$ARG1$' -d '$ARG2$'" 29 | # } 30 | # define service{ 31 | # host_name db2server 32 | # service_description HADR status 33 | # check_command check_hadr_status!/home/db2inst1!sample 34 | # use generic-service 35 | # } 36 | # 37 | # When using NRPE: 38 | # 39 | # In the database server: 40 | # 41 | # command[check_hadr_status]=/home/nagios/scripts/check_hadr_status -i /home/db2inst1 -d sample 42 | # 43 | # Then modify the path according to your script location, instance home dir, 44 | # database and thresholds. 
45 | # 46 | # In the Nagios server: 47 | # 48 | # define service{ 49 | # host_name db2server 50 | # service_description HADR status 51 | # check_command check_nrpe_1arg!check_hadr_status 52 | # use generic-service 53 | # } 54 | 55 | # Parameters: 56 | # -d | --database : Database name. 57 | # -h | --help : Show the help of this script. 58 | # -i | --instance : Home directory of the instance. Usually it is 59 | # /home/db2inst1. 60 | # -K | --mk : Change the output for Check_MK. 61 | # -T | --trace : Trace mode. Writes output in /tmp. 62 | # -v | --verbose : Execute the program in verbose mode. 63 | # -V | --version : Show the current version of this script. 64 | # 65 | # The exit codes are the standard for Nagios. 66 | # 67 | # 0 The plugin was able to check the service and it appeared to be functioning 68 | # properly. 69 | # 1 The plugin was able to check the service, but it appeared to be above some 70 | # "warning" threshold or did not appear to be working properly. 71 | # 2 The plugin detected that either the service was not running or it was above 72 | # some "critical" threshold. 73 | # 3 Invalid command line arguments were supplied to the plugin or low-level 74 | # failures internal to the plugin (such as unable to fork, or open a tcp 75 | # socket) that prevent it from performing the specified operation. 76 | # Higher-level errors (such as name resolution errors, socket timeouts, etc) 77 | # are outside of the control of plugins and should generally NOT be reported 78 | # as UNKNOWN states. 79 | # 80 | # Author: Andres Gomez Casanova 81 | # Version: v1.1 2013-05-26 82 | 83 | # Flag for debugging. 84 | #set -xv 85 | 86 | # Locale to print messages in English. Prevent language problems. 87 | export LANG=en_US 88 | 89 | # Version of this script.
# Prints the author and the version of this script, one per line, on stdout.
function print_revision {
  echo Andres Gomez Casanova - AngocA
  echo v1.1 2013-05-26
}

# Function to show the usage synopsis.
# Arguments: $1 - program name to show in the synopsis.
function print_usage {
  /bin/cat <<__EOT
Usage: ${1} { -i instanceHomeDirectory -d databaseName [-K] | -h | -V }
  [-T][-v]
__EOT
}

# Prints the revision, the usage synopsis and the description of every option.
# Arguments: $1 - program name to show in the synopsis.
function print_help {
  print_revision
  # Quoted so a program name with spaces reaches print_usage as one word.
  print_usage "${1}"
  # Max 80 chars width.
  /bin/cat <<__EOT
-------------------------------------------------------------------------------
Checks the HADR status of the database.
 -d | --database STRING
    Database name.
 -h | --help
    Shows the current documentation.
 -i | --instance STRING
    Instance home directory. It is usually /home/db2inst1.
 -K | --mk
    Changes the output to be compatible with Check_MK.
 -T | --trace
    Trace mode: writes date and output in /tmp.
 -v | --verbose
    Executes the script in verbose mode (multiple times).
 -V | --version
    Shows the current version of this script.
__EOT
}

# Variable to control the flow execution. Prevent Spaghetti code.
# Every later step runs only while this is "true".
CONTINUE=true

# Nagios return codes
OK=0
WARNING=1
CRITICAL=2
UNKNOWN=3
# This is the returned code. It stays UNKNOWN until a check sets a result.
RETURN=${UNKNOWN}

# Nagios Output
# Text output 80 chars | Optional Perf Data Line 1
# Long text Line 1
# Long text Line 2 | Optional Perf Data Line 2
# Optional Perf Data Line 3
OUTPUT=
PERFORMANCE=
LONG_OUTPUT=
LONG_PERFORMANCE=
# Performance data for Check_MK; "-" means that there is no data.
PERF_MK="-"

# Name of this script without its directory. Quoted because the path used to
# call the script may contain spaces.
APPL_NAME=$(basename "${0}")

# Without arguments there is nothing to check: show the usage and stop.
if [[ ${#} -eq 0 ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# The following requires GNU getopt. See the following discussion.
157 | # http://stackoverflow.com/questions/402377 158 | 159 | TEMP=$(getopt -o d:hi:KTvV --long database:,help,instance:,mk,trace,verbose,version \ 160 | -n ${APPL_NAME} -- "${@}") 161 | 162 | if [[ ${?} -ne 0 ]] ; then 163 | print_usage ${APPL_NAME} 164 | RETURN=${UNKNOWN} 165 | CONTINUE=false 166 | fi 167 | 168 | if [[ ${CONTINUE} == true ]] ; then 169 | # Note the quotes around ${TEMP}: they are essential! 170 | eval set -- "${TEMP}" 171 | 172 | HELP=false 173 | VERSION=false 174 | CHECK_MK=false 175 | # Verbosity level 176 | VERBOSE=0 177 | # Trace activated 178 | TRACE=false 179 | LOG=/tmp/${APPL_NAME}.log 180 | INSTANCE_HOME= 181 | DATABASE_NAME= 182 | while true; do 183 | case "${1}" in 184 | -d | --database ) DATABASE_NAME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 185 | -h | --help ) HELP=true ; shift ;; 186 | -i | --instance ) INSTANCE_HOME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 187 | -K | --mk ) CHECK_MK=true ; shift ;; 188 | -T | --trace ) TRACE=true ; shift ;; 189 | -v | --verbose ) VERBOSE=$(( ${VERBOSE} + 1 )) ; shift ;; 190 | -V | --version ) VERSION=true ; shift ;; 191 | -- ) shift; break ;; 192 | * ) break ;; 193 | esac 194 | done 195 | fi 196 | 197 | if [[ ${TRACE} == true ]] ; then 198 | echo ">>>>>" >> ${LOG} 199 | date >> ${LOG} 200 | echo "Instance at ${INSTANCE_HOME}" >> ${LOG} 201 | echo "PID ${$}" >> ${LOG} 202 | fi 203 | 204 | ECHO="help:${HELP}, version:${VERSION}, verbose:${VERBOSE}" 205 | ECHO="${ECHO}, check_mk:${CHECK_MK}" 206 | ECHO="${ECHO}, directory:${INSTANCE_HOME}, database:${DATABASE_NAME}" 207 | 208 | if [[ ${VERBOSE} -ge 2 ]] ; then 209 | echo ${ECHO} 210 | fi 211 | 212 | if [[ ${TRACE} == true ]] ; then 213 | echo "PARAMS:${ECHO}" >> ${LOG} 214 | fi 215 | 216 | if [[ ${CONTINUE} == true && ${HELP} == true ]] ; then 217 | print_help ${APPL_NAME} 218 | RETURN=${UNKNOWN} 219 | CONTINUE=false 220 | fi 221 | 222 | if [[ ${CONTINUE} == true && ${VERSION} == true ]] ; then 223 | print_revision ${APPL_NAME} 224 | 
RETURN=${UNKNOWN} 225 | CONTINUE=false 226 | fi 227 | 228 | if [[ ${CONTINUE} == true && ${INSTANCE_HOME} == "" ]] ; then 229 | print_usage ${APPL_NAME} 230 | RETURN=${UNKNOWN} 231 | CONTINUE=false 232 | fi 233 | 234 | if [[ ${CONTINUE} == true && ${DATABASE_NAME} == "" ]] ; then 235 | print_usage ${APPL_NAME} 236 | RETURN=${UNKNOWN} 237 | CONTINUE=false 238 | fi 239 | 240 | if [[ ${CONTINUE} == true ]] ; then 241 | if [[ -d ${INSTANCE_HOME} && -e ${INSTANCE_HOME}/sqllib/db2profile ]] ; then 242 | # Load the DB2 profile. 243 | . ${INSTANCE_HOME}/sqllib/db2profile 244 | INSTANCE_NAME=$(db2 get instance | awk '/instance/ {print $7}') 245 | else 246 | OUTPUT="Instance directory is invalid." 247 | RETURN=${UNKNOWN} 248 | CONTINUE=false 249 | fi 250 | fi 251 | 252 | if [[ ${CONTINUE} == true ]] ; then 253 | COMMAND_DATABASE="db2 list db directory" 254 | if [[ ${VERBOSE} -ge 2 ]] ; then 255 | echo "COMMAND: ${COMMAND_DATABASE}" 256 | fi 257 | DATABASE=$(${COMMAND_DATABASE}) 258 | if [[ ${TRACE} == true ]] ; then 259 | echo "RESULT:'${DATABASE}'" >> ${LOG} 260 | fi 261 | DATABASE=$(printf '%s\n' "${DATABASE}" | awk '/Database alias/ {print $4}' | grep -iw ${DATABASE_NAME}) 262 | if [[ ${VERBOSE} -ge 3 ]] ; then 263 | echo "RESULT:'${DATABASE}'" 264 | fi 265 | 266 | if [[ ${DATABASE} == "" ]] ; then 267 | OUTPUT="The database ${DATABASE_NAME} is not cataloged." 268 | RETURN=${UNKNOWN} 269 | CONTINUE=false 270 | fi 271 | fi 272 | 273 | if [[ ${CONTINUE} == true ]] ; then 274 | COMMAND_VERSION="db2level" 275 | if [[ ${VERBOSE} -ge 2 ]] ; then 276 | echo "COMMAND: ${COMMAND_VERSION}" 277 | fi 278 | VERSION=$(${COMMAND_VERSION}) 279 | if [[ ${TRACE} == true ]] ; then 280 | echo "RESULT:'${VERSION}'" >> ${LOG} 281 | fi 282 | VERSION=$(printf '%s\n' "${VERSION}" | awk '/Informational tokens/ {print $5}' | awk -F. 
'{print $1"."$2}') 283 | if [[ ${VERBOSE} -ge 3 ]] ; then 284 | echo "RESULT:'${VERSION}'" 285 | fi 286 | if [[ ( ${VERSION} != "v10.5" ) && ( ${VERSION} != "v10.1" ) && ( ${VERSION} != "v9.5" ) ]] ; then 287 | OUTPUT="The current DB2 version (${VERSION}) is not supported." 288 | RETURN=${UNKNOWN} 289 | CONTINUE=false 290 | fi 291 | fi 292 | 293 | if [[ ${CONTINUE} == true ]] ; then 294 | COMMAND_ROLE="db2 get db cfg for ${DATABASE_NAME}" 295 | if [[ ${VERBOSE} -ge 2 ]] ; then 296 | echo "COMMAND: ${COMMAND_ROLE}" 297 | fi 298 | ROLE=$(${COMMAND_ROLE}) 299 | if [[ ${TRACE} == true ]] ; then 300 | echo "RESULT:'${ROLE}'" >> ${LOG} 301 | fi 302 | ROLE=$(printf '%s\n' "${ROLE}" | awk '/HADR database role/ {print $5}') 303 | if [[ ${VERBOSE} -ge 3 ]] ; then 304 | echo "RESULT:'${ROLE}'" 305 | fi 306 | if [[ ${ROLE} == "STANDARD" ]] ; then 307 | OUTPUT="Database is not in HADR" 308 | RETURN=${UNKNOWN} 309 | else 310 | COMMAND_HADR="db2pd -db ${DATABASE_NAME} -hadr" 311 | if [[ ${VERBOSE} -ge 2 ]] ; then 312 | echo "COMMAND: ${COMMAND_HADR}" 313 | fi 314 | OUTPUT_HADR=$(${COMMAND_HADR}) 315 | if [[ ${TRACE} == true ]] ; then 316 | echo "RESULT:'${OUTPUT_HADR}'" >> ${LOG} 317 | fi 318 | if [[ ${VERSION} == "v9.7" ]] ; then 319 | CONNECTED=$(printf '%s\n' "${OUTPUT_HADR}" | awk '/onnected/ {print $1}' | tail -1) 320 | if [[ ${ROLE} == "PRIMARY" ]] ; then 321 | HADR_STATUS=$(printf '%s\n' "${OUTPUT_HADR}" | awk '/^Primary / {print $2}') 322 | if [[ ${VERBOSE} -ge 3 ]] ; then 323 | echo "RESULT:'${CONNECTED}'" 324 | echo "RESULT:'${HADR_STATUS}'" 325 | fi 326 | if [[ ${HADR_STATUS} == "Peer" ]] ; then 327 | OUTPUT="OK. Database is primary and peer" 328 | RETURN=${OK} 329 | elif [[ ${HADR_STATUS} == "Disconnected" ]] ; then 330 | OUTPUT="Standby database is not reachable (${CONNECTED})." 331 | RETURN=${CRITICAL} 332 | else 333 | OUTPUT="Database is primary and not peer. 
${HADR_STATUS} ${CONNECTED}" 334 | RETURN=${UNKNOWN} 335 | fi 336 | elif [[ ${ROLE} == "STANDBY" ]] ; then 337 | HADR_STATUS=$(printf '%s\n' "${OUTPUT_HADR}" | awk '/^Standby/ {print $2}') 338 | if [[ ${VERBOSE} -ge 3 ]] ; then 339 | echo "RESULT:'${CONNECTED}'" 340 | echo "RESULT:'${HADR_STATUS}'" 341 | fi 342 | if [[ ${HADR_STATUS} == "Peer" ]] ; then 343 | OUTPUT="OK. Database is standby and peer" 344 | RETURN=${OK} 345 | elif [[ ${HADR_STATUS} == "DisconnectedPeer" ]] ; then 346 | OUTPUT="Pimary database is not reachable (${CONNECTED})." 347 | RETURN=${CRITICAL} 348 | elif [[ ${HADR_STATUS} == "RemoteCatchupPending" ]] ; then 349 | OUTPUT="StandBy in remote catchup pending. Make sure database primary is reachable (${CONNECTED})." 350 | RETRUN=${WARNING} 351 | else 352 | OUTPUT="Database is standby and not peer. ${HADR_STATUS} ${CONNECTED}" 353 | RETURN=${UNKNOWN} 354 | fi 355 | # Divided by 1000 (3E8) 356 | HEX_DIFF=$(printf '%s\n' "${OUTPUT_HADR}" | awk '/LOG/ {print $3}' | cut -dx -f2 | sed 'N;s/\n/-/' | awk '{print "ibase=16;("$1")/3E8"}' | bc) 357 | PERFORMANCE="HexDiff_K=${HEX_DIFF}" 358 | PAGE_DIFF=$(printf '%s\n' "${OUTPUT_HADR}" | awk '/LOG/ {print $2}' | sed 'N;s/\n/-/' | bc) 359 | LOG_DIFF=$(printf '%s\n' "${OUTPUT_HADR}" | awk '/LOG/ {print $1}' | cut -d. -f1 | sed 's/^.//' | sed 'N;s/\n/-/' | bc) 360 | if [[ ${PAGE_DIFF} -lt 0 ]] ; then 361 | # The 2000 value is based on observation, but not sure. 
362 | PAGE_DIFF=$((${PAGE_DIFF}+2000)) 363 | fi 364 | LONG_PERFORMANCE="PageDiff=${PAGE_DIFF}\nLogDiff=${LOG_DIFF}" 365 | PERF_MK="${PERFORMANCE}|${PAGE_DIFF}|${LOG_DIFF}" 366 | else 367 | OUTPUT="Database role is unknown (${ROLE})" 368 | RETURN=${UNKNOWN} 369 | CONTINUE=false 370 | fi 371 | elif [[ ( ${VERSION} == "v10.5" ) || ( ${VERSION} == "v10.1" ) ]] ; then 372 | CONNECTED=$(printf '%s\n' "${OUTPUT_HADR}" | awk '/HADR_CONNECT_STATUS / {print $3}') 373 | HADR_STATUS=$(printf '%s\n' "${OUTPUT_HADR}" | awk '/HADR_STATE/ {print $3}') 374 | if [[ ${VERBOSE} -ge 3 ]] ; then 375 | echo "RESULT:'${CONNECTED}'" 376 | echo "RESULT:'${HADR_STATUS}'" 377 | fi 378 | if [[ ${ROLE} == "PRIMARY" ]] ; then 379 | if [[ ${HADR_STATUS} == "PEER" ]] ; then 380 | OUTPUT="OK. Database is primary and peer" 381 | RETURN=${OK} 382 | elif [[ ${HADR_STATUS} == "DISCONNECTED" ]] ; then 383 | OUTPUT="Standby database is not reachable (${CONNECTED})." 384 | RETURN=${CRITICAL} 385 | else 386 | OUTPUT="Database is primary and not peer. ${HADR_STATUS} ${CONNECTED}" 387 | RETURN=${UNKNOWN} 388 | fi 389 | elif [[ ${ROLE} == "STANDBY" ]] ; then 390 | if [[ ${HADR_STATUS} == "PEER" ]] ; then 391 | OUTPUT="OK. Database is standby and peer" 392 | RETURN=${OK} 393 | elif [[ ${HADR_STATUS} == "DISCONNECTED" ]] ; then 394 | OUTPUT="Pimary database is not reachable (${CONNECTED})." 395 | RETURN=${CRITICAL} 396 | elif [[ ${HADR_STATUS} == "REMOTE_CATCHUP_PENDING" ]] ; then 397 | OUTPUT="StandBy in remote catchup pending. Make sure database primary is reachable (${CONNECTED})." 398 | RETRUN=${WARNING} 399 | else 400 | OUTPUT="Database is standby and not peer. 
${HADR_STATUS} ${CONNECTED}" 401 | RETURN=${UNKNOWN} 402 | fi 403 | PAGE_PRI=$(printf '%s\n' "${OUTPUT_HADR}" | awk '/PRIMARY_LOG_FILE/ {print $4}' | awk -F, '{print $1}') 404 | PAGE_STA=$(printf '%s\n' "${OUTPUT_HADR}" | awk '/STANDBY_LOG_FILE/ {print $4}' | awk -F, '{print $1}') 405 | PAGE_DIFF=$((${PAGE_PRI}-${PAGE_STA})) 406 | LOG_PRI=$(printf '%s\n' "${OUTPUT_HADR}" | awk '/PRIMARY_LOG_FILE/ {print $r3}' | cut -c2-8) 407 | LOG_STA=$(printf '%s\n' "${OUTPUT_HADR}" | awk '/STANDBY_LOG_FILE/ {print $r3}' | cut -c2-8) 408 | LOG_DIFF=$((${LOG_PRI}-${LOG_STA})) 409 | if [[ ${PAGE_DIFF} -lt 0 ]] ; then 410 | # The 2000 value is based on observation, but not sure. 411 | PAGE_DIFF=$((${PAGE_DIFF}+2000)) 412 | fi 413 | LONG_PERFORMANCE="PageDiff=${PAGE_DIFF}\nLogDiff=${LOG_DIFF}" 414 | PERF_MK="${PERFORMANCE}|${PAGE_DIFF}|${LOG_DIFF}" 415 | else 416 | OUTPUT="Database role is unknown (${ROLE})" 417 | RETURN=${UNKNOWN} 418 | CONTINUE=false 419 | fi 420 | fi 421 | fi 422 | fi 423 | # Prints the output. 424 | if [[ ${OUTPUT} == "" ]] ; then 425 | OUTPUT="Note: The test was not executed." 426 | fi 427 | # Builds the output. 428 | if [[ ${CHECK_MK} == true ]] ; then 429 | echo "${RETURN} hadrStatus-${INSTANCE_NAME}-${DATABASE_NAME} ${PERF_MK} ${OUTPUT}" 430 | else 431 | echo -e "${OUTPUT}|${PERFORMANCE}\n${LONG_OUTPUT}|${LONG_PERFORMANCE}" 432 | fi 433 | # Returns the error code. 434 | if [[ ${VERBOSE} -ge 2 ]] ; then 435 | echo "Return code: ${RETURN}" 436 | fi 437 | if [[ ${TRACE} == true ]] ; then 438 | echo -e "OUTPUT:${OUTPUT}\nPERF:${PERFORMANCE}\nLONG_OUT:${LONG_OUTPUT}\nLONGPERF:${LONG_PERFORMANCE}\nRET_CODE:${RETURN}" >> ${LOG} 439 | date >> ${LOG} 440 | echo -e "<<<<<\n" >> ${LOG} 441 | fi 442 | exit ${RETURN} 443 | -------------------------------------------------------------------------------- /check_instance_memory: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env bash 2 | 3 | ############################################################################### 4 | # Monitor DB2 with Nagios 5 | # Copyright 2013,2014,2015 Andres Gomez Casanova 6 | # https://github.com/angoca/monitor-db2-with-nagios 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | ############################################################################### 20 | 21 | # Checks the memory usage by an instance. It does not take into account the 22 | # bufferpools, or other memory dumps, it just takes the result of the query. 23 | # The query is executed against the first connectable database of an instance. 24 | # The user used to execute the script should have special privileges in at 25 | # least one of the databases of the instance analyzed; the privileges are: 26 | # db2 grant execute on package NULLID.SQLC2H21 to user nagios 27 | # db2 grant execute on specific function SYSPROC.ADMIN_GET_DBP_MEM_USAGE to user nagios 28 | # db2 grant execute on specific function SYSPROC.ADMIN_GET_DBP_MEM_USAGE_AP to user nagios 29 | # db2 grant usage on workload SYSDEFAULTUSERWORKLOAD to user nagios 30 | # 31 | # In order to be used in Nagios, you have to configure the following. 
32 | # 33 | # # 'check_instance_memory' command definition 34 | # define command { 35 | # command_name check_instance_memory 36 | # command_line $USER1$/check_by_ssh -H $HOSTADDRESS$ -l nagios -C "scripts/check_instance_memory -i '$ARG1$'" 37 | # } 38 | # define service{ 39 | # host_name db2server 40 | # service_description Memory used by the instance 41 | # check_command check_instance_memory!/home/db2inst1 42 | # use generic-service 43 | # } 44 | # 45 | # When using NRPE: 46 | # 47 | # In the database server: 48 | # 49 | # command[check_instance_memory]=/home/nagios/scripts/check_instance_memory -i /home/db2inst1 50 | # 51 | # Then modify the path according to your script location, instance home dir 52 | # and thresholds. 53 | # 54 | # In the Nagios server: 55 | # 56 | # define service{ 57 | # host_name db2server 58 | # service_description Memory used by the instance 59 | # check_command check_nrpe_1arg!check_instance_memory 60 | # use generic-service 61 | # } 62 | # 63 | # Parameters: 64 | # -c | --critical : Quantity of used memory to be considered as critical. 65 | # -h | --help : Show the help of this script. 66 | # -i | --instance : Home directory of the instance. Usually it is 67 | # /home/db2inst1. 68 | # -K | --mk : Change the output for Check_MK. 69 | # -l | --limit : Check the thresholds. If not given, the script always returns OK. 70 | # -T | --trace : Trace mode. Writes output in /tmp. 71 | # -v | --verbose : Execute the program in verbose mode. 72 | # -V | --version : Show the current version of this script. 73 | # -w | --warning : Quantity of used memory to be considered as warning. 74 | # 75 | # In AIX, the long name options are not supported. 76 | # 77 | # The exit codes are the standard for Nagios. 78 | # 79 | # 0 The plugin was able to check the service and it appeared to be functioning 80 | # properly. 
81 | # 1 The plugin was able to check the service, but it appeared to be above some 82 | # "warning" threshold or did not appear to be working properly. 83 | # 2 The plugin detected that either the service was not running or it was above 84 | # some "critical" threshold. 85 | # 3 Invalid command line arguments were supplied to the plugin or low-level 86 | # failures internal to the plugin (such as unable to fork, or open a tcp 87 | # socket) that prevent it from performing the specified operation. 88 | # Higher-level errors (such as name resolution errors, socket timeouts, etc) 89 | # are outside of the control of plugins and should generally NOT be reported 90 | # as UNKNOWN states. 91 | # 92 | # Author: Andres Gomez Casanova 93 | # Version: v1.1 2015-10-15 94 | 95 | # Flag for debugging. 96 | #set -xv 97 | 98 | # Locale to print messages in English. Prevent language problems. 99 | export LANG=en_US 100 | 101 | # Version of this script. 102 | function print_revision { 103 | echo Andres Gomez Casanova - AngocA 104 | echo v1.1 2015-10-15 105 | } 106 | # Function to show the help 107 | function print_usage { 108 | /bin/cat <<__EOT 109 | Usage: ${1} { -i instanceHomeDirectory [-l][-c][-w][-K] | -h 110 | | -V }[-T][-v] 111 | __EOT 112 | } 113 | 114 | function print_help { 115 | print_revision 116 | print_usage ${1} 117 | # Max 80 chars width. 118 | /bin/cat <<__EOT 119 | ------------------------------------------------------------------------------- 120 | Checks the memory usage at an instance (all nodes). 121 | 122 | -c | --critical INTEGER 123 | Quantity of memory considered as critical. Limit option should be active. 124 | Default 900. 125 | -h | --help 126 | Shows the current documentation. 127 | -i | --instance STRING 128 | Instance home directory. It is usually /home/db2inst1. 129 | -K | --mk 130 | Changes the output to be compatible with Check_MK. 131 | -l | --limit 132 | Check the usage with a given limit (thresholds). 
If this option is not 133 | given, the script always returns OK. 134 | -T | --trace 135 | Trace mode: writes date and output in /tmp. 136 | -v | --verbose 137 | Executes the script in verbose mode (multiple times). 138 | -V | --version 139 | Shows the current version of this script. 140 | -w | --warning INTEGER 141 | Quantity of memory considered as warning. Limit option should be active. 142 | Default 800. 143 | 144 | In AIX, the long name options are not supported. 145 | __EOT 146 | } 147 | 148 | # Variable to control the flow execution. Prevent Spaghetti code. 149 | CONTINUE=true 150 | 151 | # Nagios return codes 152 | OK=0 153 | WARNING=1 154 | CRITICAL=2 155 | UNKNOWN=3 156 | # This is the returned code. 157 | RETURN=${UNKNOWN} 158 | 159 | # Nagios Output 160 | # Text output 80 chars | Optional Perf Data Line 1 161 | # Long text Line 1 162 | # Long text Line 2 | Optional Perf Data Line 2 163 | # Optional Perf Data Line 3 164 | OUTPUT= 165 | PERFORMANCE= 166 | LONG_OUTPUT= 167 | LONG_PERFORMANCE= 168 | PERF_MK="-" 169 | 170 | APPL_NAME=$(basename ${0}) 171 | 172 | echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Started ${APPL_NAME} $@" >> /tmp/${APPL_NAME}.log 173 | 174 | # Checks the lock file does not exist. 175 | # The lock file is the way the command was called with its parameters 176 | # without spaces. 177 | COPY_ARGS=("${@}") 178 | LOCK_FILE= 179 | for VALUE in "${COPY_ARGS[@]}" ; do 180 | LOCK_FILE="${LOCK_FILE}${VALUE}" 181 | done 182 | LOCK_FILE=${LOCK_FILE//\//} 183 | LOCK_FILE=${LOCK_FILE//\\/} 184 | LOCK_FILE=${LOCK_FILE//\:/} 185 | LOCK_FILE=${LOCK_FILE//\*/} 186 | LOCK_FILE=${LOCK_FILE//\|/} 187 | LOCK_FILE="/tmp/${APPL_NAME}${LOCK_FILE}.lock" 188 | if [[ ! -r ${LOCK_FILE} ]] ; then 189 | echo $$ > ${LOCK_FILE} 190 | LOCKED=true 191 | else 192 | # If it exist, then check if the process is running. 193 | EXIST=$(ps -ef | grep $(cat ${LOCK_FILE}) | grep ${APPL_NAME}) 194 | # If process is not running, delete it. 
195 | if [[ ${EXIST} == "" ]] ; then 196 | rm ${LOCK_FILE} 197 | if [[ ! -r ${LOCK_FILE} ]] ; then 198 | echo $$ > ${LOCK_FILE} 199 | LOCKED=true 200 | else 201 | OUTPUT="The lock file cannot be replaced: ${LOCK_FILE}" 202 | CONTINUE=false 203 | RETURN=${UNKNOWN} 204 | fi 205 | else 206 | OUTPUT="An instance of the script with the same parameters is already running." 207 | CONTINUE=false 208 | RETURN=${UNKNOWN} 209 | fi 210 | fi 211 | 212 | if [[ ${#} -eq 0 ]] ; then 213 | print_usage ${APPL_NAME} 214 | RETURN=${UNKNOWN} 215 | CONTINUE=false 216 | fi 217 | 218 | # Checks the operating system. geopt works different in AIX than in Linux. 219 | OS=$(uname) 220 | 221 | if [[ "${OS:0:5}" == "Linux" ]] ; then 222 | # The following requires GNU getopt. See the following discussion. 223 | # http://stackoverflow.com/questions/402377 224 | 225 | TEMP=$(getopt -o c:hi:KlTvVw: --long critical:,help,instance:,mk,limit,trace,verbose,version,warning: \ 226 | -n ${APPL_NAME} -- "${@}") 227 | 228 | elif [[ "${OS:0:3}" == "AIX" ]] ; then 229 | set -- $(getopt c:hi:KlTvVw: ${*}) 230 | elif [[ "${OS:0:6}" == "Darwin" || "${OS:0:5}" == "SunOS" || "${OS:0:5}" == "HP-UX" ]] ; then 231 | echo "This plugin is not yet supported in your platform." 232 | echo "Please create a ticket in GitHub if you want to enable your current platform." 
233 | echo "https://github.com/angoca/monitor-db2-with-nagios/issues" 234 | RETURN=${UNKNOWN} 235 | CONTINUE=false 236 | elif [[ "${OS:0:6}" == "CYGWIN" ]] ; then 237 | echo "This plugin is not supported in Cygwin" 238 | RETURN=${UNKNOWN} 239 | CONTINUE=false 240 | else 241 | echo "The platform is unknown: ${OS}" 242 | echo "Please create a ticket in GitHub: https://github.com/angoca/monitor-db2-with-nagios/issues" 243 | RETURN=${UNKNOWN} 244 | CONTINUE=false 245 | fi 246 | 247 | if [[ ${?} -ne 0 ]] ; then 248 | print_usage ${APPL_NAME} 249 | RETURN=${UNKNOWN} 250 | CONTINUE=false 251 | fi 252 | 253 | if [[ ${CONTINUE} == true ]] ; then 254 | if [[ "${OS}" == "Linux" ]] ; then 255 | # Note the quotes around ${TEMP}: they are essential! 256 | eval set -- "${TEMP}" 257 | fi 258 | HELP=false 259 | VERSION=false 260 | CHECK_MK=false 261 | # Verbosity level 262 | VERBOSE=0 263 | # Trace activated 264 | TRACE=false 265 | LOG=/tmp/${APPL_NAME}.log 266 | INSTANCE_HOME= 267 | LIMIT=false 268 | WARNING_THRES=800 269 | CRITICAL_THRES=900 270 | if [[ "${OS:0:5}" == "Linux" ]] ; then 271 | while true; do 272 | case "${1}" in 273 | -c | --critical ) CRITICAL_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 274 | -h | --help ) HELP=true ; shift ;; 275 | -i | --instance ) INSTANCE_HOME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 276 | -K | --mk ) CHECK_MK=true ; shift ;; 277 | -l | --limit ) LIMIT=true ; shift ;; 278 | -T | --trace ) TRACE=true ; shift ;; 279 | -v | --verbose ) VERBOSE=$(( ${VERBOSE} + 1 )) ; shift ;; 280 | -V | --version ) VERSION=true ; shift ;; 281 | -w | --warning ) WARNING_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 282 | -- ) shift ; break ;; 283 | * ) break ;; 284 | esac 285 | done 286 | elif [[ "${OS:0:3}" = "AIX" ]] ; then 287 | while [[ $1 != -- ]] ; do 288 | case "${1}" in 289 | -c) CRITICAL_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 290 | -h) HELP=true ; shift ;; 291 | -i) INSTANCE_HOME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 292 | -K | --mk ) 
CHECK_MK=true ; shift ;; 293 | -l) LIMIT=true ; shift ;; 294 | -T) TRACE=true ; shift ;; 295 | -v) VERBOSE=$(( ${VERBOSE} + 1 )) ; shift ;; 296 | -V) VERSION=true ; shift ;; 297 | -w) WARNING_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 298 | * ) break ;; 299 | esac 300 | done 301 | fi 302 | fi 303 | 304 | if [[ ${TRACE} == true ]] ; then 305 | echo ">>>>>" >> ${LOG} 306 | date >> ${LOG} 307 | echo "Instance at ${INSTANCE_HOME}" >> ${LOG} 308 | echo "PID ${$}" >> ${LOG} 309 | fi 310 | 311 | ECHO="help:${HELP}, version:${VERSION}, verbose:${VERBOSE}" 312 | ECHO="${ECHO}, directory:${INSTANCE_HOME}, limit:${LIMIT}" 313 | ECHO="${ECHO}, check_mk:${CHECK_MK}" 314 | ECHO="${ECHO}, warning:${WARNING_THRES}, critical:${CRITICAL_THRES}" 315 | 316 | if [[ ${VERBOSE} -ge 2 ]] ; then 317 | echo ${ECHO} 318 | fi 319 | 320 | if [[ ${TRACE} == true ]] ; then 321 | echo "PARAMS:${ECHO}" >> ${LOG} 322 | fi 323 | 324 | if [[ ${CONTINUE} == true && ${HELP} == true ]] ; then 325 | print_help ${APPL_NAME} 326 | RETURN=${UNKNOWN} 327 | CONTINUE=false 328 | fi 329 | 330 | if [[ ${CONTINUE} == true && ${VERSION} == true ]] ; then 331 | print_revision ${APPL_NAME} 332 | RETURN=${UNKNOWN} 333 | CONTINUE=false 334 | fi 335 | 336 | if [[ ${CONTINUE} == true && ${INSTANCE_HOME} == "" ]] ; then 337 | print_usage ${APPL_NAME} 338 | RETURN=${UNKNOWN} 339 | CONTINUE=false 340 | fi 341 | 342 | if [[ ${CONTINUE} == true 343 | && ( ${WARNING_THRES} == "" || ${CRITICAL_THRES} == "" 344 | || ! ${WARNING_THRES} =~ ^[0-9]+$ || ! ${CRITICAL_THRES} =~ ^[0-9]+$ 345 | || ${WARNING_THRES} -le 0 || ${CRITICAL_THRES} -le 0 346 | || ${WARNING_THRES} -ge ${CRITICAL_THRES} ) ]] ; then 347 | print_usage ${APPL_NAME} 348 | echo "Warning threshold should be less than critical threshold." 349 | echo "Threshold should be greater than 0." 350 | RETURN=${UNKNOWN} 351 | CONTINUE=false 352 | fi 353 | 354 | if [[ ${CONTINUE} == true ]] ; then 355 | # Trims the characters. 
356 | INSTANCE_HOME=$(echo ${INSTANCE_HOME} | cut -d' ' -f1) 357 | if [[ -d ${INSTANCE_HOME} && -e ${INSTANCE_HOME}/sqllib/db2profile ]] ; then 358 | # Load the DB2 profile. 359 | . ${INSTANCE_HOME}/sqllib/db2profile 360 | INSTANCE_NAME=$(db2 get instance | awk '/instance/ {print $7}') 361 | else 362 | OUTPUT="Instance directory is invalid." 363 | RETURN=${UNKNOWN} 364 | CONTINUE=false 365 | fi 366 | fi 367 | 368 | if [[ ${CONTINUE} == true ]] ; then 369 | COMMAND_DATABASE="db2 list db directory" 370 | if [[ ${VERBOSE} -ge 2 ]] ; then 371 | echo "COMMAND: ${COMMAND_DATABASE}" 372 | fi 373 | DATABASE=$(${COMMAND_DATABASE}) 374 | if [[ ${TRACE} == true ]] ; then 375 | echo "RESULT:'${DATABASE}'" >> ${LOG} 376 | fi 377 | DB_LIST=$(printf '%s\n' "${DATABASE}" | awk '/Database alias/ {print $4}') 378 | if [[ ${VERBOSE} -ge 3 ]] ; then 379 | echo "RESULT:'${DB_LIST}'" 380 | fi 381 | 382 | COMMAND=" 383 | SELECT 384 | DBPARTITIONNUM, 385 | MAX_PARTITION_MEM/1048576 AS MAX_MEM_MB, 386 | CURRENT_PARTITION_MEM/1048576 AS CURRENT_MEM_MB 387 | FROM TABLE (SYSPROC.ADMIN_GET_DBP_MEM_USAGE()) AS T;" 388 | if [[ ${VERBOSE} -ge 2 ]] ; then 389 | echo "COMMAND: ${COMMAND}" 390 | fi 391 | CONT=true 392 | while read -r LINE && ${CONT} == true ; do 393 | DATABASE=${LINE} 394 | RESULT=$(db2 connect to ${DATABASE} > /dev/null ; echo ${COMMAND} | db2 +p -tx ; db2 connect reset > /dev/null) 395 | if [[ ${VERBOSE} -ge 3 ]] ; then 396 | echo "RESULT:'${RESULT}'" 397 | fi 398 | # Check for errors 399 | CONN_ERROR=$(echo ${RESULT} | grep SQL1024) 400 | 401 | SELECT_ERROR=$(echo ${RESULT} | grep SQL0551) 402 | if [[ ${CONN_ERROR} == "error" ]] ; then 403 | LONG_OUTPUT="${LONG_OUTPUT}-${RESULT}" 404 | unset RESULT 405 | if [[ ${VERBOSE} -ge 2 ]] ; then 406 | echo "Connection error at ${DATABASE}" 407 | fi 408 | elif [[ ${SELECT_ERROR} != "" ]] ; then 409 | LONG_OUTPUT="${LONG_OUTPUT}-${RESULT}" 410 | unset RESULT 411 | if [[ ${VERBOSE} -ge 2 ]] ; then 412 | echo "Lack of permissions at 
${DATABASE}" 413 | fi 414 | else 415 | RESULT_ERROR=$(echo ${RESULT} | grep SQL) 416 | if [[ -z ${RESULT_ERROR} ]] ; then 417 | # No error for the query result, thus break the while. 418 | CONT=false 419 | fi 420 | fi 421 | done < <(printf '%s\n' "${DB_LIST}" ) 422 | # There was not any database connection. Empty result. 423 | if [[ -z ${RESULT} ]] ; then 424 | OUTPUT="No connection to the databases in the instance at ${INSTANCE_HOME}." 425 | OUTPUT="${OUTPUT} Or the user does not have the required authorization" 426 | OUTPUT="${OUTPUT} to perform the query in any database." 427 | RETURN=${UNKNOWN} 428 | else 429 | while read -r LINE ; do 430 | NODE=$(echo ${LINE} | awk '{print $1}') 431 | MAX=$(echo ${LINE} | awk '{print $2}') 432 | CURRENT=$(echo ${LINE} | awk '{print $3}') 433 | if [[ ${LIMIT} == true ]] ; then 434 | if [[ ${CURRENT} -lt ${WARNING_THRES} ]] ; then 435 | OUTPUT="${OUTPUT} OK for node ${NODE}." 436 | if [[ ${RETURN} -gt ${OK} ]] ; then 437 | RETURN=${RETURN} 438 | else 439 | RETURN=${OK} 440 | fi 441 | elif [[ ${CURRENT} -lt ${CRITICAL_THRES} ]] ; then 442 | OUTPUT="${OUTPUT} Memory usage is increasing at node ${NODE}." 443 | if [[ ${RETURN} -gt ${WARNING} ]] ; then 444 | RETURN=${RETURN} 445 | else 446 | RETURN=${WARNING} 447 | fi 448 | else 449 | OUTPUT="${OUTPUT} Memory usage is critical at node ${NODE}." 450 | RETURN=${CRITICAL} 451 | fi 452 | fi 453 | if [[ ${LIMIT} == false ]] ; then 454 | OUTPUT="OK, memory usage is automatically." 455 | RETURN=${OK} 456 | fi 457 | LONG_OUTPUT="${LONG_OUTPUT} At node ${NODE} the max usage memory is ${MAX}MB and the current usage memory ${CURRENT}MB." 
458 | PERFORMANCE="${PERFORMANCE} 'Current_memory_at_node_${NODE}'=${CURRENT}MB" 459 | LONG_PERFORMANCE="${LONG_PERFORMANCE} 'Max_memory_at_node_${NODE}'=${MAX}MB" 460 | if [[ ${PERF_MK} == "-" ]] ; then 461 | PERF_MK="'Current_memory_at_node_${NODE}'=${CURRENT}MB|'Max_memory_at_node_${NODE}'=${MAX}MB" 462 | else 463 | PERF_MK="'Current_memory_at_node_${NODE}'=${CURRENT}MB|${PERy_MK}|'Max_memory_at_node_${NODE}'=${MAX}MB" 464 | fi 465 | done < <(printf '%s\n' "${RESULT}" ) 466 | fi 467 | fi 468 | 469 | # Prints the output. 470 | if [[ ${OUTPUT} == "" ]] ; then 471 | OUTPUT="Note: The test was not executed." 472 | fi 473 | # Builds the output. 474 | if [[ ${CHECK_MK} == true ]] ; then 475 | echo "${RETURN} instanceMemory-${INSTANCE_NAME} ${PERF_MK} ${OUTPUT}" 476 | else 477 | echo -e "${OUTPUT}|${PERFORMANCE}\n${LONG_OUTPUT}|${LONG_PERFORMANCE}" 478 | fi 479 | # Returns the error code. 480 | if [[ ${VERBOSE} -ge 2 ]] ; then 481 | echo "Return code: ${RETURN}" 482 | fi 483 | if [[ ${TRACE} == true ]] ; then 484 | echo -e "OUTPUT:${OUTPUT}\nPERF:${PERFORMANCE}\nLONG_OUT:${LONG_OUTPUT}\nLONGPERF:${LONG_PERFORMANCE}\nRET_CODE:${RETURN}" >> ${LOG} 485 | date >> ${LOG} 486 | echo -e "<<<<<\n" >> ${LOG} 487 | fi 488 | 489 | if [[ ${LOCKED} == true && -r ${LOCK_FILE} ]] ; then 490 | rm ${LOCK_FILE} 491 | fi 492 | 493 | echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Ended ${APPL_NAME} ${COPY_ARGS[@]}" >> /tmp/${APPL_NAME}.log 494 | 495 | exit ${RETURN} 496 | 497 | -------------------------------------------------------------------------------- /check_instance_up: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env bash 2 | 3 | ############################################################################### 4 | # Monitor DB2 with Nagios 5 | # Copyright 2013,2014,2015 Andres Gomez Casanova 6 | # https://github.com/angoca/monitor-db2-with-nagios 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | ############################################################################### 20 | 21 | # This script queries the status of the instance by retrieving the port number 22 | # or service name from the configuration, and then querying netstat to check if 23 | # there is an associated service with that port. 24 | # 25 | # It is necessary to execute this script with a user with at least SYSMON 26 | # authority, in order to retrieve all the necessary information. If not, the 27 | # script will produce a critical alert indicating that the PID of the instance 28 | # cannot be retrieved. 29 | # 30 | # This script was based on the plugins written by Felipe Alkain de Souza. 31 | # http://www.dbatodba.com/db2/scripts-db2/nagios-plugins 32 | # 33 | # In order to be used in Nagios, you have to configure the following. 
34 | # 35 | # # 'check_instance_up' command definition 36 | # define command { 37 | # command_name check_instance_up 38 | # command_line $USER1$/check_by_ssh -H $HOSTADDRESS$ -l nagios -C "scripts/check_instance_up -i '$ARG1$'" 39 | # } 40 | # define service{ 41 | # host_name db2server 42 | # service_description Instance status 43 | # check_command check_instance_up!/home/db2inst1 44 | # use generic-service 45 | # } 46 | # 47 | # When using NRPE: 48 | # 49 | # In the database server: 50 | # 51 | # command[check_instance_up]=/home/nagios/scripts/check_instance_up -i /home/db2inst1 52 | # 53 | # Then modify the path according to your script location and instance home dir. 54 | # 55 | # In the Nagios server: 56 | # 57 | # define service{ 58 | # host_name db2server 59 | # service_description Instance status 60 | # check_command check_nrpe_1arg!check_instance_up 61 | # use generic-service 62 | # } 63 | # 64 | # It is necessary to execute this script with a user with at least SYSMON authority, in 65 | # order to execute db2pd. 66 | # 67 | # Parameters: 68 | # -h | --help : Show the help of this script. 69 | # -i | --instance : Home directory of the instance. Usually it is 70 | # /home/db2inst1. 71 | # -K | --mk : Change the output for Check_MK. 72 | # -T | --trace : Trace mode. Writes output in /tmp. 73 | # -v | --verbose : Execute the program in verbose mode. 74 | # -V | --version : Show the current version of this script. 75 | # 76 | # In AIX, the long name options are not supported. 77 | # 78 | # The exit codes are the standard for Nagios. 79 | # 80 | # 0 The plugin was able to check the service and it appeared to be functioning 81 | # properly. 82 | # 1 The plugin was able to check the service, but it appeared to be above some 83 | # "warning" threshold or did not appear to be working properly. 84 | # 2 The plugin detected that either the service was not running or it was above 85 | # some "critical" threshold. 
#  3 Invalid command line arguments were supplied to the plugin or low-level
#    failures internal to the plugin (such as unable to fork, or open a tcp
#    socket) that prevent it from performing the specified operation.
#    Higher-level errors (such as name resolution errors, socket timeouts, etc)
#    are outside of the control of plugins and should generally NOT be reported
#    as UNKNOWN states.
#
# Author: Andres Gomez Casanova
# Version: v1.1 2015-10-15

# Flag for debugging.
#set -xv

# Locale to print messages in English. Prevent language problems.
export LANG=en_US

# Prints the author and the version of this script to stdout.
function print_revision {
  echo Andres Gomez Casanova - AngocA
  echo v1.1 2015-10-15
}

# Prints the one-line synopsis of the script to stdout.
# Arguments: $1 - name of the script, shown at the start of the synopsis.
function print_usage {
  /bin/cat <<__EOT
Usage: ${1} { -i instanceHomeDirectory [-K]| -h | -V }[-T][-v]
__EOT
}

# Prints the revision, the synopsis and the description of every option.
# Arguments: $1 - name of the script, forwarded to print_usage.
function print_help {
  print_revision
  print_usage "${1}"
  # Max 80 chars width.
  /bin/cat <<__EOT
-------------------------------------------------------------------------------
This script checks if the instance is active by querying netstat.
-h | --help
    Shows the current documentation.
-i | --instance STRING
    Instance home directory. It is usually /home/db2inst1.
-K | --mk
    Changes the output to be compatible with Check_MK.
-T | --trace
    Trace mode: writes date and output in /tmp.
-v | --verbose
    Executes the script in verbose mode (multiple times).
-V | --version
    Shows the current version of this script.

In AIX, the long name options are not supported.
__EOT
}

# Variable to control the flow execution. Prevent Spaghetti code.
139 | CONTINUE=true 140 | 141 | # Nagios return codes 142 | OK=0 143 | WARNING=1 144 | CRITICAL=2 145 | UNKNOWN=3 146 | # This is the returned code. 147 | RETURN=${UNKNOWN} 148 | 149 | # Nagios Output 150 | # Text output 80 chars | Optional Perf Data Line 1 151 | # Long text Line 1 152 | # Long text Line 2 | Optional Perf Data Line 2 153 | # Optional Perf Data Line 3 154 | OUTPUT= 155 | PERFORMANCE= 156 | LONG_OUTPUT= 157 | LONG_PERFORMANCE= 158 | PERF_MK="-" 159 | 160 | APPL_NAME=$(basename ${0}) 161 | 162 | echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Started ${APPL_NAME} $@" >> /tmp/${APPL_NAME}.log 163 | 164 | # Checks the lock file does not exist. 165 | # The lock file is the way the command was called with its parameters 166 | # without spaces. 167 | COPY_ARGS=("${@}") 168 | LOCK_FILE= 169 | for VALUE in "${COPY_ARGS[@]}" ; do 170 | LOCK_FILE="${LOCK_FILE}${VALUE}" 171 | done 172 | LOCK_FILE=${LOCK_FILE//\//} 173 | LOCK_FILE=${LOCK_FILE//\\/} 174 | LOCK_FILE=${LOCK_FILE//\:/} 175 | LOCK_FILE=${LOCK_FILE//\*/} 176 | LOCK_FILE=${LOCK_FILE//\|/} 177 | LOCK_FILE="/tmp/${APPL_NAME}${LOCK_FILE}.lock" 178 | if [[ ! -r ${LOCK_FILE} ]] ; then 179 | echo $$ > ${LOCK_FILE} 180 | LOCKED=true 181 | else 182 | # If it exist, then check if the process is running. 183 | EXIST=$(ps -ef | grep $(cat ${LOCK_FILE}) | grep ${APPL_NAME}) 184 | # If process is not running, delete it. 185 | if [[ ${EXIST} == "" ]] ; then 186 | rm ${LOCK_FILE} 187 | if [[ ! -r ${LOCK_FILE} ]] ; then 188 | echo $$ > ${LOCK_FILE} 189 | LOCKED=true 190 | else 191 | OUTPUT="The lock file cannot be replaced: ${LOCK_FILE}" 192 | CONTINUE=false 193 | RETURN=${UNKNOWN} 194 | fi 195 | else 196 | OUTPUT="An instance of the script with the same parameters is already running." 197 | CONTINUE=false 198 | RETURN=${UNKNOWN} 199 | fi 200 | fi 201 | 202 | if [[ ${#} -eq 0 ]] ; then 203 | print_usage ${APPL_NAME} 204 | RETURN=${UNKNOWN} 205 | CONTINUE=false 206 | fi 207 | 208 | # Checks the operating system. 
geopt works different in AIX than in Linux. 209 | OS=$(uname) 210 | 211 | if [[ "${OS:0:5}" == "Linux" ]] ; then 212 | # The following requires GNU getopt. See the following discussion. 213 | # http://stackoverflow.com/questions/402377 214 | 215 | TEMP=$(getopt -o hi:KTvV --long help,instance:,mk,trace,verbose,version \ 216 | -n ${APPL_NAME} -- "${@}") 217 | elif [[ "${OS:0:3}" == "AIX" ]] ; then 218 | set -- $(getopt hi:KTvV ${*}) 219 | elif [[ "${OS:0:6}" == "Darwin" || "${OS:0:5}" == "SunOS" || "${OS:0:5}" == "HP-UX" ]] ; then 220 | echo "This plugin is not yet supported in your platform." 221 | echo "Please create a ticket in GitHub if you want to enable your current platform." 222 | echo "https://github.com/angoca/monitor-db2-with-nagios/issues" 223 | RETURN=${UNKNOWN} 224 | CONTINUE=false 225 | elif [[ "${OS:0:6}" == "CYGWIN" ]] ; then 226 | echo "This plugin is not supported in Cygwin" 227 | RETURN=${UNKNOWN} 228 | CONTINUE=false 229 | else 230 | echo "The platform is unknown: ${OS}" 231 | echo "Please create a ticket in GitHub: https://github.com/angoca/monitor-db2-with-nagios/issues" 232 | RETURN=${UNKNOWN} 233 | CONTINUE=false 234 | fi 235 | 236 | if [[ ${?} -ne 0 ]] ; then 237 | print_usage ${APPL_NAME} 238 | RETURN=${UNKNOWN} 239 | CONTINUE=false 240 | fi 241 | 242 | if [[ ${CONTINUE} == true ]] ; then 243 | if [[ "${OS}" == "Linux" ]] ; then 244 | # Note the quotes around ${TEMP}: they are essential! 
245 | eval set -- "${TEMP}" 246 | fi 247 | HELP=false 248 | VERSION=false 249 | CHECK_MK=false 250 | # Verbosity level 251 | VERBOSE=0 252 | # Trace activated 253 | TRACE=false 254 | LOG=/tmp/${APPL_NAME}.log 255 | INSTANCE_HOME= 256 | if [[ "${OS:0:5}" == "Linux" ]] ; then 257 | while true; do 258 | case "${1}" in 259 | -h | --help ) HELP=true ; shift ;; 260 | -i | --instance ) INSTANCE_HOME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 261 | -K | --mk ) CHECK_MK=true ; shift ;; 262 | -T | --trace ) TRACE=true ; shift ;; 263 | -v | --verbose ) VERBOSE=$(( ${VERBOSE} + 1 )) ; shift ;; 264 | -V | --version ) VERSION=true ; shift ;; 265 | -- ) shift ; break ;; 266 | * ) break ;; 267 | esac 268 | done 269 | elif [[ "${OS:0:3}" = "AIX" ]] ; then 270 | while [[ $1 != -- ]] ; do 271 | case "${1}" in 272 | -h) HELP=true ; shift ;; 273 | -i) INSTANCE_HOME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 274 | -K) CHECK_MK=true ; shift ;; 275 | -T) TRACE=true ; shift ;; 276 | -v) VERBOSE=$(( ${VERBOSE} + 1 )) ; shift ;; 277 | -V) VERSION=true ; shift ;; 278 | * ) break ;; 279 | esac 280 | done 281 | fi 282 | fi 283 | 284 | if [[ ${TRACE} == true ]] ; then 285 | echo ">>>>>" >> ${LOG} 286 | date >> ${LOG} 287 | echo "Instance at ${INSTANCE_HOME}" >> ${LOG} 288 | echo "PID ${$}" >> ${LOG} 289 | fi 290 | 291 | ECHO="help:${HELP}, version:${VERSION}, verbose:${VERBOSE}" 292 | ECHO="${ECHO}, directory:${INSTANCE_HOME}" 293 | ECHO="${ECHO}, check_mk:${CHECK_MK}" 294 | 295 | if [[ ${VERBOSE} -ge 2 ]] ; then 296 | echo ${ECHO} 297 | fi 298 | 299 | if [[ ${TRACE} == true ]] ; then 300 | echo "PARAMS:${ECHO}" >> ${LOG} 301 | fi 302 | 303 | if [[ ${CONTINUE} == true && ${HELP} == true ]] ; then 304 | print_help ${APPL_NAME} 305 | RETURN=${UNKNOWN} 306 | CONTINUE=false 307 | fi 308 | 309 | if [[ ${CONTINUE} == true && ${VERSION} == true ]] ; then 310 | print_revision ${APPL_NAME} 311 | RETURN=${UNKNOWN} 312 | CONTINUE=false 313 | fi 314 | 315 | if [[ ${CONTINUE} == true && ${INSTANCE_HOME} == "" 
]] ; then 316 | print_usage ${APPL_NAME} 317 | RETURN=${UNKNOWN} 318 | CONTINUE=false 319 | fi 320 | 321 | if [[ ${CONTINUE} == true ]] ; then 322 | # Trims the characters. 323 | INSTANCE_HOME=$(echo ${INSTANCE_HOME} | cut -d' ' -f1) 324 | if [[ -d ${INSTANCE_HOME} && -e ${INSTANCE_HOME}/sqllib/db2profile ]] ; then 325 | # Load the DB2 profile. 326 | . ${INSTANCE_HOME}/sqllib/db2profile 327 | INSTANCE_NAME=$(db2 get instance | awk '/instance/ {print $7}') 328 | else 329 | OUTPUT="Instance directory is invalid." 330 | RETURN=${UNKNOWN} 331 | CONTINUE=false 332 | fi 333 | fi 334 | 335 | if [[ ${CONTINUE} == true ]] ; then 336 | COMMAND_PID="db2pd -edus" 337 | if [[ ${VERBOSE} -ge 2 ]] ; then 338 | echo "COMMAND: ${COMMAND_PID}" 339 | fi 340 | OUTPUT_PID=$(${COMMAND_PID} | grep 'db2sysc PID' | cut -d' ' -f3) 341 | if [[ ${VERBOSE} -ge 3 ]] ; then 342 | echo "Result: ${OUTPUT_PID}" 343 | fi 344 | 345 | COMMAND_CONF="db2 get dbm cfg" 346 | if [[ ${VERBOSE} -ge 2 ]] ; then 347 | echo "COMMAND: ${COMMAND_CONF}" 348 | fi 349 | OUTPUT_CONF=$(${COMMAND_CONF}) 350 | # Gets the service name configure in the instance. It could be a text or a number. 351 | SVCNAME=$(printf '%s\n' "${OUTPUT_CONF}" | grep SVCENAME | cut -d= -f2 | sed "s/^[ \t]*//") 352 | # Copies the value in portnum, but checks later. 353 | PORTNUM=${SVCNAME} 354 | 355 | if ! [[ ${SVCNAME} =~ ^[0-9]+$ ]] ; then 356 | # SVCNAME is a text, thus replace portnum. 357 | REGEXP="^${SVCNAME}\s" 358 | PORTNUM=$(grep -P ${REGEXP} /etc/services | awk '{print $2}' | cut -d/ -f1) 359 | fi 360 | 361 | MAX_DB=$(printf '%s\n' "${OUTPUT_CONF}" | awk '/NUMDB/ {print $9}') 362 | if [[ ${VERBOSE} -ge 3 ]] ; then 363 | echo "Result: ${MAX_DB}" 364 | fi 365 | 366 | # Checks if the port has a active service associated. 
367 | if [[ ${PORTNUM} != "" ]] ; then 368 | if [[ ${VERBOSE} -ge 2 ]] ; then 369 | echo "COMMAND: netstat -na | grep ${PORTNUM} | wc -l" 370 | fi 371 | PROC=$(netstat -na | grep ${PORTNUM} | wc -l) 372 | PROC=$(echo ${PROC}) 373 | if [[ ${VERBOSE} -ge 3 ]] ; then 374 | echo "Result: ${PROC}" 375 | fi 376 | fi 377 | 378 | if [[ ${OUTPUT_PID} == "" || ! ${OUTPUT_PID} =~ ^[0-9]+$ ]] ; then 379 | OUTPUT="Instance is not started (not listed in EDUs). " 380 | PERFORMANCE="'Started_Instance'=0.1;;0.4" 381 | RETURN=${UNKNOWN} 382 | else 383 | OUTPUT="PID: ${OUTPUT_PID}. " 384 | fi 385 | if [[ ${SVCNAME} == "" ]] ; then 386 | OUTPUT="${OUTPUT}TCP port not defined or instance output is invalid." 387 | PERFORMANCE="'Started_Instance'=0.1;;0.4" 388 | RETURN=${CRITICAL} 389 | elif [[ ${PORTNUM} == "" || ! ${PORTNUM} =~ ^[0-9]+$ ]] ; then 390 | OUTPUT="${OUTPUT}Problem retrieving port number." 391 | PERFORMANCE="'Started_Instance'=0.2;;0.4" 392 | RETURN=${CRITICAL} 393 | elif [[ ! ${PROC} =~ ^[0-9]+$ ]] ; then 394 | OUTPUT="${OUTPUT}Weird situation." 395 | PERFORMANCE="'Started_Instance'=0.0;;0.4" 396 | RETURN=${UNKNOWN} 397 | else 398 | if [[ ${PROC} -eq "0" ]] ; then 399 | OUTPUT="Instance at ${INSTANCE_HOME} is down." 400 | PERFORMANCE="'Started_Instance'=0.3;;0.4" 401 | RETURN=${CRITICAL} 402 | elif [[ ! ${OUTPUT_PID} =~ ^[0-9]+$ ]] ; then 403 | OUTPUT="Instance at ${INSTANCE_HOME} is up but the PID is unknown" 404 | PERFORMANCE="'Started_Instance'=${MAX_DB};;0.4" 405 | RETURN=${WARNING} 406 | else 407 | OUTPUT="Instance at ${INSTANCE_HOME} is up (PID ${OUTPUT_PID})." 
408 | PERFORMANCE="'Started_Instance'=${MAX_DB};;0.4" 409 | RETURN=${OK} 410 | fi 411 | fi 412 | 413 | COMMAND_ACTIVE="db2 list active databases" 414 | if [[ ${VERBOSE} -ge 2 ]] ; then 415 | echo "Command: ${COMMAND_ACTIVE}" 416 | fi 417 | 418 | ACTIVE=$(${COMMAND_ACTIVE} | grep 'Database name' | wc -l) 419 | if [[ ${VERBOSE} -ge 2 ]] ; then 420 | echo "Result: ${ACTIVE}" 421 | fi 422 | 423 | LONG_OUTPUT="There are $(echo ${ACTIVE}) active databases" 424 | LONG_PERFORMANCE="'Active_Databases'=$(echo ${ACTIVE})" 425 | PERF_MK="${PERFORMANCE}|${LONG_PERFORMANCE}" 426 | fi 427 | 428 | # Prints the output. 429 | if [[ ${OUTPUT} == "" ]] ; then 430 | OUTPUT="Note: The test was not executed." 431 | fi 432 | # Builds the output. 433 | if [[ ${CHECK_MK} == true ]] ; then 434 | echo "${RETURN} instanceUp-${INSTANCE_NAME} ${PERF_MK} ${OUTPUT}" 435 | else 436 | echo -e "${OUTPUT}|${PERFORMANCE}\n${LONG_OUTPUT}|${LONG_PERFORMANCE}" 437 | fi 438 | # Returns the error code. 439 | if [[ ${VERBOSE} -ge 2 ]] ; then 440 | echo "Return code: ${RETURN}" 441 | fi 442 | if [[ ${TRACE} == true ]] ; then 443 | echo -e "OUTPUT:${OUTPUT}\nPERF:${PERFORMANCE}\nLONG_OUT:${LONG_OUTPUT}\nLONGPERF:${LONG_PERFORMANCE}\nRET_CODE:${RETURN}" >> ${LOG} 444 | date >> ${LOG} 445 | echo -e "<<<<<\n" >> ${LOG} 446 | fi 447 | 448 | if [[ ${LOCKED} == true && -r ${LOCK_FILE} ]] ; then 449 | rm ${LOCK_FILE} 450 | fi 451 | 452 | echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Ended ${APPL_NAME} ${COPY_ARGS[@]}" >> /tmp/${APPL_NAME}.log 453 | 454 | exit ${RETURN} 455 | 456 | -------------------------------------------------------------------------------- /check_lock_wait: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env bash 2 | 3 | ############################################################################### 4 | # Monitor DB2 with Nagios 5 | # Copyright 2013,2014,2015 Andres Gomez Casanova 6 | # https://github.com/angoca/monitor-db2-with-nagios 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | ############################################################################### 20 | 21 | # This script checks for long lock waits in the database. This plugin only 22 | # works in DB2 10.5 due to the MON_GET_DATABASE table function; this table 23 | # function does not exist in previous versions. 24 | # 25 | # This script was based on the plugins written by Felipe Alkain de Souza. 26 | # http://www.dbatodba.com/db2/scripts-db2/nagios-plugins 27 | # 28 | # In order to be used in Nagios, you have to configure the following. 
29 | # 30 | # # 'check_lock_wait' command definition 31 | # define command { 32 | # command_name check_lock_wait 33 | # command_line $USER1$/check_by_ssh -H $HOSTADDRESS$ -l nagios -C "scripts/check_lock_wait -i '$ARG1$' -d '$ARG2$'" 34 | # } 35 | # define service{ 36 | # host_name db2server 37 | # service_description Long lock wait 38 | # check_command check_lock_wait!/home/db2inst1!sample 39 | # use generic-service 40 | # } 41 | # 42 | # When using NRPE: 43 | # 44 | # In the database server: 45 | # 46 | # command[check_lock_wait]=/home/nagios/scripts/check_lock_wait -i /home/db2inst1 -d sample 47 | # 48 | # Then modify the path according to your script location, instance home dir, 49 | # database and thresholds. 50 | # 51 | # In the Nagios server: 52 | # 53 | # define service{ 54 | # host_name db2server 55 | # service_description Long lock wait 56 | # check_command check_nrpe_1arg!check_lock_wait 57 | # use generic-service 58 | # } 59 | # 60 | # Parameters: 61 | # -c | --critical : Critical threshold in milliseconds. 62 | # -d | --database : Database name. 63 | # -h | --help : Show the help of this script. 64 | # -i | --instance : Home directory of the instance. Usually it is 65 | # /home/db2inst1. 66 | # -K | --mk : Change the output for Check_MK. 67 | # -T | --trace : Trace mode. Writes output in /tmp. 68 | # -u | --update : Update monitor switch before executing the command. It means 69 | # that it does not inherit values from DBM configuration. 70 | # -v | --verbose : Execute the program in verbose mode. 71 | # -V | --version : Show the current version of this script. 72 | # -w | --warning : Warning threshold in milliseconds. 73 | # 74 | # In AIX, the long name options are not supported. 75 | # 76 | # The exit codes are the standard for Nagios. 77 | # 78 | # 0 The plugin was able to check the service and it appeared to be functioning 79 | # properly. 
80 | # 1 The plugin was able to check the service, but it appeared to be above some 81 | # "warning" threshold or did not appear to be working properly. 82 | # 2 The plugin detected that either the service was not running or it was above 83 | # some "critical" threshold. 84 | # 3 Invalid command line arguments were supplied to the plugin or low-level 85 | # failures internal to the plugin (such as unable to fork, or open a tcp 86 | # socket) that prevent it from performing the specified operation. 87 | # Higher-level errors (such as name resolution errors, socket timeouts, etc) 88 | # are outside of the control of plugins and should generally NOT be reported 89 | # as UNKNOWN states. 90 | # 91 | # Author: Andres Gomez Casanova 92 | # Version: v1.2 2015-10-15 93 | 94 | # Flag for debugging. 95 | #set -xv 96 | 97 | # Locale to print messages in English. Prevent language problems. 98 | export LANG=en_US 99 | 100 | # Version of this script. 101 | function print_revision { 102 | echo Andres Gomez Casanova - AngocA 103 | echo v1.2 2015-10-15 104 | } 105 | # Function to show the help 106 | function print_usage { 107 | /bin/cat <<__EOT 108 | Usage: ${1} { -i instanceHomeDirectory -d databaseName [-c][-w][-K] 109 | [--update] | -h | -V }[-T][-v] 110 | __EOT 111 | } 112 | 113 | function print_help { 114 | print_revision 115 | print_usage ${1} 116 | # Max 80 chars width. 117 | /bin/cat <<__EOT 118 | ------------------------------------------------------------------------------- 119 | Checks for long lock waits in the database. 120 | -c | --critical INTEGER 121 | Quantity of milliseconds to consider a lock-wait as critical. 122 | Default 60000. 123 | -d | --database STRING 124 | Database name. 125 | -h | --help 126 | Shows the current documentation. 127 | -i | --instance STRING 128 | Instance home directory. It is usually /home/db2inst1. 129 | -K | --mk 130 | Changes the output to be compatible with Check_MK 131 | -T | --trace 132 | Trace mode: writes date and output in /tmp. 
133 | -u | --update 134 | Update the monitor switch for lock before query. 135 | -v | --verbose 136 | Executes the script in verbose mode (multiple times). 137 | -V | --version 138 | Shows the current version of this script. 139 | -w | --warning INTEGER 140 | Quantity of milliseconds to consider a lock-wait as warning. 141 | Default 10000. 142 | 143 | In AIX, the long name options are not supported. This script needs DB2 V10.5. 144 | __EOT 145 | } 146 | 147 | # Variable to control the flow execution. Prevent Spaghetti code. 148 | CONTINUE=true 149 | 150 | # Nagios return codes 151 | OK=0 152 | WARNING=1 153 | CRITICAL=2 154 | UNKNOWN=3 155 | # This is the returned code. 156 | RETURN=${UNKNOWN} 157 | 158 | # Nagios Output 159 | # Text output 80 chars | Optional Perf Data Line 1 160 | # Long text Line 1 161 | # Long text Line 2 | Optional Perf Data Line 2 162 | # Optional Perf Data Line 3 163 | OUTPUT= 164 | PERFORMANCE= 165 | LONG_OUTPUT= 166 | LONG_PERFORMANCE= 167 | PERF_MK="-" 168 | 169 | APPL_NAME=$(basename ${0}) 170 | 171 | echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Started ${APPL_NAME} $@" >> /tmp/${APPL_NAME}.log 172 | 173 | # Checks the lock file does not exist. 174 | # The lock file is the way the command was called with its parameters 175 | # without spaces. 176 | COPY_ARGS=("${@}") 177 | LOCK_FILE= 178 | for VALUE in "${COPY_ARGS[@]}" ; do 179 | LOCK_FILE="${LOCK_FILE}${VALUE}" 180 | done 181 | LOCK_FILE=${LOCK_FILE//\//} 182 | LOCK_FILE=${LOCK_FILE//\\/} 183 | LOCK_FILE=${LOCK_FILE//\:/} 184 | LOCK_FILE=${LOCK_FILE//\*/} 185 | LOCK_FILE=${LOCK_FILE//\|/} 186 | LOCK_FILE="/tmp/${APPL_NAME}${LOCK_FILE}.lock" 187 | if [[ ! -r ${LOCK_FILE} ]] ; then 188 | echo $$ > ${LOCK_FILE} 189 | LOCKED=true 190 | else 191 | # If it exist, then check if the process is running. 192 | EXIST=$(ps -ef | grep $(cat ${LOCK_FILE}) | grep ${APPL_NAME}) 193 | # If process is not running, delete it. 194 | if [[ ${EXIST} == "" ]] ; then 195 | rm ${LOCK_FILE} 196 | if [[ ! 
-r ${LOCK_FILE} ]] ; then 197 | echo $$ > ${LOCK_FILE} 198 | LOCKED=true 199 | else 200 | OUTPUT="The lock file cannot be replaced: ${LOCK_FILE}" 201 | CONTINUE=false 202 | RETURN=${UNKNOWN} 203 | fi 204 | else 205 | OUTPUT="An instance of the script with the same parameters is already running." 206 | CONTINUE=false 207 | RETURN=${UNKNOWN} 208 | fi 209 | fi 210 | 211 | if [[ ${#} -eq 0 ]] ; then 212 | print_usage ${APPL_NAME} 213 | RETURN=${UNKNOWN} 214 | CONTINUE=false 215 | fi 216 | 217 | # Checks the operating system. geopt works different in AIX than in Linux. 218 | OS=$(uname) 219 | 220 | if [[ "${OS:0:5}" == "Linux" ]] ; then 221 | # The following requires GNU getopt. See the following discussion. 222 | # http://stackoverflow.com/questions/402377 223 | 224 | TEMP=$(getopt -o c:d:hi:KTuvVw: --long critical:,database:,help,instance:,mk,trace,update,verbose,version,warning: \ 225 | -n ${APPL_NAME} -- "${@}") 226 | 227 | elif [[ "${OS:0:3}" == "AIX" ]] ; then 228 | set -- $(getopt c:d:hi:KTuvVw: ${*}) 229 | elif [[ "${OS:0:6}" == "Darwin" || "${OS:0:5}" == "SunOS" || "${OS:0:5}" == "HP-UX" ]] ; then 230 | echo "This plugin is not yet supported in your platform." 231 | echo "Please create a ticket in GitHub if you want to enable your current platform." 
232 | echo "https://github.com/angoca/monitor-db2-with-nagios/issues" 233 | RETURN=${UNKNOWN} 234 | CONTINUE=false 235 | elif [[ "${OS:0:6}" == "CYGWIN" ]] ; then 236 | echo "This plugin is not supported in Cygwin" 237 | RETURN=${UNKNOWN} 238 | CONTINUE=false 239 | else 240 | echo "The platform is unknown: ${OS}" 241 | echo "Please create a ticket in GitHub: https://github.com/angoca/monitor-db2-with-nagios/issues" 242 | RETURN=${UNKNOWN} 243 | CONTINUE=false 244 | fi 245 | 246 | if [[ ${?} -ne 0 ]] ; then 247 | print_usage ${APPL_NAME} 248 | RETURN=${UNKNOWN} 249 | CONTINUE=false 250 | fi 251 | 252 | if [[ ${CONTINUE} == true ]] ; then 253 | if [[ "${OS}" == "Linux" ]] ; then 254 | # Note the quotes around ${TEMP}: they are essential! 255 | eval set -- "${TEMP}" 256 | fi 257 | HELP=false 258 | VERSION=false 259 | CHECK_MK=false 260 | # Verbosity level 261 | VERBOSE=0 262 | # Trace activated 263 | TRACE=false 264 | LOG=/tmp/${APPL_NAME}.log 265 | INSTANCE_HOME= 266 | DATABASE_NAME= 267 | UPDATE_MON=false 268 | WARNING_THRES=10000 269 | CRITICAL_THRES=60000 270 | if [[ "${OS:0:5}" == "Linux" ]] ; then 271 | while true; do 272 | case "${1}" in 273 | -c | --critical ) CRITICAL_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 274 | -d | --database ) DATABASE_NAME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 275 | -h | --help ) HELP=true ; shift ;; 276 | -i | --instance ) INSTANCE_HOME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 277 | -K | --mk ) CHECK_MK=true ; shift ;; 278 | -T | --trace ) TRACE=true ; shift ;; 279 | -u | --update ) UPDATE_MON=true ; shift ;; 280 | -v | --verbose ) VERBOSE=$(( ${VERBOSE} + 1 )) ; shift ;; 281 | -V | --version ) VERSION=true ; shift ;; 282 | -w | --warning ) WARNING_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 283 | -- ) shift ; break ;; 284 | * ) break ;; 285 | esac 286 | done 287 | elif [[ "${OS:0:3}" = "AIX" ]] ; then 288 | while [[ $1 != -- ]] ; do 289 | case "${1}" in 290 | -c) CRITICAL_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 
291 | -d) DATABASE_NAME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 292 | -h) HELP=true ; shift ;; 293 | -i) INSTANCE_HOME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 294 | -K) CHECK_MK=true ; shift ;; 295 | -T) TRACE=true ; shift ;; 296 | -u) UPDATE_MON=true ; shift ;; 297 | -v) VERBOSE=$(( ${VERBOSE} + 1 )) ; shift ;; 298 | -V) VERSION=true ; shift ;; 299 | -w) WARNING_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;; 300 | * ) break ;; 301 | esac 302 | done 303 | fi 304 | fi 305 | 306 | if [[ ${TRACE} == true ]] ; then 307 | echo ">>>>>" >> ${LOG} 308 | date >> ${LOG} 309 | echo "Instance at ${INSTANCE_HOME}" >> ${LOG} 310 | echo "PID ${$}" >> ${LOG} 311 | fi 312 | 313 | ECHO="help:${HELP}, version:${VERSION}, verbose:${VERBOSE}" 314 | ECHO="${ECHO}, directory:${INSTANCE_HOME}, database:${DATABASE_NAME}" 315 | ECHO="${ECHO}, warning:${WARNING_THRES}, critical:${CRITICAL_THRES}" 316 | ECHO="${ECHO}, update:${UPDATE_MON}, check_mk:${CHECK_MK}" 317 | 318 | if [[ ${VERBOSE} -ge 2 ]] ; then 319 | echo ${ECHO} 320 | fi 321 | 322 | if [[ ${TRACE} == true ]] ; then 323 | echo "PARAMS:${ECHO}" >> ${LOG} 324 | fi 325 | 326 | if [[ ${CONTINUE} == true && ${HELP} == true ]] ; then 327 | print_help ${APPL_NAME} 328 | RETURN=${UNKNOWN} 329 | CONTINUE=false 330 | fi 331 | 332 | if [[ ${CONTINUE} == true && ${VERSION} == true ]] ; then 333 | print_revision ${APPL_NAME} 334 | RETURN=${UNKNOWN} 335 | CONTINUE=false 336 | fi 337 | 338 | if [[ ${CONTINUE} == true && ${INSTANCE_HOME} == "" ]] ; then 339 | print_usage ${APPL_NAME} 340 | RETURN=${UNKNOWN} 341 | CONTINUE=false 342 | fi 343 | 344 | if [[ ${CONTINUE} == true && ${DATABASE_NAME} == "" ]] ; then 345 | print_usage ${APPL_NAME} 346 | RETURN=${UNKNOWN} 347 | CONTINUE=false 348 | fi 349 | 350 | if [[ ${CONTINUE} == true 351 | && ( ${WARNING_THRES} == "" || ${CRITICAL_THRES} == "" 352 | || ! ${WARNING_THRES} =~ ^[0-9]+$ || ! 
${CRITICAL_THRES} =~ ^[0-9]+$ 353 | || ${WARNING_THRES} -le 0 || ${CRITICAL_THRES} -le 0 354 | || ${WARNING_THRES} -ge ${CRITICAL_THRES} ) ]] ; then 355 | print_usage ${APPL_NAME} 356 | echo "Warning threshold should be less than critical threshold." 357 | echo "Threshold should be greater than 0." 358 | RETURN=${UNKNOWN} 359 | CONTINUE=false 360 | fi 361 | 362 | if [[ ${CONTINUE} == true ]] ; then 363 | if [[ -d ${INSTANCE_HOME} && -e ${INSTANCE_HOME}/sqllib/db2profile ]] ; then 364 | # Load the DB2 profile. 365 | . ${INSTANCE_HOME}/sqllib/db2profile 366 | INSTANCE_NAME=$(db2 get instance | awk '/instance/ {print $7}') 367 | else 368 | OUTPUT="Instance directory is invalid." 369 | RETURN=${UNKNOWN} 370 | CONTINUE=false 371 | fi 372 | fi 373 | 374 | if [[ ${CONTINUE} == true ]] ; then 375 | COMMAND_DATABASE="db2 list db directory" 376 | if [[ ${VERBOSE} -ge 2 ]] ; then 377 | echo "COMMAND: ${COMMAND_DATABASE}" 378 | fi 379 | DATABASE=$(${COMMAND_DATABASE}) 380 | if [[ ${TRACE} == true ]] ; then 381 | echo "RESULT:'${DATABASE}'" >> ${LOG} 382 | fi 383 | DATABASE=$(printf '%s\n' "${DATABASE}" | awk '/Database alias/ {print $4}' | grep -iw ${DATABASE_NAME}) 384 | if [[ ${VERBOSE} -ge 3 ]] ; then 385 | echo "RESULT:'${DATABASE}'" 386 | fi 387 | if [[ ${DATABASE} == "" ]] ; then 388 | OUTPUT="The database ${DATABASE_NAME} is not catalogued." 
389 | RETURN=${UNKNOWN} 390 | CONTINUE=false 391 | fi 392 | fi 393 | 394 | if [[ ${CONTINUE} == true ]] ; then 395 | COMMAND_LOCKS="db2 -x select rtrim(char(NUM_LOCKS_WAITING)), LOCK_WAIT_TIME from table(sysproc.MON_GET_DATABASE (-1)) as snapshot_database" 396 | if [[ ${VERBOSE} -ge 2 ]] ; then 397 | echo "COMMAND: ${COMMAND_LOCKS}" 398 | fi 399 | 400 | RESULT_LOCKS=$(db2 connect to ${DATABASE_NAME} > /dev/null ; ${COMMAND_LOCKS} ; db2 connect reset > /dev/null) 401 | if [[ ${TRACE} == true ]] ; then 402 | echo "RESULT:'${RESULT_LOCKS}'" >> ${LOG} 403 | fi 404 | if [[ ${VERBOSE} -ge 3 ]] ; then 405 | echo "RESULT:'${RESULT_LOCKS}'" 406 | fi 407 | # Check for errors 408 | CONN_ERROR=$(echo ${RESULT_LOCKS} | grep SQL1024) 409 | if [[ ${CONN_ERROR} != "" ]] ; then 410 | OUTPUT="Connection problem" 411 | RETURN=${UNKNOWN} 412 | CONTINUE=false 413 | fi 414 | CONN_ERROR=$(echo ${RESULT_LOCKS} | grep SQL0440) 415 | if [[ ${CONN_ERROR} != "" ]] ; then 416 | OUTPUT="Unsupported DB2 version." 417 | RETURN=${UNKNOWN} 418 | CONTINUE=false 419 | fi 420 | fi 421 | 422 | if [[ ${CONTINUE} == true ]] ; then 423 | LOCKS=$(echo ${RESULT_LOCKS} | awk '{print $1}') 424 | LOCK_WAIT=$(echo ${RESULT_LOCKS} | awk '{print $2'}) 425 | 426 | if [[ ${LOCK_WAIT} -gt ${CRITICAL_THRES} ]] ; then 427 | OUTPUT="There are ${LOCKS} application(s) in lock wait." 428 | RETURN=${CRITICAL} 429 | elif [[ ${LOCK_WAIT} -gt ${WARNING_THRES} ]] ; then 430 | OUTPUT="There are ${LOCKS} application(s) in lock wait." 431 | RETURN=${WARNING} 432 | else 433 | OUTPUT="OK. There could be short time locks." 434 | RETURN=${OK} 435 | fi 436 | PERFORMANCE="'Long_duration_locks'=${LOCK_WAIT};${WARNING_THRES};${CRITICAL_THRES}" 437 | LONG_PERFORMANCE="'All_locks'=${LOCKS}" 438 | PERF_MK="${PERFORMANCE}|${LONG_PERFORMANCE}" 439 | fi 440 | 441 | # Prints the output. 442 | if [[ ${OUTPUT} == "" ]] ; then 443 | OUTPUT="Note: The test was not executed." 444 | fi 445 | # Builds the output. 
446 | if [[ ${CHECK_MK} == true ]] ; then 447 | echo "${RETURN} lockWait ${PERF_MK} ${OUTPUT}" 448 | else 449 | echo -e "${OUTPUT}|${PERFORMANCE}\n${LONG_OUTPUT}|${LONG_PERFORMANCE}" 450 | fi 451 | # Returns the error code. 452 | if [[ ${VERBOSE} -ge 2 ]] ; then 453 | echo "Return code: ${RETURN}" 454 | fi 455 | if [[ ${TRACE} == true ]] ; then 456 | echo -e "OUTPUT:${OUTPUT}\nPERF:${PERFORMANCE}\nLONG_OUT:${LONG_OUTPUT}\nLONGPERF:${LONG_PERFORMANCE}\nRET_CODE:${RETURN}" >> ${LOG} 457 | date >> ${LOG} 458 | echo -e "<<<<<\n" >> ${LOG} 459 | fi 460 | 461 | if [[ ${LOCKED} == true && -r ${LOCK_FILE} ]] ; then 462 | rm ${LOCK_FILE} 463 | fi 464 | 465 | echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Ended ${APPL_NAME} ${COPY_ARGS[@]}" >> /tmp/${APPL_NAME}.log 466 | 467 | exit ${RETURN} 468 | 469 | -------------------------------------------------------------------------------- /check_log_consumption: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | ############################################################################### 4 | # Monitor DB2 with Nagios 5 | # Copyright 2013,2014,2015 Andres Gomez Casanova 6 | # https://github.com/angoca/monitor-db2-with-nagios 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 
19 | ############################################################################### 20 | 21 | # This script helps to create a graph of how many transaction logs have been 22 | # used in the day. It only retrieves performance data, and it always returns 23 | # OK if the value could be retrieved. It helps to identify the period of the 24 | # day when the transactions consume the most transaction logs. 25 | # 26 | # This script was based on the plugins written by Felipe Alkain de Souza. 27 | # http://www.dbatodba.com/db2/scripts-db2/nagios-plugins 28 | # 29 | # In order to be used in Nagios, you have to configure the following. 30 | # 31 | # # 'check_log_consumption' command definition 32 | # define command { 33 | # command_name check_log_consumption 34 | # command_line $USER1$/check_by_ssh -H $HOSTADDRESS$ -l nagios -C "scripts/check_log_consumption -i '$ARG1$' -d '$ARG2$'" 35 | # } 36 | # define service{ 37 | # host_name db2server 38 | # service_description Log daily consumption 39 | # check_command check_log_consumption!/home/db2inst1!sample 40 | # use generic-service 41 | # } 42 | # 43 | # Parameters: 44 | # -d | --database : Database name. 45 | # -h | --help : Show the help of this script. 46 | # -i | --instance : Home directory of the instance. Usually it is 47 | # /home/db2inst1. 48 | # -K | --mk : Change the output for Check_MK. 49 | # -T | --trace : Trace mode. Writes output in /tmp. 50 | # -v | --verbose : Execute the program in verbose mode. 51 | # -V | --version : Show the current version of this script. 52 | # 53 | # In AIX, the long name options are not supported. 54 | # 55 | # The exit codes are the standard for Nagios. 56 | # 57 | # 0 The plugin was able to check the service and it appeared to be functioning 58 | # properly. 59 | # 1 The plugin was able to check the service, but it appeared to be above some 60 | # "warning" threshold or did not appear to be working properly.
# 2 The plugin detected that either the service was not running or it was above
# some "critical" threshold.
# 3 Invalid command line arguments were supplied to the plugin or low-level
# failures internal to the plugin (such as unable to fork, or open a tcp
# socket) that prevent it from performing the specified operation.
# Higher-level errors (such as name resolution errors, socket timeouts, etc)
# are outside of the control of plugins and should generally NOT be reported
# as UNKNOWN states.
#
# Author: Andres Gomez Casanova
# Version: v1.1 2015-10-15

# Flag for debugging.
#set -xv

# Locale to print messages in English. Prevent language problems.
export LANG=en_US

# Prints the author and the version of this script.
function print_revision {
  echo Andres Gomez Casanova - AngocA
  echo v1.1 2015-10-15
}

# Prints the usage synopsis.
# Arguments: $1 - Name of the script, as shown in the synopsis.
function print_usage {
  /bin/cat <<__EOT
Usage: ${1} { -i instanceHomeDirectory -d databaseName [-K] | -h
  | -V }[-T][-v]
__EOT
}

# Prints the revision, the usage synopsis and the description of each option.
# Arguments: $1 - Name of the script, as shown in the synopsis.
function print_help {
  print_revision
  # Quoted, otherwise a name with spaces is cut at the first space.
  print_usage "${1}"
  # Max 80 chars width.
  /bin/cat <<__EOT
-------------------------------------------------------------------------------
This script retrieves the size of the archive log in the day.
This is only for performance data.
    -d | --database STRING
        Database name.
    -h | --help
        Shows the current documentation.
    -i | --instance STRING
        Instance home directory. It is usually /home/db2inst1.
    -K | --mk
        Changes the output to be compatible with Check_MK
    -T | --trace
        Trace mode: writes date and output in /tmp.
    -v | --verbose
        Executes the script in verbose mode (multiple times).
    -V | --version
        Shows the current version of this script.

In AIX, the long name options are not supported.
__EOT
}

# Sums the size of the regular files modified on a given day of the current
# period, from a 'ls -l' style listing read from the standard input.
# The month and the day are compared as complete fields, and the 8th field has
# to be a time (HH:MM): 'ls -l' prints a year there for old files, and those
# are not from today. A pattern like "Oct\s*1" would also match "Oct 10".
# Arguments: $1 - Abbreviated month name, as printed by 'ls -l' (e.g. Oct).
#            $2 - Day of the month (e.g. 5).
# Outputs:   Size in megabytes, truncated to an integer.
function sum_archive_size_mb {
  awk -v month="${1}" -v day="${2}" '
    $1 ~ /^-/ && $6 == month && $7 == day && $8 ~ /^[0-9][0-9]?:[0-9][0-9]$/ {
      sum += $5
    }
    END { print int(sum / 1024 / 1024) }'
}

# Variable to control the flow execution. Prevent Spaghetti code.
CONTINUE=true

# Nagios return codes
OK=0
WARNING=1
CRITICAL=2
UNKNOWN=3
# This is the returned code.
RETURN=${UNKNOWN}

# Nagios Output
# Text output 80 chars | Optional Perf Data Line 1
# Long text Line 1
# Long text Line 2 | Optional Perf Data Line 2
# Optional Perf Data Line 3
OUTPUT=
PERFORMANCE=
LONG_OUTPUT=
LONG_PERFORMANCE=
PERF_MK="-"

APPL_NAME=$(basename "${0}")

echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Started ${APPL_NAME} ${*}" >> "/tmp/${APPL_NAME}.log"

# Checks the lock file does not exist.
# The lock file is the way the command was called with its parameters
# without spaces.
COPY_ARGS=("${@}")
LOCK_FILE=
for VALUE in "${COPY_ARGS[@]}" ; do
  LOCK_FILE="${LOCK_FILE}${VALUE}"
done
# Removes the characters that are problematic in a file name.
LOCK_FILE=${LOCK_FILE//\//}
LOCK_FILE=${LOCK_FILE//\\/}
LOCK_FILE=${LOCK_FILE//\:/}
LOCK_FILE=${LOCK_FILE//\*/}
LOCK_FILE=${LOCK_FILE//\|/}
LOCK_FILE="/tmp/${APPL_NAME}${LOCK_FILE}.lock"
if [[ ! -r ${LOCK_FILE} ]] ; then
  echo $$ > "${LOCK_FILE}"
  LOCKED=true
else
  # If it exists, then check if the process that wrote it is still running.
  # The PID is queried exactly with 'ps -p'; grepping the whole process list
  # matches a short PID as a substring of unrelated lines.
  LOCK_PID=$(cat "${LOCK_FILE}")
  EXIST=
  if [[ ${LOCK_PID} =~ ^[0-9]+$ ]] ; then
    EXIST=$(ps -p "${LOCK_PID}" -o args= 2> /dev/null | grep -F -- "${APPL_NAME}")
  fi
  # If process is not running, delete it.
  if [[ ${EXIST} == "" ]] ; then
    rm "${LOCK_FILE}"
    if [[ ! -r ${LOCK_FILE} ]] ; then
      echo $$ > "${LOCK_FILE}"
      LOCKED=true
    else
      OUTPUT="The lock file cannot be replaced: ${LOCK_FILE}"
      CONTINUE=false
      RETURN=${UNKNOWN}
    fi
  else
    OUTPUT="An instance of the script with the same parameters is already running."
    CONTINUE=false
    RETURN=${UNKNOWN}
  fi
fi

if [[ ${#} -eq 0 ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Checks the operating system. getopt works differently in AIX than in Linux.
OS=$(uname)

# Exit status of getopt. It is captured right after the call, because the
# status of any later command (like 'set --') would hide it.
GETOPT_STATUS=0

if [[ "${OS:0:5}" == "Linux" ]] ; then
  # The following requires GNU getopt. See the following discussion.
  # http://stackoverflow.com/questions/402377

  # TODO Add the arguments here. One char in -o, multiple char in -long.
  TEMP=$(getopt -o d:hi:KTvV --long database:,help,instance:,mk,trace,verbose,version \
    -n "${APPL_NAME}" -- "${@}")
  GETOPT_STATUS=${?}

elif [[ "${OS:0:3}" == "AIX" ]] ; then
  # Traditional getopt: the word splitting of the arguments is intended.
  TEMP=$(getopt d:hi:KTvV ${*})
  GETOPT_STATUS=${?}
  set -- ${TEMP}
elif [[ "${OS:0:6}" == "Darwin" || "${OS:0:5}" == "SunOS" || "${OS:0:5}" == "HP-UX" ]] ; then
  echo "This plugin is not yet supported in your platform."
  echo "Please create a ticket in GitHub if you want to enable your current platform."
  echo "https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
elif [[ "${OS:0:6}" == "CYGWIN" ]] ; then
  echo "This plugin is not supported in Cygwin"
  RETURN=${UNKNOWN}
  CONTINUE=false
else
  echo "The platform is unknown: ${OS}"
  echo "Please create a ticket in GitHub: https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${GETOPT_STATUS} -ne 0 ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Default values of the options. They are always defined, even when the
# parameters are not parsed, because the final section of the script reads
# them.
HELP=false
VERSION=false
CHECK_MK=false
# Verbosity level
VERBOSE=0
# Trace activated
TRACE=false
LOG=/tmp/${APPL_NAME}.log
INSTANCE_HOME=
DATABASE_NAME=

if [[ ${CONTINUE} == true ]] ; then
  if [[ "${OS:0:5}" == "Linux" ]] ; then
    # Note the quotes around ${TEMP}: they are essential!
    eval set -- "${TEMP}"
    while true; do
      case "${1}" in
        -d | --database ) DATABASE_NAME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -h | --help ) HELP=true ; shift ;;
        -i | --instance ) INSTANCE_HOME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -K | --mk ) CHECK_MK=true ; shift ;;
        -T | --trace ) TRACE=true ; shift ;;
        -v | --verbose ) VERBOSE=$(( ${VERBOSE} + 1 )) ; shift ;;
        -V | --version ) VERSION=true ; shift ;;
        -- ) shift ; break ;;
        * ) break ;;
      esac
    done
  elif [[ "${OS:0:3}" = "AIX" ]] ; then
    while [[ $1 != -- ]] ; do
      case "${1}" in
        -d) DATABASE_NAME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -h) HELP=true ; shift ;;
        -i) INSTANCE_HOME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -K) CHECK_MK=true ; shift ;;
        -T) TRACE=true ; shift ;;
        -v) VERBOSE=$(( ${VERBOSE} + 1 )) ; shift ;;
        -V) VERSION=true ; shift ;;
        * ) break ;;
      esac
    done
  fi
fi

if [[ ${TRACE} == true ]] ; then
  echo ">>>>>" >> "${LOG}"
  date >> "${LOG}"
  echo "Instance at ${INSTANCE_HOME}" >> "${LOG}"
  echo "PID ${$}" >> "${LOG}"
fi

ECHO="help:${HELP}, version:${VERSION}, verbose:${VERBOSE}"
ECHO="${ECHO}, directory:${INSTANCE_HOME}, database:${DATABASE_NAME}"
ECHO="${ECHO}, check_mk:${CHECK_MK}"

if [[ ${VERBOSE} -ge 2 ]] ; then
  echo ${ECHO}
fi

if [[ ${TRACE} == true ]] ; then
  echo "PARAMS:${ECHO}" >> "${LOG}"
fi

if [[ ${CONTINUE} == true && ${HELP} == true ]] ; then
  print_help "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${VERSION} == true ]] ; then
  print_revision
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Both the instance directory and the database name are mandatory.
if [[ ${CONTINUE} == true && ${INSTANCE_HOME} == "" ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${DATABASE_NAME} == "" ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true ]] ; then
  if [[ -d ${INSTANCE_HOME} && -e ${INSTANCE_HOME}/sqllib/db2profile ]] ; then
    # Load the DB2 profile.
    . "${INSTANCE_HOME}/sqllib/db2profile"
    INSTANCE_NAME=$(db2 get instance | awk '/instance/ {print $7}')
  else
    OUTPUT="Instance directory is invalid."
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

# Checks that the database is catalogued in the instance.
if [[ ${CONTINUE} == true ]] ; then
  COMMAND_DATABASE="db2 list db directory"
  if [[ ${VERBOSE} -ge 2 ]] ; then
    echo "COMMAND: ${COMMAND_DATABASE}"
  fi
  DATABASE=$(${COMMAND_DATABASE})
  if [[ ${TRACE} == true ]] ; then
    echo "RESULT:'${DATABASE}'" >> "${LOG}"
  fi
  DATABASE=$(printf '%s\n' "${DATABASE}" | awk '/Database alias/ {print $4}' | grep -iw -- "${DATABASE_NAME}")
  if [[ ${VERBOSE} -ge 3 ]] ; then
    echo "RESULT:'${DATABASE}'"
  fi
  if [[ ${DATABASE} == "" ]] ; then
    OUTPUT="The database ${DATABASE_NAME} is not catalogued."
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

if [[ ${CONTINUE} == true ]] ; then
  COMMAND_HADR="db2pd -db ${DATABASE_NAME} -hadr"
  if [[ ${VERBOSE} -ge 2 ]] ; then
    echo "COMMAND: ${COMMAND_HADR}"
  fi
  RESULT_HADR=$(${COMMAND_HADR} | awk '/Standby/ && !/Active/ {print $1}')

  if [[ ${VERBOSE} -ge 3 ]] ; then
    echo "RESULT HADR:'${RESULT_HADR}'"
  fi

  HADR=false
  if [[ ${RESULT_HADR} == "Standby" ]] ; then
    HADR=true
  fi

  if [[ ${HADR} == true ]] ; then
    OUTPUT="HADR does not perform archiving"
    RETURN=${OK}
  else
    OUTPUT="Archive logs counted"
    RETURN=${OK}

    COMMAND_DB_CFG="db2 get db cfg for ${DATABASE_NAME}"
    if [[ ${VERBOSE} -ge 2 ]] ; then
      echo "COMMAND: ${COMMAND_DB_CFG}"
    fi
    # The configuration is read once. The value has the form METHOD:location.
    LOG_ARCH_METH=$(${COMMAND_DB_CFG} | awk '/LOGARCHMETH1/ {print $7}')
    DISK=${LOG_ARCH_METH%%:*}
    # Quoted test: with an empty value, an unquoted '[' fails with a syntax
    # error and the archive branch would list the current directory.
    if [[ "${DISK}" != "DISK" ]] ; then
      OUTPUT="The script only checks with LOGARCHMETH1 as DISK"
      RETURN=${UNKNOWN}
    else
      ARCHIVE=${LOG_ARCH_METH#*:}
      MONTH=$(date +%b)
      DAY=$(date +%d)
      # Removes the leading zero, to show the day as 'ls -l' prints it.
      DAY=${DAY#0}
      ARCHIVE_SIZE=$(ls -lR "${ARCHIVE}" | sum_archive_size_mb "${MONTH}" "${DAY}")
      if [[ ${VERBOSE} -ge 3 ]] ; then
        echo "RESULT ARCHIVE:'${ARCHIVE}'"
        echo "RESULT MONTH:'${MONTH}'"
        echo "RESULT DAY:'${DAY}'"
        echo "RESULT ARCHIVE_SIZE:'${ARCHIVE_SIZE}'"
      fi
      PERFORMANCE="'Size_archive_logs'=${ARCHIVE_SIZE} "
      PERF_MK="${PERFORMANCE}"
    fi
  fi
fi

# Prints the output.
if [[ ${OUTPUT} == "" ]] ; then
  OUTPUT="Note: The test was not executed."
fi
# Builds the output.
397 | if [[ ${CHECK_MK} == true ]] ; then 398 | echo "${RETURN} logConsumption${TYPE}-${INSTANCE_NAME}-${DATABASE_NAME} ${PERF_MK} ${OUTPUT}" 399 | else 400 | echo -e "${OUTPUT}|${PERFORMANCE}\n${LONG_OUTPUT}|${LONG_PERFORMANCE}" 401 | fi 402 | # Shows the return code when the verbosity level is 2 or more. 403 | if [[ ${VERBOSE} -ge 2 ]] ; then 404 | echo "Return code: ${RETURN}" 405 | fi 406 | if [[ ${TRACE} == true ]] ; then 407 | echo -e "OUTPUT:${OUTPUT}\nPERF:${PERFORMANCE}\nLONG_OUT:${LONG_OUTPUT}\nLONGPERF:${LONG_PERFORMANCE}\nRET_CODE:${RETURN}" >> ${LOG} 408 | date >> ${LOG} 409 | echo -e "<<<<<\n" >> ${LOG} 410 | fi 411 | # Removes the lock file only when this execution created it (LOCKED). 412 | if [[ ${LOCKED} == true && -r ${LOCK_FILE} ]] ; then 413 | rm ${LOCK_FILE} 414 | fi 415 | # Registers the end of the execution in the log file of the script. 416 | echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Ended ${APPL_NAME} ${COPY_ARGS[@]}" >> /tmp/${APPL_NAME}.log 417 | # Exits with the Nagios return code. 418 | exit ${RETURN} 419 | 420 | -------------------------------------------------------------------------------- /check_memory_usage: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | ############################################################################### 4 | # Monitor DB2 with Nagios 5 | # Copyright 2013,2014,2015 Andres Gomez Casanova 6 | # https://github.com/angoca/monitor-db2-with-nagios 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License.
19 | ############################################################################### 20 | 21 | # This script checks the memory usage: total, free, usage. This script is based 22 | # on many other scripts that already do that. 23 | # 24 | # Here comes the documentation 25 | # 26 | # TODO Nagios usage. 27 | # 28 | # TODO Nagios with NRPE. 29 | # 30 | # Parameters: 31 | # -c | --critical : Percentage of memory usage considered as critical. 32 | # -h | --help : Show the help of this script. 33 | # -i | --ignore : Ignore thresholds. 34 | # -K | --mk : Change the output for Check_MK. 35 | # -T | --trace : Trace mode. Writes output in /tmp. 36 | # -v | --verbose : Execute the program in verbose mode. 37 | # -V | --version : Show the current version of this script. 38 | # -w | --warning : Percentage of memory usage considered as warning. 39 | # 40 | # In AIX, the long name options are not supported. 41 | # 42 | # The exit codes are the standard for Nagios. 43 | # 44 | # 0 The plugin was able to check the service and it appeared to be functioning 45 | # properly. 46 | # 1 The plugin was able to check the service, but it appeared to be above some 47 | # "warning" threshold or did not appear to be working properly. 48 | # 2 The plugin detected that either the service was not running or it was above 49 | # some "critical" threshold. 50 | # 3 Invalid command line arguments were supplied to the plugin or low-level 51 | # failures internal to the plugin (such as unable to fork, or open a tcp 52 | # socket) that prevent it from performing the specified operation. 53 | # Higher-level errors (such as name resolution errors, socket timeouts, etc) 54 | # are outside of the control of plugins and should generally NOT be reported 55 | # as UNKNOWN states. 56 | # 57 | # Author: Andres Gomez Casanova 58 | # Version: v1.1 2015-10-15 59 | 60 | # Flag for debugging. 61 | #set -xv 62 | 63 | # Locale to print messages in English. Prevent language problems.
export LANG=en_US

# Prints the author and the version of this script.
function print_revision {
  echo Andres Gomez Casanova - AngocA
  echo v1.1 2015-10-15
}

# Prints the usage synopsis.
# Arguments: $1 - Name of the script, as shown in the synopsis.
function print_usage {
  /bin/cat <<__EOT
Usage: ${1} { [-c][-w][-i][-K] | -h | -V } [-T][-v]
__EOT
}

# Prints the revision, the usage synopsis and the description of each option.
# Arguments: $1 - Name of the script, as shown in the synopsis.
function print_help {
  print_revision
  # Quoted, otherwise a name with spaces is cut at the first space.
  print_usage "${1}"
  # Max 80 chars width.
  /bin/cat <<__EOT
-------------------------------------------------------------------------------
Checks the memory usage: total, free, usage.
    -c | --critical INTEGER
        Percentage of usage memory considered as critical.
    -h | --help
        Shows the current documentation.
    -i | --ignore
        Ignore thresholds. Always returns OK.
    -K | --mk
        Changes the output to be compatible with Check_MK.
    -T | --trace
        Trace mode: writes date and output in /tmp.
    -v | --verbose
        Executes the script in verbose mode (multiple times).
    -V | --version
        Shows the current version of this script.
    -w | --warning INTEGER
        Percentage of usage memory considered as warning.

In AIX, the long name options are not supported.
__EOT
}

# Variable to control the flow execution. Prevent Spaghetti code.
CONTINUE=true

# Nagios return codes
OK=0
WARNING=1
CRITICAL=2
UNKNOWN=3
# This is the returned code.
RETURN=${UNKNOWN}

# Nagios Output
# Text output 80 chars | Optional Perf Data Line 1
# Long text Line 1
# Long text Line 2 | Optional Perf Data Line 2
# Optional Perf Data Line 3
OUTPUT=
PERFORMANCE=
LONG_OUTPUT=
LONG_PERFORMANCE=
PERF_MK="-"

# Name of the script without its directory. Quoted to support paths with
# spaces.
APPL_NAME=$(basename "${0}")

# Registers the start of the execution in the log file of the script.
echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Started ${APPL_NAME} ${*}" >> "/tmp/${APPL_NAME}.log"

# Checks the lock file does not exist.
# The lock file is the way the command was called with its parameters
# without spaces.
COPY_ARGS=("${@}")
LOCK_FILE=
for VALUE in "${COPY_ARGS[@]}" ; do
  LOCK_FILE="${LOCK_FILE}${VALUE}"
done
# Removes the characters that are problematic in a file name.
LOCK_FILE=${LOCK_FILE//\//}
LOCK_FILE=${LOCK_FILE//\\/}
LOCK_FILE=${LOCK_FILE//\:/}
LOCK_FILE=${LOCK_FILE//\*/}
LOCK_FILE=${LOCK_FILE//\|/}
LOCK_FILE="/tmp/${APPL_NAME}${LOCK_FILE}.lock"
if [[ ! -r ${LOCK_FILE} ]] ; then
  echo $$ > "${LOCK_FILE}"
  LOCKED=true
else
  # If it exists, then check if the process that wrote it is still running.
  # The PID is queried exactly with 'ps -p'; grepping the whole process list
  # matches a short PID as a substring of unrelated lines.
  LOCK_PID=$(cat "${LOCK_FILE}")
  EXIST=
  if [[ ${LOCK_PID} =~ ^[0-9]+$ ]] ; then
    EXIST=$(ps -p "${LOCK_PID}" -o args= 2> /dev/null | grep -F -- "${APPL_NAME}")
  fi
  # If process is not running, delete it.
  if [[ ${EXIST} == "" ]] ; then
    rm "${LOCK_FILE}"
    if [[ ! -r ${LOCK_FILE} ]] ; then
      echo $$ > "${LOCK_FILE}"
      LOCKED=true
    else
      OUTPUT="The lock file cannot be replaced: ${LOCK_FILE}"
      CONTINUE=false
      RETURN=${UNKNOWN}
    fi
  else
    OUTPUT="An instance of the script with the same parameters is already running."
    CONTINUE=false
    RETURN=${UNKNOWN}
  fi
fi

if [[ ${#} -eq 0 ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Checks the operating system. This plugin reads /proc/meminfo, and it only
# continues in Linux.
OS=$(uname)

# Exit status of getopt, captured right after the call.
GETOPT_STATUS=0

if [[ "${OS:0:5}" == "Linux" ]] ; then
  # The following requires GNU getopt. See the following discussion.
  # http://stackoverflow.com/questions/402377

  TEMP=$(getopt -o c:hiKTvVw: --long critical:,help,ignore,mk,trace,verbose,version,warning: \
    -n "${APPL_NAME}" -- "${@}")
  GETOPT_STATUS=${?}

elif [[ "${OS:0:3}" == "AIX" ]] ; then
  echo "This plugin is not supported in AIX"
  RETURN=${UNKNOWN}
  CONTINUE=false
elif [[ "${OS:0:6}" == "Darwin" || "${OS:0:5}" == "SunOS" || "${OS:0:5}" == "HP-UX" ]] ; then
  echo "This plugin is not yet supported in your platform."
  echo "Please create a ticket in GitHub if you want to enable your current platform."
  echo "https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
elif [[ "${OS:0:6}" == "CYGWIN" ]] ; then
  echo "This plugin is not supported in Cygwin"
  RETURN=${UNKNOWN}
  CONTINUE=false
else
  echo "The platform is unknown: ${OS}"
  echo "Please create a ticket in GitHub: https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${GETOPT_STATUS} -ne 0 ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Default values of the options. They are always defined, even when the
# parameters are not parsed, because the final section of the script reads
# them.
HELP=false
VERSION=false
CHECK_MK=false
# Verbosity level
VERBOSE=0
# Trace activated
TRACE=false
LOG=/tmp/${APPL_NAME}.log
IGNORE_THRES=false
WARNING_THRES=90
CRITICAL_THRES=95

# Only Linux reaches this point with CONTINUE as true (every other platform
# was rejected above), thus there is no parser for the AIX getopt format.
if [[ ${CONTINUE} == true ]] ; then
  # Note the quotes around ${TEMP}: they are essential!
  eval set -- "${TEMP}"
  while true; do
    case "${1}" in
      -c | --critical ) CRITICAL_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
      -h | --help ) HELP=true ; shift ;;
      -i | --ignore ) IGNORE_THRES=true ; shift ;;
      -K | --mk ) CHECK_MK=true ; shift ;;
      -T | --trace ) TRACE=true ; shift ;;
      -v | --verbose ) VERBOSE=$(( ${VERBOSE} + 1 )) ; shift ;;
      -V | --version ) VERSION=true ; shift ;;
      -w | --warning ) WARNING_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
      -- ) shift ; break ;;
      * ) break ;;
    esac
  done
fi

if [[ ${TRACE} == true ]] ; then
  echo ">>>>>" >> "${LOG}"
  date >> "${LOG}"
  echo "PID ${$}" >> "${LOG}"
fi

ECHO="help:${HELP}, version:${VERSION}, verbose:${VERBOSE}"
ECHO="${ECHO}, warning:${WARNING_THRES}, critical:${CRITICAL_THRES}"
ECHO="${ECHO}, check_mk:${CHECK_MK}, ignore:${IGNORE_THRES}"

if [[ ${VERBOSE} -ge 2 ]] ; then
  echo ${ECHO}
fi

if [[ ${TRACE} == true ]] ; then
  echo "PARAMS:${ECHO}" >> "${LOG}"
fi

if [[ ${CONTINUE} == true && ${HELP} == true ]] ; then
  print_help "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${VERSION} == true ]] ; then
  print_revision
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Both thresholds have to be positive integers, with warning below critical.
if [[ ${CONTINUE} == true
  && ( ${WARNING_THRES} == "" || ${CRITICAL_THRES} == ""
  || ! ${WARNING_THRES} =~ ^[0-9]+$ || ! ${CRITICAL_THRES} =~ ^[0-9]+$
  || ${WARNING_THRES} -le 0 || ${CRITICAL_THRES} -le 0
  || ${WARNING_THRES} -ge ${CRITICAL_THRES} ) ]] ; then
  print_usage "${APPL_NAME}"
  echo "Warning threshold should be less than critical threshold."
  echo "Threshold should be greater than 0."
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Prints the value of a field of a meminfo-style file ("Name:   value kB").
# Nothing is printed when the file cannot be read or the field is absent.
# Arguments: $1 - Name of the field, without the colon (e.g. MemTotal).
#            $2 - File to read. /proc/meminfo by default.
function read_meminfo_kb {
  awk -v field="${1}:" '$1 == field {print $2 ; exit}' "${2:-/proc/meminfo}" 2> /dev/null
}

if [[ ${CONTINUE} == true ]] ; then
  TOTAL=$(read_meminfo_kb MemTotal)
  FREE=$(read_meminfo_kb MemFree)
  # Without valid values the arithmetic below fails (empty operand or
  # division by zero), thus the state is reported as unknown.
  if [[ ! ${TOTAL} =~ ^[0-9]+$ || ! ${FREE} =~ ^[0-9]+$ || ${TOTAL} -eq 0 ]] ; then
    OUTPUT="The memory information could not be read from /proc/meminfo."
    RETURN=${UNKNOWN}
  else
    USAGE=$(( TOTAL - FREE ))

    USAGE_PER=$(( USAGE * 100 / TOTAL ))
    # Thresholds converted from percentage to KB for the performance data.
    WARNING_VAL=$(( ${WARNING_THRES} * TOTAL / 100 ))
    CRITICAL_VAL=$(( ${CRITICAL_THRES} * TOTAL / 100 ))
    if [[ ${VERBOSE} -ge 1 ]] ; then
      echo "Percent '${USAGE_PER}%'"
    fi
    if [[ ${IGNORE_THRES} == true ]] ; then
      OUTPUT="Usage is ${USAGE_PER} (Thresholds ignored)"
      RETURN=${OK}
    elif [[ ${USAGE_PER} -lt ${WARNING_THRES} ]] ; then
      OUTPUT="Usage is normal (${USAGE_PER})."
      RETURN=${OK}
    elif [[ ${USAGE_PER} -lt ${CRITICAL_THRES} ]] ; then
      OUTPUT="Usage is intensive (${USAGE_PER})."
      RETURN=${WARNING}
    else
      OUTPUT="Usage is critical (${USAGE_PER})."
      RETURN=${CRITICAL}
    fi
    LONG_OUTPUT="The current memory usage is ${USAGE}KB of a total of ${TOTAL}KB (Free ${FREE})."
    PERFORMANCE="'Usage'=${USAGE}KB;${WARNING_VAL};${CRITICAL_VAL};;${TOTAL}"
    PERF_MK="${PERFORMANCE}"
  fi
fi

# Prints the output.
333 | if [[ ${OUTPUT} == "" ]] ; then 334 | OUTPUT="Note: The test was not executed." 335 | fi 336 | # Builds the output: one line for Check_MK, or the text and performance data. 337 | if [[ ${CHECK_MK} == true ]] ; then 338 | echo "${RETURN} memoryUsage ${PERF_MK} ${OUTPUT}" 339 | else 340 | echo -e "${OUTPUT}|${PERFORMANCE}\n${LONG_OUTPUT}|${LONG_PERFORMANCE}" 341 | fi 342 | # Shows the return code when the verbosity level is 2 or more. 343 | if [[ ${VERBOSE} -ge 2 ]] ; then 344 | echo "Return code: ${RETURN}" 345 | fi 346 | if [[ ${TRACE} == true ]] ; then 347 | echo -e "OUTPUT:${OUTPUT}\nPERF:${PERFORMANCE}\nLONG_OUT:${LONG_OUTPUT}\nLONGPERF:${LONG_PERFORMANCE}\nRET_CODE:${RETURN}" >> ${LOG} 348 | date >> ${LOG} 349 | echo -e "<<<<<\n" >> ${LOG} 350 | fi 351 | # Removes the lock file only when this execution created it (LOCKED). 352 | if [[ ${LOCKED} == true && -r ${LOCK_FILE} ]] ; then 353 | rm ${LOCK_FILE} 354 | fi 355 | # Registers the end of the execution in the log file of the script. 356 | echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Ended ${APPL_NAME} ${COPY_ARGS[@]}" >> /tmp/${APPL_NAME}.log 357 | # Exits with the Nagios return code. 358 | exit ${RETURN} 359 | 360 | -------------------------------------------------------------------------------- /check_mon_scripts_running: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | ############################################################################### 4 | # Monitor DB2 with Nagios 5 | # Copyright 2013,2014,2015 Andres Gomez Casanova 6 | # https://github.com/angoca/monitor-db2-with-nagios 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License.
19 | ############################################################################### 20 | 21 | # This script checks the quantity of scripts running on the server under the 22 | # same user id as this script, and whose program name starts with 23 | # 'check_'. This helps to control hung processes or long-running processes, 24 | # that sometimes could exhaust the server. 25 | # 26 | # The script does not need special permissions on the server, just list the 27 | # running processes. 28 | # 29 | # # 'check_running_scripts' command definition 30 | # define command { 31 | # command_name check_running_scripts 32 | # command_line $USER1$/check_by_ssh -H $HOSTADDRESS$ -l nagios -t 20 -C "scripts/check_running_scripts" 33 | # } 34 | # define service{ 35 | # host_name db2server 36 | # service_description Check running monitor scripts 37 | # check_command check_running_scripts 38 | # use generic-service 39 | # } 40 | # 41 | # When using NRPE: 42 | # 43 | # In the database server: 44 | # 45 | # command[check_running_scripts]=/home/nagios/scripts/check_running_scripts 46 | # 47 | # Then modify the path according to your script location and thresholds. 48 | # 49 | # In the Nagios server: 50 | # 51 | # define service{ 52 | # host_name db2server 53 | # service_description Check running monitor scripts 54 | # check_command check_nrpe_1arg!check_running_scripts 55 | # use generic-service 56 | # } 57 | # 58 | # Parameters: 59 | # -c | --critical : Quantity of monitor scripts currently running that is 60 | # considered as critical. 61 | # -h | --help : Show the help of this script. 62 | # -K | --mk : Change the output for Check_MK. 63 | # -T | --trace : Trace mode. Writes output in /tmp. 64 | # -v | --verbose : Execute the program in verbose mode. 65 | # -V | --version : Show the current version of this script. 66 | # -w | --warning : Quantity of monitor scripts currently running that is 67 | # considered as warning.
#
# In AIX, the long name options are not supported.
#
# The exit codes are the standard for Nagios.
#
# 0 The plugin was able to check the service and it appeared to be functioning
#   properly.
# 1 The plugin was able to check the service, but it appeared to be above some
#   "warning" threshold or did not appear to be working properly.
# 2 The plugin detected that either the service was not running or it was above
#   some "critical" threshold.
# 3 Invalid command line arguments were supplied to the plugin or low-level
#   failures internal to the plugin (such as unable to fork, or open a tcp
#   socket) that prevent it from performing the specified operation.
#   Higher-level errors (such as name resolution errors, socket timeouts, etc)
#   are outside of the control of plugins and should generally NOT be reported
#   as UNKNOWN states.
#
# Author: Andres Gomez Casanova
# Version: v1.2 2015-10-15

# Flag for debugging.
#set -xv

# Locale to print messages in English. Prevent language problems.
export LANG=en_US

# Prints the author and the version of this script (kept in sync with the
# "Version:" line of the header above).
function print_revision {
  echo Andres Gomez Casanova - AngocA
  echo v1.2 2015-10-15
}

# Prints the one-line usage.
# Arguments: $1 - name of the program to show in the usage line.
function print_usage {
  /bin/cat <<__EOT
Usage: ${1} { [-c][-w][-K] | -h | -V } [-T][-v]
__EOT
}

# Prints the revision, the usage and the description of every option.
# Arguments: $1 - name of the program to show in the usage line.
function print_help {
  print_revision
  print_usage "${1}"
  # Max 80 chars width.
  /bin/cat <<__EOT
-------------------------------------------------------------------------------
Checks the quantity of monitor scripts currently running on the server.
-c | --critical INTEGER
   Quantity of running scripts to be considered as critical.
   Default 20.
-h | --help
   Shows the current documentation.
-K | --mk
   Changes the output to be compatible with Check_MK.
-T | --trace
   Trace mode: writes date and output in /tmp.
-v | --verbose
   Executes the script in verbose mode (multiple times).
-V | --version
   Shows the current version of this script.
-w | --warning INTEGER
   Quantity of running scripts to be considered as warning.
   Default 15.

In AIX, the long name options are not supported.
__EOT
}

# Variable to control the flow execution. Prevent Spaghetti code.
CONTINUE=true

# Nagios return codes
OK=0
WARNING=1
CRITICAL=2
UNKNOWN=3
# This is the returned code.
RETURN=${UNKNOWN}

# Nagios Output
# Text output 80 chars | Optional Perf Data Line 1
# Long text Line 1
# Long text Line 2 | Optional Perf Data Line 2
# Optional Perf Data Line 3
OUTPUT=
PERFORMANCE=
LONG_OUTPUT=
LONG_PERFORMANCE=
PERF_MK="-"

APPL_NAME=$(basename "${0}")

echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Started ${APPL_NAME} $*" >> "/tmp/${APPL_NAME}.log"

# Checks the lock file does not exist.
# The lock file is the way the command was called with its parameters
# without spaces.
COPY_ARGS=("${@}")
LOCK_FILE=
for VALUE in "${COPY_ARGS[@]}" ; do
  LOCK_FILE="${LOCK_FILE}${VALUE}"
done
# Removes the characters that are not safe in a file name.
LOCK_FILE=${LOCK_FILE//\//}
LOCK_FILE=${LOCK_FILE//\\/}
LOCK_FILE=${LOCK_FILE//\:/}
LOCK_FILE=${LOCK_FILE//\*/}
LOCK_FILE=${LOCK_FILE//\|/}
LOCK_FILE="/tmp/${APPL_NAME}${LOCK_FILE}.lock"
if [[ ! -r "${LOCK_FILE}" ]] ; then
  echo $$ > "${LOCK_FILE}"
  LOCKED=true
else
  # If it exists, then check if the process that wrote it is still running.
  # The exact PID is queried (ps -p) instead of grepping the whole process
  # list, because a plain grep also matches PIDs that only contain the number.
  LOCK_PID=$(cat "${LOCK_FILE}" 2> /dev/null)
  EXIST=
  if [[ "${LOCK_PID}" =~ ^[0-9]+$ ]] ; then
    EXIST=$(ps -p "${LOCK_PID}" -o args= 2> /dev/null | grep -F -e "${APPL_NAME}")
  fi
  # If process is not running, the lock is stale: replace it.
  if [[ "${EXIST}" == "" ]] ; then
    rm -f "${LOCK_FILE}"
    if [[ ! -r "${LOCK_FILE}" ]] ; then
      echo $$ > "${LOCK_FILE}"
      LOCKED=true
    else
      OUTPUT="The lock file cannot be replaced: ${LOCK_FILE}"
      CONTINUE=false
      RETURN=${UNKNOWN}
    fi
  else
    OUTPUT="An instance of the script with the same parameters is already running."
    CONTINUE=false
    RETURN=${UNKNOWN}
  fi
fi

# Checks the operating system. getopt works different in AIX than in Linux.
OS=$(uname)
# Exit status of getopt. It is captured right after the call because the
# status of a later command (set, echo) would hide a parsing error.
GETOPT_RC=0

if [[ "${OS:0:5}" == "Linux" ]] ; then
  # The following requires GNU getopt. See the following discussion.
  # http://stackoverflow.com/questions/402377

  TEMP=$(getopt -o c:hKTvVw: --long critical:,help,mk,trace,verbose,version,warning: \
    -n "${APPL_NAME}" -- "${@}")
  GETOPT_RC=${?}

elif [[ "${OS:0:3}" == "AIX" ]] ; then
  # Traditional getopt: word splitting of its output is intended here.
  TEMP=$(getopt c:hKTvVw: ${*})
  GETOPT_RC=${?}
  set -- ${TEMP}
elif [[ "${OS:0:6}" == "Darwin" || "${OS:0:5}" == "SunOS" || "${OS:0:5}" == "HP-UX" ]] ; then
  echo "This plugin is not yet supported in your platform."
  echo "Please create a ticket in GitHub if you want to enable your current platform."
  echo "https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
elif [[ "${OS:0:6}" == "CYGWIN" ]] ; then
  echo "This plugin is not supported in Cygwin"
  RETURN=${UNKNOWN}
  CONTINUE=false
else
  echo "The platform is unknown: ${OS}"
  echo "Please create a ticket in GitHub: https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${GETOPT_RC} -ne 0 ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Default values. They are always set, so the tests below never read an
# undefined variable when the execution was stopped before the parsing.
HELP=false
VERSION=false
CHECK_MK=false
# Verbosity level
VERBOSE=0
# Trace activated
TRACE=false
LOG=/tmp/${APPL_NAME}.log
WARNING_THRES=15
CRITICAL_THRES=20

if [[ ${CONTINUE} == true ]] ; then
  if [[ "${OS:0:5}" == "Linux" ]] ; then
    # Note the quotes around ${TEMP}: they are essential!
    eval set -- "${TEMP}"
    while true; do
      case "${1}" in
        -c | --critical ) CRITICAL_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -h | --help ) HELP=true ; shift ;;
        -K | --mk ) CHECK_MK=true ; shift ;;
        -T | --trace ) TRACE=true ; shift ;;
        -v | --verbose ) VERBOSE=$(( VERBOSE + 1 )) ; shift ;;
        -V | --version ) VERSION=true ; shift ;;
        -w | --warning ) WARNING_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -- ) shift ; break ;;
        * ) break ;;
      esac
    done
  elif [[ "${OS:0:3}" == "AIX" ]] ; then
    while [[ "${1}" != -- ]] ; do
      case "${1}" in
        -c) CRITICAL_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -h) HELP=true ; shift ;;
        -K) CHECK_MK=true ; shift ;;
        -T) TRACE=true ; shift ;;
        -v) VERBOSE=$(( VERBOSE + 1 )) ; shift ;;
        -V) VERSION=true ; shift ;;
        -w) WARNING_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        * ) break ;;
      esac
    done
  fi
fi

if [[ ${TRACE} == true ]] ; then
  echo ">>>>>" >> "${LOG}"
  date >> "${LOG}"
  echo "PID ${$}" >> "${LOG}"
fi

ECHO="help:${HELP}, version:${VERSION}, verbose:${VERBOSE}"
ECHO="${ECHO}, warning:${WARNING_THRES}, critical:${CRITICAL_THRES}"
ECHO="${ECHO}, check_mk:${CHECK_MK}"

if [[ ${VERBOSE} -ge 2 ]] ; then
  echo "${ECHO}"
fi

if [[ ${TRACE} == true ]] ; then
  echo "PARAMS:${ECHO}" >> "${LOG}"
fi

if [[ ${CONTINUE} == true && ${HELP} == true ]] ; then
  print_help "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${VERSION} == true ]] ; then
  print_revision "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Validates the thresholds: positive integers, warning lower than critical.
if [[ ${CONTINUE} == true ]] ; then
  THRES_OK=false
  if [[ "${WARNING_THRES}" =~ ^[0-9]+$ && "${CRITICAL_THRES}" =~ ^[0-9]+$ ]] ; then
    # Forces base 10, otherwise a value with a leading zero (08) is rejected
    # by the shell as an invalid octal number.
    WARNING_THRES=$(( 10#${WARNING_THRES} ))
    CRITICAL_THRES=$(( 10#${CRITICAL_THRES} ))
    if [[ ${WARNING_THRES} -gt 0 && ${WARNING_THRES} -lt ${CRITICAL_THRES} ]] ; then
      THRES_OK=true
    fi
  fi
  if [[ ${THRES_OK} != true ]] ; then
    print_usage "${APPL_NAME}"
    echo "Warning threshold should be less than critical threshold."
    echo "Threshold should be greater than 0."
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

if [[ ${CONTINUE} == true ]] ; then
  # Counts the lines of 'ps -ef' that start with the current user, that is,
  # every process owned by the user that runs the monitoring scripts.
  # COMMAND is only the text shown in verbose mode; the pipeline is executed
  # directly, without eval.
  COMMAND="ps -ef | grep -e '^ *${USER} ' | wc -l"
  if [[ ${VERBOSE} -ge 2 ]] ; then
    echo "COMMAND: ${COMMAND}"
  fi
  PROCESSES=$(ps -ef | grep -e "^ *${USER} " | wc -l)
  if [[ ${TRACE} == true ]] ; then
    echo "RESULT:'${PROCESSES}'" >> "${LOG}"
  fi
  # Some implementations of wc pad the number with blanks.
  PROCESSES=${PROCESSES//[[:space:]]/}

  if [[ ${PROCESSES} -lt ${WARNING_THRES} ]] ; then
    OUTPUT="The quantity of monitor scripts currently running is OK"
    RETURN=${OK}
  elif [[ ${PROCESSES} -lt ${CRITICAL_THRES} ]] ; then
    OUTPUT="The quantity of monitor scripts currently running is getting big"
    RETURN=${WARNING}
  else
    OUTPUT="The quantity of monitor scripts currently running is critical"
    RETURN=${CRITICAL}
  fi
  # Performance data
  PERFORMANCE="'Monitor scripts running'=${PROCESSES};${WARNING_THRES};${CRITICAL_THRES}"
fi

# Prints the output.
if [[ ${OUTPUT} == "" ]] ; then
  OUTPUT="Note: The test was not executed."
fi
# Builds the output.
if [[ ${CHECK_MK} == true ]] ; then
  # NOTE(review): TYPE is not assigned in the visible part of this script; it
  # expands to nothing unless it is inherited from the environment.
  echo "${RETURN} scriptsRunning${TYPE} ${PERF_MK} ${OUTPUT}"
else
  echo -e "${OUTPUT}|${PERFORMANCE}\n${LONG_OUTPUT}|${LONG_PERFORMANCE}"
fi
# Returns the error code.
if [[ ${VERBOSE} -ge 2 ]] ; then
  echo "Return code: ${RETURN}"
fi
if [[ ${TRACE} == true ]] ; then
  echo -e "OUTPUT:${OUTPUT}\nPERF:${PERFORMANCE}\nLONG_OUT:${LONG_OUTPUT}\nLONGPERF:${LONG_PERFORMANCE}\nRET_CODE:${RETURN}" >> "${LOG}"
  date >> "${LOG}"
  echo -e "<<<<<\n" >> "${LOG}"
fi

# Releases the lock only when this execution created it.
if [[ ${LOCKED} == true && -r "${LOCK_FILE}" ]] ; then
  rm -f "${LOCK_FILE}"
fi

echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Ended ${APPL_NAME} ${COPY_ARGS[*]}" >> "/tmp/${APPL_NAME}.log"

exit ${RETURN}

--------------------------------------------------------------------------------
/check_on_cluster:
--------------------------------------------------------------------------------
#! /usr/bin/env bash

###############################################################################
# Monitor DB2 with Nagios
# Copyright 2013,2014,2015 Andres Gomez Casanova
# https://github.com/angoca/monitor-db2-with-nagios
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###############################################################################

# Wraps the execution of a check when using an active-passive cluster. When the
# execution is on the passive cluster, it returns OK. In order to identify the
# active or passive cluster it checks if a file system (directory) is present.
24 | # The wrapped command is called normally, with all parameters. 25 | # 26 | # In order to be used in Nagios, you have to configure the following. 27 | # (Example with check_instance_up) 28 | # 29 | # # 'check_instance_up' command definition 30 | # define command { 31 | # command_name check_instance_up 32 | # command_line $USER1$/check_by_ssh -H $HOSTADDRESS$ -l nagios -C "scripts/check_on_cluster -d /data/db2 scripts/check_instance_up -i /home/db2inst1" 33 | # } 34 | # define service{ 35 | # host_name db2server 36 | # service_description Quantity of current connections 37 | # check_command check_instance_up 38 | # use generic-service 39 | # } 40 | # 41 | # When using NRPE (Example with check_instance_up): 42 | # 43 | # In the database server: 44 | # 45 | # command[check_instance_up]=/home/nagios/scripts/check_on_cluster -d /data/db2 check_instance_up -i /home/db2inst1 46 | # 47 | # Parameters: 48 | # -d : Directory present only in the active node. 49 | # Followed by the command to execute and its set of parameters. 50 | # 51 | # The exit codes are the standard for Nagios. 52 | # 53 | # 0 The plugin was able to check the service and it appeared to be functioning 54 | # properly. 55 | # 1 The plugin was able to check the service, but it appeared to be above some 56 | # "warning" threshold or did not appear to be working properly. 57 | # 2 The plugin detected that either the service was not running or it was above 58 | # some "critical" threshold. 59 | # 3 Invalid command line arguments were supplied to the plugin or low-level 60 | # failures internal to the plugin (such as unable to fork, or open a tcp 61 | # socket) that prevent it from performing the specified operation. 62 | # Higher-level errors (such as name resolution errors, socket timeouts, etc) 63 | # are outside of the control of plugins and should generally NOT be reported 64 | # as UNKNOWN states. 
#
# Author: Andres Gomez Casanova
# Version: v1.0 2015-12-15

# Flag for debugging.
#set -xv

# Locale to print messages in English. Prevent language problems.
export LANG=en_US

# Prints the author and the version of this script.
function print_revision {
  echo Andres Gomez Casanova - AngocA
  echo v1.0 2015-12-15
}

# Prints the one-line usage.
# Arguments: $1 - name of the program to show in the usage line.
function print_usage {
  /bin/cat <<__EOT
Usage: ${1} { -d directory commandWithParameters }
__EOT
}

# Prints the revision, the usage and the description of the parameters.
# Arguments: $1 - name of the program to show in the usage line.
function print_help {
  print_revision
  # Quoted, so a program name with blanks is not truncated at the first word.
  print_usage "${1}"
  # Max 80 chars width.
  /bin/cat <<__EOT
-------------------------------------------------------------------------------
Wraps the execution of a command, useful when using an active-passive node.
-d
   Directory to check if the current node is the active (if present) or the
   passive (not present).

Followed by the command name and set of parameters to execute as the active
node.
__EOT
}

# Variable to control the flow execution. Prevent Spaghetti code.
CONTINUE=true

# Nagios return codes
OK=0
WARNING=1
CRITICAL=2
UNKNOWN=3
# This is the returned code.
RETURN=${UNKNOWN}

# Nagios output
OUTPUT=

APPL_NAME=$(basename "${0}")

# Leaves a mark in the log with the PID and the parameters of this execution.
echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Started ${APPL_NAME} $*" >> "/tmp/${APPL_NAME}.log"

# Checks the lock file does not exist.
# The lock file is the way the command was called with its parameters
# without spaces.
# Copy of the original parameters: COPY_ARGS names the lock file and is
# written in the final log line.
COPY_ARGS=("${@}")
# NOTE(review): PARAMS is not read in the visible part of this script; it is
# kept in case something else relies on it.
PARAMS=("${@}")
LOCK_FILE=
for VALUE in "${COPY_ARGS[@]}" ; do
  LOCK_FILE="${LOCK_FILE}${VALUE}"
done
# Removes the characters that are not safe in a file name.
LOCK_FILE=${LOCK_FILE//\//}
LOCK_FILE=${LOCK_FILE//\\/}
LOCK_FILE=${LOCK_FILE//\:/}
LOCK_FILE=${LOCK_FILE//\*/}
LOCK_FILE=${LOCK_FILE//\|/}
LOCK_FILE="/tmp/${APPL_NAME}${LOCK_FILE}.lock"
if [[ ! -r "${LOCK_FILE}" ]] ; then
  echo $$ > "${LOCK_FILE}"
  LOCKED=true
else
  # If it exists, then check if the process that wrote it is still running.
  # The exact PID is queried (ps -p) instead of grepping the whole process
  # list, because a plain grep also matches PIDs that only contain the number.
  LOCK_PID=$(cat "${LOCK_FILE}" 2> /dev/null)
  EXIST=
  if [[ "${LOCK_PID}" =~ ^[0-9]+$ ]] ; then
    EXIST=$(ps -p "${LOCK_PID}" -o args= 2> /dev/null | grep -F -e "${APPL_NAME}")
  fi
  # If process is not running, the lock is stale: replace it.
  if [[ "${EXIST}" == "" ]] ; then
    rm -f "${LOCK_FILE}"
    if [[ ! -r "${LOCK_FILE}" ]] ; then
      echo $$ > "${LOCK_FILE}"
      LOCKED=true
    else
      OUTPUT="The lock file cannot be replaced: ${LOCK_FILE}"
      CONTINUE=false
      RETURN=${UNKNOWN}
    fi
  else
    OUTPUT="An instance of the script with the same parameters is already running."
    CONTINUE=false
    RETURN=${UNKNOWN}
  fi
fi

# Validates the parameters: -d directory command [parameters...]
#set -xv
if [[ ${#} -eq 0 ]] ; then
  print_help "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
elif [[ "${1}" != "-d" ]] ; then
  echo "The first parameter should be -d followed by the directory"
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
else
  DIRECTORY=${2}
  # The command is kept as an array to preserve the boundaries of its
  # parameters (values with blanks are passed untouched).
  COMMAND=("${@:3}")
  # Backward compatibility: when the whole command line was given as one
  # single quoted parameter, it is split in words as it was done before.
  if [[ ${#COMMAND[@]} -eq 1 && "${COMMAND[0]}" == *[[:space:]]* ]] ; then
    read -r -a COMMAND <<< "${COMMAND[0]}"
  fi
  # Without directory or command there is nothing to check; this is an
  # invalid call and it must not be reported as OK.
  if [[ "${DIRECTORY}" == "" || ${#COMMAND[@]} -eq 0 ]] ; then
    echo "A directory and a command to execute are required"
    print_usage "${APPL_NAME}"
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

if [[ ${CONTINUE} == true ]] ; then
  if [[ -d "${DIRECTORY}" ]] ; then
    # Active node: runs the wrapped check and propagates its output and its
    # return code.
    OUTPUT=$("${COMMAND[@]}")
    RETURN=${?}
  else
    # Passive node: the directory is not present, nothing to check.
    RETURN=${OK}
    OUTPUT="Ok, el nodo es pasivo"
  fi
fi

# Prints the output.
if [[ ${OUTPUT} == "" ]] ; then
  OUTPUT="Note: The test was not executed."
fi
# Builds the output.
192 | echo "${OUTPUT}" 193 | 194 | if [[ ${LOCKED} == true && -r ${LOCK_FILE} ]] ; then 195 | rm ${LOCK_FILE} 196 | fi 197 | 198 | echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Ended ${APPL_NAME} ${COPY_ARGS[@]}" >> /tmp/${APPL_NAME}.log 199 | 200 | exit ${RETURN} 201 | 202 | -------------------------------------------------------------------------------- /check_open_files: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | ############################################################################### 4 | # Monitor DB2 with Nagios 5 | # Copyright 2013,2014,2015 Andres Gomez Casanova 6 | # https://github.com/angoca/monitor-db2-with-nagios 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | ############################################################################### 20 | 21 | # Checks the quantity of open files by a given instance. 22 | # 23 | # This file execute the command lsof as root with the sudo command. In this 24 | # case you need to give the necessary right to execute this command as root. 25 | # Normally, you can do that just by adding a line in the sudoers file. The best 26 | # is to edit this file with the command: 'visudo' 27 | # 28 | # nagios ALL= NOPASSWD:/usr/bin/lsof 29 | # 30 | # User nagios, can execute from ALL terminals, acting as root the command lsof. 31 | # 32 | # In order to be used in Nagios, you have to configure the following. 
33 | # 34 | # # 'check_open_files' command definition 35 | # define command { 36 | # command_name check_open_files 37 | # command_line $USER1$/check_by_ssh -H $HOSTADDRESS$ -l nagios -C "scripts/check_open_files -i '$ARG1$'" 38 | # } 39 | # define service{ 40 | # host_name db2server 41 | # service_description Quantity of open files 42 | # check_command check_open_files!/home/db2inst1 43 | # use generic-service 44 | # } 45 | # 46 | # When using NRPE: 47 | # 48 | # In the database server: 49 | # 50 | # command[check_open_files]=/home/nagios/scripts/check_open_files -i /home/db2inst1 51 | # 52 | # Parameters: 53 | # -c | --critical : Percentage of the soft limit. This value should be bigger 54 | # than warning value. 55 | # -h | --help : Show the help of this script. 56 | # -i | --instance : Home directory of the instance. Usually it is 57 | # /home/db2inst1. 58 | # -K | --mk : Change the output for Check_MK. 59 | # -T | --trace : Trace mode. Writes output in /tmp. 60 | # -v | --verbose : Execute the program in verbose mode. 61 | # -V | --version : Show the current version of this script. 62 | # -w | --warning : Percentage of the soft limit. This value should be smaller 63 | # than the critical value. 64 | # 65 | # In AIX, the long name options are not supported. 66 | # 67 | # The exit codes are the standard for Nagios. 68 | # 69 | # 0 The plugin was able to check the service and it appeared to be functioning 70 | # properly. 71 | # 1 The plugin was able to check the service, but it appeared to be above some 72 | # "warning" threshold or did not appear to be working properly. 73 | # 2 The plugin detected that either the service was not running or it was above 74 | # some "critical" threshold. 75 | # 3 Invalid command line arguments were supplied to the plugin or low-level 76 | # failures internal to the plugin (such as unable to fork, or open a tcp 77 | # socket) that prevent it from performing the specified operation. 
#   Higher-level errors (such as name resolution errors, socket timeouts, etc)
#   are outside of the control of plugins and should generally NOT be reported
#   as UNKNOWN states.
#
# Author: Andres Gomez Casanova
# Version: v1.1 2015-10-15

# Flag for debugging.
#set -xv

# Locale to print messages in English. Prevent language problems.
export LANG=en_US

# Prints the author and the version of this script.
function print_revision {
  echo Andres Gomez Casanova - AngocA
  echo v1.1 2015-10-15
}

# Prints the one-line usage.
# Arguments: $1 - name of the program to show in the usage line.
function print_usage {
  /bin/cat <<__EOT
Usage: ${1} { -i instanceHomeDirectory [-c][-w][-K] | -h | -V } [-T][-v]
__EOT
}

# Prints the revision, the usage and the description of every option.
# Arguments: $1 - name of the program to show in the usage line.
function print_help {
  print_revision
  # Quoted, so a program name with blanks is not truncated at the first word.
  print_usage "${1}"
  # Max 80 chars width.
  /bin/cat <<__EOT
-------------------------------------------------------------------------------
Checks the quantity of open files and compares this value with the soft max.
-c | --critical INTEGER
   Percentage of the soft limit to be considered as critical. This value should
   be bigger than warning.
   Default 95.
-h | --help
   Shows the current documentation.
-i | --instance STRING
   Instance home directory. It is usually /home/db2inst1.
-K | --mk
   Changes the output to be compatible with Check_MK.
-T | --trace
   Trace mode: writes date and output in /tmp.
-v | --verbose
   Executes the script in verbose mode (multiple times).
-V | --version
   Shows the current version of this script.
-w | --warning INTEGER
   Percentage of the soft limit to be considered as warning. This value should
   be smaller than critical.
   Default 90.

In AIX, the long name options are not supported.
__EOT
}

# Variable to control the flow execution. Prevent Spaghetti code.
CONTINUE=true

# Nagios return codes
OK=0
WARNING=1
CRITICAL=2
UNKNOWN=3
# This is the returned code.
RETURN=${UNKNOWN}

# Nagios Output
# Text output 80 chars | Optional Perf Data Line 1
# Long text Line 1
# Long text Line 2 | Optional Perf Data Line 2
# Optional Perf Data Line 3
OUTPUT=
PERFORMANCE=
LONG_OUTPUT=
LONG_PERFORMANCE=
PERF_MK="-"

APPL_NAME=$(basename "${0}")

# Files created from here on (log, lock, lsof output) are readable and
# writable by every user.
umask 111

echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Started ${APPL_NAME} $*" >> "/tmp/${APPL_NAME}.log"

# Checks the lock file does not exist.
# The lock file is the way the command was called with its parameters
# without spaces.
COPY_ARGS=("${@}")
LOCK_FILE=
for VALUE in "${COPY_ARGS[@]}" ; do
  LOCK_FILE="${LOCK_FILE}${VALUE}"
done
# Removes the characters that are not safe in a file name.
LOCK_FILE=${LOCK_FILE//\//}
LOCK_FILE=${LOCK_FILE//\\/}
LOCK_FILE=${LOCK_FILE//\:/}
LOCK_FILE=${LOCK_FILE//\*/}
LOCK_FILE=${LOCK_FILE//\|/}
LOCK_FILE="/tmp/${APPL_NAME}${LOCK_FILE}.lock"
if [[ ! -r "${LOCK_FILE}" ]] ; then
  echo $$ > "${LOCK_FILE}"
  LOCKED=true
else
  # If it exists, then check if the process that wrote it is still running.
  # The exact PID is queried (ps -p) instead of grepping the whole process
  # list, because a plain grep also matches PIDs that only contain the number.
  LOCK_PID=$(cat "${LOCK_FILE}" 2> /dev/null)
  EXIST=
  if [[ "${LOCK_PID}" =~ ^[0-9]+$ ]] ; then
    EXIST=$(ps -p "${LOCK_PID}" -o args= 2> /dev/null | grep -F -e "${APPL_NAME}")
  fi
  # If process is not running, the lock is stale: replace it.
  if [[ "${EXIST}" == "" ]] ; then
    rm -f "${LOCK_FILE}"
    if [[ ! -r "${LOCK_FILE}" ]] ; then
      echo $$ > "${LOCK_FILE}"
      LOCKED=true
    else
      OUTPUT="The lock file cannot be replaced: ${LOCK_FILE}"
      CONTINUE=false
      RETURN=${UNKNOWN}
    fi
  else
    OUTPUT="An instance of the script with the same parameters is already running."
    CONTINUE=false
    RETURN=${UNKNOWN}
  fi
fi

if [[ ${#} -eq 0 ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Checks the operating system. getopt works different in AIX than in Linux.
OS=$(uname)
# Exit status of getopt. It is captured right after the call because the
# status of a later command (set, echo) would hide a parsing error.
GETOPT_RC=0

if [[ "${OS:0:5}" == "Linux" ]] ; then
  # The following requires GNU getopt. See the following discussion.
  # http://stackoverflow.com/questions/402377

  TEMP=$(getopt -o c:hi:KTvVw: --long critical:,help,instance:,mk,trace,verbose,version,warning: \
    -n "${APPL_NAME}" -- "${@}")
  GETOPT_RC=${?}

elif [[ "${OS:0:3}" == "AIX" ]] ; then
  # Traditional getopt: word splitting of its output is intended here.
  TEMP=$(getopt c:hi:KTvVw: ${*})
  GETOPT_RC=${?}
  set -- ${TEMP}
elif [[ "${OS:0:6}" == "Darwin" || "${OS:0:5}" == "SunOS" || "${OS:0:5}" == "HP-UX" ]] ; then
  echo "This plugin is not yet supported in your platform."
  echo "Please create a ticket in GitHub if you want to enable your current platform."
  echo "https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
elif [[ "${OS:0:6}" == "CYGWIN" ]] ; then
  echo "This plugin is not supported in Cygwin"
  RETURN=${UNKNOWN}
  CONTINUE=false
else
  echo "The platform is unknown: ${OS}"
  echo "Please create a ticket in GitHub: https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${GETOPT_RC} -ne 0 ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Default values. They are always set, so the tests below never read an
# undefined variable when the execution was stopped before the parsing.
HELP=false
VERSION=false
CHECK_MK=false
# Verbosity level
VERBOSE=0
# Trace activated
TRACE=false
LOG=/tmp/${APPL_NAME}.log
INSTANCE_HOME=
WARNING_THRES=90
CRITICAL_THRES=95

if [[ ${CONTINUE} == true ]] ; then
  if [[ "${OS:0:5}" == "Linux" ]] ; then
    # Note the quotes around ${TEMP}: they are essential!
    eval set -- "${TEMP}"
    while true; do
      case "${1}" in
        -c | --critical ) CRITICAL_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -h | --help ) HELP=true ; shift ;;
        -i | --instance ) INSTANCE_HOME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -K | --mk ) CHECK_MK=true ; shift ;;
        -T | --trace ) TRACE=true ; shift ;;
        -v | --verbose ) VERBOSE=$(( VERBOSE + 1 )) ; shift ;;
        -V | --version ) VERSION=true ; shift ;;
        -w | --warning ) WARNING_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -- ) shift ; break ;;
        * ) break ;;
      esac
    done
  elif [[ "${OS:0:3}" == "AIX" ]] ; then
    while [[ "${1}" != -- ]] ; do
      case "${1}" in
        -c) CRITICAL_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -h) HELP=true ; shift ;;
        -i) INSTANCE_HOME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -K) CHECK_MK=true ; shift ;;
        -T) TRACE=true ; shift ;;
        -v) VERBOSE=$(( VERBOSE + 1 )) ; shift ;;
        -V) VERSION=true ; shift ;;
        -w) WARNING_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        * ) break ;;
      esac
    done
  fi
fi

if [[ ${TRACE} == true ]] ; then
  echo ">>>>>" >> "${LOG}"
  date >> "${LOG}"
  echo "Instance at ${INSTANCE_HOME}" >> "${LOG}"
  echo "PID ${$}" >> "${LOG}"
fi

ECHO="help:${HELP}, version:${VERSION}, verbose:${VERBOSE}"
ECHO="${ECHO}, directory:${INSTANCE_HOME}, check_mk:${CHECK_MK}"
ECHO="${ECHO}, warning:${WARNING_THRES}, critical:${CRITICAL_THRES}"

if [[ ${VERBOSE} -ge 2 ]] ; then
  echo "${ECHO}"
fi

if [[ ${TRACE} == true ]] ; then
  echo "PARAMS:${ECHO}" >> "${LOG}"
fi

if [[ ${CONTINUE} == true && ${HELP} == true ]] ; then
  print_help "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${VERSION} == true ]] ; then
  print_revision "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${INSTANCE_HOME} == "" ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Validates the thresholds: positive integers, warning lower than critical.
if [[ ${CONTINUE} == true ]] ; then
  THRES_OK=false
  if [[ "${WARNING_THRES}" =~ ^[0-9]+$ && "${CRITICAL_THRES}" =~ ^[0-9]+$ ]] ; then
    # Forces base 10, otherwise a value with a leading zero (08) is rejected
    # by the shell as an invalid octal number.
    WARNING_THRES=$(( 10#${WARNING_THRES} ))
    CRITICAL_THRES=$(( 10#${CRITICAL_THRES} ))
    if [[ ${WARNING_THRES} -gt 0 && ${WARNING_THRES} -lt ${CRITICAL_THRES} ]] ; then
      THRES_OK=true
    fi
  fi
  if [[ ${THRES_OK} != true ]] ; then
    print_usage "${APPL_NAME}"
    echo "Warning threshold should be less than critical threshold."
    echo "Threshold should be greater than 0."
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

if [[ ${CONTINUE} == true ]] ; then
  if [[ -d "${INSTANCE_HOME}" && -e "${INSTANCE_HOME}/sqllib/db2profile" ]] ; then
    # Load the DB2 profile.
    . "${INSTANCE_HOME}/sqllib/db2profile"
    INSTANCE_NAME=$(db2 get instance | awk '/instance/ {print $7}')
  else
    OUTPUT="Instance directory is invalid."
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

# Gets the process ID of the DB2 engine (db2sysc).
if [[ ${CONTINUE} == true ]] ; then
  COMMAND_PID="db2pd -edus"
  if [[ ${VERBOSE} -ge 2 ]] ; then
    echo "COMMAND: ${COMMAND_PID}"
  fi
  OUTPUT_PID=$(${COMMAND_PID} | grep 'db2sysc PID' | cut -d' ' -f3)
  if [[ ${VERBOSE} -ge 3 ]] ; then
    echo "Result: ${OUTPUT_PID}"
  fi
  if [[ ${OUTPUT_PID} == "" ]] ; then
    OUTPUT="Impossible to get the process ID."
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

# Lists the open files of that process. The list is kept in a file, because
# it is preserved (renamed with a timestamp) when a threshold is reached.
if [[ ${CONTINUE} == true ]] ; then
  TMP_FILE=/tmp/check_open_files_${INSTANCE_HOME//\//}
  COMMAND_LSOF="sudo lsof -p ${OUTPUT_PID}"
  if [[ ${VERBOSE} -ge 2 ]] ; then
    echo "COMMAND: ${COMMAND_LSOF}"
  fi
  ${COMMAND_LSOF} > "${TMP_FILE}" 2> /dev/null
  if [[ ${?} -ne 0 ]] ; then
    OUTPUT="sudo command failed."
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

if [[ ${CONTINUE} == true ]] ; then
  OUTPUT_LSOF=$(tail -n +3 "${TMP_FILE}" | wc -l)
  # Some implementations of wc pad the number with blanks.
  OUTPUT_LSOF=${OUTPUT_LSOF//[[:space:]]/}
  if [[ ${VERBOSE} -ge 3 ]] ; then
    echo "Result lsof: ${OUTPUT_LSOF}"
  fi

  COMMAND_HARD_LIMIT="ulimit -Hn"
  COMMAND_SOFT_LIMIT="ulimit -Sn"
  if [[ ${VERBOSE} -ge 2 ]] ; then
    echo "COMMAND: ${COMMAND_HARD_LIMIT}"
    echo "COMMAND: ${COMMAND_SOFT_LIMIT}"
  fi
  OUTPUT_HARD_LIMIT=$(${COMMAND_HARD_LIMIT})
  OUTPUT_SOFT_LIMIT=$(${COMMAND_SOFT_LIMIT})
  if [[ ${VERBOSE} -ge 3 ]] ; then
    echo "Result hard: ${OUTPUT_HARD_LIMIT}"
    echo "Result soft: ${OUTPUT_SOFT_LIMIT}"
  fi

  if [[ ${DB2INSTANCE} == "" ]] ; then
    OUTPUT="Invalid instance name"
    RETURN=${UNKNOWN}
  elif [[ "${OUTPUT_SOFT_LIMIT}" == "unlimited" ]] ; then
    # Without a limit there is no percentage to compute. The arithmetic below
    # would evaluate "unlimited" as 0 and always report a critical state.
    OUTPUT="OK. List of open files for instance ${DB2INSTANCE} is ${OUTPUT_LSOF} (soft limit is unlimited)"
    RETURN=${OK}
    PERFORMANCE="'Open_files'=${OUTPUT_LSOF}"
    PERF_MK="${PERFORMANCE}"
  elif [[ ! "${OUTPUT_SOFT_LIMIT}" =~ ^[0-9]+$ ]] ; then
    OUTPUT="Impossible to get the soft limit of open files."
    RETURN=${UNKNOWN}
  else
    # The thresholds are percentages of the soft limit.
    WARN_VALUE=$(( OUTPUT_SOFT_LIMIT * WARNING_THRES / 100 ))
    CRIT_VALUE=$(( OUTPUT_SOFT_LIMIT * CRITICAL_THRES / 100 ))
    if [[ ${VERBOSE} -ge 3 ]] ; then
      echo "Thresholds: ${WARN_VALUE} and ${CRIT_VALUE}"
    fi

    if [[ ${OUTPUT_LSOF} -lt ${WARN_VALUE} ]] ; then
      OUTPUT="OK. List of open files for instance ${DB2INSTANCE} is ${OUTPUT_LSOF}"
      RETURN=${OK}
    elif [[ ${OUTPUT_LSOF} -lt ${CRIT_VALUE} ]] ; then
      # Keeps the list of open files for a later analysis.
      mv "${TMP_FILE}" "${TMP_FILE}_$(date +"%Y-%m-%d-%H.%M.%S")" > /dev/null 2>&1
      OUTPUT="List of open files for instance ${DB2INSTANCE} is ${OUTPUT_LSOF} is getting bigger"
      RETURN=${WARNING}
    else
      # Keeps the list of open files for a later analysis.
      mv "${TMP_FILE}" "${TMP_FILE}_$(date +"%Y-%m-%d-%H.%M.%S")" > /dev/null 2>&1
      OUTPUT="List of open files for instance ${DB2INSTANCE} is ${OUTPUT_LSOF} is critical"
      RETURN=${CRITICAL}
    fi
    PERFORMANCE="'Open_files'=${OUTPUT_LSOF};;;;${OUTPUT_SOFT_LIMIT}"
    LONG_PERFORMANCE="'Hard_limit'=${OUTPUT_HARD_LIMIT}"
    PERF_MK="${PERFORMANCE}|${LONG_PERFORMANCE}"
  fi
fi

# Prints the output.
if [[ ${OUTPUT} == "" ]] ; then
  OUTPUT="Note: The test was not executed."
fi
# Builds the output.
if [[ ${CHECK_MK} == true ]] ; then
  echo "${RETURN} openFiles-${INSTANCE_NAME} ${PERF_MK} ${OUTPUT}"
else
  echo -e "${OUTPUT}|${PERFORMANCE}\n${LONG_OUTPUT}|${LONG_PERFORMANCE}"
fi
# Returns the error code.
if [[ ${VERBOSE} -ge 2 ]] ; then
  echo "Return code: ${RETURN}"
fi
if [[ ${TRACE} == true ]] ; then
  echo -e "OUTPUT:${OUTPUT}\nPERF:${PERFORMANCE}\nLONG_OUT:${LONG_OUTPUT}\nLONGPERF:${LONG_PERFORMANCE}\nRET_CODE:${RETURN}" >> "${LOG}"
  date >> "${LOG}"
  echo -e "<<<<<\n" >> "${LOG}"
fi

# Releases the lock only when this execution created it.
if [[ ${LOCKED} == true && -r "${LOCK_FILE}" ]] ; then
  rm -f "${LOCK_FILE}"
fi

echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Ended ${APPL_NAME} ${COPY_ARGS[*]}" >> "/tmp/${APPL_NAME}.log"

exit ${RETURN}

--------------------------------------------------------------------------------
/check_utilities:
--------------------------------------------------------------------------------
#!
/usr/bin/env bash 2 | 3 | ############################################################################### 4 | # Monitor DB2 with Nagios 5 | # Copyright 2013,2014,2015 Andres Gomez Casanova 6 | # https://github.com/angoca/monitor-db2-with-nagios 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | ############################################################################### 20 | 21 | # Checks the utilities that are currently running in the instance. Throws a 22 | # warning if a restore or backup is being performed. 23 | # 24 | # Here comes the documentation 25 | # 26 | # In order to be used in Nagios, you have to configure the following. 27 | # 28 | # # 'check_utilities' command definition 29 | # define command { 30 | # command_name check_utilities 31 | # command_line $USER1$/check_by_ssh -H $HOSTADDRESS$ -l nagios -C "scripts/check_utilities -i '$ARG1$'" 32 | # } 33 | # define service{ 34 | # host_name db2server 35 | # service_description List of utilities 36 | # check_command check_utilities!/home/db2inst1 37 | # use generic-service 38 | # } 39 | # 40 | # When using NRPE: 41 | # 42 | # In the database server: 43 | # 44 | # command[check_utilities]=/home/nagios/scripts/check_utilities -i /home/db2inst1 45 | # 46 | # Then modify the path according to your script location and instance home dir. 
47 | # 48 | # In the Nagios server: 49 | # 50 | # define service{ 51 | # host_name db2server 52 | # service_description List of utilities 53 | # check_command check_nrpe_1arg!check_utilities 54 | # use generic-service 55 | # } 56 | # 57 | # Parameters: 58 | # -c | --critical : Quantity of utilities is critical. 59 | # -h | --help : Show the help of this script. 60 | # -i | --instance : Home directory of the instance. Usually it is 61 | # /home/db2inst1. 62 | # -K | --mk : Change the output for Check_MK. 63 | # -T | --trace : Trace mode. Writes output in /tmp. 64 | # -v | --verbose : Execute the program in verbose mode. 65 | # -V | --version : Show the current version of this script. 66 | # -w | --warning : quantity of utilities is big. 67 | # 68 | # In AIX, the long name options are not supported. 69 | # 70 | # The exit codes are the standard for Nagios. 71 | # 72 | # 0 The plugin was able to check the service and it appeared to be functioning 73 | # properly. 74 | # 1 The plugin was able to check the service, but it appeared to be above some 75 | # "warning" threshold or did not appear to be working properly. 76 | # 2 The plugin detected that either the service was not running or it was above 77 | # some "critical" threshold. 78 | # 3 Invalid command line arguments were supplied to the plugin or low-level 79 | # failures internal to the plugin (such as unable to fork, or open a tcp 80 | # socket) that prevent it from performing the specified operation. 81 | # Higher-level errors (such as name resolution errors, socket timeouts, etc) 82 | # are outside of the control of plugins and should generally NOT be reported 83 | # as UNKNOWN states. 84 | # 85 | # Author: Andres Gomez Casanova 86 | # Version: v1.1 2015-10-15 87 | 88 | # Flag for debugging. 89 | #set -xv 90 | 91 | # Locale to print messages in English. Prevent language problems. 92 | export LANG=en_US 93 | 94 | # Version of this script. 
# Prints the author and the version of this script.
# Outputs: two lines on stdout.
function print_revision {
  echo Andres Gomez Casanova - AngocA
  echo v1.1 2015-10-15
}

# Prints the one-line synopsis.
# Arguments: $1 - name of the script as it should appear in the synopsis.
# Outputs:   the usage line on stdout.
function print_usage {
  /bin/cat <<__EOT
Usage: ${1} { -i instanceHomeDirectory [-c][-w][-K] | -h | -V }[-T][-v]
__EOT
}

# Prints the revision, the synopsis and the detailed description of every
# option.
# Arguments: $1 - name of the script, forwarded to print_usage.
# Outputs:   the help text on stdout.
function print_help {
  print_revision
  print_usage "${1}"
  # Max 80 chars width.
  /bin/cat <<__EOT
-------------------------------------------------------------------------------
Checks the quantity of utilities currently running in the instance.
-c | --critical INTEGER
    Quantity of utilities running is critical.
    Default 10.
-h | --help
    Shows the current documentation.
-i | --instance STRING
    Instance home directory. It is usually /home/db2inst1.
-K | --mk
    Changes the output to be compatible with Check_MK
-T | --trace
    Trace mode: writes date and output in /tmp.
-v | --verbose
    Executes the script in verbose mode (multiple times).
-V | --version
    Shows the current version of this script.
-w | --warning INTEGER
    Quantity of utilities running is considered as a warning.
    Default 5.

In AIX, the long name options are not supported.
__EOT
}

# Variable to control the flow execution. Prevent Spaghetti code.
# Every later stage runs only while this is still "true".
CONTINUE=true

# Nagios return codes
OK=0
WARNING=1
CRITICAL=2
UNKNOWN=3
# This is the returned code.
RETURN=${UNKNOWN}

# Nagios Output
# Text output 80 chars | Optional Perf Data Line 1
# Long text Line 1
# Long text Line 2 | Optional Perf Data Line 2
# Optional Perf Data Line 3
OUTPUT=
PERFORMANCE=
LONG_OUTPUT=
LONG_PERFORMANCE=
PERF_MK="-"

APPL_NAME=$(basename "${0}")

# Invocation log: timestamp, PID and the arguments of this run.
echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Started ${APPL_NAME} $*" >> "/tmp/${APPL_NAME}.log"

# Checks the lock file does not exist.
# The lock file is the way the command was called with its parameters
# without spaces (and without characters that are awkward in a file name).
COPY_ARGS=("${@}")
LOCK_FILE=
for VALUE in "${COPY_ARGS[@]}" ; do
  LOCK_FILE="${LOCK_FILE}${VALUE}"
done
LOCK_FILE=${LOCK_FILE//\//}
LOCK_FILE=${LOCK_FILE//\\/}
LOCK_FILE=${LOCK_FILE//\:/}
LOCK_FILE=${LOCK_FILE//\*/}
LOCK_FILE=${LOCK_FILE//\|/}
LOCK_FILE="/tmp/${APPL_NAME}${LOCK_FILE}.lock"
if [[ ! -r ${LOCK_FILE} ]] ; then
  echo $$ > "${LOCK_FILE}"
  LOCKED=true
else
  # If it exists, then check if the process that wrote it is still running.
  # The PID is looked up exactly (ps -p) instead of grepping the complete
  # process list, where a short or stale PID matches unrelated lines (other
  # PIDs, PPIDs, times) by substring and blocks the check forever.
  LOCK_PID=$(cat "${LOCK_FILE}")
  EXIST=
  if [[ ${LOCK_PID} =~ ^[0-9]+$ ]] ; then
    EXIST=$(ps -p "${LOCK_PID}" -o args= 2> /dev/null | grep -F -- "${APPL_NAME}")
  fi
  # If process is not running, delete the stale lock and take it over.
  if [[ ${EXIST} == "" ]] ; then
    rm "${LOCK_FILE}"
    if [[ ! -r ${LOCK_FILE} ]] ; then
      echo $$ > "${LOCK_FILE}"
      LOCKED=true
    else
      OUTPUT="The lock file cannot be replaced: ${LOCK_FILE}"
      CONTINUE=false
      RETURN=${UNKNOWN}
    fi
  else
    OUTPUT="An instance of the script with the same parameters is already running."
    CONTINUE=false
    RETURN=${UNKNOWN}
  fi
fi

if [[ ${#} -eq 0 ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Checks the operating system. getopt works different in AIX than in Linux.
OS=$(uname)
# Exit status of getopt. It is captured right after the call: reading ${?}
# after the complete if-chain reports the status of 'set' (always 0) in AIX.
GETOPT_RC=0

if [[ "${OS:0:5}" == "Linux" ]] ; then
  # The following requires GNU getopt. See the following discussion.
  # http://stackoverflow.com/questions/402377
  TEMP=$(getopt -o c:hi:KTvVw: --long critical:,help,instance:,mk,trace,verbose,version,warning: \
    -n "${APPL_NAME}" -- "${@}")
  GETOPT_RC=${?}
elif [[ "${OS:0:3}" == "AIX" ]] ; then
  # Traditional getopt: its output has to be word-split, hence no quotes.
  AIX_ARGS=$(getopt c:hi:KTvVw: ${*})
  GETOPT_RC=${?}
  set -- ${AIX_ARGS}
elif [[ "${OS:0:6}" == "Darwin" || "${OS:0:5}" == "SunOS" || "${OS:0:5}" == "HP-UX" ]] ; then
  echo "This plugin is not yet supported in your platform."
  echo "Please create a ticket in GitHub if you want to enable your current platform."
  echo "https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
elif [[ "${OS:0:6}" == "CYGWIN" ]] ; then
  echo "This plugin is not supported in Cygwin"
  RETURN=${UNKNOWN}
  CONTINUE=false
else
  echo "The platform is unknown: ${OS}"
  echo "Please create a ticket in GitHub: https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${GETOPT_RC} -ne 0 ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Defaults of the options. They are set unconditionally so that the trace and
# output sections below never read an unset variable.
HELP=false
VERSION=false
CHECK_MK=false
# Verbosity level
VERBOSE=0
# Trace activated
TRACE=false
LOG=/tmp/${APPL_NAME}.log
INSTANCE_HOME=
WARNING_THRES=5
CRITICAL_THRES=10

if [[ ${CONTINUE} == true ]] ; then
  if [[ "${OS:0:5}" == "Linux" ]] ; then
    # Note the quotes around ${TEMP}: they are essential!
    eval set -- "${TEMP}"
    while true ; do
      case "${1}" in
        # Only the first word of an option value is kept.
        -c | --critical ) CRITICAL_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -h | --help ) HELP=true ; shift ;;
        -i | --instance ) INSTANCE_HOME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -K | --mk ) CHECK_MK=true ; shift ;;
        -T | --trace ) TRACE=true ; shift ;;
        -v | --verbose ) VERBOSE=$(( VERBOSE + 1 )) ; shift ;;
        -V | --version ) VERSION=true ; shift ;;
        -w | --warning ) WARNING_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -- ) shift ; break ;;
        * ) break ;;
      esac
    done
  elif [[ "${OS:0:3}" == "AIX" ]] ; then
    while [[ "${1}" != "--" ]] ; do
      case "${1}" in
        -c) CRITICAL_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -h) HELP=true ; shift ;;
        -i) INSTANCE_HOME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -K) CHECK_MK=true ; shift ;;
        -T) TRACE=true ; shift ;;
        -v) VERBOSE=$(( VERBOSE + 1 )) ; shift ;;
        -V) VERSION=true ; shift ;;
        -w) WARNING_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        * ) break ;;
      esac
    done
  fi
fi

if [[ ${TRACE} == true ]] ; then
  echo ">>>>>" >> "${LOG}"
  date >> "${LOG}"
  echo "Instance at ${INSTANCE_HOME}" >> "${LOG}"
  echo "PID ${$}" >> "${LOG}"
fi

ECHO="help:${HELP}, version:${VERSION}, verbose:${VERBOSE}"
ECHO="${ECHO}, directory:${INSTANCE_HOME}, check_mk:${CHECK_MK}"
ECHO="${ECHO}, warning:${WARNING_THRES}, critical:${CRITICAL_THRES}"

if [[ ${VERBOSE} -ge 2 ]] ; then
  echo "${ECHO}"
fi

if [[ ${TRACE} == true ]] ; then
  echo "PARAMS:${ECHO}" >> "${LOG}"
fi

if [[ ${CONTINUE} == true && ${HELP} == true ]] ; then
  print_help "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${VERSION} == true ]] ; then
  print_revision "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${INSTANCE_HOME} == "" ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Both thresholds have to be positive integers and warning < critical.
# The regex tests come before the arithmetic ones, so the latter only see
# digits.
if [[ ${CONTINUE} == true
    && ( ${WARNING_THRES} == "" || ${CRITICAL_THRES} == ""
    || ! ${WARNING_THRES} =~ ^[0-9]+$ || ! ${CRITICAL_THRES} =~ ^[0-9]+$
    || ${WARNING_THRES} -le 0 || ${CRITICAL_THRES} -le 0
    || ${WARNING_THRES} -ge ${CRITICAL_THRES} ) ]] ; then
  print_usage "${APPL_NAME}"
  echo "Warning threshold should be less than critical threshold."
  echo "Threshold should be greater than 0."
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true ]] ; then
  if [[ -d ${INSTANCE_HOME} && -e ${INSTANCE_HOME}/sqllib/db2profile ]] ; then
    # Load the DB2 profile.
    . "${INSTANCE_HOME}/sqllib/db2profile"
    INSTANCE_NAME=$(db2 get instance | awk '/instance/ {print $7}')
  else
    OUTPUT="Instance directory is invalid."
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

if [[ ${CONTINUE} == true ]] ; then
  COMMAND_UTILITIES="db2 list utilities"
  if [[ ${VERBOSE} -ge 2 ]] ; then
    echo "COMMAND: ${COMMAND_UTILITIES}"
  fi
  # Intentionally unquoted: the string is split into command and arguments.
  OUTPUT_UTILITIES=$(${COMMAND_UTILITIES})
  if [[ ${VERBOSE} -ge 3 ]] ; then
    echo "OUTPUT: ${OUTPUT_UTILITIES}"
  fi
  ERROR=$(printf '%s\n' "${OUTPUT_UTILITIES}" | grep SQL1092)
  if [[ ${ERROR} != "" ]] ; then
    OUTPUT="The current user cannot execute this script: ${OUTPUT_UTILITIES}"
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

if [[ ${CONTINUE} == true ]] ; then
  # grep -c prints a bare number on every platform, so the counters can be
  # used directly in the performance data.
  RESTORE_QTY=$(printf '%s\n' "${OUTPUT_UTILITIES}" | grep -c RESTORE)
  # A restore or a backup is synonymous with a warning.
  if [[ ${RESTORE_QTY} -ge 1 ]] ; then
    OUTPUT="Performing ${RESTORE_QTY} restore(s). "
    RETURN=${WARNING}
  fi
  BACKUP_QTY=$(printf '%s\n' "${OUTPUT_UTILITIES}" | grep -c BACKUP)
  if [[ ${BACKUP_QTY} -ge 1 ]] ; then
    OUTPUT="${OUTPUT}Performing ${BACKUP_QTY} backup(s). "
    RETURN=${WARNING}
  fi

  # Runstats and reorgs are only reported, they do not change the state.
  RUNSTATS_QTY=$(printf '%s\n' "${OUTPUT_UTILITIES}" | grep -c RUNSTATS)
  if [[ ${RUNSTATS_QTY} -ge 1 ]] ; then
    OUTPUT="${OUTPUT}Performing ${RUNSTATS_QTY} runstats(s). "
  fi
  REORGS_QTY=$(printf '%s\n' "${OUTPUT_UTILITIES}" | grep -c REORG)
  if [[ ${REORGS_QTY} -ge 1 ]] ; then
    OUTPUT="${OUTPUT}Performing ${REORGS_QTY} reorg(s). "
  fi

  # Names of any other utility. A rollforward recovery is not reported here.
  OTHERS=$(printf '%s\n' "${OUTPUT_UTILITIES}" | grep "^Type" \
    | grep -Ev 'RESTORE|BACKUP|RUNSTATS|REORG|ROLLFORWARD RECOVERY' \
    | awk -F= '{print $2}')
  # NOTE(review): unlike OTHERS, this counter still includes rollforward
  # recoveries. Kept as it was; confirm which of the two is intended.
  OTHERS_QTY=$(printf '%s\n' "${OUTPUT_UTILITIES}" | grep "^Type" \
    | grep -Evc 'RESTORE|BACKUP|RUNSTATS|REORG')
  if [[ ${OTHERS} != "" ]] ; then
    OUTPUT="${OUTPUT}Other utilities are running: ${OTHERS}"
    RETURN=${WARNING}
  fi

  QTY=$(printf '%s\n' "${OUTPUT_UTILITIES}" | grep -c "^Type")
  if [[ ${QTY} -ge ${CRITICAL_THRES} ]] ; then
    RETURN=${CRITICAL}
    OUTPUT="Quantity of utilities currently running is critical: ${QTY}. ${OUTPUT}"
  elif [[ ${QTY} -ge ${WARNING_THRES} ]] ; then
    RETURN=${WARNING}
    OUTPUT="Quantity of utilities currently running is: ${QTY}. ${OUTPUT}"
  else
    if [[ ${RETURN} == "${UNKNOWN}" ]] ; then
      RETURN=${OK}
    fi
    # Keep the details collected above: a running restore/backup leaves the
    # state in WARNING, and the message has to say why.
    if [[ ${OUTPUT} != "" ]] ; then
      OUTPUT="Quantity of utilities is normal: ${QTY}. ${OUTPUT% }"
    else
      OUTPUT="Quantity of utilities is normal: ${QTY}."
    fi
  fi
  PERFORMANCE="Utilities=${QTY};${WARNING_THRES};${CRITICAL_THRES}"
  # Every perfdata item needs its label, including the first one (Restore).
  LONG_PERFORMANCE="Restore=${RESTORE_QTY} Backup=${BACKUP_QTY} Runstats=${RUNSTATS_QTY} Reorgs=${REORGS_QTY} Others=${OTHERS_QTY}"
  PERF_MK="${PERFORMANCE}|Restore=${RESTORE_QTY}|Backup=${BACKUP_QTY}|Runstats=${RUNSTATS_QTY}|Reorgs=${REORGS_QTY}|Others=${OTHERS_QTY}"
fi

# Prints the output.
if [[ ${OUTPUT} == "" ]] ; then
  OUTPUT="Note: The test was not executed."
fi
# Builds the output.
if [[ ${CHECK_MK} == true ]] ; then
  echo "${RETURN} utilities-${INSTANCE_NAME} ${PERF_MK} ${OUTPUT}"
else
  # printf instead of 'echo -e': backslashes in the DB2 output stay literal.
  printf '%s|%s\n%s|%s\n' "${OUTPUT}" "${PERFORMANCE}" "${LONG_OUTPUT}" "${LONG_PERFORMANCE}"
fi
# Returns the error code.
if [[ ${VERBOSE} -ge 2 ]] ; then
  echo "Return code: ${RETURN}"
fi
# Closes the trace section of this run in the log file.
if [[ ${TRACE} == true ]] ; then
  # printf instead of 'echo -e': backslashes in the values stay literal.
  printf 'OUTPUT:%s\nPERF:%s\nLONG_OUT:%s\nLONGPERF:%s\nRET_CODE:%s\n' \
    "${OUTPUT}" "${PERFORMANCE}" "${LONG_OUTPUT}" "${LONG_PERFORMANCE}" "${RETURN}" >> "${LOG}"
  date >> "${LOG}"
  printf '<<<<<\n\n' >> "${LOG}"
fi

# Removes the lock file only when this run created it.
if [[ ${LOCKED} == true && -r ${LOCK_FILE} ]] ; then
  rm "${LOCK_FILE}"
fi

echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Ended ${APPL_NAME} ${COPY_ARGS[*]}" >> "/tmp/${APPL_NAME}.log"

exit ${RETURN}

--------------------------------------------------------------------------------
/template:
--------------------------------------------------------------------------------
#! /usr/bin/env bash

###############################################################################
# Monitor DB2 with Nagios
# Copyright 2013,2014,2015 Andres Gomez Casanova
# https://github.com/angoca/monitor-db2-with-nagios
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###############################################################################

# TODO This is a template for script. The script has to be silent, it means it
# should not generate any output different to the OUTPUT variable.
#
# Here comes the documentation
#
# TODO Nagios usage.
#
# TODO Nagios with NRPE.
#
# Parameters:
# -c | --critical : TODO What is critical.
# -d | --database : Database name.
# -h | --help : Show the help of this script.
# -i | --instance : Home directory of the instance. Usually it is
#   /home/db2inst1.
# -K | --mk : Change the output for Check_MK.
# -T | --trace : Trace mode. Writes output in /tmp.
# -v | --verbose : Execute the program in verbose mode.
# -V | --version : Show the current version of this script.
# -w | --warning : TODO What is warning.
#
# In AIX, the long name options are not supported.
#
# The exit codes are the standard for Nagios.
#
# 0 The plugin was able to check the service and it appeared to be functioning
#   properly.
# 1 The plugin was able to check the service, but it appeared to be above some
#   "warning" threshold or did not appear to be working properly.
# 2 The plugin detected that either the service was not running or it was above
#   some "critical" threshold.
# 3 Invalid command line arguments were supplied to the plugin or low-level
#   failures internal to the plugin (such as unable to fork, or open a tcp
#   socket) that prevent it from performing the specified operation.
#   Higher-level errors (such as name resolution errors, socket timeouts, etc)
#   are outside of the control of plugins and should generally NOT be reported
#   as UNKNOWN states.
#
# Author: Andres Gomez Casanova
# Version: v1.3 2015-10-15

# Flag for debugging.
#set -xv

# Locale to print messages in English. Prevent language problems.
export LANG=en_US

# Prints the author and the version of this script.
# Outputs: two lines on stdout.
function print_revision {
  echo Andres Gomez Casanova - AngocA
  echo v1.3 2015-10-15
}

# Prints the synopsis.
# Arguments: $1 - name of the script as it should appear in the synopsis.
# Outputs:   the usage text on stdout.
function print_usage {
  # TODO Add arguments.
  /bin/cat <<__EOT
Usage: ${1} { -i instanceHomeDirectory -d databaseName [-c][-w][-K] | -h | -V }
       [-T][-v]
__EOT
}

# Prints the revision, the synopsis and the detailed description of every
# option.
# Arguments: $1 - name of the script, forwarded to print_usage.
# Outputs:   the help text on stdout.
function print_help {
  print_revision
  print_usage "${1}"
  # Max 80 chars width.
  /bin/cat <<__EOT
-------------------------------------------------------------------------------
TODO Add a global description.
TODO Add arguments in detail.
-c | --critical INTEGER
    TODO What is critical.
    TODO Default 2.
-d | --database STRING
    Database name.
-h | --help
    Shows the current documentation.
-i | --instance STRING
    Instance home directory. It is usually /home/db2inst1.
-K | --mk
    Changes the output to be compatible with Check_MK.
-T | --trace
    Trace mode: writes date and output in /tmp.
-v | --verbose
    Executes the script in verbose mode (multiple times).
-V | --version
    Shows the current version of this script.
-w | --warning INTEGER
    TODO What is warning.
    TODO Default 1.

In AIX, the long name options are not supported.
__EOT
}

# Variable to control the flow execution. Prevent Spaghetti code.
# Every later stage runs only while this is still "true".
CONTINUE=true

# Nagios return codes
OK=0
WARNING=1
CRITICAL=2
UNKNOWN=3
# This is the returned code.
RETURN=${UNKNOWN}

# Nagios Output
# Text output 80 chars | Optional Perf Data Line 1
# Long text Line 1
# Long text Line 2 | Optional Perf Data Line 2
# Optional Perf Data Line 3
OUTPUT=
PERFORMANCE=
LONG_OUTPUT=
LONG_PERFORMANCE=
PERF_MK="-"

APPL_NAME=$(basename "${0}")

# Invocation log: timestamp, PID and the arguments of this run.
echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Started ${APPL_NAME} $*" >> "/tmp/${APPL_NAME}.log"

# Checks the lock file does not exist.
# The lock file is the way the command was called with its parameters
# without spaces.
COPY_ARGS=("${@}")
LOCK_FILE=
for VALUE in "${COPY_ARGS[@]}" ; do
  LOCK_FILE="${LOCK_FILE}${VALUE}"
done
# Removes the characters that are awkward in a file name.
LOCK_FILE=${LOCK_FILE//\//}
LOCK_FILE=${LOCK_FILE//\\/}
LOCK_FILE=${LOCK_FILE//\:/}
LOCK_FILE=${LOCK_FILE//\*/}
LOCK_FILE=${LOCK_FILE//\|/}
LOCK_FILE="/tmp/${APPL_NAME}${LOCK_FILE}.lock"
if [[ ! -r ${LOCK_FILE} ]] ; then
  echo $$ > "${LOCK_FILE}"
  LOCKED=true
else
  # If it exists, then check if the process that wrote it is still running.
  # The PID is looked up exactly (ps -p) instead of grepping the complete
  # process list, where a short or stale PID matches unrelated lines (other
  # PIDs, PPIDs, times) by substring and blocks the check forever.
  LOCK_PID=$(cat "${LOCK_FILE}")
  EXIST=
  if [[ ${LOCK_PID} =~ ^[0-9]+$ ]] ; then
    EXIST=$(ps -p "${LOCK_PID}" -o args= 2> /dev/null | grep -F -- "${APPL_NAME}")
  fi
  # If process is not running, delete the stale lock and take it over.
  if [[ ${EXIST} == "" ]] ; then
    rm "${LOCK_FILE}"
    if [[ ! -r ${LOCK_FILE} ]] ; then
      echo $$ > "${LOCK_FILE}"
      LOCKED=true
    else
      OUTPUT="The lock file cannot be replaced: ${LOCK_FILE}"
      CONTINUE=false
      RETURN=${UNKNOWN}
    fi
  else
    OUTPUT="An instance of the script with the same parameters is already running."
    CONTINUE=false
    RETURN=${UNKNOWN}
  fi
fi

if [[ ${#} -eq 0 ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Checks the operating system. getopt works different in AIX than in Linux.
OS=$(uname)
# Exit status of getopt. It is captured right after the call: reading ${?}
# after the complete if-chain reports the status of 'set' (always 0) in AIX.
GETOPT_RC=0

if [[ "${OS:0:5}" == "Linux" ]] ; then
  # The following requires GNU getopt. See the following discussion.
  # http://stackoverflow.com/questions/402377

  # TODO Add the arguments here. One char in -o, multiple char in -long.
  TEMP=$(getopt -o c:d:hi:KTvVw: --long critical:,database:,help,instance:,mk,trace,verbose,version,warning: \
    -n "${APPL_NAME}" -- "${@}")
  GETOPT_RC=${?}
elif [[ "${OS:0:3}" == "AIX" ]] ; then
  # Traditional getopt: its output has to be word-split, hence no quotes.
  AIX_ARGS=$(getopt c:d:hi:KTvVw: ${*})
  GETOPT_RC=${?}
  set -- ${AIX_ARGS}
elif [[ "${OS:0:6}" == "Darwin" || "${OS:0:5}" == "SunOS" || "${OS:0:5}" == "HP-UX" ]] ; then
  echo "This plugin is not yet supported in your platform."
  echo "Please create a ticket in GitHub if you want to enable your current platform."
  echo "https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
elif [[ "${OS:0:6}" == "CYGWIN" ]] ; then
  echo "This plugin is not supported in Cygwin"
  RETURN=${UNKNOWN}
  CONTINUE=false
else
  echo "The platform is unknown: ${OS}"
  echo "Please create a ticket in GitHub: https://github.com/angoca/monitor-db2-with-nagios/issues"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${GETOPT_RC} -ne 0 ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Defaults of the options. They are set unconditionally so that the trace and
# output sections below never read an unset variable.
HELP=false
VERSION=false
CHECK_MK=false
# Verbosity level
VERBOSE=0
# Trace activated
TRACE=false
LOG=/tmp/${APPL_NAME}.log
INSTANCE_HOME=
DATABASE_NAME=
WARNING_THRES=1
CRITICAL_THRES=2

if [[ ${CONTINUE} == true ]] ; then
  if [[ "${OS:0:5}" == "Linux" ]] ; then
    # Note the quotes around ${TEMP}: they are essential!
    eval set -- "${TEMP}"
    while true ; do
      case "${1}" in
        # Only the first word of an option value is kept.
        -c | --critical ) CRITICAL_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -d | --database ) DATABASE_NAME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -h | --help ) HELP=true ; shift ;;
        -i | --instance ) INSTANCE_HOME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -K | --mk ) CHECK_MK=true ; shift ;;
        -T | --trace ) TRACE=true ; shift ;;
        -v | --verbose ) VERBOSE=$(( VERBOSE + 1 )) ; shift ;;
        -V | --version ) VERSION=true ; shift ;;
        -w | --warning ) WARNING_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        # TODO Validate any new argument.
        -- ) shift ; break ;;
        * ) break ;;
      esac
    done
  elif [[ "${OS:0:3}" == "AIX" ]] ; then
    while [[ "${1}" != "--" ]] ; do
      case "${1}" in
        -c) CRITICAL_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -d) DATABASE_NAME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -h) HELP=true ; shift ;;
        -i) INSTANCE_HOME=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        -K) CHECK_MK=true ; shift ;;
        -T) TRACE=true ; shift ;;
        -v) VERBOSE=$(( VERBOSE + 1 )) ; shift ;;
        -V) VERSION=true ; shift ;;
        -w) WARNING_THRES=$(echo ${2} | cut -d' ' -f1) ; shift 2 ;;
        * ) break ;;
      esac
    done
  fi
fi

if [[ ${TRACE} == true ]] ; then
  echo ">>>>>" >> "${LOG}"
  date >> "${LOG}"
  echo "Instance at ${INSTANCE_HOME}" >> "${LOG}"
  echo "PID ${$}" >> "${LOG}"
fi

ECHO="help:${HELP}, version:${VERSION}, verbose:${VERBOSE}"
ECHO="${ECHO}, directory:${INSTANCE_HOME}, database:${DATABASE_NAME}"
ECHO="${ECHO}, warning:${WARNING_THRES}, critical:${CRITICAL_THRES}"
ECHO="${ECHO}, check_mk:${CHECK_MK}"

if [[ ${VERBOSE} -ge 2 ]] ; then
  echo "${ECHO}"
fi

if [[ ${TRACE} == true ]] ; then
  echo "PARAMS:${ECHO}" >> "${LOG}"
fi

if [[ ${CONTINUE} == true && ${HELP} == true ]] ; then
  print_help "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${VERSION} == true ]] ; then
  print_revision "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${INSTANCE_HOME} == "" ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true && ${DATABASE_NAME} == "" ]] ; then
  print_usage "${APPL_NAME}"
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

# Both thresholds have to be positive integers and warning < critical.
# The regex tests come before the arithmetic ones, so the latter only see
# digits.
if [[ ${CONTINUE} == true
    && ( ${WARNING_THRES} == "" || ${CRITICAL_THRES} == ""
    || ! ${WARNING_THRES} =~ ^[0-9]+$ || ! ${CRITICAL_THRES} =~ ^[0-9]+$
    || ${WARNING_THRES} -le 0 || ${CRITICAL_THRES} -le 0
    || ${WARNING_THRES} -ge ${CRITICAL_THRES} ) ]] ; then
  print_usage "${APPL_NAME}"
  echo "Warning threshold should be less than critical threshold."
  echo "Threshold should be greater than 0."
  RETURN=${UNKNOWN}
  CONTINUE=false
fi

if [[ ${CONTINUE} == true ]] ; then
  if [[ -d ${INSTANCE_HOME} && -e ${INSTANCE_HOME}/sqllib/db2profile ]] ; then
    # Load the DB2 profile.
    . "${INSTANCE_HOME}/sqllib/db2profile"
    INSTANCE_NAME=$(db2 get instance | awk '/instance/ {print $7}')
  else
    OUTPUT="Instance directory is invalid."
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

# Verifies that the given database appears as an alias in the db directory.
if [[ ${CONTINUE} == true ]] ; then
  COMMAND_DATABASE="db2 list db directory"
  if [[ ${VERBOSE} -ge 2 ]] ; then
    echo "COMMAND: ${COMMAND_DATABASE}"
  fi
  # Intentionally unquoted: the string is split into command and arguments.
  DATABASE=$(${COMMAND_DATABASE})
  if [[ ${TRACE} == true ]] ; then
    echo "RESULT:'${DATABASE}'" >> "${LOG}"
  fi
  # Case-insensitive whole-word match of the alias; the name is quoted and
  # placed after '--' so it is never taken as a grep option or split.
  DATABASE=$(printf '%s\n' "${DATABASE}" | awk '/Database alias/ {print $4}' | grep -iw -- "${DATABASE_NAME}")
  if [[ ${VERBOSE} -ge 3 ]] ; then
    echo "RESULT:'${DATABASE}'"
  fi
  if [[ ${DATABASE} == "" ]] ; then
    OUTPUT="The database ${DATABASE_NAME} is not catalogued."
    RETURN=${UNKNOWN}
    CONTINUE=false
  fi
fi

if [[ ${CONTINUE} == true ]] ; then
  # TODO add the logic
  echo "Logic goes here!"
fi

# Prints the output.
if [[ ${OUTPUT} == "" ]] ; then
  OUTPUT="Note: The test was not executed."
fi
# Builds the output.
if [[ ${CHECK_MK} == true ]] ; then
  # TODO Change name for CheckMK
  # NOTE(review): TYPE is not assigned anywhere in the visible part of this
  # template; presumably each concrete script sets it - confirm.
  echo "${RETURN} templateName${TYPE}-${INSTANCE_NAME}-${DATABASE_NAME} ${PERF_MK} ${OUTPUT}"
else
  # printf instead of 'echo -e': backslashes in the values stay literal.
  printf '%s|%s\n%s|%s\n' "${OUTPUT}" "${PERFORMANCE}" "${LONG_OUTPUT}" "${LONG_PERFORMANCE}"
fi
# Returns the error code.
if [[ ${VERBOSE} -ge 2 ]] ; then
  echo "Return code: ${RETURN}"
fi
# Closes the trace section of this run in the log file.
if [[ ${TRACE} == true ]] ; then
  printf 'OUTPUT:%s\nPERF:%s\nLONG_OUT:%s\nLONGPERF:%s\nRET_CODE:%s\n' \
    "${OUTPUT}" "${PERFORMANCE}" "${LONG_OUTPUT}" "${LONG_PERFORMANCE}" "${RETURN}" >> "${LOG}"
  date >> "${LOG}"
  printf '<<<<<\n\n' >> "${LOG}"
fi

# Removes the lock file only when this run created it.
if [[ ${LOCKED} == true && -r ${LOCK_FILE} ]] ; then
  rm "${LOCK_FILE}"
fi

echo "$(date +"%Y-%m-%d-%H.%M.%S") $$ Ended ${APPL_NAME} ${COPY_ARGS[*]}" >> "/tmp/${APPL_NAME}.log"

exit ${RETURN}

--------------------------------------------------------------------------------