├── README.md ├── LICENSE ├── check-snapshot-age └── checkzfs.py /README.md: -------------------------------------------------------------------------------- 1 | # check-zfs-replication 2 | This script checks yout ZFS replication an generates reports in different flavours or can act as checkmk agent plugin. 3 | 4 | ``` 5 | wget -O /usr/local/bin/checkzfs https://raw.githubusercontent.com/bashclub/check-zfs-replication/main/checkzfs.py 6 | chmod +x /usr/local/bin/checkzfs 7 | checkzfs --help 8 | ``` 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2021, Bash Club 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /check-snapshot-age: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import subprocess 4 | import re 5 | import time 6 | 7 | #_snapshots = open("zfs.txt","r").read() 8 | _snapshots = subprocess.check_output("/usr/sbin/zfs list -t snapshot -Hpo name,creation".split()) 9 | 10 | LABELS=("frequent","hourly","daily","weekly","monthly","yearly","backup-zfs","bashclub-zfs") 11 | RE_LABELSEARCH = re.compile("|".join(LABELS)) 12 | _datasets = {} 13 | for _datastore,_snapshot,_creation in re.findall(r"^([\w_./-]+)@([\w_.-]+)\t(\d+)",_snapshots.decode('utf-8'),re.M): 14 | if _datastore not in _datasets: 15 | _datasets[_datastore] = {} 16 | _label = RE_LABELSEARCH.search(_snapshot) 17 | if _label: 18 | _label = _label.group(0) 19 | else: 20 | _label = "other" 21 | if _label not in _datasets[_datastore]: 22 | _datasets[_datastore][_label] = [] 23 | _datasets[_datastore][_label].append((_snapshot,int(_creation))) 24 | 25 | for _datastore in _datasets.keys(): 26 | print(_datastore) 27 | print("-"*40) 28 | for _label in _datasets[_datastore].keys(): 29 | _data = _datasets[_datastore][_label] 30 | _first = time.strftime("%d.%m.%Y %H:%M:%S",time.localtime(_data[0][1])) 31 | _last = time.strftime("%d.%m.%Y %H:%M:%S",time.localtime(_data[-1][1])) 32 | _count = len(_data) 33 | print(f" {_label} 
{_count}") 34 | print(f" {_first} {_data[0][0]}") 35 | if _count > 1: 36 | print(f" {_last} {_data[-1][0]}") 37 | print("") 38 | -------------------------------------------------------------------------------- /checkzfs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # vim: set fileencoding=utf-8:noet 4 | ## Copyright 2023 sysops.tv ;-) 5 | ## BSD-2-Clause 6 | ## 7 | ## Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 8 | ## 9 | ## 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 10 | ## 11 | ## 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 12 | ## 13 | ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 14 | ## THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS 15 | ## BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 16 | ## GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 17 | ## LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
18 | 19 | VERSION = 4.14 20 | 21 | ### for check_mk usage link or copy binary to check_mk_agent/local/checkzfs 22 | ### create /etc/check_mk/checkzfs ## the config file name matches the filename in check_mk_agent/local/ 23 | ### to create a diffent set, link script to check_mk_agent/local/checkzfs2 and create /etc/check_mk/checkzfs2 24 | ### 25 | ### source: host1 # [optional] comma seperated hostnames to check for source 26 | ### remote: host1 # [optional] " " " remote 27 | ### prefix: host1 # [optional] Prefix for check_mk Servicename - default REPLICA 28 | ### filter: rpool/data|replica # [optional] regex filter to match source 29 | ### replicafilter: remote # [optional] regex filter to match for replica snapshots 30 | ### snapshotfilter: # [optional] regex filter to match snapshot name 31 | ### threshold: 20,40 # [optional] threshold warn,crit in minutes 32 | ### maxsnapshots: 60,80 # [optional] threshold maximum of snapshots warn,crit 33 | ### ssh-extra-options: # [optional] comma seperated ssh options like added with -o 34 | ### ssh-identity: /path/to/priv.key # [optional] path to ssh private key 35 | ### disabled: 1 # [optional] disable the script with this config 36 | ### legacyhosts: host1 # [optional] use an external script zfs_legacy_list to get snapshots with guid and creation at lease 37 | 38 | ## Regex Tips: 39 | ## 'Raid5[ab]\/(?!Rep_|Swap-)\w+' everything from Raid5a or Raid5b not start with Rep_ or Swap- 40 | 41 | 42 | ## 43 | ##!/bin/bash 44 | ## legacy script example to put in path as zfs_legacy_list to for host with missing written attribute and list option -p 45 | # for snapshot in $(zfs list -H -t all -o name); 46 | # do 47 | # echo -ne "$snapshot" 48 | # zfs get -H -p type,creation,guid,used,available,userrefs,com.sun:auto-snapshot,tv.sysops:checkzfs $snapshot | awk '{print $3}'| 49 | # while IFS= read -r line; do 50 | # echo -ne "\t${line}" 51 | # done 52 | # echo "" 53 | # done 54 | 55 | from pprint import pprint 56 | import sys 57 | import 
re 58 | import subprocess 59 | import time 60 | import json 61 | import os.path 62 | import os 63 | import socket 64 | from email.message import EmailMessage 65 | from email.mime.application import MIMEApplication 66 | from email.utils import formatdate 67 | 68 | _ = lambda x: x ## inline translate ... maybe later 69 | 70 | class zfs_snapshot(object): 71 | def __init__(self,dataset_obj,snapshot,creation,guid,written,origin,**kwargs): 72 | self.replica = [] 73 | self.dataset_obj = dataset_obj 74 | self.snapshot = snapshot 75 | self.creation = int(creation) 76 | self.age = int(time.time() - self.creation) 77 | self.written = int(written) 78 | self.origin = origin 79 | self.guid = guid 80 | 81 | def add_replica(self,snapshot): 82 | self.replica.append(snapshot) ## den snapshot als replica hinzu 83 | self.dataset_obj.add_replica(snapshot.dataset_obj) ## als auch dem dataset 84 | 85 | def __repr__(self): 86 | return f"{self.guid} {self.dataset_obj.dataset_name} {self.snapshot}\n" 87 | 88 | def __str__(self): 89 | return f"{self.guid} {self.snapshot}\n" 90 | 91 | 92 | 93 | class zfs_dataset(object): 94 | def __init__(self,dataset,guid,used,available,creation,type,autosnapshot,checkzfs,remote=None,source=None,**kwargs): 95 | self.checkzfs = checkzfs not in ("false","ignore") ## ignore wenn tv.sysops:checkzfs entweder false oder ignore (ignore macht es überischtlicher) 96 | self.snapshots = {} 97 | self.remote = remote 98 | self.is_source = source 99 | self.guid = guid 100 | self.dataset = dataset 101 | self.creation = creation = int(creation) 102 | self.autosnapshot = {"true":2,"false":0}.get(autosnapshot,1) ### macht für crit/warn/ok am meisten sinn so 103 | self.type = type 104 | self.used = int(used) 105 | self.available = int(available) 106 | self.replica = set() 107 | self.lastsnapshot = "" 108 | 109 | def add_snapshot(self,**kwargs): 110 | _obj = zfs_snapshot(self,**kwargs) ## neuen snapshot mit parametern erstellen 111 | self.snapshots[_obj.guid] = _obj ## zu 
lokalen snapshots diesem DS hinzu 112 | return _obj ## snapshot objeckt zurück 113 | 114 | def add_replica(self,ds_object,**kwargs): 115 | self.replica.add(ds_object) 116 | 117 | def _get_latest_snapshot(self,source=None): 118 | _snapshots = self.sorted_snapshots() 119 | if source: ## wenn anderes dataset übergeben dann nur snapshots zurück die auch auf der anderen seite (mit gleicher guid) vorhanden sind 120 | _snapshots = list(filter(lambda x: x.guid in source.snapshots.keys(),_snapshots)) 121 | return _snapshots[0] if _snapshots else None ## letzten gemeinsamen snapshot zurück 122 | 123 | def sorted_snapshots(self): 124 | return sorted(self.snapshots.values(), key=lambda x: x.age) ## snapshots nach alter sortiert 125 | 126 | @property 127 | def dataset_name(self): ## namen mit host prefixen 128 | if self.remote: 129 | return f"{self.remote}#{self.dataset}" 130 | return self.dataset 131 | 132 | @property 133 | def latest_snapshot(self): ## letzten snapshot 134 | if self.snapshots: 135 | return self.sorted_snapshots()[0] 136 | 137 | 138 | def get_info(self,source,threshold=None,maxsnapshots=None,ignore_replica=False): 139 | _latest = self._get_latest_snapshot(source if source != self else None) ## wenn das source dataset nicht man selber ist 140 | _status = -1 141 | _has_zfs_autosnapshot = any(map(lambda x: str(x.snapshot).startswith("zfs-auto-snap_"),self.snapshots.values())) 142 | _message = "" 143 | if source == self: 144 | if not self.replica and ignore_replica == False: 145 | _status = 1 ## warn 146 | _message = _("kein Replikat gefunden") 147 | if self.autosnapshot == 2 and _has_zfs_autosnapshot: 148 | _status = 1 ## warn 149 | _message = _("com.sun:auto-snapshot ist auf der Quelle auf true und wird evtl. 
mit repliziert") 150 | else: 151 | if _has_zfs_autosnapshot: ## nur auf systemen mit zfs-aut-snapshot 152 | if self.autosnapshot == 1: 153 | _status = 1 ## warn 154 | _message = _("com.sun:auto-snapshot ist nicht false") 155 | elif self.autosnapshot == 2: 156 | _status = 2 ## crit 157 | _message = _("com.sun:auto-snapshot ist auf Replikationspartner aktiviert") 158 | 159 | if _latest: 160 | _threshold_status = "" 161 | _age = _latest.age / 60 ## in minuten 162 | if threshold: 163 | _threshold_status = list( 164 | map(lambda x: x[1], ## return only last 165 | filter(lambda y: y[0] < _age, ## check threshold Texte 166 | zip(threshold,(1,2)) ## warn 1 / crit 2 167 | ) 168 | ) 169 | ) 170 | if not _threshold_status: 171 | if _status == -1: 172 | _status = 0 ## ok 173 | else: 174 | _message = _("Snapshot ist zu alt") 175 | _status = _threshold_status[-1] 176 | if _latest != self.latest_snapshot: 177 | _message = _("Rollback zu altem Snapshot. - '{0.snapshot}' nicht mehr vorhanden".format(self.latest_snapshot)) 178 | _status = 2 ## crit 179 | 180 | if maxsnapshots: 181 | _maxsnapshot_status = list( 182 | map(lambda x: x[1], 183 | filter(lambda y: y[0] < len(self.snapshots.keys()), 184 | zip(maxsnapshots,(1,2)) 185 | ) 186 | ) 187 | ) 188 | if _maxsnapshot_status: 189 | if _maxsnapshot_status[-1] > _status: 190 | _message = _("zu viele Snapshots") 191 | _status = _maxsnapshot_status[-1] 192 | if not self.checkzfs: 193 | _status = -1 194 | 195 | return { ## neues object zurück was die attribute enthält die wir über columns ausgeben 196 | "source" : source.dataset_name if source else "", 197 | "replica" : self.dataset_name if source != self else "", 198 | "type" : self.type, 199 | "autosnapshot" : self.autosnapshot, 200 | "used" : self.used, 201 | "available" : self.available, 202 | "creation" : (_latest.creation if _latest else 0) if source != self else self.creation, 203 | "count" : len(self.snapshots.keys()), 204 | "snapshot" : _latest.snapshot if _latest else "", 205 | 
"age" : _latest.age if _latest else 0, 206 | "written" : _latest.written if _latest else 0, 207 | "origin" : _latest.origin if _latest else "", 208 | "guid" : _latest.guid if _latest else "", 209 | "status" : _status, 210 | "message" : _message 211 | } 212 | 213 | def __repr__(self): 214 | return f"{self.is_source}-{self.dataset_name:25.25}{self.type}\n" 215 | 216 | def __str__(self): 217 | return f"{self.dataset:25.25}{self.type} -snapshots: {self.lastsnapshot}\n" 218 | 219 | class no_regex_class(object): 220 | def search(*args): 221 | return True 222 | 223 | class negative_regex_class(object): 224 | def __init__(self,compiled_regex): 225 | self.regex = compiled_regex 226 | def search(self,text): 227 | return not self.regex.search(text) 228 | 229 | class zfscheck(object): 230 | ZFSLIST_REGEX = re.compile(r"^(?P.*?)(?:|@(?P.*?))\t(?P\w*)\t(?P\d+)\t(?P\d+)\t(?P\d+|-)\t(?P\d+|-)\t(?P\d+|-)\t(?P.*?)\t(?P[-\w]+)\t(?P[-\w]+)$",re.M) 231 | ZFS_DATASETS = {} 232 | ZFS_SNAPSHOTS = {} 233 | #VALIDCOLUMNS = ["source","replica","type","autosnap","snapshot","creation","guid","used","referenced","size","age","status","message"] ## valid columns 234 | VALIDCOLUMNS = zfs_dataset("","",0,0,0,"","","").get_info(None).keys() ## generate with dummy values 235 | DEFAULT_COLUMNS = ["status","source","replica","snapshot","age","count"] #,"message"] ## default columns 236 | DATEFORMAT = "%a %d.%b.%Y %H:%M" 237 | COLOR_CONSOLE = { 238 | 0 : "\033[92m", ## ok 239 | 1 : "\033[93m", ## warn ## hier ist das hässliche gelb auf der weißen console .... 
GOFOR themes!!!1111 240 | 2 : "\033[91m", ## crit 241 | "reset" : "\033[0m" 242 | } 243 | COLUMN_NAMES = { ## Namen frei editierbar 244 | "source" : _("Quelle"), 245 | "snapshot" : _("Snapshotname"), 246 | "creation" : _("Erstellungszeit"), 247 | "type" : _("Typ"), 248 | "age" : _("Alter"), 249 | "guid" : _("GUID"), 250 | "count" : _("Anzahl"), 251 | "used" : _("genutzt"), 252 | "available" : _("verfügbar"), 253 | "replica" : _("Replikat"), 254 | "written" : _("geschrieben"), 255 | "origin" : _("Ursprung"), 256 | "autosnapshot" : _("Autosnapshot"), 257 | "message" : _("Kommentar") 258 | } 259 | COLUMN_ALIGN = { ## formatierung align - python string format 260 | "source" : "<", 261 | "replica" : "<", 262 | "snapshot" : "<", 263 | "copy" : "<", 264 | "status" : "^" 265 | } 266 | 267 | TIME_MULTIPLICATOR = { ## todo 268 | "h" : 60, ## Stunden 269 | "d" : 60*24, ## Tage 270 | "w" : 60 * 24 * 7, ## Wochen 271 | "m" : 60 * 24 * 30 ## Monat 272 | } 273 | COLUMN_MAPPER = {} 274 | 275 | def __init__(self,remote,source,sourceonly,legacyhosts,output,ignoreattr,mail=None,prefix='REPLICA',debug=False,**kwargs): 276 | _start_time = time.time() 277 | self.remote_hosts = remote.split(",") if remote else [""] if source and not sourceonly else [] ## wenn nicht und source woanders ... 
"" (also lokal) als remote 278 | self.source_hosts = source.split(",") if source else [""] ## wenn nix dann "" als local 279 | self.legacy_hosts = legacyhosts.split(",") if legacyhosts else [] 280 | self.sourceonly = sourceonly 281 | self.filter = None 282 | self.debug = debug 283 | self.print_debug(f"Version: {VERSION}") 284 | self.prefix = prefix.strip().replace(" ","_") ## service name bei checkmk leerzeichen durch _ ersetzen 285 | self.rawdata = False 286 | self.ignoreattr = ignoreattr 287 | self.mail_address = mail 288 | self._overall_status = [] 289 | self.sortreverse = False 290 | self.output = output if mail == None else "mail" 291 | self.print_debug(f"set attribute: remote -> {self.remote_hosts!r}") 292 | self.print_debug(f"set attribute: source -> {self.source_hosts!r}") 293 | self.print_debug(f"set attribute: sourceonly -> {sourceonly!r}") 294 | self.print_debug(f"set attribute: prefix -> {prefix!r}") 295 | if legacyhosts: 296 | self.print_debug(f"set attribute: legacyhosts -> {self.legacy_hosts}") 297 | self._check_kwargs(kwargs) 298 | self.print_debug(f"set attribute: output -> {self.output!r}") 299 | self.get_data() 300 | if self.output != "snaplist": 301 | _data = self.get_output() 302 | else: 303 | print(self.get_snaplist()) 304 | if self.output == "text" or self.output == "": 305 | print(self.table_output(_data)) 306 | if self.output == "html": 307 | print( self.html_output(_data)) 308 | if self.output == "mail": 309 | self.mail_output(_data) 310 | if self.output == "checkmk": 311 | print(self.checkmk_output(_data)) 312 | if self.output == "json": 313 | print(self.json_output(_data)) 314 | if self.output == "csv": 315 | print(self.csv_output(_data)) 316 | 317 | def _check_kwargs(self,kwargs): ## alle argumente prüfen und als attribute zuordnen 318 | ## argumente überprüfen 319 | 320 | for _k,_v in kwargs.items(): 321 | self.print_debug(f"set attribute: {_k} -> {_v!r}") 322 | 323 | if _k == "columns": 324 | if self.output == "snaplist": 325 | 
_default = ["status","source","snapshot","replica","guid","age"] 326 | else: 327 | _default = self.DEFAULT_COLUMNS[:] 328 | 329 | if not _v: 330 | self.columns = _default 331 | continue ## defaults 332 | # add modus wenn mit + 333 | if not _v.startswith("+"): 334 | _default = [] 335 | else: 336 | _v = _v[1:] 337 | _v = _v.split(",") 338 | 339 | if _v == ["*"]: 340 | _default = self.VALIDCOLUMNS 341 | else: 342 | for _column in _v: 343 | if _column not in self.VALIDCOLUMNS: 344 | raise Exception(_("ungültiger Spaltenname {0} ({1})").format(_v,",".join(self.VALIDCOLUMNS))) 345 | _default.append(_column) 346 | _v = list(_default) 347 | 348 | if _k == "sort" and _v: 349 | ## sortierung desc wenn mit + 350 | if _v.startswith("+"): 351 | self.sortreverse = True 352 | _v = _v[1:] 353 | if _v not in self.VALIDCOLUMNS: 354 | raise Exception("ungültiger Spaltenname: {0} ({1})".format(_v,",".join(self.VALIDCOLUMNS))) 355 | 356 | if _k == "threshold" and _v: 357 | _v = _v.split(",") 358 | ## todo tage etc 359 | _v = list(map(int,_v[:2])) ## convert zu int 360 | if len(_v) == 1: 361 | _v = (float("inf"),_v[0]) 362 | _v = sorted(_v) ## kleinere Wert ist immer warn 363 | 364 | if _k == "maxsnapshots" and _v: 365 | _v = _v.split(",") 366 | ## todo tage etc 367 | _v = list(map(int,_v[:2])) ## convert zu int 368 | if len(_v) == 1: 369 | _v = (float("inf"),_v[0]) 370 | _v = sorted(_v) ## kleinere Wert ist immer warn 371 | 372 | if _k in ("filter","snapshotfilter","replicafilter"): 373 | if _v: 374 | if _v.startswith("!"): 375 | _v = negative_regex_class(re.compile(_v[1:])) 376 | else: 377 | _v = re.compile(_v) 378 | else: 379 | _v = no_regex_class() ### dummy klasse .search immer True - spart abfrage ob filter vorhanden 380 | 381 | setattr(self,_k,_v) 382 | 383 | ## funktionen zum anzeigen / muss hier da sonst kein self 384 | if not self.rawdata: 385 | self.COLUMN_MAPPER = { 386 | "creation" : self.convert_ts_date, 387 | "age" : self.seconds2timespan, 388 | "used" : 
self.format_bytes, 389 | "available" : self.format_bytes, 390 | "written" : self.format_bytes, 391 | "autosnapshot" : self.format_autosnapshot, 392 | "status" : self.format_status 393 | } 394 | 395 | def get_data(self): 396 | _hosts_checked = [] 397 | _remote_servers = set(self.source_hosts + self.remote_hosts) ### no duplicate connection 398 | _remote_data = {} 399 | _start_time = time.time() 400 | _iteration = 0 401 | for _remote in _remote_servers: ## erstmal rohdaten holen 402 | _remote = _remote.strip() if type(_remote) == str else None ## keine leerzeichen, werden von ghbn mit aufgelöst 403 | _remote_data[_remote] = self._call_proc(_remote) 404 | _iteration+=1 405 | 406 | _matched_snapshots = 0 407 | _filtered_snapshots = 0 408 | for _remote,_rawdata in _remote_data.items(): ## allen source datasets erstmal snapshots hinzu und bei den anderen dataset anlegen 409 | for _entry in self._parse(_rawdata): 410 | _iteration+=1 411 | _dsname = "{0}#{dataset}".format(_remote,**_entry) ## name bilden 412 | _is_source = bool(_remote in self.source_hosts and self.filter.search(_dsname)) 413 | if _entry.get("type") in ("volume","filesystem"): ## erstmal keine snapshots 414 | self.ZFS_DATASETS[_dsname] = zfs_dataset(**_entry,remote=_remote,source=_is_source) 415 | continue ## nix mehr zu tun ohne snapshot 416 | if not _is_source: 417 | continue 418 | ## snapshots 419 | if not self.snapshotfilter.search(_entry.get("snapshot","")): ## wenn --snapshotfilter gesetzt und kein match 420 | _filtered_snapshots+=1 421 | continue 422 | _matched_snapshots+=1 423 | _dataset = self.ZFS_DATASETS.get("{0}#{dataset}".format(_remote,**_entry)) 424 | try: 425 | _snapshot = _dataset.add_snapshot(**_entry) 426 | except: 427 | pass 428 | raise 429 | self.ZFS_SNAPSHOTS[_snapshot.guid] = _snapshot 430 | _execution_time = time.time() - _start_time 431 | 432 | if self.sourceonly == True: 433 | return 434 | 435 | for _remote,_rawdata in _remote_data.items(): ## jetzt nach replica suchen 436 | for 
_entry in self._parse(_rawdata): ## regex geparste ausgabe von zfs list 437 | _iteration+=1 438 | if _entry.get("type") != "snapshot": ## jetzt nur die snapshots 439 | continue 440 | _dataset = self.ZFS_DATASETS.get("{0}#{dataset}".format(_remote,**_entry)) 441 | if _dataset.is_source: 442 | continue ## ist schon source 443 | _snapshot = _dataset.add_snapshot(**_entry) ## snapshot dem dataset hinzufügen .. eigentliche verarbeitung Klasse oben snapshot object wird zurück gegeben 444 | _source_snapshot = self.ZFS_SNAPSHOTS.get(_snapshot.guid) ## suchen ob es einen source gibt 445 | if _source_snapshot: ## wenn es schon eine gleiche guid gibt 446 | if self.replicafilter.search(_dataset.dataset_name): 447 | _source_snapshot.add_replica(_snapshot) ## replica hinzu 448 | 449 | self.print_debug(f"computation time: {_execution_time:0.2f} sec / iterations: {_iteration} / matched snapshots: {_matched_snapshots} / filtered snaphots: {_filtered_snapshots}") 450 | 451 | 452 | def get_snaplist(self): 453 | _output = [] 454 | for _dataset in self.ZFS_DATASETS.values(): 455 | if not _dataset.is_source: ## nur source im filter 456 | continue 457 | for _snapshot in _dataset.snapshots.values(): 458 | _replicas = list(map(lambda x: x.dataset_obj.dataset_name,_snapshot.replica)) 459 | _output.append({ 460 | "status" : 1 if len(_replicas) == 0 else 0, 461 | "source" : _dataset.dataset_name, 462 | "snapshot" : _snapshot.snapshot, 463 | "replica" : ",".join(_replicas), 464 | "guid" : _snapshot.guid, 465 | "age" : _snapshot.age, 466 | "written" : _snapshot.written, 467 | }) 468 | 469 | #print(f"{_snapshot.snapshot}{_snapshot.guid}{_snapshot.replica}") 470 | return self.table_output(_output) 471 | 472 | def get_output(self): 473 | _output = [] 474 | for _dataset in self.ZFS_DATASETS.values(): ## alle Datasets durchgehen die als source gelistet werden sollen 475 | if not _dataset.is_source: ## wenn --filter gesetzt 476 | continue 477 | #if _dataset.remote in self.remote_hosts:## or 
_dataset.autosnapshot == 0: ## wenn das dataset von der remote seite ist ... dann weiter oder wenn autosnasphot explizit aus ist ... dann nicht als source hinzufügen 478 | # continue 479 | _dataset_info = _dataset.get_info(_dataset,threshold=self.threshold,maxsnapshots=self.maxsnapshots,ignore_replica=self.sourceonly) 480 | self._overall_status.append(_dataset_info.get("status",-1)) ## alle stati für email overall status 481 | _output.append(_dataset_info) 482 | if self.sourceonly == True: 483 | continue 484 | for _replica in _dataset.replica: ## jetzt das dataset welches als source angezeigt wird (alle filter etc entsprochen nach replika durchsuchen 485 | #if not self.replicafilter.search(_replica.dataset_name): 486 | # continue 487 | _replica_info = _replica.get_info(_dataset,threshold=self.threshold,maxsnapshots=self.maxsnapshots) ## verarbeitung ausgabe aus klasse 488 | self._overall_status.append(_replica_info.get("status",-1)) ## fehler aus replica zu overall status für mail adden 489 | _output.append(_replica_info) 490 | 491 | return _output 492 | 493 | def _parse(self,data): 494 | for _match in self.ZFSLIST_REGEX.finditer(data): 495 | yield _match.groupdict() 496 | 497 | def _call_proc(self,remote=None): 498 | ZFS_ATTRIBUTES = f"name,type,creation,guid,used,available,written,origin,com.sun:auto-snapshot,{self.ignoreattr}" ## wenn ändern dann auch regex oben anpassen 499 | ### eigentlicher zfs aufruf, sowohl local als auch remote 500 | zfs_args = ["zfs", "list", 501 | "-t", "all", 502 | "-Hp", ## script und numeric output 503 | "-o", ZFS_ATTRIBUTES, ## attributes to show 504 | #"-r" ## recursive 505 | ] 506 | if remote: ##wenn remote ssh adden 507 | if remote in self.legacy_hosts: 508 | zfs_args = ["zfs_legacy_list"] 509 | _privkeyoption = [] 510 | if self.ssh_identity: 511 | _privkeyoption = ["-i",self.ssh_identity] 512 | _sshoptions = ["BatchMode=yes","PreferredAuthentications=publickey"] 513 | __sshoptions = [] 514 | if self.ssh_extra_options: 515 | 
_sshoptions += self.ssh_extra_options.split(",") 516 | for _sshoption in _sshoptions: 517 | __sshoptions += ["-o", _sshoption] ## alle ssh optionen brauchen -o einzeln 518 | _parts = remote.split(":") 519 | _port = "22" ## default port 520 | if len(_parts) > 1: 521 | remote = _parts[0] 522 | _port = _parts[1] 523 | zfs_args = ["ssh", 524 | remote, ## Hostname 525 | "-T", ## dont allocate Terminal 526 | "-p" , _port 527 | ] + __sshoptions + _privkeyoption + zfs_args 528 | self.print_debug("call proc: '{0}'".format(" ".join(zfs_args))) 529 | _start_time = time.time() 530 | _proc = subprocess.Popen(zfs_args,stdout=subprocess.PIPE,stderr=subprocess.PIPE,shell=False) #aufruf prog entweder lokal oder mit ssh 531 | _stdout, _stderr = _proc.communicate() 532 | _execution_time = time.time() - _start_time 533 | _lines_returned = len(_stdout.split()) 534 | self.print_debug(f"returncode: {_proc.returncode} / Executiontime: {_execution_time:0.2f} sec / Lines: {_lines_returned}") 535 | if _proc.returncode > 0: ## wenn fehler 536 | if remote and _proc.returncode in (2,66,74,76): ## todo max try 537 | pass ## todo retry ## hier könnte man es mehrfach versuchen wenn host nicht erreichbar aber macht bei check_mk keinen sinn 538 | #time.sleep(30) 539 | #return self._call_proc(remote=remote) 540 | if remote and _proc.returncode in (2,65,66,67,69,70,72,73,74,76,78,79): ## manche error ignorieren hauptsächlich ssh 541 | ## todo set status ssh-error .... 542 | pass ## fixme ... hostkeychange evtl fehler raisen o.ä damit check_mk das mitbekommt 543 | raise Exception(_stderr.decode(sys.stdout.encoding)) ## Raise Errorlevel with Error from proc -- kann check_mk stderr lesen? sollte das nach stdout? 
544 | return _stdout.decode(sys.stdout.encoding) ## ausgabe kommt als byte wir wollen str 545 | 546 | def convert_ts_date(self,ts,dateformat=None): 547 | if dateformat: 548 | return time.strftime(dateformat,time.localtime(ts)) 549 | else: 550 | return time.strftime(self.DATEFORMAT,time.localtime(ts)) 551 | 552 | @staticmethod 553 | def format_status(val): 554 | return {-1:"ignored",0:"ok",1:"warn",2:"crit"}.get(val,"unknown") 555 | 556 | @staticmethod 557 | def format_autosnapshot(val): 558 | return {0:"deaktiviert",2:"aktiviert"}.get(val,"nicht konfiguriert") 559 | 560 | @staticmethod 561 | def format_bytes(size,unit='B'): 562 | # 2**10 = 1024 563 | size = float(size) 564 | if size == 0: 565 | return "0" 566 | power = 2**10 567 | n = 0 568 | power_labels = {0 : '', 1: 'K', 2: 'M', 3: 'G', 4: 'T'} 569 | while size > power: 570 | size /= power 571 | n += 1 572 | return "{0:.2f} {1}{2}".format(size, power_labels[n],unit) 573 | 574 | @staticmethod 575 | def seconds2timespan(seconds,details=2,seperator=" ",template="{0:.0f}{1}",fixedview=False): 576 | _periods = ( 577 | ('W', 604800), 578 | ('T', 86400), 579 | ('Std', 3600), 580 | ('Min', 60), 581 | ('Sek', 1) 582 | ) 583 | _ret = [] 584 | for _name, _period in _periods: 585 | _val = seconds//_period 586 | if _val: 587 | seconds -= _val * _period 588 | #if _val == 1: 589 | # _name = _name[:-1] 590 | _ret.append(template.format(_val,_name)) 591 | else: 592 | if fixedview: 593 | _ret.append("") 594 | return seperator.join(_ret[:details]) 595 | 596 | def _datasort(self,data): 597 | if not self.sort: 598 | return data 599 | return sorted(data, key=lambda k: k[self.sort],reverse=self.sortreverse) 600 | 601 | def checkmk_output(self,data): 602 | if not data: 603 | return "" 604 | _out = [] 605 | for _item in self._datasort(data): 606 | _status = _item.get("status",3) 607 | _source = _item.get("source","").replace(" ","_") 608 | _replica = _item.get("replica","").strip() 609 | _creation = _item.get("creation","0") 610 | 
_count = _item.get("count","0") 611 | _age = _item.get("age","0") 612 | _written = _item.get("written","0") 613 | _available = _item.get("available","0") 614 | _used = _item.get("used","0") 615 | if _status == -1: ## tv.sysops:checkzfs=ignore wollen wir nicht (ignoreattr) 616 | continue 617 | if self.maxsnapshots: 618 | _warn = self.maxsnapshots[0] 619 | _crit = self.maxsnapshots[1] 620 | _maxsnapshots = f"{_warn};{_crit}".replace("inf","") 621 | #if _status == 0: 622 | # _status = "P" 623 | else: 624 | _maxsnapshots = ";" 625 | if self.threshold: 626 | _warn = self.threshold[0] * 60 627 | _crit = self.threshold[1] * 60 628 | _threshold = f"{_warn};{_crit}".replace("inf","") 629 | else: 630 | _threshold = ";" 631 | _msg = _item.get("message","").strip() 632 | _msg = _msg if len(_msg) > 0 else "OK" ## wenn keine message ... dann OK 633 | _out.append(f"{_status} {self.prefix}:{_source} age={_age};{_threshold}|creation={_creation};;|file_size={_written};;|fs_used={_used};;|file_count={_count};{_maxsnapshots} {_replica} - {_msg}") 634 | 635 | if self.piggyback != "": 636 | _out.insert(0,f"<<<<{self.piggyback}>>>>\n<<>>") 637 | _out.append("<<<<>>>>") 638 | return "\n".join(_out) 639 | 640 | def table_output(self,data,color=True): 641 | if not data: 642 | return 643 | #print ("Max-Status: {0}".format(max(self._overall_status))) ## debug 644 | _header = data[0].keys() if not self.columns else self.columns 645 | _header_names = [self.COLUMN_NAMES.get(i,i) for i in _header] 646 | _converter = dict((i,self.COLUMN_MAPPER.get(i,(lambda x: str(x)))) for i in _header) 647 | _line_draw = (" | ","-+-","-") 648 | if color: 649 | _line_draw = (" ║ ","═╬═","═") ## mail quoted printable sonst base64 kein mailfilter 650 | _output_data = [_header_names] 651 | _line_status = [] 652 | for _item in self._datasort(data): 653 | _line_status.append(_item.get("status")) 654 | _output_data.append([_converter.get(_col)(_item.get(_col,"")) for _col in _header]) 655 | _maxwidth = 
[max(map(len,_col)) for _col in zip(*_output_data)] ## max column breite 656 | _format = _line_draw[0].join(["{{:{}{}}}".format(self.COLUMN_ALIGN.get(_h,">"),_w) for _h,_w in zip(_header,_maxwidth)]) ## format bilden mit min.max breite für gleiche spalten 657 | _line_print = False 658 | _out = [] 659 | _status = -99 # max(self._overall_status) ## ??max status?? FIXME 660 | for _item in _output_data: 661 | if _line_print: 662 | _status = _line_status.pop(0) 663 | if color: 664 | _out.append(self.COLOR_CONSOLE.get(_status,"") + _format.format(*_item) + self.COLOR_CONSOLE.get("reset")) 665 | else: 666 | _out.append(_format.format(*_item)) 667 | if not _line_print: 668 | _out.append(_line_draw[1].join(map(lambda x: x*_line_draw[2],_maxwidth))) ## trennlinie 669 | _line_print = True 670 | return "\n".join(_out) 671 | 672 | def html_output(self,data,columns=None): 673 | if not data: 674 | return "" 675 | _header = data[0].keys() if not self.columns else self.columns 676 | _header_names = [self.COLUMN_NAMES.get(i,i) for i in _header] 677 | _converter = dict((i,self.COLUMN_MAPPER.get(i,(lambda x: str(x)))) for i in _header) 678 | _hostname = socket.getfqdn() 679 | _now = self.convert_ts_date(time.time(),'%Y-%m-%d %H:%M:%S') 680 | _out = [] 681 | _out.append("") 682 | _out.append("") 683 | _out.append("") 684 | _out.append("") 696 | _out.append("Check ZFS") 697 | _out.append(f"

{_hostname}

{_now}

") 698 | _out.append("") 699 | _out.append("".format("".format("
{0}
".join(_header_names))) 700 | for _item in self._datasort(data): 701 | _out.append("
{0}
".join([_converter.get(_col)(_item.get(_col,"")) for _col in _header]),_converter["status"](_item.get("status","0")))) 702 | _out.append("
") 703 | return "".join(_out) 704 | 705 | def mail_output(self,data): 706 | _hostname = socket.getfqdn() 707 | _email = self.mail_address 708 | if not _email: 709 | _users = open("/etc/pve/user.cfg","rt").read() 710 | _email = "root@{0}".format(_hostname) 711 | _emailmatch = re.search(r"^user:root@pam:.*?:(?P[\w.]+@[\w.]+):.*?$",_users,re.M) 712 | if _emailmatch: 713 | _email = _emailmatch.group(1) 714 | #raise Exception("No PVE User Email found") 715 | _msg = EmailMessage() 716 | _msg.set_content(self.table_output(data,color=False)) 717 | _msg.add_alternative(self.html_output(data),subtype="html") ## FIXME wollte irgendwie nicht als multipart .. 718 | #_attach = MIMEApplication(self.csv_output(data),Name="zfs-check_{0}.csv".format(_hostname)) 719 | #_attach["Content-Disposition"] = "attachement; filename=zfs-check_{0}.csv".format(_hostname) 720 | #_msg.attach(_attach) 721 | _msg["From"] = "ZFS-Checkscript {0} -1 ## wenn im check_mk ordner 804 | #if _is_checkmk_plugin: 805 | if os.environ.get("MK_CONFDIR"): 806 | try: ## parse check_mk options 807 | _check_mk_configdir = "/etc/check_mk" 808 | if not os.path.isdir(_check_mk_configdir): 809 | _check_mk_configdir = os.environ["MK_CONFDIR"] 810 | args.config_file = f"{_check_mk_configdir}/{_basename}" 811 | if not os.path.exists(args.config_file): ### wenn checkmk aufruf und noch keine config ... 
            ## (continues the "no config yet on a checkmk run" branch: create a
            ## default config file, tell the admin to edit it, and stop)
            if not os.path.isdir(_check_mk_configdir):
                os.mkdir(_check_mk_configdir)
            with open(args.config_file,"wt") as _f: ## write default config skeleton
                _f.write("## config for checkzfs check_mk")
                _f.write("\n".join([f"# {_k}:" for _k in CONFIG_KEYS.split("|")]))
                _f.write("\n")
            print(f"please edit config {args.config_file}")
            os._exit(0)
    except:
        ## NOTE(review): bare except silently ignores any failure while creating
        ## the default config (e.g. permissions) -- deliberate best-effort here
        pass
    args.output = "checkmk" if not args.output else args.output
_is_zabbix_plugin = os.path.dirname(os.path.abspath(__file__)).find("/zabbix/scripts") > -1 ## true when installed inside the zabbix scripts folder
if _is_zabbix_plugin:
    try: ## create a default zabbix config on first run
        args.config_file = f"/etc/zabbix/checkzfs-{_basename}"
        if not os.path.exists(args.config_file): ### zabbix run without a config yet ... create default
            if not os.path.isdir("/etc/zabbix"):
                os.mkdir("/etc/zabbix")
            with open(args.config_file,"wt") as _f: ## write default config skeleton
                _f.write("## config for checkzfs zabbix")
                _f.write("\n".join([f"# {_k}:" for _k in CONFIG_KEYS.split("|")]))
                _f.write("\n")
            print(f"please edit config {args.config_file}")
            os._exit(0)
    except:
        ## NOTE(review): bare except, same best-effort rationale as above
        pass
    args.output = "json" if not args.output else args.output

if args.config_file:
    ## NOTE(review): file handle is never closed explicitly (CPython refcount
    ## closes it); config values only fill in options not already set via CLI
    _rawconfig = open(args.config_file,"rt").read()
    for _k,_v in _config_regex.findall(_rawconfig):
        if _k == "disabled" and _v.lower().strip() in ( "1","yes","true"): ## when disabled the check is skipped entirely
            os._exit(0)
        if _k == "sourceonly":
            args.sourceonly = bool(_v.lower().strip() in ( "1","yes","true"))
        elif _k == "prefix":
            args.__dict__["prefix"] = _v.strip()
        elif not args.__dict__.get(_k.replace("-","_"),None):
            args.__dict__[_k.replace("-","_")] = _v.strip()

try:
    if args.update:
        ## self-update: fetch the script from github (branch/ref in args.update),
        ## compare the git blob sha against the local file and offer to replace it
        import requests
        import hashlib
        import base64
        from datetime import datetime
        import difflib
        from pkg_resources import parse_version
        _github_req = requests.get(f"https://api.github.com/repos/bashclub/check-zfs-replication/contents/checkzfs.py?ref={args.update}")
        if _github_req.status_code != 200:
            raise Exception("Github Error")
        _github_version = _github_req.json()
        _github_last_modified = datetime.strptime(_github_req.headers.get("last-modified"),"%a, %d %b %Y %X %Z")
        _new_script = base64.b64decode(_github_version.get("content")).decode("utf-8")
        ## extract VERSION = "x.y.z" from the downloaded script text
        _new_version = re.findall(r"^VERSION\s*=[\s\x22]*([0-9.]*)",_new_script,re.M)
        _new_version = _new_version[0] if _new_version else "0.0.0"
        _script_location = os.path.realpath(__file__)
        _current_last_modified = datetime.fromtimestamp(int(os.path.getmtime(_script_location)))
        with (open(_script_location,"rb")) as _f:
            _content = _f.read()
        ## git blob sha1: "blob <len>\0" + content -- matches github's "sha" field
        _current_sha = hashlib.sha1(f"blob {len(_content)}\0".encode("utf-8") + _content).hexdigest()
        _content = _content.decode("utf-8")
        if type(VERSION) != str:
            VERSION = str(VERSION)
        if _current_sha == _github_version.get("sha"):
            print(f"allready up to date {_current_sha}")
            sys.exit(0)
        else:
            _version = parse_version(VERSION)
            _nversion = parse_version(_new_version)
            if _version == _nversion:
                print("same Version but checksums mismatch")
            elif _version > _nversion:
                print(f"ATTENTION: Downgrade from {VERSION} to {_new_version}")
            ## interactive prompt: y = overwrite in place, d = show unified diff
            ## and ask again, anything else = abort
            while True:
                try:
                    _answer = input(f"Update {_script_location} to {_new_version} (y/n) or show difference (d)? ")
                except KeyboardInterrupt:
                    print("")
                    sys.exit(0)
                if _answer in ("Y","y","yes","j","J"):
                    with open(_script_location,"wb") as _f:
                        _f.write(_new_script.encode("utf-8"))

                    print(f"updated to Version {_new_version}")
                    break
                elif _answer in ("D","d"):
                    for _line in difflib.unified_diff(_content.split("\n"),
                            _new_script.split("\n"),
                            fromfile=f"Version: {VERSION}",
                            fromfiledate=_current_last_modified.isoformat(),
                            tofile=f"Version: {_new_version}",
                            tofiledate=_github_last_modified.isoformat(),
                            n=0,
                            lineterm=""):
                        print(_line)
                else:
                    break
    else:
        ## normal run: the zfscheck constructor performs the whole check
        ZFSCHECK_OBJ = zfscheck(**args.__dict__)
except KeyboardInterrupt:
    print("")
    sys.exit(0)
except Exception as e:
    ## top-level boundary: print the error, re-raise only in debug mode
    print(str(e), file=sys.stderr)
    if args.debug:
        raise
    sys.exit(1)