├── media ├── __init__.py ├── disk.py ├── s3.py └── base.py ├── util ├── __init__.py ├── global_def.py └── config.py ├── .gitignore ├── media_entry.py ├── example.json ├── LICENSE ├── README.md └── main.py /media/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.pyc 3 | .idea/* 4 | -------------------------------------------------------------------------------- /media_entry.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from media.disk import Disk 5 | from media.s3 import S3, S3_HEAD 6 | 7 | 8 | def create_media(media_path, setting, args): 9 | assert media_path and type(media_path) is str and "" != media_path 10 | if 0 == media_path.find(S3_HEAD): 11 | return S3(media_path, setting, args) 12 | return Disk(media_path, setting, args) 13 | -------------------------------------------------------------------------------- /example.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "buffy_files", 3 | "dst": ["/my_backup", "s3://my_backup_bucket"], 4 | "src": 5 | { 6 | "root": "/my_file_dir", 7 | "file": ["password.txt", "email/"], 8 | "ext": ["jpg", "png"], 9 | "re": [".*/credential.+"], 10 | "exclude": 11 | { 12 | "re": ".*.DS_Store$" 13 | } 14 | }, 15 | "compress": "yes", 16 | "encoding": "no", 17 | "rpt": 18 | { 19 | "path": "/my_buffy_report", 20 | "detail": "yes" 21 | } 22 | } 23 | 24 | -------------------------------------------------------------------------------- /util/global_def.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | 6 | 7 | def is_windows(): 8 | import platform 9 | system_name = platform.system() 10 | return "Windows" in system_name 11 | 12 | 13 | def get_delim(): 14 | import platform 15 | system_name = platform.system() 16 | if "Darwin" in system_name: 17 | return "/" 18 | elif "Linux" in system_name: 19 | return "/" 20 | elif "Windows" in system_name: 21 | return "\\" 22 | return "/" # default treats it an unix-like system 23 | 24 | DIR_DELIM = get_delim() 25 | 26 | RPT_WARN_ERR = "" 27 | 28 | 29 | def warning(msg): 30 | logging.warning(msg) 31 | global RPT_WARN_ERR 32 | RPT_WARN_ERR += (msg + "\n") 33 | 34 | 35 | def error(msg): 36 | logging.error(msg) 37 | global RPT_WARN_ERR 38 | RPT_WARN_ERR += (msg + "\n") 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Rodney Kan 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | 21 | -------------------------------------------------------------------------------- /media/disk.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | from logging import info 6 | from media.base import MediaBase, mkdir_p 7 | from util.global_def import DIR_DELIM 8 | 9 | 10 | class Disk(MediaBase): 11 | def __init__(self, dst_root, setting, dry): 12 | super(Disk, self).__init__("disk", dst_root, setting, dry) 13 | self.cp_cmd = "cp " 14 | if self._dst_root[-1] != DIR_DELIM: 15 | self._dst_root += DIR_DELIM 16 | 17 | def exist(self): 18 | return os.path.exists(self._dst_root) 19 | 20 | def create_path(self): 21 | info("[disk] create path: %s" % self._dst_root) 22 | if not self.dry: 23 | os.mkdir(self._dst_root) 24 | 25 | def get_file_info_not_dry(self, filename): 26 | if self.dry or not os.path.exists(filename): 27 | return -1, "NA" 28 | from datetime import datetime 29 | timestamp = datetime.fromtimestamp(os.path.getmtime(filename)).strftime('%Y-%m-%d %H:%M:%S') 30 | return os.stat(filename).st_size, timestamp 31 | 32 | def copyfile(self, src, dst): 33 | if not self.dry: 34 | dirname, _ = os.path.split(dst) 35 | if not os.path.exists(dirname): 36 | mkdir_p(dirname) 37 | from shutil import copyfile 38 | copyfile(src, dst) 39 | size, _ = self.get_file_info_not_dry(dst) # do not return the value 'timestamp' 40 | valid = -1 != size 41 | return valid, size if valid else 0 42 | 43 | def backup_compress(self, sources): 44 | return MediaBase.backup_compress(self, sources) 45 | 46 | def backup_uncompress(self, sources): 47 | return MediaBase.backup_uncompress(self, sources, DIR_DELIM) 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BUFFY? What is it? 2 | BUFFY, stands for 'Back Up Files For You', is a command line tool to back up files. 3 | 4 | # How BUFFY makes your life better 5 | * Back up to local and remote spaces at one time 6 | * Very simple and straightforward to use, yet flexible 7 | * Easy monitoring backup status 8 | * Support all major platforms with python3.x, e.g., Windows, linux, Mac OS 9 | 10 | # How to use BUFFY? 11 | First, let’s back up a single file **my_file.txt** to local disk **/my_backup/** 12 | ``` 13 | buffy -src my_file.txt -dst /my_backup/ 14 | ``` 15 | Note: from now, `buffy` stands for `python3 /main.py` 16 | 17 | It is suggested to back up both locally and remotely, e.g., local disk and Amazon S3 18 | ``` 19 | buffy -src my_file_dir -dst /my_backup/ -dst s3://my_backup_bucket 20 | ``` 21 | Note: to back up to s3, awscli must be installed 22 | 23 | It could be bad due to various aspects of expense, if we back up beyond requirement. 24 | BUFFY provides a set of simple yet flexible functions to specify the backup target. 25 | For example, you can tell BUFFY to back up file: 26 | * with certain extension name 27 | * matches with certain regular expression 28 | * matches with certain ‘dynamic pattern' 29 | 30 | Besides, form an excluded file list for backup is also possible. 31 | 32 | This is accomplished by giving a json configuration file to BUFFY. 33 | ``` 34 | buffy -c example.json 35 | ``` 36 | 37 | In example.json, we have 38 | ``` 39 | "src": 40 | { 41 | "root": "/my_file_dir", <== the base directory of backup source 42 | "file": ["password.txt", "email/"], <== file, or directory can be specified 43 | "ext": ["jpg", "png"], <== back up the pictures 44 | "re": [".*/credential.+"], <== back up file basename ends with 'credential' 45 | "exclude": 46 | { 47 | "re": ".*.DS_Store$" <== not back up the Mac OS system file '.DS_Store' 48 | } 49 | }, 50 | ``` 51 | 52 | We haven't talked about the usage of 'dynamic pattern', right? It is the most powerful though fallable function. So one must use it with caution. Basically, it performs runtime evaluation to decide regular expressions. For example, 53 | 54 | ``` 55 | "src": 56 | { 57 | ... 58 | "dyn": ["masterpiece_$dyn$", "datetime", "str(datetime.date.today())"], 59 | ... 60 | }, 61 | ``` 62 | 63 | Take a look at the value of 'dyn': the 1st entry is original regular expression, such that **$dyn$** is a keyword, which will be replaced by the runtime evaluation outcome of the 3rd entry. Before that, the 2nd entry will be 'imported' (leave it empty if 'import' is not needed). For instance, file named 'masterpiece_2013-06-14' will be backed up, on the date the highly praised work 'The Last of Us' on PS3 is released, which is 2013/06/14. 64 | 65 | # Command line usage 66 | ``` 67 | usage: buffy [-h] [-src SRC] [-dst DST] [-n NAME] [-e] [-cmp] [-r RPT] [-v] 68 | [-s] [-d] [-c CONFIG_FILE] 69 | 70 | BUFFY --- Back Up Files For You 71 | 72 | optional arguments: 73 | -h, --help show this help message and exit 74 | -src SRC backup source 75 | -dst DST backup destination 76 | -n NAME, --name NAME backup name 77 | -e, --encoding name encoding with date (default: False) 78 | -cmp, --compress compress backup files (default: False) 79 | -r RPT, --report RPT report path 80 | -v, --verbose verbose mode 81 | -s, --silent silent mode 82 | -d, --dry_run perform a dry run 83 | -c CONFIG_FILE, --config CONFIG_FILE 84 | config file (this option overwrites others) 85 | ``` 86 | 87 | # Future direction of BUFFY 88 | Support more remote backup media, e.g., dropbox 89 | Provide incrmental backup optionally 90 | 91 | # Contact 92 | Please contact *Rodney Kan* by its_right@msn.com for any question/request/bug without hesitation. 93 | -------------------------------------------------------------------------------- /media/s3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | from logging import info 6 | from subprocess import Popen, PIPE, STDOUT 7 | from media.base import MediaBase 8 | from util.global_def import warning, error, is_windows, DIR_DELIM 9 | 10 | """ 11 | following information is from 'aws s3 help' 12 | s3 terminology: 13 | bucket: the unique s3 identifier 14 | prefix: the directory name 15 | object: the file basename 16 | s3://bucket/....prefix.../object 17 | 18 | The path argument must begin with s3:// in order to denote that the path argument refers to a S3 object 19 | """ 20 | 21 | S3_HEAD = "s3://" 22 | S3_DELIM = "/" 23 | 24 | 25 | def pp_popen_out(out_str): 26 | return str(out_str).replace("b'", '').replace("\\n'", '') 27 | 28 | 29 | def locate_abs_exec(program): # 'program' can be an absolute path name, or just a basename 30 | def is_exe(fpath): 31 | return os.path.isfile(fpath) and os.access(fpath, os.X_OK) 32 | fpath, fname = os.path.split(program) 33 | if fpath: 34 | if is_exe(program): 35 | return program 36 | else: 37 | for path in os.environ["PATH"].split(os.pathsep): 38 | path = path.strip('"') 39 | exe_file = os.path.join(path, program) 40 | if is_exe(exe_file): 41 | return exe_file 42 | if not is_windows(): 43 | # try 'type' (note: mac os may need this) 44 | type_cmd = Popen(["type", program], stdout=PIPE, stderr=STDOUT) 45 | stdout_data, _ = type_cmd.communicate() 46 | out = pp_popen_out(stdout_data).replace("%s is " % program, '') 47 | if is_exe(out): 48 | return out 49 | return None 50 | 51 | 52 | def get_aws_path(): 53 | return locate_abs_exec("aws.exe" if is_windows() else "aws") 54 | 55 | 56 | class S3(MediaBase): 57 | def __init__(self, dst_root, setting, args): 58 | assert 0 == dst_root.find(S3_HEAD) 59 | super(S3, self).__init__("s3", dst_root, setting, args) 60 | if self._dst_root[-1] != S3_DELIM: 61 | self._dst_root += S3_DELIM 62 | aws_path = get_aws_path() 63 | if not aws_path: 64 | error("[s3] cannot locate aws") 65 | self.okay = False 66 | return 67 | self.aws = aws_path 68 | self.cp_cmd = self.aws + " s3 cp " # use for logging 69 | wo_head_path = self._dst_root[len(S3_HEAD):] 70 | end_bucket = wo_head_path.find(S3_DELIM) 71 | bucket = wo_head_path if -1 == end_bucket else wo_head_path[:end_bucket] 72 | info("[s3] checking bucket '%s' existence..." % bucket) 73 | cmd_list = [self.aws, "s3", "ls", bucket] 74 | stdout_data, _ = Popen(cmd_list, stdout=PIPE, stderr=STDOUT).communicate() 75 | res = pp_popen_out(stdout_data) 76 | # we assume 'aws ls' always gives a newline (platform dependent) for its stdout when error occurs 77 | self.okay = 0 != res.find('\r\n' if is_windows() else "\\n") 78 | if not self.okay: 79 | warning("[s3] fail to locate bucket '%s'" % bucket) 80 | 81 | def create_path(self): 82 | pass 83 | 84 | def get_file_info_not_dry(self, filename): 85 | if self.dry: 86 | return -1, "NA" 87 | cmd_list = [self.aws, "s3", "ls", filename] 88 | stdout_data, _ = Popen(cmd_list, stdout=PIPE, stderr=STDOUT).communicate() 89 | output_lines = [line.strip() for line in stdout_data.splitlines()] 90 | for line in output_lines: 91 | ls_out_list = pp_popen_out(line).split() 92 | if not 4 == len(ls_out_list): 93 | warning("[s3] ls %s gives unexpected result" % filename) 94 | warning(line) 95 | continue 96 | [day, time, size, _] = ls_out_list 97 | return int(size), day + " " + time 98 | return -1, "NA" 99 | 100 | def copyfile(self, src, dst): 101 | if DIR_DELIM != S3_DELIM: 102 | dst = dst.replace(DIR_DELIM, S3_DELIM) 103 | cmd_list = [self.aws, "s3", "cp", src, dst] 104 | if not self.dry: 105 | stdout_data, _ = Popen(cmd_list, stdout=PIPE, stderr=STDOUT).communicate() 106 | # Note: 107 | # fetch size upon copy file is majorly for uncompress backup 108 | # for compress backup, it is wasted action but shall be affordable (for there's only one file copy) 109 | size, _ = self.get_file_info_not_dry(dst) # do not use the value 'timestamp' 110 | valid = -1 != size 111 | return valid, size if valid else 0 112 | 113 | def backup_uncompress(self, sources): 114 | return MediaBase.backup_uncompress(self, sources, S3_DELIM) 115 | 116 | def back_up(self, sources): 117 | if not self.okay: 118 | warning("[s3] skip to back up to destination '%s'" % self._dst_root) 119 | return 120 | return MediaBase.back_up(self, sources) 121 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | import logging 6 | from argparse import ArgumentParser 7 | from logging import info, INFO, DEBUG, WARNING 8 | from media_entry import create_media 9 | from media.base import MediaBase, disk_write 10 | from util.config import Config, DEFAULT_COMPRESS, DEFAULT_ENCODING 11 | 12 | 13 | class BUFFY(object): 14 | def __init__(self): 15 | args = ArgumentParser(description='BUFFY --- Back Up Files For You') 16 | # simple flow options 17 | args.add_argument("-src", dest="src", default=None, help="backup source") 18 | args.add_argument("-dst", dest="dst", action='append', default=None, help="backup destination") 19 | args.add_argument("-n", "--name", dest="name", default=None, help="backup name") 20 | args.add_argument("-e", "--encoding", dest="encoding", action="store_const", const=True, 21 | help="name encoding with date (default: %s)" % DEFAULT_ENCODING) 22 | args.add_argument("-cmp", "--compress", dest="compress", action="store_const", const=True, 23 | help="compress backup files (default: %s)" % DEFAULT_COMPRESS) 24 | args.add_argument("-r", "--report", dest="rpt", default=None, help="report path") 25 | simple_flow_args = ["src", "dst", "name", "compress", "encoding", "rpt"] 26 | # general options 27 | args.add_argument("-v", "--verbose", dest="verbose", action="store_const", const=DEBUG, help="verbose mode") 28 | args.add_argument("-s", "--silent", dest="silent", action="store_const", const=WARNING, help="silent mode") 29 | args.add_argument("-d", "--dry_run", dest="dry", action="store_const", const=True, help="perform a dry run") 30 | # normal flow option 31 | args.add_argument("-c", "--config", dest="config_file", default=None, 32 | help="config file (this option overwrites others)") 33 | self.args = args.parse_args() 34 | if not self.args.config_file and (not self.args.src or not self.args.dst): 35 | args.print_help() 36 | BUFFY.print_information() 37 | sys.exit() 38 | log_level = self.args.verbose if self.args.verbose else self.args.silent if self.args.silent else INFO 39 | logging.basicConfig(format='', level=log_level) 40 | if self.args.config_file: 41 | for arg in vars(self.args): 42 | value = getattr(self.args, arg) 43 | if None is not value and arg in simple_flow_args: 44 | info("option value '%s = %s' has no effect" % (arg, value)) 45 | self.config = Config(self.args.config_file) 46 | else: 47 | compress = DEFAULT_COMPRESS if None is self.args.compress else self.args.compress 48 | encoding = DEFAULT_ENCODING if None is self.args.encoding else self.args.encoding 49 | self.config = Config(src=self.args.src, dst=self.args.dst, name=self.args.name, 50 | compress=compress, encoding=encoding, rpt=self.args.rpt) 51 | 52 | def run(self): 53 | if self.args.dry: 54 | info("[BUFFY] perform a dry run") 55 | info("[BUFFY] start back up...") 56 | sources = self.config.src.get_sources() 57 | if not sources: 58 | info("[BUFFY] no sources to back up") 59 | return 60 | if self.args.verbose: 61 | print("[BUFFY] backup target:") 62 | for source in sources: 63 | print(source) 64 | rpt_content = "" 65 | for dst in self.config.dst: 66 | media = create_media(dst, self.config, self.args) 67 | if not media.exist(): 68 | media.create_path() 69 | backup_report = media.back_up(sources) 70 | rpt_content += "destination: %s\n\t%s\n" % (dst, backup_report) 71 | if not self.args.dry: 72 | self.report(BUFFY.get_source_digest(self.config.src.root, sources) + rpt_content[0: len(rpt_content) - 1]) 73 | 74 | def report(self, content): 75 | info(content) 76 | if self.config.rpt.path: 77 | encoding_str = MediaBase.get_encoding(self.config.encoding) 78 | name = self.config.name 79 | backup_name = encoding_str if not name else name + ("_" + encoding_str if len(encoding_str) > 0 else "") 80 | report_file = self.config.rpt.path + ("BUFFY" if "" == backup_name else backup_name) + ".log" 81 | from util.global_def import RPT_WARN_ERR 82 | disk_write(report_file, RPT_WARN_ERR + content + "\n") 83 | 84 | @staticmethod 85 | def print_information(): 86 | print("\nexample:") 87 | print(" buffy -src /data_dir -dst /backup_dir -dst s3://backup_bucket") 88 | print(" buffy -c example.json") 89 | print("\ncheck https://github.com/r-kan/BUFFY for updates and more information!") 90 | 91 | @staticmethod 92 | def get_source_digest(root, sources): 93 | import os 94 | total_size = sum([os.stat(src).st_size for src in sources]) 95 | return "source : %s\n\tfile count: %i, size: %i\n" % (root, len(sources), total_size) 96 | 97 | 98 | if __name__ == '__main__': 99 | BUFFY().run() 100 | -------------------------------------------------------------------------------- /media/base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | import collections 6 | from logging import info 7 | from util.global_def import DIR_DELIM 8 | 9 | 10 | def get_temp_dir(): 11 | import tempfile 12 | return tempfile.gettempdir() 13 | 14 | TMP_DIR = "%(tmp_dir)s%(delim)s" % {'tmp_dir': get_temp_dir(), 'delim': DIR_DELIM} 15 | 16 | DEFAULT_NAME = "BUFFY" 17 | 18 | 19 | def mkdir_p(path): 20 | try: 21 | os.makedirs(path) 22 | except OSError as exc: 23 | import errno 24 | if exc.errno != errno.EEXIST or not os.path.isdir(path): 25 | raise 26 | 27 | 28 | def disk_write(filename, content, dry_run=False): 29 | if dry_run: 30 | return 31 | dirname, _ = os.path.split(filename) 32 | if not os.path.exists(dirname): 33 | mkdir_p(dirname) 34 | with open(filename, 'w') as fd: 35 | fd.write(content) 36 | 37 | 38 | class MediaBase(object): 39 | def __init__(self, media_name, dst_root, setting, args): 40 | self.media_name = media_name 41 | self._dst_root = dst_root 42 | self.root = setting.src.root 43 | self.name = setting.name 44 | self.compress = setting.compress 45 | self.encoding_str = MediaBase.get_encoding(setting.encoding) 46 | self.dry = args.dry 47 | self.report_path = setting.rpt.path 48 | self.detail_report = setting.rpt.detail 49 | 50 | def exist(self): 51 | return False 52 | 53 | def create_path(self): 54 | info("[media] '%s' not support create path: %s" % (self.media_name, self._dst_root)) 55 | 56 | def report(self, basename, content): 57 | if not self.report_path or not self.detail_report: 58 | return 59 | disk_write(self.report_path + basename, content, self.dry) 60 | 61 | def backup_compress(self, sources): 62 | is_disk = "disk" == self.media_name 63 | backup_name = self.encoding_str if not self.name else \ 64 | self.name + ("_" + self.encoding_str if len(self.encoding_str) > 0 else "") 65 | if "" == backup_name: 66 | backup_name = DEFAULT_NAME 67 | dst_base = self._dst_root + backup_name 68 | non_disk_dst_base = (self.report_path if self.report_path else TMP_DIR) + backup_name 69 | tar_input_file = non_disk_dst_base + ".list" 70 | targz_file = "%s.tar.gz" % (dst_base if is_disk else non_disk_dst_base) 71 | if not self.dry: 72 | os.chdir(self.root) 73 | import tarfile 74 | with tarfile.open("%s" % targz_file, "w|gz") as tar: 75 | for src in sources: 76 | tar.add(src.replace(self.root, "")) 77 | if self.detail_report: 78 | src_list_content = "" 79 | for src in sources: 80 | src_list_content += (src.replace(self.root, "") + "\n") 81 | disk_write(tar_input_file, src_list_content, self.dry) 82 | if not is_disk: 83 | self.copyfile(targz_file, "%s.tar.gz" % dst_base) 84 | if not self.dry: 85 | os.remove(targz_file) 86 | compress_cmd = "tar zcvf %(dst)s -T %(src_list)s" % {'dst': targz_file, 'src_list': tar_input_file} 87 | reproduce_str = "cd %s\n%s\n" % (self.root, compress_cmd) + \ 88 | ("" if is_disk else "%s%s %s\n" % (self.cp_cmd, targz_file, "%s.tar.gz" % dst_base)) 89 | size, timestamp = self.get_file_info_not_dry("%s.tar.gz" % dst_base) 90 | reproduce_file = backup_name + "_" + self.media_name + ".cmd" 91 | return "%s.tar.gz, size: %i, timestamp: %s" % (backup_name, size, timestamp), \ 92 | reproduce_file, reproduce_str 93 | 94 | def backup_uncompress(self, sources, delim): 95 | backup_map = collections.OrderedDict() 96 | backup_name = self.encoding_str if not self.name else \ 97 | self.name + ("_" + self.encoding_str if len(self.encoding_str) > 0 else "") 98 | dst_base = self._dst_root + backup_name + (delim if len(backup_name) > 0 else "") 99 | backuped_count = 0 100 | backuped_size = 0 101 | for src in sources: 102 | dirname, _ = os.path.split(src) 103 | dst_dir = (dirname + delim).replace(self.root, '') 104 | dst = dst_base + dst_dir + os.path.basename(src) 105 | backup_map[src] = dst 106 | assert not os.path.isdir(src) 107 | backuped, size = self.copyfile(src, dst) 108 | backuped_count += backuped 109 | backuped_size += size 110 | reproduce_file = (DEFAULT_NAME if "" == backup_name else backup_name) + "_" + self.media_name + ".cmd" 111 | return "file count: %i, size: %i" % (backuped_count, backuped_size), \ 112 | reproduce_file, self.get_reproduce_str(backup_map) 113 | 114 | def back_up(self, sources): 115 | info("[%s] back up to dst: %s" % (self.media_name, self._dst_root)) 116 | backup_ftor = self.backup_compress if self.compress else self.backup_uncompress 117 | backup_report, reproduce_file, reproduce_str = backup_ftor(sources) 118 | reproduce_str_max_length = 1024 * 1024 # 1MB 119 | if len(reproduce_str) > reproduce_str_max_length: 120 | message = "skip dump subsequent content for the total size (%s) is too large" % len(reproduce_str) 121 | info("[info] %s" % message) 122 | reproduce_str = reproduce_str[:reproduce_str_max_length] + "\n...\n" + message 123 | self.report(reproduce_file, reproduce_str) 124 | return backup_report 125 | 126 | @staticmethod 127 | def get_encoding(enable): 128 | if not enable: 129 | return "" 130 | from datetime import date 131 | return str(date.today()).replace("-", "") 132 | 133 | def get_reproduce_str(self, mapping): 134 | reproduce_str = "cd %s\n" % self.root 135 | for src_to_dst in mapping: 136 | reproduce_str += (self.cp_cmd + src_to_dst.replace(self.root, "") + " " + mapping[src_to_dst] + "\n") 137 | return reproduce_str 138 | -------------------------------------------------------------------------------- /util/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import re 5 | import os 6 | import sys 7 | from logging import debug, info 8 | from media.s3 import S3_HEAD 9 | from util.global_def import DIR_DELIM, warning, error 10 | 11 | 12 | def show_list(list_entry, name): 13 | if not list_entry: 14 | return 15 | if 1 == len(list_entry): 16 | debug("%s: %s" % (name, list_entry[0])) 17 | else: 18 | debug("%s:" % name) 19 | for item in list_entry: 20 | debug("\t%s" % item) 21 | 22 | 23 | ROOT_KEY = "root" 24 | FILE_KEY = "file" 25 | EXT_KEY = "ext" 26 | RE_KEY = "re" 27 | DYNAMIC_KEY = "dyn" 28 | EXCLUDE_KEY = "exclude" 29 | 30 | 31 | class Source(object): 32 | def __init__(self, data, root="", is_exclude=False): 33 | self.is_exclude = is_exclude 34 | if not type(data) in [str, dict]: 35 | error("[config] entry 'src' shall contain 'str' or 'dict' value instead of %s, program exit..." 36 | % type(data)) 37 | sys.exit() 38 | simple_spec = type(data) is str 39 | 40 | self.root = data[ROOT_KEY] if not simple_spec and ROOT_KEY in data else root 41 | assert type(self.root) is str 42 | 43 | # file: specify files by give accurate filename/dirname 44 | file_or_dir = data if simple_spec else data[FILE_KEY] if FILE_KEY in data else None 45 | assert not file_or_dir or type(file_or_dir) in [str, list] 46 | self.file_or_dir = file_or_dir if not file_or_dir or type(file_or_dir) is list else [file_or_dir] 47 | # ext: specify files by extension name 48 | ext = data[EXT_KEY] if not simple_spec and EXT_KEY in data else None 49 | assert not ext or type(ext) in [str, list] 50 | self.ext = ext if not ext or type(ext) is list else [ext] 51 | # re: specify files by regular expression matching 52 | re_data = data[RE_KEY] if not simple_spec and RE_KEY in data else None 53 | assert not re_data or type(re_data) in [str, list] 54 | self.re = re_data if not re_data or type(re_data) is list else [re_data] 55 | # dyn: specify files by re + custom code snippets 56 | dynamic = data[DYNAMIC_KEY] if not simple_spec and DYNAMIC_KEY in data else None 57 | assert not dynamic or type(dynamic) is list 58 | # dynamic shall be either a dyn-item(re-str, import-str, eval-str) list, or a list of dyn-items 59 | assert not dynamic or 0 == len(dynamic) or \ 60 | (type(dynamic[0]) is list or (type(dynamic[0]) is str and len(dynamic) == 3)) 61 | self.dynamic = dynamic if not dynamic or type(dynamic[0]) is list else [dynamic] 62 | 63 | assert self.file_or_dir or self.ext or self.re or self.dynamic 64 | 65 | if "" == self.root and self.file_or_dir and len(self.file_or_dir) == 1: 66 | dirname, basename = os.path.split(self.file_or_dir[0]) 67 | self.root = dirname 68 | if len(basename): 69 | self.file_or_dir = [basename] 70 | else: 71 | self.file_or_dir = None 72 | self.re = [".*"] 73 | 74 | if "" is not self.root and not self.is_exclude: 75 | debug("root: %s" % self.root) 76 | 77 | self.show_sources() 78 | if len(self.root) > 0 and self.root[-1] != DIR_DELIM: 79 | self.root += DIR_DELIM 80 | 81 | # exclude: sources that need not backup (kept by a child 'Source' instance) 82 | assert not self.is_exclude or EXCLUDE_KEY not in data # nested 'exclude' entry is not supported 83 | self.exclude = Source(data[EXCLUDE_KEY], self.root, True) if EXCLUDE_KEY in data else None 84 | 85 | def show_sources(self): 86 | prefix = "exclude " if self.is_exclude else "" 87 | show_list(self.file_or_dir, prefix + "file") 88 | show_list(self.ext, prefix + "ext") 89 | show_list(self.re, prefix + "re") 90 | show_list(self.dynamic, prefix + "dyn") 91 | 92 | @staticmethod 93 | def get_dir_files(dirname): 94 | assert os.path.isdir(dirname) 95 | ret = [] 96 | for root, _, files in os.walk(dirname): 97 | assert len(root) >= 1 98 | if root[-1] != DIR_DELIM: 99 | root += DIR_DELIM 100 | ret += [root + file for file in files] 101 | return ret 102 | 103 | @staticmethod 104 | def get_files(file_or_dir): 105 | return Source.get_dir_files(file_or_dir) if os.path.isdir(file_or_dir) else [file_or_dir] 106 | 107 | @staticmethod 108 | def get_re_files(root, raw_patterns): 109 | patterns = [re.compile(root + item) for item in raw_patterns] 110 | sources = [] 111 | for root, dirs, files in os.walk(root): 112 | assert len(root) >= 1 113 | if root[-1] != DIR_DELIM: 114 | root += DIR_DELIM 115 | for src in (files + dirs): 116 | file_or_dir = root + src 117 | for pattern in patterns: 118 | if re.match(pattern, file_or_dir): 119 | sources += Source.get_files(file_or_dir) 120 | return sources 121 | 122 | def get_sources(self): 123 | sources = [] 124 | if self.file_or_dir: 125 | for file_or_dir in self.file_or_dir: 126 | src = self.root + file_or_dir 127 | if not os.path.exists(src): 128 | warning("[config] the specified source '%s' does not exist" % src) 129 | continue 130 | sources += Source.get_files(src) 131 | 132 | if self.ext or self.re or self.dynamic: 133 | assert "" != self.root 134 | 135 | if self.ext: 136 | for root, _, files in os.walk(self.root): 137 | assert len(root) >= 1 138 | if root[-1] != DIR_DELIM: 139 | root += DIR_DELIM 140 | for file in files: 141 | basename, ext = os.path.splitext(file) 142 | if ext.replace(".", "") in self.ext: 143 | sources.append(root + file) 144 | if self.re: 145 | sources += Source.get_re_files(self.root, self.re) 146 | 147 | if self.dynamic: 148 | patterns = [] 149 | for dyn_item in self.dynamic: 150 | [re_str, import_str, eval_str] = dyn_item 151 | dynamic_alias = "$dyn$" 152 | if dynamic_alias not in re_str: 153 | warning("[config] '%s' does not appear in '%s', dynamic filename mechanism will not apply" 154 | % (dynamic_alias, re_str)) 155 | if "" != import_str: 156 | exec("import %s" % import_str) 157 | dyn_str = eval(eval_str) 158 | patterns.append(re_str.replace(dynamic_alias, dyn_str)) 159 | sources += Source.get_re_files(self.root, patterns) 160 | 161 | exclude_sources = self.exclude.get_sources() if self.exclude else [] 162 | # 'set' to remove duplication 163 | return sorted([src for src in list(set(sources)) if src not in exclude_sources and not os.path.islink(src)]) 164 | 165 | 166 | def get_bool_value(data, key, default_value): 167 | return True if key in data and data[key] in ["yes", "y"] else default_value 168 | 169 | 170 | PATH_KEY = "path" 171 | DETAIL_KEY = "detail" 172 | 173 | DEFAULT_DETAIL = False 174 | 175 | 176 | class Report(object): 177 | def __init__(self, data): 178 | self.path = None 179 | self.detail = DEFAULT_DETAIL 180 | if not data: 181 | return 182 | if not type(data) in [str, dict]: 183 | error("[config] entry 'rpt' shall contain 'str' or 'dict' value instead of %s" % type(data)) 184 | return 185 | path = data[PATH_KEY] if type(data) is not str and PATH_KEY in data else data 186 | if "" == path: 187 | return 188 | assert type(path) is str and "" != path 189 | if 0 == path.find(S3_HEAD): 190 | info("[config] report to aws s3 (%s) is not supported" % path) 191 | return 192 | if path[-1] != DIR_DELIM: 193 | path += DIR_DELIM 194 | self.path = path 195 | self.detail = get_bool_value(data, DETAIL_KEY, self.detail) 196 | debug("report path: %s" % self.path) 197 | debug("report detail: %s" % ("yes" if self.detail else "no")) 198 | 199 | 200 | NAME_KEY = "name" 201 | DST_KEY = "dst" # destination 202 | SRC_KEY = "src" # source 203 | RPT_KEY = "rpt" # report 204 | COMPRESS_KEY = "compress" 205 | ENCODING_KEY = "encoding" 206 | 207 | 208 | DEFAULT_COMPRESS = False 209 | DEFAULT_ENCODING = False 210 | 211 | 212 | class Config(object): 213 | 214 | def __init__(self, config_file=None, src=None, dst=None, name=None, compress=None, encoding=None, rpt=None): 215 | data = None 216 | if config_file: 217 | if not os.path.exists(config_file): 218 | error("[BUFFY] config file \"%s\" does not exist, program exit..." % config_file) 219 | sys.exit() 220 | info("[BUFFY] reading config file \"%s\"..." % config_file) 221 | with open(config_file) as config_fp: 222 | import json 223 | data = json.load(config_fp) 224 | 225 | if not dst and DST_KEY not in data: 226 | error("[config] no \'dst\' specified, program exit...") 227 | sys.exit() 228 | dst = data[DST_KEY] if not dst else dst 229 | if not type(dst) in [str, list]: 230 | error("[config] entry 'src' shall contain 'str' or 'list' value instead of %s, program exit..." 231 | % type(dst)) 232 | sys.exit() 233 | 234 | if not src and SRC_KEY not in data: 235 | error("[config] no \'src\' specified, program exit...") 236 | sys.exit() 237 | 238 | self.dst = [dst] if type(dst) is str else dst 239 | self.name = name if name else data[NAME_KEY] if data and NAME_KEY in data else "" 240 | assert type(self.name) is str 241 | self.compress = compress if None is not compress else get_bool_value(data, COMPRESS_KEY, DEFAULT_COMPRESS) 242 | self.encoding = encoding if None is not encoding else get_bool_value(data, ENCODING_KEY, DEFAULT_ENCODING) 243 | 244 | debug("------------------------") 245 | if "" != self.name: 246 | debug("name: %s" % self.name) 247 | show_list(self.dst, "dst") 248 | self.src = Source(src if src else data[SRC_KEY]) 249 | debug("compress: %s" % ("yes" if self.compress else "no")) 250 | debug("encoding: %s" % ("yes" if self.encoding else "no")) 251 | self.rpt = Report(rpt if rpt else data[RPT_KEY] if data and RPT_KEY in data else None) 252 | debug("------------------------") 253 | --------------------------------------------------------------------------------