├── .gitignore ├── LICENSE ├── README.md ├── README_cn.md ├── bdchecker ├── __init__.py ├── __version__.py ├── checker.py ├── command │ ├── check.py │ ├── clean.py │ └── gen.py ├── main.py └── utils │ ├── __init__.py │ ├── const_var.py │ ├── log_handle.py │ └── singleton.py ├── etc └── settings.xml ├── pyinstaller_pkg.bat ├── pyinstaller_pkg.sh ├── pypi_build.sh ├── pyproject.toml ├── requirements-dev.txt └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | ######### editor & tools ######### 2 | # vim 3 | *~ 4 | /compile_commands.json 5 | *.swp 6 | doc/compile_commands.json 7 | 8 | # vscode 9 | .vscode/ 10 | 11 | # clangd 12 | .clangd/ 13 | .cache/ 14 | 15 | # clang-format 16 | .clang-format 17 | 18 | # pycharm 19 | .idea 20 | 21 | ######### build & generated packages ######### 22 | _hpb/ 23 | build/ 24 | _packages/ 25 | _artifacts/ 26 | 27 | ######### python ######### 28 | # Byte-compiled / optimized / DLL files 29 | __pycache__/ 30 | *.py[cod] 31 | *$py.class 32 | 33 | # C extensions 34 | *.so 35 | 36 | # Distribution / packaging 37 | .Python 38 | build/ 39 | develop-eggs/ 40 | dist/ 41 | downloads/ 42 | eggs/ 43 | .eggs/ 44 | lib/ 45 | lib64/ 46 | parts/ 47 | sdist/ 48 | var/ 49 | wheels/ 50 | pip-wheel-metadata/ 51 | share/python-wheels/ 52 | *.egg-info/ 53 | .installed.cfg 54 | *.egg 55 | MANIFEST 56 | 57 | # PyInstaller 58 | # Usually these files are written by a python script from a template 59 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 60 | *.manifest 61 | *.spec 62 | 63 | # Installer logs 64 | pip-log.txt 65 | pip-delete-this-directory.txt 66 | 67 | # Unit test / coverage reports 68 | htmlcov/ 69 | .tox/ 70 | .nox/ 71 | .coverage 72 | .coverage.* 73 | .cache 74 | nosetests.xml 75 | coverage.xml 76 | *.cover 77 | .hypothesis/ 78 | .pytest_cache/ 79 | 80 | # Translations 81 | *.mo 82 | *.pot 83 | 84 | # Django stuff: 85 | *.log 86 | local_settings.py 87 | db.sqlite3 88 | 89 | # Flask stuff: 90 | instance/ 91 | .webassets-cache 92 | 93 | # Scrapy stuff: 94 | .scrapy 95 | 96 | # Sphinx documentation 97 | docs/_build/ 98 | 99 | # PyBuilder 100 | target/ 101 | 102 | # Jupyter Notebook 103 | .ipynb_checkpoints 104 | 105 | # IPython 106 | profile_default/ 107 | ipython_config.py 108 | 109 | # pyenv 110 | .python-version 111 | 112 | # pipenv 113 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 114 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 115 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 116 | # install all needed dependencies. 
117 | #Pipfile.lock
118 | 
119 | # celery beat schedule file
120 | celerybeat-schedule
121 | 
122 | # SageMath parsed files
123 | *.sage.py
124 | 
125 | # Environments
126 | .env
127 | .venv
128 | env/
129 | venv/
130 | ENV/
131 | env.bak/
132 | venv.bak/
133 | 
134 | # Spyder project settings
135 | .spyderproject
136 | .spyproject
137 | 
138 | # Rope project settings
139 | .ropeproject
140 | 
141 | # mkdocs documentation
142 | /site
143 | 
144 | # mypy
145 | .mypy_cache/
146 | .dmypy.json
147 | dmypy.json
148 | 
149 | # Pyre type checker
150 | .pyre/
151 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | Copyright (c) 2024 Muggle Wei
3 | 
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | * [readme 中文](./README_cn.md)
2 | * [readme EN](./README.md)
3 | 
4 | ## Overview
5 | bdchecker (**B**ackup **D**ata Checker) is a tool for checking personal cold-backup data, helping you discover data corruption in time.
6 | 
7 | ## Why use it
8 | Imagine that we have some data that needs to be cold-backed up: perhaps the raw market data of some financial market, compressed daily; or personal electronic copies of classic movies; or some keys that go unused all year round. Let's first list some options:
9 | 
10 | | Storage plan | Storage life span |
11 | | ---- | ---- |
12 | | SSD | several years to more than ten years |
13 | | HDD | 10+ years |
14 | | tape drive | 30+ years |
15 | | punched paper | thousands of years |
16 | | Carved in stone (Luo Ji raised his crutch above his head and shouted solemnly) | millions of years |
17 | 
18 | There is no doubt that if you have the financial resources to engrave the information on stone and store it properly, it should be very safe, unless you suffer a dual-vector foil attack; but for individuals, the cost of reading information back from stone far exceeds the value of the data we need to save.
19 | When we also consider how easy the data is to read and write, hard disks are clearly the most convenient; but this brings an additional requirement: we need to regularly check whether the data has become corrupted.
20 | To sum up, a simple and clear cold-backup solution is:
21 | 1. Use hard drives from different production batches to keep multiple backups
22 | 2. Regularly check whether files are corrupted
23 | 3. When corruption occurs, use the remaining backups to repair it
24 | 
25 | **bdchecker** is a small tool that implements the `regularly check whether files are corrupted` step.
26 | 
27 | ## Install
28 | * use pip
29 | ```
30 | pip install bdchecker
31 | ```
32 | * or download from the project's [Releases](https://github.com/MuggleWei/bdchecker/releases) and decompress
33 | 
34 | ## Usage
35 | **bdchecker** includes 3 sub-commands:
36 | * gen: scan a directory recursively, generate hash information for all **new** files in it, and store the result in the `.bdchecker.meta` folder
37 | * clean: scan a directory and remove deleted files from the stored hash information
38 | * check: scan a directory and find corrupted files (note: this recalculates the hash of every file, which is time-consuming)
39 | 
40 | ### Example directory
41 | Assume that we currently have the following directory structure:
42 | ```
43 | data
44 | ├──── a.txt
45 | ├──── b.txt
46 | └──── c
47 |       ├──── c1.txt
48 |       └──── c2.txt
49 | ```
50 | 
51 | ### Command: gen
52 | Generate hash infos:
53 | ```
54 | bdchecker gen -d data -v 1
55 | ```
56 | * `-d`: the directory for which information needs to be generated
57 | * `-v`: verbose level (the higher, the more detailed the output)
58 | 
59 | After it completes, you can see the console output: `dump meta info to data/.bdchecker.meta/sha256.csv`
60 | When there are no new files in the directory, running the `gen` command again will not regenerate any hash information.
61 | 
62 | ### Command: clean
63 | Remove `data/c/c2.txt`, then run:
64 | ```
65 | bdchecker clean -d data -v 1
66 | ```
67 | In the last few lines of the log you can see `clean missing file's meta info: c/c2.txt`, which means the hash information corresponding to the file has been cleaned successfully.
68 | 
69 | ### Command: check
70 | Run:
71 | ```
72 | bdchecker check -d data -v 1
73 | ```
74 | The last line of the log shows `all check pass`, which means there are no new/missing files and no file is corrupted.
75 | 
76 | Now let's modify `a.txt`, write something random into it, and run again:
77 | ```
78 | bdchecker check -d data -v 1
79 | ```
80 | This time an error message appears in the log: `check failed: a.txt, old hash: ..., cur hash: ...`, indicating that the content of `a.txt` has changed.
81 | 
82 | ## Migration and comparison
83 | The hash information generated by `bdchecker` is saved in the `.bdchecker.meta` folder inside the directory, so when migrating you can simply move the entire directory.
84 | When multiple backup copies already exist but no hash values have been generated yet, you can run `bdchecker gen` on each copy and then compare the resulting meta files. Since the lines of the generated file are already sorted, you can compare them directly with commands such as `diff`; see the example below.
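85 | 
86 | For reference, the meta file itself is a plain two-column CSV: column `k` is the file path relative to the scanned directory (sorted, with `/` separators) and column `v` is the uppercase hex digest. A sketch of what `data/.bdchecker.meta/sha256.csv` might look like (the digests below are shortened placeholders, not real values):
87 | ```
88 | k,v
89 | a.txt,9F86D081884C7D65...
90 | b.txt,2C26B46B68FFC68F...
91 | c/c1.txt,4E07408562BEDB8B...
92 | c/c2.txt,EF2D127DE37B942B...
93 | ```
94 | Because both meta files are sorted the same way, `diff backup1/.bdchecker.meta/sha256.csv backup2/.bdchecker.meta/sha256.csv` (where `backup1` and `backup2` are example paths) prints any mismatching entries directly.
95 | 
96 | The same checks can also be driven from Python, e.g. from a scheduled job, through the `Checker` class that the CLI wraps. A minimal sketch, where the `data` path is just an example:
97 | ```
98 | from bdchecker.checker import Checker
99 | 
100 | # verbose=1 enables debug logging; hash_algo accepts md5/sha256/sha512
101 | # (anything else falls back to sha256)
102 | checker = Checker(verbose=1, hash_algo="sha256")
103 | checker.gen("data")    # hash any new files under data/
104 | checker.check("data")  # re-hash everything and compare with the stored meta info
105 | ```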
106 | 
--------------------------------------------------------------------------------
/README_cn.md:
--------------------------------------------------------------------------------
1 | ## bdchecker
2 | 
3 | * [readme 中文](./README_cn.md)
4 | * [readme EN](./README.md)
5 | 
6 | ## 概述
7 | bdchecker (**B**ackup **D**ata Checker) 是用于个人冷备数据检查的工具, 帮助你及时发现数据的损坏
8 | 
9 | ## 为什么需要它
10 | 想象一下, 我们有一些数据需要进行冷备, 可能是每天压缩后的某个金融市场的原始行情数据; 或是一些个人拥有的经典电影的电子版本; 抑或是一些常年用不到的密钥; 我们先列出一些方案:
11 | 
12 | | 方案 | 单个存储期限级别 |
13 | | ---- | ---- |
14 | | 固态盘 | 几年至十几年 |
15 | | 机械盘 | 10年+ |
16 | | 磁带 | 30年+ |
17 | | 打孔纸带 | 千年 |
18 | | 刻在石头上 (罗辑把拐杖高举过头, 庄严地喊道) | 百万年 |
19 | 
20 | 毫无疑问, 若是有足够的财力, 把信息刻在石头上并妥当存储, 除非遭到了二向箔攻击, 否则应该十分安全; 但是对于个人而言, 从石头上读取信息带来的成本应该是远大于我们需要保存的数据的价值.
21 | 那么当考虑数据的易于读写性的时候, 明显硬盘是最为方便的; 但是这带来了额外的要求, 那便是我们需要定期的检查数据是否出现了损坏
22 | 综上所述, 一个简单明了的冷备方案便是
23 | 1. 使用不同生产批次的硬盘进行多备份
24 | 2. 定期检查文件是否出现损坏
25 | 3. 出现损坏时可用其余的备份文件来修复
26 | 
27 | **bdchecker** 便是用来实现`定期检查文件是否出现损坏`的小工具
28 | 
29 | ## 安装
30 | * 使用 pip 安装
31 | ```
32 | pip install bdchecker
33 | ```
34 | * 直接从 [Releases](https://github.com/MuggleWei/bdchecker/releases) 中获取, 解压并使用
35 | 
36 | ## 使用
37 | **bdchecker** 包含三个命令, 分别为
38 | * gen: 扫描目录, 并递归遍历生成该目录下所有**新增**文件的 hash 信息, 放置在目录中的 `.bdchecker.meta` 文件夹中
39 | * clean: 扫描目录, 从 hash 信息中清理掉已删除的文件
40 | * check: 扫描目录, 查找出现损坏的文件 (注意, 此操作会计算所有文件的 hash 值, 较为耗费时间)
41 | 
42 | ### 示例目录
43 | 假设当前有如下目录结构
44 | ```
45 | data
46 | ├──── a.txt
47 | ├──── b.txt
48 | └──── c
49 |       ├──── c1.txt
50 |       └──── c2.txt
51 | ```
52 | 
53 | ### gen 示例
54 | 生成信息
55 | ```
56 | bdchecker gen -d data -v 1
57 | ```
58 | * `-d`: 表示要生成信息的目录
59 | * `-v`: 表示日志输出级别, 越高输出越详细
60 | 
61 | 完成后, 可以看到屏幕上日志输出: `dump meta info to data/.bdchecker.meta/sha256.csv`
62 | 当目录中没有新增文件时, 重复执行 `gen` 命令并不会真正的去生成文件的 hash 信息
63 | 
64 | ### clean 示例
65 | 删除 `data/c/c2.txt`, 并运行
66 | ```
67 | bdchecker clean -d data -v 1
68 | ```
69 | 
70 | 可以在日志倒数几行看到: `clean missing file's meta info: c/c2.txt`, 表示当前我们已经成功清理了文件对应的 hash 信息
71 | 
72 | ### check 示例
73 | 运行
74 | ```
75 | bdchecker check -d data -v 1
76 | ```
77 | 日志的最后一行出现: `all check pass`, 代表没有新增/删除的文件, 且所有的文件都没有损坏
78 | 
79 | 现在让我们稍微更改一下文件 `a.txt`, 随便更改一下其中的内容, 再次运行
80 | ```
81 | bdchecker check -d data -v 1
82 | ```
83 | 此时, 日志出现错误信息: `check failed: a.txt, old hash: ..., cur hash: ...`, 表示 `a.txt` 的内容出现了改变
84 | 
85 | ## 迁移与对比
86 | 由 `bdchecker` 生成的 hash 信息会保存在目录中的 `.bdchecker.meta` 目录中, 所以迁移时直接整个文件夹迁移即可
87 | 当已经有多份冷备数据存在, 且并没有生成过 hash 值时; 此时可以对每份冷备数据都使用 `bdchecker gen` 命令来生成 hash 值, 接着对比两份文件即可. 
由于生成文件行是已经排序的, 所以可以直接使用 `diff` 之类的命令进行对比 88 | -------------------------------------------------------------------------------- /bdchecker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuggleWei/bdchecker/6a46335783c76d309af183abd73bbd2526009382/bdchecker/__init__.py -------------------------------------------------------------------------------- /bdchecker/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.3" 2 | -------------------------------------------------------------------------------- /bdchecker/checker.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import hashlib 3 | import logging 4 | import os 5 | import shutil 6 | 7 | from bdchecker.utils.log_handle import LogHandle 8 | 9 | 10 | class Checker: 11 | """ 12 | backup data checker 13 | """ 14 | 15 | def __init__(self, verbose=0, hash_algo="sha256"): 16 | """ 17 | init checker 18 | """ 19 | self._hash_fn = hashlib.sha256 20 | self._hash_algo = "" 21 | self._meta_dirname = ".bdchecker.meta" 22 | 23 | if verbose == 0: 24 | LogHandle.init_log("", logging.INFO, -1) 25 | elif verbose == 1: 26 | LogHandle.init_log("", logging.DEBUG, -1) 27 | else: 28 | LogHandle.init_log("", logging.DEBUG, -1) 29 | 30 | self._hash_algo = hash_algo 31 | if hash_algo == "sha256": 32 | self._hash_fn = hashlib.sha256 33 | elif hash_algo == "sha512": 34 | self._hash_fn = hashlib.sha512 35 | elif hash_algo == "md5": 36 | self._hash_fn = hashlib.md5 37 | else: 38 | self._hash_algo = "sha256" 39 | self._hash_fn = hashlib.sha256 40 | 41 | def gen(self, dst_dir): 42 | """ 43 | generate meta infos 44 | :param dst_dir: target directory 45 | """ 46 | if not os.path.exists(dst_dir): 47 | raise Exception("target directory not exists: {}".format(dst_dir)) 48 | 49 | # load meta info that already exists 50 | meta_dir = os.path.join(dst_dir, self._meta_dirname) 51 | if os.path.exists(meta_dir): 52 | if not os.path.isdir(meta_dir): 53 | raise Exception( 54 | "{} already exists and not a directory".format(meta_dir)) 55 | else: 56 | os.makedirs(meta_dir, exist_ok=True) 57 | 58 | # load old meta infos 59 | meta_filepath = os.path.join(meta_dir, "{}.csv".format(self._hash_algo)) 60 | 61 | if os.path.exists(meta_filepath): 62 | old_meta_dict = self._load_meta(meta_filepath) 63 | else: 64 | old_meta_dict = {} 65 | 66 | # scan files and generate new meta info 67 | meta_dict = self._scan(dst_dir=dst_dir) 68 | 69 | for k, v in old_meta_dict.items(): 70 | if k not in meta_dict: 71 | logging.warning("missing file: {}".format(k)) 72 | meta_dict[k] = v 73 | 74 | n_new = 0 75 | for k in meta_dict.keys(): 76 | if k not in old_meta_dict: 77 | filepath = os.path.join(dst_dir, k) 78 | hash_val_hex = self._gen_hash(filepath) 79 | logging.debug("new file: {}, hash value: {}".format( 80 | k, hash_val_hex)) 81 | meta_dict[k] = hash_val_hex 82 | n_new += 1 83 | 84 | # write into file 85 | if n_new > 0: 86 | self._dump_meta(meta_filepath, meta_dict) 87 | else: 88 | logging.info("there are no new file in {}".format(dst_dir)) 89 | 90 | def clean(self, dst_dir): 91 | """ 92 | clean 93 | """ 94 | if not os.path.exists(dst_dir): 95 | raise Exception("target directory not exists: {}".format(dst_dir)) 96 | 97 | # load meta info that already exists 98 | meta_dir = os.path.join(dst_dir, self._meta_dirname) 99 | if os.path.exists(meta_dir): 100 | if not os.path.isdir(meta_dir): 101 | raise Exception( 102 
| "{} already exists and not a directory".format(meta_dir)) 103 | else: 104 | os.makedirs(meta_dir, exist_ok=True) 105 | 106 | # load old meta infos 107 | meta_filepath = os.path.join(meta_dir, "{}.csv".format(self._hash_algo)) 108 | 109 | if os.path.exists(meta_filepath): 110 | old_meta_dict = self._load_meta(meta_filepath) 111 | else: 112 | old_meta_dict = {} 113 | 114 | # scan files and remove meta info of missing file 115 | meta_dict = self._scan(dst_dir=dst_dir) 116 | 117 | missing_files = [] 118 | for k in old_meta_dict.keys(): 119 | if k not in meta_dict: 120 | logging.info("clean missing file's meta info: {}".format(k)) 121 | missing_files.append(k) 122 | 123 | for filepath in missing_files: 124 | del old_meta_dict[filepath] 125 | 126 | if len(missing_files) > 0: 127 | self._dump_meta(meta_filepath, old_meta_dict) 128 | else: 129 | logging.info("there are no missing file in {}".format(dst_dir)) 130 | 131 | def check(self, dst_dir): 132 | """ 133 | check meta infos 134 | :param dst_dir: target directory 135 | """ 136 | if not os.path.exists(dst_dir): 137 | raise Exception("target directory not exists: {}".format(dst_dir)) 138 | 139 | # load meta info that already exists 140 | meta_dir = os.path.join(dst_dir, self._meta_dirname) 141 | if os.path.exists(meta_dir): 142 | if not os.path.isdir(meta_dir): 143 | raise Exception( 144 | "{} already exists and not a directory".format(meta_dir)) 145 | else: 146 | os.makedirs(meta_dir, exist_ok=True) 147 | 148 | # load old meta infos 149 | meta_filepath = os.path.join(meta_dir, "{}.csv".format(self._hash_algo)) 150 | 151 | if not os.path.exists(meta_filepath): 152 | logging.error("can't check cause meta file not found: {}".format( 153 | meta_filepath)) 154 | old_meta_dict = self._load_meta(meta_filepath) 155 | 156 | # scan files and generate new meta info 157 | meta_dict = self._scan(dst_dir=dst_dir) 158 | for k, v in meta_dict.items(): 159 | filepath = os.path.join(dst_dir, k) 160 | hash_val_hex = self._gen_hash(filepath) 161 | meta_dict[k] = hash_val_hex 162 | logging.debug( 163 | "calculate hash value: {}, {}".format(k, hash_val_hex)) 164 | 165 | is_all_pass = True 166 | for k, v in old_meta_dict.items(): 167 | if k not in meta_dict: 168 | logging.warning("missing file: {}".format(k)) 169 | is_all_pass = False 170 | 171 | for k, v in meta_dict.items(): 172 | if k not in old_meta_dict: 173 | logging.warning("new file: {}".format(k)) 174 | is_all_pass = False 175 | 176 | old_v = old_meta_dict[k] 177 | if v != old_v: 178 | logging.error( 179 | "check failed: {}, old hash: {}, cur hash: {}".format( 180 | k, old_v, v)) 181 | is_all_pass = False 182 | 183 | if is_all_pass is True: 184 | logging.info("all check pass") 185 | 186 | def _dump_meta(self, meta_filepath, meta_dict): 187 | """ 188 | dump meta infos 189 | """ 190 | backup_filepath = "{}.backup".format(meta_filepath) 191 | tmp_meta_filepath = "{}.tmp".format(meta_filepath) 192 | 193 | # backup 194 | if os.path.exists(meta_filepath): 195 | shutil.copyfile(meta_filepath, backup_filepath) 196 | 197 | # dump 198 | logging.info("dump meta info to {}".format(meta_filepath)) 199 | sorted_dict = dict(sorted(meta_dict.items())) 200 | with open(tmp_meta_filepath, "w", newline='', encoding='utf-8') as f: 201 | writer = csv.writer(f, delimiter=",", lineterminator='\n') 202 | writer.writerow(["k", "v"]) 203 | for k, v in sorted_dict.items(): 204 | writer.writerow([k, v]) 205 | shutil.move(tmp_meta_filepath, meta_filepath) 206 | 207 | # remove backup file 208 | if os.path.exists(backup_filepath): 209 | 
194 |     def _dump_meta(self, meta_filepath, meta_dict):
195 |         """
196 |         dump meta infos
197 |         """
198 |         backup_filepath = "{}.backup".format(meta_filepath)
199 |         tmp_meta_filepath = "{}.tmp".format(meta_filepath)
200 | 
201 |         # backup
202 |         if os.path.exists(meta_filepath):
203 |             shutil.copyfile(meta_filepath, backup_filepath)
204 | 
205 |         # dump
206 |         logging.info("dump meta info to {}".format(meta_filepath))
207 |         sorted_dict = dict(sorted(meta_dict.items()))
208 |         with open(tmp_meta_filepath, "w", newline='', encoding='utf-8') as f:
209 |             writer = csv.writer(f, delimiter=",", lineterminator='\n')
210 |             writer.writerow(["k", "v"])
211 |             for k, v in sorted_dict.items():
212 |                 writer.writerow([k, v])
213 |         shutil.move(tmp_meta_filepath, meta_filepath)
214 | 
215 |         # remove backup file
216 |         if os.path.exists(backup_filepath):
217 |             os.remove(backup_filepath)
218 | 
219 |     def _scan(self, dst_dir):
220 |         """
221 |         scan dst dir and get file dict with empty hash value
222 |         :param dst_dir: target directory
223 |         """
224 |         logging.info("start scan: {}".format(dst_dir))
225 |         meta_dict = {}
226 |         for root, _, files in os.walk(dst_dir):
227 |             for filename in files:
228 |                 sub_dirname = os.path.basename(os.path.normpath(root))
229 |                 if sub_dirname == self._meta_dirname:
230 |                     continue
231 |                 dir_relpath = os.path.relpath(root, dst_dir)
232 |                 rel_filepath = os.path.join(dir_relpath, filename)
233 |                 rel_filepath = os.path.normpath(rel_filepath)
234 |                 rel_filepath = rel_filepath.replace("\\", "/")
235 |                 meta_dict[rel_filepath] = ""
236 |         return meta_dict
237 | 
238 |     def _gen_hash(self, filepath):
239 |         """
240 |         generate hash value
241 |         :param filepath:
242 |         """
243 |         block_size = 1024 * 32
244 |         hash_val = self._hash_fn()
245 |         with open(filepath, "rb") as f:
246 |             while True:
247 |                 block = f.read(block_size)
248 |                 if not block:
249 |                     break
250 |                 hash_val.update(block)
251 |         return hash_val.hexdigest().upper()
252 | 
253 |     def _load_meta(self, filepath):
254 |         """
255 |         load meta info
256 |         :param filepath: meta filepath
257 |         """
258 |         logging.info("start load meta file: {}".format(filepath))
259 |         meta_dict = {}
260 |         with open(filepath, mode="r", encoding="utf-8") as f:
261 |             reader = csv.DictReader(f)
262 |             for row in reader:
263 |                 meta_dict[row["k"]] = row["v"]
264 |                 logging.debug("{}: {}".format(row["k"], row["v"]))
265 |         return meta_dict
266 | 
--------------------------------------------------------------------------------
/bdchecker/command/check.py:
--------------------------------------------------------------------------------
1 | import getopt
2 | import sys
3 | from bdchecker.checker import Checker
4 | 
5 | from bdchecker.utils.const_var import APP_NAME
6 | 
7 | 
8 | class CommandCheck:
9 |     """
10 |     check meta infos
11 |     """
12 | 
13 |     def __init__(self):
14 |         self._usage_str = "Usage: {} check [OPTIONS]\n" \
15 |             "Options: \n" \
16 |             "  -d, --dir      [REQUIRED] target directory\n" \
17 |             "  -v, --verbose  [OPTIONAL] set verbose level; [0|1]\n" \
18 |             "    , --hash     [OPTIONAL] hash algo; [md5|sha256|sha512]\n" \
19 |             "".format(APP_NAME)
20 | 
21 |         self._dir = ""
22 |         self._verbose = 1
23 |         self._hash_algo = "sha256"
24 | 
25 |     def run(self, args):
26 |         """
27 |         run command check
28 |         """
29 |         self._parse_args(args)
30 | 
31 |         if len(self._dir) == 0:
32 |             print("ERROR! check without 'dir' param\n")
33 |             print(self._usage_str)
34 |             sys.exit(1)
35 | 
36 |         checker = Checker(verbose=self._verbose, hash_algo=self._hash_algo)
37 |         checker.check(self._dir)
38 | 
39 |     def _parse_args(self, args):
40 |         """
41 |         parse input arguments
42 |         """
43 |         opts, _ = getopt.getopt(
44 |             args, "hd:v:", ["help", "dir=", "verbose=", "hash="]
45 |         )
46 | 
47 |         for opt, arg in opts:
48 |             if opt in ("-h", "--help"):
49 |                 print(self._usage_str)
50 |                 sys.exit(0)
51 |             elif opt in ("-d", "--dir"):
52 |                 self._dir = arg
53 |             elif opt in ("-v", "--verbose"):
54 |                 self._verbose = int(arg)
55 |             elif opt in ("--hash",):
56 |                 self._hash_algo = arg
57 | 
--------------------------------------------------------------------------------
/bdchecker/command/clean.py:
--------------------------------------------------------------------------------
1 | import getopt
2 | import sys
3 | from bdchecker.checker import Checker
4 | 
5 | from bdchecker.utils.const_var import APP_NAME
6 | 
7 | 
8 | class CommandClean:
9 |     """
10 |     clean meta info of missing file
11 |     """
12 |     def __init__(self):
13 |         self._usage_str = "Usage: {} clean [OPTIONS]\n" \
14 |             "Options: \n" \
15 |             "  -d, --dir      [REQUIRED] target directory\n" \
16 |             "  -v, --verbose  [OPTIONAL] set verbose level; [0|1]\n" \
17 |             "    , --hash     [OPTIONAL] hash algo; [md5|sha256|sha512]\n" \
18 |             "".format(APP_NAME)
19 | 
20 |         self._dir = ""
21 |         self._verbose = 1
22 |         self._hash_algo = "sha256"
23 | 
24 |     def run(self, args):
25 |         """
26 |         run command clean
27 |         """
28 |         self._parse_args(args)
29 | 
30 |         if len(self._dir) == 0:
31 |             print("ERROR! clean without 'dir' param\n")
32 |             print(self._usage_str)
33 |             sys.exit(1)
34 | 
35 |         checker = Checker(verbose=self._verbose, hash_algo=self._hash_algo)
36 |         checker.clean(self._dir)
37 | 
38 |     def _parse_args(self, args):
39 |         """
40 |         parse input arguments
41 |         """
42 |         opts, _ = getopt.getopt(
43 |             args, "hd:v:", ["help", "dir=", "verbose=", "hash="]
44 |         )
45 | 
46 |         for opt, arg in opts:
47 |             if opt in ("-h", "--help"):
48 |                 print(self._usage_str)
49 |                 sys.exit(0)
50 |             elif opt in ("-d", "--dir"):
51 |                 self._dir = arg
52 |             elif opt in ("-v", "--verbose"):
53 |                 self._verbose = int(arg)
54 |             elif opt in ("--hash",):
55 |                 self._hash_algo = arg
56 | 
--------------------------------------------------------------------------------
/bdchecker/command/gen.py:
--------------------------------------------------------------------------------
1 | import getopt
2 | import sys
3 | from bdchecker.checker import Checker
4 | 
5 | from bdchecker.utils.const_var import APP_NAME
6 | 
7 | 
8 | class CommandGen:
9 |     """
10 |     generate meta file
11 |     """
12 | 
13 |     def __init__(self):
14 |         self._usage_str = "Usage: {} gen [OPTIONS]\n" \
15 |             "Options: \n" \
16 |             "  -d, --dir      [REQUIRED] target directory\n" \
17 |             "  -v, --verbose  [OPTIONAL] set verbose level; [0|1]\n" \
18 |             "    , --hash     [OPTIONAL] hash algo; [md5|sha256|sha512]\n" \
19 |             "".format(APP_NAME)
20 | 
21 |         self._dir = ""
22 |         self._verbose = 1
23 |         self._hash_algo = "sha256"
24 | 
25 |     def run(self, args):
26 |         """
27 |         run command gen
28 |         """
29 |         self._parse_args(args)
30 | 
31 |         if len(self._dir) == 0:
32 |             print("ERROR!
gen without 'dir' param\n") 33 | print(self._usage_str) 34 | sys.exit(1) 35 | 36 | checker = Checker(verbose=self._verbose, hash_algo=self._hash_algo) 37 | checker.gen(self._dir) 38 | 39 | def _parse_args(self, args): 40 | """ 41 | parse input arguments 42 | """ 43 | opts, _ = getopt.getopt( 44 | args, "hd:v:", ["help", "dir=", "verbose=", "hash="] 45 | ) 46 | 47 | for opt, arg in opts: 48 | if opt in ("-h", "--help"): 49 | print(self._usage_str) 50 | sys.exit(0) 51 | elif opt in ("-d", "--dir"): 52 | self._dir = arg 53 | elif opt in ("-v", "--verbose"): 54 | self._verbose = int(arg) 55 | elif opt in ("--hash"): 56 | self._hash_algo = arg 57 | -------------------------------------------------------------------------------- /bdchecker/main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from bdchecker.__version__ import __version__ 4 | from bdchecker.command.check import CommandCheck 5 | from bdchecker.command.clean import CommandClean 6 | from bdchecker.command.gen import CommandGen 7 | from bdchecker.utils.const_var import APP_NAME 8 | 9 | 10 | def run_gen(): 11 | """ 12 | run command: gen 13 | """ 14 | cmd = CommandGen() 15 | cmd.run(sys.argv[2:]) 16 | 17 | 18 | def run_clean(): 19 | """ 20 | run command: clean 21 | """ 22 | cmd = CommandClean() 23 | cmd.run(sys.argv[2:]) 24 | 25 | 26 | def run_check(): 27 | """ 28 | run command: check 29 | """ 30 | cmd = CommandCheck() 31 | cmd.run(sys.argv[2:]) 32 | 33 | 34 | def main(): 35 | usage_str = "Usage: {} COMMAND [OPTIONS]\n" \ 36 | "\n" \ 37 | "Commands:\n" \ 38 | " gen generate meta file for target path\n" \ 39 | " clean remove meta info of missing file\n" \ 40 | " check check meta for target path\n" \ 41 | "".format(sys.argv[0]) 42 | 43 | if len(sys.argv) < 2: 44 | print(usage_str) 45 | sys.exit(1) 46 | 47 | if sys.argv[1] in ("-h", "--help"): 48 | print(usage_str) 49 | sys.exit(0) 50 | 51 | if sys.argv[1] in ("-v", "--version"): 52 | print("{} {}".format(APP_NAME, __version__)) 53 | sys.exit(0) 54 | 55 | # commands 56 | command_dict = { 57 | "gen": run_gen, 58 | "clean": run_clean, 59 | "check": run_check, 60 | } 61 | 62 | command = sys.argv[1] 63 | func = command_dict.get(command, None) 64 | if func is None: 65 | print(usage_str) 66 | sys.exit(1) 67 | 68 | try: 69 | func() 70 | except Exception as e: 71 | print("{}".format(e)) 72 | sys.exit(1) 73 | 74 | 75 | if __name__ == "__main__": 76 | main() 77 | -------------------------------------------------------------------------------- /bdchecker/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuggleWei/bdchecker/6a46335783c76d309af183abd73bbd2526009382/bdchecker/utils/__init__.py -------------------------------------------------------------------------------- /bdchecker/utils/const_var.py: -------------------------------------------------------------------------------- 1 | APP_NAME = "bdchecker" 2 | -------------------------------------------------------------------------------- /bdchecker/utils/log_handle.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import logging.handlers 3 | import os 4 | import sys 5 | 6 | 7 | class UnixConsoleColorFormatter(logging.Formatter): 8 | def __init__(self, formatter): 9 | grey = "\x1b[38;20m" 10 | yellow = "\x1b[33;20m" 11 | red = "\x1b[31;20m" 12 | bold_red = "\x1b[31;1m" 13 | reset = "\x1b[0m" 14 | 15 | self.FORMATS = { 16 | logging.DEBUG: grey + 
formatter + reset, 17 | logging.INFO: grey + formatter + reset, 18 | logging.WARNING: yellow + formatter + reset, 19 | logging.ERROR: red + formatter + reset, 20 | logging.CRITICAL: bold_red + formatter + reset 21 | } 22 | 23 | def format(self, record): 24 | log_fmt = self.FORMATS.get(record.levelno) 25 | formatter = logging.Formatter(log_fmt) 26 | return formatter.format(record) 27 | 28 | 29 | class LogHandle(object): 30 | """simple log init""" 31 | 32 | @staticmethod 33 | def init_log( 34 | filename, 35 | console_level=logging.WARNING, 36 | file_level=logging.DEBUG, 37 | use_rotate=False, 38 | mode="a"): 39 | """ 40 | initialize log 41 | :param filename: log output filepath 42 | :param console_level: console filter level 43 | :param file_level: file filter level 44 | :param use_rotate: is use rotate 45 | :param mode: open mode 46 | :return: 47 | """ 48 | # create output dir 49 | folder = os.path.dirname(filename) 50 | if len(folder) > 0 and (not os.path.exists(folder)): 51 | os.makedirs(folder, exist_ok=True) 52 | 53 | # log handler 54 | logger = logging.getLogger() 55 | logger.setLevel(logging.DEBUG) 56 | 57 | ch = LogHandle.get_console_handler(console_level) 58 | ch.setFormatter(LogHandle.get_console_formatter()) 59 | logger.addHandler(ch) 60 | 61 | if file_level != -1: 62 | if use_rotate is True: 63 | fh = LogHandle.get_rotating_handler( 64 | level=file_level, filename=filename, mode=mode) 65 | else: 66 | fh = LogHandle.get_file_handler( 67 | level=file_level, filename=filename, mode=mode) 68 | fh.setFormatter(LogHandle.get_formatter()) 69 | logger.addHandler(fh) 70 | 71 | @staticmethod 72 | def get_formatter(): 73 | """ 74 | log format 75 | """ 76 | return logging.Formatter( 77 | "%(asctime)s|%(levelname)s|%(filename)s:%(lineno)s - " 78 | "%(message)s") 79 | 80 | @staticmethod 81 | def get_console_formatter(): 82 | """ 83 | console formatter 84 | """ 85 | if sys.platform.startswith("win32"): 86 | return LogHandle.get_formatter() 87 | else: 88 | return UnixConsoleColorFormatter( 89 | "%(asctime)s|%(levelname)s|%(filename)s:%(lineno)s - " 90 | "%(message)s") 91 | 92 | @staticmethod 93 | def get_console_handler(level): 94 | """ 95 | get console log handler 96 | :param level: log filter level 97 | :return: log handler 98 | """ 99 | handler = logging.StreamHandler() 100 | handler.setLevel(level) 101 | return handler 102 | 103 | @staticmethod 104 | def get_file_handler(level, filename, mode="a"): 105 | """ 106 | get file log handler 107 | :param level: log filter level 108 | :param filename: output filepath 109 | :param mode: open mode 110 | :return: log handler 111 | """ 112 | handler = logging.FileHandler(filename=filename, mode=mode) 113 | handler.setLevel(level) 114 | return handler 115 | 116 | @staticmethod 117 | def get_rotating_handler( 118 | level, filename, mode="a", 119 | maxBytes=20 * 1024 * 1024, backupCount=10): 120 | """ 121 | get rotating log handler 122 | :param level: log filter level 123 | :param filename: output filepath 124 | :param mode: open mode 125 | :param maxBytes: max bytes 126 | :param backupCount: backup file count 127 | :return: log handler 128 | """ 129 | handler = logging.handlers.RotatingFileHandler( 130 | filename=filename, mode=mode, maxBytes=maxBytes, 131 | backupCount=backupCount) 132 | handler.setLevel(level) 133 | return handler 134 | 135 | @staticmethod 136 | def log_level(str_level: str): 137 | """ 138 | convert string to log level enum 139 | :param str_level: log level string 140 | :return: log level enum 141 | """ 142 | if str_level.lower() == 
"debug": 143 | return logging.DEBUG 144 | elif str_level.lower() == "info": 145 | return logging.INFO 146 | elif str_level.lower() == "warning": 147 | return logging.WARNING 148 | elif str_level.lower() == "error": 149 | return logging.ERROR 150 | elif str_level.lower() == "fatal": 151 | return logging.FATAL 152 | else: 153 | return logging.INFO 154 | -------------------------------------------------------------------------------- /bdchecker/utils/singleton.py: -------------------------------------------------------------------------------- 1 | def singleton(cls, *args, **kwargs): 2 | instances = {} 3 | 4 | def _singleton(*args, **kwargs): 5 | if cls not in instances: 6 | instances[cls] = cls(*args, **kwargs) 7 | return instances[cls] 8 | 9 | return _singleton 10 | -------------------------------------------------------------------------------- /etc/settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /pyinstaller_pkg.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | setlocal ENABLEDELAYEDEXPANSION 4 | 5 | set origin_dir=%~dp0 6 | cd %origin_dir% 7 | 8 | python -m venv venv 9 | call venv\Scripts\activate.bat 10 | pip install -r requirements-dev.txt 11 | 12 | pyinstaller -F bdchecker\main.py --distpath dist\bdchecker -n bdchecker 13 | xcopy README.md dist\bdchecker\ /Y 14 | xcopy README_cn.md dist\bdchecker\ /Y 15 | xcopy LICENSE dist\bdchecker\ /Y 16 | 17 | call venv\Scripts\deactivate.bat -------------------------------------------------------------------------------- /pyinstaller_pkg.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | origin_dir="$(dirname "$(readlink -f "$0")")" 4 | cd $origin_dir 5 | 6 | if [ -d "venv" ]; then 7 | echo "venv already exists" 8 | else 9 | echo "create venv" 10 | python -m venv venv 11 | fi 12 | 13 | source venv/bin/activate 14 | 15 | if [ $? -eq 0 ]; then 16 | echo "success source activate" 17 | else 18 | echo "failed source activate" 19 | exit 1 20 | fi 21 | 22 | pip install -r requirements-dev.txt 23 | 24 | pyinstaller -F bdchecker/main.py --distpath dist/bdchecker -n bdchecker 25 | cp ./README.md dist/bdchecker/ 26 | cp ./README_cn.md dist/bdchecker/ 27 | cp ./LICENSE dist/bdchecker/ 28 | -------------------------------------------------------------------------------- /pypi_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | origin_dir="$(dirname "$(readlink -f "$0")")" 4 | cd $origin_dir 5 | 6 | if [ -d "venv" ]; then 7 | echo "venv already exists" 8 | else 9 | echo "create venv" 10 | python -m venv venv 11 | fi 12 | 13 | source venv/bin/activate 14 | 15 | if [ $? 
-eq 0 ]; then 16 | echo "success source activate" 17 | else 18 | echo "failed source activate" 19 | exit 1 20 | fi 21 | 22 | pip install -r requirements-dev.txt 23 | 24 | python -m pip install --upgrade build 25 | python -m build 26 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "bdchecker" 7 | license = {file = "LICENSE"} 8 | authors = [ 9 | {name="Muggle Wei", email="mugglewei@gmail.com"} 10 | ] 11 | description = "Backup Data Checker" 12 | readme = "README.md" 13 | requires-python = ">=3.8" 14 | keywords = ["data cold backup", "data checker"] 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | "License :: OSI Approved :: MIT License", 18 | "Operating System :: OS Independent", 19 | ] 20 | 21 | dynamic = ["dependencies", "version"] 22 | 23 | [project.scripts] 24 | bdchecker = "bdchecker.main:main" 25 | 26 | [project.urls] 27 | "Homepage" = "https://github.com/MuggleWei/bdchecker" 28 | "Bug Tracker" = "https://github.com/MuggleWei/bdchecker/issues" 29 | 30 | [tool.setuptools] 31 | include-package-data = true 32 | packages = ["bdchecker", "bdchecker.command", "bdchecker.utils"] 33 | 34 | [tool.setuptools.dynamic] 35 | dependencies = {file = ["requirements.txt"]} 36 | version = {attr = "bdchecker.__version__.__version__"} 37 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pyright 2 | pynvim 3 | pyinstaller==6.3.0 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuggleWei/bdchecker/6a46335783c76d309af183abd73bbd2526009382/requirements.txt --------------------------------------------------------------------------------