├── .gitignore ├── LICENSE ├── README.md ├── README_cn.md ├── bdchecker ├── __init__.py ├── __version__.py ├── checker.py ├── command │ ├── check.py │ ├── clean.py │ └── gen.py ├── main.py └── utils │ ├── __init__.py │ ├── const_var.py │ ├── log_handle.py │ └── singleton.py ├── etc └── settings.xml ├── pyinstaller_pkg.bat ├── pyinstaller_pkg.sh ├── pypi_build.sh ├── pyproject.toml ├── requirements-dev.txt └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | ######### editor & tools ######### 2 | # vim 3 | *~ 4 | /compile_commands.json 5 | *.swp 6 | doc/compile_commands.json 7 | 8 | # vscode 9 | .vscode/ 10 | 11 | # clangd 12 | .clangd/ 13 | .cache/ 14 | 15 | # clang-format 16 | .clang-format 17 | 18 | # pycharm 19 | .idea 20 | 21 | ######### build & generated packages ######### 22 | _hpb/ 23 | build/ 24 | _packages/ 25 | _artifacts/ 26 | 27 | ######### python ######### 28 | # Byte-compiled / optimized / DLL files 29 | __pycache__/ 30 | *.py[cod] 31 | *$py.class 32 | 33 | # C extensions 34 | *.so 35 | 36 | # Distribution / packaging 37 | .Python 38 | build/ 39 | develop-eggs/ 40 | dist/ 41 | downloads/ 42 | eggs/ 43 | .eggs/ 44 | lib/ 45 | lib64/ 46 | parts/ 47 | sdist/ 48 | var/ 49 | wheels/ 50 | pip-wheel-metadata/ 51 | share/python-wheels/ 52 | *.egg-info/ 53 | .installed.cfg 54 | *.egg 55 | MANIFEST 56 | 57 | # PyInstaller 58 | # Usually these files are written by a python script from a template 59 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 60 | *.manifest 61 | *.spec 62 | 63 | # Installer logs 64 | pip-log.txt 65 | pip-delete-this-directory.txt 66 | 67 | # Unit test / coverage reports 68 | htmlcov/ 69 | .tox/ 70 | .nox/ 71 | .coverage 72 | .coverage.* 73 | .cache 74 | nosetests.xml 75 | coverage.xml 76 | *.cover 77 | .hypothesis/ 78 | .pytest_cache/ 79 | 80 | # Translations 81 | *.mo 82 | *.pot 83 | 84 | # Django stuff: 85 | *.log 86 | local_settings.py 87 | db.sqlite3 88 | 89 | # Flask stuff: 90 | instance/ 91 | .webassets-cache 92 | 93 | # Scrapy stuff: 94 | .scrapy 95 | 96 | # Sphinx documentation 97 | docs/_build/ 98 | 99 | # PyBuilder 100 | target/ 101 | 102 | # Jupyter Notebook 103 | .ipynb_checkpoints 104 | 105 | # IPython 106 | profile_default/ 107 | ipython_config.py 108 | 109 | # pyenv 110 | .python-version 111 | 112 | # pipenv 113 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 114 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 115 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 116 | # install all needed dependencies. 
117 | #Pipfile.lock
118 | 
119 | # celery beat schedule file
120 | celerybeat-schedule
121 | 
122 | # SageMath parsed files
123 | *.sage.py
124 | 
125 | # Environments
126 | .env
127 | .venv
128 | env/
129 | venv/
130 | ENV/
131 | env.bak/
132 | venv.bak/
133 | 
134 | # Spyder project settings
135 | .spyderproject
136 | .spyproject
137 | 
138 | # Rope project settings
139 | .ropeproject
140 | 
141 | # mkdocs documentation
142 | /site
143 | 
144 | # mypy
145 | .mypy_cache/
146 | .dmypy.json
147 | dmypy.json
148 | 
149 | # Pyre type checker
150 | .pyre/
151 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | Copyright (c) 2024 Muggle Wei
3 | 
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | * [readme 中文](./README_cn.md)
2 | * [readme EN](./README.md)
3 | 
4 | ## Overview
5 | bdchecker (**B**ackup **D**ata Checker) is a tool for checking personal cold-backup data, helping you discover data corruption in time.
6 | 
7 | ## Why use it
8 | Imagine that we have some data that needs to be cold-backed up: perhaps the raw market data of some financial market, compressed daily; or personal electronic copies of classic movies; or some keys that go unused all year round. Let's first list some options:
9 | 
10 | | Storage plan | Storage life span |
11 | | ---- | ---- |
12 | | SSD | several years to more than ten years |
13 | | HDD | 10+ years |
14 | | tape drive | 30+ years |
15 | | punched paper | thousands of years |
16 | | Carved in stone (Luo Ji raised his crutch above his head and shouted solemnly) | millions of years |
17 | 
18 | There is no doubt that if you have the financial resources to engrave the information on stone and store it properly, it should be very safe, unless you suffer a dual-vector foil attack; but for individuals, the cost of reading information back from stone far exceeds the value of the data we need to save.
19 | When we also consider how easy the data is to read and write, hard disks are clearly the most convenient; but this brings an additional requirement: we need to regularly check whether the data has become corrupted.
20 | To sum up, a simple and clear cold-backup solution is:
21 | 1. Use hard drives from different production batches to keep multiple backups
22 | 2. Regularly check whether files are corrupted
23 | 3. When corruption occurs, use the remaining backups to repair it
24 | 
25 | **bdchecker** is a small tool that implements the `regularly check whether files are corrupted` step.
26 | 
27 | ## Install
28 | * use pip
29 | ```
30 | pip install bdchecker
31 | ```
32 | * or download from the project's [Releases](https://github.com/MuggleWei/bdchecker/releases) and decompress
33 | 
34 | ## Usage
35 | **bdchecker** includes 3 sub-commands:
36 | * gen: scan a directory recursively, generate hash information for all **new** files in it, and store the result in the `.bdchecker.meta` folder
37 | * clean: scan a directory and remove deleted files from the stored hash information
38 | * check: scan a directory and find corrupted files (note: this recalculates the hash of every file, which is time-consuming)
39 | 
40 | ### Example directory
41 | Assume that we currently have the following directory structure:
42 | ```
43 | data
44 | ├──── a.txt
45 | ├──── b.txt
46 | └──── c
47 |       ├──── c1.txt
48 |       └──── c2.txt
49 | ```
50 | 
51 | ### Command: gen
52 | Generate hash infos:
53 | ```
54 | bdchecker gen -d data -v 1
55 | ```
56 | * `-d`: the directory for which information needs to be generated
57 | * `-v`: verbose level (the higher, the more detailed the output)
58 | 
59 | After it completes, you can see the console output: `dump meta info to data/.bdchecker.meta/sha256.csv`
60 | When there are no new files in the directory, running the `gen` command again will not regenerate any hash information.
61 | 
62 | ### Command: clean
63 | Remove `data/c/c2.txt`, then run:
64 | ```
65 | bdchecker clean -d data -v 1
66 | ```
67 | In the last few lines of the log you can see `clean missing file's meta info: c/c2.txt`, which means the hash information corresponding to the file has been cleaned successfully.
68 | 
69 | ### Command: check
70 | Run:
71 | ```
72 | bdchecker check -d data -v 1
73 | ```
74 | The last line of the log shows `all check pass`, which means there are no new/missing files and no file is corrupted.
75 | 
76 | Now let's modify `a.txt`, write something random into it, and run again:
77 | ```
78 | bdchecker check -d data -v 1
79 | ```
80 | This time an error message appears in the log: `check failed: a.txt, old hash: ..., cur hash: ...`, indicating that the content of `a.txt` has changed.
81 | 
82 | ## Migration and comparison
83 | The hash information generated by `bdchecker` is saved in the `.bdchecker.meta` folder inside the directory, so when migrating you can simply move the entire directory.
84 | When multiple backup copies already exist but no hash values have been generated yet, you can run `bdchecker gen` on each copy and then compare the resulting meta files. Since the lines of the generated file are already sorted, you can compare them directly with commands such as `diff`; see the example below.
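85 | 
86 | For reference, the meta file itself is a plain two-column CSV: column `k` is the file path relative to the scanned directory (sorted, with `/` separators) and column `v` is the uppercase hex digest. A sketch of what `data/.bdchecker.meta/sha256.csv` might look like (the digests below are shortened placeholders, not real values):
87 | ```
88 | k,v
89 | a.txt,9F86D081884C7D65...
90 | b.txt,2C26B46B68FFC68F...
91 | c/c1.txt,4E07408562BEDB8B...
92 | c/c2.txt,EF2D127DE37B942B...
93 | ```
94 | Because both meta files are sorted the same way, `diff backup1/.bdchecker.meta/sha256.csv backup2/.bdchecker.meta/sha256.csv` (where `backup1` and `backup2` are example paths) prints any mismatching entries directly.
95 | 
96 | The same checks can also be driven from Python, e.g. from a scheduled job, through the `Checker` class that the CLI wraps. A minimal sketch, where the `data` path is just an example:
97 | ```
98 | from bdchecker.checker import Checker
99 | 
100 | # verbose=1 enables debug logging; hash_algo accepts md5/sha256/sha512
101 | # (anything else falls back to sha256)
102 | checker = Checker(verbose=1, hash_algo="sha256")
103 | checker.gen("data")    # hash any new files under data/
104 | checker.check("data")  # re-hash everything and compare with the stored meta info
105 | ```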
106 | 
--------------------------------------------------------------------------------
/README_cn.md:
--------------------------------------------------------------------------------
1 | ## bdchecker
2 | 
3 | * [readme 中文](./README_cn.md)
4 | * [readme EN](./README.md)
5 | 
6 | ## 概述
7 | bdchecker (**B**ackup **D**ata Checker) 是用于个人冷备数据检查的工具, 帮助你及时发现数据的损坏
8 | 
9 | ## 为什么需要它
10 | 想象一下, 我们有一些数据需要进行冷备, 可能是每天压缩后的某个金融市场的原始行情数据; 或是一些个人拥有的经典电影的电子版本; 抑或是一些常年用不到的密钥; 我们先列出一些方案:
11 | 
12 | | 方案 | 单个存储期限级别 |
13 | | ---- | ---- |
14 | | 固态盘 | 几年至十几年 |
15 | | 机械盘 | 10年+ |
16 | | 磁带 | 30年+ |
17 | | 打孔纸带 | 千年 |
18 | | 刻在石头上 (罗辑把拐杖高举过头, 庄严地喊道) | 百万年 |
19 | 
20 | 毫无疑问, 若是有足够的财力, 把信息刻在石头上并妥当存储, 除非遭到了二向箔攻击, 否则应该十分安全; 但是对于个人而言, 从石头上读取信息带来的成本应该是远大于我们需要保存的数据的价值.
21 | 那么当考虑数据的易于读写性的时候, 明显硬盘是最为方便的; 但是这带来了额外的要求, 那便是我们需要定期的检查数据是否出现了损坏
22 | 综上所述, 一个简单明了的冷备方案便是
23 | 1. 使用不同生产批次的硬盘进行多备份
24 | 2. 定期检查文件是否出现损坏
25 | 3. 出现损坏时可用其余的备份文件来修复
26 | 
27 | **bdchecker** 便是用来实现`定期检查文件是否出现损坏`的小工具
28 | 
29 | ## 安装
30 | * 使用 pip 安装
31 | ```
32 | pip install bdchecker
33 | ```
34 | * 直接从 [Releases](https://github.com/MuggleWei/bdchecker/releases) 中获取, 解压并使用
35 | 
36 | ## 使用
37 | **bdchecker** 包含三个命令, 分别为
38 | * gen: 扫描目录, 并递归遍历生成该目录下所有**新增**文件的 hash 信息, 放置在目录中的 `.bdchecker.meta` 文件夹中
39 | * clean: 扫描目录, 从 hash 信息中清理掉已删除的文件
40 | * check: 扫描目录, 查找出现损坏的文件 (注意, 此操作会计算所有文件的 hash 值, 较为耗费时间)
41 | 
42 | ### 示例目录
43 | 假设当前有如下目录结构
44 | ```
45 | data
46 | ├──── a.txt
47 | ├──── b.txt
48 | └──── c
49 |       ├──── c1.txt
50 |       └──── c2.txt
51 | ```
52 | 
53 | ### gen 示例
54 | 生成信息
55 | ```
56 | bdchecker gen -d data -v 1
57 | ```
58 | * `-d`: 表示要生成信息的目录
59 | * `-v`: 表示日志输出级别, 越高输出越详细
60 | 
61 | 完成后, 可以看到屏幕上日志输出: `dump meta info to data/.bdchecker.meta/sha256.csv`
62 | 当目录中没有新增文件时, 重复执行 `gen` 命令并不会真正的去生成文件的 hash 信息
63 | 
64 | ### clean 示例
65 | 删除 `data/c/c2.txt`, 并运行
66 | ```
67 | bdchecker clean -d data -v 1
68 | ```
69 | 
70 | 可以在日志倒数几行看到: `clean missing file's meta info: c/c2.txt`, 表示当前我们已经成功清理了文件对应的 hash 信息
71 | 
72 | ### check 示例
73 | 运行
74 | ```
75 | bdchecker check -d data -v 1
76 | ```
77 | 日志的最后一行出现: `all check pass`, 代表没有新增/删除的文件, 且所有的文件都没有损坏
78 | 
79 | 现在让我们稍微更改一下文件 `a.txt`, 随便更改一下其中的内容, 再次运行
80 | ```
81 | bdchecker check -d data -v 1
82 | ```
83 | 此时, 日志出现错误信息: `check failed: a.txt, old hash: ..., cur hash: ...`, 表示 `a.txt` 的内容出现了改变
84 | 
85 | ## 迁移与对比
86 | 由 `bdchecker` 生成的 hash 信息会保存在目录中的 `.bdchecker.meta` 目录中, 所以迁移时直接整个文件夹迁移即可
87 | 当已经有多份冷备数据存在, 且并没有生成过 hash 值时; 此时可以对每份冷备数据都使用 `bdchecker gen` 命令来生成 hash 值, 接着对比两份文件即可. 
由于生成文件行是已经排序的, 所以可以直接使用 `diff` 之类的命令进行对比 88 | -------------------------------------------------------------------------------- /bdchecker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuggleWei/bdchecker/6a46335783c76d309af183abd73bbd2526009382/bdchecker/__init__.py -------------------------------------------------------------------------------- /bdchecker/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.3" 2 | -------------------------------------------------------------------------------- /bdchecker/checker.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import hashlib 3 | import logging 4 | import os 5 | import shutil 6 | 7 | from bdchecker.utils.log_handle import LogHandle 8 | 9 | 10 | class Checker: 11 | """ 12 | backup data checker 13 | """ 14 | 15 | def __init__(self, verbose=0, hash_algo="sha256"): 16 | """ 17 | init checker 18 | """ 19 | self._hash_fn = hashlib.sha256 20 | self._hash_algo = "" 21 | self._meta_dirname = ".bdchecker.meta" 22 | 23 | if verbose == 0: 24 | LogHandle.init_log("", logging.INFO, -1) 25 | elif verbose == 1: 26 | LogHandle.init_log("", logging.DEBUG, -1) 27 | else: 28 | LogHandle.init_log("", logging.DEBUG, -1) 29 | 30 | self._hash_algo = hash_algo 31 | if hash_algo == "sha256": 32 | self._hash_fn = hashlib.sha256 33 | elif hash_algo == "sha512": 34 | self._hash_fn = hashlib.sha512 35 | elif hash_algo == "md5": 36 | self._hash_fn = hashlib.md5 37 | else: 38 | self._hash_algo = "sha256" 39 | self._hash_fn = hashlib.sha256 40 | 41 | def gen(self, dst_dir): 42 | """ 43 | generate meta infos 44 | :param dst_dir: target directory 45 | """ 46 | if not os.path.exists(dst_dir): 47 | raise Exception("target directory not exists: {}".format(dst_dir)) 48 | 49 | # load meta info that already exists 50 | meta_dir = os.path.join(dst_dir, self._meta_dirname) 51 | if os.path.exists(meta_dir): 52 | if not os.path.isdir(meta_dir): 53 | raise Exception( 54 | "{} already exists and not a directory".format(meta_dir)) 55 | else: 56 | os.makedirs(meta_dir, exist_ok=True) 57 | 58 | # load old meta infos 59 | meta_filepath = os.path.join(meta_dir, "{}.csv".format(self._hash_algo)) 60 | 61 | if os.path.exists(meta_filepath): 62 | old_meta_dict = self._load_meta(meta_filepath) 63 | else: 64 | old_meta_dict = {} 65 | 66 | # scan files and generate new meta info 67 | meta_dict = self._scan(dst_dir=dst_dir) 68 | 69 | for k, v in old_meta_dict.items(): 70 | if k not in meta_dict: 71 | logging.warning("missing file: {}".format(k)) 72 | meta_dict[k] = v 73 | 74 | n_new = 0 75 | for k in meta_dict.keys(): 76 | if k not in old_meta_dict: 77 | filepath = os.path.join(dst_dir, k) 78 | hash_val_hex = self._gen_hash(filepath) 79 | logging.debug("new file: {}, hash value: {}".format( 80 | k, hash_val_hex)) 81 | meta_dict[k] = hash_val_hex 82 | n_new += 1 83 | 84 | # write into file 85 | if n_new > 0: 86 | self._dump_meta(meta_filepath, meta_dict) 87 | else: 88 | logging.info("there are no new file in {}".format(dst_dir)) 89 | 90 | def clean(self, dst_dir): 91 | """ 92 | clean 93 | """ 94 | if not os.path.exists(dst_dir): 95 | raise Exception("target directory not exists: {}".format(dst_dir)) 96 | 97 | # load meta info that already exists 98 | meta_dir = os.path.join(dst_dir, self._meta_dirname) 99 | if os.path.exists(meta_dir): 100 | if not os.path.isdir(meta_dir): 101 | raise Exception( 102 
| "{} already exists and not a directory".format(meta_dir)) 103 | else: 104 | os.makedirs(meta_dir, exist_ok=True) 105 | 106 | # load old meta infos 107 | meta_filepath = os.path.join(meta_dir, "{}.csv".format(self._hash_algo)) 108 | 109 | if os.path.exists(meta_filepath): 110 | old_meta_dict = self._load_meta(meta_filepath) 111 | else: 112 | old_meta_dict = {} 113 | 114 | # scan files and remove meta info of missing file 115 | meta_dict = self._scan(dst_dir=dst_dir) 116 | 117 | missing_files = [] 118 | for k in old_meta_dict.keys(): 119 | if k not in meta_dict: 120 | logging.info("clean missing file's meta info: {}".format(k)) 121 | missing_files.append(k) 122 | 123 | for filepath in missing_files: 124 | del old_meta_dict[filepath] 125 | 126 | if len(missing_files) > 0: 127 | self._dump_meta(meta_filepath, old_meta_dict) 128 | else: 129 | logging.info("there are no missing file in {}".format(dst_dir)) 130 | 131 | def check(self, dst_dir): 132 | """ 133 | check meta infos 134 | :param dst_dir: target directory 135 | """ 136 | if not os.path.exists(dst_dir): 137 | raise Exception("target directory not exists: {}".format(dst_dir)) 138 | 139 | # load meta info that already exists 140 | meta_dir = os.path.join(dst_dir, self._meta_dirname) 141 | if os.path.exists(meta_dir): 142 | if not os.path.isdir(meta_dir): 143 | raise Exception( 144 | "{} already exists and not a directory".format(meta_dir)) 145 | else: 146 | os.makedirs(meta_dir, exist_ok=True) 147 | 148 | # load old meta infos 149 | meta_filepath = os.path.join(meta_dir, "{}.csv".format(self._hash_algo)) 150 | 151 | if not os.path.exists(meta_filepath): 152 | logging.error("can't check cause meta file not found: {}".format( 153 | meta_filepath)) 154 | old_meta_dict = self._load_meta(meta_filepath) 155 | 156 | # scan files and generate new meta info 157 | meta_dict = self._scan(dst_dir=dst_dir) 158 | for k, v in meta_dict.items(): 159 | filepath = os.path.join(dst_dir, k) 160 | hash_val_hex = self._gen_hash(filepath) 161 | meta_dict[k] = hash_val_hex 162 | logging.debug( 163 | "calculate hash value: {}, {}".format(k, hash_val_hex)) 164 | 165 | is_all_pass = True 166 | for k, v in old_meta_dict.items(): 167 | if k not in meta_dict: 168 | logging.warning("missing file: {}".format(k)) 169 | is_all_pass = False 170 | 171 | for k, v in meta_dict.items(): 172 | if k not in old_meta_dict: 173 | logging.warning("new file: {}".format(k)) 174 | is_all_pass = False 175 | 176 | old_v = old_meta_dict[k] 177 | if v != old_v: 178 | logging.error( 179 | "check failed: {}, old hash: {}, cur hash: {}".format( 180 | k, old_v, v)) 181 | is_all_pass = False 182 | 183 | if is_all_pass is True: 184 | logging.info("all check pass") 185 | 186 | def _dump_meta(self, meta_filepath, meta_dict): 187 | """ 188 | dump meta infos 189 | """ 190 | backup_filepath = "{}.backup".format(meta_filepath) 191 | tmp_meta_filepath = "{}.tmp".format(meta_filepath) 192 | 193 | # backup 194 | if os.path.exists(meta_filepath): 195 | shutil.copyfile(meta_filepath, backup_filepath) 196 | 197 | # dump 198 | logging.info("dump meta info to {}".format(meta_filepath)) 199 | sorted_dict = dict(sorted(meta_dict.items())) 200 | with open(tmp_meta_filepath, "w", newline='', encoding='utf-8') as f: 201 | writer = csv.writer(f, delimiter=",", lineterminator='\n') 202 | writer.writerow(["k", "v"]) 203 | for k, v in sorted_dict.items(): 204 | writer.writerow([k, v]) 205 | shutil.move(tmp_meta_filepath, meta_filepath) 206 | 207 | # remove backup file 208 | if os.path.exists(backup_filepath): 209 | 
194 |     def _dump_meta(self, meta_filepath, meta_dict):
195 |         """
196 |         dump meta infos
197 |         """
198 |         backup_filepath = "{}.backup".format(meta_filepath)
199 |         tmp_meta_filepath = "{}.tmp".format(meta_filepath)
200 | 
201 |         # backup
202 |         if os.path.exists(meta_filepath):
203 |             shutil.copyfile(meta_filepath, backup_filepath)
204 | 
205 |         # dump
206 |         logging.info("dump meta info to {}".format(meta_filepath))
207 |         sorted_dict = dict(sorted(meta_dict.items()))
208 |         with open(tmp_meta_filepath, "w", newline='', encoding='utf-8') as f:
209 |             writer = csv.writer(f, delimiter=",", lineterminator='\n')
210 |             writer.writerow(["k", "v"])
211 |             for k, v in sorted_dict.items():
212 |                 writer.writerow([k, v])
213 |         shutil.move(tmp_meta_filepath, meta_filepath)
214 | 
215 |         # remove backup file
216 |         if os.path.exists(backup_filepath):
217 |             os.remove(backup_filepath)
218 | 
219 |     def _scan(self, dst_dir):
220 |         """
221 |         scan dst dir and get file dict with empty hash value
222 |         :param dst_dir: target directory
223 |         """
224 |         logging.info("start scan: {}".format(dst_dir))
225 |         meta_dict = {}
226 |         for root, _, files in os.walk(dst_dir):
227 |             for filename in files:
228 |                 sub_dirname = os.path.basename(os.path.normpath(root))
229 |                 if sub_dirname == self._meta_dirname:
230 |                     continue
231 |                 dir_relpath = os.path.relpath(root, dst_dir)
232 |                 rel_filepath = os.path.join(dir_relpath, filename)
233 |                 rel_filepath = os.path.normpath(rel_filepath)
234 |                 rel_filepath = rel_filepath.replace("\\", "/")
235 |                 meta_dict[rel_filepath] = ""
236 |         return meta_dict
237 | 
238 |     def _gen_hash(self, filepath):
239 |         """
240 |         generate hash value
241 |         :param filepath:
242 |         """
243 |         block_size = 1024 * 32
244 |         hash_val = self._hash_fn()
245 |         with open(filepath, "rb") as f:
246 |             while True:
247 |                 block = f.read(block_size)
248 |                 if not block:
249 |                     break
250 |                 hash_val.update(block)
251 |         return hash_val.hexdigest().upper()
252 | 
253 |     def _load_meta(self, filepath):
254 |         """
255 |         load meta info
256 |         :param filepath: meta filepath
257 |         """
258 |         logging.info("start load meta file: {}".format(filepath))
259 |         meta_dict = {}
260 |         with open(filepath, mode="r", encoding="utf-8") as f:
261 |             reader = csv.DictReader(f)
262 |             for row in reader:
263 |                 meta_dict[row["k"]] = row["v"]
264 |                 logging.debug("{}: {}".format(row["k"], row["v"]))
265 |         return meta_dict
266 | 
--------------------------------------------------------------------------------
/bdchecker/command/check.py:
--------------------------------------------------------------------------------
1 | import getopt
2 | import sys
3 | from bdchecker.checker import Checker
4 | 
5 | from bdchecker.utils.const_var import APP_NAME
6 | 
7 | 
8 | class CommandCheck:
9 |     """
10 |     check meta infos
11 |     """
12 | 
13 |     def __init__(self):
14 |         self._usage_str = "Usage: {} check [OPTIONS]\n" \
15 |             "Options: \n" \
16 |             "  -d, --dir      [REQUIRED] target directory\n" \
17 |             "  -v, --verbose  [OPTIONAL] set verbose level; [0|1]\n" \
18 |             "    , --hash     [OPTIONAL] hash algo; [md5|sha256|sha512]\n" \
19 |             "".format(APP_NAME)
20 | 
21 |         self._dir = ""
22 |         self._verbose = 1
23 |         self._hash_algo = "sha256"
24 | 
25 |     def run(self, args):
26 |         """
27 |         run command check
28 |         """
29 |         self._parse_args(args)
30 | 
31 |         if len(self._dir) == 0:
32 |             print("ERROR! check without 'dir' param\n")
33 |             print(self._usage_str)
34 |             sys.exit(1)
35 | 
36 |         checker = Checker(verbose=self._verbose, hash_algo=self._hash_algo)
37 |         checker.check(self._dir)
38 | 
39 |     def _parse_args(self, args):
40 |         """
41 |         parse input arguments
42 |         """
43 |         opts, _ = getopt.getopt(
44 |             args, "hd:v:", ["help", "dir=", "verbose=", "hash="]
45 |         )
46 | 
47 |         for opt, arg in opts:
48 |             if opt in ("-h", "--help"):
49 |                 print(self._usage_str)
50 |                 sys.exit(0)
51 |             elif opt in ("-d", "--dir"):
52 |                 self._dir = arg
53 |             elif opt in ("-v", "--verbose"):
54 |                 self._verbose = int(arg)
55 |             elif opt in ("--hash",):
56 |                 self._hash_algo = arg
57 | 
--------------------------------------------------------------------------------
/bdchecker/command/clean.py:
--------------------------------------------------------------------------------
1 | import getopt
2 | import sys
3 | from bdchecker.checker import Checker
4 | 
5 | from bdchecker.utils.const_var import APP_NAME
6 | 
7 | 
8 | class CommandClean:
9 |     """
10 |     clean meta info of missing file
11 |     """
12 |     def __init__(self):
13 |         self._usage_str = "Usage: {} clean [OPTIONS]\n" \
14 |             "Options: \n" \
15 |             "  -d, --dir      [REQUIRED] target directory\n" \
16 |             "  -v, --verbose  [OPTIONAL] set verbose level; [0|1]\n" \
17 |             "    , --hash     [OPTIONAL] hash algo; [md5|sha256|sha512]\n" \
18 |             "".format(APP_NAME)
19 | 
20 |         self._dir = ""
21 |         self._verbose = 1
22 |         self._hash_algo = "sha256"
23 | 
24 |     def run(self, args):
25 |         """
26 |         run command clean
27 |         """
28 |         self._parse_args(args)
29 | 
30 |         if len(self._dir) == 0:
31 |             print("ERROR! clean without 'dir' param\n")
32 |             print(self._usage_str)
33 |             sys.exit(1)
34 | 
35 |         checker = Checker(verbose=self._verbose, hash_algo=self._hash_algo)
36 |         checker.clean(self._dir)
37 | 
38 |     def _parse_args(self, args):
39 |         """
40 |         parse input arguments
41 |         """
42 |         opts, _ = getopt.getopt(
43 |             args, "hd:v:", ["help", "dir=", "verbose=", "hash="]
44 |         )
45 | 
46 |         for opt, arg in opts:
47 |             if opt in ("-h", "--help"):
48 |                 print(self._usage_str)
49 |                 sys.exit(0)
50 |             elif opt in ("-d", "--dir"):
51 |                 self._dir = arg
52 |             elif opt in ("-v", "--verbose"):
53 |                 self._verbose = int(arg)
54 |             elif opt in ("--hash",):
55 |                 self._hash_algo = arg
56 | 
--------------------------------------------------------------------------------
/bdchecker/command/gen.py:
--------------------------------------------------------------------------------
1 | import getopt
2 | import sys
3 | from bdchecker.checker import Checker
4 | 
5 | from bdchecker.utils.const_var import APP_NAME
6 | 
7 | 
8 | class CommandGen:
9 |     """
10 |     generate meta file
11 |     """
12 | 
13 |     def __init__(self):
14 |         self._usage_str = "Usage: {} gen [OPTIONS]\n" \
15 |             "Options: \n" \
16 |             "  -d, --dir      [REQUIRED] target directory\n" \
17 |             "  -v, --verbose  [OPTIONAL] set verbose level; [0|1]\n" \
18 |             "    , --hash     [OPTIONAL] hash algo; [md5|sha256|sha512]\n" \
19 |             "".format(APP_NAME)
20 | 
21 |         self._dir = ""
22 |         self._verbose = 1
23 |         self._hash_algo = "sha256"
24 | 
25 |     def run(self, args):
26 |         """
27 |         run command gen
28 |         """
29 |         self._parse_args(args)
30 | 
31 |         if len(self._dir) == 0:
32 |             print("ERROR!
gen without 'dir' param\n") 33 | print(self._usage_str) 34 | sys.exit(1) 35 | 36 | checker = Checker(verbose=self._verbose, hash_algo=self._hash_algo) 37 | checker.gen(self._dir) 38 | 39 | def _parse_args(self, args): 40 | """ 41 | parse input arguments 42 | """ 43 | opts, _ = getopt.getopt( 44 | args, "hd:v:", ["help", "dir=", "verbose=", "hash="] 45 | ) 46 | 47 | for opt, arg in opts: 48 | if opt in ("-h", "--help"): 49 | print(self._usage_str) 50 | sys.exit(0) 51 | elif opt in ("-d", "--dir"): 52 | self._dir = arg 53 | elif opt in ("-v", "--verbose"): 54 | self._verbose = int(arg) 55 | elif opt in ("--hash"): 56 | self._hash_algo = arg 57 | -------------------------------------------------------------------------------- /bdchecker/main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from bdchecker.__version__ import __version__ 4 | from bdchecker.command.check import CommandCheck 5 | from bdchecker.command.clean import CommandClean 6 | from bdchecker.command.gen import CommandGen 7 | from bdchecker.utils.const_var import APP_NAME 8 | 9 | 10 | def run_gen(): 11 | """ 12 | run command: gen 13 | """ 14 | cmd = CommandGen() 15 | cmd.run(sys.argv[2:]) 16 | 17 | 18 | def run_clean(): 19 | """ 20 | run command: clean 21 | """ 22 | cmd = CommandClean() 23 | cmd.run(sys.argv[2:]) 24 | 25 | 26 | def run_check(): 27 | """ 28 | run command: check 29 | """ 30 | cmd = CommandCheck() 31 | cmd.run(sys.argv[2:]) 32 | 33 | 34 | def main(): 35 | usage_str = "Usage: {} COMMAND [OPTIONS]\n" \ 36 | "\n" \ 37 | "Commands:\n" \ 38 | " gen generate meta file for target path\n" \ 39 | " clean remove meta info of missing file\n" \ 40 | " check check meta for target path\n" \ 41 | "".format(sys.argv[0]) 42 | 43 | if len(sys.argv) < 2: 44 | print(usage_str) 45 | sys.exit(1) 46 | 47 | if sys.argv[1] in ("-h", "--help"): 48 | print(usage_str) 49 | sys.exit(0) 50 | 51 | if sys.argv[1] in ("-v", "--version"): 52 | print("{} {}".format(APP_NAME, __version__)) 53 | sys.exit(0) 54 | 55 | # commands 56 | command_dict = { 57 | "gen": run_gen, 58 | "clean": run_clean, 59 | "check": run_check, 60 | } 61 | 62 | command = sys.argv[1] 63 | func = command_dict.get(command, None) 64 | if func is None: 65 | print(usage_str) 66 | sys.exit(1) 67 | 68 | try: 69 | func() 70 | except Exception as e: 71 | print("{}".format(e)) 72 | sys.exit(1) 73 | 74 | 75 | if __name__ == "__main__": 76 | main() 77 | -------------------------------------------------------------------------------- /bdchecker/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuggleWei/bdchecker/6a46335783c76d309af183abd73bbd2526009382/bdchecker/utils/__init__.py -------------------------------------------------------------------------------- /bdchecker/utils/const_var.py: -------------------------------------------------------------------------------- 1 | APP_NAME = "bdchecker" 2 | -------------------------------------------------------------------------------- /bdchecker/utils/log_handle.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import logging.handlers 3 | import os 4 | import sys 5 | 6 | 7 | class UnixConsoleColorFormatter(logging.Formatter): 8 | def __init__(self, formatter): 9 | grey = "\x1b[38;20m" 10 | yellow = "\x1b[33;20m" 11 | red = "\x1b[31;20m" 12 | bold_red = "\x1b[31;1m" 13 | reset = "\x1b[0m" 14 | 15 | self.FORMATS = { 16 | logging.DEBUG: grey + 
formatter + reset, 17 | logging.INFO: grey + formatter + reset, 18 | logging.WARNING: yellow + formatter + reset, 19 | logging.ERROR: red + formatter + reset, 20 | logging.CRITICAL: bold_red + formatter + reset 21 | } 22 | 23 | def format(self, record): 24 | log_fmt = self.FORMATS.get(record.levelno) 25 | formatter = logging.Formatter(log_fmt) 26 | return formatter.format(record) 27 | 28 | 29 | class LogHandle(object): 30 | """simple log init""" 31 | 32 | @staticmethod 33 | def init_log( 34 | filename, 35 | console_level=logging.WARNING, 36 | file_level=logging.DEBUG, 37 | use_rotate=False, 38 | mode="a"): 39 | """ 40 | initialize log 41 | :param filename: log output filepath 42 | :param console_level: console filter level 43 | :param file_level: file filter level 44 | :param use_rotate: is use rotate 45 | :param mode: open mode 46 | :return: 47 | """ 48 | # create output dir 49 | folder = os.path.dirname(filename) 50 | if len(folder) > 0 and (not os.path.exists(folder)): 51 | os.makedirs(folder, exist_ok=True) 52 | 53 | # log handler 54 | logger = logging.getLogger() 55 | logger.setLevel(logging.DEBUG) 56 | 57 | ch = LogHandle.get_console_handler(console_level) 58 | ch.setFormatter(LogHandle.get_console_formatter()) 59 | logger.addHandler(ch) 60 | 61 | if file_level != -1: 62 | if use_rotate is True: 63 | fh = LogHandle.get_rotating_handler( 64 | level=file_level, filename=filename, mode=mode) 65 | else: 66 | fh = LogHandle.get_file_handler( 67 | level=file_level, filename=filename, mode=mode) 68 | fh.setFormatter(LogHandle.get_formatter()) 69 | logger.addHandler(fh) 70 | 71 | @staticmethod 72 | def get_formatter(): 73 | """ 74 | log format 75 | """ 76 | return logging.Formatter( 77 | "%(asctime)s|%(levelname)s|%(filename)s:%(lineno)s - " 78 | "%(message)s") 79 | 80 | @staticmethod 81 | def get_console_formatter(): 82 | """ 83 | console formatter 84 | """ 85 | if sys.platform.startswith("win32"): 86 | return LogHandle.get_formatter() 87 | else: 88 | return UnixConsoleColorFormatter( 89 | "%(asctime)s|%(levelname)s|%(filename)s:%(lineno)s - " 90 | "%(message)s") 91 | 92 | @staticmethod 93 | def get_console_handler(level): 94 | """ 95 | get console log handler 96 | :param level: log filter level 97 | :return: log handler 98 | """ 99 | handler = logging.StreamHandler() 100 | handler.setLevel(level) 101 | return handler 102 | 103 | @staticmethod 104 | def get_file_handler(level, filename, mode="a"): 105 | """ 106 | get file log handler 107 | :param level: log filter level 108 | :param filename: output filepath 109 | :param mode: open mode 110 | :return: log handler 111 | """ 112 | handler = logging.FileHandler(filename=filename, mode=mode) 113 | handler.setLevel(level) 114 | return handler 115 | 116 | @staticmethod 117 | def get_rotating_handler( 118 | level, filename, mode="a", 119 | maxBytes=20 * 1024 * 1024, backupCount=10): 120 | """ 121 | get rotating log handler 122 | :param level: log filter level 123 | :param filename: output filepath 124 | :param mode: open mode 125 | :param maxBytes: max bytes 126 | :param backupCount: backup file count 127 | :return: log handler 128 | """ 129 | handler = logging.handlers.RotatingFileHandler( 130 | filename=filename, mode=mode, maxBytes=maxBytes, 131 | backupCount=backupCount) 132 | handler.setLevel(level) 133 | return handler 134 | 135 | @staticmethod 136 | def log_level(str_level: str): 137 | """ 138 | convert string to log level enum 139 | :param str_level: log level string 140 | :return: log level enum 141 | """ 142 | if str_level.lower() == 
"debug": 143 | return logging.DEBUG 144 | elif str_level.lower() == "info": 145 | return logging.INFO 146 | elif str_level.lower() == "warning": 147 | return logging.WARNING 148 | elif str_level.lower() == "error": 149 | return logging.ERROR 150 | elif str_level.lower() == "fatal": 151 | return logging.FATAL 152 | else: 153 | return logging.INFO 154 | -------------------------------------------------------------------------------- /bdchecker/utils/singleton.py: -------------------------------------------------------------------------------- 1 | def singleton(cls, *args, **kwargs): 2 | instances = {} 3 | 4 | def _singleton(*args, **kwargs): 5 | if cls not in instances: 6 | instances[cls] = cls(*args, **kwargs) 7 | return instances[cls] 8 | 9 | return _singleton 10 | -------------------------------------------------------------------------------- /etc/settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /pyinstaller_pkg.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | setlocal ENABLEDELAYEDEXPANSION 4 | 5 | set origin_dir=%~dp0 6 | cd %origin_dir% 7 | 8 | python -m venv venv 9 | call venv\Scripts\activate.bat 10 | pip install -r requirements-dev.txt 11 | 12 | pyinstaller -F bdchecker\main.py --distpath dist\bdchecker -n bdchecker 13 | xcopy README.md dist\bdchecker\ /Y 14 | xcopy README_cn.md dist\bdchecker\ /Y 15 | xcopy LICENSE dist\bdchecker\ /Y 16 | 17 | call venv\Scripts\deactivate.bat -------------------------------------------------------------------------------- /pyinstaller_pkg.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | origin_dir="$(dirname "$(readlink -f "$0")")" 4 | cd $origin_dir 5 | 6 | if [ -d "venv" ]; then 7 | echo "venv already exists" 8 | else 9 | echo "create venv" 10 | python -m venv venv 11 | fi 12 | 13 | source venv/bin/activate 14 | 15 | if [ $? -eq 0 ]; then 16 | echo "success source activate" 17 | else 18 | echo "failed source activate" 19 | exit 1 20 | fi 21 | 22 | pip install -r requirements-dev.txt 23 | 24 | pyinstaller -F bdchecker/main.py --distpath dist/bdchecker -n bdchecker 25 | cp ./README.md dist/bdchecker/ 26 | cp ./README_cn.md dist/bdchecker/ 27 | cp ./LICENSE dist/bdchecker/ 28 | -------------------------------------------------------------------------------- /pypi_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | origin_dir="$(dirname "$(readlink -f "$0")")" 4 | cd $origin_dir 5 | 6 | if [ -d "venv" ]; then 7 | echo "venv already exists" 8 | else 9 | echo "create venv" 10 | python -m venv venv 11 | fi 12 | 13 | source venv/bin/activate 14 | 15 | if [ $? 
-eq 0 ]; then 16 | echo "success source activate" 17 | else 18 | echo "failed source activate" 19 | exit 1 20 | fi 21 | 22 | pip install -r requirements-dev.txt 23 | 24 | python -m pip install --upgrade build 25 | python -m build 26 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "bdchecker" 7 | license = {file = "LICENSE"} 8 | authors = [ 9 | {name="Muggle Wei", email="mugglewei@gmail.com"} 10 | ] 11 | description = "Backup Data Checker" 12 | readme = "README.md" 13 | requires-python = ">=3.8" 14 | keywords = ["data cold backup", "data checker"] 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | "License :: OSI Approved :: MIT License", 18 | "Operating System :: OS Independent", 19 | ] 20 | 21 | dynamic = ["dependencies", "version"] 22 | 23 | [project.scripts] 24 | bdchecker = "bdchecker.main:main" 25 | 26 | [project.urls] 27 | "Homepage" = "https://github.com/MuggleWei/bdchecker" 28 | "Bug Tracker" = "https://github.com/MuggleWei/bdchecker/issues" 29 | 30 | [tool.setuptools] 31 | include-package-data = true 32 | packages = ["bdchecker", "bdchecker.command", "bdchecker.utils"] 33 | 34 | [tool.setuptools.dynamic] 35 | dependencies = {file = ["requirements.txt"]} 36 | version = {attr = "bdchecker.__version__.__version__"} 37 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pyright 2 | pynvim 3 | pyinstaller==6.3.0 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuggleWei/bdchecker/6a46335783c76d309af183abd73bbd2526009382/requirements.txt --------------------------------------------------------------------------------