├── .gitignore ├── README.md ├── libs.txt ├── merge.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | Book*/ 2 | venv/ 3 | Book* -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Visualize-ML-Auto-Merge 2 | ## 鸢尾花书开源项目自动合并工具 3 | 鸢尾花书开源项目地址: https://github.com/Visualize-ML 4 | ## 已实现功能: 5 | 1. 项目中各repo的分章节PDF文件按repo名合并成单一PDF, 方便阅读 6 | 2. 项目中各repo的嵌套文件路径代码文件合并至repo名文件夹下, 减少文件嵌套层级 7 | 3. 遍历项目中各repo依赖包, 写入libs.txt, 方便一次性导入所有依赖包 8 | 9 | ## 使用方法: 10 | ![image](https://user-images.githubusercontent.com/24363184/236681866-4d241ae0-4557-4b58-9279-bd3928b9deec.png) 11 | 1. 安装依赖包: pip install -r requirements.txt 12 | 2. clone 鸢尾花书项目 repo 至本项目文件夹中 13 | 3. 运行merge.py, 项目代码及PDF归至MergedBooks文件夹下. 14 | 4. 刷新安装缺失依赖包: pip install -r libs.txt 15 | 16 | ## 说明: 17 | 1. 花书部分repo还在更新中, merge前建议pull 花书项目repo, 获取最新PDF 18 | 2. 
"""Merge the Visualize-ML book repositories cloned next to this script.

Usage (as described in the project README):

1. ``pip install -r requirements.txt``
2. Clone the book repositories (directories named ``Book*``) into the
   directory that contains this script.
3. Run ``merge.py``.  For every repository the chapter PDFs are merged
   into ``MergedBooks/<repo>.pdf`` and the code files of every chapter
   folder are copied into the flat folder ``MergedBooks/<repo>/``.
4. ``pip install -r libs.txt`` installs the packages imported by the
   book code; ``libs.txt`` is rewritten on every run.
"""
import json
import os
import platform
import re
import shutil
import sys

# Third-party packages are imported defensively so that the helpers which
# do not need them stay usable; the functions that do need them raise an
# explicit ImportError when called.
try:
    import git
except ImportError:
    git = None

try:
    from PyPDF2 import PdfMerger
except ImportError:
    PdfMerger = None

# Kept for backward compatibility; paths are built with os.path.join.
os_name = platform.system().lower()

# Entries of a book repository that are never treated as chapter folders.
_SKIPPED_SUFFIXES = ('.pdf', '.md', '.git')

# Imported names that must never be written to libs.txt.
_NOT_PIP_INSTALLABLE = frozenset((
    'random', 'mpl_toolkits', 'pylab', 'calendar', 'os', 'colorsys', 'copy',
))

# sys.stdlib_module_names only exists on Python 3.10+, hence the getattr.
_STDLIB_MODULES = (frozenset(sys.builtin_module_names)
                   | frozenset(getattr(sys, 'stdlib_module_names', ())))

# Import name -> name of the distribution to install with pip.
_PIP_NAMES = {
    'skimage': 'scikit-image',
    'sklearn': 'scikit-learn',
    'PIL': 'Pillow',
}


def _natural_key(name):
    """Return a sort key that orders embedded numbers numerically."""
    parts = re.split(r'(\d+)', name)
    # re.split with one capture group puts the digit runs at odd indexes.
    return [int(tok) if i % 2 else tok for i, tok in enumerate(parts)]


def _chapter_dirs(s_path):
    """Return the sorted paths of the chapter directories inside *s_path*."""
    paths = (os.path.join(s_path, entry)
             for entry in sorted(os.listdir(s_path))
             if not entry.endswith(_SKIPPED_SUFFIXES))
    # Plain files at the repository root cannot be listed as directories.
    return [path for path in paths if os.path.isdir(path)]


def _imported_modules(line):
    """Return the top-level modules imported by one line of source code.

    Handles ``import a.b as c, d``, ``from a.b import c`` and several
    statements separated by ``;``.  Relative imports and modules listed in
    ``sys.builtin_module_names`` are skipped.
    """
    found = []
    code = line.split('#', 1)[0]
    for stmt in code.split(';'):
        words = stmt.split()
        if len(words) < 2:
            continue
        if words[0] == 'from':
            targets = [words[1]]
        elif words[0] == 'import':
            targets = [part.split()[0]
                       for part in stmt.split('import', 1)[1].split(',')
                       if part.strip()]
        else:
            continue
        for target in targets:
            root = target.split('.')[0]
            # A relative import leaves an empty root, which is rejected here.
            if root.isidentifier() and root not in sys.builtin_module_names:
                found.append(root)
    return found


def merge_dir_pdfs(s_path, f_name, t_path='./MergedBooks/'):
    """Merge the chapter PDFs found in *s_path* into one PDF.

    Args:
        s_path: Directory that contains the chapter PDFs.
        f_name: Name (without extension) of the merged PDF.
        t_path: Output directory; created when missing.

    Raises:
        ImportError: If PyPDF2 is not installed.

    Files ending in ``整体布局.pdf`` are left out.  Chapters are appended in
    natural name order.  Nothing is written when there is no PDF to merge.
    """
    if PdfMerger is None:
        raise ImportError('PyPDF2 is required to merge PDFs '
                          '(pip install -r requirements.txt)')
    pdf_names = sorted(
        (f for f in os.listdir(s_path)
         if f.endswith('.pdf') and not f.endswith('整体布局.pdf')),
        key=_natural_key)
    if not pdf_names:
        return
    os.makedirs(t_path, exist_ok=True)
    merger = PdfMerger()
    try:
        for name in pdf_names:
            merger.append(os.path.join(s_path, name))
        merger.write(os.path.join(t_path, f_name + '.pdf'))
    finally:
        merger.close()


def merge_dir_codes(s_path, f_name, t_path='MergedBooks'):
    """Copy the files of every chapter folder into one flat folder.

    Args:
        s_path: Root directory of a book repository.
        f_name: Name of the target folder.
        t_path: Parent of the target folder, relative to the working
            directory.

    The target ``<cwd>/<t_path>/<f_name>`` is recreated from scratch on
    every call.  Only regular files that sit directly inside a chapter
    folder are copied; nested directories are skipped.
    """
    chapter_dirs = _chapter_dirs(s_path)
    target_dir = os.path.join(os.getcwd(), t_path, f_name)
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)
    os.makedirs(target_dir)
    for chapter in chapter_dirs:
        for name in sorted(os.listdir(chapter)):
            src = os.path.join(chapter, name)
            if os.path.isfile(src):
                shutil.copy(src, os.path.join(target_dir, name))


def get_lib_list(s_path, f_name):
    """Collect the modules imported by the code of one book repository.

    Args:
        s_path: Root directory of a book repository.
        f_name: Unused; kept so that existing callers keep working.

    Returns:
        Top-level module names, possibly with duplicates, taken from the
        ``.py`` and ``.ipynb`` files directly inside each chapter folder.
    """
    libs = []
    for chapter in _chapter_dirs(s_path):
        for name in sorted(os.listdir(chapter)):
            path = os.path.join(chapter, name)
            if name.endswith('.ipynb'):
                libs.extend(get_ipynb_lib_list(path))
            elif name.endswith('.py'):
                libs.extend(get_py_lib_list(path))
    return libs


def write_libs_txt(libs, path='libs.txt'):
    """Write the pip-installable packages among *libs* to *path*.

    Args:
        libs: Imported module names; duplicates are allowed.
        path: File to write, one package per line, sorted.

    Standard-library modules and the names in ``_NOT_PIP_INSTALLABLE`` are
    dropped, and import names are translated to distribution names
    (for example ``sklearn`` -> ``scikit-learn``).
    """
    packages = {
        _PIP_NAMES.get(lib, lib)
        for lib in libs
        if lib not in _NOT_PIP_INSTALLABLE and lib not in _STDLIB_MODULES
    }
    with open(path, 'w', encoding='utf8') as f:
        f.write(''.join(f'{name}\n' for name in sorted(packages)))


def get_ipynb_lib_list(fname):
    """Return the modules imported by the code cells of a notebook.

    Args:
        fname: Path of a UTF-8 encoded ``.ipynb`` file.

    Returns:
        Top-level module names in order of appearance, with duplicates.
    """
    with open(fname, encoding='utf8') as f:
        notebook = json.load(f)
    libs = []
    for cell in notebook.get('cells', []):
        if cell.get('cell_type') != 'code':
            continue
        source = cell.get('source', [])
        # Handle a cell whose source is one string instead of a line list.
        if isinstance(source, str):
            source = source.splitlines()
        for line in source:
            libs.extend(_imported_modules(line))
    return libs


def get_py_lib_list(fname):
    """Return the modules imported by a Python source file.

    Args:
        fname: Path of a UTF-8 encoded ``.py`` file.

    Returns:
        Top-level module names in order of appearance, with duplicates.
    """
    libs = []
    with open(fname, encoding='utf8') as f:
        for line in f:
            libs.extend(_imported_modules(line))
    return libs


def get_dir_list():
    """Return ``[name, absolute_path]`` for every ``Book*`` directory.

    Only directories of the current working directory are returned, sorted
    by name; plain files whose name starts with ``Book`` are ignored.
    """
    cwd = os.getcwd()
    return [[name, os.path.join(cwd, name)]
            for name in sorted(os.listdir(cwd))
            if name.startswith('Book')
            and os.path.isdir(os.path.join(cwd, name))]


def pull_repo_before_merge(fname):
    """Run ``git pull`` on the ``origin`` remote of the repository *fname*.

    Raises:
        ImportError: If GitPython is not installed.
    """
    if git is None:
        raise ImportError('GitPython is required to pull repositories '
                          '(pip install -r requirements.txt)')
    repo = git.Repo(fname)
    repo.remotes.origin.pull()
    print(f'Pull {fname} success!')


def merge_dirs(dir_list, pull=False):
    """Merge every repository in *dir_list* and rewrite ``libs.txt``.

    Args:
        dir_list: ``[name, path]`` pairs as returned by ``get_dir_list``.
        pull: When true, pull each repository before merging it.
    """
    libs = []
    for fname, fpath in dir_list:
        if pull:
            pull_repo_before_merge(fname)
        merge_dir_codes(fpath, fname)
        merge_dir_pdfs(fpath, fname)
        libs.extend(get_lib_list(fpath, fname))
        print(f'成功合并{fname}!')
    write_libs_txt(libs)


if __name__ == '__main__':
    dir_list = get_dir_list()
    merge_dirs(dir_list)