├── .gitignore ├── .idea ├── .gitignore ├── Python_Spider_All.iml ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── misc.xml ├── modules.xml └── vcs.xml ├── README.md ├── 书旗小说 ├── shuqi_novel_search.py └── 书旗小说详情加密 ├── 优酷eid_uid_videoid作者ID转换 └── 转换.py ├── 哔哩哔哩下载 ├── bilibili_download.py ├── bilibili_download_base.py ├── bilibili_ocr.py ├── test_option.py ├── 人工核验验证码图片的场景.html ├── 使用说明 ├── 视频下架的场景.html ├── 记录哔哩哔哩问题.txt └── 需要人机识别的场景.html ├── 哔哩哔哩主页采集 └── bilibili_user_getall.py ├── 喜马拉雅FM ├── .idea │ ├── .gitignore │ ├── inspectionProfiles │ │ ├── Project_Default.xml │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ ├── vcs.xml │ └── 喜马拉雅FM.iml ├── 使用教程 └── 喜马拉雅spider.py ├── 微信公众号短链接转长链接 └── short_to_long.py ├── 百度翻译spider ├── .idea │ ├── .gitignore │ ├── inspectionProfiles │ │ ├── Project_Default.xml │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ ├── workspace.xml │ └── 百度翻译spider.iml ├── requirement.txt ├── sign加密参数破解.py ├── 使用教程 └── 百度翻译spider.py ├── 秒拍视频 ├── 使用教程 └── 秒拍spider.py ├── 网易云爬虫 ├── requirment.txt ├── 使用说明 ├── 加密解密代码.py └── 网易云spider.py ├── 虾米音乐spider ├── .idea │ ├── .gitignore │ ├── inspectionProfiles │ │ ├── Project_Default.xml │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ ├── vcs.xml │ └── 虾米音乐spider.iml ├── requirment.txt ├── xiami_audio_spider.py ├── xiami_test_secret_parms.py └── 使用说明 └── 起点中文网详情字体加密破解 ├── my_font_content.py ├── qidian_novel_info_spider.py ├── 字体文件解析.py └── 字体解密记录 /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | /喜马拉雅FM/.idea/ 3 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Datasource local storage ignored files 5 | /dataSources/ 6 | /dataSources.local.xml 7 | # Editor-based HTTP Client requests 8 | /httpRequests/ 9 | -------------------------------------------------------------------------------- /.idea/Python_Spider_All.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 15 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python_Spider_All 2 | 每完成一个项目存储一个 欢迎添加 3 | 
######################################################################## 4 | 注意!!! 5 | config 本地配置文件 使用时请省略,主要包含 代理和翻页参数 6 | ######################################################################## 7 | 8 | # 书旗小说 9 | 10 | # 优酷eid_uid_videoid作者ID转换 11 | 12 | # 哔哩哔哩视频下载 13 | 14 | # 哔哩哔哩主页采集 15 | 个人主页公开视频信息 16 | # 虾米音乐 17 | 18 | # 网易云 19 | 20 | # 喜马拉雅FM 21 | 2023 06 修复 22 | 23 | # 微信公众号短链接转长链接 24 | 简单的解析页面进行拼接 25 | 26 | # 百度翻译 27 | 28 | # 秒拍 29 | 老版本已经失效,可借鉴 30 | 31 | # 书旗小说 详情 -------------------------------------------------------------------------------- /书旗小说/shuqi_novel_search.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Chance favors the prepared mind. 3 | # author : pyl owo, 4 | # time : 2020/9/21 5 | import datetime 6 | import json 7 | import random 8 | import re 9 | import time 10 | from hashlib import md5 11 | 12 | from fake_useragent import UserAgent 13 | import requests 14 | # 获取代理 15 | def get_proxy(): 16 | pass 17 | 18 | # 统一请求函数 19 | def unify_requests(method="GET",url="",headers={},proxies={},data={},verify=False,cookies={}): 20 | if method=="GET": 21 | response = requests.get(url, headers=headers,proxies=proxies,data=data,cookies=cookies,timeout=5) 22 | return response 23 | else: 24 | response = requests.post(url, headers=headers,proxies=proxies,data=data,verify=verify,cookies=cookies,timeout=5) 25 | return response 26 | 27 | # 书旗小说 28 | class SFQingNovel: 29 | def __init__(self, use_proxy=True): 30 | self.proxy = get_proxy() if use_proxy else None 31 | """:cvar 32 | 有三点反爬, 33 | 1,禁用右键 34 | 2,sign 35 | 3,headers里的 authorization 属性 36 | """ 37 | 38 | ############################################################ 39 | # Md5 加密函数 32 返回32位的加密结果 40 | def md5_use(self, text: str) -> str: 41 | result = md5(bytes(text, encoding="utf-8")).hexdigest() 42 | # print(result) 43 | return result 44 | 45 | # 获取加密 sign timestamp 46 | def shuqi_jiami(self, book_id: str, time_stamp: str = str(int(time.time())), use_pwd='37e81a9d8f02596e1b895d07c171d5c9', 47 | user_id="8000000"): 48 | """ function i(t, n, e) { 49 | var o = Object.keys(t).filter(function(t) { 50 | return !Array.isArray(n) || -1 !== n.indexOf(t) 51 | }).sort().map(function(n) { 52 | return t[n] 53 | }).join("") + (e || n); 54 | return a()(o) 55 | }""" 56 | """""" 57 | # 改写规则就是简单的拼接 艹 58 | info = self.md5_use(book_id + time_stamp + user_id + use_pwd) 59 | # 打印 sign 时间戳 以及 书籍ID 60 | # print(info,book_id,time_stamp) 61 | return info 62 | 63 | # 获得 authorization 的值 (在请求里面 需要re) 64 | def shuqi_get_header_token(self, book_id: str): 65 | # response = requests.get("https://t.shuqi.com/cover/{}".format(book_id)) 66 | response = unify_requests(url="https://t.shuqi.com/cover/{}".format(book_id), proxies=self.proxy) 67 | # print(response.text) 68 | token = re.findall(r'"token":"(.*?)"', response.text) 69 | token = token[0] if token else "" 70 | if token: 71 | # print(token) 72 | return token 73 | else: 74 | return "" 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | #####################################3 以上👆 加密 83 | 84 | # 获得响应 85 | def get_response(self, novel_url, time_stamp: str = str(int(time.time())), user_id: str = "8000000", **kwargs): 86 | if kwargs.get('qin_quan_id_int'): 87 | bookId = str(kwargs.get('qin_quan_id_int')) 88 | elif novel_url: 89 | bookId = str(novel_url).split('?')[0].split('/')[-1] 90 | else: 91 | return {} 92 | # print(bookId) 93 | token = self.shuqi_get_header_token(bookId) 94 | if token: 95 | pass 96 | else: 97 | print("获取token authorization 失败") 98 | 
return False 99 | headers = { 100 | 'User-Agent': UserAgent().random, 101 | "Proxy-Tunnel": str(random.randint(1, 10000)), 102 | 'authority': 'ocean.shuqireader.com', 103 | 'accept': 'application/json, text/plain, */*', 104 | # 'authorization': 'Bearer eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiI4MDAwMDAwIiwidXRkaWQiOiIiLCJpbWVpIjoiIiwic24iOiIiLCJleHAiOjE2MDA4NDgyNTYsInVzZXJJZCI6IjgwMDAwMDAiLCJpYXQiOjE2MDA4MzAyNTYsIm9haWQiOiIiLCJwbGF0Zm9ybSI6IjAifQ.tjgtZMMoMWCoA7Z-z1M55d7MUEFy4GjruQoeoyAOnSWYy1glqk-YkEbOHfX6oSH_3T-bhF0NKz6-4If4gSKz1A', 105 | 'authorization': "Bearer " + self.shuqi_get_header_token(bookId), 106 | 'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Mobile Safari/537.36', 107 | 'content-type': 'application/x-www-form-urlencoded', 108 | 'origin': 'https://t.shuqi.com', 109 | 'sec-fetch-site': 'cross-site', 110 | 'sec-fetch-mode': 'cors', 111 | 'sec-fetch-dest': 'empty', 112 | 'referer': 'https://t.shuqi.com/cover/7027302', 113 | 'accept-language': 'zh-CN,zh;q=0.9', 114 | } 115 | 116 | data = { 117 | 'user_id': '%s' % (user_id), 118 | 'bookId': '%s' % (bookId), 119 | 'timestamp': '%s' % (time_stamp), 120 | 'sign': '%s' % (self.shuqi_jiami(bookId, time_stamp, user_id=user_id)), 121 | 'platform': '0' 122 | } 123 | # print(headers,data) 124 | # response = requests.post('https://ocean.shuqireader.com/webapi/bcspub/openapi/book/info', headers=headers, 125 | # data=data) 126 | response = unify_requests(url="https://ocean.shuqireader.com/webapi/bcspub/openapi/book/info", method="POST", headers=headers, data=data, proxies=self.proxy) 127 | # print(json.loads(response.text)) 128 | return response 129 | 130 | # 获取小说所有详细信息 131 | def get_novel_info(self, novel_url, **kwargs): 132 | search_result = self.parse_novel_info(self.get_response(novel_url, **kwargs), novel_url, **kwargs) 133 | return search_result 134 | 135 | 136 | # 获取评论数 137 | def get_comment(self, novel_url, **kwargs): 138 | if kwargs.get('qin_quan_id_int'): 139 | bookId = str(kwargs.get('qin_quan_id_int')) 140 | elif novel_url: 141 | bookId = str(novel_url).split('?')[0].split('/')[-1] 142 | else: 143 | return {} 144 | token = self.shuqi_get_header_token(bookId) 145 | if token: 146 | pass 147 | else: 148 | print("获取token authorization 失败") 149 | return False 150 | headers = { 151 | 'User-Agent': UserAgent().random, 152 | "Proxy-Tunnel": str(random.randint(1, 10000)), 153 | 'authority': 'ocean.shuqireader.com', 154 | 'accept': 'application/json, text/plain, */*', 155 | 'authorization': "Bearer " + self.shuqi_get_header_token(bookId), 156 | 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36', 157 | 'origin': 'https://t.shuqi.com', 158 | 'sec-fetch-site': 'cross-site', 159 | 'sec-fetch-mode': 'cors', 160 | 'sec-fetch-dest': 'empty', 161 | 'referer': 'https://t.shuqi.com/', 162 | 'accept-language': 'zh-CN,zh;q=0.9', 163 | # 'Cookie': 'XSRF-TOKEN=1f1a10da-49bc-44eb-a39a-8fc19e44f8a0' 164 | } 165 | 166 | info_base_url = "https://ocean.shuqireader.com/webapi/comment/novel/i.php?do=sp_get&bookId={}&fetch=merge&sqUid=8000000&source=store&size=3&page=1&score=yes&authorId=8000000" # 129676 数字id 167 | 168 | if kwargs.get('qin_quan_id_int'): 169 | respose_info = unify_requests(url=info_base_url.format('kwargs.get("qin_quan_id_int")'), 170 | headers=headers, proxies=self.proxy) 171 | elif novel_url: 172 | # print(info_base_url.format((str(novel_url).split('?')[0].split('/')[-1]))) 173 | respose_info = 
unify_requests(url=info_base_url.format((str(novel_url).split('?')[0].split('/')[-1])), 174 | headers=headers, proxies=self.proxy) 175 | else: 176 | return {} 177 | return respose_info 178 | 179 | # 搜索视频响应解析 180 | def parse_novel_info(self, respose_info, novel_url='', **kwargs) -> dict: 181 | try: 182 | print(novel_url) 183 | response_dict = json.loads(respose_info.text).get('data', {}) 184 | comment_dict = json.loads(self.get_comment(novel_url, **kwargs).text) 185 | except Exception as e: 186 | print(e) 187 | return {} 188 | else: 189 | # info_book_dict = info_dict.get('book', {}) 190 | novel_dict = dict() 191 | # ''.join(response_data.xpath('')) 192 | # response_dict.get('', '') 193 | novel_dict['all_recommend_str'] = None # 总推荐数 str 194 | novel_dict['month_recommend_str'] = None # 月推荐数 str 195 | novel_dict['week_recommend_str'] = None # 周推荐数 str 196 | novel_dict['all_read_int'] = None # 总阅读数 int 197 | novel_dict['month_read_int'] = None # 月阅读数 int 198 | novel_dict['week_read_int'] = None # 周阅读数 int 199 | novel_dict['all_words_number_int'] = int(float(response_dict.get('wordCount', '')) * 10000) if response_dict.get('wordCount', '') else None # 总字数 200 | book_status = response_dict.get('state', '') 201 | if book_status == "1": 202 | book_status_str = "连载" 203 | elif book_status == "2": 204 | book_status_str = "完结" 205 | else: 206 | book_status_str = "暂无" 207 | novel_dict['book_status_str'] = book_status_str # 书籍状态 (连载,完结,暂无)bookCP 208 | novel_dict['book_property_str'] = None # 书籍属性 (免费,会员,限免) 209 | novel_dict['author_type_str'] = None # 作者类型 (金牌,签约,独立 默认无) 210 | novel_dict['book_lable_str'] = "|".join([i.get('tagName') for i in response_dict.get('tag', [])]) # 书籍标签 (用|分割的字符串 ''科幻|现实|励志'') 211 | novel_dict['book_type_str'] = None # 书籍分类 (玄幻 ,科幻,言情...)按搜索结果来多个按|分割 212 | novel_dict['book_update_time'] = datetime.datetime.strftime(datetime.datetime.fromtimestamp(response_dict.get('lastChapter', {}).get('updateTime')), "%Y-%m-%d") # 书籍更新日期 年-月-日 213 | novel_dict['book_zong_zhang_jie_int'] = None # 书籍总的章节 完结的,未完结就填目前的总章节 214 | novel_dict['book_zui_xin_zhang_jie_name_str'] = response_dict.get('lastChapter', {}).get('updateTime') # 最新章节名称 215 | novel_dict['book_introduce_text'] = response_dict.get('desc', '') # 书籍简介 text 216 | novel_dict['book_cover_image_str'] = response_dict.get('imgUrl', '') # 书籍封面 URL imgUrl 217 | novel_dict['book_detail_url_str'] = novel_url # 书籍详情URL 218 | novel_dict['book_detail_id_int'] = response_dict.get('bookId', '') # 书籍简介 text # 书籍详情ID 数字形式 bookId 219 | novel_dict['book_detail_id_str'] = str(response_dict.get('bookId', '')) # 书籍详情ID 字符形式 220 | novel_dict['book_zhan_dian_str'] = None # 书籍站点 (男生,女生,暂无) 221 | novel_dict['book_publish_str'] = '书旗小说' # 出版社 默认侵权平台' 222 | novel_dict['book_commeds_int'] = comment_dict.get('info', {}).get('total') # 书籍评论数 223 | novel_dict['author_grade_float'] = None # 作者评分 224 | novel_dict['author_id_str'] = str(response_dict.get('authorId', '')) # 作者ID 字符形式 ## 新增 authorId 225 | novel_dict['author_page_url_str'] = None # 作者主页链接 userId 226 | novel_dict['author_book_number_int'] = None # 作者书籍总数 227 | novel_dict['author_likes_int'] = None # 作者获赞总数 228 | novel_dict['author_all_words_number_str'] = None # 作者累计创作字数 229 | novel_dict['author_produce_days_str'] = None # 作者累计创作天数 230 | novel_dict['author_fens_number_int'] = None # 作者粉丝数 231 | novel_dict['author_head_image_url_str'] = response_dict.get('authorIcon', '') # 作者头像URL authorIcon 232 | return novel_dict 233 | 234 | 235 | # 统一的调用 search_novels 236 | search_novel_info = 
SFQingNovel(use_proxy=False).get_novel_info 237 | if __name__ == "__main__": 238 | result = search_novel_info('https://t.shuqi.com/cover/7329628') 239 | print(result) -------------------------------------------------------------------------------- /书旗小说/书旗小说详情加密: -------------------------------------------------------------------------------- 1 | 直接执行 没加代理, 2 | 加代理 3 | use_proxy = True (代理函数 自己添加) 4 | 5 | 具体能获得的值 已经列好列,主要还是# 上面的加密的地方,参数加密 执行 函数 shuqi_jiami 6 | data = { 7 | 'user_id': '%s' % (user_id), 8 | 'bookId': '%s' % (bookId), 9 | 'timestamp': '%s' % (time_stamp), 10 | 'sign': '%s' % (self.shuqi_jiami(bookId, time_stamp, user_id=user_id)), 11 | 'platform': '0' 12 | } 13 | 14 | 后记,真的烦 唯独没有测试md5 我以为不会那么简单的,淦!!! -------------------------------------------------------------------------------- /优酷eid_uid_videoid作者ID转换/转换.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 享受雷霆感受雨露 3 | # author xyy,time:2022/6/9 4 | #!/usr/bin/env python 5 | # -*- coding: utf-8 -*- 6 | 7 | import base64 8 | 9 | # 作者主页字符ID转数字ID 10 | def uid2eid(uid): 11 | """ 用户数字ID 加密为 字符串ID""" 12 | return encrypt(uid, type_str=1) 13 | 14 | # 视频字符ID转数字ID 15 | def uid2vid(uid): 16 | """ 视频数字ID 加密为 字符串ID """ 17 | return encrypt(uid, type_str=2) 18 | 19 | # 字符串转int 20 | def eid2uid(eid,type_str:int): 21 | """ 字符串ID 解密为 数字ID """ 22 | uid = "" 23 | if type_str==1: 24 | uid = int(str_to_int(eid[1:]))/4 25 | elif type_str==2: 26 | uid = int(str_to_int(eid[4:]))/4 27 | 28 | return str(int(uid)) 29 | 30 | # 字符串形式转ID的方法 bs64 31 | def str_to_int(eid:str): 32 | uid = base64.b64decode(eid).decode() 33 | return uid 34 | 35 | # 数字转换字符串形式的 36 | def encrypt(int_id:int, type_str:int): 37 | num = str(int(int_id)*4) 38 | eid = base64.b64encode(num.encode()).decode() 39 | if type_str==1: 40 | return "U"+eid 41 | elif type_str==2: 42 | return "id_X"+eid 43 | 44 | if __name__ == '__main__': 45 | # 1/作者主页 2/视频 46 | print(eid2uid("UODExNjMwNTc1Ng==",1)) 47 | # print(uid2eid("1596252942")) 48 | -------------------------------------------------------------------------------- /哔哩哔哩下载/bilibili_download.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 享受雷霆感受雨露 3 | # author xyy,time:2020/9/02 4 | import random 5 | from you_get.common import * 6 | import requests 7 | from Task_Compar_Config import Config_Of_Compar as config 8 | from Task_Compar_Config import proxies 9 | from fake_useragent import UserAgent 10 | from task_tool_unit import match1 11 | from tort_download_unit.bilibili_tort_download.bilibili_ocr import _get_toke_and_img 12 | from tort_download_unit.bilibili_tort_download.bilibili_download_base import download_urls # 给出下载链接 下载 13 | 14 | stream_types = [ 15 | {'id': 'flv_p60', 'quality': 116, 'audio_quality': 30280, 16 | 'container': 'FLV', 'video_resolution': '1080p', 'desc': '高清 1080P60'}, 17 | {'id': 'hdflv2', 'quality': 112, 'audio_quality': 30280, 18 | 'container': 'FLV', 'video_resolution': '1080p', 'desc': '高清 1080P+'}, 19 | {'id': 'flv', 'quality': 80, 'audio_quality': 30280, 20 | 'container': 'FLV', 'video_resolution': '1080p', 'desc': '高清 1080P'}, 21 | {'id': 'flv720_p60', 'quality': 74, 'audio_quality': 30280, 22 | 'container': 'FLV', 'video_resolution': '720p', 'desc': '高清 720P60'}, 23 | {'id': 'flv720', 'quality': 64, 'audio_quality': 30280, 24 | 'container': 'FLV', 'video_resolution': '720p', 'desc': '高清 720P'}, 25 | {'id': 'hdmp4', 'quality': 48, 'audio_quality': 30280, 26 | 'container': 'MP4', 
'video_resolution': '720p', 'desc': '高清 720P (MP4)'}, 27 | {'id': 'flv480', 'quality': 32, 'audio_quality': 30280, 28 | 'container': 'FLV', 'video_resolution': '480p', 'desc': '清晰 480P'}, 29 | {'id': 'flv360', 'quality': 16, 'audio_quality': 30216, 30 | 'container': 'FLV', 'video_resolution': '360p', 'desc': '流畅 360P'}, 31 | # 'quality': 15? 32 | {'id': 'mp4', 'quality': 0}, 33 | 34 | {'id': 'jpg', 'quality': 0}, 35 | ] 36 | dry_run = False 37 | json_output = False 38 | force = False 39 | skip_existing_file_size_check = False 40 | player = None 41 | extractor_proxy = None 42 | cookies = None 43 | output_filename = None 44 | auto_rename = False 45 | insecure = False 46 | import ssl 47 | import socket 48 | import logging 49 | from urllib import request, error 50 | 51 | def urlopen_with_retry(*args, **kwargs): 52 | retry_time = 3 53 | for i in range(retry_time): 54 | try: 55 | if insecure: 56 | # ignore ssl errors 57 | ctx = ssl.create_default_context() 58 | ctx.check_hostname = False 59 | ctx.verify_mode = ssl.CERT_NONE 60 | return request.urlopen(*args, context=ctx, **kwargs) 61 | else: 62 | return request.urlopen(*args, **kwargs) 63 | except socket.timeout as e: 64 | logging.debug('request attempt %s timeout' % str(i + 1)) 65 | if i + 1 == retry_time: 66 | raise e 67 | # try to tackle youku CDN fails 68 | except error.HTTPError as http_error: 69 | logging.debug('HTTP Error with code{}'.format(http_error.code)) 70 | if i + 1 == retry_time: 71 | raise http_error 72 | # 通过url下载哔哩哔哩文件 73 | def bilibili_download_urls(bili_url, title, ext='mp4',proxies=proxies,output_dir=config["system_path"]+"/"+config["tort_path"])->"bool": 74 | 75 | try: 76 | urls, size = bilibili_down_load(bili_url, proxy=proxies) 77 | if isinstance(urls,list): # 正常情况 78 | if urls: # ocr识别成功 79 | headers = bilibili_headers(referer=bili_url) 80 | download_urls(urls, title, ext, size, headers=headers, 81 | output_dir=output_dir, 82 | merge=True, 83 | av=True 84 | ) 85 | return True 86 | else: 87 | return False 88 | if isinstance(urls,int): # 错误的情况 89 | return urls # 错误情况返回错误代码 90 | 91 | except Exception as e: 92 | print(e) 93 | return False 94 | # 哔哩哔哩的头 headers 95 | def bilibili_headers(referer=None, cookie=None): 96 | # a reasonable UA 97 | # ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36' 98 | ua = '{}'.format(UserAgent().random) 99 | # print(ua) 100 | headers = {'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.5', 'User-Agent': ua} 101 | if referer is not None: 102 | headers.update({'Referer': referer}) 103 | if cookie is not None: 104 | headers.update({'Cookie': cookie}) 105 | return headers 106 | 107 | # 哔哩哔哩下载地址 108 | def bilibili_down_load(bili_url,proxy=proxies): 109 | # 仿照 you-get 110 | stream_qualities = {s['quality']: s for s in stream_types} 111 | headers = { 112 | "Proxy-Tunnel": str(random.randint(1, 10000)), 113 | 'authority': 'www.bilibili.com', 114 | 'cache-control': 'max-age=0', 115 | 'upgrade-insecure-requests': '1', 116 | # 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36', 117 | "user-agent": UserAgent().random, 118 | 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 119 | 'sec-fetch-site': 'same-site', 120 | 'sec-fetch-mode': 'navigate', 121 | 'sec-fetch-user': '?1', 122 | 'sec-fetch-dest': 'document', 123 | 'referer': 
'https://search.bilibili.com/all?keyword=beatbox&from_source=nav_search&spm_id_from=333.851.b_696e7465726e6174696f6e616c486561646572.9&order=totalrank&duration=0&tids_1=3&tids_2=193', 124 | 'accept-language': 'zh-CN,zh;q=0.9', 125 | # '$cookie': 'CURRENT_FNVAL=16; _uuid=39019883-BF03-8583-5980-65F1AB32A8B437048infoc; buvid3=A1AF6CF2-8DE1-41D4-82FA-331AAF700F4953938infoc; rpdid=|(u)~lJ|l|lJ0J\'ul))Y)m)uu; LIVE_BUVID=AUTO4115905671332477; sid=lubz9xqt; DedeUserID=101681207; DedeUserID__ckMd5=dfc9ce597d1ee703; SESSDATA=4e33cf62%2C1609722422%2Ca87cf*71; bili_jct=96dcdd930c28d4d499acbf1c31b4ebb7; Hm_lvt_8a6e55dbd2870f0f5bc9194cddf32a02=1594256989; PVID=1; bsource=search_baidu; finger=351232418; blackside_state=1', 126 | } 127 | 128 | if proxy: 129 | response = requests.get(bili_url, headers=headers) 130 | 131 | else: 132 | response = requests.get(bili_url, headers=headers,proxies=proxy) 133 | 134 | # print(response.text) 135 | playinfo_text_ = match1(response.text, r'__playinfo__=(.*?) 视频去哪了呢?_哔哩哔哩 (゜-゜)つロ 干杯~-bilibili
啊叻?视频不见了?
返回首页
(the rest of this saved sample page's markup did not survive extraction)
--------------------------------------------------------------------------------
/哔哩哔哩下载/记录哔哩哔哩问题.txt:
--------------------------------------------------------------------------------
1 | 1. Problem 1: bilibili's human-verification (captcha) endpoint
2 | https://sec.biliapi.net/th/captcha/get
3 | Its response contains a base64 image and a token; request again carrying both values and the check passes.
4 | Image recognition is done through the Baidu OCR text-recognition API.
5 | 2. It is not always the captcha endpoint; the video may also have been taken down.
6 | The human-verification step signals its outcome with a return number:
7 | -888 means taken down (currently detected only by checking whether the page contains a few telltale strings)
8 | captcha pages are detected via the English marker "bilibili security" (the page's Chinese text has bad encoding)
9 | take-down pages are detected via the string "视频无法播放"
10 | Other kinds of unplayable pages may appear later.
11 | 3. Return-value convention for the captcha-recognition step:
12 | a step that simply succeeds returns 0;
13 | a download returns True, which marks the task as downloaded;
14 | any intermediate step that completes returns int 0 and triggers no further action (e.g. after the captcha check passes).
--------------------------------------------------------------------------------
/哔哩哔哩下载/需要人机识别的场景.html:
--------------------------------------------------------------------------------
(saved sample of bilibili's "error 412" risk-control page; only the visible text survived extraction)
出错啦! - bilibili.com
错误号:412
由于触发哔哩哔哩安全风控策略,该次访问请求被拒绝。
The request was rejected because of the bilibili security control policy.
155 | 156 | 279 | 280 | 281 | 282 | 283 | 284 | -------------------------------------------------------------------------------- /哔哩哔哩主页采集/bilibili_user_getall.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 享受雷霆感受雨露 3 | # author xyy,time:2023/5/22 4 | 5 | import datetime 6 | import time 7 | import requests 8 | 9 | from hashlib import md5 10 | from urllib.parse import quote 11 | from retrying import retry 12 | 13 | # 代理 14 | def get_proxy(): 15 | return {} 16 | 17 | # Md5 加密函数 32 返回32位的加密结果 18 | def md5_use(text: str) -> str: 19 | result = md5(bytes(text, encoding="utf-8")).hexdigest() 20 | return result 21 | 22 | # 通过时间字符形式 返回时长格式 23 | def unify_duration_format(duar_str_or_s: str): 24 | """ 25 | 01:11 -> 71,'00:01:11' 26 | 00:01:11 -> 71,'00:01:11' 27 | :param duar_str: '01:11' or '00:01:11' 28 | :return: 71, '00:01:11' 29 | """ 30 | error = 0, '' 31 | 32 | def hms(m: int, s: int, h=0): 33 | if s >= 60: 34 | m += int(s / 60) 35 | s = s % 60 # 36 | if m >= 60: 37 | h += int(m / 60) 38 | m = m % 60 39 | return h * 60 * 60 + m * 60 + s, str(h).zfill(2) + ':' + str(m).zfill(2) + ':' + str(s).zfill(2) 40 | 41 | try: 42 | s = int(duar_str_or_s) 43 | except: 44 | pass 45 | else: 46 | return hms(m=s % 3600 // 60, s=s % 60, h=s // 3600) 47 | try: 48 | if duar_str_or_s: 49 | duar_list = duar_str_or_s.split(':') 50 | if len(duar_list) == 2: 51 | return hms(m=int(duar_list[0]), s=int(duar_list[1])) 52 | elif len(duar_list) == 3: 53 | return hms(m=int(duar_list[1]), s=int(duar_list[2]), h=int(duar_list[0])) 54 | else: 55 | return error 56 | else: 57 | return error 58 | except Exception as e: 59 | return error 60 | 61 | # 哔哩哔哩加密 62 | def bilibili_jiami(keyword,mid,pn): 63 | wts = int(time.time()) 64 | key = "keyword={keyword}&mid={mid}&order=pubdate&order_avoided=true&platform=web&pn={pn}&ps=30&tid=0&web_location=1550101&wts={wts}".format(keyword=keyword, 65 | mid=mid, pn=pn, 66 | wts=wts ) 67 | # salt = "72136226c6a73669787ee4fd02a74c27" # 老版本的盐 68 | salt = "5a73a9f6609390773b53586cce514c2e" # 2023 0609 新 69 | w_rid = md5_use(key+salt) 70 | return w_rid,wts 71 | 72 | # 解析ifno 73 | def analysis_parms(info_json): 74 | lis = info_json.get("data",{}).get("list",{}).get("vlist",[]) 75 | now_count = int(info_json.get("data",{}).get("page",{}).get("pn"))*int(info_json.get("data",{}).get("page",{}).get("ps")) 76 | all_count = int(info_json.get("data",{}).get("page",{}).get("count")) 77 | has_more = True if now_count<=all_count else False 78 | lis_dic_ifno = [] 79 | for each in lis: 80 | dic_info = dict() 81 | dic_info["play_num"] = each.get("play","") 82 | dic_info["like_num"] = each.get("photo","") 83 | dic_info["vid"] = each.get("aid","") 84 | dic_info["comment_num"] = each.get("comment","") 85 | dic_info["url"] = "https://www.bilibili.com/video/{}".format(each.get("bvid","")) 86 | dic_info["title"] = each.get("title","") 87 | duration, duration_str = unify_duration_format(each.get("length","")) 88 | dic_info["duration"] = duration_str 89 | dic_info["cover"] = each.get("pic","") 90 | dic_info["uid"] = each.get("mid","") 91 | dic_info["author_name"] = each.get("author","") 92 | dic_info["author_url"] = "https://space.bilibili.com/{}".format(each.get("mid","")) 93 | dic_info["pubtime"] = each.get("created","") 94 | if dic_info["pubtime"]: 95 | dic_info["pubtime"] = datetime.datetime.fromtimestamp(int(str(dic_info["pubtime"])[:10])).strftime("%Y-%m-%d %H:%M:%S") 96 | dic_info["photoUrl"] = each.get("pic","") # 这个是默认的播放的地址 完整版的 97 | 
lis_dic_ifno.append(dic_info) 98 | return lis_dic_ifno,has_more 99 | # 通过链接获取对应的信息 100 | @retry(stop_max_attempt_number=9, wait_fixed=20) 101 | def get_parms(userId="",pcursor=1,keyword=""): 102 | 103 | headers = { 104 | 'authority': 'api.bilibili.com', 105 | 'cache-control': 'max-age=0', 106 | 'sec-ch-ua': '" Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"', 107 | 'sec-ch-ua-mobile': '?0', 108 | 'upgrade-insecure-requests': '1', 109 | 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36', 110 | 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 111 | 'sec-fetch-site': 'none', 112 | 'sec-fetch-mode': 'navigate', 113 | 'sec-fetch-user': '?1', 114 | 'sec-fetch-dest': 'document', 115 | 'accept-language': 'zh-CN,zh;q=0.9', 116 | } 117 | 118 | w_rid,wts = bilibili_jiami(quote(keyword),str(userId),str(pcursor)) 119 | params = { 120 | 'mid': str(userId), 121 | 'ps': '30', 122 | 'tid': '0', 123 | 'pn': str(pcursor), 124 | 'keyword': keyword, 125 | 'order': 'pubdate', 126 | 'platform': 'web', 127 | 'web_location': '1550101', 128 | 'order_avoided': 'true', 129 | 'w_rid': w_rid, 130 | 'wts': wts, 131 | } 132 | cookies = { 133 | 'bsource': 'search_baidu', 134 | 'innersign': '1', 135 | 136 | 137 | } 138 | 139 | response = requests.get('https://api.bilibili.com/x/space/wbi/arc/search', headers=headers,cookies=cookies, params=params,proxies=get_proxy(),timeout=10) 140 | 141 | 142 | return response.json() 143 | 144 | 145 | # 主要的执行的函数 146 | def run(userId="",pcursor=1,max_list_page=1,last_list=None,keyword=""): 147 | """ 148 | userId 用户ID 149 | pcursor 起始页 150 | max_list_page 截止页 151 | keyword 搜索关键词 默认空 152 | """ 153 | # last_list = [] 154 | if last_list is None: 155 | last_list = [] 156 | try: 157 | ever_page_info = get_parms(userId=userId,pcursor=pcursor,keyword=keyword) 158 | lis_dic_ifno,has_more = analysis_parms(ever_page_info) 159 | last_list.extend(lis_dic_ifno) 160 | # print(pcursor,has_more) 161 | if pcursor 2 | 3 | 15 | -------------------------------------------------------------------------------- /喜马拉雅FM/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /喜马拉雅FM/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /喜马拉雅FM/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /喜马拉雅FM/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /喜马拉雅FM/.idea/喜马拉雅FM.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /喜马拉雅FM/使用教程: -------------------------------------------------------------------------------- 1 | 调用 喜马拉雅spider 2 | search_songs 传值 id -------------------------------------------------------------------------------- 
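A minimal usage sketch for the tutorial above (a hypothetical caller script, assuming it sits next to 喜马拉雅spider.py below; search_songs is the module-level shortcut exported at the bottom of that file, and page_num is an optional keyword that overrides the config-driven page range):

# Hypothetical caller for the Ximalaya spider below.
from 喜马拉雅spider import search_songs

# An empty proxy dict sends requests directly; page_num selects a
# single results page instead of the config start/end range.
tracks = search_songs(song_name="道德经", proxy={}, page_num=1)
for track in tracks:
    print(track["audio2_songName"], track["audio2_duration_strsec"], track["audio2_url"])
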
/喜马拉雅FM/喜马拉雅spider.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 享受雷霆感受雨露 3 | # author xyy,time:2022/7/10 4 | 5 | import datetime 6 | import time 7 | import hashlib 8 | import random 9 | import json 10 | import requests, pprint 11 | 12 | from urllib.parse import quote 13 | from hashlib import md5 14 | 15 | 16 | config = { 17 | "xi_ma_la_ya": {"start": 1, "end": 2, "pagesize": 30, "start_page": 1}, 18 | } 19 | 20 | # 关于 User_Agent_Pc 21 | class UserAgent_Base(): 22 | random = "" 23 | 24 | # Md5 加密函数 32 返回32位的加密结果 25 | def md5_use(text: str) -> str: 26 | result = md5(bytes(text, encoding="utf-8")).hexdigest() 27 | return result 28 | 29 | # 通过时间获得一个固定格式的 时长格式 30 | def get_duration_str(seconds: float, like: str = "%02d:%02d:%02d"): 31 | """ 32 | 71 -> 01:11 33 | """ 34 | m, s = divmod(float(seconds), 60) 35 | h, m = divmod(m, 60) 36 | # print(like % (h, m, s)) 37 | return like % (h, m, s) 38 | 39 | 40 | # 爬取喜马拉雅的音乐的类 41 | class XiMaLaYa(object): 42 | 43 | def __init__(self): 44 | self.headers = { 45 | 'authority': 'www.ximalaya.com', 46 | "user-agent": UserAgent_Base().random, 47 | "Proxy-Tunnel": str(random.randint(1, 10000)), 48 | 'content-type': 'application/x-www-form-urlencoded;charset=UTF-8', 49 | 'accept': '*/*', 50 | 'sec-fetch-site': 'same-origin', 51 | 'sec-fetch-mode': 'cors', 52 | 'sec-fetch-dest': 'empty', 53 | 'referer': 'https://www.ximalaya.com/search/sound/%E6%88%91%E4%BB%AC%E4%B8%8D%E4%B8%80%E6%A0%B7/p1', 54 | 'accept-language': 'zh-CN,zh;q=0.9', 55 | 'cookie': '' 56 | } 57 | 58 | def getServerTime(self): 59 | """ 60 | 获取喜马拉雅服务器的时间戳 61 | :return: 62 | """ 63 | # 这个地址就是返回服务器时间戳的接口 64 | serverTimeUrl = "https://www.ximalaya.com/revision/time" 65 | response = requests.get(serverTimeUrl,headers = self.headers) 66 | return response.text 67 | 68 | def getSign(self,serverTime): 69 | """ 70 | 生成 xm-sign 71 | 规则是 md5(ximalaya-服务器时间戳)(100以内随机数)服务器时间戳(100以内随机数)现在时间戳 72 | :param serverTime: 73 | :return: 74 | """ 75 | nowTime = str(round(time.time()*1000)) 76 | 77 | sign = str(hashlib.md5("himalaya-{}".format(serverTime).encode()).hexdigest()) + "({})".format(str(round(random.random()*100))) + serverTime + "({})".format(str(round(random.random()*100))) + nowTime 78 | # 将xm-sign添加到请求头中 79 | self.headers["xm-sign"] = sign 80 | # return sign 81 | 82 | # 统一请求响应函数 83 | def unify_requests(self,method="GET",url="",headers={},proxies={},data={}): 84 | if method=="GET": 85 | response = requests.get(url, headers=headers,proxies=proxies,data=data,timeout=5) 86 | return response 87 | 88 | 89 | # 解析搜索的结果的函数 90 | def parms_search_songs(self,result): 91 | result = result.text 92 | info_dic = json.loads(result) 93 | result_list = [] 94 | # 95 | if "data" in info_dic and "track" in info_dic["data"] and "docs" in info_dic["data"]["track"] and info_dic["data"]["track"]["docs"]: 96 | for each in info_dic["data"]["track"]["docs"]: 97 | if int(each["duration"])<350: 98 | dic_ = {} 99 | dic_["audio2_albumName"] = each["albumTitle"] 100 | dic_["audio2_artistName"] = each["nickname"] 101 | dic_["audio2_songName"] = each["title"] 102 | dic_["audio2_songId"] = each["id"] 103 | dic_["audio2_songtime"] = datetime.datetime.fromtimestamp(int(each["createdAt"]/1000)).strftime("%Y-%m-%d %H:%M:%S") # 时间 104 | dic_["audio2_platform"] = "喜马拉雅" 105 | dic_["audio2_duration_intsec"] = int(each["duration"]) # 音乐时长 2021 02 25 新加功能 秒 106 | dic_["audio2_duration_strsec"] = get_duration_str(seconds=each["duration"]) # 音乐时长 2021 02 25 新加功能 格式化 107 | 
dic_["audio2_albumid"] = each["albumId"] 108 | dic_["audio2_url"] = "https://www.ximalaya.com{trackUrl}".format(trackUrl=each["trackUrl"]) 109 | dic_["audio2_url_hash"] = md5_use(text=dic_["audio2_url"]) 110 | result_list.append(dic_) 111 | return result_list 112 | 113 | # 查找歌曲 114 | def search_songs(self, song_name='在希望的田野上', proxy={}, num=0,**kwargs): 115 | self.headers["referer"]="https://www.ximalaya.com/search/sound/{}/p1".format(quote(song_name)) 116 | result_list = [] 117 | _start = config["xi_ma_la_ya"]["start"] 118 | _end = config["xi_ma_la_ya"]["end"] 119 | 120 | if kwargs.get("page_num"): 121 | if config["xi_ma_la_ya"]["start_page"]==0: 122 | _start = kwargs.get("page_num")-1 123 | _end = kwargs.get("page_num") 124 | elif config["xi_ma_la_ya"]["start_page"]==1: 125 | _start = kwargs.get("page_num") 126 | _end = kwargs.get("page_num") + 1 127 | 128 | for page in range(_start,_end): 129 | url = "https://www.ximalaya.com/revision/search/main?kw={song_name}&page={page}&spellchecker=true&condition=relation&rows=20&device=iPhone&core=track&fq=&paidFilter=false".format(song_name=quote(song_name),page=page) 130 | 131 | if proxy: 132 | self.getSign(self.getServerTime()) 133 | 134 | result = self.unify_requests(url=url, headers=self.headers,proxies=proxy) 135 | else: 136 | self.getSign(self.getServerTime()) 137 | 138 | result = self.unify_requests(url=url,headers=self.headers) 139 | for each in self.parms_search_songs(result): 140 | result_list.append(each) 141 | return result_list 142 | def get_single(self): 143 | pass 144 | 145 | search_songs = XiMaLaYa().search_songs 146 | if __name__ == '__main__': 147 | 148 | proxies = { 149 | 150 | } 151 | each = { 152 | "page_num": 1, 153 | "search_key_words": "道德经", 154 | 155 | } 156 | 157 | # print(wy.search_songs(song_name="丑八怪",proxy=proxies)) 158 | info = search_songs(song_name=each["search_key_words"],proxy=proxies, **each) 159 | print(len(info)) 160 | print(info) 161 | 162 | -------------------------------------------------------------------------------- /微信公众号短链接转长链接/short_to_long.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 享受雷霆感受雨露 3 | # author xujian,time:2023/3/21 4 | 5 | import requests 6 | import re 7 | 8 | from retrying import retry 9 | 10 | # 获取代理 11 | def get_proxy(): 12 | return {} 13 | 14 | @retry(stop_max_attempt_number=6, wait_fixed=1000) 15 | def short_to_long_get_res(url): 16 | 17 | cookies = { 18 | } 19 | 20 | headers = { 21 | 'authority': 'mp.weixin.qq.com', 22 | 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 23 | 'accept-language': 'zh-CN,zh;q=0.9', 24 | 'cache-control': 'no-cache', 25 | 'pragma': 'no-cache', 26 | 'sec-ch-ua': '"Google Chrome";v="113", "Chromium";v="113", "Not-A.Brand";v="24"', 27 | 'sec-ch-ua-mobile': '?0', 28 | 'sec-ch-ua-platform': '"macOS"', 29 | 'sec-fetch-dest': 'document', 30 | 'sec-fetch-mode': 'navigate', 31 | 'sec-fetch-site': 'none', 32 | 'sec-fetch-user': '?1', 33 | 'upgrade-insecure-requests': '1', 34 | 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36', 35 | } 36 | 37 | response = requests.get(url, cookies=cookies, headers=headers, 38 | proxies=get_proxy(), timeout=5) 39 | return response 40 | 41 | 42 | # 获得响应之后进行解析 43 | def parse_html_res(res): 44 | long_url_index = "".join(re.findall(r'_g\.msg_link =(.*);',res.text)) 45 | biz = 
"".join(re.findall(r'__biz=(.*?)&',long_url_index)) 46 | mid = "".join(re.findall(r'mid=(.*?)&',long_url_index)) 47 | idx = "".join(re.findall(r'idx=(.*?)&',long_url_index)) 48 | sn = "".join(re.findall(r'sn=(.*?)&',long_url_index)) 49 | long_url = "https://mp.weixin.qq.com/s?__biz={biz}&mid={mid}&idx={idx}&sn={sn}".format(biz=biz,mid=mid,idx=idx,sn=sn) 50 | return long_url 51 | 52 | # 主调度函数 53 | def run(url): 54 | res = short_to_long_get_res(url) 55 | long_url = parse_html_res(res) 56 | dic_ = {} 57 | dic_["short_url"] = url 58 | dic_["long_url"] = long_url 59 | dic_["platform_name"] = "微信" 60 | if long_url: 61 | return dic_ 62 | else: 63 | return {} 64 | 65 | 66 | get_long_url = run 67 | if __name__ == '__main__': 68 | url = "https://mp.weixin.qq.com/s/i2pcIC3zXrgsTXH79OB_kg" 69 | 70 | info = run(url) 71 | print(info) 72 | -------------------------------------------------------------------------------- /百度翻译spider/.idea/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xuenew/Python_Spider_All/221e20bf84dfe067763adc964c51b40e47f38eed/百度翻译spider/.idea/.gitignore -------------------------------------------------------------------------------- /百度翻译spider/.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 15 | -------------------------------------------------------------------------------- /百度翻译spider/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /百度翻译spider/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /百度翻译spider/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /百度翻译spider/.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 10 | 11 | 16 | 17 | 18 | 19 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 1593160631146 63 | 68 | 69 | 70 | 71 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /百度翻译spider/.idea/百度翻译spider.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /百度翻译spider/requirement.txt: -------------------------------------------------------------------------------- 1 | certifi==2020.6.20 2 | chardet==3.0.4 3 | idna==2.9 4 | PyExecJS==1.5.1 5 | requests==2.24.0 6 | six==1.15.0 7 | urllib3==1.25.9 8 | -------------------------------------------------------------------------------- /百度翻译spider/sign加密参数破解.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 享受雷霆感受雨露 3 | # author xyy,time:2020/5/29 4 | 5 | 6 | import execjs 7 | 8 | # 先编译、后调用 9 | # # 将js文件中的内容读取出来编译即可调用里面的方法了 10 | js_compile = execjs.compile( 11 | r""" 12 | function a(r) { 13 | if 
(Array.isArray(r)) { 14 | for (var o = 0, t = Array(r.length); o < r.length; o++) 15 | t[o] = r[o]; 16 | return t 17 | } 18 | return Array.from(r) 19 | } 20 | function n(r, o) { 21 | for (var t = 0; t < o.length - 2; t += 3) { 22 | var a = o.charAt(t + 2); 23 | a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a), 24 | a = "+" === o.charAt(t + 1) ? r >>> a : r << a, 25 | r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a 26 | } 27 | return r 28 | } 29 | var xx = function e(r) { 30 | var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g); 31 | if (null === o) { 32 | var t = r.length; 33 | t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10)) 34 | } else { 35 | for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++) 36 | "" !== e[C] && f.push.apply(f, a(e[C].split(""))), 37 | C !== h - 1 && f.push(o[C]); 38 | var g = f.length; 39 | g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join("")) 40 | } 41 | var u = void 0 42 | , l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107); 43 | u = null !== i ? i : (i = "320305.131321201" || "") || ""; 44 | for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) { 45 | var A = r.charCodeAt(v); 46 | 128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), 47 | S[c++] = A >> 18 | 240, 48 | S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224, 49 | S[c++] = A >> 6 & 63 | 128), 50 | S[c++] = 63 & A | 128) 51 | } 52 | for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) 53 | p += S[b], 54 | p = n(p, F); 55 | return p = n(p, D), 56 | p ^= s, 57 | 0 > p && (p = (2147483647 & p) + 2147483648), 58 | p %= 1e6, 59 | p.toString() + "." 
+ (p ^ m) 60 | } 61 | var i = null; 62 | """ 63 | ) 64 | 65 | print(js_compile.call('xx', "hi")) 66 | -------------------------------------------------------------------------------- /百度翻译spider/使用教程: -------------------------------------------------------------------------------- 1 | 解决问题 2 | sign 加密 3 | 文件安排 4 | **sign加密参数破解 直接给参数(被翻译单词),返回sign加密结果 5 | 百度翻译spider 简单的请求接口, 6 | 注意事项 7 | 爬取过程 注意IP的封锁,可能对应的token什么的按照本地环境生成,自己curl 修改参数sign 8 | 9 | 10 | 翻译参照表 ,修改对应的 from 和 to 11 | langList: { 12 | 'zh': '中文','jp': '日语','jpka': '日语假名','th': '泰语','fra': '法语','en': '英语','spa': '西班牙语','kor': '韩语', 13 | 'tr': '土耳其语','vie': '越南语','ms': '马来语','de': '德语','ru': '俄语','ir': '伊朗语','ara': '阿拉伯语','est': '爱沙尼亚语','be': '白俄罗斯语','bul': '保加利亚语','hi': '印地语','is': '冰岛语', 14 | 'pl': '波兰语','fa': '波斯语','dan': '丹麦语','tl': '菲律宾语','fin': '芬兰语','nl': '荷兰语','ca': '加泰罗尼亚语','cs': '捷克语','hr': '克罗地亚语','lv': '拉脱维亚语','lt': '立陶宛语','rom': '罗马尼亚语','af': '南非语','no': '挪威语','pt_BR': '巴西语','pt': '葡萄牙语','swe': '瑞典语','sr': '塞尔维亚语', 15 | 'eo': '世界语','sk': '斯洛伐克语','slo': '斯洛文尼亚语','sw': '斯瓦希里语','uk': '乌克兰语','iw': '希伯来语','el': '希腊语','hu': '匈牙利语','hy': '亚美尼亚语','it': '意大利语','id': '印尼语','sq': '阿尔巴尼亚语','am': '阿姆哈拉语','as': '阿萨姆语','az': '阿塞拜疆语','eu': '巴斯克语','bn': '孟加拉语','bs': '波斯尼亚语', 16 | 'gl': '加利西亚语','ka': '格鲁吉亚语','gu': '古吉拉特语','ha': '豪萨语','ig': '伊博语','iu': '因纽特语','ga': '爱尔兰语','zu': '祖鲁语','kn': '卡纳达语','kk': '哈萨克语','ky': '吉尔吉斯语', 17 | 'lb': '卢森堡语','mk': '马其顿语','mt': '马耳他语','mi': '毛利语','mr': '马拉提语','ne': '尼泊尔语','or': '奥利亚语','pa': '旁遮普语','qu': '凯楚亚语','tn': '塞茨瓦纳语','si': '僧加罗语', 18 | 'ta': '泰米尔语','tt': '塔塔尔语','te': '泰卢固语','ur': '乌尔都语','uz': '乌兹别克语','cy': '威尔士语', 19 | 'yo': '约鲁巴语','yue': '粤语','wyw': '文言文','cht': '中文繁体' 20 | } -------------------------------------------------------------------------------- /百度翻译spider/百度翻译spider.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 享受雷霆感受雨露 3 | # author xyy,time:2020/5/29 4 | import json 5 | 6 | import requests 7 | import execjs 8 | 9 | def js_dd(r:str): 10 | js_compile = execjs.compile( 11 | r""" 12 | function a(r) { 13 | if (Array.isArray(r)) { 14 | for (var o = 0, t = Array(r.length); o < r.length; o++) 15 | t[o] = r[o]; 16 | return t 17 | } 18 | return Array.from(r) 19 | } 20 | function n(r, o) { 21 | for (var t = 0; t < o.length - 2; t += 3) { 22 | var a = o.charAt(t + 2); 23 | a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a), 24 | a = "+" === o.charAt(t + 1) ? r >>> a : r << a, 25 | r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a 26 | } 27 | return r 28 | } 29 | var xx = function e(r) { 30 | var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g); 31 | if (null === o) { 32 | var t = r.length; 33 | t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10)) 34 | } else { 35 | for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++) 36 | "" !== e[C] && f.push.apply(f, a(e[C].split(""))), 37 | C !== h - 1 && f.push(o[C]); 38 | var g = f.length; 39 | g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join("")) 40 | } 41 | var u = void 0 42 | , l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107); 43 | u = null !== i ? i : (i = "320305.131321201" || "") || ""; 44 | for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) { 45 | var A = r.charCodeAt(v); 46 | 128 > A ? S[c++] = A : (2048 > A ? 
S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), 47 | S[c++] = A >> 18 | 240, 48 | S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224, 49 | S[c++] = A >> 6 & 63 | 128), 50 | S[c++] = 63 & A | 128) 51 | } 52 | for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) 53 | p += S[b], 54 | p = n(p, F); 55 | return p = n(p, D), 56 | p ^= s, 57 | 0 > p && (p = (2147483647 & p) + 2147483648), 58 | p %= 1e6, 59 | p.toString() + "." + (p ^ m) 60 | } 61 | var i = null; 62 | """ 63 | ) 64 | info = js_compile.call('xx', r) 65 | return info 66 | 67 | def run(key): 68 | sign = js_dd(key) 69 | url = "https://fanyi.baidu.com/v2transapi?from=en&to=zh" 70 | 71 | payload = "from=en&to=zh&query={}&transtype=realtime&simple_means_flag=3&sign={}&token=886aa9b1d94bd35f736c15b865355987&domain=common".format(key,sign) 72 | headers = { 73 | 'authority': 'fanyi.baidu.com', 74 | 'accept': '*/*', 75 | 'x-requested-with': 'XMLHttpRequest', 76 | 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36', 77 | 'content-type': 'application/x-www-form-urlencoded; charset=UTF-8', 78 | 'origin': 'https://fanyi.baidu.com', 79 | 'sec-fetch-site': 'same-origin', 80 | 'sec-fetch-mode': 'cors', 81 | 'sec-fetch-dest': 'empty', 82 | 'referer': 'https://fanyi.baidu.com/translate?aldtype=16047&query=&keyfrom=baidu&smartresult=dict&lang=auto2zh', 83 | 'accept-language': 'zh-CN,zh;q=0.9', 84 | 'cookie': 'BIDUPSID=4440053665E634D40225BDB0D03E06FB; PSTM=1587886677; BAIDUID=4440053665E634D478DDC8C16E9C7B12:FG=1; BDUSS=y1qektIM1ZwM2JGS1ZOcWR2TDd5cXNOUUw5UzBXZUpBemZxaFJzcnU0N2l6TXhlRVFBQUFBJCQAAAAAAAAAAAEAAABJ60F6x63RptLk0fQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOI~pV7iP6VeY; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; BDSFRCVID=tjPOJeCAa7berx3uzpK9uyZdfmKK0gOTH6qcdz7sDYu7og_VfHNgEG0Pox8g0KubKALAogKK0mOTHUuF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tJutoIPMtKv0jbTg-tP_-4_tbh_X5-RLfb5PLp7F5lONHt3uXj5NMRQL5UrQBbbv-2o-ahkM5h7xOKQSM-5pMJFUXp7ULpOAQeQghf5N3KJmDPP9bT3v5Dun3J3r2-biWbRL2MbdbDnP_IoG2Mn8M4bb3qOpBtQmJeTxoUJ25DnJhhCGe6KMD5cbDNKfqbT32CnKW5rtKRTffjrnhPF35-IrXP6-3MoKJKr-QtPbb4c2OITG2q6fb4uUyN3MWh37Jj620PF5-hbZfD3vbpn4bxkNbPoxJpOyMnbMopvaKqvN8hjvbURvD-ug3-7P-x5dtjTO2bc_5KnlfMQ_bf--QfbQ0hOhqP-jBRIE3-oJqCLabKPw3f; H_PS_PSSID=; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; delPer=0; PSINO=2; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1590665640,1590739020,1590740573; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1590740573; __yjsv5_shitong=1.0_7_311eefede6fd974d4f71d2b54434efb086cc_300_1590740573647_221.223.193.45_cdfd764c; yjs_js_security_passport=c5fb1118c64b458b25325a4dbb9225810a243137_1590740578_js; BAIDUID=1EFD4CDE99B11CB1B31310352198C496:FG=1' 85 | } 86 | 87 | response = requests.request("POST", url, headers=headers, data = payload) 88 | 89 | print(response.text) 90 | 91 | if __name__ == '__main__': 92 | run(key="hi") 93 | 
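Since both files above embed the same JS, a pure-Python port is a useful cross-check. The sketch below assumes the fixed gtk "320305.131321201" hardcoded in that JS (a live session may serve a different gtk/token) and BMP-only input; the original JS takes a separate truncation path for surrogate pairs such as emoji:

# Pure-Python port of the xx()/n() pair from sign加密参数破解.py, so the
# sign can be computed without PyExecJS. Sketch only: gtk is assumed
# fixed at "320305.131321201", as hardcoded in the JS above.
MASK32 = 0xFFFFFFFF

def _mix(r, rule):
    # Port of n(r, o): each triplet is (combine-op, shift-direction, amount).
    for t in range(0, len(rule) - 2, 3):
        a = rule[t + 2]
        a = ord(a) - 87 if a >= "a" else int(a)
        # '+' in the middle slot means unsigned right shift, else left shift.
        a = (r & MASK32) >> a if rule[t + 1] == "+" else (r << a) & MASK32
        # '+' in the first slot means 32-bit add, else xor.
        r = (r + a) & MASK32 if rule[t] == "+" else (r ^ a) & MASK32
    return r

def baidu_sign(query, gtk="320305.131321201"):
    # Long queries collapse to the first 10 + middle 10 + last 10 characters.
    if len(query) > 30:
        mid = len(query) // 2
        query = query[:10] + query[mid - 5:mid + 5] + query[-10:]
    m, s = (int(part) for part in gtk.split("."))
    p = m
    for byte in query.encode("utf-8"):
        p = _mix((p + byte) & MASK32, "+-a^+6")
    p = _mix(p, "+-3^+b+-f")
    p = (p ^ s) & MASK32
    p %= 1000000
    return "{}.{}".format(p, p ^ m)

if __name__ == "__main__":
    # Should print the same value as js_compile.call('xx', "hi") above.
    print(baidu_sign("hi"))
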
-------------------------------------------------------------------------------- /秒拍视频/使用教程: -------------------------------------------------------------------------------- 1 | 秒拍 异或响应加密 2 | -------------------------------------------------------------------------------- /秒拍视频/秒拍spider.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 享受雷霆感受雨露 3 | # author xyy,time:2021/1/18 4 | 5 | import datetime 6 | import json 7 | import random 8 | import time 9 | import uuid 10 | import requests 11 | 12 | from hashlib import md5 13 | from urllib import parse 14 | from urllib.parse import quote 15 | 16 | proxies = {} 17 | # 通过时间字符形式 返回时长格式 18 | def unify_duration_format(duar_str_or_s: str): 19 | """ 20 | 01:11 -> 71,'00:01:11' 21 | 00:01:11 -> 71,'00:01:11' 22 | :param duar_str: '01:11' or '00:01:11' 23 | :return: 71, '00:01:11' 24 | """ 25 | error = 0, '' 26 | 27 | def hms(m: int, s: int, h=0): 28 | if s > 60: 29 | m += 1 30 | if m > 60: 31 | h += 1 32 | return h * 60 * 60 + m * 60 + s, str(h).zfill(2) + ':' + str(m).zfill(2) + ':' + str(s).zfill(2) 33 | try: 34 | s = int(duar_str_or_s) 35 | except: 36 | pass 37 | else: 38 | return hms(m=s % 3600//60, s=s % 60, h=s//3600) 39 | try: 40 | if duar_str_or_s: 41 | duar_list = duar_str_or_s.split(':') 42 | if len(duar_list) == 2: 43 | return hms(m=int(duar_list[0]), s=int(duar_list[1])) 44 | elif len(duar_list) == 3: 45 | return hms(m=int(duar_list[1]), s=int(duar_list[2]), h=int(duar_list[0])) 46 | else: 47 | return error 48 | else: 49 | return error 50 | except Exception as e: 51 | return error 52 | 53 | # Md5 加密函数 32 返回32位的加密结果 54 | def md5_use(text:str)->str: 55 | result = md5(bytes(text, encoding="utf-8")).hexdigest() 56 | # print(result) 57 | return result 58 | 59 | # 获取代理 60 | # 获得代理函数 61 | def get_proxy(): 62 | return proxies 63 | 64 | config = { 65 | # 秒拍 66 | "video_search_offset": {"start": 1, "end": 2, "pagesize": 20, "start_page": 0}, 67 | 68 | } 69 | 70 | class MiaopaiVideo(): 71 | # 时间戳 72 | current_ts = str(int(time.time())) 73 | # 伪造UUID,也叫做GUID(C#) 74 | fake_uuid = str(uuid.uuid1()) 75 | # APP版本 76 | app_version = '7.2.60' 77 | 78 | # 搜索接口 key_words 搜索的关键词 默认3 79 | APISearch = "https://b-api.ins.miaopai.com/1/search/media.json?count=20&page={page}&key={key_words}" 80 | 81 | def __init__(self, use_proxy=True): 82 | self.proxy = get_proxy() if use_proxy else None 83 | 84 | ################################### 85 | def get_cpAbid(self): 86 | s1 = random.randint(1,19) 87 | s2 = random.randint(1,29) 88 | if random.randint(0,1): 89 | return '1-102,{}-100,2-1,{}-101,5-200,2-201'.format(s1,s2) 90 | else: 91 | return '1-102,{}-100,2-1,{}-101'.format(s1,s2) 92 | 93 | # md5 加密 94 | def get_md5(self, source): 95 | if isinstance(source, str): 96 | source = source.encode('utf-8') 97 | return md5(source).hexdigest() 98 | 99 | ################################### 100 | 101 | # 秒拍解密响应 102 | def _decode_resp_content(self,resp_content): 103 | """解密请求响应的数据 104 | :param resp_content: 请求响应的content""" 105 | 106 | def bytes_to_int(data, offset): 107 | result = 0 108 | for i in range(4): 109 | result |= (data[offset + i] & 0xff) << (8 * 1) 110 | return result 111 | 112 | def reverse_bytes(i): 113 | return ((i >> 24) & 0xFF) | ((i >> 8) & 0xFF00) | ((i << 8) & 0xFF0000) | (i << 24) 114 | 115 | if len(resp_content) <= 8: 116 | return '' 117 | dword0 = bytes_to_int(resp_content, 0) 118 | dword1 = bytes_to_int(resp_content, 4) 119 | x = 0 120 | if (dword0 ^ dword1) == -1936999725: 121 | x = 
reverse_bytes(dword1 ^ bytes_to_int(resp_content, 8)) 122 | buffer_size = len(resp_content) - 12 - x 123 | if buffer_size <= 0: 124 | return '' 125 | else: 126 | buffer = bytearray() 127 | for index in range(buffer_size): 128 | buffer.append((resp_content[8 + index] ^ resp_content[12 + index]) & 0xff) 129 | return buffer.decode('utf8') 130 | 131 | # 获取响应 132 | def get_response(self,key_words:str="梁家辉",page:int=3,**kwargs): 133 | # url = "https://b-api.ins.miaopai.com/1/search/media.json?count=20&page=3&key=%E6%A2%81%E5%AE%B6%E8%BE%89" 134 | # url = "https://b-api.ins.miaopai.com/1/search/media.json?count=20&page=3&key={}".format(quote(key_words)) 135 | url = self.APISearch.format(key_words=quote(key_words),page=page) 136 | # timestamp = int(datetime.datetime.now().timestamp()) 137 | 138 | payload = {} 139 | # headers = { 140 | # 'cp-uniqueId': '8ac4508c-ca93-30ac-b310-61d9b4ea91a2', 141 | # 'cp-os': 'android', 142 | # 'cp_kid': '0', 143 | # 'cp-ver': '7.2.78', 144 | # 'cp-uuid': '8ac4508c-ca93-30ac-b310-61d9b4ea91a2', 145 | # 'cp-abid': '1-10,2-1', 146 | # 'cp-channel': 'xiaomi_market', 147 | # 'cp-time': '1600245983', 148 | # 'cp-sver': '9', 149 | # # 'cp-sign': 'fd3a76b879d6182925add2c5182071de', 150 | # 'cp-vend': 'miaopai', 151 | # 'cp-appid': '424', 152 | # 'Host': 'b-api.ins.miaopai.com', 153 | # 'User-Agent': 'okhttp/3.3.1', 154 | # 'Cookie': 'acw_tc=7b39758516002460160502434e5c514791eb6d8c44782e71955cd0f42e2fad' 155 | # } 156 | headers = { 157 | "Accept-Encoding": "gzip", 158 | 'User-Agent': 'okhttp/3.3.1', 159 | 'Connection': 'Keep-Alive', 160 | "Host": 'b-api.ins.miaopai.com', 161 | 'cp_ver': '7.2.60', 162 | 'cp_appid': '424', 163 | 'cp_sver': '5.1.1', 164 | 'cp_channel': 'xiaomi_market', 165 | 'cp_os': 'android', 166 | 'cp_vend': 'miaopai', 167 | } 168 | 169 | # cp_uuid = uuid.uuid1().__str__() 170 | headers['cp_sign'] = self.get_cp_sign(url) 171 | # print(headers) 172 | headers['cp_time'] = str(self.current_ts) 173 | headers['cp_uuid'] = self.fake_uuid 174 | headers['cp_abid'] = self.get_cpAbid() 175 | headers['Cache-Control'] = 'no-cache' 176 | 177 | response = requests.get(url, headers=headers, data = payload,verify=False,proxies=self.proxy) 178 | 179 | return response.content 180 | 181 | # 获取cp_sign参数值 182 | def get_cp_sign(self,target_url: str): 183 | sign_raw_str = 'url=' + parse.urlparse(target_url).path + \ 184 | 'unique_id=' + self.fake_uuid + \ 185 | 'version=' + self.app_version + \ 186 | 'timestamp=' + self.current_ts + \ 187 | '4O230P1eeOixfktCk2B0K8d0PcjyPoBC' 188 | return md5((sign_raw_str.encode(encoding='utf-8'))).hexdigest() 189 | 190 | # 解析秒拍 191 | def get_parse(self,respose_text): 192 | # print(respose_text.replace("]}00","]}").replace("]}0","]}")) 193 | task_list = [] # 解析的结果集 194 | dic_info = json.loads(respose_text.replace("]}00","]}").replace("]}0","]}")) 195 | # dic_info = json.loads(respose_text.replace("]}0","]}")) 196 | # print(dic_info) 197 | if "result" in dic_info and dic_info["result"]: 198 | for each in dic_info["result"]: 199 | video_dict = {} 200 | video_dict["video2_title"] = each["description"] 201 | video_dict["video2_id"] = each["smid"] 202 | video_dict["video2_url"] = "http://n.miaopai.com/media/{}.html".format(each["smid"]) 203 | video_dict["video2_author"] = each["user"]["nick"] 204 | video_dict["video2_url_hash"] = md5_use(video_dict.get("video2_url")) 205 | video_dict["video2_platform"] = "秒拍视频" 206 | duration_str_temp = each.get('meta_data', [])[0].get('upload', {}).get('length', '') if each.get('meta_data', []) else '' 207 | 
duration, duration_str = unify_duration_format(duration_str_temp) 208 | video_dict["video2_duration"] = duration # 时长(秒数) 209 | video_dict["video2_duration_str"] = duration_str # 时长(字符串) 210 | task_list.append(video_dict) 211 | return task_list 212 | # 获取video的返回值 213 | def search_video(self,search_key: str,**kwargs): 214 | _start = config["video_search_offset"]["start"] 215 | _end = config["video_search_offset"]["end"] 216 | task_list = [] 217 | if kwargs.get("page_num"): 218 | if config["video_search_offset"]["start_page"] == 0: 219 | _start = int(kwargs.get("page_num")) - 1 220 | _end = int(kwargs.get("page_num")) 221 | elif config["video_search_offset"]["start_page"] == 1: 222 | _start = int(kwargs.get("page_num")) 223 | _end = int(kwargs.get("page_num")) + 1 224 | 225 | for page in range(_start, _end): 226 | respose_text = self._decode_resp_content(self.get_response(key_words=search_key,page=page)) 227 | print(respose_text) 228 | for each in self.get_parse(respose_text): 229 | task_list.append(each) 230 | 231 | return task_list 232 | 233 | 234 | search_songs = MiaopaiVideo(use_proxy=True).search_video 235 | 236 | if __name__ == '__main__': 237 | kwags = { 238 | } 239 | info = search_songs(search_key="周杰伦", **kwags) # 1109 没数据就对了 240 | print(info) 241 | -------------------------------------------------------------------------------- /网易云爬虫/requirment.txt: -------------------------------------------------------------------------------- 1 | certifi==2020.6.20 2 | chardet==3.0.4 3 | crypto==1.4.1 4 | fake-useragent==0.1.11 5 | idna==2.10 6 | lxml==4.5.1 7 | Naked==0.1.31 8 | pycryptodome==3.9.8 9 | PyExecJS==1.5.1 10 | PyYAML==5.3.1 11 | requests==2.24.0 12 | shellescape==3.8.1 13 | six==1.15.0 14 | urllib3==1.25.9 15 | -------------------------------------------------------------------------------- /网易云爬虫/使用说明: -------------------------------------------------------------------------------- 1 | 文件说明: 2 | 网易云spider.py 提供评论接口解析 歌曲歌词接口解析 搜索接口解析 3 | 加密解密代码.py 提供 js函数里b函数的解析(并不针对网易云解密) 具体看本人博客 4 | 注意事项: 5 | 测试cookie两天未封(是否需要cookie池) 6 | IP加代理 -------------------------------------------------------------------------------- /网易云爬虫/加密解密代码.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 享受雷霆感受雨露 3 | # author xyy,time:2020/6/28 4 | from Crypto.Cipher import AES 5 | import base64 6 | 7 | # 加密 8 | def py_aes_first(text): 9 | BS = AES.block_size 10 | pad = lambda s: s + (BS - len(s) % BS) * chr(BS - len(s) % BS).encode('utf-8') 11 | unpad = lambda s : s[0:-s[-1]] 12 | 13 | key = b'0CoJUm6Qyw8W8jud' 14 | text = text.encode("utf-8") 15 | IV = b'0102030405060708' 16 | 17 | cipher = AES.new(key, mode=AES.MODE_CBC, IV=IV) 18 | # cipher2 = AES.new(key, mode=AES.MODE_CBC, IV=IV) # 加密和解密,cipher对象只能用一次 19 | 20 | # print(text) 21 | encrypted = pad(text) 22 | # print(encrypted) 23 | encrypted = cipher.encrypt(encrypted) 24 | # print(encrypted) 25 | encrypted = base64.b64encode(encrypted).decode("utf-8") 26 | print("第一次加密结果",encrypted) 27 | return encrypted 28 | def py_aes_second(text): 29 | BS = AES.block_size 30 | pad = lambda s: s + (BS - len(s) % BS) * chr(BS - len(s) % BS).encode('utf-8') 31 | unpad = lambda s : s[0:-s[-1]] 32 | 33 | key = b'TXhkKroQJSgrKrnN' 34 | text = text.encode("utf-8") 35 | IV = b'0102030405060708' 36 | 37 | cipher = AES.new(key, mode=AES.MODE_CBC, IV=IV) 38 | # cipher2 = AES.new(key, mode=AES.MODE_CBC, IV=IV) # 加密和解密,cipher对象只能用一次 39 | 40 | # print(text) 41 | encrypted = pad(text) 42 | # print(encrypted) 43 | 
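# The pad()/unpad() lambdas above implement PKCS#7: pad appends N copies of the
# byte N so the plaintext length becomes a multiple of AES.block_size, and unpad
# strips them again after decryption.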
encrypted = cipher.encrypt(encrypted) 44 | # print(encrypted) 45 | encrypted = base64.b64encode(encrypted).decode("utf-8") 46 | print("第二次加密结果",encrypted) 47 | 48 | key = b'0CoJUm6Qyw8W8jud' 49 | text = encrypted 50 | IV = b'0102030405060708' 51 | 52 | cipher = AES.new(key, mode=AES.MODE_CBC, IV=IV) 53 | 54 | return encrypted 55 | 56 | 57 | # 解密 58 | def py_aes_third(encrypted): 59 | key = b'TXhkKroQJSgrKrnN' 60 | IV = b'0102030405060708' 61 | unpad = lambda s: s[0:-s[-1]] 62 | cipher2 = AES.new(key, mode=AES.MODE_CBC, IV=IV) 63 | 64 | decrypted = base64.b64decode(encrypted) 65 | # print(decrypted) 66 | decrypted = cipher2.decrypt(decrypted) 67 | # print(decrypted) # will be 'to be encrypted' 68 | decrypted = unpad(decrypted) 69 | print("第一次解密结果",str(decrypted,encoding='utf-8')) 70 | key = b'0CoJUm6Qyw8W8jud' 71 | IV = b'0102030405060708' 72 | 73 | cipher = AES.new(key, mode=AES.MODE_CBC, IV=IV) 74 | 75 | decrypted = base64.b64decode(decrypted) 76 | # print(decrypted) 77 | decrypted = cipher.decrypt(decrypted) 78 | # print(decrypted) # will be 'to be encrypted' 79 | decrypted = unpad(decrypted) 80 | print("第二次解密结果",str(decrypted,encoding='utf-8')) 81 | 82 | 83 | def jiami_(text): 84 | info = py_aes_second(py_aes_first(text)) 85 | print(info) 86 | return info 87 | 88 | def jiemi_(text): 89 | jiemi_info = py_aes_third(text) 90 | print(jiemi_info) 91 | return jiemi_info 92 | 93 | 94 | 95 | if __name__ == '__main__': 96 | jiami_('sdfadfaf') 97 | jiemi_('zi8Y6TYtv3TQ3vPiamaUE+arzvFjlbFzNGdkSTxKRrw=zi8Y6TYtv3TQ3vPiamaUE+arzvFjlbFzNGdkSTxKRrw=') 98 | # py_aes_first('{"s":"在田野","csrf_token":""}') 99 | # py_aes_second(py_aes_first('{"s":"在田野","csrf_token":""}')) 100 | # xx = py_aes_second(py_aes_first('{"hlpretag":"","hlposttag":"","s":"在意义","type":"1","offset":"90","total":"false","limit":"30","csrf_token":”"}')) 101 | # py_aes_second(py_aes_first('{"hlpretag":"","hlposttag":"","s":"在田野","type":"1","offset":"90","total":"false","limit":"30","csrf_token":”"}')) 102 | 103 | 104 | 105 | 106 | 107 | # 解密测试 108 | # xx = 'RidsQl08PTom8lbreQjS0wrkPfv02Ib1P+7WYmAYUmHz3V3KhauA0kodLg+VIPLXEn393pGiP6j7E9soFzuH09jq/XFIcjEMKCIZb3npxxc=' 109 | # py_aes_third(xx) 110 | 111 | # 加密测试 112 | # py_aes_second(py_aes_first('{"hlpretag":"","hlposttag":"","#/discover":"","s":"在一起","type":"1","offset":"30","total":"false","limit":"30","csrf_token":"”}')) 113 | 114 | -------------------------------------------------------------------------------- /网易云爬虫/网易云spider.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 享受雷霆感受雨露 3 | # author xyy,time:2020/6/28 4 | 5 | from fake_useragent import UserAgent 6 | from Crypto.Cipher import AES 7 | 8 | import base64 9 | import requests, pprint,json 10 | 11 | 12 | class WangYiYun(): 13 | 14 | def __init__(self): 15 | self.params = "" 16 | self._i = "l6Brr86UeZ6C3Bsw" # 默认使用此字符串 17 | # 使用默认_i 配套的encSecKey 18 | self.encSecKey = "7ca9b5ba8b13044f47ed74c388df912ac84758122acbedc64111f2ac83232b01d3ce16f7195a39c7e064b4c0240b5c1d52624dc13c22ec820d76dfe32db43e496aeacced5be3ca9108c78a85bb389f1edf8d8c9fced02024ba9490401b4ce062cc50764d0a24294e07bb229271391b5a3640e924ee1ed15435dc6e288f1fa873" 19 | self.headers = { 20 | 'authority': 'music.163.com', 21 | 'user-agent': UserAgent().random, 22 | 'content-type': 'application/x-www-form-urlencoded', 23 | 'accept': '*/*', 24 | 'origin': 'https://music.163.com', 25 | 'sec-fetch-site': 'same-origin', 26 | 'sec-fetch-mode': 'cors', 27 | 'sec-fetch-dest': 'empty', 28 | 'referer': 
'https://music.163.com/song?id=1426301364', 29 | 'accept-language': 'zh-CN,zh;q=0.9', 30 | 'cookie': '_iuqxldmzr_=32; _ntes_nnid=5f8ee04e745645d13d3f711c76769afe,1593048942478; _ntes_nuid=5f8ee04e745645d13d3f711c76769afe; WM_TID=XqvK2%2FtWaSBEUBRBEEN7XejGE%2FL0h6Vq; WM_NI=iN6dugAs39cIm2K2R9ox28GszTm5oRjcvJCcyIuaI1dccEVSjaHEwhc8FuERfkh3s%2FFP0zniMA5P4vqS4H3TJKdQofPqezDPP4IR5ApTjuqeNIJNZkCvHMSY6TtEkCZUS3k%3D; WM_NIKE=9ca17ae2e6ffcda170e2e6eeb2e57dbababf88b879a8b08fa2d84f869f9fbaaa50a3f599a5d650939b8dadd52af0fea7c3b92aab92fa85f86d83adfddae243afee85d3d133ada8fed9c679ba8ca3d6ee5aaabdbaabc269bb97bb82cc3ba8bdada6d559aabf88a6f664a1e88a96c85aa6b5a8d4f2258690009bed638f9ffbb1b77eb38dfca9b2608a95acb2ee6e94afab9bc75c94ec87b3b84bb48ca696f46f8e9786afd96181aa88aed253f68cbca6ea499a8b9dd4ea37e2a3; JSESSIONID-WYYY=tI8MIKMCRBuyCYnUJMCyUTlp%2Fufv5xIfCquvp7PJ4%2BuXod%5CXH%5CB0icDZw8TNlwHUHOW%2B2t%2BCuXyC4VZ%5C19OrzaDE%5Ck0F0dAZQh7KcVxUoHKpqUdiVzPu8NxCK9cJRG%5C%5CPTvtqxjFerd1%2BBa4%2F%5C8PESa4pvvRaQF6jljjsibX%5CrcPsH0I%3A1593347447142', 31 | } 32 | 33 | # 搜索歌曲接口 34 | API_Serch_Songs = 'https://music.163.com/weapi/cloudsearch/get/web?csrf_token=' 35 | # 歌曲评论 36 | API_Comments_Song = 'https://music.163.com/weapi/v1/resource/comments/R_SO_4_{}?csrf_token=' # 音乐ID可替换 37 | # 歌曲歌词 38 | API_Lyric_Songs = 'https://music.163.com/weapi/song/lyric?csrf_token=' 39 | 40 | # crypt_js_complex python 复写cryptjs 41 | def crypt_js_complex(self,text): 42 | BS = AES.block_size 43 | pad = lambda s: s + (BS - len(s) % BS) * chr(BS - len(s) % BS).encode('utf-8') 44 | unpad = lambda s: s[0:-s[-1]] 45 | 46 | key = bytes(self._i, encoding="utf-8") 47 | text = text.encode("utf-8") 48 | IV = b'0102030405060708' 49 | 50 | cipher = AES.new(key, mode=AES.MODE_CBC, IV=IV) 51 | # cipher2 = AES.new(key, mode=AES.MODE_CBC, IV=IV) # 加密和解密,cipher对象只能用一次 52 | 53 | # print(text) 54 | encrypted = pad(text) 55 | # print(encrypted) 56 | encrypted = cipher.encrypt(encrypted) 57 | # print(encrypted) 58 | encrypted = base64.b64encode(encrypted).decode("utf-8") 59 | # print("第二次加密结果", encrypted) 60 | 61 | return encrypted 62 | 63 | # crypt_js_complex 的基础 64 | def crypt_js_complex_base(self,text): 65 | BS = AES.block_size 66 | pad = lambda s: s + (BS - len(s) % BS) * chr(BS - len(s) % BS).encode('utf-8') 67 | unpad = lambda s: s[0:-s[-1]] 68 | 69 | key = b'0CoJUm6Qyw8W8jud' 70 | text = text.encode("utf-8") 71 | IV = b'0102030405060708' 72 | 73 | cipher = AES.new(key, mode=AES.MODE_CBC, IV=IV) 74 | # cipher2 = AES.new(key, mode=AES.MODE_CBC, IV=IV) # 加密和解密,cipher对象只能用一次 75 | 76 | # print(text) 77 | encrypted = pad(text) 78 | # print(encrypted) 79 | encrypted = cipher.encrypt(encrypted) 80 | # print(encrypted) 81 | encrypted = base64.b64encode(encrypted).decode("utf-8") 82 | # print("第一次加密结果", encrypted) 83 | return encrypted 84 | 85 | # 获得parms参数值 86 | def get_params(self,text): 87 | return self.crypt_js_complex( 88 | self.crypt_js_complex_base(text),) 89 | 90 | # 搜索歌曲接口 91 | def serch_songs(self,name,offset=0): 92 | """ 93 | 94 | :param name:str 95 | :param offset:int 偏移量 默认第一页 例如 0 30 60 90 96 | :return 接口数据 97 | """ 98 | text = '{"hlpretag":"","hlposttag":"","#/discover":"","s":"%s","type":"1","offset":"%s","total":"false","limit":"30","csrf_token":""}'%(name,offset*30) 99 | # payload = 'params={params}&encSecKey={encSecKey}'.format(params=self.get_params(text),encSecKey=self.encSecKey) 100 | print(text) 101 | params = ( 102 | ('csrf_token', ''), 103 | ) 104 | 105 | data = { 106 | 'params': self.get_params(text), 107 | 'encSecKey': self.encSecKey 108 | } 
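# 'params' in the dict above is the request JSON after two AES-CBC passes: first
# with the fixed key 0CoJUm6Qyw8W8jud (crypt_js_complex_base), then with self._i
# (crypt_js_complex). Because self._i is pinned to "l6Brr86UeZ6C3Bsw" rather than
# a random 16-char key, the matching RSA-encrypted encSecKey can be reused as a constant.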
109 | print(data) 110 | response = requests.post(self.API_Serch_Songs, headers=self.headers, params=params, 111 | data=data) 112 | self._dispose(json.loads(response.text)) 113 | 114 | # 歌曲评论抓取 115 | def comment_song(self,songid:str,offset:int=0): 116 | """" 117 | :param songid:str 歌曲ID 118 | :param offset:int 翻页 默认第一页 0 20 40 119 | :return 接口数据 120 | """ 121 | text = '{"rid":"R_SO_4_%s","offset":"%s","total":"true","limit":"20","csrf_token":""}'%(songid,offset*20) 122 | 123 | 124 | params = ( 125 | ('csrf_token', ''), 126 | ) 127 | 128 | data = { 129 | 'params': self.get_params(text), 130 | 'encSecKey': self.encSecKey 131 | } 132 | response = requests.post(self.API_Comments_Song.format(songid), headers=self.headers, 133 | params=params, data=data) 134 | self._dispose(json.loads(response.text)) 135 | # 歌词爬取 136 | def lyric_song(self,songid:str): 137 | """ 138 | :param songid str 歌曲ID 139 | :return 接口数据 140 | """ 141 | # 歌词接口加密参数原型 142 | text = '{"id":"%s","lv":-1,"tv":-1,"csrf_token":""}'%(songid) 143 | 144 | params = ( 145 | ('csrf_token', ''), 146 | ) 147 | 148 | data = { 149 | 'params': self.get_params(text), 150 | 'encSecKey': self.encSecKey 151 | } 152 | 153 | response = requests.post(self.API_Lyric_Songs, headers=self.headers, params=params, data=data) 154 | self._dispose(json.loads(response.text)) 155 | 156 | # 处理爬虫获取到的数据,这里我就输出值 157 | def _dispose(self, data): 158 | pprint.pprint(data) 159 | return data 160 | 161 | # 主函数 测试 162 | def wangyi_main(self): 163 | # 搜索接口 164 | self.serch_songs("旧账",0) 165 | #歌曲评论接口 166 | # self.comment_song("25639331",0) 167 | # 歌词接口 168 | # self.lyric_song("1351615757") # 旧账 169 | pass 170 | if __name__ == '__main__': 171 | wangyi = WangYiYun() 172 | wangyi.wangyi_main() 173 | 174 | -------------------------------------------------------------------------------- /虾米音乐spider/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Datasource local storage ignored files 5 | /dataSources/ 6 | /dataSources.local.xml 7 | # Editor-based HTTP Client requests 8 | /httpRequests/ 9 | -------------------------------------------------------------------------------- /虾米音乐spider/.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 15 | -------------------------------------------------------------------------------- /虾米音乐spider/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /虾米音乐spider/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /虾米音乐spider/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /虾米音乐spider/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /虾米音乐spider/.idea/虾米音乐spider.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 
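A minimal sketch of the request signing used throughout the Xiami spider below: every API call carries _s = md5(token + "_xmMain_" + api + "_" + _q), where token is the xm_sg_tk cookie value with its trailing timestamp segment removed. The helper name here is illustrative, not part of the repo:

from hashlib import md5

def xiami_sign(xm_sg_tk_cookie: str, api: str, _q: str = "") -> str:
    # keep only the hash part of the cookie, e.g. "7f2d...deb8_1593..." -> "7f2d...deb8"
    token = xm_sg_tk_cookie.split("_")[0]
    return md5(f"{token}_xmMain_{api}_{_q}".encode("utf-8")).hexdigest()

# e.g. xiami_sign(cookie_val, "/api/search/searchSongs", '{"key":"七里香","pagingVO":{"page":1,"pageSize":30}}')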
-------------------------------------------------------------------------------- /虾米音乐spider/requirment.txt: -------------------------------------------------------------------------------- 1 | bcrypt==3.1.7 2 | certifi==2020.6.20 3 | cffi==1.14.0 4 | chardet==3.0.4 5 | cryptography==2.9.2 6 | fabric==2.5.0 7 | fake==0.8 8 | fake-useragent==0.1.11 9 | idna==2.9 10 | invoke==1.4.1 11 | paramiko==2.7.1 12 | pycparser==2.20 13 | PyNaCl==1.4.0 14 | requests==2.24.0 15 | six==1.15.0 16 | urllib3==1.25.9 17 | -------------------------------------------------------------------------------- /虾米音乐spider/xiami_audio_spider.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 享受雷霆感受雨露 3 | # author xyy,time:2020/6/30 4 | import json 5 | import random 6 | from xtools import md5_use 7 | import requests, pprint 8 | from fake_useragent import UserAgent 9 | from hashlib import md5 10 | from retrying import retry 11 | class XiaMi: 12 | ua = UserAgent() 13 | DOMAIN = "https://www.xiami.com" 14 | 15 | # 各个API接口地址 16 | # 每日音乐推荐 17 | APIDailySongs = "/api/recommend/getDailySongs" 18 | # 排行榜音乐 19 | APIBillboardDetail = "/api/billboard/getBillboardDetail" 20 | # 所有排行榜 21 | APIBillboardALL = "/api/billboard/getBillboards" 22 | # 歌曲详情信息 23 | APISongDetails = "/api/song/getPlayInfo" 24 | # 搜索音乐接口 25 | APISearch = "/api/search/searchSongs" 26 | # 歌曲单独一首详情 27 | APISingleSongInfo = "/api/song/initialize" 28 | def __init__(self): 29 | self.session = requests.Session() 30 | self.headers = { 31 | "user-agent": self.ua.random, 32 | "Proxy-Tunnel": str(random.randint(1, 10000)) 33 | } 34 | self.session.get(self.DOMAIN) 35 | 36 | def _get_api_url(self, api): 37 | return self.DOMAIN + api 38 | 39 | # 获取每日推荐的30首歌曲 40 | def get_daily_songs(self): 41 | url = self._get_api_url(self.APIDailySongs) 42 | params = { 43 | "_s": self._get_params__s(self.APIDailySongs) 44 | } 45 | result = self.session.get(url=url, params=params).json() 46 | self._dispose(result) 47 | 48 | # 获取虾米音乐的音乐排行榜 49 | def get_billboard_song(self, billboard_id: int = 0): 50 | ''' 51 | :param billboard_id: 各类型的排行榜 52 | :return: 排行榜音乐数据 53 | ''' 54 | if not hasattr(self, "billboard_dict"): 55 | self._get_billboard_dict_map() 56 | 57 | assert hasattr(self, "billboard_dict"), "billboard_dict获取失败" 58 | pprint.pprint(self.billboard_dict) 59 | if billboard_id == 0: 60 | billboard_id = input("输入对应ID,获取排行榜信息") 61 | assert billboard_id in self.billboard_dict, "billboard_id错误" 62 | 63 | url = self._get_api_url(self.APIBillboardDetail) 64 | _q = '{\"billboardId\":\"%s\"}' % billboard_id 65 | params = { 66 | "_q": _q, 67 | "_s": self._get_params__s(self.APIBillboardDetail, _q) 68 | } 69 | result = self.session.get(url=url, params=params).json() 70 | self._dispose(result) 71 | 72 | # 生成一个排行榜对应的字典映射 73 | def _get_billboard_dict_map(self): 74 | billboard_dict = {} 75 | billboards_info = self.get_billboard_all() 76 | try: 77 | if billboards_info["code"] == "SUCCESS": 78 | xiamiBillboards_list = billboards_info["result"]["data"]["xiamiBillboards"] 79 | for xiamiBillboards in xiamiBillboards_list: 80 | for xiamiBillboard in xiamiBillboards: 81 | id = xiamiBillboard["billboardId"] 82 | name = xiamiBillboard["name"] 83 | billboard_dict[id] = name 84 | self.billboard_dict = billboard_dict 85 | except Exception: 86 | pass 87 | 88 | # 获取所有的排行榜信息 89 | def get_billboard_all(self): 90 | url = self._get_api_url(self.APIBillboardALL) 91 | params = { 92 | "_s": self._get_params__s(self.APIBillboardALL) 93 | } 94 | result = 
self.session.get(url=url, params=params).json()
95 | return self._dispose(result)
96 | 
97 | # 获取歌曲详情信息
98 | def get_song_details(self, *song_ids) -> dict:
99 | '''
100 | :param song_ids: 歌曲的id,可以为多个
101 | :return: 歌曲的详情信息
102 | '''
103 | assert len(song_ids) != 0, "参数不能为空"
104 | 
105 | for song_id in song_ids:
106 | if not isinstance(song_id, int):
107 | raise Exception("每个参数必须为整型")
108 | 
109 | url = self._get_api_url(self.APISongDetails)
110 | _q = "{\"songIds\":%s}" % list(song_ids)
111 | params = {
112 | "_q": _q,
113 | "_s": self._get_params__s(self.APISongDetails, _q)
114 | }
115 | result = self.session.get(url=url, params=params).json()
116 | return self._dispose(result)
117 | 
118 | # 获取虾米单独一首歌曲详情
119 | def get_song_single_info(self, *song_id_str) -> dict:
120 | '''
121 | :param song_id_str: 歌曲的字符形式ID(该接口一次只处理一个)
122 | :return: 歌曲的详情信息
123 | '''
124 | 
125 | 
126 | url = self._get_api_url(self.APISingleSongInfo)
127 | _q = "{\"songId\":\"%s\"}" % (song_id_str)
128 | params = {
129 | "_q": _q,
130 | "_s": self._get_params__s(self.APISingleSongInfo, _q)
131 | }
132 | result = self.session.get(url=url, params=params).json()
133 | return self._dispose(result)
134 | 
135 | # 获取歌曲的下载地址
136 | def get_song_download_url(self, *song_ids):
137 | download_url_dict = {}
138 | song_details = self.get_song_details(*song_ids)
139 | songPlayInfos = song_details["result"]["data"]["songPlayInfos"]
140 | for songPlayInfo in songPlayInfos:
141 | song_download_url = songPlayInfo["playInfos"][0]["listenFile"] or songPlayInfo["playInfos"][1]["listenFile"]
142 | song_id = songPlayInfo["songId"]
143 | download_url_dict[song_id] = song_download_url
144 | 
145 | print("歌曲下载地址为:", download_url_dict)
146 | 
147 | # 处理爬虫获取到的数据,这里我就输出值
148 | def _dispose(self, data):
149 | # pprint.pprint(data)
150 | return data
151 | 
152 | # 获取加密字符串_s
153 | def _get_params__s(self, api: str, _q: str = "") -> str:
154 | '''
155 | :param api: URL的地址
156 | :param _q: 需要加密的参数
157 | :return: 加密字符串
158 | '''
159 | xm_sg_tk = self._get_xm_sg_tk()
160 | data = xm_sg_tk + "_xmMain_" + api + "_" + _q
161 | return md5(bytes(data, encoding="utf-8")).hexdigest()
162 | 
163 | # 获取xm_sg_tk的值,用于对数据加密的参数
164 | def _get_xm_sg_tk(self) -> str:
165 | xm_sg_tk = self.session.cookies.get("xm_sg_tk", None)
166 | assert xm_sg_tk is not None, "xm_sg_tk获取失败"
167 | return xm_sg_tk.split("_")[0]
168 | 
169 | # 获取虾米搜索结果
170 | def _get_xm_serch(self,song_name='在希望的田野上',page=2):
171 | url = self._get_api_url(self.APISearch)
172 | _q = '{"key":"%s","pagingVO":{"page":%s,"pageSize":30}}'%(song_name,page)
173 | params = {
174 | "_q": _q,
175 | "_s": self._get_params__s(self.APISearch, _q)
176 | }
177 | # 测试
178 | # print(self._get_params__s(self.APISearch, _q)) # 打印 _s
179 | result = self.session.get(url=url, params=params).json()
180 | return result
181 | 
182 | # 对搜索结果进行解析的函数
183 | def parms_search_songs(self,info):
184 | result_list =[]
185 | if "code" in info and info["code"] == "SUCCESS" and "result" in info and info["result"] and info["result"]["data"] and info["result"]["data"]["songs"]:
186 | for each in info["result"]["data"]["songs"]:
187 | 
188 | if "S_OFF" not in each["bizTags"]: #
189 | # print("S_OFF not in 在{}".format(each["bizTags"]))
190 | dic_ = {}
191 | dic_["audio2_albumName"] = each["albumName"]
192 | dic_["audio2_artistName"] = each["singers"]
193 | dic_["audio2_songName"] = each["songName"]
194 | dic_["audio2_songId"] = each["songId"]
195 | dic_["audio2_platform"] = "虾米音乐"
196 | dic_["audio2_songStringId"] = each['songStringId'] # 字符形式的ID
197 | dic_["audio2_url"] = 
"https://www.xiami.com/song/{}".format(dic_["audio2_songStringId"]) 198 | dic_["audio2_url_hash"] = md5_use(text=dic_["audio2_url"]) 199 | 200 | result_list.append(dic_) 201 | # else: 202 | # print("S_OFF 在{}".format(each["bizTags"])) 203 | return result_list 204 | 205 | # 虾米 容易尝试失败 单独的一次请求 206 | @retry(stop_max_attempt_number=5,wait_fixed=600) 207 | def get_response_single(self,url,params,proxy={},num=0): 208 | if proxy: 209 | result = self.session.get(url=url, headers=self.headers,params=params,proxies=proxy).json() 210 | elif not proxy: 211 | result = self.session.get(url=url, headers=self.headers,params=params).json() 212 | if "rgv587_flag" in result: 213 | # print("虾米音乐未获取成功 重新尝试") 214 | if num < 5: 215 | self.session = requests.Session() 216 | self.session.get(self.DOMAIN) 217 | return self.get_response_single(url,params,proxy=proxy, num=num + 1) 218 | else: 219 | print(" 单个页面请求尝试过多") 220 | return [] 221 | return result 222 | # 获取虾米搜索结果 223 | # @retry(stop_max_attempt_number=5,wait_fixed=600) 224 | def search_songs(self,song_name='在希望的田野上',proxy={},num=0): 225 | result_list = [] 226 | for page in range(config["xiami_search_offset"]["start"],config["xiami_search_offset"]["end"]): 227 | 228 | url = self._get_api_url(self.APISearch) 229 | _q = '{"key":"%s","pagingVO":{"page":%s,"pageSize":30}}'%(song_name,page) 230 | params = { 231 | "_q": _q, 232 | "_s": self._get_params__s(self.APISearch, _q) 233 | } 234 | # 测试 235 | # print(self._get_params__s(self.APISearch, _q)) # 打印 _s 236 | if proxy: 237 | result = self.get_response_single(url=url,params=params,proxy=proxy) 238 | else: 239 | result = self.get_response_single(url=url,params=params) 240 | # print(reget_song_single_infosult) 241 | # self._dispose(result) 242 | # if "rgv587_flag" in result: 243 | # print("虾米音乐未获取成功 重新尝试") 244 | # if num<5: 245 | # self.session = requests.Session() 246 | # self.session.get(self.DOMAIN) 247 | # return self.search_songs(song_name=song_name,proxy=proxy,num=num+1) 248 | # else: 249 | # print("尝试过多") 250 | # return [] 251 | for each in self.parms_search_songs(result): 252 | result_list.append(each) 253 | 254 | # print(result_list) 255 | return result_list 256 | def test(self): 257 | # self.get_daily_songs() 258 | # self._get_xm_sg_tk() 259 | # self.get_billboard_song(332) 260 | # self.get_billboard_all() 261 | # self.get_song_details(1813243760) 262 | # self.get_song_details(1806922983) # 测试 走在田野的路上 263 | # self.get_song_download_url(1813243760) 264 | self._get_xm_serch() 265 | pass 266 | def back_search_parms(self,name): 267 | url = self._get_api_url(self.APISearch) 268 | detail_info_list = [] 269 | for page in range(config["xiami_search_offset"]["start"],config["xiami_search_offset"]["end"]): 270 | detail_info_dic = {} 271 | _q = '{"key":"%s","pagingVO":{"page":%s,"pageSize":30}}' % (name, page) 272 | params = { 273 | "_q": _q, 274 | "_s": self._get_params__s(self.APISearch, _q) 275 | } 276 | detail_info_dic['params'] = params 277 | detail_info_dic['headers'] = self.headers 278 | detail_info_dic['requir_way'] = "GET" 279 | detail_info_dic['url'] = self._get_api_url(self.APISearch) 280 | detail_info_list.append(detail_info_dic) 281 | # print(detail_info_list) 282 | return detail_info_list 283 | 284 | def get_play_info_db(self, *song_id_str): 285 | info_dict = self.get_song_single_info(*song_id_str) 286 | song_info = info_dict.get('result', {}).get('data', {}).get('songDetail', {}) 287 | play_info_db = dict() 288 | play_info_db['stats_view'] = song_info.get('playCount') 289 | play_info_db['stats_share'] = 
song_info.get('shareCount')
290 | play_info_db['stats_like'] = song_info.get('favCount')
291 | play_info_db['stats_comment'] = info_dict.get('result', {}).get('data', {}).get('songExt', {}).get('commentCount')
292 | return play_info_db
293 | if __name__ == '__main__':
294 | xm = XiaMi()
295 | xm.back_search_parms(name='路在何方')
296 | # xm.test()
297 | proxy = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
298 | "host": config["proxyHost"],
299 | "port": config["proxyPort"],
300 | "user": config["proxyUser"],
301 | "pass": config["proxyPass"],
302 | }
303 | proxies = {
304 | "http": proxy,
305 | "https": proxy,
306 | }
307 | # xm.search_songs(song_name='七里香',proxy=proxies)
308 | # print(xm.get_song_details(379345))
309 | # print(xm.get_song_single_info("nnkRGy619bc").get('result', {}).get('data', {}).get('songDetail', {}).get('playCount'))
310 | # # playCount shareCount(fenxaing) favCount(xihuan)
311 | # print(xm.get_song_single_info("nnkRGy619bc").get('result', {}).get('data', {}).get('songExt', {}).get('commentCount'))
312 | # # commentCount
313 | # print(json.dumps(xm.get_song_single_info("nnkRGy619bc")))
314 | print(xm.get_play_info_db("xLDghmbd8d0")) -------------------------------------------------------------------------------- /虾米音乐spider/xiami_test_secret_parms.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*-
2 | # 享受雷霆感受雨露
3 | # author xyy,time:2020/6/25
4 | 
5 | from hashlib import md5
6 | 
7 | # 获取加密字符串_s
8 | def _get_params__s(api,_q) -> str:
9 | '''
10 | :param api: URL的地址 /api/search/searchSongs
11 | :param _q: 需要加密的参数 {"key":"在希望的田野上","pagingVO":{"page":2,"pageSize":30}}
12 | :param xm_sg_tk cookie 中 xm_sg_tk 去掉时间戳后的值 7f2df3233537f81aae848dc4f47bdeb8
13 | :return: 加密字符串
14 | '''
15 | xm_sg_tk = '7f2df3233537f81aae848dc4f47bdeb8' #
16 | data = xm_sg_tk + "_xmMain_" + api + "_" + _q
17 | # data = '7f2df3233537f81aae848dc4f47bdeb8_xmMain_/api/search/searchSongs_{"key":"在希望的田野上","pagingVO":{"page":2,"pageSize":30}}'
18 | # data = 'e2853d0e0c49aab4a44dce64fd26b4ba_xmMain_/api/search/searchSongs_{"key":"在希望的田野上","pagingVO":{"page":1,"pageSize":30}}'
19 | return md5(bytes(data, encoding="utf-8")).hexdigest()
20 | 
21 | 
22 | print(_get_params__s('/api/search/searchSongs', '{"key":"在希望的田野上","pagingVO":{"page":2,"pageSize":30}}')) -------------------------------------------------------------------------------- /虾米音乐spider/使用说明: -------------------------------------------------------------------------------- 1 | 功能实现: 2 | 实现部分接口的解析获取数据 3 | 4 | 主要脚本: 5 | xiami_audio_spider.py xiami这个类包含所有的函数API 6 | xiami_test_secret_parms.py 测试单独api 和 _q生成的加密参数 7 | 8 | 注意事项: 9 | 爬取过程中 自己添加代理,频繁访问会滑块验证,其余接口都是一个路子,接口api 以及接口参数 -------------------------------------------------------------------------------- /起点中文网详情字体加密破解/qidian_novel_info_spider.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*-
2 | # Chance favors the prepared mind. 
3 | # author : pyl owo, 4 | # time : 2020/9/21 5 | import json 6 | import random 7 | import re 8 | 9 | from fake_useragent import UserAgent 10 | from fontTools.ttLib import TTFont 11 | from lxml import etree 12 | import requests 13 | from my_font_content import XYYTTFont 14 | 15 | # 获取代理 16 | def get_proxy(): 17 | pass 18 | 19 | # 统一请求函数 20 | def unify_requests(method="GET",url="",headers={},proxies={},data={},verify=False,cookies={}): 21 | if method=="GET": 22 | response = requests.get(url, headers=headers,proxies=proxies,data=data,cookies=cookies,timeout=5) 23 | return response 24 | else: 25 | response = requests.post(url, headers=headers,proxies=proxies,data=data,verify=verify,cookies=cookies,timeout=5) 26 | return response 27 | class SFQingNovel: 28 | def __init__(self, use_proxy=True): 29 | self.proxy = get_proxy() if use_proxy else None 30 | self.headers = { 31 | 'User-Agent': UserAgent().random, 32 | "Proxy-Tunnel": str(random.randint(1, 10000)), 33 | 'authority': 'book.qidian.com', 34 | 'cache-control': 'max-age=0', 35 | 'upgrade-insecure-requests': '1', 36 | 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36', 37 | 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 38 | 'sec-fetch-site': 'same-site', 39 | 'sec-fetch-mode': 'navigate', 40 | 'sec-fetch-user': '?1', 41 | 'sec-fetch-dest': 'document', 42 | 'referer': 'https://www.qidian.com/', 43 | 'accept-language': 'zh-CN,zh;q=0.9', 44 | # 'Cookie': 'newstatisticUUID=1600686041_884209914; _csrfToken=nXOEpjuFF7PUkwPJoOkBd7dTo2BV5jSkPu3suGGs' 45 | } 46 | # self.novel_url_pre = "https://t.shuqi.com/cover/" 47 | 48 | # 获取小说所有详细信息 49 | def get_novel_info(self, novel_url, **kwargs): 50 | respose = unify_requests(url=novel_url, headers=self.headers, proxies=self.proxy) 51 | search_result = self.parse_novel_info(respose, novel_url, **kwargs) 52 | return search_result 53 | 54 | def get_id(self, novel_url, **kwargs): 55 | return novel_url.split('/')[-1] 56 | 57 | def get_info(self, info_response, **kwargs): 58 | number_dict = { 59 | '.notdef': "薛忆阳", 60 | 'period': '.', 61 | 'zero': '0', 62 | 'one': '1', 63 | 'two': '2', 64 | 'three': '3', 65 | 'four': '4', 66 | 'five': '5', 67 | 'six': '6', 68 | 'seven': '7', 69 | 'eight': '8', 70 | 'nine': '9', 71 | } 72 | response = info_response 73 | 74 | # 拿到下载字体的网址 75 | # @font-face.*?src: url.*?src: url(.*?) format('woff'), 76 | content = re.search(re.compile(r"@font-face.*?src: url.*?src: url(.*?)format.*?,", re.S), response.text) 77 | # ('https://qidian.gtimg.com/qd_anti_spider/yMxThZoL.woff') 78 | font_url = content.groups()[0].strip("( | )").strip("'") 79 | # print(font_url) 80 | 81 | font_content = unify_requests(url=font_url, headers={ 82 | 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0'}, proxies=self.proxy).content 83 | 84 | # with open('qidian_lx.woff', 'wb') as f: 85 | # f.write(font_content) 86 | # 87 | # font1 = TTFont('qidian_lx.woff') 88 | # font1.saveXML('qidian_lx.xml') 89 | 90 | # 源码中提取十进制数据: 91 | 92 | data = re.findall(re.compile( 93 | r'
r'<em><span class="\w+">(.*?)</span></em>.*?<em><span class="\w+">(.*?)</span></em>.*?<em><span class="\w+">(.*?)</span></em>.*?<em><span class="\w+">(.*?)</span></em>',
94 | re.S), response.text)[0]  # NOTE: the literal HTML tags inside this pattern were lost in transcription; reconstructed assuming each encrypted number sits in an <em><span class="...">&#...;</span></em> block
95 | 
96 | four_list = []
97 | # 遍历这四组数据
98 | for d in data:
99 | # print(d) # 拿到元组中的一个 𘜸𘜹𘜹𘜶𘜽𘜽
100 | one_list = []
101 | d = d.split(';') # 去除分号
102 | # 遍历每组数据
103 | for x in d:
104 | res = x.replace('&#', '')
105 | if res: # 跳过空片段(split 会在末尾产生空字符串)
106 | # 将res转成十进制整数
107 | a = int(res) # 先转化成int类型
108 | one_list.append(a)
109 | 
110 | four_list.append(one_list)
111 | map_dict = XYYTTFont(font_content).getBestCmap()
112 | # print(map_dict)
113 | result_list = []
114 | # 遍历含有四组数据的列表
115 | for one in four_list:
116 | two_string = ""
117 | # 遍历每一组数据
118 | for a in one:
119 | # print("a",a)
120 | if a in map_dict:
121 | number = map_dict[a] # 找到对应的键
122 | number = number_dict[number] # 通过键找到对应的值
123 | # print(number)
124 | two_string += number
125 | 
126 | result_list.append(two_string)
127 | return result_list
128 | def get_int_num(self, numstr):
129 | if '.' in numstr:
130 | return int(numstr.replace('.','')) * 100  # e.g. '339.18' -> 3391800 (assumes two decimal places)
131 | else:
132 | return int(numstr)
133 | # 解析小说详情响应
134 | def parse_novel_info(self, respose_info, novel_url='', **kwargs) -> dict:
135 | try:
136 | # print(novel_url)
137 | response_data = etree.HTML(respose_info.text)
138 | info_list = self.get_info(respose_info, **kwargs)
139 | except Exception as e:
140 | print(e)
141 | return {}
142 | else:
143 | # info_book_dict = info_dict.get('book', {})
144 | novel_dict = dict()
145 | novel_dict['all_recommend_str'] = self.get_int_num(info_list[2]) # 总推荐数 str book_interact
146 | novel_dict['month_recommend_str'] = None # 月推荐数 str
147 | novel_dict['week_recommend_str'] = self.get_int_num(info_list[3]) # 周推荐数 str
148 | novel_dict['all_read_int'] = None # 总阅读数 int
149 | novel_dict['month_read_int'] = None # 月阅读数 int
150 | novel_dict['week_read_int'] = None # 周阅读数 int
151 | novel_dict['all_words_number_int'] = self.get_int_num(info_list[0]) # 总字数
152 | novel_dict['book_status_str'] = response_data.xpath('//p[@class="tag"]/span/text()')[0] # 书籍状态 (连载,完结,暂无)bookCP
153 | novel_dict['book_property_str'] = response_data.xpath('//p[@class="tag"]/span/text()')[1] # 书籍属性 (免费,会员,限免)
154 | novel_dict['author_type_str'] = "".join(response_data.xpath('//div[@class="author-photo"]/span/text()')) # 作者类型 (金牌,签约,独立 默认无)
155 | novel_dict['book_type_str'] = '|'.join(response_data.xpath('//p[@class="tag"]/a/text()')) # 书籍分类 (玄幻 ,科幻,言情...)按搜索结果来多个按|分割
156 | novel_dict['book_update_time'] = ''.join(response_data.xpath('//li[@class="update"]/div/p[@class="cf"]/em/text()')) # 书籍更新日期 年-月-日
157 | novel_dict['book_zong_zhang_jie_int'] = '' # 书籍总的章节 完结的,未完结就填目前的总章节
158 | novel_dict['book_zui_xin_zhang_jie_name_str'] = ''.join(response_data.xpath('//li[@class="update"]/div/p[@class="cf"]/a/text()')) # 最新章节名称
159 | novel_dict['book_introduce_text'] = ''.join(response_data.xpath('//div[@class="book-intro"]/p//text()')).replace(' ', '').replace('\u3000', '').replace('\r', '').replace('\n', '').replace('\t', '') # 书籍简介 text
160 | novel_dict['book_lable_str'] = '|'.join(response_data.xpath('//p[@class="tag"]/a/text()')) # 书籍标签 (用|分割的字符串 ''科幻|现实|励志'')
161 | novel_dict['book_cover_image_str'] = "https:" + "".join(response_data.xpath('//div[@class="book-information cf"]/div[@class="book-img"]/a/img/@src')).replace('\n', '') # 书籍封面 URL
162 | novel_dict['book_detail_url_str'] = novel_url # 书籍详情URL
163 | novel_dict['book_detail_id_int'] = None # 书籍详情ID 数字形式
164 | novel_dict['book_detail_id_str'] = None # 书籍详情ID 字符形式
165 | novel_dict['book_zhan_dian_str'] = None # 书籍站点 (男生,女生,暂无)
166 | novel_dict['book_publish_str'] = 
'起点中文网' # 出版社 默认侵权平台' 167 | novel_dict['book_commeds_int'] = None # 书籍评论数 Pinglunfont 168 | novel_dict['author_grade_float'] = None # 作者评分 169 | novel_dict['author_id_str'] = None # 作者ID 字符形式 ## 新增 170 | novel_dict['author_page_url_str'] = "https:" + ''.join(response_data.xpath('//a[@class="writer"]/@href')) # 作者主页链接 userId 171 | author_info_data = response_data.xpath('//ul[@class="work-state cf"]/li/em/text()') 172 | novel_dict['author_book_number_int'] = author_info_data[0] # 作者书籍总数 173 | novel_dict['author_likes_int'] = None # 作者获赞总数 174 | novel_dict['author_all_words_number_str'] = author_info_data[1] # 作者累计创作字数 175 | novel_dict['author_produce_days_str'] = author_info_data[2] # 作者累计创作天数 176 | novel_dict['author_fens_number_int'] = None # 作者粉丝数 177 | novel_dict['author_head_image_url_str'] = "https:" + "".join(response_data.xpath('//div[@class="author-photo"]/a/img/@src')) # 作者头像URL 178 | # novel_dict[''] = '' # 179 | return novel_dict 180 | 181 | 182 | # 统一的调用 search_novels 183 | search_novel_info = SFQingNovel(use_proxy=True).get_novel_info 184 | if __name__ == "__main__": 185 | result = search_novel_info('https://book.qidian.com/info/1010734492') 186 | print(result) 187 | -------------------------------------------------------------------------------- /起点中文网详情字体加密破解/字体文件解析.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 享受雷霆感受雨露 3 | # author xyy,time:2020/9/29 4 | from fontTools.ttLib import TTFont 5 | import requests 6 | # 统一请求函数 7 | def unify_requests(method="GET",url="",headers={},proxies={},data={},verify=False,cookies={}): 8 | if method=="GET": 9 | response = requests.get(url, headers=headers,proxies=proxies,data=data,cookies=cookies,timeout=5) 10 | return response 11 | else: 12 | response = requests.post(url, headers=headers,proxies=proxies,data=data,verify=verify,cookies=cookies,timeout=5) 13 | return response 14 | 15 | 16 | def zhuan_xml(): 17 | 18 | # 加载字体文件: 19 | font = TTFont('/Users/quanlifang/Desktop/my_git_file/Python_Spider_All/起点中文网详情字体加密破解/FkMwMtuL.ttf') 20 | 21 | # 转为xml文件: 22 | font.saveXML('/Users/quanlifang/Desktop/my_git_file/Python_Spider_All/起点中文网详情字体加密破解/FkMwMtuL.xml') 23 | def get_font_yingse(font_url): 24 | response = unify_requests(url=font_url) 25 | content = response.content 26 | # world = TTFont('/Users/quanlifang/Desktop/my_git_file/Python_Spider_All/起点中文网详情字体加密破解/wwOMhmLd.ttf') 27 | world = TTFont('/Users/quanlifang/Desktop/my_git_file/Python_Spider_All/起点中文网详情字体加密破解/wwOMhmLd.ttf') 28 | # 读取响应的映射关系 29 | # uni_list = world['cmap'].tables[0].ttFont.getGlyphOrder() # 'cmap' 表示汉字对应的映射 为unicode编码 30 | # print(uni_list) # 按顺序拿到各个字符的unicode编码 31 | # print(world.getGlyphOrder()) 32 | # print(world.getBestCmap()) # 获得对应的字符对应的值 33 | # {100097: 'zero', 100099: 'nine', 100100: 'eight', 100101: 'five', 100102: 'seven', 100103: 'two', 100104: 'one', 100105: 'four', 100106: 'period', 100107: 'six', 100108: 'three'} 34 | # info = world.getGlyphOrder() 35 | # dic_info = {} 36 | # for each in info: 37 | # # print(world['glyf'][each].coordinates) 38 | # # print(list(world['glyf'][each].coordinates)) # 获得一个个的元组 判断比较就好了 39 | # dic_info[each] = list(world['glyf'][each].coordinates) 40 | # print(dic_info) 41 | # {} 字典里面 对应的值对应的 list 42 | # print(dir(world)) # 按顺序拿到各个字符的unicode编码 43 | # print(dir(world["cmap"].tableTag)) # 按顺序拿到各个字符的unicode编码 44 | # print(dir(world["glyf"])) # 按顺序拿到各个字符的unicode编码 45 | # exit(0) 46 | # print(world['glyf']['one'].coordinates) 47 | # print(world['glyf']['one']) 48 | # 
print(dir(world['glyf']['one'])) 49 | # print(world.getGlyphOrder()) 50 | # print(world.tables) 51 | # print(dir(world)) 52 | # unicode_list= [eval(r"u'\u" + uni[3:] + "'") for uni in uni_list[2:]] 53 | # unicode_list= [uni.encode('utf-8').decode('unicode-escape') for uni in unicode_list] 54 | # print('unicode_list = ', unicode_list) 55 | # 56 | # font = TTFont('898a472b.woff') # 打开文件 57 | # font.saveXML('898a472b.xml') # 保存为xml文件 58 | # 59 | # # 解析xml文件 60 | # from xml.etree import ElementTree as ET 61 | # 62 | # tree = ET.parse('898a472b.xml') 63 | # root = tree.getroot() # 一个Element对象 64 | # childs = root.getchildren() 65 | # 66 | # for c in childs: 67 | # for cc in c.getchildren(): 68 | # datas = {} 69 | # datas[cc.tag] = cc.attrib 70 | # print(datas, '\n\n\n') 71 | return world.getBestCmap() 72 | if __name__ == '__main__': 73 | pass 74 | # tet() 75 | # zhuan_xml() 76 | 77 | """:cvar 78 | 79 | 映射 字典 80 | {'.notdef': [(256, 0), (256, 1280), (1280, 1280), (1280, 0), (288, 32), (1248, 32), (1248, 1248), (288, 1248)], 'period': [(186, 0), (186, 205), (391, 205), (391, 0)], 'zero': [(85, 723), (85, 983), (192, 1300), (403, 1472), (563, 1472), (681, 1472), (859, 1377), (975, 1198), (1041, 941), (1041, 723), (1041, 465), (935, 148), (724, -25), (563, -25), (351, -25), (230, 127), (85, 310), (270, 723), (270, 362), (439, 123), (563, 123), (687, 123), (856, 363), (856, 723), (856, 1085), (687, 1323), (561, 1323), (437, 1323), (363, 1218), (270, 1084)], 'one': [(763, 0), (583, 0), (583, 1147), (518, 1085), (307, 961), (223, 930), (223, 1104), (374, 1175), (600, 1377), (647, 1472), (763, 1472)], 'two': [(1031, 173), (1031, 0), (62, 0), (60, 65), (83, 125), (120, 224), (283, 416), (437, 542), (676, 738), (844, 967), (844, 1069), (844, 1176), (691, 1323), (568, 1323), (438, 1323), (282, 1167), (281, 1029), (96, 1048), (115, 1255), (363, 1472), (572, 1472), (783, 1472), (1029, 1238), (1029, 1065), (1029, 977), (957, 807), (790, 619), (596, 455), (434, 319), (342, 222), (312, 173)], 'three': [(86, 387), (266, 411), (297, 258), (446, 123), (553, 123), (680, 123), (855, 299), (855, 429), (855, 553), (693, 714), (568, 714), (517, 714), (441, 694), (461, 852), (479, 850), (490, 850), (605, 850), (789, 970), (789, 1095), (789, 1194), (655, 1324), (549, 1324), (444, 1324), (304, 1192), (284, 1060), (104, 1092), (137, 1273), (371, 1472), (545, 1472), (665, 1472), (867, 1369), (974, 1191), (974, 1091), (974, 996), (872, 840), (772, 794), (902, 764), (1046, 575), (1046, 433), (1046, 241), (766, -26), (552, -26), (359, -26), (104, 204)], 'four': [(662, 0), (662, 351), (26, 351), (26, 516), (695, 1466), (842, 1466), (842, 516), (1040, 516), (1040, 351), (842, 351), (842, 0), (662, 516), (662, 1177), (203, 516)], 'five': [(85, 384), (274, 400), (295, 262), (448, 123), (556, 123), (686, 123), (866, 319), (866, 481), (866, 635), (693, 813), (553, 813), (466, 813), (326, 734), (286, 671), (117, 693), (259, 1446), (988, 1446), (988, 1274), (403, 1274), (324, 880), (456, 972), (601, 972), (793, 972), (1057, 706), (1057, 497), (1057, 298), (941, 153), (800, -25), (556, -25), (356, -25), (103, 199)], 'six': [(1019, 1107), (840, 1093), (816, 1199), (772, 1247), (699, 1324), (592, 1324), (506, 1324), (441, 1276), (356, 1214), (258, 976), (256, 756), (321, 855), (509, 951), (612, 951), (792, 951), (1045, 686), (1045, 476), (1045, 338), (926, 101), (718, -25), (586, -25), (361, -25), (77, 306), (77, 686), (77, 1111), (234, 1304), (371, 1472), (603, 1472), (776, 1472), (997, 1278), (284, 475), (284, 382), (363, 212), (505, 
123), (583, 123), (697, 123), (861, 307), (861, 465), (861, 617), (699, 792), (576, 792), (454, 792), (284, 617)], 'seven': [(97, 1274), (97, 1447), (1046, 1447), (1046, 1307), (906, 1158), (631, 664), (556, 403), (502, 219), (487, 0), (302, 0), (305, 173), (435, 663), (678, 1118), (815, 1274)], 'eight': [(362, 795), (250, 836), (142, 988), (142, 1094), (142, 1254), (372, 1472), (563, 1472), (755, 1472), (989, 1249), (989, 1089), (989, 987), (882, 836), (773, 795), (908, 751), (1049, 555), (1049, 419), (1049, 231), (783, -25), (566, -25), (349, -25), (83, 232), (83, 424), (83, 567), (228, 760), (326, 1100), (326, 996), (460, 864), (567, 864), (671, 864), (804, 995), (804, 1090), (804, 1189), (667, 1324), (565, 1324), (462, 1324), (326, 1192), (268, 423), (268, 346), (341, 202), (485, 123), (568, 123), (697, 123), (865, 289), (865, 417), (865, 547), (692, 717), (562, 717), (435, 717), (268, 549)], 'nine': [(112, 339), (285, 355), (307, 233), (431, 123), (528, 123), (611, 123), (736, 199), (816, 326), (870, 542), (870, 654), (870, 666), (869, 690), (815, 604), (628, 497), (519, 497), (337, 497), (85, 761), (85, 977), (85, 1200), (348, 1472), (546, 1472), (689, 1472), (926, 1318), (1049, 1033), (1049, 763), (1049, 482), (927, 149), (686, -25), (524, -25), (352, -25), (134, 166), (849, 986), (849, 1141), (684, 1323), (568, 1323), (448, 1323), (270, 1127), (270, 971), (270, 831), (439, 656), (563, 656), (688, 656), (849, 831)]} 81 | 82 | 83 | 84 | """ 85 | 86 | -------------------------------------------------------------------------------- /起点中文网详情字体加密破解/字体解密记录: -------------------------------------------------------------------------------- 1 | # 使用手册 2 | 执行 qidian_novel_info_spider.py 3 | 更换url 即可 4 | 5 | wwOMhmLd 6 | ['.notdef', 'period', 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine'] 7 | GlyphCoordinates([(763, 0),(583, 0),(583, 1147),(518, 1085),(307, 961),(223, 930),(223, 1104),(374, 1175),(600, 1377),(647, 1472),(763, 1472)]) 8 | 9 | FkMwMtuL 10 | ['.notdef', 'period', 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine'] 11 | GlyphCoordinates([(763, 0),(583, 0),(583, 1147),(518, 1085),(307, 961),(223, 930),(223, 1104),(374, 1175),(600, 1377),(647, 1472),(763, 1472)]) 12 | 13 | 两次结果 发现没有变化 ,但是以防万一,还是进行一个校验 增加一个值的误差值 14 | 15 | 16 | 第二次升级, 17 | 每一次 都是下载文件直接比对,这样多进程的时候是会容易出现问题的,同一个文件多次改动 18 | 19 | 升级 ttfont 20 | 改源代码 21 | 新增 XYYTTFont 函数 (改写了一些 原函数 TTFont)之前是file 地址,现在直接传入文件的 content 流 22 | 23 | 24 | 字体加密解决思路 网上一堆,就是字体文件的加密解密 25 | 两个点: 26 | world = TTFont('wwOMhmLd.ttf') 27 | # print(world.getGlyphOrder()) # 字典的value 字体文件有哪些值 对应的值 28 | ['.notdef', 'period', 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine'] 29 | 30 | # print(world.getBestCmap()) # 获得对应的字符对应的值 31 | {100181: 'two', 100183: 'zero', 100184: 'three', 100185: 'eight', 100186: 'seven', 100187: 'six', 100188: 'period', 100189: 'four', 100190: 'five', 100191: 'nine', 100192: 'one'} 32 | 33 | 34 | 35 | --------------------------------------------------------------------------------
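A minimal sketch of the tolerance check described in 字体解密记录 above: compare the same glyph's coordinates across two downloaded fonts, allowing a small per-point error margin, and load fonts straight from response bytes instead of writing them to disk (the XYYTTFont idea). The helper names here are illustrative, not part of the repo:

from io import BytesIO
from fontTools.ttLib import TTFont

def load_font_from_bytes(content: bytes) -> TTFont:
    # TTFont also accepts a file-like object, so no temp file is needed
    return TTFont(BytesIO(content))

def same_glyph(coords_a, coords_b, tol=2) -> bool:
    # point-by-point comparison with a tolerance of `tol` font units
    if len(coords_a) != len(coords_b):
        return False
    return all(abs(xa - xb) <= tol and abs(ya - yb) <= tol
               for (xa, ya), (xb, yb) in zip(coords_a, coords_b))

# e.g. same_glyph(font_a['glyf']['one'].coordinates, font_b['glyf']['one'].coordinates)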