├── python ├── pics │ ├── 1 │ ├── QQ截图20241228145652.jpg │ └── QQ截图20241228145737.jpg ├── dist │ └── 1 ├── gbutil │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── htmlutil.cpython-311.pyc │ │ └── imageutil.cpython-311.pyc │ ├── htmlutil.py │ └── imageutil.py └── get_gb_file.py ├── 国标下载.user.js └── README.md /python/pics/1: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /python/dist/1: -------------------------------------------------------------------------------- 1 | ceshi 2 | -------------------------------------------------------------------------------- /python/gbutil/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/pics/QQ截图20241228145652.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengdong0421/GB_tampermonkey/HEAD/python/pics/QQ截图20241228145652.jpg -------------------------------------------------------------------------------- /python/pics/QQ截图20241228145737.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengdong0421/GB_tampermonkey/HEAD/python/pics/QQ截图20241228145737.jpg -------------------------------------------------------------------------------- /python/gbutil/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengdong0421/GB_tampermonkey/HEAD/python/gbutil/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /python/gbutil/__pycache__/htmlutil.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengdong0421/GB_tampermonkey/HEAD/python/gbutil/__pycache__/htmlutil.cpython-311.pyc -------------------------------------------------------------------------------- /python/gbutil/__pycache__/imageutil.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chengdong0421/GB_tampermonkey/HEAD/python/gbutil/__pycache__/imageutil.cpython-311.pyc -------------------------------------------------------------------------------- /python/gbutil/htmlutil.py: -------------------------------------------------------------------------------- 1 | from time import sleep 2 | 3 | import requests 4 | from bs4 import BeautifulSoup 5 | import string 6 | import urllib.request 7 | 8 | 9 | def get_bgs(page_divs): 10 | """ 11 | 解析class=page的div,返回div的id和bg对应关系 12 | :param page_divs: 13 | :return: 14 | """ 15 | bgs = [] 16 | for page_div in page_divs: 17 | page_id = page_div.xpath('./@id')[0] 18 | page_bg = page_div.xpath('./@bg')[0] 19 | bgs.append({'id': page_id, 'bg': page_bg}) 20 | return bgs 21 | 22 | 23 | def get_img_urls(bgs: list, jsessionid: string, common_headers): 24 | img_urls = [] 25 | count = 0 26 | for bg in bgs: 27 | count += 1 28 | print(f'\r解析进度:%.2f' % (count/len(bgs) * 100) + '%', end="") 29 | bg_str = bg['bg'] 30 | url_img = 'http://c.gb688.cn/bzgk/gb/viewGbImg?fileName=' + bg_str 31 | req_img = urllib.request.Request(url_img, headers=common_headers) 32 | req_img.add_header('Cookie', 'JSESSIONID=' + jsessionid) 33 | with urllib.request.urlopen(req_img) as res_img: 34 | if res_img.status == 200: 35 | img_urls.append({'id': bg['id'], 'bg': bg['bg'], 'url': res_img.url}) 36 | return img_urls 37 | 38 | 39 | def download_img(img_url: string, jsessionid: string, common_headers, img_name): 40 | print('开始下载: ' + img_name) 41 | req_img_redirect = urllib.request.Request(img_url, headers=common_headers) 42 | req_img_redirect.add_header('Cookie', 'JSESSIONID=' + jsessionid) 43 | req_img_redirect.add_header('Cache-Alive', 'chunked') 44 | img_response = urllib.request.urlopen(req_img_redirect) 45 | 46 | page_img = img_response.read() 47 | 48 | with open(img_name, 'w+b') as pageImgFile: 49 | pageImgFile.write(page_img) 50 | pageImgFile.close() 51 | sleep(2) 52 | print('下载完成: ' + img_name) 53 | 54 | return 0 55 | 56 | 57 | def get_title(full_html): 58 | """ 59 | 从html文件内容获取title 60 | :param full_html: 61 | :return: 62 | """ 63 | return full_html.xpath('//title/text()')[0] 64 | 65 | 66 | def get_hcno(preview_url): 67 | """ 68 | 从在线预览url中提取hcno 69 | :param preview_url: 70 | :return: 71 | """ 72 | return preview_url.split('hcno=')[1] 73 | 74 | 75 | def get_image_name(image_url): 76 | """ 77 | 从图片url地址中获取图片文件名 78 | :param image_url: 79 | :return: 80 | """ 81 | return image_url.split('/')[-1] 82 | 83 | 84 | def get_gb_code_from_image_url(image_url: string): 85 | """ 86 | 从图片url地址中获取国标编码 87 | :param image_url: 88 | :return: 89 | """ 90 | return image_url.split('/')[-2].replace('/', '_') 91 | 92 | 93 | def get_gb_code_from_title(gb_title: string): 94 | """ 95 | 从html页面title中获取国标编码 96 | :param gb_title: 97 | :return: 98 | """ 99 | return gb_title.split('|')[1].replace(' ', '').replace('/', '_') 100 | -------------------------------------------------------------------------------- /python/get_gb_file.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import urllib.request 3 | import urllib.parse 4 | from datetime import datetime as dt 5 | import threading 6 | from gbutil import imageutil, htmlutil 7 | from lxml import html 8 | 9 | 10 | # 定义一些常量 11 | # 在线预览链接 12 | # url_preview = "http://c.gb688.cn/bzgk/gb/showGb?type=online&hcno=5ED2A10D48EE5AFF5D7C04F2683767CC" 13 | url_preview = "http://c.gb688.cn/bzgk/gb/showGb?type=online&hcno=2544D73CA09ACBA031ACCF546FFF871B" 14 | url_preview = "" 15 | # 接收用户输入的在线预览url 16 | if url_preview == "": 17 | url_preview = input('请粘贴标准在线预览界面的url:') 18 | print(url_preview) 19 | 20 | 21 | # 验证码url 22 | url_code = "http://c.gb688.cn/bzgk/gb/gc?_" + str(round(dt.timestamp(dt.now())*100)) 23 | # 验证码验证url post 24 | url_vc = "http://c.gb688.cn/bzgk/gb/verifyCode" 25 | # User Agent 26 | UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36' 27 | # 共同请求头 28 | common_headers = { 29 | 'Connection': 'keep-alive', 30 | 'Host': 'c.gb688.cn', 31 | 'User-Agent': UA, 32 | 'Referer': url_preview 33 | } 34 | 35 | # 程序当前目录 36 | cwd = os.getcwd() 37 | # pdf image暂存目录,和程序同级。该目录下建立以hcno为名的文件夹,存放该国标的pdfimage 38 | tmp_dir = cwd + '\\' + 'tmp' 39 | # 输出文件目录,和程序同级。该目录下建一个目录,以hcno命名,再建两个目录,images和pdf分别存放图片和pdf 40 | output_dir = cwd + '\\' + 'output' 41 | 42 | print('当前目录:' + cwd) 43 | 44 | 45 | # 创建所需目录 46 | imageutil.create_dir(tmp_dir) 47 | imageutil.create_dir(output_dir) 48 | 49 | 50 | def show_img_thread(): 51 | imageutil.show_image('code.jpg') 52 | 53 | 54 | # 请求在线预览url,获取cookie 55 | req = urllib.request.Request(url_preview, headers=common_headers) 56 | req.remove_header('Referer') 57 | req.add_header('Referer', 'https://openstd.samr.gov.cn/') 58 | with urllib.request.urlopen(req) as f: 59 | setcookie = f.getheader('Set-Cookie') 60 | jsessionid = setcookie.split(";")[0].split("=")[1] 61 | # print(jsessionid) 62 | 63 | # 带cookie请求验证码url,获取验证码图片 64 | req_code = urllib.request.Request(url_code, headers=common_headers) 65 | req_code.add_header('Cookie', 'JSESSIONID=' + jsessionid) 66 | res2 = urllib.request.urlopen(req_code) 67 | img = res2.read() 68 | 69 | # 存储验证码 70 | with open('code.jpg', 'w+b') as f2: 71 | f2.write(img) 72 | f2.close() 73 | 74 | # 开启显示验证码线程 75 | t_show_img = threading.Thread(target=show_img_thread) 76 | # 将子线程的daemon属性设置为True,这样,当主线程结束时,子线程也会随之结束 77 | t_show_img.daemon = True 78 | t_show_img.start() 79 | 80 | 81 | # 读取用户输入 82 | print('验证码已存储在' + cwd + '\\code.jpg') 83 | print('弹出的验证码窗口被关闭后,忘记验证码可打开该code.jpg文件查看') 84 | vcode = input("请输入验证码:") 85 | 86 | # 输入验证码后,请求验证码验证url,获取在线预览页面html内容 87 | data = urllib.parse.urlencode({'verifyCode': vcode}) # post请求需要传的数据 88 | data = data.encode('ascii') 89 | # 请求验证 90 | req_verify = urllib.request.Request(url_vc, headers=common_headers) 91 | req_verify.add_header('Cookie', 'JSESSIONID=' + jsessionid) 92 | req_verify.add_header('Origin', 'http://c.gb688.cn') 93 | 94 | with urllib.request.urlopen(req_verify, data) as f3: 95 | verify_result = f3.read().decode('utf-8') 96 | if verify_result == 'success': 97 | print('验证码正确') 98 | else: 99 | print('验证码错误, 将退出程序') 100 | exit(1) 101 | 102 | if f3.status == 200: 103 | print('请求页面...') 104 | req4 = urllib.request.Request(url_preview, headers=common_headers) 105 | req4.add_header('Cookie', 'JSESSIONID='+jsessionid) 106 | r4 = urllib.request.urlopen(req4) 107 | page_byte_content = r4.read() 108 | # 在线预览页面全部html内容 109 | page_str_content = page_byte_content.decode('utf-8') 110 | # print(page_str_content) 111 | 112 | 113 | # 解析html内容,获取页面图片url 114 | print('请求页面完成,开始解析页面...') 115 | parsed_html = html.fromstring(page_str_content) 116 | 117 | # 所有pdf页面div, 即class为page的div,是一个列表 118 | page_divs = parsed_html.xpath('//div[@class="page"]') 119 | # 获取所有页面所需的bg 120 | bgs = htmlutil.get_bgs(page_divs) 121 | 122 | # 请求页面图片url,下载图片(先下载图片,后面再统一拼接) 123 | # 获取所有pdf页面图片url 124 | img_urls = htmlutil.get_img_urls(bgs, jsessionid, common_headers) 125 | print("\n解析完成,开始下载页面图片") 126 | 127 | # 创建文件夹 128 | title = htmlutil.get_title(parsed_html) 129 | gb_code = htmlutil.get_gb_code_from_title(title) 130 | tmp_img_path = tmp_dir + '\\' + gb_code 131 | output_pdf_path = output_dir + '\\' + gb_code + '\\pdf' 132 | output_img_path = output_dir + '\\' + gb_code + '\\images' 133 | 134 | imageutil.create_dir(tmp_img_path) 135 | imageutil.create_dir(output_pdf_path) 136 | imageutil.create_dir(output_img_path) 137 | 138 | with open(output_dir + '\\' + gb_code + '\\info.txt', 't+w') as gbinfo: 139 | gbinfo.write(f'国标编码:{gb_code}\n') 140 | gbinfo.write(f'国标在线预览地址:{url_preview}\n') 141 | gbinfo.close() 142 | 143 | # 下载图片 144 | for url_map in img_urls: 145 | url = url_map['url'] 146 | image_name = htmlutil.get_image_name(url) 147 | if not os.path.exists(tmp_img_path + '\\' + image_name): 148 | htmlutil.download_img(url, jsessionid, common_headers, tmp_img_path + '\\' + image_name) 149 | else: 150 | print('图片已存在:' + tmp_img_path + '\\' + image_name) 151 | 152 | 153 | # 拼接图片生成各页面image 154 | page_no = 1 155 | for page_div in page_divs: 156 | print(f'\r拼接第 ' + str(page_no) + ' 页, 共' + str(len(img_urls)) + '页', end="") 157 | page_no += 1 158 | imageutil.merge_image(page_div, img_urls, tmp_img_path, output_img_path) 159 | 160 | print("\n") 161 | print('拼接图片完成,存储于:' + output_img_path) 162 | print('tmp目录内是下载的临时文件,可删除') 163 | 164 | # pause = input("\n\n按回车退出程序") 165 | # 各页面生成pdf文件 166 | imageutil.images2pdf(output_img_path, output_pdf_path, gb_code) 167 | 168 | pause = input("\n\n按回车退出程序") 169 | -------------------------------------------------------------------------------- /python/gbutil/imageutil.py: -------------------------------------------------------------------------------- 1 | import math 2 | import os.path 3 | import re 4 | import string 5 | import tkinter as tk 6 | from PIL import Image, ImageTk 7 | from spire.pdf.common import * 8 | from spire.pdf import * 9 | from gbutil import htmlutil 10 | import os 11 | 12 | 13 | # 简易版——图片转换为pdf,pdf页面随图片大小浮动 14 | def images2pdf(image_file_path, output_path, gb_code): 15 | os.chdir(image_file_path) 16 | images = [] 17 | file_lis = os.listdir(image_file_path) 18 | output_path_pdf = f"{output_path}/{gb_code}.pdf" 19 | con = 0 20 | for image_path in file_lis: 21 | if image_path.endswith(('.jpg', '.png')): 22 | image = Image.open(image_path) 23 | # 缩小图片尺寸以减小导出文件大小 24 | images.append(image.convert("RGB").resize((int(image.width * 0.6), int(image.height * 0.6)))) 25 | con += 1 26 | print(f'\r转换为pdf,进度:%.2f' % (con/len(file_lis) * 100) + '%', end='') 27 | # print(image_path + ':第%d张' % con) 28 | images[0].save(output_path_pdf, save_all=True, append_images=images[1:], resolution=168) 29 | print(f'\n转换pdf完成,存放于:{output_path}') 30 | 31 | 32 | def images2pdf2(folder_path, output_path, gb_code): 33 | # spire.pdf试用版有水印,有页数限制,只能添加10页 34 | # 创建一个PdfDocument类的对象 35 | pdf = PdfDocument() 36 | 37 | # 清除文档页边距 38 | pdf.PageSettings.SetMargins(0.0) 39 | 40 | # 循环遍历文件夹中的图片 41 | # folder_path = "Images/" 42 | for root, directories, files in os.walk(folder_path): 43 | for file_name in files: 44 | file_path = os.path.join(root, file_name) 45 | # 载入图片 46 | image = PdfImage.FromFile(file_path) 47 | # 获取图片的宽和高 48 | image_width = image.PhysicalDimension.Width 49 | image_height = image.PhysicalDimension.Height 50 | # 在文档中创建与图片相同大小的页面 51 | page = pdf.Pages.Add(SizeF(image_width, image_height)) 52 | # 将图片绘制在页面上 53 | page.Canvas.DrawImage(image, 0.0, 0.0, image_width, image_height) 54 | 55 | # 保存PDF文档 56 | pdf.SaveToFile(output_path + f"/{gb_code}.pdf") 57 | pdf.Close() 58 | 59 | 60 | def create_dir(output_dir: string): 61 | if not os.path.exists(output_dir): 62 | try: 63 | os.makedirs(output_dir) 64 | print('输出目录' + output_dir + '已创建') 65 | except Exception as e: 66 | print('创建目录' + output_dir + '失败,请手动创建后再运行程序') 67 | 68 | 69 | def center_window(root, width, height): 70 | # 获取屏幕尺寸 71 | screen_width = root.winfo_screenwidth() 72 | screen_height = root.winfo_screenheight() 73 | 74 | # 计算窗口位置 75 | x = (screen_width // 2) - (width // 2) 76 | y = (screen_height // 2) - (height // 2) 77 | 78 | # 设置窗口位置 79 | root.geometry(f"{width}x{height}+{x}+{y}") 80 | root.attributes('-topmost', 'true') 81 | 82 | 83 | def show_image(image_path: str) -> None: 84 | """ 85 | 根据图片地址,弹出对话框显示一张图片 86 | :param image_path: 图片路径 87 | :return: None 88 | """ 89 | 90 | # 创建一个简单的Tkinter窗口 91 | # win = tk.Toplevel() 92 | # win.attributes('-topmost', 'true') 93 | 94 | root = tk.Tk() 95 | root.title("验证码") 96 | 97 | # 加载图片 98 | image = Image.open(image_path) 99 | image = ImageTk.PhotoImage(image) 100 | h = image.height() 101 | w = image.width() 102 | 103 | # 创建一个标签来显示图片 104 | label = tk.Label(root, image=image) 105 | label.place(relx=0.5, rely=0.5, anchor="center") 106 | 107 | # 设置窗口位置在屏幕中央 108 | center_window(root, w, h) 109 | label.pack() 110 | 111 | # 进入Tkinter事件循环 112 | root.mainloop() 113 | 114 | 115 | def merge_image(page_div, img_urls, tmp_img_path, output_img_path): 116 | if os.path.exists(tmp_img_path): 117 | if os.path.isdir(tmp_img_path) and len(os.listdir(tmp_img_path)) > 0: 118 | 119 | page_id = page_div.xpath('./@id')[0] 120 | # print('拼接第' + page_id + '页') 121 | for data in img_urls: 122 | if data['id'] == page_id: 123 | url = data['url'] 124 | image_name = htmlutil.get_image_name(url) 125 | # pdf页面大小 126 | pdf_style = page_div.xpath('./@style')[0] 127 | pdf_style_2 = re.findall('\d+', pdf_style) 128 | pdf_size_w = int(pdf_style_2[0]) # pdf页面宽度 129 | pdf_size_h = int(pdf_style_2[1]) # pdf页面高度 130 | img_slice_w = math.ceil(pdf_size_w/10) # 图片切片宽度 131 | img_slice_h = math.ceil(pdf_size_h/10) # 图片切片高度 132 | im = Image.open(tmp_img_path + '\\' + image_name) 133 | im_1 = Image.new(mode='RGB', size=(pdf_size_w, pdf_size_h), color='#ffffff') 134 | 135 | # 遍历pdf页面div下的所有切片span 136 | for span in page_div.xpath('./span'): 137 | # 获取图片切片在pdf页面上的坐标 138 | span_class = span.xpath('./@class')[0] # class pdfImage-1-5 139 | pdf_row = int(span_class.split('-')[1]) 140 | pdf_col = int(span_class.split('-')[2]) 141 | # 图片切片在bg图片上的位置偏移 142 | span_bg_pos = span.xpath('./@style')[0] # 143 | span_bg_pos_2 = re.findall('\d+', span_bg_pos) 144 | # print(span_bg_pos_2) 145 | span_bg_x = int(span_bg_pos_2[0]) # 偏移量x 146 | span_bg_y = int(span_bg_pos_2[1]) # 偏移量y 147 | # print(span_bg_x) 148 | 149 | # 拼接图片 150 | im_crop = im.crop((span_bg_x, span_bg_y, span_bg_x + img_slice_w, span_bg_y + img_slice_h)) 151 | im_1.paste(im_crop, (pdf_row * (img_slice_w-1), pdf_col * (img_slice_h-1))) 152 | # im_1.show() 153 | # 缩小页面尺寸 154 | # im_1 = im_1.resize((int(im_1.width * 0.8), int(im_1.height * 0.8)), 3) 155 | im_1.save(output_img_path + '\\' + page_id.rjust(4, '0') + '.jpg', optimize=True) 156 | 157 | 158 | 159 | -------------------------------------------------------------------------------- /国标下载.user.js: -------------------------------------------------------------------------------- 1 | // ==UserScript== 2 | // @name 国标下载 3 | // @namespace http://tampermonkey.net/ 4 | // @version 0.1 5 | // @description try to take over the world! 6 | // @author wcd 7 | // @match http://c.gb688.cn/* 8 | // @icon https://www.google.com/s2/favicons?sz=64&domain=gb688.cn 9 | // @grant none 10 | // @require https://code.jquery.com/jquery-3.6.0.min.js 11 | 12 | // ==/UserScript== 13 | 14 | (function() { 15 | 'use strict'; 16 | $(function(){ 17 | 18 | $("head").append(''); 19 | 20 | let my_script=``; 127 | 128 | let source_img = ` 129 |