├── python
    ├── pics
    │   ├── 1
    │   ├── QQ截图20241228145652.jpg
    │   └── QQ截图20241228145737.jpg
    ├── dist
    │   └── 1
    ├── gbutil
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-311.pyc
    │   │   ├── htmlutil.cpython-311.pyc
    │   │   └── imageutil.cpython-311.pyc
    │   ├── htmlutil.py
    │   └── imageutil.py
    └── get_gb_file.py
├── 国标下载.user.js
└── README.md


/python/pics/1:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/python/dist/1:
--------------------------------------------------------------------------------
1 | ceshi
2 | 


--------------------------------------------------------------------------------
/python/gbutil/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/python/pics/QQ截图20241228145652.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengdong0421/GB_tampermonkey/HEAD/python/pics/QQ截图20241228145652.jpg


--------------------------------------------------------------------------------
/python/pics/QQ截图20241228145737.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengdong0421/GB_tampermonkey/HEAD/python/pics/QQ截图20241228145737.jpg


--------------------------------------------------------------------------------
/python/gbutil/__pycache__/__init__.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengdong0421/GB_tampermonkey/HEAD/python/gbutil/__pycache__/__init__.cpython-311.pyc


--------------------------------------------------------------------------------
/python/gbutil/__pycache__/htmlutil.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengdong0421/GB_tampermonkey/HEAD/python/gbutil/__pycache__/htmlutil.cpython-311.pyc


--------------------------------------------------------------------------------
/python/gbutil/__pycache__/imageutil.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chengdong0421/GB_tampermonkey/HEAD/python/gbutil/__pycache__/imageutil.cpython-311.pyc


--------------------------------------------------------------------------------
/python/gbutil/htmlutil.py:
--------------------------------------------------------------------------------
  1 | from time import sleep
  2 | 
  3 | import requests
  4 | from bs4 import BeautifulSoup
  5 | import string
  6 | import urllib.request
  7 | 
  8 | 
  9 | def get_bgs(page_divs):
 10 |     """
 11 |     解析class=page的div,返回div的id和bg对应关系
 12 |     :param page_divs:
 13 |     :return:
 14 |     """
 15 |     bgs = []
 16 |     for page_div in page_divs:
 17 |         page_id = page_div.xpath('./@id')[0]
 18 |         page_bg = page_div.xpath('./@bg')[0]
 19 |         bgs.append({'id': page_id, 'bg': page_bg})
 20 |     return bgs
 21 | 
 22 | 
 23 | def get_img_urls(bgs: list, jsessionid: string, common_headers):
 24 |     img_urls = []
 25 |     count = 0
 26 |     for bg in bgs:
 27 |         count += 1
 28 |         print(f'\r解析进度：%.2f' % (count/len(bgs) * 100) + '%', end="")
 29 |         bg_str = bg['bg']
 30 |         url_img = 'http://c.gb688.cn/bzgk/gb/viewGbImg?fileName=' + bg_str
 31 |         req_img = urllib.request.Request(url_img, headers=common_headers)
 32 |         req_img.add_header('Cookie', 'JSESSIONID=' + jsessionid)
 33 |         with urllib.request.urlopen(req_img) as res_img:
 34 |             if res_img.status == 200:
 35 |                 img_urls.append({'id': bg['id'], 'bg': bg['bg'], 'url': res_img.url})
 36 |     return img_urls
 37 | 
 38 | 
 39 | def download_img(img_url: string, jsessionid: string, common_headers, img_name):
 40 |     print('开始下载: ' + img_name)
 41 |     req_img_redirect = urllib.request.Request(img_url, headers=common_headers)
 42 |     req_img_redirect.add_header('Cookie', 'JSESSIONID=' + jsessionid)
 43 |     req_img_redirect.add_header('Cache-Alive', 'chunked')
 44 |     img_response = urllib.request.urlopen(req_img_redirect)
 45 | 
 46 |     page_img = img_response.read()
 47 | 
 48 |     with open(img_name, 'w+b') as pageImgFile:
 49 |         pageImgFile.write(page_img)
 50 |         pageImgFile.close()
 51 |     sleep(2)
 52 |     print('下载完成: ' + img_name)
 53 | 
 54 |     return 0
 55 | 
 56 | 
 57 | def get_title(full_html):
 58 |     """
 59 |     从html文件内容获取title
 60 |     :param full_html:
 61 |     :return:
 62 |     """
 63 |     return full_html.xpath('//title/text()')[0]
 64 | 
 65 | 
 66 | def get_hcno(preview_url):
 67 |     """
 68 |     从在线预览url中提取hcno
 69 |     :param preview_url:
 70 |     :return:
 71 |     """
 72 |     return preview_url.split('hcno=')[1]
 73 | 
 74 | 
 75 | def get_image_name(image_url):
 76 |     """
 77 |     从图片url地址中获取图片文件名
 78 |     :param image_url:
 79 |     :return:
 80 |     """
 81 |     return image_url.split('/')[-1]
 82 | 
 83 | 
 84 | def get_gb_code_from_image_url(image_url: string):
 85 |     """
 86 |     从图片url地址中获取国标编码
 87 |     :param image_url:
 88 |     :return:
 89 |     """
 90 |     return image_url.split('/')[-2].replace('/', '_')
 91 | 
 92 | 
 93 | def get_gb_code_from_title(gb_title: string):
 94 |     """
 95 |     从html页面title中获取国标编码
 96 |     :param gb_title:
 97 |     :return:
 98 |     """
 99 |     return gb_title.split('|')[1].replace(' ', '').replace('/', '_')
100 | 


--------------------------------------------------------------------------------
/python/get_gb_file.py:
--------------------------------------------------------------------------------
  1 | import os.path
  2 | import urllib.request
  3 | import urllib.parse
  4 | from datetime import datetime as dt
  5 | import threading
  6 | from gbutil import imageutil, htmlutil
  7 | from lxml import html
  8 | 
  9 | 
 10 | # 定义一些常量
 11 | # 在线预览链接
 12 | # url_preview = "http://c.gb688.cn/bzgk/gb/showGb?type=online&hcno=5ED2A10D48EE5AFF5D7C04F2683767CC"
 13 | url_preview = "http://c.gb688.cn/bzgk/gb/showGb?type=online&hcno=2544D73CA09ACBA031ACCF546FFF871B"
 14 | url_preview = ""
 15 | # 接收用户输入的在线预览url
 16 | if url_preview == "":
 17 |     url_preview = input('请粘贴标准在线预览界面的url:')
 18 |     print(url_preview)
 19 | 
 20 | 
 21 | # 验证码url
 22 | url_code = "http://c.gb688.cn/bzgk/gb/gc?_" + str(round(dt.timestamp(dt.now())*100))
 23 | # 验证码验证url  post
 24 | url_vc = "http://c.gb688.cn/bzgk/gb/verifyCode"
 25 | # User Agent
 26 | UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
 27 | # 共同请求头
 28 | common_headers = {
 29 |     'Connection': 'keep-alive',
 30 |     'Host': 'c.gb688.cn',
 31 |     'User-Agent': UA,
 32 |     'Referer': url_preview
 33 | }
 34 | 
 35 | # 程序当前目录
 36 | cwd = os.getcwd()
 37 | # pdf image暂存目录，和程序同级。该目录下建立以hcno为名的文件夹，存放该国标的pdfimage
 38 | tmp_dir = cwd + '\\' + 'tmp'
 39 | # 输出文件目录，和程序同级。该目录下建一个目录，以hcno命名，再建两个目录，images和pdf分别存放图片和pdf
 40 | output_dir = cwd + '\\' + 'output'
 41 | 
 42 | print('当前目录:' + cwd)
 43 | 
 44 | 
 45 | # 创建所需目录
 46 | imageutil.create_dir(tmp_dir)
 47 | imageutil.create_dir(output_dir)
 48 | 
 49 | 
 50 | def show_img_thread():
 51 |     imageutil.show_image('code.jpg')
 52 | 
 53 | 
 54 | # 请求在线预览url，获取cookie
 55 | req = urllib.request.Request(url_preview, headers=common_headers)
 56 | req.remove_header('Referer')
 57 | req.add_header('Referer', 'https://openstd.samr.gov.cn/')
 58 | with urllib.request.urlopen(req) as f:
 59 |     setcookie = f.getheader('Set-Cookie')
 60 |     jsessionid = setcookie.split(";")[0].split("=")[1]
 61 |     # print(jsessionid)
 62 | 
 63 | # 带cookie请求验证码url，获取验证码图片
 64 | req_code = urllib.request.Request(url_code, headers=common_headers)
 65 | req_code.add_header('Cookie', 'JSESSIONID=' + jsessionid)
 66 | res2 = urllib.request.urlopen(req_code)
 67 | img = res2.read()
 68 | 
 69 | # 存储验证码
 70 | with open('code.jpg', 'w+b') as f2:
 71 |     f2.write(img)
 72 |     f2.close()
 73 | 
 74 | # 开启显示验证码线程
 75 | t_show_img = threading.Thread(target=show_img_thread)
 76 | # 将子线程的daemon属性设置为True，这样，当主线程结束时，子线程也会随之结束
 77 | t_show_img.daemon = True
 78 | t_show_img.start()
 79 | 
 80 | 
 81 | # 读取用户输入
 82 | print('验证码已存储在' + cwd + '\\code.jpg')
 83 | print('弹出的验证码窗口被关闭后，忘记验证码可打开该code.jpg文件查看')
 84 | vcode = input("请输入验证码：")
 85 | 
 86 | # 输入验证码后，请求验证码验证url,获取在线预览页面html内容
 87 | data = urllib.parse.urlencode({'verifyCode': vcode})  # post请求需要传的数据
 88 | data = data.encode('ascii')
 89 | # 请求验证
 90 | req_verify = urllib.request.Request(url_vc, headers=common_headers)
 91 | req_verify.add_header('Cookie', 'JSESSIONID=' + jsessionid)
 92 | req_verify.add_header('Origin', 'http://c.gb688.cn')
 93 | 
 94 | with urllib.request.urlopen(req_verify, data) as f3:
 95 |     verify_result = f3.read().decode('utf-8')
 96 |     if verify_result == 'success':
 97 |         print('验证码正确')
 98 |     else:
 99 |         print('验证码错误, 将退出程序')
100 |         exit(1)
101 | 
102 |     if f3.status == 200:
103 |         print('请求页面...')
104 |         req4 = urllib.request.Request(url_preview, headers=common_headers)
105 |         req4.add_header('Cookie', 'JSESSIONID='+jsessionid)
106 |         r4 = urllib.request.urlopen(req4)
107 |         page_byte_content = r4.read()
108 |         # 在线预览页面全部html内容
109 |         page_str_content = page_byte_content.decode('utf-8')
110 |         # print(page_str_content)
111 | 
112 | 
113 | # 解析html内容，获取页面图片url
114 | print('请求页面完成，开始解析页面...')
115 | parsed_html = html.fromstring(page_str_content)
116 | 
117 | # 所有pdf页面div, 即class为page的div,是一个列表
118 | page_divs = parsed_html.xpath('//div[@class="page"]')
119 | # 获取所有页面所需的bg
120 | bgs = htmlutil.get_bgs(page_divs)
121 | 
122 | # 请求页面图片url，下载图片（先下载图片，后面再统一拼接）
123 | # 获取所有pdf页面图片url
124 | img_urls = htmlutil.get_img_urls(bgs, jsessionid, common_headers)
125 | print("\n解析完成，开始下载页面图片")
126 | 
127 | # 创建文件夹
128 | title = htmlutil.get_title(parsed_html)
129 | gb_code = htmlutil.get_gb_code_from_title(title)
130 | tmp_img_path = tmp_dir + '\\' + gb_code
131 | output_pdf_path = output_dir + '\\' + gb_code + '\\pdf'
132 | output_img_path = output_dir + '\\' + gb_code + '\\images'
133 | 
134 | imageutil.create_dir(tmp_img_path)
135 | imageutil.create_dir(output_pdf_path)
136 | imageutil.create_dir(output_img_path)
137 | 
138 | with open(output_dir + '\\' + gb_code + '\\info.txt', 't+w') as gbinfo:
139 |     gbinfo.write(f'国标编码：{gb_code}\n')
140 |     gbinfo.write(f'国标在线预览地址：{url_preview}\n')
141 |     gbinfo.close()
142 | 
143 | # 下载图片
144 | for url_map in img_urls:
145 |     url = url_map['url']
146 |     image_name = htmlutil.get_image_name(url)
147 |     if not os.path.exists(tmp_img_path + '\\' + image_name):
148 |         htmlutil.download_img(url, jsessionid, common_headers, tmp_img_path + '\\' + image_name)
149 |     else:
150 |         print('图片已存在：' + tmp_img_path + '\\' + image_name)
151 | 
152 | 
153 | # 拼接图片生成各页面image
154 | page_no = 1
155 | for page_div in page_divs:
156 |     print(f'\r拼接第 ' + str(page_no) + ' 页, 共' + str(len(img_urls)) + '页', end="")
157 |     page_no += 1
158 |     imageutil.merge_image(page_div, img_urls, tmp_img_path, output_img_path)
159 | 
160 | print("\n")
161 | print('拼接图片完成，存储于：' + output_img_path)
162 | print('tmp目录内是下载的临时文件，可删除')
163 | 
164 | # pause = input("\n\n按回车退出程序")
165 | # 各页面生成pdf文件
166 | imageutil.images2pdf(output_img_path, output_pdf_path, gb_code)
167 | 
168 | pause = input("\n\n按回车退出程序")
169 | 


--------------------------------------------------------------------------------
/python/gbutil/imageutil.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import os.path
  3 | import re
  4 | import string
  5 | import tkinter as tk
  6 | from PIL import Image, ImageTk
  7 | from spire.pdf.common import *
  8 | from spire.pdf import *
  9 | from gbutil import htmlutil
 10 | import os
 11 | 
 12 | 
 13 | # 简易版——图片转换为pdf，pdf页面随图片大小浮动
 14 | def images2pdf(image_file_path, output_path, gb_code):
 15 |     os.chdir(image_file_path)
 16 |     images = []
 17 |     file_lis = os.listdir(image_file_path)
 18 |     output_path_pdf = f"{output_path}/{gb_code}.pdf"
 19 |     con = 0
 20 |     for image_path in file_lis:
 21 |         if image_path.endswith(('.jpg', '.png')):
 22 |             image = Image.open(image_path)
 23 |             # 缩小图片尺寸以减小导出文件大小
 24 |             images.append(image.convert("RGB").resize((int(image.width * 0.6), int(image.height * 0.6))))
 25 |             con += 1
 26 |             print(f'\r转换为pdf,进度：%.2f' % (con/len(file_lis) * 100) + '%', end='')
 27 |             # print(image_path + '：第%d张' % con)
 28 |     images[0].save(output_path_pdf, save_all=True, append_images=images[1:], resolution=168)
 29 |     print(f'\n转换pdf完成，存放于：{output_path}')
 30 | 
 31 | 
 32 | def images2pdf2(folder_path, output_path, gb_code):
 33 |     # spire.pdf试用版有水印，有页数限制，只能添加10页
 34 |     # 创建一个PdfDocument类的对象
 35 |     pdf = PdfDocument()
 36 | 
 37 |     # 清除文档页边距
 38 |     pdf.PageSettings.SetMargins(0.0)
 39 | 
 40 |     # 循环遍历文件夹中的图片
 41 |     # folder_path = "Images/"
 42 |     for root, directories, files in os.walk(folder_path):
 43 |         for file_name in files:
 44 |             file_path = os.path.join(root, file_name)
 45 |             # 载入图片
 46 |             image = PdfImage.FromFile(file_path)
 47 |             # 获取图片的宽和高
 48 |             image_width = image.PhysicalDimension.Width
 49 |             image_height = image.PhysicalDimension.Height
 50 |             # 在文档中创建与图片相同大小的页面
 51 |             page = pdf.Pages.Add(SizeF(image_width, image_height))
 52 |             # 将图片绘制在页面上
 53 |             page.Canvas.DrawImage(image, 0.0, 0.0, image_width, image_height)
 54 | 
 55 |     # 保存PDF文档
 56 |     pdf.SaveToFile(output_path + f"/{gb_code}.pdf")
 57 |     pdf.Close()
 58 | 
 59 | 
 60 | def create_dir(output_dir: string):
 61 |     if not os.path.exists(output_dir):
 62 |         try:
 63 |             os.makedirs(output_dir)
 64 |             print('输出目录' + output_dir + '已创建')
 65 |         except Exception as e:
 66 |             print('创建目录' + output_dir + '失败，请手动创建后再运行程序')
 67 | 
 68 | 
 69 | def center_window(root, width, height):
 70 |     # 获取屏幕尺寸
 71 |     screen_width = root.winfo_screenwidth()
 72 |     screen_height = root.winfo_screenheight()
 73 | 
 74 |     # 计算窗口位置
 75 |     x = (screen_width // 2) - (width // 2)
 76 |     y = (screen_height // 2) - (height // 2)
 77 | 
 78 |     # 设置窗口位置
 79 |     root.geometry(f"{width}x{height}+{x}+{y}")
 80 |     root.attributes('-topmost', 'true')
 81 | 
 82 | 
 83 | def show_image(image_path: str) -> None:
 84 |     """
 85 |     根据图片地址，弹出对话框显示一张图片
 86 |     :param image_path: 图片路径
 87 |     :return: None
 88 |     """
 89 | 
 90 |     # 创建一个简单的Tkinter窗口
 91 |     # win = tk.Toplevel()
 92 |     # win.attributes('-topmost', 'true')
 93 | 
 94 |     root = tk.Tk()
 95 |     root.title("验证码")
 96 | 
 97 |     # 加载图片
 98 |     image = Image.open(image_path)
 99 |     image = ImageTk.PhotoImage(image)
100 |     h = image.height()
101 |     w = image.width()
102 | 
103 |     # 创建一个标签来显示图片
104 |     label = tk.Label(root, image=image)
105 |     label.place(relx=0.5, rely=0.5, anchor="center")
106 | 
107 |     # 设置窗口位置在屏幕中央
108 |     center_window(root, w, h)
109 |     label.pack()
110 | 
111 |     # 进入Tkinter事件循环
112 |     root.mainloop()
113 | 
114 | 
def merge_image(page_div, img_urls, tmp_img_path, output_img_path):
    """
    Rebuild one page image from the slices of its downloaded background image.

    For every entry of ``img_urls`` whose ``id`` equals the id of ``page_div``
    the downloaded image is opened, each ``span`` of the page selects one slice
    of it, and the slices are pasted onto a white canvas of the page size. The
    result is saved as ``<page id padded to 4 digits>.jpg``.

    :param page_div: page element offering ``xpath``; its ``style`` provides
        the page width and height, its ``span`` children describe the slices
    :param img_urls: list of ``{'id', 'bg', 'url'}`` dicts
    :param tmp_img_path: folder holding the downloaded images
    :param output_img_path: folder that receives the merged page image
    :return: None; nothing is done when ``tmp_img_path`` is not a non-empty directory
    """
    # Guard: the download folder must exist and contain at least one file
    if not os.path.isdir(tmp_img_path) or not os.listdir(tmp_img_path):
        return

    # Raw string avoids the invalid-escape warning; compiled once for all spans
    number_pattern = re.compile(r'\d+')
    page_id = page_div.xpath('./@id')[0]

    for data in img_urls:
        if data['id'] != page_id:
            continue

        image_name = htmlutil.get_image_name(data['url'])
        # The first two numbers in the page style are the page width and height
        page_numbers = number_pattern.findall(page_div.xpath('./@style')[0])
        pdf_size_w = int(page_numbers[0])
        pdf_size_h = int(page_numbers[1])
        # A slice is one tenth of the page in each direction (rounded up)
        img_slice_w = math.ceil(pdf_size_w / 10)
        img_slice_h = math.ceil(pdf_size_h / 10)
        im_1 = Image.new(mode='RGB', size=(pdf_size_w, pdf_size_h), color='#ffffff')

        # Close the source image once all slices have been copied
        with Image.open(tmp_img_path + '\\' + image_name) as im:
            for span in page_div.xpath('./span'):
                # Position of the slice on the page, e.g. class "pdfImage-1-5"
                class_parts = span.xpath('./@class')[0].split('-')
                pdf_row = int(class_parts[1])
                pdf_col = int(class_parts[2])
                # Offset of the slice inside the downloaded image (digits only, sign dropped)
                offsets = number_pattern.findall(span.xpath('./@style')[0])
                span_bg_x = int(offsets[0])
                span_bg_y = int(offsets[1])

                im_crop = im.crop((span_bg_x, span_bg_y, span_bg_x + img_slice_w, span_bg_y + img_slice_h))
                im_1.paste(im_crop, (pdf_row * (img_slice_w - 1), pdf_col * (img_slice_h - 1)))

        im_1.save(output_img_path + '\\' + page_id.rjust(4, '0') + '.jpg', optimize=True)
156 | 
157 | 
158 | 
159 | 


--------------------------------------------------------------------------------
/国标下载.user.js:
--------------------------------------------------------------------------------
  1 | // ==UserScript==
  2 | // @name         国标下载
  3 | // @namespace    http://tampermonkey.net/
  4 | // @version      0.1
  5 | // @description  try to take over the world!
  6 | // @author       wcd
  7 | // @match        http://c.gb688.cn/*
  8 | // @icon         https://www.google.com/s2/favicons?sz=64&domain=gb688.cn
  9 | // @grant        none
 10 | // @require      https://code.jquery.com/jquery-3.6.0.min.js
 11 | 
 12 | // ==/UserScript==
 13 | 
 14 | (function() {
 15 |     'use strict';
 16 | $(function(){
 17 | 
 18 |     $("head").append('<script src="https://cdnjs.cloudflare.com/ajax/libs/jspdf/2.5.1/jspdf.umd.min.js"></script>');
 19 | 
 20 |     let my_script=`<script>
 21 | 
 22 |             function px2Num(px) {
 23 |                 return Number(px.split("px")[0].toString());
 24 |             }
 25 | 
 26 | 
 27 |             function getPages(){
 28 |                 if( $("canvas[id^=canvas_]").length > 0 ) {
 29 |                     $("canvas[id^=canvas_]").delete();
 30 | 
 31 |                 }
 32 |                 var baseurl = "http://c.gb688.cn/bzgk/gb/";
 33 |                 var pagecount = $("div.page").length;
 34 |                 var pages = new Array(pagecount);
 35 |                 var pagebg = new Array(pagecount);
 36 |                 var title = $("title").text().split("|")[1].toString().trim();
 37 |                 var pheight = $("#0").css("height");
 38 |                 var pwidth = $("#0").css("width");
 39 | 
 40 |                 $(".page").each(function(i, elem) {
 41 |                     if (elem.hasAttribute("bg")) {
 42 |                         pagebg[i] = elem.getAttribute("bg");
 43 |                     } else {
 44 |                         pagebg[i] = $(elem).children("span").first().css("background-image").split('"')[
 45 |                             1].split(/\\//).slice(-1)[0];
 46 |                     }
 47 |                 });
 48 | 
 49 | 
 50 |                 //拼合图片
 51 |                 $(".page").each(function(i, elem) {
 52 |                     var canvasclone = $("canvas#canvas").clone();
 53 |                     canvasclone.attr("id","canvas_"+i).css("background-color","#FFFFFFFF");
 54 |                     $("#newimg").append(canvasclone);
 55 |                     var canvas = document.getElementById('canvas_'+i);
 56 |                     var ctx = canvas.getContext('2d');
 57 |                     ctx.fillStyle="white";
 58 |                     ctx.fillRect(0,0,px2Num(pwidth), px2Num(pheight));
 59 | 
 60 |                     $("#imgContainer").append("<img id=img_" + i + " src='" + baseurl+pagebg[i] + "' />")
 61 |                     var image = document.getElementById('img_'+i);
 62 | 
 63 |                     image.addEventListener('load', e => {
 64 |                         $(elem).children("span").each(function(j,s){
 65 |                              ctx.drawImage(image, -px2Num($(s).css("background-position-x")), -px2Num($(s).css("background-position-y")),  119, 168,
 66 |                              $(s).attr("class").split('-')[1]*119, $(s).attr("class").split('-')[2]*168, 119, 168);
 67 |                         });
 68 |                     });
 69 | 
 70 |                 });
 71 |             }
 72 | 
 73 |             function isimgComplete(imgs){
 74 |                 //$("img[id^=img_]")
 75 |                 flag = true;
 76 |                 for(i=0;i<imgs.length;i++){
 77 |                     flag=flag && imgs[i].complete;
 78 |                 }
 79 |                 return flag;
 80 |             }
 81 | 
 82 |             function downloadPDF(){
 83 |                 if( $("canvas[id^=canvas_]").length == 0 ) {
 84 |                     alert("请先点击获取页面！");
 85 |                     return;
 86 |                 }
 87 | 
 88 |                 var images = $("img[id^=img_]");
 89 |                 //alert(isimgComplete(images));
 90 |                 if(!isimgComplete(images)){
 91 |                     alert("页面尚未提取完，稍后再试");
 92 |                     return;
 93 |                 }
 94 | 
 95 |                 var pheight = $("#0").css("height");
 96 |                 var pwidth = $("#0").css("width");
 97 |                 const { jsPDF } = window.jspdf;
 98 |                 const pdf = new jsPDF('p','px',[px2Num(pwidth), px2Num(pheight)]);
 99 | 
100 |                 var title = $("title").text().split("|")[1].toString().trim();
101 | 
102 |                 let [imgX, imgY] = [595.28, 841.89];
103 |                 let imgHeight = imgX / (px2Num(pwidth) / px2Num(pheight));
104 | 
105 |                 $("canvas[id^=canvas_]").each(function(i,e){
106 |                     pdf.addImage(document.getElementById('canvas_'+i).toDataURL('image/jpeg'), 'jpeg', 0, 0, px2Num(pwidth), px2Num(pheight), '', 'MEDDIUM');
107 |                     //pdf.addImage(document.getElementById('canvas_'+i).toDataURL('image/png'), 'jpeg', 0, 0, imgX, imgHeight, '', 'SLOW');
108 |                     pdf.addPage();
109 |                 });
110 | 
111 |                 let targetPage = pdf.internal.getNumberOfPages();
112 |                 pdf.deletePage(targetPage); // 删除最后一页
113 | 
114 |                 pdf.save(title + ".pdf");
115 |             }
116 | 
117 |              function downloadPDF0(){
118 |                 while($("canvas[id^=canvas_]").length < $(".page").length){
119 |                     setTimeout(function(){
120 | 
121 |                     },1000);
122 |                 }
123 |              }
124 | 
125 | 
126 |         </script>`;
127 | 
128 |     let source_img = `
129 |         <div id="canvas_container">
130 |             <input type="button" value="获取页面" onclick="getPages()"/>
131 |             <input type="button" value="下载pdf" onclick="downloadPDF()"/>
132 |         </div>
133 |         <div id="imgContainer" style="display:none;"><img id="source" src=""></div>
134 |         <div id="newimg" width="1190px"></div>
135 |         <canvas id="canvas" width="1190px" height="1680px" style="display:none;"></canvas>`;
136 | 
137 |     //let btn = `<input type="button" value="获取页面" onclick="getPages()"/>
138 |                //<input type="button" value="下载pdf" onclick="downloadPDF()"/>`;
139 | 
140 |     let style = `
141 |         <style>
142 |             #canvas_container {
143 |                 position: fixed;
144 |                 height: 30px;
145 |                 width: 150px;
146 |                 top: 50px;
147 |                 left: 10px;
148 |                 border: 1px;
149 |                 /*background-color: #00ff0099;*/
150 |                 border-radius: 3px;
151 |             }
152 |         </style>
153 |     `;
154 | 
155 |     $("head").append(style);
156 |     $("body").append(source_img);
157 |     $("body").append(my_script);
158 |     //$("body").append(btn);
159 | 
160 |     //alert($("title").text());
161 | });
162 | 
163 | })();
164 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## **彻底算白忙活了，官网竟然又开放下载权限了！！！pdf文字版！！！** 
  2 | 下载后的pdf不需要以前官网提供的查看工具，复制的文字也不是乱码！不得不说这次官网格局打开了，真是很大的进步啊！  
  3 | 
  4 | ![WJIX%4 {K MK3GADQ1QUU_1](https://github.com/user-attachments/assets/288c22ea-d318-4518-a8f9-fe823634edb8)  
  5 | 
  6 | 
  7 | 
  8 | 
  9 | ## 国家标准全文公开系统标准文件下载工具
 10 | 
 11 | - 官方网站改造后，之前的油猴脚本已不可用，现在用python重写了一个下载工具
 12 | - release中下载后直接双击使用，启动有点慢，等几秒钟，粘贴**国标文件在线预览界面的url**回车后，会弹出验证码，输入验证码后再回车即可下载解析页面，提取国标文件页面
 13 | - <s>目前只做了拼接每页图片的，没有将图片合成pdf，查看内容没问题，打印会稍微麻烦些</s>
 14 | - 支持导出pdf文件。（会生成每页图片，并将图片转成pdf文件）
 15 | - 软件运行后会在程序同级目录创建两个文件夹。tmp目录存储中间输出的临时文件，下载完可删除，output用来存放输出文件
 16 | - **只能下载可预览的国标**
 17 | - 源码还没完善好，暂时就不上传了
 18 | 
 19 | ---
 20 | 
 21 | ## GB_tampermonkey
 22 | ## tampermonkey脚本
 23 | 
 24 | ### 作用：拼接在线预览时响应回来的图片并生成pdf文件
 25 | ### 用法：启用脚本后，会在预览界面左上角生成两个按钮，先点击左边“获取页面”按钮，稍等，再点击“下载pdf”按钮
 26 | ### 缺点：生成的pdf文件稍大，生成过程比较耗内存，页数过多的标准可能没法使用
 27 | 
## 代码写得比较粗糙，见谅。
 29 | 
 30 | 
 31 | 2022.11.05
 32 | 
 33 | 已找到原因，脚本里缺少引入jQuery的语句（ // @require https://code.jquery.com/jquery-3.6.0.min.js ）。现在已经添上，应该可以正常显示按钮了。有不能显示按钮的，请大家重新安装一下脚本试试（当然，也可以用下面手动执行代码的方法）。
 34 | 
 35 | 
 36 | 2022.11.04
 37 | 
 38 | 有人反馈预览页面不显示按钮了，经测试，确实如此。
 39 | 不过目前脚本功能是好的，但不知道是不是网站屏蔽了油猴还是其他什么原因，脚本不能自动运行。但是可以通过手动执行的方法来运行这个脚本，方法如下：
 40 | 
 41 | 1. 复制以下代码；
 42 | 2. 在预览界面，按Ctrl+Shift+I 调出调试工具（这是谷歌Chrome浏览器的快捷方式，其他浏览器不知道是不是这个）；
 43 | 3. 选择第二个页签（Console）(如下图)，鼠标在下面 > 处点一下，把代码粘贴在光标处，按回车运行代码。这样按钮就出来了。
 44 | ![E_20)K}VQIBX0`KGCU X G9](https://user-images.githubusercontent.com/12667799/199972675-038243b5-5677-40d7-a39c-df312eb68cba.png)
 45 | 
 46 |             $("head").append('<script src="https://cdnjs.cloudflare.com/ajax/libs/jspdf/2.5.1/jspdf.umd.min.js"></script>');
 47 |             let my_script=`<script>
 48 |             function px2Num(px) {
 49 |                 return Number(px.split("px")[0].toString());
 50 |             }
 51 | 
 52 |             function getPages(){
 53 |                 if( $("canvas[id^=canvas_]").length > 0 ) {
 54 |                     $("canvas[id^=canvas_]").delete();
 55 |                 }
 56 | 
 57 |                 var baseurl = "http://c.gb688.cn/bzgk/gb/";
 58 |                 var pagecount = $("div.page").length;
 59 |                 var pages = new Array(pagecount);
 60 |                 var pagebg = new Array(pagecount);
 61 |                 var title = $("title").text().split("|")[1].toString().trim();
 62 |                 var pheight = $("#0").css("height");
 63 |                 var pwidth = $("#0").css("width");
 64 | 
 65 |                 $(".page").each(function(i, elem) {
 66 |                     if (elem.hasAttribute("bg")) {
 67 |                         pagebg[i] = elem.getAttribute("bg");
 68 |                     } else {
 69 |                         pagebg[i] = $(elem).children("span").first().css("background-image").split('"')[
 70 |                             1].split(/\\//).slice(-1)[0];
 71 |                     }
 72 |                 });
 73 | 
 74 |                 //拼合图片
 75 |                 $(".page").each(function(i, elem) {
 76 |                     var canvasclone = $("canvas#canvas").clone();
 77 |                     canvasclone.attr("id","canvas_"+i).css("background-color","#FFFFFFFF");
 78 |                     $("#newimg").append(canvasclone);
 79 |                     var canvas = document.getElementById('canvas_'+i);
 80 |                     var ctx = canvas.getContext('2d');
 81 |                     ctx.fillStyle="white";
 82 |                     ctx.fillRect(0,0,px2Num(pwidth), px2Num(pheight));
 83 | 
 84 |                     $("#imgContainer").append("<img id=img_" + i + " src='" + baseurl+pagebg[i] + "' />")
 85 |                     var image = document.getElementById('img_'+i);
 86 | 
 87 |                     image.addEventListener('load', e => {
 88 |                         $(elem).children("span").each(function(j,s){
 89 |                              ctx.drawImage(image, -px2Num($(s).css("background-position-x")), -px2Num($(s).css("background-position-y")),  119, 168,
 90 |                              $(s).attr("class").split('-')[1]*119, $(s).attr("class").split('-')[2]*168, 119, 168);
 91 |                         });
 92 |                     });
 93 | 
 94 |                 });
 95 |             }
 96 | 
 97 |             function isimgComplete(imgs){
 98 |                 //$("img[id^=img_]")
 99 |                 flag = true;
100 |                 for(i=0;i<imgs.length;i++){
101 |                     flag=flag && imgs[i].complete;
102 |                 }
103 |                 return flag;
104 |             }
105 | 
106 |             function downloadPDF(){
107 |                 if( $("canvas[id^=canvas_]").length == 0 ) {
108 |                     alert("请先点击获取页面！");
109 |                     return;
110 |                 }
111 | 
112 |                 var images = $("img[id^=img_]");
113 |                 //alert(isimgComplete(images));
114 |                 if(!isimgComplete(images)){
115 |                     alert("页面尚未提取完，稍后再试");
116 |                     return;
117 |                 }
118 | 
119 |                 var pheight = $("#0").css("height");
120 |                 var pwidth = $("#0").css("width");
121 |                 const { jsPDF } = window.jspdf;
122 |                 const pdf = new jsPDF('p','px',[px2Num(pwidth), px2Num(pheight)]);
123 | 
124 |                 var title = $("title").text().split("|")[1].toString().trim();
125 | 
126 |                 let [imgX, imgY] = [595.28, 841.89];
127 |                 let imgHeight = imgX / (px2Num(pwidth) / px2Num(pheight));
128 | 
129 |                 $("canvas[id^=canvas_]").each(function(i,e){
130 |                     pdf.addImage(document.getElementById('canvas_'+i).toDataURL('image/jpeg'), 'jpeg', 0, 0, px2Num(pwidth), px2Num(pheight), '', 'MEDDIUM');
131 |                     //pdf.addImage(document.getElementById('canvas_'+i).toDataURL('image/png'), 'jpeg', 0, 0, imgX, imgHeight, '', 'SLOW');
132 |                     pdf.addPage();
133 |                 });
134 | 
135 |                 let targetPage = pdf.internal.getNumberOfPages();
136 |                 pdf.deletePage(targetPage); // 删除最后一页
137 | 
138 |                 pdf.save(title + ".pdf");
139 |             }
140 | 
141 |              function downloadPDF0(){
142 |                 while($("canvas[id^=canvas_]").length < $(".page").length){
143 |                     setTimeout(function(){
144 | 
145 |                     },1000);
146 |                 }
147 |              }
148 | 
149 |         </script>`;
150 | 
151 |         let source_img = `
152 |             <div id="canvas_container">
153 |             <input type="button" value="获取页面" onclick="getPages()"/>
154 |             <input type="button" value="下载pdf" onclick="downloadPDF()"/>
155 |             </div>
156 |             <div id="imgContainer" style="display:none;"><img id="source" src=""></div>
157 |             <div id="newimg" width="1190px"></div>
158 |             <canvas id="canvas" width="1190px" height="1680px" style="display:none;"></canvas>`;
159 | 
160 |         let style = `
161 |             <style>
162 |                 #canvas_container {
163 |                 position: fixed;
164 |                 height: 30px;
165 |                 width: 150px;
166 |                 top: 50px;
167 |                 left: 10px;
168 |                 border: 1px;
169 |                 /*background-color: #00ff0099;*/
170 |                 border-radius: 3px;
171 |                 }
172 |             </style>
173 |         `;
174 | 
175 |         $("head").append(style);
176 |         $("body").append(source_img);
177 |         $("body").append(my_script);
178 | 


--------------------------------------------------------------------------------