├── README.md ├── config.py ├── download_paper_by_URLfile.py ├── download_paper_by_pageURL.py ├── img ├── exit.ico ├── ieee.png └── root.ico ├── main.py ├── main_ui.py ├── url.txt └── utils.py /README.md: -------------------------------------------------------------------------------- 1 | # 写综述必备!自动批量下载IEEE的论文 2 | 3 | 4 | 5 | version 6 | 7 | 8 | version 9 | 10 | 11 | version 12 | 13 | 14 | version 15 | 16 | 17 | 18 | 19 | 20 | > 如果领域内的大量论文需要下载,基于本工具实现半自动化,无需到IEEE网站手动一篇一篇下载论文啦,科研效率翻倍! 21 | > 工具使用前提: 一定要在能够有权限下载IEEE论文的网络下才能正常使用。 22 | 23 | 如下,介绍两种方法实现批量下载,代码流程和详细介绍请参考博客。 24 | 25 | ## 软件界面 26 | 27 |
28 | 29 |
30 | 31 | ## 方法1 32 | 将IEEE网站上的paper list导出到txt文档中,再基于该txt文件批量下载。 33 | 优点:可筛选不需要的论文 34 | 缺点:一次只能导出一页(≤25篇论文) 35 | 36 | * **step 1** 37 | * 进入IEEE官网[https://ieeexplore.ieee.org](https://ieeexplore.ieee.org/),按照需要搜索论文,按照如下方式导出论文列表 38 | 39 |
40 | 41 |
42 | 43 | * **step 2** 44 | * 导出到txt文件,如下图所示。运行main_ui.py, 进入UI界面,配置论文保存文件夹和URL文件路径两个参数,点击“开始下载”,搞定! 45 | 46 |
47 | 48 |
49 | 50 | ## 方法2 51 | 优点:可大批量下载论文 52 | 缺点:不可在下载前筛选不需要的论文 53 | * **step 1** 54 | * 进入IEEE官网[https://ieeexplore.ieee.org](https://ieeexplore.ieee.org/),按照关键词搜索论文 55 | 56 | * **step 2** 57 | * 按照同样的关键词在软件界面中配置,以及需要下载的页数,可以用英文逗号、破折号隔开,也可以用一个数字。eg. 2,3 或者 2-5 或者 2 58 | 59 | ## 配置举例 60 | 61 |
62 | 63 |
64 | 65 | > 点击下载按钮 66 | 67 |
68 | 69 |
70 | 71 | > 下载成功啦! 72 | 73 |
74 | 75 |
76 | 77 | ## 搭配EndNote使用 78 | 79 |
80 | 81 |
82 | 83 | 使用教程请参考博客: 84 | [科研效率直线提升!如何一键下载会议论文?ACL 2020 论文代码批量下载 打包分享](https://blog.csdn.net/weixin_43955436/article/details/116696395?spm=1001.2014.3001.5501) 85 | 86 | ### Reference 87 | 本工具主要参考以下资料,感谢作者分享。 88 | [1] https://blog.csdn.net/ubooksapp/article/details/49518009 89 | [2] https://github.com/EdwinZhang1970/Python -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2021/10/14 15:56 3 | # @Author : Yong Cao 4 | # @Email : yongcao_epic@hust.edu.cn 5 | class pb_value: 6 | progress_bar_value = 0 7 | 8 | # 对于每个全局变量,都需要定义get_value和set_value接口 9 | def set_value(value): 10 | pb_value.progress_bar_value = value 11 | 12 | def get_value(): 13 | return pb_value.value 14 | -------------------------------------------------------------------------------- /download_paper_by_URLfile.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2021/10/12 22:49 3 | # @Author : Yong Cao 4 | # @Email : yongcao_epic@hust.edu.cn 5 | import os 6 | import re 7 | from tqdm import tqdm 8 | from utils import downLoad_paper 9 | 10 | 11 | def organize_info_by_txt(dst_dir, url_file, paper_name_with_year=None): 12 | if not os.path.exists(url_file): 13 | return False, None 14 | with open(url_file, "r") as f: 15 | lines = f.read().split("\n\n") 16 | rule = r'"(.*?)"' 17 | rstr = r"[\=\(\)\,\/\\\:\*\?\?\"\<\>\|\'']" 18 | paper_info = {} 19 | for i, line in enumerate(lines): 20 | content = line.split("\n") 21 | # paper name 22 | slotList = re.findall(rule, content[0]) 23 | papername = re.sub(rstr, '', slotList[0]) 24 | if paper_name_with_year: 25 | papername = content[1].split(".")[2] + ' ' + papername 26 | papername = os.path.join(dst_dir, papername + '.pdf') 27 | # paper url 28 | if "URL" in content[3]: 29 | arnumber = \ 30 | content[3].replace("URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=", "").split("&")[0] 31 | url = "https://ieeexplore.ieee.org/stampPDF/getPDF.jsp?tp=&arnumber=" + arnumber + "&ref=" 32 | paper_info[i] = {} 33 | paper_info[i]['name'] = papername 34 | paper_info[i]['url'] = url 35 | return True, paper_info 36 | 37 | 38 | if __name__ == '__main__': 39 | # 配置存储文件夹 40 | dst_dir = "./save" 41 | if not os.path.exists(dst_dir): 42 | os.mkdir(dst_dir) 43 | # 封装下载url和论文名称 44 | url_txt = "url.txt" 45 | paper_info = organize_info_by_txt(dst_dir, url_txt, True) 46 | # 下载论文 47 | downLoad_paper(paper_info) 48 | -------------------------------------------------------------------------------- /download_paper_by_pageURL.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2021/10/13 10:37 3 | # @Author : Yong Cao 4 | # @Email : yongcao_epic@hust.edu.cn 5 | import json 6 | import requests 7 | import os 8 | from utils import downLoad_paper 9 | import re 10 | import urllib.request 11 | 12 | 13 | def organize_info_by_query(queryText, pageNumber, save_dir, paper_name_with_year=None): 14 | cookie = [] 15 | file = urllib.request.urlopen("https://ieeexplore.ieee.org", timeout=1).info() 16 | for key, value in file.items(): 17 | if key == "Set-Cookie": 18 | cookie.append(value) 19 | cookie_valid = "; ".join(cookie) 20 | paper_info = {} 21 | count = 0 22 | for page in pageNumber: 23 | headers = { 24 | 'Host': 'ieeexplore.ieee.org', 25 | 'Content-Type': "application/json", 26 | 'User-Agent': 'PostmanRuntime/7.28.1', 27 | 'Cookie': cookie_valid, 28 | 'Accept': '*/*'} 29 | payload = {"queryText": queryText, "pageNumber": str(page), "returnFacets": ["ALL"], 30 | "returnType": "SEARCH"} 31 | toc_res = requests.post("https://ieeexplore.ieee.org/rest/search", headers=headers, data=json.dumps(payload)) 32 | response = json.loads(toc_res.text) 33 | if 'records' in response: 34 | for item in response['records']: 35 | paper_info[count] = {} 36 | paper_info[count]['url'] = "https://ieeexplore.ieee.org/stampPDF/getPDF.jsp?tp=&arnumber=" + item['articleNumber'] + "&ref=" 37 | paper_info[count]['name'] = item['articleTitle'] 38 | rstr = r"[\=\(\)\,\/\\\:\*\?\?\"\<\>\|\'']" 39 | if paper_name_with_year: 40 | paper_info[count]['name'] = os.path.join(save_dir, item['publicationYear'] + ' ' + re.sub(rstr, '', paper_info[count]['name']) + '.pdf') 41 | else: 42 | paper_info[count]['name'] = os.path.join(save_dir, re.sub(rstr, '', paper_info[count]['name']) + '.pdf') 43 | count += 1 44 | if len(paper_info) > 0: 45 | return True, paper_info 46 | else: 47 | return False, paper_info 48 | 49 | 50 | if __name__ == '__main__': 51 | import utils 52 | utils._init() 53 | queryText = "dialog system" 54 | pageNumber = [3] 55 | save_dir = "save" 56 | _, paper_info = organize_info_by_query(queryText, pageNumber, save_dir, True) 57 | downLoad_paper(paper_info) 58 | -------------------------------------------------------------------------------- /img/exit.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongcaoplus/IEEE_downloader/99a41fe9593a82c129086eec5caa9730e8efbd41/img/exit.ico -------------------------------------------------------------------------------- /img/ieee.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongcaoplus/IEEE_downloader/99a41fe9593a82c129086eec5caa9730e8efbd41/img/ieee.png -------------------------------------------------------------------------------- /img/root.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yongcaoplus/IEEE_downloader/99a41fe9593a82c129086eec5caa9730e8efbd41/img/root.ico -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2021/10/13 12:34 3 | # @Author : Yong Cao 4 | # @Email : yongcao_epic@hust.edu.cn 5 | import os 6 | from download_paper_by_URLfile import organize_info_by_txt 7 | from download_paper_by_pageURL import organize_info_by_query 8 | from utils import downLoad_paper 9 | 10 | 11 | if __name__ == '__main__': 12 | ############### 配置1 ################## 13 | mode = "search" # "txt" or "search" 14 | dst_dir = "./save" 15 | ############### END ################## 16 | if mode == "txt": 17 | ############### 配置2 ################## 18 | url_txt = "url.txt" # txt mode is needed. 19 | ############### END ################## 20 | # 配置存储文件夹 21 | if not os.path.exists(dst_dir): 22 | os.mkdir(dst_dir) 23 | # 封装下载url和论文名称 24 | _, paper_info = organize_info_by_txt(url_txt) 25 | # 下载论文 26 | downLoad_paper(paper_info) 27 | else: 28 | ############### 配置3 ################## 29 | queryText = "dialog system" 30 | pageNumber = [3] 31 | save_papername_with_year = True 32 | ############### END ################## 33 | _, paper_info = organize_info_by_query(queryText, pageNumber, dst_dir, save_papername_with_year) 34 | downLoad_paper(paper_info, show_bar=True) 35 | -------------------------------------------------------------------------------- /main_ui.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import _thread 3 | import os 4 | import tkinter as tk 5 | import tkinter.font as tkFont 6 | from tkinter import messagebox, ttk 7 | 8 | from PIL import Image, ImageTk 9 | 10 | import utils 11 | from download_paper_by_URLfile import organize_info_by_txt 12 | from download_paper_by_pageURL import organize_info_by_query 13 | from utils import downLoad_paper, center_window 14 | 15 | 16 | def show_confirm(message=""): 17 | """ 18 | True : yes 19 | False : no 20 | """ 21 | return messagebox.askyesno("确认框", message) 22 | 23 | 24 | def error_inform(message=""): 25 | """ 26 | True : yes 27 | False : no 28 | """ 29 | return messagebox.showerror("参数错误", message) 30 | 31 | 32 | def show_succeed_window(message=""): 33 | """ 34 | True : yes 35 | False : no 36 | """ 37 | return messagebox.showinfo("下载成功!", message) 38 | 39 | 40 | def show_fail_window(message=""): 41 | """ 42 | True : yes 43 | False : no 44 | """ 45 | return messagebox.showerror("下载失败...", message) 46 | 47 | 48 | def show_begin_download(message=""): 49 | """ 50 | True : yes 51 | False : no 52 | """ 53 | return messagebox.askyesno("确认框", message) 54 | 55 | 56 | def tkimg_resized(img, w_box, h_box, keep_ratio=True): 57 | """对图片进行按比例缩放处理""" 58 | w, h = img.size 59 | 60 | if keep_ratio: 61 | if w > h: 62 | width = w_box 63 | height = int(h_box * (1.0 * h / w)) 64 | 65 | if h >= w: 66 | height = h_box 67 | width = int(w_box * (1.0 * w / h)) 68 | else: 69 | width = w_box 70 | height = h_box 71 | 72 | img1 = img.resize((width, height), Image.ANTIALIAS) 73 | tkimg = ImageTk.PhotoImage(img1) 74 | return tkimg 75 | 76 | 77 | def image_label(frame, img, width, height, keep_ratio=True): 78 | """输入图片信息,及尺寸,返回界面组件""" 79 | if isinstance(img, str): 80 | _img = Image.open(img) 81 | else: 82 | _img = img 83 | lbl_image = tk.Label(frame, width=width, height=height) 84 | 85 | tk_img = tkimg_resized(_img, width, height, keep_ratio) 86 | lbl_image.image = tk_img 87 | lbl_image.config(image=tk_img) 88 | return lbl_image 89 | 90 | 91 | def space(n): 92 | s = " " 93 | r = "" 94 | for i in range(n): 95 | r += s 96 | return r 97 | 98 | 99 | def check_value_valid(mode, save_dir=None, url_path=None, keyword=None, page=None): 100 | if mode == 1: 101 | if not (save_dir and url_path): 102 | error_inform("请检查 论文保存文件夹 URL文件路径 是否已输入") 103 | return False 104 | elif mode == 2: 105 | if not (save_dir and keyword and page): 106 | error_inform("请检查 论文保存文件夹 关键词 下载页数范围 是否已输入") 107 | return False 108 | return True 109 | 110 | 111 | def check_page_valid(page): 112 | page_comma = [] 113 | try: 114 | if ',' in page: 115 | for item in page.split(","): 116 | if "-" in item: 117 | pages = item.split("-") 118 | if len(pages) != 2: 119 | show_fail_window("下载页数范围输入错误") 120 | return False, None 121 | page_comma.extend([item for item in range(int(pages[0].strip()), int(pages[1].strip()) + 1)]) 122 | else: 123 | page_comma.append(int(item.strip())) 124 | 125 | elif "-" in page: 126 | pages = page.split("-") 127 | if len(pages) != 2: 128 | show_fail_window("下载页数范围输入错误") 129 | return False, None 130 | page_comma.extend([item for item in range(int(pages[0].strip()), int(pages[1].strip()) + 1)]) 131 | else: 132 | page_comma.append(int(page.strip())) 133 | except Exception as e: 134 | show_fail_window("下载页数范围输入错误") 135 | return False, None 136 | page = sorted(list(set(page_comma))) 137 | for item in page: 138 | if item < 1: 139 | show_fail_window("下载页数范围输入错误") 140 | return False, None 141 | return True, page 142 | 143 | 144 | class App: 145 | def __init__(self): 146 | self.root = tk.Tk() 147 | self.root.geometry("%dx%d" % (900, 650)) # 窗体尺寸 148 | self.root.iconbitmap("img/root.ico") # 窗体图标 149 | self.root.title("IEEE论文批量下载工具_v1.0") 150 | center_window(self.root) 151 | # self.root.resizable(False, False) # 设置窗体不可改变大小 152 | self.no_title = False 153 | self.show_title() 154 | self.body() 155 | 156 | def body(self): 157 | 158 | # --------------------------------------------------------------------- 159 | # 标题栏 160 | # --------------------------------------------------------------------- 161 | f1 = tk.Frame(self.root) 162 | im1 = image_label(f1, "img/root.ico", 86, 86, False) 163 | im1.configure(bg="Teal") 164 | im1.bind('', self.show_title) 165 | im1.pack(side=tk.LEFT, anchor=tk.NW, fill=tk.Y) 166 | 167 | ft1 = tkFont.Font(family="微软雅黑", size=24, weight=tkFont.BOLD) 168 | tk.Label(f1, text="IEEE论文批量下载工具_v1.0", height=2, fg="white", font=ft1, bg="Teal") \ 169 | .pack(side=tk.LEFT, expand=tk.YES, fill=tk.X) 170 | 171 | im2 = image_label(f1, "img/exit.ico", 86, 86, False) 172 | im2.configure(bg="Teal") 173 | im2.bind('', self.close) 174 | im2.pack(side=tk.RIGHT, anchor=tk.NW, fill=tk.Y) 175 | 176 | f2 = tk.Frame(self.root) 177 | img_content = image_label(f2, "img/ieee.png", width=400, height=142, keep_ratio=False).pack(padx=10, pady=10) 178 | f1.pack(fill=tk.X) 179 | f2.pack() 180 | 181 | ft_title = tkFont.Font(family="微软雅黑", size=13, weight=tkFont.BOLD) 182 | ft_middle = tkFont.Font(family="微软雅黑", size=11) 183 | ft = tkFont.Font(family="微软雅黑", size=13) 184 | ft_small = tkFont.Font(family="微软雅黑", size=6) 185 | 186 | f3 = tk.Frame(self.root) 187 | tk.Label(f3, text="论文保存文件夹 ", font=ft, anchor='w').pack(side='left', padx=60) 188 | self.save_dir = tk.Text(f3, bg="white", font=ft, height=1, width=50) 189 | self.save_dir.pack(side=tk.LEFT) 190 | f3.pack(fill='both', expand=True) 191 | 192 | f_empty = tk.Frame(self.root) 193 | tk.Label(f_empty, text="", font=ft_small).pack(side='left') 194 | f_empty.pack(fill='both', expand=True) 195 | 196 | # 模式1 197 | f5 = tk.Frame(self.root) 198 | tk.Label(f5, text="方法 1 : 使用URL.txt文件", font=ft_title, anchor='w').pack(side=tk.LEFT, padx=60) 199 | f5.pack(fill='both', expand=True) 200 | 201 | f_urltxt = tk.Frame(self.root) 202 | tk.Label(f_urltxt, text="URL文件路径", font=ft, anchor='w', padx=60).pack(side=tk.LEFT) 203 | self.url_txt_path = tk.Text(f_urltxt, bg="white", font=ft, height=1, width=40) 204 | self.url_txt_path.pack(side=tk.LEFT) 205 | tk.Button(f_urltxt, text="开始下载", width=10, height=1, bg="cadetblue", font=ft, command=self.begin_download_1) \ 206 | .pack(side=tk.RIGHT, anchor=tk.W, padx=80) 207 | tk.Label(f_urltxt, text="", font=ft).pack(side=tk.LEFT) 208 | f_urltxt.pack(fill='both', expand=True) 209 | f9 = tk.Frame(self.root) 210 | self.CheckVar1 = tk.IntVar() 211 | self.save_with_yesr_1 = tk.Checkbutton(f9, text="论文保存时自动添加年份前缀", font=ft_middle, variable=self.CheckVar1, 212 | onvalue=1, offvalue=0) 213 | self.save_with_yesr_1.pack(side=tk.LEFT, padx=60) 214 | f9.pack(fill='both', expand=True) 215 | 216 | f_empty2 = tk.Frame(self.root) 217 | tk.Label(f_empty2, text="", font=ft_small).pack(side='left') 218 | f_empty2.pack(fill='both', expand=True) 219 | 220 | # 模式2 221 | f6 = tk.Frame(self.root) 222 | tk.Label(f6, text="方法 2 : 在线查询", font=ft_title, anchor='w').pack(side=tk.LEFT, padx=60) 223 | f6.pack(fill='both', expand=True) 224 | f7 = tk.Frame(self.root) 225 | tk.Label(f7, text="关键词", font=ft, anchor='w').pack(side=tk.LEFT, padx=60) 226 | self.keyword = tk.Text(f7, bg="white", font=ft, height=1, width=20) 227 | self.keyword.pack(side=tk.LEFT) 228 | tk.Label(f7, text="下载页数范围", font=ft, anchor='w').pack(side=tk.LEFT, padx=40) 229 | self.page_range = tk.Text(f7, bg="white", font=ft, height=1, width=10) 230 | self.page_range.pack(side=tk.LEFT, padx=0) 231 | tk.Button(f7, text="开始下载", width=10, height=1, bg="cadetblue", font=ft, command=self.begin_download_2) \ 232 | .pack(side=tk.LEFT, anchor=tk.W, padx=40) 233 | f7.pack(fill='both', expand=True) 234 | 235 | f8 = tk.Frame(self.root) 236 | self.CheckVar2 = tk.IntVar() 237 | self.save_with_yesr_2 = tk.Checkbutton(f8, text="论文保存时自动添加年份前缀", font=ft_middle, variable=self.CheckVar2, 238 | onvalue=1, offvalue=0) 239 | self.save_with_yesr_2.pack(side=tk.LEFT, padx=60) 240 | f8.pack(fill='both', expand=True) 241 | 242 | def show_title(self, *args): 243 | self.root.overrideredirect(self.no_title) 244 | self.no_title = not self.no_title 245 | 246 | def download_1_thread(self): 247 | if show_begin_download("开始下载吗?"): 248 | save_dir = self.save_dir.get(0.0, tk.END).split("\n")[0].strip() 249 | url_txt_path = self.url_txt_path.get(0.0, tk.END).split("\n")[0].strip() 250 | save_with_year = self.CheckVar1.get() 251 | is_valid = check_value_valid(mode=1, save_dir=save_dir, url_path=url_txt_path) 252 | if not is_valid: 253 | return 254 | # 配置存储文件夹 255 | import os 256 | if not os.path.exists(save_dir): 257 | os.mkdir(save_dir) 258 | status, paper_info = organize_info_by_txt(save_dir, url_txt_path, paper_name_with_year=save_with_year) 259 | if not status: 260 | show_fail_window("URL文件未找到...") 261 | return 262 | if self.all_downloaded(paper_info): 263 | info = "{}篇论文已存在,无需下载!".format(len(paper_info)) 264 | show_succeed_window(info) 265 | return 266 | # 下载论文 267 | self.create_progress_bar(paper_info) ## 创建progress bar窗口 268 | try: 269 | _thread.start_new_thread(self.refresh_window, ()) 270 | except: 271 | show_fail_window("Error: 无法启动线程") 272 | succeed, paper_downloaded, already_exist = downLoad_paper(paper_info) 273 | if succeed: 274 | info = "成功下载{}篇论文!".format(paper_downloaded + already_exist) 275 | if hasattr(self, 'pb_window'): 276 | self.pb_window.destroy() 277 | show_succeed_window(info) 278 | else: 279 | show_fail_window("下载失败,请检查配置。") 280 | 281 | def create_progress_bar(self, paper_info): 282 | if hasattr(self, 'pb_window'): 283 | self.pb_window.destroy() 284 | self.pb_window = tk.Toplevel() 285 | self.pb_window.geometry("300x140+600+300") 286 | self.pb_window.iconbitmap("img/root.ico") # 窗体图标 287 | self.pb_window.title("下载进度") 288 | center_window(self.pb_window) 289 | self.download_pb = ttk.Progressbar(self.pb_window, length=200, mode="determinate", orient=tk.HORIZONTAL) 290 | self.download_pb.pack(padx=10, pady=20) 291 | self.download_pb["value"] = 0 292 | self.download_pb["maximum"] = len(paper_info) 293 | 294 | def refresh_window(self): 295 | if not hasattr(self, 'pb_window'): 296 | return 297 | if not hasattr(self, 'download_pb'): 298 | return 299 | while utils.get_value("progress_bar_num") < self.download_pb["maximum"]-1: 300 | if self.pb_window and self.download_pb: 301 | self.download_pb["value"] = utils.get_value("progress_bar_num") 302 | self.pb_window.update() 303 | if hasattr(self, 'pb_window'): 304 | self.pb_window.destroy() 305 | 306 | def begin_download_1(self): 307 | try: 308 | _thread.start_new_thread(self.download_1_thread, ()) 309 | except: 310 | show_fail_window("Error: 无法启动线程") 311 | 312 | def all_downloaded(self, paperlist): 313 | for key, value in paperlist.items(): 314 | if not os.path.exists(value['name']): 315 | return False 316 | return True 317 | 318 | def download_2_thread(self): 319 | if show_begin_download("开始下载吗?"): 320 | save_dir = self.save_dir.get(0.0, tk.END).split("\n")[0].strip() 321 | keywords = self.keyword.get(0.0, tk.END).split("\n")[0].strip() 322 | page_range = self.page_range.get(0.0, tk.END).split("\n")[0].strip() 323 | save_with_year = self.CheckVar2.get() 324 | is_valid = check_value_valid(mode=2, save_dir=save_dir, keyword=keywords, page=page_range) 325 | page_is_valid, page_range = check_page_valid(page_range) 326 | if not page_is_valid: 327 | return 328 | if not is_valid: 329 | return 330 | # 配置存储文件夹 331 | import os 332 | if not os.path.exists(save_dir): 333 | os.mkdir(save_dir) 334 | status, paper_info = organize_info_by_query(keywords, page_range, save_dir, save_with_year) 335 | if not status: 336 | show_fail_window("URL解析失败...") 337 | 338 | if self.all_downloaded(paper_info): 339 | info = "{}篇论文已存在,无需下载!".format(len(paper_info)) 340 | show_succeed_window(info) 341 | return 342 | # 下载论文 343 | self.create_progress_bar(paper_info) ## 创建progress bar窗口 344 | try: 345 | _thread.start_new_thread(self.refresh_window, ()) 346 | except: 347 | show_fail_window("Error: 无法启动线程") 348 | succeed, paper_downloaded, already_exist = downLoad_paper(paper_info, show_bar=True) 349 | if succeed: 350 | info = "成功下载{}篇论文!".format(paper_downloaded + already_exist) 351 | if hasattr(self, 'pb_window'): 352 | self.pb_window.destroy() 353 | show_succeed_window(info) 354 | else: 355 | show_fail_window("下载失败,请检查配置。") 356 | 357 | def begin_download_2(self): 358 | try: 359 | _thread.start_new_thread(self.download_2_thread, ()) 360 | except: 361 | show_fail_window("Error: 无法启动线程") 362 | 363 | def close(self, *arg): 364 | if show_confirm("确认退出吗 ?"): 365 | self.root.destroy() 366 | 367 | 368 | if __name__ == "__main__": 369 | utils._init() 370 | utils.set_value("progress_bar_num", 0) 371 | app = App() 372 | app.root.mainloop() 373 | -------------------------------------------------------------------------------- /url.txt: -------------------------------------------------------------------------------- 1 | J. Bang, S. Han, K. Lee and G. G. Lee, "Open-domain personalized dialog system using user-interested topics in system responses," 2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), Scottsdale, AZ, USA, 2015, pp. 771-776. 2 | doi: 10.1109/ASRU.2015.7404866 3 | keywords: {Databases;Data mining;Knowledge based systems;Knowledge management;Impedance matching;Computer science;Measurement;Chat system;chatbot;open-domain dialog;knowledge base;topic-based dialog}, 4 | URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7404866&isnumber=7404758 5 | 6 | Y. Osanai, E. Shimokawara and T. Yamaguchi, "Correlation Analysis of Template Generality and Output Evaluation in Dialogue Response Generation," 2018 Joint 10th International Conference on Soft Computing and Intelligent Systems (SCIS) and 19th International Symposium on Advanced Intelligent Systems (ISIS), Toyama, Japan, 2018, pp. 1133-1137. 7 | doi: 10.1109/SCIS-ISIS.2018.00178 8 | keywords: {Databases;Clothing;Dictionaries;Semantics;Intelligent systems;Maximum likelihood estimation;Grammar;Dialogue System;Evaluation;template generation}, 9 | URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8716192&isnumber=8716030 10 | 11 | J. Ohmura and M. Eskenazi, "Context-Aware Dialog Re-Ranking for Task-Oriented Dialog Systems," 2018 IEEE Spoken Language Technology Workshop (SLT), Athens, Greece, 2018, pp. 846-853. 12 | doi: 10.1109/SLT.2018.8639596 13 | keywords: {Task analysis;Training;History;Speech recognition;Predictive models;Stacking;Mathematical model;Task-oriented dialog systems;response selection;re-ranking;ensemble learning;speech recognition errors}, 14 | URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8639596&isnumber=8639030 15 | 16 | M. Gašić, D. Kim, P. Tsiakoulis and S. Young, "Distributed dialogue policies for multi-domain statistical dialogue management," 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), South Brisbane, QLD, Australia, 2015, pp. 5371-5375. 17 | doi: 10.1109/ICASSP.2015.7178997 18 | keywords: {Training;Training data;Limiting;Databases;open-domain;multi-domain;dialogue systems;POMDP;Gaussian process}, 19 | URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7178997&isnumber=7177909 20 | 21 | G. D. Duplessis, A. Pauchet, N. Chaignaud and J. Kotowicz, "A Conventional Dialogue Model Based on Empirically Specified Dialogue Games," 2015 IEEE 27th International Conference on Tools with Artificial Intelligence (ICTAI), Vietri sul Mare, Italy, 2015, pp. 997-1004. 22 | doi: 10.1109/ICTAI.2015.143 23 | keywords: {Games;Context;Production;Semantics;Context modeling;Open source software;Coherence;Human-Machine Interaction;Dialogue Management;Dialogue Games}, 24 | URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7372240&isnumber=7372095 -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2021/10/13 12:16 3 | # @Author : Yong Cao 4 | # @Email : yongcao_epic@hust.edu.cn 5 | import os 6 | import requests 7 | import time 8 | 9 | 10 | def _init(): 11 | # 初始化一个全局的字典 12 | global _global_dict 13 | _global_dict = {} 14 | 15 | 16 | def set_value(key, value): 17 | _global_dict[key] = value 18 | 19 | 20 | def get_value(key): 21 | try: 22 | return _global_dict[key] 23 | except KeyError as e: 24 | print(e) 25 | 26 | 27 | 28 | def get_window_size(win, update=True): 29 | """ 获得窗体的尺寸 """ 30 | if update: 31 | win.update() 32 | return win.winfo_width(), win.winfo_height(), win.winfo_x(), win.winfo_y() 33 | 34 | 35 | def center_window(win, width=None, height=None): 36 | """ 将窗口屏幕居中 """ 37 | screenwidth = win.winfo_screenwidth() 38 | screenheight = win.winfo_screenheight() 39 | if width is None: 40 | width, height = get_window_size(win)[:2] 41 | size = '%dx%d+%d+%d' % (width, height, (screenwidth - width) / 2, (screenheight - height) / 3) 42 | win.geometry(size) 43 | 44 | 45 | def downLoad_paper(paper_info, show_bar=False): 46 | print("\n" * 2) 47 | print("执行开始".center(len(paper_info) + 28, '-')) 48 | succeed = True 49 | paper_downloaded = 0 50 | already_exist = 0 51 | start = time.perf_counter() 52 | for i, item in enumerate(paper_info.keys()): 53 | set_value("progress_bar_num", i) 54 | papername = paper_info[item]['name'] 55 | paperurl = paper_info[item]['url'] 56 | # 文件存储 57 | if os.path.exists(papername): 58 | already_exist += 1 59 | continue 60 | try: 61 | r = requests.get(paperurl) 62 | with open(papername, 'wb+') as f: 63 | f.write(r.content) 64 | paper_downloaded += 1 65 | # 停一下防禁ip 66 | time.sleep(1) 67 | except Exception as e: 68 | print(e) 69 | print("unknown name! parser error", papername) 70 | succeed = False 71 | a = '*' * i 72 | b = '.' * (len(paper_info) - i) 73 | c = (i / len(paper_info)) * 100 74 | t = time.perf_counter() - start 75 | print("\r任务进度:{:>3.0f}% [{}->{}]消耗时间:{:.2f}s".format(c, a, b, t), end="") 76 | set_value("progress_bar_num", len(paper_info)) 77 | # if show_bar: 78 | # del root 79 | print("\n"+"执行结束".center(len(paper_info)+28,'-')) 80 | print("-"*50) 81 | print("Downloaded {} papers and {} paper already exists.".format(paper_downloaded, already_exist)) 82 | print("-" * 50) 83 | return succeed, paper_downloaded, already_exist --------------------------------------------------------------------------------