├── README.md
├── config.py
├── download_paper_by_URLfile.py
├── download_paper_by_pageURL.py
├── img
├── exit.ico
├── ieee.png
└── root.ico
├── main.py
├── main_ui.py
├── url.txt
└── utils.py
/README.md:
--------------------------------------------------------------------------------
1 | # 写综述必备!自动批量下载IEEE的论文
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | > 如果领域内的大量论文需要下载,基于本工具实现半自动化,无需到IEEE网站手动一篇一篇下载论文啦,科研效率翻倍!
21 | > 工具使用前提: 一定要在能够有权限下载IEEE论文的网络下才能正常使用。
22 |
23 | 如下,介绍两种方法实现批量下载,代码流程和详细介绍请参考博客。
24 |
25 | ## 软件界面
26 |
27 |
28 |

29 |
30 |
31 | ## 方法1
32 | 将IEEE网站上的paper list导出到txt文档中,再基于该txt文件批量下载。
33 | 优点:可筛选不需要的论文
34 | 缺点:一次只能导出一页(≤25篇论文)
35 |
36 | * **step 1**
37 | * 进入IEEE官网[https://ieeexplore.ieee.org](https://ieeexplore.ieee.org/),按照需要搜索论文,按照如下方式导出论文列表
38 |
39 |
40 |

41 |
42 |
43 | * **step 2**
44 | * 导出到txt文件,如下图所示。运行main_ui.py, 进入UI界面,配置论文保存文件夹和URL文件路径两个参数,点击“开始下载”,搞定!
45 |
46 |
47 |

48 |
49 |
50 | ## 方法2
51 | 优点:可大批量下载论文
52 | 缺点:不可在下载前筛选不需要的论文
53 | * **step 1**
54 | * 进入IEEE官网[https://ieeexplore.ieee.org](https://ieeexplore.ieee.org/),按照关键词搜索论文
55 |
56 | * **step 2**
57 | * 在软件界面中填入同样的关键词,并填写需要下载的页数:多个页码用英文逗号隔开,页码范围用连字符(-)表示,也可以只填一个数字。eg. 2,3 或者 2-5 或者 2
58 |
59 | ## 配置举例
60 |
61 |
62 |

63 |
64 |
65 | > 点击下载按钮
66 |
67 |
68 |

69 |
70 |
71 | > 下载成功啦!
72 |
73 |
74 |

75 |
76 |
77 | ## 搭配EndNote使用
78 |
79 |
80 |

81 |
82 |
83 | 使用教程请参考博客:
84 | [科研效率直线提升!如何一键下载会议论文?ACL 2020 论文代码批量下载 打包分享](https://blog.csdn.net/weixin_43955436/article/details/116696395?spm=1001.2014.3001.5501)
85 |
86 | ### Reference
87 | 本工具主要参考以下资料,感谢作者分享。
88 | [1] https://blog.csdn.net/ubooksapp/article/details/49518009
89 | [2] https://github.com/EdwinZhang1970/Python
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2021/10/14 15:56
3 | # @Author : Yong Cao
4 | # @Email : yongcao_epic@hust.edu.cn
class pb_value:
    """Namespace holding the shared progress-bar counter."""

    # Current progress value; accessed through set_value / get_value below.
    progress_bar_value = 0


# Every global variable is exposed through a get_value / set_value pair.
def set_value(value):
    """Store ``value`` as the current progress-bar value."""
    pb_value.progress_bar_value = value


def get_value():
    """Return the value last stored with ``set_value`` (0 before any call)."""
    # The attribute is ``progress_bar_value``; reading ``pb_value.value``
    # raised AttributeError because no such attribute exists.
    return pb_value.progress_bar_value
14 |
--------------------------------------------------------------------------------
/download_paper_by_URLfile.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2021/10/12 22:49
3 | # @Author : Yong Cao
4 | # @Email : yongcao_epic@hust.edu.cn
5 | import os
6 | import re
7 | from tqdm import tqdm
8 | from utils import downLoad_paper
9 |
10 |
def organize_info_by_txt(dst_dir, url_file, paper_name_with_year=None):
    """Parse an exported IEEE citation list into download targets.

    The file is expected to hold blank-line separated records whose lines are,
    in order: the citation (title in double quotes), the ``doi:`` line, the
    ``keywords:`` line and the ``URL:`` line.

    Args:
        dst_dir: Directory the PDF file names are joined onto.
        url_file: Path of the exported citation text file.
        paper_name_with_year: When truthy, prefix each file name with the
            third dot-separated field of the DOI line (the year in the
            sample data).

    Returns:
        ``(False, None)`` when ``url_file`` does not exist, otherwise
        ``(True, paper_info)`` where ``paper_info`` maps the record index to
        ``{'name': <pdf path>, 'url': <direct PDF url>}``.
    """
    if not os.path.exists(url_file):
        return False, None
    # Citations contain non-ASCII author names, so decode explicitly instead of
    # relying on the platform default; "utf-8-sig" also tolerates a BOM.
    with open(url_file, "r", encoding="utf-8-sig") as f:
        records = f.read().split("\n\n")
    title_rule = re.compile(r'"(.*?)"')
    # Characters that are not allowed (or are awkward) in file names.
    illegal_chars = re.compile(r"[\=\(\)\,\/\\\:\*\?\?\"\<\>\|\'']")
    url_prefix = "URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber="
    paper_info = {}
    for i, record in enumerate(records):
        content = record.strip().split("\n")
        # Paper name: first quoted string of the citation line.
        titles = title_rule.findall(content[0])
        if not titles:
            # Blank or malformed record (e.g. the trailing newline of the file).
            continue
        papername = illegal_chars.sub('', titles[0])
        if paper_name_with_year and len(content) > 1:
            doi_fields = content[1].split(".")
            if len(doi_fields) > 2:
                papername = doi_fields[2] + ' ' + papername
        papername = os.path.join(dst_dir, papername + '.pdf')
        # Paper URL: only records that carry a URL line are downloadable.
        if len(content) > 3 and "URL" in content[3]:
            arnumber = content[3].replace(url_prefix, "").split("&")[0]
            url = "https://ieeexplore.ieee.org/stampPDF/getPDF.jsp?tp=&arnumber=" + arnumber + "&ref="
            paper_info[i] = {'name': papername, 'url': url}
    return True, paper_info
36 |
37 |
if __name__ == '__main__':
    import utils

    # downLoad_paper reports progress through the utils global store, which
    # must be initialised before the first set_value call.
    utils._init()
    # Configure the output folder.
    dst_dir = "./save"
    os.makedirs(dst_dir, exist_ok=True)
    # Collect download URLs and paper names.
    url_txt = "url.txt"
    # organize_info_by_txt returns (status, paper_info); passing the tuple
    # itself to downLoad_paper would fail.
    found, paper_info = organize_info_by_txt(dst_dir, url_txt, True)
    if found:
        # Download the papers.
        downLoad_paper(paper_info)
    else:
        print("URL file not found:", url_txt)
48 |
--------------------------------------------------------------------------------
/download_paper_by_pageURL.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2021/10/13 10:37
3 | # @Author : Yong Cao
4 | # @Email : yongcao_epic@hust.edu.cn
5 | import json
6 | import requests
7 | import os
8 | from utils import downLoad_paper
9 | import re
10 | import urllib.request
11 |
12 |
def organize_info_by_query(queryText, pageNumber, save_dir, paper_name_with_year=None):
    """Query the IEEE Xplore search endpoint and build download targets.

    Args:
        queryText: Search keywords sent as ``queryText``.
        pageNumber: Iterable of result page numbers to fetch.
        save_dir: Directory the PDF file names are joined onto.
        paper_name_with_year: When truthy, prefix each file name with the
            record's ``publicationYear`` (skipped if the record has none).

    Returns:
        ``(status, paper_info)`` where ``paper_info`` maps a running index to
        ``{'name': <pdf path>, 'url': <direct PDF url>}`` and ``status`` is
        True when at least one record was found.
    """
    # Seconds to wait on each HTTP exchange; the POST previously had no
    # timeout at all and the landing-page request allowed only one second.
    timeout = 10
    # Collect the cookies handed out by the landing page; they are replayed on
    # every search request.
    landing_headers = urllib.request.urlopen("https://ieeexplore.ieee.org", timeout=timeout).info()
    cookie_valid = "; ".join(value for key, value in landing_headers.items() if key == "Set-Cookie")
    headers = {
        'Host': 'ieeexplore.ieee.org',
        'Content-Type': "application/json",
        'User-Agent': 'PostmanRuntime/7.28.1',
        'Cookie': cookie_valid,
        'Accept': '*/*'}
    # Characters stripped from titles before they are used as file names.
    rstr = r"[\=\(\)\,\/\\\:\*\?\?\"\<\>\|\'']"
    paper_info = {}
    count = 0
    for page in pageNumber:
        payload = {"queryText": queryText, "pageNumber": str(page), "returnFacets": ["ALL"],
                   "returnType": "SEARCH"}
        toc_res = requests.post("https://ieeexplore.ieee.org/rest/search", headers=headers,
                                data=json.dumps(payload), timeout=timeout)
        response = json.loads(toc_res.text)
        if 'records' not in response:
            continue
        for item in response['records']:
            title = re.sub(rstr, '', item['articleTitle'])
            year = item.get('publicationYear')
            if paper_name_with_year and year:
                title = year + ' ' + title
            paper_info[count] = {
                'url': "https://ieeexplore.ieee.org/stampPDF/getPDF.jsp?tp=&arnumber=" + item['articleNumber'] + "&ref=",
                'name': os.path.join(save_dir, title + '.pdf'),
            }
            count += 1
    return bool(paper_info), paper_info
48 |
49 |
if __name__ == '__main__':
    import utils

    # Initialise the shared store that downLoad_paper reports progress into.
    utils._init()
    save_dir = "save"
    queryText = "dialog system"
    pageNumber = [3]
    # The status flag is not needed here; only the collected targets are used.
    _, paper_info = organize_info_by_query(queryText, pageNumber, save_dir, True)
    downLoad_paper(paper_info)
58 |
--------------------------------------------------------------------------------
/img/exit.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yongcaoplus/IEEE_downloader/99a41fe9593a82c129086eec5caa9730e8efbd41/img/exit.ico
--------------------------------------------------------------------------------
/img/ieee.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yongcaoplus/IEEE_downloader/99a41fe9593a82c129086eec5caa9730e8efbd41/img/ieee.png
--------------------------------------------------------------------------------
/img/root.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yongcaoplus/IEEE_downloader/99a41fe9593a82c129086eec5caa9730e8efbd41/img/root.ico
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2021/10/13 12:34
3 | # @Author : Yong Cao
4 | # @Email : yongcao_epic@hust.edu.cn
5 | import os
6 | from download_paper_by_URLfile import organize_info_by_txt
7 | from download_paper_by_pageURL import organize_info_by_query
8 | from utils import downLoad_paper
9 |
10 |
if __name__ == '__main__':
    import utils

    # downLoad_paper reports progress through the utils global store, which
    # must be initialised before the first set_value call.
    utils._init()
    # ---------------- config 1 ----------------
    mode = "search"  # "txt" or "search"
    dst_dir = "./save"
    # ------------------ END -------------------
    # Both modes write into dst_dir, so create it up front.
    os.makedirs(dst_dir, exist_ok=True)
    if mode == "txt":
        # ---------------- config 2 ----------------
        url_txt = "url.txt"  # only needed in txt mode
        # ------------------ END -------------------
        # Collect download URLs and paper names; the signature is
        # (dst_dir, url_file, paper_name_with_year=None).
        found, paper_info = organize_info_by_txt(dst_dir, url_txt)
        if found:
            # Download the papers.
            downLoad_paper(paper_info)
        else:
            print("URL file not found:", url_txt)
    else:
        # ---------------- config 3 ----------------
        queryText = "dialog system"
        pageNumber = [3]
        save_papername_with_year = True
        # ------------------ END -------------------
        _, paper_info = organize_info_by_query(queryText, pageNumber, dst_dir, save_papername_with_year)
        downLoad_paper(paper_info, show_bar=True)
35 |
--------------------------------------------------------------------------------
/main_ui.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 | import _thread
3 | import os
4 | import tkinter as tk
5 | import tkinter.font as tkFont
6 | from tkinter import messagebox, ttk
7 |
8 | from PIL import Image, ImageTk
9 |
10 | import utils
11 | from download_paper_by_URLfile import organize_info_by_txt
12 | from download_paper_by_pageURL import organize_info_by_query
13 | from utils import downLoad_paper, center_window
14 |
15 |
def show_confirm(message=""):
    """Ask a yes/no question in a dialog titled "确认框".

    Returns the result of ``messagebox.askyesno``: True for yes, False for no.
    """
    answer = messagebox.askyesno("确认框", message)
    return answer
22 |
23 |
def error_inform(message=""):
    """Show ``message`` in an error dialog titled "参数错误".

    Returns whatever ``messagebox.showerror`` returns, unchanged.
    """
    result = messagebox.showerror("参数错误", message)
    return result
30 |
31 |
def show_succeed_window(message=""):
    """Show ``message`` in an info dialog titled "下载成功!".

    Returns whatever ``messagebox.showinfo`` returns, unchanged.
    """
    result = messagebox.showinfo("下载成功!", message)
    return result
38 |
39 |
def show_fail_window(message=""):
    """Show ``message`` in an error dialog titled "下载失败...".

    Returns whatever ``messagebox.showerror`` returns, unchanged.
    """
    result = messagebox.showerror("下载失败...", message)
    return result
46 |
47 |
def show_begin_download(message=""):
    """Ask for confirmation before a download in a dialog titled "确认框".

    Returns the result of ``messagebox.askyesno``: True for yes, False for no.
    """
    confirmed = messagebox.askyesno("确认框", message)
    return confirmed
54 |
55 |
def tkimg_resized(img, w_box, h_box, keep_ratio=True):
    """Resize a PIL image for display and wrap it as a Tk photo image.

    Args:
        img: Image object exposing ``size`` and ``resize``.
        w_box: Width of the target box in pixels.
        h_box: Height of the target box in pixels.
        keep_ratio: When True, scale the image to fit inside the box while
            preserving its aspect ratio; otherwise stretch it to exactly
            ``w_box`` x ``h_box``.

    Returns:
        An ``ImageTk.PhotoImage`` built from the resized image.
    """
    w, h = img.size

    if keep_ratio:
        # Fit inside the box. The image is width-limited when it is relatively
        # wider than the box, height-limited otherwise. Integer arithmetic
        # avoids float rounding; max(1, ...) keeps extreme ratios valid.
        if w * h_box >= h * w_box:
            width = w_box
            height = max(1, w_box * h // w)
        else:
            height = h_box
            width = max(1, h_box * w // h)
    else:
        width = w_box
        height = h_box

    # Image.ANTIALIAS was an alias of LANCZOS and no longer exists in
    # Pillow >= 10; LANCZOS is the same filter under its current name.
    img1 = img.resize((width, height), Image.LANCZOS)
    tkimg = ImageTk.PhotoImage(img1)
    return tkimg
75 |
76 |
def image_label(frame, img, width, height, keep_ratio=True):
    """Build a ``tk.Label`` showing ``img`` resized to ``width`` x ``height``.

    ``img`` may be a file path (opened with ``Image.open``) or an
    already-loaded image object.
    """
    source = Image.open(img) if isinstance(img, str) else img
    label = tk.Label(frame, width=width, height=height)

    photo = tkimg_resized(source, width, height, keep_ratio)
    # Stored on the label as well as configured, presumably so the photo image
    # stays referenced for as long as the label lives.
    label.image = photo
    label.config(image=photo)
    return label
89 |
90 |
def space(n):
    """Return a string made of ``n`` space characters (empty when n <= 0)."""
    return " " * n
97 |
98 |
def check_value_valid(mode, save_dir=None, url_path=None, keyword=None, page=None):
    """Check that the fields required by the selected download mode are filled.

    Mode 1 needs ``save_dir`` and ``url_path``; mode 2 needs ``save_dir``,
    ``keyword`` and ``page``. An error dialog is shown and False returned when
    a required field is empty; every other case returns True.
    """
    if mode == 1 and not (save_dir and url_path):
        error_inform("请检查 论文保存文件夹 URL文件路径 是否已输入")
        return False
    if mode == 2 and not (save_dir and keyword and page):
        error_inform("请检查 论文保存文件夹 关键词 下载页数范围 是否已输入")
        return False
    return True
109 |
110 |
def check_page_valid(page):
    """Parse a page specification such as ``"2"``, ``"2,3"`` or ``"2-5"``.

    Items are separated by commas; each item is either a single page number or
    an inclusive ``first-last`` range.

    Returns:
        ``(True, pages)`` with ``pages`` a sorted list of unique page numbers,
        or ``(False, None)`` after showing an error dialog when the input is
        malformed, yields no page at all (e.g. a reversed range like "5-2"),
        or contains a page number below 1.
    """
    error_message = "下载页数范围输入错误"
    pages = set()
    try:
        # A string without commas splits into a single item, so one loop
        # covers the "2", "2-5" and "2,3-4" forms alike.
        for item in page.split(","):
            if "-" in item:
                bounds = item.split("-")
                if len(bounds) != 2:
                    show_fail_window(error_message)
                    return False, None
                first = int(bounds[0].strip())
                last = int(bounds[1].strip())
                pages.update(range(first, last + 1))
            else:
                pages.add(int(item.strip()))
    except (AttributeError, TypeError, ValueError):
        # Non-string input or a non-numeric item.
        show_fail_window(error_message)
        return False, None
    page = sorted(pages)
    # An empty result means every range was reversed; accepting it would start
    # a download with nothing to fetch.
    if not page or page[0] < 1:
        show_fail_window(error_message)
        return False, None
    return True, page
142 |
143 |
class App:
    """Main window of the IEEE batch paper downloader.

    The window offers two download methods: from an exported URL text file
    (method 1) and from an online keyword query (method 2). Each download runs
    in its own thread started from the corresponding button.
    """

    def __init__(self):
        """Create the root window, centre it and build all widgets."""
        self.root = tk.Tk()
        self.root.geometry("%dx%d" % (900, 650))  # window size
        self.root.iconbitmap("img/root.ico")  # window icon
        self.root.title("IEEE论文批量下载工具_v1.0")
        center_window(self.root)
        # self.root.resizable(False, False)  # make the window non-resizable
        self.no_title = False
        self.show_title()
        self.body()

    def body(self):
        """Build the title bar, the banner image and both download forms."""
        # ---------------------------------------------------------------------
        # Title bar
        # ---------------------------------------------------------------------
        f1 = tk.Frame(self.root)
        im1 = image_label(f1, "img/root.ico", 86, 86, False)
        im1.configure(bg="Teal")
        # NOTE(review): the event sequences were empty strings, which Tk
        # rejects ("no events specified in binding"). Double-click / click are
        # the presumed original bindings -- confirm against the upstream file.
        im1.bind('<Double-Button-1>', self.show_title)
        im1.pack(side=tk.LEFT, anchor=tk.NW, fill=tk.Y)

        ft1 = tkFont.Font(family="微软雅黑", size=24, weight=tkFont.BOLD)
        tk.Label(f1, text="IEEE论文批量下载工具_v1.0", height=2, fg="white", font=ft1, bg="Teal") \
            .pack(side=tk.LEFT, expand=tk.YES, fill=tk.X)

        im2 = image_label(f1, "img/exit.ico", 86, 86, False)
        im2.configure(bg="Teal")
        im2.bind('<Button-1>', self.close)
        im2.pack(side=tk.RIGHT, anchor=tk.NW, fill=tk.Y)

        f2 = tk.Frame(self.root)
        image_label(f2, "img/ieee.png", width=400, height=142, keep_ratio=False).pack(padx=10, pady=10)
        f1.pack(fill=tk.X)
        f2.pack()

        ft_title = tkFont.Font(family="微软雅黑", size=13, weight=tkFont.BOLD)
        ft_middle = tkFont.Font(family="微软雅黑", size=11)
        ft = tkFont.Font(family="微软雅黑", size=13)
        ft_small = tkFont.Font(family="微软雅黑", size=6)

        # Save folder (shared by both methods)
        f3 = tk.Frame(self.root)
        tk.Label(f3, text="论文保存文件夹 ", font=ft, anchor='w').pack(side='left', padx=60)
        self.save_dir = tk.Text(f3, bg="white", font=ft, height=1, width=50)
        self.save_dir.pack(side=tk.LEFT)
        f3.pack(fill='both', expand=True)

        f_empty = tk.Frame(self.root)
        tk.Label(f_empty, text="", font=ft_small).pack(side='left')
        f_empty.pack(fill='both', expand=True)

        # Method 1: URL text file
        f5 = tk.Frame(self.root)
        tk.Label(f5, text="方法 1 : 使用URL.txt文件", font=ft_title, anchor='w').pack(side=tk.LEFT, padx=60)
        f5.pack(fill='both', expand=True)

        f_urltxt = tk.Frame(self.root)
        tk.Label(f_urltxt, text="URL文件路径", font=ft, anchor='w', padx=60).pack(side=tk.LEFT)
        self.url_txt_path = tk.Text(f_urltxt, bg="white", font=ft, height=1, width=40)
        self.url_txt_path.pack(side=tk.LEFT)
        tk.Button(f_urltxt, text="开始下载", width=10, height=1, bg="cadetblue", font=ft, command=self.begin_download_1) \
            .pack(side=tk.RIGHT, anchor=tk.W, padx=80)
        tk.Label(f_urltxt, text="", font=ft).pack(side=tk.LEFT)
        f_urltxt.pack(fill='both', expand=True)
        f9 = tk.Frame(self.root)
        self.CheckVar1 = tk.IntVar()
        self.save_with_yesr_1 = tk.Checkbutton(f9, text="论文保存时自动添加年份前缀", font=ft_middle, variable=self.CheckVar1,
                                               onvalue=1, offvalue=0)
        self.save_with_yesr_1.pack(side=tk.LEFT, padx=60)
        f9.pack(fill='both', expand=True)

        f_empty2 = tk.Frame(self.root)
        tk.Label(f_empty2, text="", font=ft_small).pack(side='left')
        f_empty2.pack(fill='both', expand=True)

        # Method 2: online query
        f6 = tk.Frame(self.root)
        tk.Label(f6, text="方法 2 : 在线查询", font=ft_title, anchor='w').pack(side=tk.LEFT, padx=60)
        f6.pack(fill='both', expand=True)
        f7 = tk.Frame(self.root)
        tk.Label(f7, text="关键词", font=ft, anchor='w').pack(side=tk.LEFT, padx=60)
        self.keyword = tk.Text(f7, bg="white", font=ft, height=1, width=20)
        self.keyword.pack(side=tk.LEFT)
        tk.Label(f7, text="下载页数范围", font=ft, anchor='w').pack(side=tk.LEFT, padx=40)
        self.page_range = tk.Text(f7, bg="white", font=ft, height=1, width=10)
        self.page_range.pack(side=tk.LEFT, padx=0)
        tk.Button(f7, text="开始下载", width=10, height=1, bg="cadetblue", font=ft, command=self.begin_download_2) \
            .pack(side=tk.LEFT, anchor=tk.W, padx=40)
        f7.pack(fill='both', expand=True)

        f8 = tk.Frame(self.root)
        self.CheckVar2 = tk.IntVar()
        self.save_with_yesr_2 = tk.Checkbutton(f8, text="论文保存时自动添加年份前缀", font=ft_middle, variable=self.CheckVar2,
                                               onvalue=1, offvalue=0)
        self.save_with_yesr_2.pack(side=tk.LEFT, padx=60)
        f8.pack(fill='both', expand=True)

    def show_title(self, *args):
        """Toggle the native window decoration; also usable as an event handler."""
        self.root.overrideredirect(self.no_title)
        self.no_title = not self.no_title

    def _read_field(self, widget):
        """Return the stripped first line of a ``tk.Text`` widget."""
        return widget.get(0.0, tk.END).split("\n")[0].strip()

    def _close_progress_bar(self):
        """Destroy the progress window if there is one; safe to call twice."""
        window = getattr(self, 'pb_window', None)
        if window is None:
            return
        try:
            window.destroy()
        except tk.TclError:
            # Already destroyed by the other thread.
            pass

    def _download_and_report(self, paper_info, show_bar=False):
        """Download ``paper_info`` with a progress window and report the result."""
        if self.all_downloaded(paper_info):
            info = "{}篇论文已存在,无需下载!".format(len(paper_info))
            show_succeed_window(info)
            return
        # Download the papers.
        self.create_progress_bar(paper_info)  # create the progress-bar window
        try:
            _thread.start_new_thread(self.refresh_window, ())
        except RuntimeError:
            show_fail_window("Error: 无法启动线程")
        succeed, paper_downloaded, already_exist = downLoad_paper(paper_info, show_bar=show_bar)
        # Close the progress window on failure too, not only on success.
        self._close_progress_bar()
        if succeed:
            info = "成功下载{}篇论文!".format(paper_downloaded + already_exist)
            show_succeed_window(info)
        else:
            show_fail_window("下载失败,请检查配置。")

    def download_1_thread(self):
        """Method 1 worker: download every paper listed in the URL text file."""
        if not show_begin_download("开始下载吗?"):
            return
        save_dir = self._read_field(self.save_dir)
        url_txt_path = self._read_field(self.url_txt_path)
        save_with_year = self.CheckVar1.get()
        if not check_value_valid(mode=1, save_dir=save_dir, url_path=url_txt_path):
            return
        # Configure the output folder (nested paths included).
        os.makedirs(save_dir, exist_ok=True)
        status, paper_info = organize_info_by_txt(save_dir, url_txt_path, paper_name_with_year=save_with_year)
        if not status:
            show_fail_window("URL文件未找到...")
            return
        self._download_and_report(paper_info)

    def create_progress_bar(self, paper_info):
        """(Re)create the progress window sized to ``len(paper_info)`` steps."""
        self._close_progress_bar()
        # Reset the shared counter so a value left over from a previous run
        # does not make refresh_window exit immediately.
        utils.set_value("progress_bar_num", 0)
        self.pb_window = tk.Toplevel()
        self.pb_window.geometry("300x140+600+300")
        self.pb_window.iconbitmap("img/root.ico")  # window icon
        self.pb_window.title("下载进度")
        center_window(self.pb_window)
        self.download_pb = ttk.Progressbar(self.pb_window, length=200, mode="determinate", orient=tk.HORIZONTAL)
        self.download_pb.pack(padx=10, pady=20)
        self.download_pb["value"] = 0
        self.download_pb["maximum"] = len(paper_info)

    def refresh_window(self):
        """Mirror the shared progress counter into the progress bar until done."""
        import time  # local import: this module's import block has no `time`

        if not hasattr(self, 'pb_window'):
            return
        if not hasattr(self, 'download_pb'):
            return
        try:
            while utils.get_value("progress_bar_num") < self.download_pb["maximum"] - 1:
                self.download_pb["value"] = utils.get_value("progress_bar_num")
                self.pb_window.update()
                # Yield between refreshes instead of spinning at full speed.
                time.sleep(0.05)
        except tk.TclError:
            # The download thread closed the window while we were updating it.
            pass
        self._close_progress_bar()

    def begin_download_1(self):
        """Button handler: run method 1 in a background thread."""
        try:
            _thread.start_new_thread(self.download_1_thread, ())
        except RuntimeError:
            show_fail_window("Error: 无法启动线程")

    def all_downloaded(self, paperlist):
        """Return True when every target file in ``paperlist`` already exists."""
        return all(os.path.exists(value['name']) for value in paperlist.values())

    def download_2_thread(self):
        """Method 2 worker: query IEEE by keyword and download the results."""
        if not show_begin_download("开始下载吗?"):
            return
        save_dir = self._read_field(self.save_dir)
        keywords = self._read_field(self.keyword)
        page_range = self._read_field(self.page_range)
        save_with_year = self.CheckVar2.get()
        # Check required fields first so an empty form produces a single
        # error dialog rather than two.
        if not check_value_valid(mode=2, save_dir=save_dir, keyword=keywords, page=page_range):
            return
        page_is_valid, page_range = check_page_valid(page_range)
        if not page_is_valid:
            return
        # Configure the output folder (nested paths included).
        os.makedirs(save_dir, exist_ok=True)
        try:
            status, paper_info = organize_info_by_query(keywords, page_range, save_dir, save_with_year)
        except Exception as e:
            # Network or parsing failures would otherwise kill this worker
            # thread without any feedback to the user.
            print(e)
            status, paper_info = False, {}
        if not status:
            show_fail_window("URL解析失败...")
            # Without this return an empty result fell through and was
            # reported as "all papers already exist".
            return
        self._download_and_report(paper_info, show_bar=True)

    def begin_download_2(self):
        """Button handler: run method 2 in a background thread."""
        try:
            _thread.start_new_thread(self.download_2_thread, ())
        except RuntimeError:
            show_fail_window("Error: 无法启动线程")

    def close(self, *arg):
        """Ask for confirmation and destroy the root window; usable as an event handler."""
        if show_confirm("确认退出吗 ?"):
            self.root.destroy()
366 |
367 |
if __name__ == "__main__":
    # Set up the shared store and zero the progress counter before the window
    # is built, then hand control to the Tk event loop.
    utils._init()
    utils.set_value("progress_bar_num", 0)
    App().root.mainloop()
373 |
--------------------------------------------------------------------------------
/url.txt:
--------------------------------------------------------------------------------
1 | J. Bang, S. Han, K. Lee and G. G. Lee, "Open-domain personalized dialog system using user-interested topics in system responses," 2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), Scottsdale, AZ, USA, 2015, pp. 771-776.
2 | doi: 10.1109/ASRU.2015.7404866
3 | keywords: {Databases;Data mining;Knowledge based systems;Knowledge management;Impedance matching;Computer science;Measurement;Chat system;chatbot;open-domain dialog;knowledge base;topic-based dialog},
4 | URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7404866&isnumber=7404758
5 |
6 | Y. Osanai, E. Shimokawara and T. Yamaguchi, "Correlation Analysis of Template Generality and Output Evaluation in Dialogue Response Generation," 2018 Joint 10th International Conference on Soft Computing and Intelligent Systems (SCIS) and 19th International Symposium on Advanced Intelligent Systems (ISIS), Toyama, Japan, 2018, pp. 1133-1137.
7 | doi: 10.1109/SCIS-ISIS.2018.00178
8 | keywords: {Databases;Clothing;Dictionaries;Semantics;Intelligent systems;Maximum likelihood estimation;Grammar;Dialogue System;Evaluation;template generation},
9 | URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8716192&isnumber=8716030
10 |
11 | J. Ohmura and M. Eskenazi, "Context-Aware Dialog Re-Ranking for Task-Oriented Dialog Systems," 2018 IEEE Spoken Language Technology Workshop (SLT), Athens, Greece, 2018, pp. 846-853.
12 | doi: 10.1109/SLT.2018.8639596
13 | keywords: {Task analysis;Training;History;Speech recognition;Predictive models;Stacking;Mathematical model;Task-oriented dialog systems;response selection;re-ranking;ensemble learning;speech recognition errors},
14 | URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8639596&isnumber=8639030
15 |
16 | M. Gašić, D. Kim, P. Tsiakoulis and S. Young, "Distributed dialogue policies for multi-domain statistical dialogue management," 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), South Brisbane, QLD, Australia, 2015, pp. 5371-5375.
17 | doi: 10.1109/ICASSP.2015.7178997
18 | keywords: {Training;Training data;Limiting;Databases;open-domain;multi-domain;dialogue systems;POMDP;Gaussian process},
19 | URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7178997&isnumber=7177909
20 |
21 | G. D. Duplessis, A. Pauchet, N. Chaignaud and J. Kotowicz, "A Conventional Dialogue Model Based on Empirically Specified Dialogue Games," 2015 IEEE 27th International Conference on Tools with Artificial Intelligence (ICTAI), Vietri sul Mare, Italy, 2015, pp. 997-1004.
22 | doi: 10.1109/ICTAI.2015.143
23 | keywords: {Games;Context;Production;Semantics;Context modeling;Open source software;Coherence;Human-Machine Interaction;Dialogue Management;Dialogue Games},
24 | URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7372240&isnumber=7372095
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2021/10/13 12:16
3 | # @Author : Yong Cao
4 | # @Email : yongcao_epic@hust.edu.cn
5 | import os
6 | import requests
7 | import time
8 |
9 |
# Shared key/value store. Created at import time so set_value / get_value work
# even when a caller never runs _init(); _init() still resets it.
_global_dict = {}


def _init():
    """Reset the module-wide key/value store to an empty dict."""
    global _global_dict
    _global_dict = {}


def set_value(key, value):
    """Store ``value`` under ``key`` in the module-wide store."""
    _global_dict[key] = value


def get_value(key):
    """Return the value stored under ``key``.

    A missing key is reported by printing the ``KeyError`` and ``None`` is
    returned.
    """
    try:
        return _global_dict[key]
    except KeyError as e:
        print(e)
        return None
25 |
26 |
27 |
def get_window_size(win, update=True):
    """Return the window geometry as ``(width, height, x, y)``.

    When ``update`` is true, ``win.update()`` is called first so the reported
    values reflect the current layout.
    """
    if update:
        win.update()
    return win.winfo_width(), win.winfo_height(), win.winfo_x(), win.winfo_y()


def center_window(win, width=None, height=None):
    """Centre the window horizontally and place it a third of the way down.

    Args:
        win: Window exposing the ``winfo_*`` and ``geometry`` methods.
        width: Target width; the window's current width when None.
        height: Target height; the window's current height when None.
    """
    screenwidth = win.winfo_screenwidth()
    screenheight = win.winfo_screenheight()
    # Fill in whichever dimension is missing; previously passing only `width`
    # left `height` as None and the format below raised TypeError.
    if width is None or height is None:
        current_width, current_height = get_window_size(win)[:2]
        if width is None:
            width = current_width
        if height is None:
            height = current_height
    size = '%dx%d+%d+%d' % (width, height, (screenwidth - width) / 2, (screenheight - height) / 3)
    win.geometry(size)
43 |
44 |
def downLoad_paper(paper_info, show_bar=False):
    """Download every paper in ``paper_info`` that is not already on disk.

    Args:
        paper_info: Mapping whose values are dicts with a ``'name'`` (target
            file path) and a ``'url'`` (download URL).
        show_bar: Accepted for interface compatibility; not used here.

    Returns:
        ``(succeed, paper_downloaded, already_exist)``: ``succeed`` is False
        if any download failed, the two counters give the number of files
        fetched in this call and the number skipped because they existed.

    Progress is printed to stdout and mirrored into the shared store under the
    key ``"progress_bar_num"``.
    """
    total = len(paper_info)
    print("\n" * 2)
    print("执行开始".center(total + 28, '-'))
    succeed = True
    paper_downloaded = 0
    already_exist = 0
    start = time.perf_counter()
    for i, item in enumerate(paper_info):
        set_value("progress_bar_num", i)
        papername = paper_info[item]['name']
        paperurl = paper_info[item]['url']
        # Skip files that are already stored.
        if os.path.exists(papername):
            already_exist += 1
            continue
        try:
            # A timeout keeps one stalled connection from hanging the batch.
            r = requests.get(paperurl, timeout=60)
            # Do not save an HTTP error page under a .pdf name: it would be
            # counted as downloaded and skipped as "existing" on later runs.
            r.raise_for_status()
            with open(papername, 'wb') as f:
                f.write(r.content)
            paper_downloaded += 1
            # Pause between downloads to avoid an IP ban.
            time.sleep(1)
        except Exception as e:
            # Best effort: report the failure and carry on with the next paper.
            print(e)
            print("unknown name! parser error", papername)
            succeed = False
        a = '*' * i
        b = '.' * (total - i)
        c = (i / total) * 100
        t = time.perf_counter() - start
        print("\r任务进度:{:>3.0f}% [{}->{}]消耗时间:{:.2f}s".format(c, a, b, t), end="")
    set_value("progress_bar_num", total)
    print("\n" + "执行结束".center(total + 28, '-'))
    print("-" * 50)
    print("Downloaded {} papers and {} paper already exists.".format(paper_downloaded, already_exist))
    print("-" * 50)
    return succeed, paper_downloaded, already_exist
--------------------------------------------------------------------------------