├── README.md └── download-pdf-with-Xunlei.py /README.md: -------------------------------------------------------------------------------- 1 | # download-pdf-with-Xunlei 2 | 3 | 4 | # -*- coding: utf-8 -*- 5 | # python 3.5.2 6 | # 测试系统,Win10, Firefox V46 7 | # Author:Van 8 | # 实现自动下载高清最新pdf的实现 9 | # V1.0 当前只针对效果还可以的国外zippyshare网盘 10 | # 其他的网盘还没添加进判断语句,先共享如何迅雷下载等 11 | # 如果您有经验优化,改进此脚本,请不吝指教 12 | # QQ群: 206241755 13 | # 简介:因下载最新高清pdf,正好发现www.foxebook.net提供 14 | # 但是很多的广告,特烦人,所以尝试脚本,最后因下载需求, 15 | # 加载了迅雷,这功能的实现小牛,不过也是网络别人共享的。 16 | 17 | 18 | ---------- 19 | 20 | 21 | 22 | # -*- coding: utf-8 -*- 23 | # python 3.5.2 24 | # tested on Win10, Firefox V46 25 | # Author:Van 26 | # download the latest PDF with Xunlei automatically 27 | # V1.0 it is for zippyshare.com netdisk 28 | # other netdisk is not included yet 29 | # if you have suggestions plz share 30 | # QQ group:206241755 31 | # the souce site is : www.foxebook.net 32 | #but too many advertisements so a script is needed 33 | # download with Xunlei , a fast tool 34 | 35 | 36 | ---------- 37 | -------------------------------------------------------------------------------- /download-pdf-with-Xunlei.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # python 3.5.2 3 | # 测试系统,Win10 4 | # Author:Van 5 | # 实现自动下载高清最新pdf的实现 6 | # V1.0 当前只针对效果还可以的国外zippyshare网盘 7 | # 其他的网盘还没添加进判断语句,先共享如何迅雷下载等 8 | # 如果您有经验优化,改进此脚本,请不吝指教 9 | # QQ群: 206241755 10 | # 简介:因下载最新高清pdf,正好发现www.foxebook.net提供 11 | # 但是很多的广告,特烦人,所以尝试脚本,最后因下载需求, 12 | # 加载了迅雷,这功能的实现小牛,不过也是网络别人共享的。 13 | 14 | from selenium import webdriver 15 | import requests 16 | from lxml import etree 17 | import re 18 | import os 19 | from win32com.client import Dispatch 20 | 21 | # def down_book(i): 22 | # href = selector.xpath('/html/body/div/div/main/div[i+1]/div[2]/h3/a/@href') 23 | # print(href) 24 | 25 | #test name of book : SciPy and NumPy 26 | # book_name = input('Please input the book name in English:\n') 27 | 
# Name of the book to search for on www.foxebook.net.
# (Originally read interactively:
#  book_name = input('Please input the book name in English:\n'))
book_name = 'Introduction to Machine Learning with Python'

print('begin to search book(s)...')
print('---------------------------------')

# Search URL, e.g. http://www.foxebook.net/search/SciPy%20and%20NumPy
PostUrl = "http://www.foxebook.net/search/" + book_name

# Fetch the search-result page and build an lxml selector over it.
html = requests.get(PostUrl).content
selector = etree.HTML(html)

# Each result's title anchor sits at /html/body/div/div/main/div[N]/div[2]/h3/a,
# so the wildcard XPath below matches every book title on the results page
# (confirmed with the 'xpath checker' browser extension).
total_books = selector.xpath("/html/body/div/div/main//div[2]/h3/a/text()")

num1 = 0              # 1-based index of the current search result
link_address = []     # intermediate download-page links (zippyshare)
real_address = []     # final direct-download URLs resolved via the browser


def find_link():
    """Scan the search results for titles matching ``book_name`` and collect
    each matching book's first download-mirror link into ``link_address``.

    Side effects: advances the module-level counter ``num1`` and appends to
    ``link_address``; performs one HTTP GET per matching title.
    """
    global num1
    for title in total_books:
        num1 += 1
        # Guard clause: skip results whose title does not contain book_name.
        if not re.search(book_name, title):
            continue
        print('Congratulations, we find the book(s):\n')
        print('**********************************')
        print(title)
        print('**********************************\n')
        # Follow the matching result's detail page ...
        href = 'http://www.foxebook.net' + selector.xpath(
            '//*[@id="content"]/div/main/div[%d]/div[2]/h3/a/@href' % num1)[0]
        detail_html = requests.get(href).content
        detail_selector = etree.HTML(detail_html)
        # ... and take the first mirror's link from the download table.
        mirror = detail_selector.xpath(
            '//*[@id="download"]/div[2]/table/tbody/tr[1]/td[2]/a/@href')[0]
        # The raw href hides the real URL after the last ':'; keep that part
        # and force a plain http scheme.
        link_address.append('http:' + mirror.split(':')[-1])
        print('download link is :', link_address)
        print('\n\n')


def real_book_link():
    """Open each collected page in Firefox (zippyshare builds the real link
    with JavaScript, so plain HTTP scraping is not enough) and harvest the
    direct download URL from the download button.

    Returns:
        list[str]: the resolved URLs (also accumulated in ``real_address``).
    """
    for page in link_address:
        driver = webdriver.Firefox()
        try:
            driver.maximize_window()
            driver.get(page)
            # The direct link is the href of the big download button.
            button = driver.find_element_by_xpath('//*[@id="dlbutton"]')
            real_address.append(button.get_attribute('href'))
        except Exception as exc:
            # Was a bare ``except:`` — report the cause but keep going so one
            # bad mirror does not abort the remaining downloads.
            print('can not download the book:', exc)
        finally:
            # Was leaked in the original — always close the browser window.
            driver.quit()
    print('real_book_link:', real_address)
    return real_address


def addTasktoXunlei(down_url, course_infos):
    """Queue ``down_url`` in the Xunlei (Thunder) download manager via COM.

    Args:
        down_url: direct download URL; falsy values are silently skipped.
        course_infos: unused, kept for interface compatibility with callers.

    Returns:
        bool: True if the task was committed to Xunlei, False otherwise.
    """
    flag = False
    o = Dispatch("ThunderAgent.Agent.1")
    if down_url:
        course_path = os.getcwd()
        try:
            # AddTask(url, save_name, save_dir, comment, referer,
            #         start_mode, origin_only, origin_thread_count)
            o.AddTask(down_url, '', course_path, "", "", -1, 0, 5)
            o.CommitTasks()
            flag = True
        except Exception as exc:
            # Fixed: ``Exception.message`` does not exist in Python 3 and the
            # original line itself raised AttributeError; print the instance.
            print(exc)
            print(" AddTask is fail!")
    return flag


if __name__ == '__main__':
    find_link()
    real_link = real_book_link()
    for url in real_link:
        addTasktoXunlei(url, course_infos=None)