├── README.md └── download-pdf-with-Xunlei.py /README.md: -------------------------------------------------------------------------------- 1 | # download-pdf-with-Xunlei 2 | 3 | 4 | # -*- coding: utf-8 -*- 5 | # python 3.5.2 6 | # 测试系统,Win10, Firefox V46 7 | # Author:Van 8 | # 实现自动下载高清最新pdf的实现 9 | # V1.0 当前只针对效果还可以的国外zippyshare网盘 10 | # 其他的网盘还没添加进判断语句,先共享如何迅雷下载等 11 | # 如果您有经验优化,改进此脚本,请不吝指教 12 | # QQ群: 206241755 13 | # 简介:因下载最新高清pdf,正好发现www.foxebook.net提供 14 | # 但是很多的广告,特烦人,所以尝试脚本,最后因下载需求, 15 | # 加载了迅雷,这功能的实现小牛,不过也是网络别人共享的。 16 | 17 | 18 | ---------- 19 | 20 | 21 | 22 | # -*- coding: utf-8 -*- 23 | # python 3.5.2 24 | # tested on Win10, Firefox V46 25 | # Author:Van 26 | # download the latest PDF with Xunlei automatically 27 | # V1.0 it is for zippyshare.com netdisk 28 | # other netdisk is not included yet 29 | # if you have suggestions plz share 30 | # QQ group:206241755 31 | # the souce site is : www.foxebook.net 32 | #but too many advertisements so a script is needed 33 | # download with Xunlei , a fast tool 34 | 35 | 36 | ---------- 37 | -------------------------------------------------------------------------------- /download-pdf-with-Xunlei.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # python 3.5.2 3 | # 测试系统,Win10 4 | # Author:Van 5 | # 实现自动下载高清最新pdf的实现 6 | # V1.0 当前只针对效果还可以的国外zippyshare网盘 7 | # 其他的网盘还没添加进判断语句,先共享如何迅雷下载等 8 | # 如果您有经验优化,改进此脚本,请不吝指教 9 | # QQ群: 206241755 10 | # 简介:因下载最新高清pdf,正好发现www.foxebook.net提供 11 | # 但是很多的广告,特烦人,所以尝试脚本,最后因下载需求, 12 | # 加载了迅雷,这功能的实现小牛,不过也是网络别人共享的。 13 | 14 | from selenium import webdriver 15 | import requests 16 | from lxml import etree 17 | import re 18 | import os 19 | from win32com.client import Dispatch 20 | 21 | # def down_book(i): 22 | # href = selector.xpath('/html/body/div/div/main/div[i+1]/div[2]/h3/a/@href') 23 | # print(href) 24 | 25 | #test name of book : SciPy and NumPy 26 | # book_name = input('Please input the book name in English:\n') 27 | 
# Name of the book to search for on www.foxebook.net.
# (Originally read interactively:
#  book_name = input('Please input the book name in English:\n'))
book_name = 'Introduction to Machine Learning with Python'

print('begin to search book(s)...')
print('---------------------------------')

# Search URL, e.g. http://www.foxebook.net/search/SciPy%20and%20NumPy
PostUrl = "http://www.foxebook.net/search/" + book_name

# Fetch the search-result page and build an lxml selector over it.
html = requests.get(PostUrl).content
selector = etree.HTML(html)

# Each result's title anchor sits at /html/body/div/div/main/div[N]/div[2]/h3/a,
# so the wildcard XPath below matches every book title on the results page
# (confirmed with the 'xpath checker' browser extension).
total_books = selector.xpath("/html/body/div/div/main//div[2]/h3/a/text()")

num1 = 0              # 1-based index of the current search result
link_address = []     # intermediate download-page links (zippyshare)
real_address = []     # final direct-download URLs resolved via the browser


def find_link():
    """Scan the search results for titles matching ``book_name`` and collect
    each matching book's first download-mirror link into ``link_address``.

    Side effects: advances the module-level counter ``num1`` and appends to
    ``link_address``; performs one HTTP GET per matching title.
    """
    global num1
    for title in total_books:
        num1 += 1
        # Guard clause: skip results whose title does not contain book_name.
        if not re.search(book_name, title):
            continue
        print('Congratulations, we find the book(s):\n')
        print('**********************************')
        print(title)
        print('**********************************\n')
        # Follow the matching result's detail page ...
        href = 'http://www.foxebook.net' + selector.xpath(
            '//*[@id="content"]/div/main/div[%d]/div[2]/h3/a/@href' % num1)[0]
        detail_html = requests.get(href).content
        detail_selector = etree.HTML(detail_html)
        # ... and take the first mirror's link from the download table.
        mirror = detail_selector.xpath(
            '//*[@id="download"]/div[2]/table/tbody/tr[1]/td[2]/a/@href')[0]
        # The raw href hides the real URL after the last ':'; keep that part
        # and force a plain http scheme.
        link_address.append('http:' + mirror.split(':')[-1])
        print('download link is :', link_address)
        print('\n\n')


def real_book_link():
    """Open each collected page in Firefox (zippyshare builds the real link
    with JavaScript, so plain HTTP scraping is not enough) and harvest the
    direct download URL from the download button.

    Returns:
        list[str]: the resolved URLs (also accumulated in ``real_address``).
    """
    for page in link_address:
        driver = webdriver.Firefox()
        try:
            driver.maximize_window()
            driver.get(page)
            # The direct link is the href of the big download button.
            button = driver.find_element_by_xpath('//*[@id="dlbutton"]')
            real_address.append(button.get_attribute('href'))
        except Exception as exc:
            # Was a bare ``except:`` — report the cause but keep going so one
            # bad mirror does not abort the remaining downloads.
            print('can not download the book:', exc)
        finally:
            # Was leaked in the original — always close the browser window.
            driver.quit()
    print('real_book_link:', real_address)
    return real_address


def addTasktoXunlei(down_url, course_infos):
    """Queue ``down_url`` in the Xunlei (Thunder) download manager via COM.

    Args:
        down_url: direct download URL; falsy values are silently skipped.
        course_infos: unused, kept for interface compatibility with callers.

    Returns:
        bool: True if the task was committed to Xunlei, False otherwise.
    """
    flag = False
    o = Dispatch("ThunderAgent.Agent.1")
    if down_url:
        course_path = os.getcwd()
        try:
            # AddTask(url, save_name, save_dir, comment, referer,
            #         start_mode, origin_only, origin_thread_count)
            o.AddTask(down_url, '', course_path, "", "", -1, 0, 5)
            o.CommitTasks()
            flag = True
        except Exception as exc:
            # Fixed: ``Exception.message`` does not exist in Python 3 and the
            # original line itself raised AttributeError; print the instance.
            print(exc)
            print(" AddTask is fail!")
    return flag


if __name__ == '__main__':
    find_link()
    real_link = real_book_link()
    for url in real_link:
        addTasktoXunlei(url, course_infos=None)