├── manhwa_v5.1.py
├── manhwa_v1.0.py
├── manhwa_v2.0.py
├── manhwa_v3.0.py
├── manhwa_v5.0.py
├── manhwa_v4.0.py
└── README.md

/manhwa_v5.1.py:
--------------------------------------------------------------------------------
import os

# Run the v4 scraper script and wait for it to finish.
os.system(r"python D:\manhua\整站爬取www.manhwa.cc\manhua4.py")

# Hand the mp3 to the shell so the default player opens it as a "done" chime.
os.system(r"F:\CloudMusic\是萝莉控真是太好了.mp3")
--------------------------------------------------------------------------------
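v5.1 is nothing but a launcher for the v4 script plus an audible completion cue. A slightly
more explicit Windows-only equivalent, as a sketch (the paths are the originals;
subprocess.run and os.startfile are standard-library calls):

import os
import subprocess

# Run the scraper and wait; check=True raises CalledProcessError on a non-zero exit.
subprocess.run(["python", r"D:\manhua\整站爬取www.manhwa.cc\manhua4.py"], check=True)

# Open the notification track with the associated media player (Windows only).
os.startfile(r"F:\CloudMusic\是萝莉控真是太好了.mp3")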
/manhwa_v1.0.py:
--------------------------------------------------------------------------------
import requests
from bs4 import BeautifulSoup
import re
import os

# Book IDs run from 1 to 1030.
for num1 in range(1, 1031):
    circle = requests.get('https://www.manhwa.cc/book/' + str(num1))
    # Image URLs are collected into count.
    count = []
    # Hand the fetched page to BeautifulSoup.
    soup = BeautifulSoup(circle.text, 'lxml')
    # CSS selectors (e.g. #gallery-list) were found with Chrome's SelectorGadget extension.

    for item_book in soup.select('.d_bg_t'):
        for book_name in item_book.find_all('a'):
            # Skip the category links ("韩国" = Korea, "男性" = male) and keep the title.
            if book_name.string != '韩国' and book_name.string != '男性':
                book_name_clean = book_name.string
                print(num1, book_name_clean)

    os.makedirs('D://manhua//整站爬取www.manhwa.cc//整站漫画爬取//' + str(num1) + '.' + book_name_clean)

    # menu_path_num = []

    for item in soup.select('.d_menu>ul>li'):
        # find_all pulls every <a> (one per chapter) out of the menu item.
        for a in item.find_all('a'):
            # print('a', a)
            menu_path = 'https://www.manhwa.cc/' + a.get('href')
            # count.append(menu_path)
            # menu_path_num.append(re.findall(r"\d+\.?\d*", menu_path))
            menu_path_num = re.findall(r"\d+\.?\d*", menu_path)

            # Per-book loop: visit each chapter URL collected above, one at a time.

            # for num in menu_path_num:
            print('book_url:', menu_path)
            circle = requests.get(menu_path)
            count = []
            soup = BeautifulSoup(circle.text, 'lxml')

            for title in soup.select('div.fl.r_tab_l'):
                for title in title.find_all('span'):
                    print('title:', title.text)
                    title = title.text

            for item in soup.select('.r_img'):
                for img in item.find_all('img'):
                    print('img_url:', img)
                    # The real image URL sits in the lazy-load attribute data-original.
                    img_path = img.get('data-original')
                    count.append(img_path)
            # enumerate walks the collected image URLs.
            os.makedirs('D://manhua//整站爬取www.manhwa.cc//整站漫画爬取//' + book_name_clean + '//' + str(title) + '//')
            for i, v in enumerate(count):
                # Request each image URL v from the site.
                image = requests.get(v)
                # open() needs a str path (not int), and 'wb' writes the bytes in binary mode.
                with open('D://manhua//整站爬取www.manhwa.cc//整站漫画爬取//' + book_name_clean + '//' + str(title) + '//' + str(i) + '.jpg', 'wb') as file:
                    # with open('C://Users//50159//Desktop//manhua//test//' + str(num1) + '_' + str(i) + '.jpg', 'wb') as file:
                    # .content is the raw bytes of the downloaded image.
                    file.write(image.content)
                print(i)
--------------------------------------------------------------------------------
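Every version here calls requests.get with no timeout, headers, or error handling, so a
single stalled or failed request can hang or kill the whole crawl. A minimal hardened fetch
helper might look like the sketch below (the retry count, backoff, and timeout values are
assumptions, not taken from the original):

import time
import requests

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
}

def fetch(url, retries=3, timeout=10):
    """GET a URL with a timeout and a few retries; returns None on failure."""
    for attempt in range(retries):
        try:
            resp = requests.get(url, headers=HEADERS, timeout=timeout)
            resp.raise_for_status()
            return resp
        except requests.RequestException as exc:
            print('retry', attempt + 1, 'for', url, '-', exc)
            time.sleep(2 ** attempt)  # simple exponential backoff
    return None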
/manhwa_v2.0.py:
--------------------------------------------------------------------------------
import requests
from bs4 import BeautifulSoup
import re
import os

# Book IDs 1-1030 (note: range(1, 1030) actually stops at book 1029).
for num1 in range(1, 1030):
    # Book 716: illegal characters in its title prevent the folder from being created.

    import urllib.request  # urllib request module

    # openUrl is defined but never called; its Host header still points at another site.
    def openUrl(circle):
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
            'Host': 'jandan.net'
        }
        req = urllib.request.Request(circle, headers=headers)
        response = urllib.request.urlopen(req)  # send the request
        html = response.read()                  # read the body
        html = html.decode("utf-8")             # decode to text
        print(html)

    if __name__ == "__main__":
        circle = requests.get('https://www.manhwa.cc/book/' + str(num1))

        # circle = requests.get('https://www.manhwa.cc/book/'+str(num1))
        # Image URLs are collected into count.
        count = []
        # Hand the fetched page to BeautifulSoup.
        soup = BeautifulSoup(circle.text, 'lxml')
        # CSS selectors were found with Chrome's SelectorGadget extension.

        for item_book in soup.select('.d_bg_t'):
            for book_name in item_book.select('a')[0]:
                book_name_clean = book_name.string
                print(num1, book_name_clean)

        # menu_path_num = []

        for item in soup.select('.d_menu>ul>li'):
            # find_all pulls every <a> (one per chapter) out of the menu item.
            for a in item.find_all('a'):
                # print('a', a)
                menu_path = 'https://www.manhwa.cc/' + a.get('href')
                # count.append(menu_path)
                # menu_path_num.append(re.findall(r"\d+\.?\d*", menu_path))
                menu_path_num = re.findall(r"\d+\.?\d*", menu_path)

                # Per-book loop: visit each chapter URL collected above.

                # for num in menu_path_num:
                print('book_url:', menu_path)

                circle = requests.get(menu_path)
                count = []
                soup = BeautifulSoup(circle.text, 'lxml')

                for title in soup.select('div.fl.r_tab_l'):
                    for title in title.find_all('span'):
                        print('title:', title.text)
                        title = title.text

                for item in soup.select('.r_img'):
                    for img in item.find_all('img'):
                        print('img_url:', img)
                        # The real image URL sits in the lazy-load attribute data-original.
                        img_path = img.get('data-original')
                        count.append(img_path)

                os.makedirs('D://manhua//整站爬取www.manhwa.cc//整站漫画爬取//' + book_name_clean + '//' + str(title) + '//')
                for i, v in enumerate(count):
                    image = requests.get(v)
                    # 'wb' writes the downloaded bytes in binary mode.
                    with open('D://manhua//整站爬取www.manhwa.cc//整站漫画爬取//' + book_name_clean + '//' + str(title) + '//' + str(i) + '.jpg', 'wb') as file:
                        # with open('C://Users//50159//Desktop//manhua//test//' + str(num1) + '_' + str(i) + '.jpg', 'wb') as file:
                        file.write(image.content)
                    print(i)
--------------------------------------------------------------------------------
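The book-716 failure noted in the comment above is a Windows filename restriction: a title
containing any of \ / : * ? " < > | cannot become a folder name, so os.makedirs raises. A
small guard, as a sketch (the underscore replacement is an arbitrary choice):

import re

def safe_dirname(name):
    """Strip characters Windows forbids in file and folder names."""
    return re.sub(r'[\\/:*?"<>|]', '_', name).strip()

# e.g. safe_dirname('某漫画: 第2季?') returns '某漫画_ 第2季_'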
/manhwa_v3.0.py:
--------------------------------------------------------------------------------
import requests
from bs4 import BeautifulSoup
import re
import os

# Book IDs 1-1030.

from manhua3_ui import A

A().aa()

'''
for num1 in range(first, last):
    # Book 716: illegal characters in its title prevent the folder from being created.

    import urllib.request  # urllib request module

    def openUrl(circle):
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
            'Host': 'jandan.net'
        }
        req = urllib.request.Request(circle, headers=headers)
        response = urllib.request.urlopen(req)  # send the request
        html = response.read()                  # read the body
        html = html.decode("utf-8")             # decode to text
        print(html)


    if __name__ == "__main__":
        circle = requests.get('https://www.manhwa.cc/book/' + str(num1))

        # circle = requests.get('https://www.manhwa.cc/book/'+str(num1))
        # Image URLs are collected into count.
        count = []
        # Hand the fetched page to BeautifulSoup.
        soup = BeautifulSoup(circle.text, 'lxml')

        for item_book in soup.select('.d_bg_t'):
            for book_name in item_book.select('a')[0]:
                book_name_clean = book_name.string
                print(num1, book_name_clean)

        for item in soup.select('.d_menu>ul>li'):
            # find_all pulls every <a> (one per chapter) out of the menu item.
            for a in item.find_all('a'):
                # print('a', a)
                menu_path = 'https://www.manhwa.cc/' + a.get('href')
                # count.append(menu_path)
                # menu_path_num.append(re.findall(r"\d+\.?\d*", menu_path))
                menu_path_num = re.findall(r"\d+\.?\d*", menu_path)

                # Per-book loop: visit each chapter URL collected above.

                # for num in menu_path_num:
                print('book_url:', menu_path)

                circle = requests.get(menu_path)
                count = []
                soup = BeautifulSoup(circle.text, 'lxml')

                for title in soup.select('div.fl.r_tab_l'):
                    for title in title.find_all('span'):
                        print('title:', title.text)
                        title = title.text

                for item in soup.select('.r_img'):
                    for img in item.find_all('img'):
                        print('img_url:', img)
                        # The real image URL sits in the lazy-load attribute data-original.
                        img_path = img.get('data-original')
                        count.append(img_path)

                os.makedirs('D://manhua//整站爬取www.manhwa.cc//整站漫画爬取//' + book_name_clean + '//' + str(title) + '//')
                for i, v in enumerate(count):
                    image = requests.get(v)
                    # 'wb' writes the downloaded bytes in binary mode.
                    with open('D://manhua//整站爬取www.manhwa.cc//整站漫画爬取//' + book_name_clean + '//' + str(title) + '//' + str(i) + '.jpg', 'wb') as file:
                        # with open('C://Users//50159//Desktop//manhua//test//' + str(num1) + '_' + str(i) + '.jpg', 'wb') as file:
                        file.write(image.content)
'''  # print(i)
--------------------------------------------------------------------------------
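v3.0 hands control to a UI class in manhua3_ui, which is not included in this repo, so the
disabled block above is the only record of its crawl logic. For reference, the
chapter-number extraction every version shares is a single re.findall over the chapter URL;
on a hypothetical URL of the shape these pages use, it behaves like this:

import re

menu_path = 'https://www.manhwa.cc/chapter/123/45'  # hypothetical URL shape
print(re.findall(r"\d+\.?\d*", menu_path))          # prints ['123', '45']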
/manhwa_v5.0.py:
--------------------------------------------------------------------------------
import requests
from bs4 import BeautifulSoup
import re
import os

from PyQt5.QtWidgets import QApplication, QWidget, QLineEdit, QInputDialog, QGridLayout, QLabel, QPushButton, QFrame, QProgressBar

first = 16  # module-level start index; ManHua reads this, not the label value

class InputDialog(QWidget):

    def __init__(self):
        super(InputDialog, self).__init__()
        self.initUi()

    def initUi(self):
        self.setWindowTitle("漫画爬取")  # "Manhwa crawler"
        self.setGeometry(50, 50, 1200, 600)

        label1 = QLabel("第一本:")    # "First book:"
        label2 = QLabel("最后一本:")  # "Last book:"

        self.nameLable = QLabel("1")  # 1
        self.first = int(self.nameLable.text())
        self.nameLable.setText(str(self.first))
        self.nameLable.setFrameStyle(QFrame.Panel | QFrame.Sunken)
        self.styleLable = QLabel("1")  # 1030
        self.last = self.styleLable.text()
        self.styleLable.setText(str(self.last))
        self.styleLable.setFrameStyle(QFrame.Panel | QFrame.Sunken)

        # Progress-bar setup (abandoned).

        nameButton = QPushButton("更改")  # "Change"
        nameButton.clicked.connect(self.selectName)
        styleButton = QPushButton("更改")
        styleButton.clicked.connect(self.selectStyle)
        okButton = QPushButton("OK")
        okButton.clicked.connect(self.selectOk)

        mainLayout = QGridLayout()
        mainLayout.addWidget(label1, 0, 0)
        mainLayout.addWidget(self.nameLable, 0, 1)
        mainLayout.addWidget(nameButton, 0, 2)
        mainLayout.addWidget(label2, 1, 0)
        mainLayout.addWidget(self.styleLable, 1, 1)
        mainLayout.addWidget(styleButton, 1, 2)
        mainLayout.addWidget(okButton, 2, 1)

        self.setLayout(mainLayout)

    # Scraper code
    def ManHua(self):

        # Uses the module-level first and a hard-coded cap; the GUI "last" value is never read.
        for num1 in range(first, 1030):
            import urllib.request  # urllib request module

            # openUrl is defined but never called.
            def openUrl(circle):
                headers = {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
                    'Host': 'jandan.net'
                }
                req = urllib.request.Request(circle, headers=headers)
                response = urllib.request.urlopen(req)  # send the request
                html = response.read()                  # read the body
                html = html.decode("utf-8")             # decode to text
                print(html)

            if __name__ == "__main__":
                circle = requests.get('https://www.manhwa.cc/book/' + str(num1))

                count = []
                soup = BeautifulSoup(circle.text, 'lxml')

                for item_book in soup.select('.d_bg_t'):
                    for book_name in item_book.select('a')[0]:
                        book_name_clean = book_name.string
                        print('')
                        print("正在下载:", num1, book_name_clean)  # "Downloading:"
                        aa = 0
                        # print(aa, num1)
                        if num1 > aa:
                            aa = num1
                        # print(aa)
                        # Text progress bar: percentage of the 1030 books processed so far.
                        for i in range(int(num1 * (100 / 1030)) + 1):
                            print('\r' + '总进度:' + '▇' * (i // 2) + str(i) + '%', end='')  # "Total progress:"
                        print('')

                for item in soup.select('.d_menu>ul>li'):
                    for a in item.find_all('a'):
                        menu_path = 'https://www.manhwa.cc/' + a.get('href')
                        # count.append(menu_path)
                        # menu_path_num.append(re.findall(r"\d+\.?\d*", menu_path))
                        menu_path_num = re.findall(r"\d+\.?\d*", menu_path)

                        # Per-book loop: visit each chapter URL collected above.

                        # for num in menu_path_num:
                        # print('book_url:', menu_path)

                        circle = requests.get(menu_path)
                        # Image URLs are collected into count.
                        count = []
                        # Hand the fetched page to BeautifulSoup.
                        soup = BeautifulSoup(circle.text, 'lxml')
                        # print(menu_path)
                        print('.', end='')  # one dot per chapter as a heartbeat

                        for title in soup.select('div.fl.r_tab_l'):
                            for title in title.find_all('span'):
                                # print('title:', title.text)
                                title = title.text

                        for item in soup.select('.r_img'):
                            for img in item.find_all('img'):
                                # print('img_url:', img)
                                # The real image URL sits in the lazy-load attribute data-original.
                                img_path = img.get('data-original')
                                count.append(img_path)

                        # Resume support: skip the whole chapter if its folder already exists.
                        if os.path.exists('D:/manhua/manhuatest/' + book_name_clean + '/' + str(title) + '/'):
                            continue
                        else:
                            os.makedirs('D:/manhua/manhuatest/' + book_name_clean + '/' + str(title) + '/')

                        for i, v in enumerate(count):
                            image = requests.get(v)
                            # Skip images that were already downloaded.
                            if os.path.exists('D:/manhua/manhuatest/' + book_name_clean + '/' + str(title) + '/' + str(i) + '.jpg'):
                                continue
                            else:
                                with open('D:/manhua/manhuatest/' + book_name_clean + '/' + str(title) + '/' + str(i) + '.jpg', 'wb') as file:
                                    file.write(image.content)
                                # print(i)
                                continue
                        continue

    def selectName(self):
        name, ok = QInputDialog.getText(self, "第一本", "第一本序号:",
                                        QLineEdit.Normal, self.nameLable.text())
        if ok and (len(name) != 0):
            self.nameLable.setText(name)

    def selectStyle(self):
        # Note: the default shown is nameLable's text; styleLable was probably intended.
        style, ok = QInputDialog.getText(self, "最后一本", "最后一本序号:",
                                         QLineEdit.Normal, self.nameLable.text())
        if ok and (len(style) != 0):
            self.styleLable.setText(style)

    def selectOk(self):
        self.ManHua()
        os.system(r"F:\CloudMusic\是萝莉控真是太好了.mp3")
        # self.first = int(self.nameLable.text())
        # self.last = self.styleLable.text()
        # print(self.first, self.last)
        # os.system(r"python D:\manhua\整站爬取www.manhwa.cc\manhua3.py")


if __name__ == "__main__":
    import sys
    app = QApplication(sys.argv)
    myshow = InputDialog()
    myshow.show()
    # InputDialog().ManHua()
    sys.exit(app.exec_())
--------------------------------------------------------------------------------
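One structural issue in v5.0: ManHua runs on the GUI thread, so the window freezes and
never repaints until the entire crawl finishes (and this may also relate to the 0xC0000409
crash v4.0 complains about below). The usual fix is to move the work onto a QThread and
report progress through a signal. A minimal sketch, assuming the per-book work is factored
into a plain function crawl(num1) (hypothetical name, not in the original):

from PyQt5.QtCore import QThread, pyqtSignal

class CrawlerThread(QThread):
    progress = pyqtSignal(int)  # emits the book number just finished

    def __init__(self, first, last):
        super().__init__()
        self.first = first
        self.last = last

    def run(self):
        # Runs off the GUI thread; widgets must only be updated via signals.
        for num1 in range(self.first, self.last + 1):
            crawl(num1)               # hypothetical per-book worker
            self.progress.emit(num1)  # thread-safe progress notification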
/manhwa_v4.0.py:
--------------------------------------------------------------------------------
import requests
from bs4 import BeautifulSoup
import re
import os

from PyQt5.QtWidgets import QApplication, QWidget, QLineEdit, QInputDialog, QGridLayout, QLabel, QPushButton, QFrame

class InputDialog(QWidget):

    def __init__(self):
        super(InputDialog, self).__init__()
        self.initUi()

    def initUi(self):
        self.setWindowTitle("漫画爬取")  # "Manhwa crawler"
        self.setGeometry(400, 400, 300, 260)

        label1 = QLabel("第一本:")    # "First book:"
        label2 = QLabel("最后一本:")  # "Last book:"

        self.nameLable = QLabel("2")  # 1
        self.first = int(self.nameLable.text())
        self.nameLable.setText(str(self.first))
        self.nameLable.setFrameStyle(QFrame.Panel | QFrame.Sunken)
        self.styleLable = QLabel("2")  # 1030
        self.last = self.styleLable.text()
        self.styleLable.setText(str(self.last))
        self.styleLable.setFrameStyle(QFrame.Panel | QFrame.Sunken)

        nameButton = QPushButton("更改")  # "Change"
        nameButton.clicked.connect(self.selectName)
        styleButton = QPushButton("更改")
        styleButton.clicked.connect(self.selectStyle)
        okButton = QPushButton("OK")
        okButton.clicked.connect(self.selectOk)

        mainLayout = QGridLayout()
        mainLayout.addWidget(label1, 0, 0)
        mainLayout.addWidget(self.nameLable, 0, 1)
        mainLayout.addWidget(nameButton, 0, 2)
        mainLayout.addWidget(label2, 1, 0)
        mainLayout.addWidget(self.styleLable, 1, 1)
        mainLayout.addWidget(styleButton, 1, 2)
        mainLayout.addWidget(okButton, 2, 1)

        self.setLayout(mainLayout)

    # Scraper code

    def ManHua(self):
        # Books 2/3/4/...
        # Multithreading idea: run several copies at once? How to pass values out of this def?
        num = 8
        for num1 in range(num, num + 1):

            # Looping over more books (anything above 2) crashes straight to:
            #   Process finished with exit code -1073740791 (0xC0000409)
            # Book 716: illegal characters in its title prevent the folder from being created.
            # Workaround: download one book per click and prompt when each one finishes.
            # Later found the loop itself wasn't the culprit; it was these two lines:
            #   self.nameLable.setText(num1-1)
            #   self.styleLable.setText(num1)
            # though the loop size is partly related too.

            import urllib.request  # urllib request module

            # openUrl is defined but never called.
            def openUrl(circle):
                headers = {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
                    'Host': 'jandan.net'
                }
                req = urllib.request.Request(circle, headers=headers)
                response = urllib.request.urlopen(req)  # send the request
                html = response.read()                  # read the body
                html = html.decode("utf-8")             # decode to text
                print(html)

            if __name__ == "__main__":
                circle = requests.get('https://www.manhwa.cc/book/' + str(num1))

                # circle = requests.get('https://www.manhwa.cc/book/'+str(num1))
                # Image URLs are collected into count.
                count = []
                # Hand the fetched page to BeautifulSoup.
                soup = BeautifulSoup(circle.text, 'lxml')

                for item_book in soup.select('.d_bg_t'):
                    for book_name in item_book.select('a')[0]:
                        book_name_clean = book_name.string
                        print(num1, book_name_clean)

                for item in soup.select('.d_menu>ul>li'):
                    # find_all pulls every <a> (one per chapter) out of the menu item.
                    for a in item.find_all('a'):
                        # print('a', a)
                        menu_path = 'https://www.manhwa.cc/' + a.get('href')
                        # count.append(menu_path)
                        # menu_path_num.append(re.findall(r"\d+\.?\d*", menu_path))
                        menu_path_num = re.findall(r"\d+\.?\d*", menu_path)

                        # Per-book loop: visit each chapter URL collected above.

                        # for num in menu_path_num:
                        print('book_url:', menu_path)

                        circle = requests.get(menu_path)
                        count = []
                        soup = BeautifulSoup(circle.text, 'lxml')

                        for title in soup.select('div.fl.r_tab_l'):
                            for title in title.find_all('span'):
                                print('title:', title.text)
                                title = title.text

                        for item in soup.select('.r_img'):
                            for img in item.find_all('img'):
                                print('img_url:', img)
                                # The real image URL sits in the lazy-load attribute data-original.
                                img_path = img.get('data-original')
                                count.append(img_path)

                        os.makedirs('D://manhua//整站爬取www.manhwa.cc//整站漫画爬取//' + book_name_clean + '//' + str(title) + '//')
                        for i, v in enumerate(count):
                            image = requests.get(v)
                            # 'wb' writes the downloaded bytes in binary mode.
                            with open('D://manhua//整站爬取www.manhwa.cc//整站漫画爬取//' + book_name_clean + '//' + str(title) + '//' + str(i) + '.jpg', 'wb') as file:
                                # with open('C://Users//50159//Desktop//manhua//test//' + str(num1) + '_' + str(i) + '.jpg', 'wb') as file:
                                file.write(image.content)
                            print(i)

    # End of scraper code

    def selectName(self):
        name, ok = QInputDialog.getText(self, "第一本", "第一本序号:",
                                        QLineEdit.Normal, self.nameLable.text())
        if ok and (len(name) != 0):
            self.nameLable.setText(name)

    def selectStyle(self):
        # Note: the default shown is nameLable's text; styleLable was probably intended.
        style, ok = QInputDialog.getText(self, "最后一本", "最后一本序号:",
                                         QLineEdit.Normal, self.nameLable.text())
        if ok and (len(style) != 0):
            self.styleLable.setText(style)

    def selectOk(self):
        self.ManHua()
        # self.first = int(self.nameLable.text())
        # self.last = self.styleLable.text()
        # print(self.first, self.last)
        # os.system(r"python D:\manhua\整站爬取www.manhwa.cc\manhua3.py")


if __name__ == "__main__":
    import sys
    app = QApplication(sys.argv)
    myshow = InputDialog()
    myshow.show()
    sys.exit(app.exec_())
    os.system(r"F:\CloudMusic\是萝莉控真是太好了.mp3")  # unreachable: sys.exit never returns
--------------------------------------------------------------------------------
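The multithreading idea in the v4.0 comment (several downloads at once rather than several
copies of the script) maps naturally onto a thread pool, since the work is network-bound. A
standard-library sketch, again assuming a hypothetical per-book worker crawl(num1):

from concurrent.futures import ThreadPoolExecutor

def crawl_range(first, last, workers=4):
    # Download several books concurrently; list() forces completion and surfaces errors.
    with ThreadPoolExecutor(max_workers=workers) as pool:
        list(pool.map(crawl, range(first, last + 1)))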
/README.md:
--------------------------------------------------------------------------------
# First-generation version:
--------------------------------------------------------------------------------