├── .idea
├── .name
├── Douban.iml
├── encodings.xml
├── misc.xml
├── modules.xml
├── scopes
│ └── scope_settings.xml
├── vcs.xml
└── workspace.xml
├── README.md
├── Win_Client.7z
├── Win_Client
├── dou2_win.exe
└── 必读.txt
├── dou2.py
├── dou2_win.py
├── html
├── air.png
├── center.css
├── index.html
└── shadow_light.png
├── stup.py
└── test.py
/.idea/.name:
--------------------------------------------------------------------------------
1 | Douban
--------------------------------------------------------------------------------
/.idea/Douban.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/.idea/scopes/scope_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 |
372 |
373 |
374 |
375 |
376 |
377 |
378 |
379 |
380 |
381 |
382 |
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
393 |
394 |
395 |
396 |
397 |
398 |
399 |
400 |
401 |
402 |
403 |
404 |
405 |
406 |
407 |
408 |
409 |
410 |
411 |
412 |
413 |
414 |
415 | 1405394782054
416 | 1405394782054
417 |
418 |
419 | 1405401436680
420 | 1405401436680
421 |
422 |
423 | 1405420255362
424 | 1405420255362
425 |
426 |
427 | 1405432782746
428 | 1405432782746
429 |
430 |
431 | 1405452937498
432 | 1405452937498
433 |
434 |
435 | 1405453700235
436 | 1405453700235
437 |
438 |
439 | 1405453751309
440 | 1405453751309
441 |
442 |
443 | 1405458742066
444 | 1405458742066
445 |
446 |
447 | 1405458937852
448 | 1405458937852
449 |
450 |
451 | 1405460428859
452 | 1405460428859
453 |
454 |
455 | 1405500594569
456 | 1405500594569
457 |
458 |
459 | 1405503584832
460 | 1405503584832
461 |
462 |
463 | 1405504215762
464 | 1405504215762
465 |
466 |
467 | 1405514518777
468 | 1405514518777
469 |
470 |
471 | 1405514560387
472 | 1405514560387
473 |
474 |
475 | 1405514589111
476 | 1405514589111
477 |
478 |
479 | 1405587097350
480 | 1405587097350
481 |
482 |
483 | 1405587687903
484 | 1405587687903
485 |
486 |
487 |
488 |
489 |
490 |
491 |
492 |
493 |
494 |
495 |
496 |
497 |
498 |
499 |
500 |
501 |
502 |
503 |
504 |
505 |
506 |
507 |
508 |
509 |
510 |
511 |
512 |
513 |
514 |
515 |
516 |
517 |
518 |
519 |
520 |
521 |
522 |
523 |
524 |
525 |
526 |
527 |
528 |
529 |
530 |
531 |
532 |
533 |
534 |
535 |
536 |
537 |
538 |
539 |
540 |
541 |
542 |
543 |
544 |
545 |
546 |
547 |
548 |
549 |
550 |
551 |
552 |
553 |
554 |
555 |
556 |
557 |
558 |
559 |
560 |
561 |
562 |
563 |
564 |
565 |
566 |
567 |
568 |
569 |
570 |
571 |
572 |
573 |
574 |
575 |
576 |
577 |
578 |
579 |
580 |
581 |
582 |
583 |
584 |
585 |
586 |
587 |
588 |
589 |
590 |
591 |
592 |
593 |
594 |
595 |
596 |
597 |
598 |
599 |
600 |
601 |
602 |
603 |
604 |
605 |
606 |
607 |
608 |
609 |
610 |
611 |
612 |
613 |
614 |
615 |
616 |
617 |
618 |
619 |
620 |
621 |
622 |
623 |
624 |
625 |
626 |
627 |
628 |
629 |
630 |
631 |
632 |
633 |
634 |
635 |
636 |
637 |
638 |
639 |
640 |
641 |
642 |
643 |
644 |
645 |
646 |
647 |
648 |
649 |
650 |
651 |
652 |
653 |
654 |
655 |
656 |
657 |
658 |
659 |
660 |
661 |
662 |
663 |
664 |
665 |
666 |
667 |
668 |
669 |
670 |
671 |
672 |
673 |
674 |
675 |
676 |
677 |
678 |
679 |
680 |
681 |
682 |
683 |
684 |
685 |
686 |
687 |
688 |
689 |
690 |
691 |
692 |
693 |
694 |
695 |
696 |
697 |
698 |
699 |
700 |
701 |
702 |
703 |
704 |
705 |
706 |
707 |
708 |
709 |
710 |
711 |
712 |
713 |
714 |
715 |
716 |
717 |
718 |
719 |
720 |
721 |
722 |
723 |
724 |
725 |
726 |
727 |
728 |
729 |
730 |
731 |
732 |
733 |
734 |
735 |
736 |
737 |
738 |
739 |
740 |
741 |
742 |
743 |
744 |
745 |
746 |
747 |
748 |
749 |
750 |
751 |
752 |
753 |
754 |
755 |
756 |
757 |
758 |
759 |
760 |
761 |
762 |
763 |
764 |
765 |
766 |
767 |
768 |
769 |
770 |
771 |
772 |
773 |
774 |
775 |
776 |
777 |
778 |
779 |
780 |
781 |
782 |
783 |
784 |
785 |
786 |
787 |
788 |
789 |
790 |
791 |
792 |
793 |
794 |
795 |
796 |
797 |
798 |
799 |
800 |
801 |
802 |
803 |
804 |
805 |
806 |
807 |
808 |
809 |
810 |
811 |
812 |
813 |
814 |
815 |
816 |
817 |
818 |
819 |
820 |
821 |
822 |
823 |
824 |
825 |
826 |
827 |
828 |
829 |
830 |
831 |
832 |
833 |
834 |
835 |
836 |
837 |
838 |
839 |
840 |
841 |
842 |
843 |
844 |
845 |
846 |
847 |
848 |
849 |
850 |
851 |
852 |
853 |
854 |
855 |
856 |
857 |
858 |
859 |
860 |
861 |
862 |
863 |
864 |
865 |
866 |
867 |
868 |
869 |
870 |
871 |
872 |
873 |
874 |
875 |
876 |
877 |
878 |
879 |
880 |
881 |
882 |
883 |
884 |
885 |
886 |
887 |
888 |
889 |
890 |
891 |
892 |
893 |
894 |
895 |
896 |
897 |
898 |
899 |
900 |
901 |
902 |
903 |
904 |
905 |
906 |
907 |
908 |
909 |
910 |
911 |
912 |
913 |
914 |
915 |
916 |
917 |
918 |
919 |
920 |
921 |
922 |
923 |
924 |
925 |
926 |
927 |
928 |
929 |
930 |
931 |
932 |
933 |
934 |
935 |
936 |
937 |
938 |
939 |
940 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ### 豆瓣小组图片采集程序
2 | ##### By 肾虚公子
3 |
4 | 官网: http://Douban.miaowu.asia
5 |
6 | 主程序:dou2.py
7 |
8 |
9 |
10 | Mac/Linux 运行程序(python dou2.py)即可。
11 |
12 | Windows用户请下载压缩包: [下载](https://raw.githubusercontent.com/ShenXuGongZi/DouBanGroupPic/master/Win_Client.7z)
13 |
14 |
15 | #### 程序基本功能
16 | * 用户可以自由选择用户组下载图片支持豆瓣所有小组
17 | * 本程序自动采集代理
18 | * 采集代理后随机选择
19 | * 自动下载图片并保存
20 |
21 |
22 | 获取用户组ID方法
23 | http://www.douban.com/group/264964/ #只需要输入group/后面的字符,不包括斜杠
24 |
25 | ##### 2015-6-3 更新
26 | * 无需用户手动创建文件夹
27 | * 更新可用性去掉代理采集
28 | * 优化Windows客户端大小以及文件数量
29 |
30 | ##### 2014-7-17 更新
31 | * 修改程序Bug
32 | * 增加错误输出
33 | * 加快程序执行效率
34 | * 尝试启用多线程失败!
35 |
36 | ##### 2014-7-16 更新
37 | * 用户可以自由选择用户组下载图片支持豆瓣所有小组
38 | * 增加默认功能
39 | * 模拟UA
40 | * 更换代理源
41 | * Win版支持
42 | * 优化程序
43 | * 美化程序
--------------------------------------------------------------------------------
/Win_Client.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShenXuGongZi/DouBanGroupPic/4839aef33e06ababbbc201589ac41a857b588e93/Win_Client.7z
--------------------------------------------------------------------------------
/Win_Client/dou2_win.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShenXuGongZi/DouBanGroupPic/4839aef33e06ababbbc201589ac41a857b588e93/Win_Client/dou2_win.exe
--------------------------------------------------------------------------------
/Win_Client/必读.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShenXuGongZi/DouBanGroupPic/4839aef33e06ababbbc201589ac41a857b588e93/Win_Client/必读.txt
--------------------------------------------------------------------------------
/dou2.py:
--------------------------------------------------------------------------------
1 | #-*- coding: utf-8 -*
2 | import urllib
3 | import urllib2
4 | import re
5 | import time
6 | import sys
7 | import os
8 | import random
9 |
10 | print '#'*50
11 | print '#'*2 + '\t\t\t\t豆瓣小组采集器\t\t\t\t\t'+'#'*2
12 | print '#'*50
13 |
14 | import os
15 | type = sys.getfilesystemencoding()
16 |
17 | mkmulu = os.path.exists('Doubanimg')
18 |
19 | if not mkmulu:
20 | print '#'*50
21 | print '目录创建成功!图片将下载到DOubanimg目录'.decode('UTF-8').encode('GBK')
22 | print '#'*50
23 | os.mkdir('Doubanimg')
24 | # return True
25 | else:
26 | print '#'*50
27 | print '目录存在,图片下载到Doubanimg目录'.decode('UTF-8').encode('GBK')
28 | print '#'*50
29 | # return False
30 |
31 | # print '*'*20+'开始采集代理'+'*'*20
32 | # #采集代理信息
33 | # f = open('proxy_list.txt','w')
34 | # exp1 = re.compile("(?isu)<tr[^>]*>(.*?)</tr>")
35 | # exp2 = re.compile("(?isu)<td[^>]*>(.*?)</td>")
36 | # #加入UA模拟浏览器
37 | # proxy_ua = {'User-Agent':'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36'}
38 | # proxyHtml = urllib2.Request(url="http://www.site-digger.com/html/articles/20110516/proxieslist.html",headers=proxy_ua)
39 | # try: #检查代理是否写入成功
40 | # proxySocket = urllib2.urlopen(proxyHtml)
41 | # htmlSource = proxySocket.read()
42 | # #print htmlSource
43 | # except Exception: #如果错误
44 | # print '-'*50
45 | # print '代理程序采集出错!请确认您的网络正常!'
46 | # print '-'*50
47 | # raw_input('按回车结束程序:')
48 | # else: #成功继续执行
49 | # for row in exp1.findall(htmlSource):
50 | # for col in exp2.findall(row)[:1]:
51 | # #写入代理信息
52 | # f.write(col+'\n')
53 | # #print col
54 | # f.close()
55 |
56 |
57 | #针对http://cn-proxy.com/网站所写的一些采集后文件内操作
58 | #删除指定字符
59 | # with open('proxy_list.txt', 'r') as f:
60 | # with open('proxy_list.txt.new', 'w') as g:
61 | # for line in f.readlines():
62 | # if '服务器地址' not in line:
63 | # g.write(line)
64 | # shutil.move('proxy_list.txt.new', 'proxy_list.txt')
65 | # #删除指定字符
66 | # with open('proxy_list.txt', 'r') as f:
67 | # with open('proxy_list.txt.new', 'w') as g:
68 | # for line in f.readlines():
69 | # if '端口' not in line:
70 | # g.write(line)
71 | # shutil.move('proxy_list.txt.new', 'proxy_list.txt')
72 | #
73 | # #读取文件合并行
74 | # file = open("proxy_list.txt",'r')
75 | # lines = file.readlines() #列出文件所有行
76 | # newlines = [] #新行
77 | # j = 1
78 | # for i in range(len(lines)):
79 | # if(j!=len(lines)-2):
80 | # string = lines[j].replace('\n','')+':'+lines[j+1].replace('\n','')
81 | # newlines.append(string)
82 | # j=j+2
83 |
84 | #print(newlines)
85 |
86 | # open("proxy_list.txt","w").write('%s' % '\n'.join(newlines))
87 | # file.close()
88 |
89 | # print '*'*20+'代理采集完成'+'*'*20
90 | ##########################################################################################3
91 | ### 代理模块(全局代理)
92 | print '#'*50
93 | print '#'*2 + '\t\t\t\t肾虚公子 亲情制作\t\t\t\t\t'+'#'*2
94 | print '#'*2 + '\t\t\t\t主页: Douban.miaowu.asia\t\t\t'+'#'*2
95 | print '#'*50
96 | print '说明:本程序可以采集豆瓣任何小组的图片.'
97 | print '说明:采集的图片在文件夹Doubanimg内.'
98 | # print '注意:代理没有验证,如果不成功请重新运行.'
99 | print '#'*50
100 |
101 | #读取代理文件的每一行
102 | # f0=open('proxy_list.txt','r')
103 | # dat0=f0.readlines()
104 | # f0.close()
105 | # #随即选取代理文件的一行
106 | # proxy_SJ = random.choice(dat0)
107 | # #代理
108 | # #proxy_input = raw_input('请输入采集代理服务器:')
109 | # proxy_handler = urllib2.ProxyHandler({'http':'%s'%proxy_SJ})
110 | # opener = urllib2.build_opener(proxy_handler)
111 | # urllib2.install_opener(opener)
112 |
113 | #采集本地路径全局变量
114 | #img_LuJ = raw_input("图片下载路径:".decode('utf-8'))
115 | #img_LuJ2 = os.path.abspath(img_LuJ)
116 | print '请输入小组代码,默认采集豆瓣害羞组[ID=haixiuzu]'
117 | print '小组ID就是(http://www.douban.com/group/这里的字符/)'
118 | Douban_group = raw_input('请输入小组ID(默认按回车继续):')or 'haixiuzu'
119 | Douban_group_url = 'http://www.douban.com/group/'
120 |
121 | #模块化输出
122 | #获取帖子单页html
def gethtml2(url2):
    """Fetch ``url2`` and return the response body decoded as UTF-8.

    The request is sent with a Chrome-on-Linux User-Agent header.

    Args:
        url2: Absolute URL of the page to download.

    Returns:
        The page content as a unicode string.

    Raises:
        urllib2.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    Douban_ua = {'User-Agent':'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36'}
    request = urllib2.Request(url=url2, headers=Douban_ua)
    response = urllib2.urlopen(request)
    try:
        return response.read().decode('utf-8')
    finally:
        # Release the connection even when reading or decoding fails.
        response.close()
129 |
130 | #打开帖子列表
def gettoimg(html2):
    """Pick one topic URL out of a group discussion-list page.

    Args:
        html2: HTML text of the listing page.

    Returns:
        The last ``http://www.douban.com/group/topic/<digits>`` link found
        in ``html2``, or ``None`` when the page contains no such link.
    """
    # Dots are escaped so that only the literal host name matches.
    topic_links = re.findall(r'http://www\.douban\.com/group/topic/\d+', html2)
    # NOTE(review): the flattened listing does not show whether the original
    # `return` sat inside or after its loop; this keeps the after-loop
    # (last match) reading -- confirm against the indented source.
    if not topic_links:
        return None
    return topic_links[-1]
141 |
142 | #下载图片到本地
def download(topic_page):
    """Save the large topic images referenced in ``topic_page`` to Doubanimg/.

    Each image is stored as ``Doubanimg/<id>.jpg`` where ``<id>`` is the
    ``p`` + seven digits token found in its URL. The URL of every image
    handled is printed.

    Args:
        topic_page: HTML text of a single topic page.

    Returns:
        The value of the last ``urllib.urlretrieve`` call, or ``None`` when
        nothing was downloaded.
    """
    # Non-greedy and free of quotes/whitespace: a greedy '.+' runs from the
    # first image URL to the last '.jpg' on the same line and merges several
    # links into one unusable URL.
    reg3 = r'http://img3\.douban\.com/view/group_topic/large/public/[^"\s]+?\.jpg'
    download_img = None
    for imgurl in re.findall(reg3, topic_page):
        # The image id becomes the file name.
        for img_num in re.findall(r'p\d{7}', imgurl):
            download_img = urllib.urlretrieve(imgurl, 'Doubanimg/%s.jpg' % img_num)
        # Half-second pause after each image.
        time.sleep(0.5)
        print (imgurl)
    return download_img
161 |
162 |
163 | print '-'*50
164 | print '请输入采集帖子数,默认采集10个帖子'
165 | page_end = int(raw_input('输入数字即可(默认按回车继续):')or 10)
166 | print '-'*50
167 | print '正在采集图片中,程序可能用较长时间,此时您可以干点别的,比如喝杯咖啡?'
168 | print '-'*50
169 |
170 | num_end = page_end*25
171 | num = 0
172 | page_num = 1
173 | try: #判断输出错误
174 | while num<=num_end:
175 | #获取帖子列表,从0页开始
176 | html2 = gethtml2(Douban_group_url+Douban_group+"/discussion?start=%d"%num)
177 | #抽取下载图片
178 | topicurl = gettoimg(html2)
179 | topic_page = gethtml2(topicurl)
180 | download_img=download(topic_page)
181 | num = page_num*25
182 | page_num+=1
183 | #print('程序采集完成')
184 | except Exception:
185 | print '错误:图片下载失败!请检查小组名称是否正确!!请重新运行本程序'
186 | print '-'*50
187 | raw_input('按回车结束程序:')
188 | else:
189 | print '#'*20 + '下载完成' + '#'*20
190 | print '程序采集已经结束感谢您的使用!'+'网站:http://Douban.miaowu.asia'
191 | print '#'*20 + '程序结束' + '#'*20
192 | JS = raw_input('按回车结束程序:')
193 | print JS
194 |
195 |
--------------------------------------------------------------------------------
/dou2_win.py:
--------------------------------------------------------------------------------
1 | #-*- coding: utf-8 -*
2 | # coding=cp936
3 | import urllib
4 | import urllib2
5 | import re
6 | import time
7 | import random
8 | import sys
9 | import os
10 | type = sys.getfilesystemencoding()
11 |
12 | print '#'*50
13 | print '#'*2 + '\t\t\t\t豆瓣小组采集器\t\t\t\t\t'.decode('UTF-8').encode('GBK')+'#'*2
14 | print '#'*50
15 |
16 | mkmulu = os.path.exists('Doubanimg')
17 |
18 | if not mkmulu:
19 | print '#'*50
20 | print '目录创建成功!图片将下载到Doubanimg目录'.decode('UTF-8').encode('GBK')
21 | print '#'*50
22 | os.mkdir('Doubanimg')
23 | # return True
24 | else:
25 | print '#'*50
26 | print '目录存在,图片下载到Doubanimg目录'.decode('UTF-8').encode('GBK')
27 | print '#'*50
28 | # return False
29 |
30 | print '#'*50
31 | print '#'*2 + '\t肾虚公子 亲情制作'.decode('UTF-8').encode('GBK')
32 | print '#'*2 + '\t主页: Douban.miaowu.asia'.decode('UTF-8').encode('GBK')
33 | print '#'*50
34 | print '说明:本程序可以采集豆瓣任何小组的图片.'.decode('UTF-8').encode('GBK')
35 | print '说明:采集的图片在文件夹Doubanimg内.'.decode('UTF-8').encode('GBK')
36 | print '注意:代理没有验证,如果不成功请重新运行.'.decode('UTF-8').encode('GBK')
37 | print '#'*50
38 |
39 |
40 | print '请输入小组代码,默认采集豆瓣害羞组[ID=haixiuzu]'.decode('UTF-8').encode('GBK')
41 | print '小组ID就是(http://www.douban.com/group/这里的字符/)'.decode('UTF-8').encode('GBK')
42 | Douban_group = raw_input('请输入小组ID(默认按回车继续):'.decode('UTF-8').encode('GBK') )or 'haixiuzu'
43 | Douban_group_url = 'http://www.douban.com/group/'
44 |
def gethtml2(url2):
    """Fetch ``url2`` and return the response body decoded as UTF-8.

    The request is sent with a Chrome-on-Linux User-Agent header.

    Args:
        url2: Absolute URL of the page to download.

    Returns:
        The page content as a unicode string.

    Raises:
        urllib2.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    Douban_ua = {'User-Agent':'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36'}
    request = urllib2.Request(url=url2, headers=Douban_ua)
    response = urllib2.urlopen(request)
    try:
        return response.read().decode('utf-8')
    finally:
        # Release the connection even when reading or decoding fails.
        response.close()
51 |
def gettoimg(html2):
    """Pick one topic URL out of a group discussion-list page.

    Args:
        html2: HTML text of the listing page.

    Returns:
        The last ``http://www.douban.com/group/topic/<digits>`` link found
        in ``html2``, or ``None`` when the page contains no such link.
    """
    # Dots are escaped so that only the literal host name matches.
    topic_links = re.findall(r'http://www\.douban\.com/group/topic/\d+', html2)
    # NOTE(review): the flattened listing does not show whether the original
    # `return` sat inside or after its loop; this keeps the after-loop
    # (last match) reading -- confirm against the indented source.
    if not topic_links:
        return None
    return topic_links[-1]
59 |
def download(topic_page):
    """Save the large topic images referenced in ``topic_page`` to Doubanimg/.

    Each image is stored as ``Doubanimg/<id>.jpg`` where ``<id>`` is the
    ``p`` + seven digits token found in its URL. The URL of every image
    handled is printed.

    Args:
        topic_page: HTML text of a single topic page.

    Returns:
        The value of the last ``urllib.urlretrieve`` call, or ``None`` when
        nothing was downloaded.
    """
    # Non-greedy and free of quotes/whitespace: a greedy '.+' runs from the
    # first image URL to the last '.jpg' on the same line and merges several
    # links into one unusable URL.
    reg3 = r'http://img3\.douban\.com/view/group_topic/large/public/[^"\s]+?\.jpg'
    download_img = None
    for imgurl in re.findall(reg3, topic_page):
        # The image id becomes the file name.
        for img_num in re.findall(r'p\d{7}', imgurl):
            download_img = urllib.urlretrieve(imgurl, 'Doubanimg/%s.jpg' % img_num)
        # Half-second pause after each image.
        time.sleep(0.5)
        print (imgurl)
    return download_img
73 |
74 |
75 | print '-'*50
76 | print '请输入采集帖子数,默认采集10个帖子'.decode('UTF-8').encode('GBK')
77 | page_end = int(raw_input('输入数字即可(默认按回车继续):'.decode('UTF-8').encode('GBK'))or 10)
78 | print '-'*50
79 | print '正在采集图片中,程序可能用较长时间,此时您可以干点别的,比如喝杯咖啡?'.decode('UTF-8').encode('GBK')
80 | print '-'*50
81 |
82 | num_end = page_end*25
83 | num = 0
84 | page_num = 1
85 | try:
86 | while num<=num_end:
87 | html2 = gethtml2(Douban_group_url+Douban_group+"/discussion?start=%d"%num)
88 | topicurl = gettoimg(html2)
89 | topic_page = gethtml2(topicurl)
90 | download_img=download(topic_page)
91 | num = page_num*25
92 | page_num+=1
93 | except Exception:
94 | print '错误:图片下载失败!请检查小组名称是否正确!!请重新运行本程序'.decode('UTF-8').encode('GBK')
95 | print '-'*50
96 | raw_input('按回车结束程序:'.decode('UTF-8').encode('GBK') )
97 | else:
98 | print '#'*20 + '下载完成'.decode('UTF-8').encode('GBK') + '#'*20
99 | print '程序采集已经结束感谢您的使用!'+'网站:http://Douban.miaowu.asia'.decode('UTF-8').encode('GBK')
100 | print '#'*20 + '程序结束'.decode('UTF-8').encode('GBK') + '#'*20
101 | JS = raw_input('按回车结束程序:'.decode('UTF-8').encode('GBK') )
102 | print JS
--------------------------------------------------------------------------------
/html/air.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShenXuGongZi/DouBanGroupPic/4839aef33e06ababbbc201589ac41a857b588e93/html/air.png
--------------------------------------------------------------------------------
/html/center.css:
--------------------------------------------------------------------------------
1 | /* Space out content a bit */
body {
  padding-top: 20px;
  padding-bottom: 20px;
  /* background-color: #333; */
  /* Two layered backgrounds: shadow_light.png repeats horizontally only,
     air.png tiles in both directions. */
  background-image: url("shadow_light.png"), url("air.png");
  background-repeat: repeat-x, repeat;
}
8 |
9 | /* Everything but the jumbotron gets side spacing for mobile first views */
10 | .header,
11 | .marketing,
12 | .footer {
13 | padding-right: 15px;
14 | padding-left: 15px;
15 | }
16 |
17 | /* Custom page header */
18 | .header {
19 | border-bottom: 1px solid #e5e5e5;
20 | }
21 | /* Make the masthead heading the same height as the navigation */
22 | .header h3 {
23 | padding-bottom: 19px;
24 | margin-top: 0;
25 | margin-bottom: 0;
26 | line-height: 40px;
27 | }
28 |
29 | /* Custom page footer */
30 | .footer {
31 | padding-top: 19px;
32 | color: #777;
33 | border-top: 1px solid #e5e5e5;
34 | }
35 |
36 | /* Customize container */
37 | @media (min-width: 768px) {
38 | .container {
39 | max-width: 730px;
40 |
41 | background-color: #FCFCFC;
42 | padding-top: 20px;
43 | }
44 | }
45 | .container-narrow > hr {
46 | margin: 30px 0;
47 | }
48 |
49 | /* Main marketing message and sign up button */
50 | .jumbotron {
51 | text-align: center;
52 | border-bottom: 1px solid #e5e5e5;
53 | }
54 | .jumbotron .btn {
55 | padding: 14px 24px;
56 | font-size: 21px;
57 | }
58 |
59 | /* Supporting marketing content */
60 | .marketing {
61 | margin: 40px 0;
62 | }
63 | .marketing p + h4 {
64 | margin-top: 28px;
65 | }
66 |
67 | /* Responsive: Portrait tablets and up */
68 | @media screen and (min-width: 768px) {
69 | /* Remove the padding we set earlier */
70 | .header,
71 | .marketing,
72 | .footer {
73 | padding-right: 0;
74 | padding-left: 0;
75 | }
76 | /* Space out the masthead */
77 | .header {
78 | margin-bottom: 30px;
79 | }
80 | /* Remove the bottom border on the jumbotron for visual effect */
81 | .jumbotron {
82 | border-bottom: 0;
83 | }
84 | }
--------------------------------------------------------------------------------
/html/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | 豆瓣小组图片采集器
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
32 |
33 |
34 |
豆瓣小组图片采集器
35 |
用户可以自由选择用户组下载图片支持豆瓣所有小组.
36 |
37 | GitHub主页
38 |
39 |
40 |
41 |
42 |
43 | Mac/Linux用户
44 | 请下载dou2.py程序运行 并且在同文件夹下创建Doubanimg文件夹
45 | 具体流程如下
46 | mkdir Doubanimg
47 | wget http://douban.miaowu.asia/dou2.py&&python dou2.py
48 |
49 |
50 |
Windows用户
51 | Windows用户直接下载文件运行即可.
52 | 下载Windows版本
53 |
54 |
55 | 演示
56 |
57 |
58 |
59 |
60 |
63 |
64 |
65 |
66 |
67 |
69 |
70 |
71 |
74 |
--------------------------------------------------------------------------------
/html/shadow_light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShenXuGongZi/DouBanGroupPic/4839aef33e06ababbbc201589ac41a857b588e93/html/shadow_light.png
--------------------------------------------------------------------------------
/stup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 |
3 | import py2exe
4 |
5 |
6 |
# Modules py2exe is told to include explicitly: the encodings package and
# everything under it.
includes = ["encodings", "encodings.*"]

# Settings handed to the py2exe command.
options = {
    "py2exe": {
        "compressed": 1,
        "optimize": 2,
        "includes": includes,
        "bundle_files": 1,
    },
}

# Build dou2_win.py as a console program; zipfile=None means no separate
# library archive is named.
setup(
    version="3.0.0",
    description="Data Convert Tools(For MM-Bukepa.org only)",
    name="Data Convert Tools",
    options=options,
    zipfile=None,
    console=["dou2_win.py"],
)
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from threading import Thread
def run_thread(n):
    """Print the integers 0 .. n-1, one per line.

    Args:
        n: Number of integers to print.
    """
    for i in range(n):
        # Parenthesised single-argument form: identical output with the
        # Python 2 print statement, and a valid call on Python 3.
        print(i)
6 |
7 | t1 = Thread(target=run_thread,args=(500000,))#指定目标函数,传入参数,这里参数也是元组
8 | t1.start() #启动线程
--------------------------------------------------------------------------------