├── .gitignore ├── README.md └── ooxx.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ooxx 2 | 3 | ## Warning 4 | 5 | For study purposes only. Please don't consume too much of `Jandan`'s network traffic. 6 | 7 | ## Run 8 | 9 | Make sure you have Python installed, then run the command below: 10 | 11 | ``` 12 | python ooxx.py 13 | ``` 14 | 15 | ## About 16 | 17 | author: **Haipz** 18 | 19 | site: **http://haipz.com** 20 | 21 | e-mail: **haipzm@gmail.com** 22 | 23 | -------------------------------------------------------------------------------- /ooxx.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3 | author : haipz 4 | site : haipz.com 5 | email : i@haipz.com 6 | 7 | modified by TC 8 | oo xx indexes are included. 
9 | 10 | ''' 11 | 12 | import urllib2, os, re, thread, time 13 | 14 | def getHtml(url) : 15 | hdr = {'User-Agent':'Mozilla/5.0'} 16 | req = urllib2.Request(url, headers=hdr) 17 | page = urllib2.urlopen(req) 18 | html = page.read() 19 | return html 20 | 21 | def filterComment(source) : 22 | pattern = ur'begin comments([\s\S]*?)end comments' 23 | matchs = re.search(pattern, source) 24 | return matchs.group() 25 | 26 | def filterThumbnail(source) : 27 | pattern = ur')(\d*?)(?:)' 75 | xxpattern = ur'(?:)(\d*?)(?:)' 76 | oo = re.search(oopattern, match).group(1) 77 | xx = re.search(xxpattern, match).group(1) 78 | 79 | picpattern = ur'(?:href=")(http\:\/\/w[\s\S]*?)(.jpg|.png|.gif)' 80 | picobj = re.compile(picpattern) 81 | result = picobj.findall(match) 82 | 83 | count1 = 0 84 | for pic in result : 85 | if int(oo) > ooover and int(xx) < xxbelow: 86 | count1 = count1 + 1 87 | picurl = pic[0] + pic[1] 88 | picpath = path + '/' 89 | picname = str(pagenum) + '_oo' + oo + '_xx' + xx + '_' + str(count0) + '_' + str(count1) + pic[1] 90 | 91 | print 'Infomation:' 92 | print 'Picture url: ' + picurl 93 | print 'Picture path: ' + picpath 94 | print 'Picture name: ' + picname 95 | try : 96 | downloadPicture(picurl, picpath, picname) 97 | except Exception as e : 98 | print(e) 99 | --------------------------------------------------------------------------------