├── .gitignore
├── README.md
└── ooxx.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # C extensions
6 | *.so
7 |
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 |
26 | # PyInstaller
27 | # Usually these files are written by a python script from a template
28 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 |
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 |
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .coverage.*
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 | *,cover
45 |
46 | # Translations
47 | *.mo
48 | *.pot
49 |
50 | # Django stuff:
51 | *.log
52 |
53 | # Sphinx documentation
54 | docs/_build/
55 |
56 | # PyBuilder
57 | target/
58 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ooxx
2 |
3 | ## Warning
4 |
5 | Just for studying. Please don't consume too much of `Jandan`'s network traffic.
6 |
7 | ## Run
8 |
9 | Make sure you have Python installed, then run the command below:
10 |
11 | ```
12 | python ooxx.py
13 | ```
14 |
15 | ## About
16 |
17 | author: **Haipz**
18 |
19 | site: **http://haipz.com**
20 |
21 | e-mail: **haipzm@gmail.com**
22 |
23 |
--------------------------------------------------------------------------------
/ooxx.py:
--------------------------------------------------------------------------------
1 | '''
2 |
3 | author : haipz
4 | site : haipz.com
5 | email : i@haipz.com
6 |
7 | modified by TC
8 | oo xx indexes are included.
9 |
10 | '''
11 |
12 | import urllib2, os, re, thread, time
13 |
def getHtml(url) :
    '''
    Fetch `url` and return the raw response body.

    The request is sent with the User-Agent header set to 'Mozilla/5.0'.
    The response handle is closed before returning, even when reading
    the body fails. Errors raised by urllib2 propagate to the caller.
    '''
    hdr = {'User-Agent':'Mozilla/5.0'}
    req = urllib2.Request(url, headers=hdr)
    page = urllib2.urlopen(req)
    try :
        # read the whole body while the handle is still open
        return page.read()
    finally :
        # release the handle explicitly instead of relying on garbage collection
        page.close()
20 |
def filterComment(source) :
    '''
    Extract the comment section from `source`.

    Returns the text running from the first 'begin comments' marker to
    the nearest following 'end comments' marker, both markers included.
    Returns an empty string when the markers are not found.
    '''
    # Plain r'' prefix: the pattern is pure ASCII, and the ur'' prefix
    # is a syntax error on Python 3.
    pattern = r'begin comments([\s\S]*?)end comments'
    matchs = re.search(pattern, source)
    if matchs is None :
        # re.search returns None on no match; calling .group() on it would raise
        return ''
    return matchs.group()
25 |
26 | def filterThumbnail(source) :
27 | pattern = ur')(\d*?)(?:)'
75 | xxpattern = ur'(?:)(\d*?)(?:)'
76 | oo = re.search(oopattern, match).group(1)
77 | xx = re.search(xxpattern, match).group(1)
78 |
79 | picpattern = ur'(?:href=")(http\:\/\/w[\s\S]*?)(.jpg|.png|.gif)'
80 | picobj = re.compile(picpattern)
81 | result = picobj.findall(match)
82 |
83 | count1 = 0
84 | for pic in result :
85 | if int(oo) > ooover and int(xx) < xxbelow:
86 | count1 = count1 + 1
87 | picurl = pic[0] + pic[1]
88 | picpath = path + '/'
89 | picname = str(pagenum) + '_oo' + oo + '_xx' + xx + '_' + str(count0) + '_' + str(count1) + pic[1]
90 |
91 | print 'Infomation:'
92 | print 'Picture url: ' + picurl
93 | print 'Picture path: ' + picpath
94 | print 'Picture name: ' + picname
95 | try :
96 | downloadPicture(picurl, picpath, picname)
97 | except Exception as e :
98 | print(e)
99 |
--------------------------------------------------------------------------------