# -*- coding: utf-8 -*-
# Flattened repository snapshot.  Each "/path:" banner below starts the content
# of one file; files that are not Python are carried as comments.
#
# ├── .gitignore
# ├── resource
# │   └── pic1.PNG
# ├── shenzhen_house.db
# ├── README.md
# ├── main.py
# └── database.py
#
# /.gitignore:
# --------------------------------------------------------------------------------
# /.idea
# .idea/
# *.pyc
#
# --------------------------------------------------------------------------------
# /resource/pic1.PNG:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/Rockyzsu/house/master/resource/pic1.PNG
#
# --------------------------------------------------------------------------------
# /shenzhen_house.db:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/Rockyzsu/house/master/shenzhen_house.db
#
# --------------------------------------------------------------------------------
# /README.md:
# --------------------------------------------------------------------------------
# 每天自动获取深圳上海北京的新房二手房的成交量
#   (Automatically fetches, every day, the transaction volume of new and
#   second-hand homes in Shenzhen, Shanghai and Beijing.)
# 深圳市房地产信息系统:http://ris.szpl.gov.cn/
#   (Shenzhen Real Estate Information System.)
# ![image](http://www.30daydo.com/uploads/article/20161012/a174e3e3ea03c9c2f275c2c05ea83dd3.PNG)
#
# 每天的数据写入数据库
#   (Each day's data is written to the database.)
#
# ![写入数据库](resource/pic1.PNG "Hello")
#
# ![alt text](http://www.30daydo.com/uploads/article/20161012/a174e3e3ea03c9c2f275c2c05ea83dd3.PNG =100x100)
#
# --------------------------------------------------------------------------------
# /main.py:
# --------------------------------------------------------------------------------
__author__ = 'rocky'
# Fetch the daily number of units and floor area sold in Shenzhen for
# first-hand and second-hand homes, and write them to the database.
# The scraping is nothing more than a few regular expressions.
import re

try:  # Python 2
    import urllib2
except ImportError:  # Python 3 exposes the same Request/urlopen API here
    import urllib.request as urllib2


def _fetch_page(address):
    """Download *address* and return the response body as ``str``."""
    resp = urllib2.urlopen(urllib2.Request(address))
    try:
        body = resp.read()
        if not isinstance(body, str):
            # Python 3 returns bytes, while the patterns in getContent() are
            # text patterns.  On Python 2 the body is already ``str`` and is
            # returned untouched.
            # NOTE(review): falls back to UTF-8 when the server announces no
            # charset -- confirm the site's real encoding.
            charset = resp.headers.get_content_charset() or 'utf-8'
            body = body.decode(charset, 'replace')
    finally:
        resp.close()
    return body


def getContent():
    """Scrape today's Shenzhen housing figures, print them and store them.

    Downloads the first-hand and second-hand transaction pages from
    ``http://ris.szpl.gov.cn/``, extracts the date, unit counts and areas
    with regular expressions, prints each value, then calls
    ``database.create_table()`` and ``database.insert()`` with them.

    Raises:
        IndexError: if a page yields fewer matches than the indexes used
            below expect.
    """
    # Imported here rather than at file level so that importing this module
    # has no dependency on database.py until the scrape actually runs.
    import database

    url = "http://ris.szpl.gov.cn/"
    one_hand = "credit/showcjgs/ysfcjgs.aspx"
    second_hand = "credit/showcjgs/esfcjgs.aspx"

    # The response is simply the page source; the site applies no
    # anti-scraping measures.
    content = _fetch_page(url + one_hand)

    # NOTE(review): the patterns in this function look truncated -- the HTML
    # tags that presumably anchored each capture group are missing from this
    # snapshot.  Confirm against the upstream main.py before trusting the
    # indexes used below.
    date = re.compile(r'(.*)')
    reg = re.compile(r'(\d+)')
    result = reg.findall(content)
    current_date = date.findall(content)

    reg2 = re.compile(r'(.*?)')
    yishou_area = reg2.findall(content)

    print(current_date[0])
    print('一手商品房成交套数:%s' % result[0])
    print('一手商品房成交面积: %s' % yishou_area[0])

    sec_content = _fetch_page(url + second_hand)

    sec_quantity = re.compile(r'(\d+)')
    sec_result = sec_quantity.findall(sec_content)
    second_area = re.findall(r'(.*?)', sec_content)

    print('二手商品房成交套数:%s' % sec_result[1])
    print('二手商品房成交面积: %s' % second_area[2])

    database.create_table()
    database.insert(current_date[0], result[0], yishou_area[0],
                    sec_result[1], second_area[2])


# Run the scrape only when executed as a script, so that importing this
# module does not hit the network.
if __name__ == "__main__":
    getContent()

# --------------------------------------------------------------------------------
# /database.py:
# --------------------------------------------------------------------------------
__author__ = 'Rocky'
import sqlite3


def create_table():
    """Create the ``HOUSE`` table in ``shenzhen_house.db`` if it is missing.

    The five TEXT columns are: date, first-hand units, first-hand area,
    second-hand units, second-hand area.

    Returns:
        ``False`` if SQLite reports an error, otherwise ``None``.
    """
    conn = sqlite3.connect('shenzhen_house.db')
    create_tb_cmd = '''
    CREATE TABLE IF NOT EXISTS HOUSE
    ('日期' TEXT,
    '一手房套数' TEXT,
    '一手房面积' TEXT,
    '二手房套数' TEXT,
    '二手房面积' TEXT);
    '''
    try:
        # IF NOT EXISTS makes this safe to run on every invocation, so a
        # single execute is enough.
        conn.execute(create_tb_cmd)
        conn.commit()
    except sqlite3.Error:
        print("Create table failed")
        return False
    finally:
        # Always release the connection, including on the failure path.
        conn.close()


def insert(date, one_hand, one_area, second_hand, second_area):
    """Append one day's figures as a row of the ``HOUSE`` table.

    Args:
        date: the date label scraped from the page.
        one_hand: number of first-hand units sold.
        one_area: floor area of first-hand units sold.
        second_hand: number of second-hand units sold.
        second_area: floor area of second-hand units sold.

    Raises:
        sqlite3.Error: if the row cannot be written (for example when the
            table has not been created yet).
    """
    conn = sqlite3.connect('shenzhen_house.db')
    print("open database passed")

    # On Python 2 this lets raw non-ASCII byte strings be bound as
    # parameters; on Python 3 ``str`` is already the default.
    conn.text_factory = str

    # Values are bound with "?" placeholders instead of being formatted into
    # the statement, so a quote character in scraped text can neither break
    # nor alter the SQL.
    cmd = ("INSERT INTO HOUSE ('日期','一手房套数','一手房面积','二手房套数','二手房面积') "
           "VALUES(?,?,?,?,?);")
    try:
        conn.execute(cmd, (date, one_hand, one_area, second_hand, second_area))
        conn.commit()
    finally:
        conn.close()

# --------------------------------------------------------------------------------