├── .gitignore
├── resource
└── pic1.PNG
├── shenzhen_house.db
├── README.md
├── main.py
└── database.py
/.gitignore:
--------------------------------------------------------------------------------
1 | /.idea
2 | .idea/
3 | *.pyc
4 |
--------------------------------------------------------------------------------
/resource/pic1.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rockyzsu/house/master/resource/pic1.PNG
--------------------------------------------------------------------------------
/shenzhen_house.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rockyzsu/house/master/shenzhen_house.db
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 每天自动获取深圳上海北京的新房二手房的成交量
2 | 深圳市房地产信息系统:http://ris.szpl.gov.cn/
3 | ![image](http://www.30daydo.com/uploads/article/20161012/a174e3e3ea03c9c2f275c2c05ea83dd3.PNG)
4 |
5 | 每天的数据写入数据库
6 |
7 | 
8 |
9 | :
8 | url="http://ris.szpl.gov.cn/"
9 | one_hand="credit/showcjgs/ysfcjgs.aspx"
10 | second_hand="credit/showcjgs/esfcjgs.aspx"
11 | req=urllib2.Request(url+one_hand)
12 | content=urllib2.urlopen(req).read()
13 | # The response is simply the page's HTML source; the site (a government site) applies no anti-scraping measures.
14 | #print content
15 | date=re.compile(r'(.*)')
16 | reg=re.compile(r'
(\d+)')
17 | result=reg.findall(content)
18 | current_date=date.findall(content)
19 |
20 | reg2=re.compile(r' | (.*?)')
21 | yishou_area=reg2.findall(content)
22 |
23 |
24 | print current_date[0]
25 | print '一手商品房成交套数:%s' % result[0]
26 | print '一手商品房成交面积: %s' % yishou_area[0]
27 |
28 |
29 | sec_req=urllib2.Request(url+second_hand)
30 | sec_content=urllib2.urlopen(sec_req).read()
31 |
32 | sec_quantity=re.compile(r' | (\d+) | ')
33 | sec_result=sec_quantity.findall(sec_content)
34 | second_area=re.findall(r'(.*?) | ',sec_content)
35 |
36 | print '二手商品房成交套数:%s' % sec_result[1]
37 | print '二手商品房成交面积: %s' % second_area[2]
38 | database.create_table()
39 | database.insert(current_date[0],result[0],yishou_area[0],sec_result[1],second_area[2])
40 |
41 | getContent()
--------------------------------------------------------------------------------
/database.py:
--------------------------------------------------------------------------------
1 | # -*-coding=utf-8-*-
2 | __author__ = 'Rocky'
3 | import sqlite3
4 |
def create_table():
    """Create the HOUSE table in ``shenzhen_house.db`` if it is missing.

    The database file is opened relative to the current working
    directory.  The statement uses ``CREATE TABLE IF NOT EXISTS``, so
    calling this function repeatedly is harmless.

    Returns:
        True when the statement was executed and committed, False when
        SQLite reported an error (``"Create table failed"`` is printed in
        that case).  Errors raised while opening the database propagate
        to the caller.
    """
    conn = sqlite3.connect('shenzhen_house.db')

    # The statement below is the heart of this function.  All five
    # columns are declared TEXT.  Identifiers are double-quoted, which
    # is the standard SQL quoting for column names.
    create_tb_cmd = '''
    CREATE TABLE IF NOT EXISTS HOUSE
    ("日期" TEXT,
    "一手房套数" TEXT,
    "一手房面积" TEXT,
    "二手房套数" TEXT,
    "二手房面积" TEXT);
    '''
    try:
        conn.execute(create_tb_cmd)
        conn.commit()
    except sqlite3.Error:
        # Only database errors are reported as a soft failure; anything
        # else (e.g. KeyboardInterrupt) is no longer swallowed.
        print("Create table failed")
        return False
    finally:
        # Close on every path so a failure does not leak the connection.
        conn.close()
    return True
26 |
def insert(date, one_hand, one_area, second_hand, second_area):
    """Append one row of figures to the HOUSE table in ``shenzhen_house.db``.

    The values are bound through ``?`` placeholders instead of being
    interpolated into the SQL text, so a value containing a quote
    character can neither break the statement nor inject SQL.

    Args:
        date: Value for the ``日期`` column.
        one_hand: Value for the ``一手房套数`` column.
        one_area: Value for the ``一手房面积`` column.
        second_hand: Value for the ``二手房套数`` column.
        second_area: Value for the ``二手房面积`` column.

    Raises:
        sqlite3.Error: If the insert or commit fails (for example when
            the HOUSE table does not exist).  The connection is closed
            before the exception propagates.
    """
    def _as_text(value):
        # Byte strings are decoded before binding: Python 2's sqlite3
        # rejects non-ASCII 8-bit strings as parameters, and Python 3
        # would store bytes as a BLOB.
        # NOTE(review): assumes the scraped data is UTF-8 -- confirm.
        if isinstance(value, bytes):
            return value.decode('utf-8', 'replace')
        return value

    row = tuple(
        _as_text(value)
        for value in (date, one_hand, one_area, second_hand, second_area)
    )
    cmd = (
        'INSERT INTO HOUSE '
        '("日期","一手房套数","一手房面积","二手房套数","二手房面积") '
        'VALUES (?,?,?,?,?);'
    )

    conn = sqlite3.connect('shenzhen_house.db')
    print("open database passed")
    try:
        conn.execute(cmd, row)
        conn.commit()
    finally:
        # Release the database handle even when the insert fails.
        conn.close()
--------------------------------------------------------------------------------