├── data └── images │ └── .gitignore ├── crawler ├── house_renting │ ├── __init__.py │ ├── commands │ │ ├── __init__.py │ │ └── crawl.py │ ├── spider_settings │ │ ├── __init__.py │ │ ├── lianjia.py │ │ └── a58.py │ ├── spiders │ │ ├── __init__.py │ │ ├── douban.py │ │ ├── a58.py │ │ └── lianjia.py │ ├── proxies.py │ ├── base_spider.py │ ├── exporters.py │ ├── pipelines.py │ ├── middlewares.py │ ├── items.py │ └── settings.py ├── requirements.txt ├── Dockerfile ├── setup.py └── scrapy.cfg ├── .github ├── FUNDING.yml └── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── screenshot ├── discover.png ├── adding-fields.png ├── expanding-doc.png ├── searching-by-field.png ├── searching-by-fields.png ├── sorting-by-fields.png └── setting-index-pattern.png ├── .gitignore ├── scrapyd ├── Dockerfile └── scrapyd.conf ├── .travis.yml ├── readme.md ├── docker-compose.yml └── LICENSE /data/images/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /crawler/house_renting/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /crawler/house_renting/commands/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /crawler/house_renting/spider_settings/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [kezhenxu94] 4 | -------------------------------------------------------------------------------- 
/crawler/requirements.txt: -------------------------------------------------------------------------------- 1 | scrapy==1.4.0 2 | 3 | redis==2.10.6 4 | pillow==8.3.2 5 | elasticsearch==6.2.0 6 | -------------------------------------------------------------------------------- /screenshot/discover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kezhenxu94/house-renting/HEAD/screenshot/discover.png -------------------------------------------------------------------------------- /screenshot/adding-fields.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kezhenxu94/house-renting/HEAD/screenshot/adding-fields.png -------------------------------------------------------------------------------- /screenshot/expanding-doc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kezhenxu94/house-renting/HEAD/screenshot/expanding-doc.png -------------------------------------------------------------------------------- /screenshot/searching-by-field.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kezhenxu94/house-renting/HEAD/screenshot/searching-by-field.png -------------------------------------------------------------------------------- /screenshot/searching-by-fields.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kezhenxu94/house-renting/HEAD/screenshot/searching-by-fields.png -------------------------------------------------------------------------------- /screenshot/sorting-by-fields.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kezhenxu94/house-renting/HEAD/screenshot/sorting-by-fields.png 
-------------------------------------------------------------------------------- /screenshot/setting-index-pattern.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kezhenxu94/house-renting/HEAD/screenshot/setting-index-pattern.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .* 2 | !.gitignore 3 | *.pyc 4 | 5 | data/elastic 6 | data/redis 7 | data/images/full 8 | 9 | build/ 10 | dbs/ 11 | eggs/ 12 | logs/ 13 | project.egg-info/ 14 | -------------------------------------------------------------------------------- /crawler/house_renting/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 
5 | -------------------------------------------------------------------------------- /crawler/house_renting/proxies.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | proxies = ( 4 | # '115.229.93.123:9000', 5 | # '114.249.116.183:9000', 6 | # '14.118.252.68:6666', 7 | # '115.229.93.123:9000', 8 | ) 9 | -------------------------------------------------------------------------------- /scrapyd/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3 2 | 3 | VOLUME /etc/scrapyd/ /var/lib/scrapyd/ 4 | 5 | COPY ./scrapyd.conf /etc/scrapyd/ 6 | 7 | RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple scrapyd 8 | 9 | ENTRYPOINT ["scrapyd"] 10 | -------------------------------------------------------------------------------- /crawler/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6 2 | 3 | COPY . /house-renting/crawler 4 | 5 | VOLUME /etc/scrapyd/ /var/lib/scrapyd/ 6 | 7 | WORKDIR /house-renting/crawler 8 | 9 | RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt 10 | -------------------------------------------------------------------------------- /crawler/setup.py: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapyd-deploy 2 | 3 | from setuptools import setup, find_packages 4 | 5 | setup( 6 | name='project', 7 | version='1.0', 8 | packages=find_packages(), 9 | entry_points={'scrapy': ['settings = house_renting.settings']}, 10 | install_requires=['scrapy', 'elasticsearch', 'redis'] 11 | ) 12 | -------------------------------------------------------------------------------- /crawler/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 
def start_requests(self):
    """Yield one start request per configured city.

    Reads two settings: ``cities`` (the city names to crawl) and
    ``available_cities_map`` (city name -> start URL). A city that is
    missing from the map, or mapped to ``None``, is reported on stdout and
    skipped, so one bad name does not abort the crawl with ``KeyError``.
    """
    cities = self.settings.get('cities', [])
    city_url_mappings = self.settings.get('available_cities_map', {})

    for city in cities:
        # .get(): an unknown city is handled by the same "cannot crawl"
        # branch as a city explicitly mapped to None.
        city_url = city_url_mappings.get(city)
        if city_url is None:
            print('Cannot crawl house renting data from city: ', city)
        else:
            yield self.make_requests_from_url(city_url)
1: 8 | raise UsageError() 9 | elif len(args) > 1: 10 | raise UsageError("running 'scrapy crawl' with more than one spider is no longer supported") 11 | spider_name = args[0] 12 | 13 | spider_settings = self.settings.getdict('SPIDER_SETTINGS', {}).get(spider_name, {}) 14 | 15 | self.settings.update(spider_settings, priority='cmdline') 16 | 17 | self.crawler_process.crawl(spider_name, **opts.spargs) 18 | self.crawler_process.start() 19 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | group: travis_latest 2 | language: python 3 | cache: pip 4 | python: 5 | - 2.7 6 | - 3.6 7 | matrix: 8 | allow_failures: 9 | - python: nightly 10 | - python: pypy 11 | - python: pypy3 12 | install: 13 | #- pip install -r requirements.txt 14 | - pip install flake8 # pytest # add another testing frameworks later 15 | before_script: 16 | # stop the build if there are Python syntax errors or undefined names 17 | - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics 18 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 19 | - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 20 | script: 21 | - true # pytest --capture=sys # add other tests here 22 | notifications: 23 | on_success: change 24 | on_failure: change # `always` will be the setting once code changes slow down 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 功能建议 (Feature request) 3 | about: 给这个项目提建议 (Suggest an idea for this project) 4 | 5 | --- 6 | 7 | **Is your feature request related to a problem? Please describe.** 8 | **你的功能建议和某一个问题相关吗? 请描述.** 9 | 10 | > A clear and concise description of what the problem is. Ex. 
def export_item(self, item):
    """Index ``item`` into Elasticsearch and return it unchanged.

    The document is written to ``self.index`` / ``self.doc_type`` with the
    item's ``item_id`` as the document id. When no Elasticsearch client is
    available, the item is passed through without being indexed.

    :param item: mapping-like item; must contain ``item_id``.
    :return: the same ``item`` object.
    """
    # __init__ assigns self.client only when the ELASTIC_HOSTS setting is
    # present, so the attribute may be absent rather than None.
    client = getattr(self, 'client', None)
    if client is None:
        return item

    client.index(index=self.index, doc_type=self.doc_type, body=dict(item), id=item['item_id'])
    return item
improve) 4 | 5 | --- 6 | 7 | 11 | 12 | ## Bug 描述 (Describe the bug) 13 | 14 | > 清晰简短的描述你遇到的 Bug. (A clear and concise description of what the bug is.) 15 | 16 | ## 如何重现 (To Reproduce) 17 | 18 | 重现步骤 (Steps to reproduce the behavior): 19 | 20 | > 1. Go to '...' 21 | > 2. Click on '....' 22 | > 3. Scroll down to '....' 23 | > 4. See error 24 | 25 | ## 截图 (Screenshots) 26 | 27 | > 如果有截图的话, 加上你遇到 Bug 的截图 (If applicable, add screenshots to help explain your problem). 28 | 29 | ## 桌面环境 Desktop (please complete the following information) 30 | 31 | - 操作系统(OS): 32 | 33 | 如果是通过 Docker 运行: 34 | 35 | - Docker: 36 | 37 | 如果不是通过 Docker 运行: 38 | 39 | - Python: 40 | 41 | - Scrapy: 42 | 43 | - Redis: 44 | 45 | - Elastic search: 46 | 47 | - Kibana: 48 | 49 | ## 附加信息 (Additional context) 50 | 51 | > 添加有利于我们排查问题的附加信息. (Add any other context about the problem here.) 52 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # 租房信息聚合爬虫 2 | 3 | [![Codacy Badge](https://api.codacy.com/project/badge/Grade/ff42798a35db47459892ab5c754304f8)](https://app.codacy.com/app/kezhenxu94/house-renting?utm_source=github.com&utm_medium=referral&utm_content=kezhenxu94/house-renting&utm_campaign=Badge_Grade_Settings) 4 | [![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) 5 | [![Build Status](https://travis-ci.org/kezhenxu94/house-renting.svg?branch=master)](https://travis-ci.org/kezhenxu94/house-renting) 6 | [![Python Version](https://img.shields.io/badge/Python-2.7-green.svg)](https://www.python.org/downloads/release/python-2715/) 7 | [![Python Version](https://img.shields.io/badge/Python-3.6-green.svg)](https://www.python.org/downloads/release/python-365/) 8 | [![Gitter](https://img.shields.io/gitter/room/kezhenxu94/house-renting.svg)](https://gitter.im/house-renting/Lobby/) 9 | 10 | **租房信息聚合爬虫**是作者在租房过程中, 
由于不堪忍受各个租房网站上各种眼花缭乱的信息而开发的一个基于 [Scrapy](https://scrapy.org) 的爬虫, 作者已经使用该爬虫找到合适的住所; 11 | 12 | 为了方便同样身在异乡的朋友解决寻找住处的痛点, 现将该爬虫整理并开源; 目前它聚合了来自[豆瓣](https://www.douban.com), [链家](https://lianjia.com), [58 同城](http://58.com) 等上百个城市的租房信息, 且根据作者的业余时间和精力还在不断更新中; 13 | 14 | 将不同网站租房信息聚合起来的优点是: 可以统一集中搜索感兴趣的租房信息, 而不必在各个租房信息网站间来回切换搜索, 还不用受限于部分网站鸡肋的搜索功能; 15 | 16 | ## 帮助 17 | 18 | 详细说明请看本项目的 [Wiki 页面](https://github.com/kezhenxu94/house-renting/wiki); 19 | 20 | ## 鼓励 21 | 22 | 如果你觉得本项目的想法很赞, 或者本项目对你有所帮助, 点个 Star 给作者一点鼓励吧 ^ _ ^ 23 | 24 | ## 贡献 25 | 26 | 如果你觉得某个租房网站的内容对你非常有用, 应该被收录在这里, 可以[在这里提 Issue](https://github.com/kezhenxu94/house-renting/issues), 或者你自己实现之后[在这里提 Pull Request](https://github.com/kezhenxu94/house-renting/pulls), 帮助更多人找到合适的住所; 27 | -------------------------------------------------------------------------------- /crawler/house_renting/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | import hashlib 8 | 9 | import redis 10 | from scrapy.conf import settings 11 | from scrapy.exceptions import DropItem 12 | 13 | from house_renting.exporters import ESItemExporter 14 | 15 | 16 | class HouseRentingPipeline(object): 17 | def process_item(self, item, spider): 18 | m = hashlib.md5() 19 | m.update(item['source_url'].encode('utf-8')) 20 | item['item_id'] = m.hexdigest() 21 | return item 22 | 23 | 24 | class ESPipeline(object): 25 | exporter = None 26 | 27 | def open_spider(self, spider): 28 | self.exporter = ESItemExporter() 29 | 30 | def process_item(self, item, spider): 31 | self.exporter.export_item(item) 32 | return item 33 | 34 | 35 | class DuplicatesPipeline(object): 36 | def __init__(self): 37 | redis_host = settings.get('REDIS_HOST') 38 | redis_port = settings.get('REDIS_PORT', default=6379) 39 | 40 | if 
def process_item(self, item, spider):
    """Drop items whose ``item_id`` was already recorded in Redis.

    An item with an unseen ``item_id`` is recorded under that key with the
    value ``'SEEN'`` and returned. Items without ``item_id`` are returned
    untouched. When no Redis client is available, de-duplication is
    skipped and every item passes through.

    :param item: mapping-like item produced by a spider.
    :param spider: the spider that produced the item (unused).
    :return: the same ``item`` object.
    :raises DropItem: if Redis already holds a value for ``item_id``.
    """
    # __init__ assigns self.r_client only when the REDIS_HOST setting is
    # present, so the attribute may be absent rather than None.
    client = getattr(self, 'r_client', None)
    if client is None:
        return item

    if 'item_id' in item:
        item_id = item['item_id']
        if client.get(item_id) is not None:
            raise DropItem("Duplicate item found: %s" % item)
        client.set(item_id, 'SEEN')

    return item
settings.get('REDIS_HOST') 42 | redis_port = settings.get('REDIS_PORT', default=6379) 43 | 44 | if redis_host is not None: 45 | self.r_client = Redis(host=redis_host, port=redis_port) 46 | 47 | self.proxies = proxies.proxies 48 | 49 | def process_request(self, request, spider): 50 | if len(self.proxies) > 0: 51 | request.meta['proxy'] = random.choice(self.proxies) 52 | -------------------------------------------------------------------------------- /crawler/house_renting/spiders/douban.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from scrapy import Selector 4 | from scrapy.linkextractors import LinkExtractor 5 | from scrapy.loader import ItemLoader 6 | from scrapy.spiders import Rule, CrawlSpider 7 | 8 | from house_renting.items import HouseRentingDoubanItem 9 | 10 | 11 | class DoubanSpider(CrawlSpider): 12 | name = 'douban' 13 | allowed_domains = ['douban.com'] 14 | start_urls = ['https://www.douban.com/group/tianhezufang/discussion?start=0'] 15 | 16 | rules = ( 17 | Rule(LinkExtractor(allow=r'/group/tianhezufang/discussion\?start=\d+$', 18 | restrict_css=('div#content div.article table', 'div#content div.article div.paginator')), 19 | follow=True), 20 | Rule(LinkExtractor(allow=r'/group/topic/\d+/$'), callback='parse_item'), 21 | ) 22 | 23 | def parse_item(self, response): 24 | selector = Selector(response=response) 25 | selector.css('div#content div.article div.topic-content') 26 | 27 | item_loader = ItemLoader(item=HouseRentingDoubanItem(), selector=selector, response=response) 28 | item_loader.add_css(field_name='title', css='table.infobox *::text') 29 | item_loader.add_css(field_name='title', css='div#content > h1:first-child::text') 30 | item_loader.add_value(field_name='source', value=self.name) 31 | item_loader.add_css(field_name='author', css='h3 span.from a::text') 32 | item_loader.add_css(field_name='image_urls', css='div.topic-content div#link-report img::attr(src)') 33 | 
item_loader.add_css(field_name='author_link', css='h3 span.from a::attr(href)') 34 | item_loader.add_css(field_name='content', css='div.topic-content div#link-report *::text', re=r'\s*(.*)\s*') 35 | item_loader.add_value(field_name='source_url', value=response.url) 36 | item_loader.add_css(field_name='publish_time', css='h3 span:last-child::text', 37 | re=r'\s*(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\s*') 38 | 39 | yield item_loader.load_item() 40 | -------------------------------------------------------------------------------- /crawler/house_renting/spiders/a58.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from scrapy import Selector 3 | from scrapy.linkextractors import LinkExtractor 4 | from scrapy.loader import ItemLoader 5 | from scrapy.spiders import Rule 6 | 7 | from house_renting.base_spider import BaseCrawlSpider 8 | from house_renting.items import HouseRenting58Item 9 | 10 | 11 | class A58Spider(BaseCrawlSpider): 12 | name = '58' 13 | allowed_domains = ['58.com'] 14 | 15 | rules = ( 16 | Rule(LinkExtractor(allow=(r'/zufang/(pn\d+/)?', r'/hezu/(pn\d+/)?', r'/chuzu/(pn\d+/)?'), 17 | restrict_css='div.main > div.content > div.listBox > ul.listUl > li'), 18 | follow=True), 19 | Rule(LinkExtractor(allow=(r'/zufang/\d+x\.shtml', r'/hezu/\d+x\.shtml', r'/chuzu/\d+x\.shtml')), 20 | callback='parse_item'), 21 | ) 22 | 23 | def parse_item(self, response): 24 | item_loader = ItemLoader(item=HouseRenting58Item(), selector=response.css('div.main-wrap'), response=response) 25 | item_loader.add_css(field_name='title', css='div.house-title > h1::text') 26 | item_loader.add_value(field_name='source', value=self.name) 27 | item_loader.add_css(field_name='author', css='div.house-basic-info div.house-agent-info p.agent-name > a::text') 28 | item_loader.add_css(field_name='image_urls', css='div.basic-pic-list > ul > li > img::attr(data-src)', 29 | re=r'(.*)\?.*') 30 | 
def parse_start_url(self, response):
    """Schedule every listing page advertised by a city's start page.

    Pagination is read from two attributes of ``div.page-box``:
    ``page-data`` (JSON with a ``totalPage`` entry) and ``page-url`` (a URL
    template containing ``{page}``). One follow-up request is yielded per
    page. Nothing is yielded when either attribute or ``totalPage`` is
    missing.

    :param response: response for one of the start URLs.
    """
    page_data = response.css('div.page-box::attr(page-data)').extract_first()
    if page_data is None:
        return

    # .get(): a payload without totalPage ends pagination quietly instead
    # of raising KeyError before the None check below can run.
    total_page = json.loads(page_data).get('totalPage')
    if total_page is None:
        return

    page_url_pattern = response.css('div.page-box::attr(page-url)').extract_first()
    if page_url_pattern is None:
        return

    # NOTE(review): assumes listing pages are numbered 1..totalPage
    # (pg1 .. pgN) - confirm against the live site. Counting from 0 would
    # request a page 0 and never reach the last page.
    for page in range(1, int(total_page) + 1):
        yield response.follow(page_url_pattern.replace('{page}', str(page)))
parse_item(self, response): 41 | item_loader = ItemLoader(item=HouseRentingLianjiaItem(), response=response) 42 | 43 | item_loader.add_css(field_name='title', css='div.title *::text') 44 | item_loader.add_value(field_name='source', value=self.name) 45 | item_loader.add_css(field_name='author', css='div.brokerName > a.name::text') 46 | item_loader.add_css(field_name='image_urls', css='div.thumbnail > ul > li > img::attr(src)') 47 | item_loader.add_css(field_name='author_link', css='div.brokerName > a.name::attr(href)') 48 | item_loader.add_css(field_name='content', css='div.introduction *::text', re=r'\s*(.*)\s*') 49 | item_loader.add_value(field_name='source_url', value=response.url) 50 | item_loader.add_css(field_name='publish_time', css='div.zf-room > p::text') 51 | 52 | item_loader.add_css(field_name='price', css='div.price > span.total::text') 53 | item_loader.add_css(field_name='detail', css='div.zf-room *::text') 54 | 55 | yield item_loader.load_item() 56 | -------------------------------------------------------------------------------- /crawler/house_renting/spider_settings/lianjia.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # 只需要在这个列表中添加以下 available_cities 中的城市, 如果只需要扒取一个城市也需要使用一个括号包围, 如 (u'广州',) 4 | cities = (u'广州', u'北京') 5 | 6 | available_cities = ( 7 | u'北京', 8 | 9 | u'成都', u'重庆', u'长沙', 10 | 11 | u'大连', u'东莞', 12 | 13 | u'佛山', 14 | 15 | u'广州', 16 | 17 | u'杭州', u'惠州', u'合肥', 18 | 19 | u'济南', 20 | 21 | u'廊坊', 22 | 23 | u'南京', 24 | 25 | u'青岛', 26 | 27 | u'上海', u'深圳', u'苏州', u'石家庄', u'沈阳', 28 | 29 | u'天津', 30 | 31 | u'武汉', u'无锡', 32 | 33 | u'厦门', u'西安', 34 | 35 | u'烟台', 36 | 37 | u'中山', u'珠海', u'郑州', 38 | ) 39 | 40 | available_cities_map = { 41 | # B 42 | u'北京': 'https://bj.lianjia.com/zufang/', u'保亭': None, u'保定': None, 43 | 44 | # C 45 | u'成都': 'https://cd.lianjia.com/zufang/', u'重庆': 'https://cq.lianjia.com/zufang/', 46 | u'长沙': 'https://cs.lianjia.com/zufang/', u'澄迈': None, 
u'承德': None, u'滁州': None, 47 | 48 | # D 49 | u'大连': 'https://dl.lianjia.com/zufang/', u'东莞': 'https://dg.lianjia.com/zufang/', 50 | u'儋州': None, u'定安': None, u'大理': None, u'德阳': None, 51 | 52 | # F 53 | u'佛山': 'https://fs.lianjia.com/zufang/', 54 | 55 | # G 56 | u'广州': 'https://gz.lianjia.com/zufang/', 57 | 58 | # H 59 | u'杭州': 'https://hz.lianjia.com/zufang/', u'惠州': 'https://hui.lianjia.com/zufang/', 60 | u'海口': None, u'合肥': 'https://hf.lianjia.com/zufang/', u'衡水': None, u'黄冈': None, u'邯郸': None, 61 | 62 | # J 63 | u'济南': 'https://jn.lianjia.com/zufang/', u'嘉兴': None, u'晋中': None, 64 | 65 | # K 66 | u'昆明': None, 67 | 68 | # L 69 | u'陵水': None, u'廊坊': 'https://lf.lianjia.com/zufang/', u'临高': None, u'乐东': None, u'龙岩': None, 70 | u'乐山': None, 71 | 72 | # M 73 | u'眉山': None, 74 | 75 | # N 76 | u'南京': 'https://nj.lianjia.com/zufang/', 77 | 78 | # Q 79 | u'青岛': 'https://qd.lianjia.com/zufang/', u'琼海': None, u'琼中': None, u'泉州': None, u'清远': None, u'秦皇岛': None, 80 | 81 | # S 82 | u'上海': 'https://sh.lianjia.com/zufang/', u'深圳': 'https://sz.lianjia.com/zufang/', 83 | u'苏州': 'https://su.lianjia.com/zufang/', u'石家庄': 'https://sjz.lianjia.com/zufang/', 84 | u'沈阳': 'https://sy.lianjia.com/zufang/', u'三亚': None, u'绍兴': None, 85 | 86 | # T 87 | u'天津': 'https://tj.lianjia.com/zufang/', u'太原': None, 88 | 89 | # W 90 | u'武汉': 'https://wh.lianjia.com/zufang/', u'无锡': 'https://wx.lianjia.com/zufang/', u'文昌': None, u'万宁': None, 91 | u'五指山': None, u'威海': None, 92 | 93 | # X 94 | u'厦门': 'https://xm.lianjia.com/zufang/', u'西安': 'https://xa.lianjia.com/zufang/', u'徐州': None, u'西双版纳': None, 95 | u'咸宁': None, u'邢台': None, 96 | 97 | # Y 98 | u'烟台': 'https://yt.lianjia.com/zufang/', 99 | 100 | # Z 101 | u'中山': 'https://zs.lianjia.com/zufang/', u'珠海': 'https://zh.lianjia.com/zufang/', 102 | u'郑州': 'https://zz.lianjia.com/zufang/', u'镇江': None, u'张家口': None, u'漳州': None, 103 | } 104 | -------------------------------------------------------------------------------- /docker-compose.yml: 
-------------------------------------------------------------------------------- 1 | version: '3.1' 2 | 3 | services: 4 | scrapyd: 5 | build: ./scrapyd 6 | image: house-renting/scrapyd 7 | container_name: scrapyd 8 | networks: 9 | - localhost 10 | ports: 11 | - 6800:6800 12 | depends_on: 13 | - redis 14 | - elastic 15 | volumes: 16 | - ./data:/house-renting/data 17 | entrypoint: /bin/true 18 | 19 | crawler: 20 | build: ./crawler 21 | image: house-renting/crawler 22 | container_name: crawler 23 | networks: 24 | - localhost 25 | volumes: 26 | - ./data:/house-renting/data 27 | - ./data/images:/house-renting/data/images 28 | depends_on: 29 | - redis 30 | - elastic 31 | entrypoint: /bin/true 32 | 33 | douban: 34 | image: house-renting/crawler 35 | container_name: douban 36 | networks: 37 | - localhost 38 | volumes: 39 | - ./data/images:/house-renting/data/images 40 | depends_on: 41 | - crawler 42 | entrypoint: scrapy crawl douban 43 | 44 | "58": 45 | image: house-renting/crawler 46 | container_name: "58" 47 | networks: 48 | - localhost 49 | volumes: 50 | - ./data/images:/house-renting/data/images 51 | depends_on: 52 | - crawler 53 | entrypoint: scrapy crawl 58 54 | 55 | lianjia: 56 | image: house-renting/crawler 57 | container_name: lianjia 58 | networks: 59 | - localhost 60 | volumes: 61 | - ./data/images:/house-renting/data/images 62 | depends_on: 63 | - crawler 64 | entrypoint: scrapy crawl lianjia 65 | 66 | elastic: 67 | image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.2.4 68 | container_name: elasticsearch 69 | environment: 70 | - bootstrap.memory_lock=true 71 | - "ES_JAVA_OPTS=-Xms512m -Xmx512m" 72 | ulimits: 73 | memlock: 74 | soft: -1 75 | hard: -1 76 | networks: 77 | - localhost 78 | ports: 79 | - 9200:9200 80 | volumes: 81 | - ./data/elastic:/usr/share/elasticsearch/data 82 | 83 | kibana: 84 | image: docker.elastic.co/kibana/kibana-oss:6.2.4 85 | container_name: kibana 86 | networks: 87 | - localhost 88 | ports: 89 | - 5601:5601 90 | environment: 91 
| - ELASTICSEARCH_URL=http://elastic:9200 92 | depends_on: 93 | - elastic 94 | 95 | redis: 96 | image: redis 97 | container_name: redis 98 | networks: 99 | - localhost 100 | ports: 101 | - 6379:6379 102 | volumes: 103 | - ./data/redis:/data 104 | 105 | networks: 106 | localhost: 107 | 108 | -------------------------------------------------------------------------------- /crawler/house_renting/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import datetime 4 | import re 5 | import time 6 | 7 | import scrapy 8 | from scrapy.loader.processors import Join, MapCompose, Compose, TakeFirst 9 | 10 | 11 | def filter_title(value): 12 | return value.strip() if value != u'标题:' else None 13 | 14 | 15 | def filter_content(value): 16 | return value if len(value) > 0 else None 17 | 18 | 19 | def filter_image_url(value): 20 | return 'https:'+value if value[:2] == '//' else value 21 | 22 | 23 | class HouseRentingBaseItem(scrapy.Item): 24 | item_id = scrapy.Field() 25 | title = scrapy.Field(input_processor=MapCompose(str.strip, filter_title), 26 | output_processor=Compose(TakeFirst(), str.strip)) 27 | source = scrapy.Field(output_processor=Join()) 28 | author = scrapy.Field(input_processor=MapCompose(str.strip), 29 | output_processor=Compose(Join(), str.strip)) 30 | image_urls = scrapy.Field(input_processor=MapCompose(filter_image_url)) 31 | images = scrapy.Field() 32 | author_link = scrapy.Field(output_processor=Join()) 33 | content = scrapy.Field(input_processor=MapCompose(str.strip, filter_content), 34 | output_processor=Compose(Join(separator=u'\n'))) 35 | source_url = scrapy.Field(output_processor=Join()) 36 | publish_time = scrapy.Field(input_processor=MapCompose(str.strip), 37 | output_processor=Compose(Join(), str.strip)) 38 | 39 | 40 | def publish_time_serializer_douban(value): 41 | return int(time.mktime(datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S").timetuple())) 42 | 43 | 44 | class 
# Relative-age patterns, tried in this order; the first one that matches wins.
# Each key is the ``datetime.timedelta`` keyword the captured number feeds.
# 分钟前 = "minutes ago", 小时前 = "hours ago", 天前 = "days ago".
_RELATIVE_AGE_PATTERNS = (
    ('minutes', re.compile(r'(\d+)分钟前')),
    ('hours', re.compile(r'(\d+)小时前')),
    ('days', re.compile(r'(\d+)天前')),
)

# ``MM-DD`` with an optional leading four-digit year (``YYYY-MM-DD``).
_ABSOLUTE_DATE_RE = re.compile(r'(?:(\d{4})-)?(\d+)-(\d+)')

# 元/月 = "yuan per month".
_PRICE_58_RE = re.compile(r'(\d+)\s*元/月')


def publish_time_serializer(value):
    """Convert a human-readable publish time to an integer Unix timestamp.

    Recognised forms, checked in this order:

    * ``N分钟前`` / ``N小时前`` / ``N天前`` -- that many minutes, hours or
      days before the current local time;
    * ``MM-DD`` -- that month and day, keeping the current year and the
      current time of day (the text carries no year);
    * ``YYYY-MM-DD`` -- as above but with the explicit year.

    :param value: the publish-time text.
    :return: seconds since the epoch as ``int``, or ``None`` when ``value``
        matches none of the forms or names an impossible date.  The previous
        version raised ``AttributeError`` / ``ValueError`` in those cases.
    """
    now = datetime.datetime.today()
    try:
        for unit, pattern in _RELATIVE_AGE_PATTERNS:
            match = pattern.search(value)
            if match:
                age = datetime.timedelta(**{unit: int(match.group(1))})
                publish_time = now - age
                break
        else:
            match = _ABSOLUTE_DATE_RE.search(value)
            if match is None:
                return None
            year, month, day = match.groups()
            publish_time = now.replace(
                year=int(year) if year else now.year,
                month=int(month),
                day=int(day),
            )
    except (ValueError, OverflowError):
        # Out-of-range month/day, or an age so large it leaves the
        # representable datetime range.
        return None

    return int(time.mktime(publish_time.timetuple()))


def price_serializer_58(value):
    """Extract the monthly rent from text of the form ``<digits> 元/月``.

    :param value: the price text.
    :return: the first run of digits followed (after optional whitespace)
        by ``元/月`` as ``int``, or ``None`` when there is no such run.
    """
    match = _PRICE_58_RE.search(value)
    if match is None:
        return None
    return int(match.group(1))
output_processor=Compose(Join(), str.strip)) 88 | detail = scrapy.Field(input_processor=MapCompose(str.strip), output_processor=Compose(Join(), str.strip)) 89 | -------------------------------------------------------------------------------- /crawler/house_renting/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from house_renting.spider_settings import lianjia, a58 3 | 4 | BOT_NAME = 'house_renting' 5 | 6 | COMMANDS_MODULE = 'house_renting.commands' 7 | SPIDER_MODULES = ['house_renting.spiders'] 8 | NEWSPIDER_MODULE = 'house_renting.spiders' 9 | 10 | USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1 ' \ 11 | 'Safari/605.1.15 ' 12 | 13 | USER_AGENTS = ( 14 | 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)', 15 | 16 | 17 | 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; ' 18 | '.NET CLR 3.0.04506)', 19 | 20 | 'Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR ' 21 | '2.0.50727)', 22 | 23 | 'Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)', 24 | 25 | 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR ' 26 | '3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)', 27 | 28 | 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; ' 29 | '.NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)', 30 | 31 | 'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR ' 32 | '3.0.04506.30)', 33 | 34 | 'Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (' 35 | 'Change: 287 c9dfb30)', 36 | 37 | 
'Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6', 38 | 39 | 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1', 40 | 41 | 'Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0', 42 | 43 | 'Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5', 44 | 45 | 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6', 46 | 47 | 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11', 48 | 49 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 ' 50 | 'Safari/535.20', 51 | 52 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1 ' 53 | 'Safari/605.1.15', 54 | 55 | 'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52', 56 | 57 | ) 58 | 59 | ROBOTSTXT_OBEY = False 60 | 61 | DOWNLOAD_DELAY = 10 62 | 63 | CONCURRENT_REQUESTS_PER_DOMAIN = 1 64 | 65 | COOKIES_ENABLED = False 66 | 67 | TELNETCONSOLE_ENABLED = False 68 | 69 | DEFAULT_REQUEST_HEADERS = { 70 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 71 | 'Accept-Language': 'en', 72 | } 73 | 74 | SPIDER_MIDDLEWARES = { 75 | } 76 | 77 | DOWNLOADER_MIDDLEWARES = { 78 | 'house_renting.middlewares.HouseRentingAgentMiddleware': 100, 79 | 'house_renting.middlewares.HouseRentingProxyMiddleware': 200, 80 | 'house_renting.middlewares.HouseRentingRetryMiddleware': 300, 81 | 'scrapy.downloadermiddlewares.retry.RetryMiddleware': None, 82 | 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None, 83 | } 84 | 85 | ITEM_PIPELINES = { 86 | 'house_renting.pipelines.HouseRentingPipeline': 100, 87 | 'house_renting.pipelines.DuplicatesPipeline': 200, 88 | 'scrapy.pipelines.images.ImagesPipeline': 300, 89 | 
'house_renting.pipelines.ESPipeline': 400, 90 | } 91 | 92 | IMAGES_STORE = '/house-renting/data/images' 93 | 94 | MEDIA_ALLOW_REDIRECTS = True 95 | 96 | # Enable and configure the AutoThrottle extension (disabled by default) 97 | # See http://doc.scrapy.org/en/latest/topics/autothrottle.html 98 | AUTOTHROTTLE_ENABLED = True 99 | # The initial download delay 100 | AUTOTHROTTLE_START_DELAY = 10 101 | # The maximum download delay to be set in case of high latencies 102 | AUTOTHROTTLE_MAX_DELAY = 10 103 | # The average number of requests Scrapy should be sending in parallel to 104 | # each remote server 105 | AUTOTHROTTLE_TARGET_CONCURRENCY = 2.0 106 | # Enable showing throttling stats for every response received: 107 | AUTOTHROTTLE_DEBUG = True 108 | 109 | DOWNLOAD_TIMEOUT = 30 110 | RETRY_TIMES = 3 111 | 112 | LOG_LEVEL = 'INFO' 113 | 114 | SPIDER_SETTINGS = { 115 | 'lianjia': { 116 | 'cities': lianjia.cities, 117 | 'available_cities': lianjia.available_cities, 118 | 'available_cities_map': lianjia.available_cities_map, 119 | }, 120 | '58': { 121 | 'cities': a58.cities, 122 | 'available_cities': a58.available_cities, 123 | 'available_cities_map': a58.available_cities_map, 124 | }, 125 | } 126 | 127 | # ES 节点, 可以配置多个节点(集群), 默认为 None, 不会存储到 ES 128 | ELASTIC_HOSTS = [ 129 | {'host': 'elastic', 'port': 9200}, 130 | ] 131 | 132 | REDIS_HOST = 'redis' # 默认为 None, 不会去重 133 | REDIS_PORT = 6379 # 默认 6379 134 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /crawler/house_renting/spider_settings/a58.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # 只需要在这个列表中添加以下 available_cities 中的城市, 如果只需要扒取一个城市也需要使用一个括号包围, 如 (u'广州',) 4 | cities = (u'北京', u'广州',) 5 | 6 | # 以下注释的字母表示的是省份名字的拼音首字母 7 | available_cities = ( 8 | # A 9 | u'合肥', 10 | u'芜湖', 11 | u'蚌埠', 12 | u'阜阳', 13 | u'淮南', 14 | u'安庆', 15 | u'宿州', 16 | u'六安', 17 | u'淮北', 18 | u'滁州', 19 | u'马鞍山', 20 | u'铜陵', 21 | u'宣城', 22 | u'亳州', 23 | u'黄山', 24 | u'池州', 25 | u'巢湖', 26 | u'和县', 27 | u'霍邱', 28 | u'桐城', 29 | u'宁国', 30 | u'天长', 31 | u'东至', 32 | u'无为', 33 | 34 | # B 35 | u'北京', 36 | 37 | # F 38 | u'福州', 39 | u'厦门', 40 | u'泉州', 41 | u'莆田', 42 | u'漳州', 43 | u'宁德', 44 | u'三明', 45 | u'南平', 46 | u'龙岩', 47 | u'武夷山', 48 | u'石狮', 49 | u'晋江', 50 | u'南安', 51 | u'龙海', 52 | u'上杭', 53 | u'福安', 54 | u'福鼎', 55 | u'安溪', 56 | u'永春', 57 | u'永安', 58 | u'漳浦', 59 | 60 | # G 61 | u'深圳', 62 | u'广州', 63 | u'东莞', 64 | u'佛山', 65 | u'中山', 66 | u'珠海', 67 | u'惠州', 68 | u'江门', 69 | u'汕头', 70 | u'湛江', 71 | u'肇庆', 
72 | u'茂名', 73 | u'揭阳', 74 | u'梅州', 75 | u'清远', 76 | u'阳江', 77 | u'韶关', 78 | u'河源', 79 | u'云浮', 80 | u'汕尾', 81 | u'潮州', 82 | u'台山', 83 | u'阳春', 84 | u'顺德', 85 | u'惠东', 86 | u'博罗', 87 | u'海丰', 88 | u'开平', 89 | u'陆丰', 90 | 91 | u'南宁', 92 | u'柳州', 93 | u'桂林', 94 | u'玉林', 95 | u'梧州', 96 | u'北海', 97 | u'贵港', 98 | u'钦州', 99 | u'百色', 100 | u'河池', 101 | u'来宾', 102 | u'贺州', 103 | u'防城港', 104 | u'崇左', 105 | u'桂平', 106 | u'北流', 107 | u'博白', 108 | u'岑溪', 109 | 110 | u'贵阳', 111 | u'遵义', 112 | u'黔东南', 113 | u'黔南', 114 | u'六盘水', 115 | u'毕节', 116 | u'铜仁', 117 | u'安顺', 118 | u'黔西南', 119 | u'仁怀', 120 | u'清镇', 121 | 122 | u'兰州', 123 | u'天水', 124 | u'白银', 125 | u'庆阳', 126 | u'平凉', 127 | u'酒泉', 128 | u'张掖', 129 | u'武威', 130 | u'定西', 131 | u'金昌', 132 | u'陇南', 133 | u'临夏', 134 | u'嘉峪关', 135 | u'甘南', 136 | u'敦煌', 137 | 138 | # H 139 | 140 | u'海口', 141 | u'三亚', 142 | u'五指山', 143 | u'三沙', 144 | u'琼海', 145 | u'文昌', 146 | u'万宁', 147 | u'屯昌', 148 | u'琼中', 149 | u'陵水', 150 | u'东方', 151 | u'定安', 152 | u'澄迈', 153 | u'保亭', 154 | u'白沙', 155 | u'儋州', 156 | 157 | u'郑州', 158 | u'洛阳', 159 | u'新乡', 160 | u'南阳', 161 | u'许昌', 162 | u'平顶山', 163 | u'安阳', 164 | u'焦作', 165 | u'商丘', 166 | u'开封', 167 | u'濮阳', 168 | u'周口', 169 | u'信阳', 170 | u'驻马店', 171 | u'漯河', 172 | u'三门峡', 173 | u'鹤壁', 174 | u'济源', 175 | u'明港', 176 | u'鄢陵', 177 | u'禹州', 178 | u'长葛', 179 | u'灵宝', 180 | u'杞县', 181 | u'汝州', 182 | u'项城', 183 | u'偃师', 184 | u'长垣', 185 | u'滑县', 186 | u'林州', 187 | u'沁阳', 188 | u'孟州', 189 | u'温县', 190 | u'尉氏', 191 | u'兰考', 192 | u'通许', 193 | u'新安', 194 | u'伊川', 195 | u'孟津', 196 | u'宜阳', 197 | u'舞钢', 198 | u'永城', 199 | u'睢县', 200 | u'鹿邑', 201 | u'渑池', 202 | u'沈丘', 203 | u'太康', 204 | u'商水', 205 | u'淇县', 206 | u'浚县', 207 | u'范县', 208 | u'固始', 209 | u'淮滨', 210 | u'邓州', 211 | u'新野', 212 | 213 | u'哈尔滨', 214 | u'大庆', 215 | u'齐齐哈尔', 216 | u'牡丹江', 217 | u'绥化', 218 | u'佳木斯', 219 | u'鸡西', 220 | u'双鸭山', 221 | u'鹤岗', 222 | u'黑河', 223 | u'伊春', 224 | u'七台河', 225 | u'大兴安岭', 226 | u'安达', 227 | u'肇东', 228 | u'肇州', 229 | 230 | u'武汉', 
231 | u'宜昌', 232 | u'襄阳', 233 | u'荆州', 234 | u'十堰', 235 | u'黄石', 236 | u'孝感', 237 | u'黄冈', 238 | u'恩施', 239 | u'荆门', 240 | u'咸宁', 241 | u'鄂州', 242 | u'随州', 243 | u'潜江', 244 | u'天门', 245 | u'仙桃', 246 | u'神农架', 247 | u'宜都', 248 | u'汉川', 249 | u'枣阳', 250 | u'武穴', 251 | u'钟祥', 252 | u'京山', 253 | u'沙洋', 254 | u'松滋', 255 | u'广水', 256 | u'赤壁', 257 | u'老河口', 258 | u'谷城', 259 | u'宜城', 260 | u'南漳', 261 | u'云梦', 262 | u'安陆', 263 | u'大悟', 264 | u'孝昌', 265 | u'当阳', 266 | u'枝江', 267 | u'嘉鱼', 268 | u'随县', 269 | 270 | u'长沙', 271 | u'株洲', 272 | u'益阳', 273 | u'常德', 274 | u'衡阳', 275 | u'湘潭', 276 | u'岳阳', 277 | u'郴州', 278 | u'邵阳', 279 | u'怀化', 280 | u'永州', 281 | u'娄底', 282 | u'湘西', 283 | u'张家界', 284 | u'醴陵', 285 | u'澧县', 286 | u'桂阳', 287 | u'资兴', 288 | u'永兴', 289 | u'常宁', 290 | u'祁东', 291 | u'衡东', 292 | u'冷水江', 293 | u'涟源', 294 | u'双峰', 295 | u'邵阳县', 296 | u'邵东', 297 | u'沅江', 298 | u'南县', 299 | u'祁阳', 300 | u'湘阴', 301 | u'华容', 302 | u'慈利', 303 | u'攸县', 304 | 305 | u'石家庄', 306 | u'保定', 307 | u'唐山', 308 | u'廊坊', 309 | u'邯郸', 310 | u'秦皇岛', 311 | u'沧州', 312 | u'邢台', 313 | u'衡水', 314 | u'张家口', 315 | u'承德', 316 | u'定州', 317 | u'馆陶', 318 | u'张北', 319 | u'赵县', 320 | u'正定', 321 | u'迁安市', 322 | u'任丘', 323 | u'三河', 324 | u'武安', 325 | u'雄安新区', 326 | u'燕郊', 327 | u'涿州', 328 | u'河间', 329 | u'黄骅', 330 | u'沧县', 331 | u'磁县', 332 | u'涉县', 333 | u'霸州', 334 | u'香河', 335 | u'固安', 336 | u'遵化市', 337 | u'迁西', 338 | u'玉田', 339 | u'滦南', 340 | u'沙河', 341 | # J 342 | 343 | u'苏州', 344 | u'南京', 345 | u'无锡', 346 | u'常州', 347 | u'徐州', 348 | u'南通', 349 | u'扬州', 350 | u'盐城', 351 | u'淮安', 352 | u'连云港', 353 | u'泰州', 354 | u'宿迁', 355 | u'镇江', 356 | u'沭阳', 357 | u'大丰', 358 | u'如皋', 359 | u'启东', 360 | u'溧阳', 361 | u'海门', 362 | u'东海', 363 | u'扬中', 364 | u'兴化', 365 | u'新沂', 366 | u'泰兴', 367 | u'如东', 368 | u'邳州', 369 | u'沛县', 370 | u'靖江', 371 | u'建湖', 372 | u'海安', 373 | u'东台', 374 | u'丹阳', 375 | u'宝应县', 376 | u'灌南', 377 | u'灌云', 378 | u'姜堰', 379 | u'金坛', 380 | u'昆山', 381 | u'泗洪', 382 | u'泗阳', 383 | u'句容', 384 | u'射阳', 385 | 
u'阜宁', 386 | u'响水', 387 | u'盱眙', 388 | u'金湖', 389 | 390 | u'南昌', 391 | u'赣州', 392 | u'九江', 393 | u'宜春', 394 | u'吉安', 395 | u'上饶', 396 | u'萍乡', 397 | u'抚州', 398 | u'景德镇', 399 | u'新余', 400 | u'鹰潭', 401 | u'永新', 402 | u'乐平', 403 | u'进贤', 404 | u'分宜', 405 | u'丰城', 406 | u'樟树', 407 | u'高安', 408 | u'余江', 409 | u'南城', 410 | u'浮梁', 411 | 412 | u'长春', 413 | u'吉林', 414 | u'四平', 415 | u'延边', 416 | u'松原', 417 | u'白城', 418 | u'通化', 419 | u'白山', 420 | u'辽源', 421 | u'公主岭', 422 | u'梅河口', 423 | u'扶余', 424 | u'长岭', 425 | u'桦甸', 426 | u'磐石', 427 | u'梨树县', 428 | # L 429 | 430 | u'沈阳', 431 | u'大连', 432 | u'鞍山', 433 | u'锦州', 434 | u'抚顺', 435 | u'营口', 436 | u'盘锦', 437 | u'朝阳', 438 | u'丹东', 439 | u'辽阳', 440 | u'本溪', 441 | u'葫芦岛', 442 | u'铁岭', 443 | u'阜新', 444 | u'庄河', 445 | u'瓦房店', 446 | u'灯塔', 447 | u'凤城', 448 | u'北票', 449 | u'开原', 450 | # N 451 | 452 | u'银川', 453 | u'吴忠', 454 | u'石嘴山', 455 | u'中卫', 456 | u'固原', 457 | 458 | u'呼和浩特', 459 | u'包头', 460 | u'赤峰', 461 | u'鄂尔多斯', 462 | u'通辽', 463 | u'呼伦贝尔', 464 | u'巴彦淖尔市', 465 | u'乌兰察布', 466 | u'锡林郭勒', 467 | u'兴安盟', 468 | u'乌海', 469 | u'阿拉善盟', 470 | u'海拉尔', 471 | # Q 472 | 473 | u'西宁', 474 | u'海西', 475 | u'海北', 476 | u'果洛', 477 | u'海东', 478 | u'黄南', 479 | u'玉树', 480 | u'海南', 481 | u'格尔木', 482 | # S 483 | 484 | u'青岛', 485 | u'济南', 486 | u'烟台', 487 | u'潍坊', 488 | u'临沂', 489 | u'淄博', 490 | u'济宁', 491 | u'泰安', 492 | u'聊城', 493 | u'威海', 494 | u'枣庄', 495 | u'德州', 496 | u'日照', 497 | u'东营', 498 | u'菏泽', 499 | u'滨州', 500 | u'莱芜', 501 | u'章丘', 502 | u'垦利', 503 | u'诸城', 504 | u'寿光', 505 | u'龙口', 506 | u'曹县', 507 | u'单县', 508 | u'肥城', 509 | u'高密', 510 | u'广饶', 511 | u'桓台', 512 | u'莒县', 513 | u'莱州', 514 | u'蓬莱', 515 | u'青州', 516 | u'荣成', 517 | u'乳山', 518 | u'滕州', 519 | u'新泰', 520 | u'招远', 521 | u'邹城', 522 | u'邹平', 523 | u'临清', 524 | u'茌平', 525 | u'郓城', 526 | u'博兴', 527 | u'东明', 528 | u'巨野', 529 | u'无棣', 530 | u'齐河', 531 | u'微山', 532 | u'禹城', 533 | u'临邑', 534 | u'乐陵', 535 | u'莱阳', 536 | u'宁津', 537 | u'高唐', 538 | u'莘县', 539 | u'阳谷', 540 | u'冠县', 541 | u'平邑', 
542 | u'郯城', 543 | u'沂源', 544 | u'汶上', 545 | u'梁山', 546 | u'利津', 547 | u'沂南', 548 | u'栖霞', 549 | u'宁阳', 550 | u'东平', 551 | u'昌邑', 552 | u'安丘', 553 | u'昌乐', 554 | u'临朐', 555 | u'鄄城', 556 | 557 | u'太原', 558 | u'临汾', 559 | u'大同', 560 | u'运城', 561 | u'晋中', 562 | u'长治', 563 | u'晋城', 564 | u'阳泉', 565 | u'吕梁', 566 | u'忻州', 567 | u'朔州', 568 | u'临猗', 569 | u'清徐', 570 | u'柳林', 571 | u'高平', 572 | u'泽州', 573 | u'襄垣', 574 | u'孝义', 575 | 576 | u'西安', 577 | u'咸阳', 578 | u'宝鸡', 579 | u'渭南', 580 | u'汉中', 581 | u'榆林', 582 | u'延安', 583 | u'安康', 584 | u'商洛', 585 | u'铜川', 586 | u'神木', 587 | u'韩城', 588 | u'府谷', 589 | u'靖边', 590 | u'定边', 591 | 592 | u'成都', 593 | u'绵阳', 594 | u'德阳', 595 | u'南充', 596 | u'宜宾', 597 | u'自贡', 598 | u'乐山', 599 | u'泸州', 600 | u'达州', 601 | u'内江', 602 | u'遂宁', 603 | u'攀枝花', 604 | u'眉山', 605 | u'广安', 606 | u'资阳', 607 | u'凉山', 608 | u'广元', 609 | u'雅安', 610 | u'巴中', 611 | u'阿坝', 612 | u'甘孜', 613 | u'安岳', 614 | u'广汉', 615 | u'简阳', 616 | u'仁寿', 617 | u'射洪', 618 | u'大竹', 619 | u'宣汉', 620 | u'渠县', 621 | u'长宁', 622 | # X 623 | 624 | u'乌鲁木齐', 625 | u'昌吉', 626 | u'巴音郭楞', 627 | u'伊犁', 628 | u'阿克苏', 629 | u'喀什', 630 | u'哈密', 631 | u'克拉玛依', 632 | u'博尔塔拉', 633 | u'吐鲁番', 634 | u'和田', 635 | u'石河子', 636 | u'克孜勒苏', 637 | u'阿拉尔', 638 | u'五家渠', 639 | u'图木舒克', 640 | u'库尔勒', 641 | u'阿勒泰', 642 | u'塔城', 643 | 644 | u'拉萨', 645 | u'日喀则', 646 | u'山南', 647 | u'林芝', 648 | u'昌都', 649 | u'那曲', 650 | u'阿里', 651 | u'日土', 652 | u'改则', 653 | # Y 654 | 655 | u'昆明', 656 | u'曲靖', 657 | u'大理', 658 | u'红河', 659 | u'玉溪', 660 | u'丽江', 661 | u'文山', 662 | u'楚雄', 663 | u'西双版纳', 664 | u'昭通', 665 | u'德宏', 666 | u'普洱', 667 | u'保山', 668 | u'临沧', 669 | u'迪庆', 670 | u'怒江', 671 | u'弥勒', 672 | u'安宁', 673 | u'宣威', 674 | # Z 675 | 676 | u'杭州', 677 | u'宁波', 678 | u'温州', 679 | u'金华', 680 | u'嘉兴', 681 | u'台州', 682 | u'绍兴', 683 | u'湖州', 684 | u'丽水', 685 | u'衢州', 686 | u'舟山', 687 | u'乐清', 688 | u'瑞安', 689 | u'义乌', 690 | u'余姚', 691 | u'诸暨', 692 | u'象山', 693 | u'温岭', 694 | u'桐乡', 695 | u'慈溪', 696 | u'长兴', 697 | u'嘉善', 698 | 
u'海宁', 699 | u'德清', 700 | u'东阳', 701 | u'安吉', 702 | u'苍南', 703 | u'临海', 704 | u'永康', 705 | u'玉环', 706 | u'平湖', 707 | u'海盐', 708 | u'武义县', 709 | u'嵊州', 710 | u'新昌', 711 | u'江山', 712 | u'平阳', 713 | 714 | # 其他 715 | u'香港', 716 | u'澳门', 717 | u'台湾', 718 | u'全国', 719 | u'其他', 720 | 721 | # 海外 722 | u'洛杉矶', 723 | u'旧金山', 724 | u'纽约', 725 | u'多伦多', 726 | u'温哥华', 727 | u'伦敦', 728 | u'莫斯科', 729 | u'首尔', 730 | u'东京', 731 | u'新加坡', 732 | u'曼谷', 733 | u'清迈', 734 | u'迪拜', 735 | u'奥克兰', 736 | u'悉尼', 737 | u'墨尔本', 738 | u'其他海外城市', 739 | 740 | ) 741 | 742 | available_cities_map = { 743 | # A 744 | u'合肥': 'http://hf.58.com/chuzu/', 745 | u'芜湖': 'http://wuhu.58.com/chuzu/', 746 | u'蚌埠': 'http://bengbu.58.com/chuzu/', 747 | u'阜阳': 'http://fy.58.com/chuzu/', 748 | u'淮南': 'http://hn.58.com/chuzu/', 749 | u'安庆': 'http://anqing.58.com/chuzu/', 750 | u'宿州': 'http://suzhou.58.com/chuzu/', 751 | u'六安': 'http://la.58.com/chuzu/', 752 | u'淮北': 'http://huaibei.58.com/chuzu/', 753 | u'滁州': 'http://chuzhou.58.com/chuzu/', 754 | u'马鞍山': 'http://mas.58.com/chuzu/', 755 | u'铜陵': 'http://tongling.58.com/chuzu/', 756 | u'宣城': 'http://xuancheng.58.com/chuzu/', 757 | u'亳州': 'http://bozhou.58.com/chuzu/', 758 | u'黄山': 'http://huangshan.58.com/chuzu/', 759 | u'池州': 'http://chizhou.58.com/chuzu/', 760 | u'巢湖': 'http://ch.58.com/chuzu/', 761 | u'和县': 'http://hexian.58.com/chuzu/', 762 | u'霍邱': 'http://hq.58.com/chuzu/', 763 | u'桐城': 'http://tongcheng.58.com/chuzu/', 764 | u'宁国': 'http://ningguo.58.com/chuzu/', 765 | u'天长': 'http://tianchang.58.com/chuzu/', 766 | u'东至': 'http://dongzhi.58.com/chuzu/', 767 | u'无为': 'http://wuweixian.58.com/chuzu/', 768 | 769 | # B 770 | u'北京': 'http://bj.58.com/chuzu/', 771 | 772 | # F 773 | u'福州': 'http://fz.58.com/chuzu/', 774 | u'厦门': 'http://xm.58.com/chuzu/', 775 | u'泉州': 'http://qz.58.com/chuzu/', 776 | u'莆田': 'http://pt.58.com/chuzu/', 777 | u'漳州': 'http://zhangzhou.58.com/chuzu/', 778 | u'宁德': 'http://nd.58.com/chuzu/', 779 | u'三明': 'http://sm.58.com/chuzu/', 780 | 
u'南平': 'http://np.58.com/chuzu/', 781 | u'龙岩': 'http://ly.58.com/chuzu/', 782 | u'武夷山': 'http://wuyishan.58.com/chuzu/', 783 | u'石狮': 'http://shishi.58.com/chuzu/', 784 | u'晋江': 'http://jinjiangshi.58.com/chuzu/', 785 | u'南安': 'http://nananshi.58.com/chuzu/', 786 | u'龙海': 'http://longhai.58.com/chuzu/', 787 | u'上杭': 'http://shanghangxian.58.com/chuzu/', 788 | u'福安': 'http://fuanshi.58.com/chuzu/', 789 | u'福鼎': 'http://fudingshi.58.com/chuzu/', 790 | u'安溪': 'http://anxixian.58.com/chuzu/', 791 | u'永春': 'http://yongchunxian.58.com/chuzu/', 792 | u'永安': 'http://yongan.58.com/chuzu/', 793 | u'漳浦': 'http://zhangpu.58.com/chuzu/', 794 | 795 | # G 796 | u'深圳': 'http://sz.58.com/chuzu/', 797 | u'广州': 'http://gz.58.com/chuzu/', 798 | u'东莞': 'http://dg.58.com/chuzu/', 799 | u'佛山': 'http://fs.58.com/chuzu/', 800 | u'中山': 'http://zs.58.com/chuzu/', 801 | u'珠海': 'http://zh.58.com/chuzu/', 802 | u'惠州': 'http://huizhou.58.com/chuzu/', 803 | u'江门': 'http://jm.58.com/chuzu/', 804 | u'汕头': 'http://st.58.com/chuzu/', 805 | u'湛江': 'http://zhanjiang.58.com/chuzu/', 806 | u'肇庆': 'http://zq.58.com/chuzu/', 807 | u'茂名': 'http://mm.58.com/chuzu/', 808 | u'揭阳': 'http://jy.58.com/chuzu/', 809 | u'梅州': 'http://mz.58.com/chuzu/', 810 | u'清远': 'http://qingyuan.58.com/chuzu/', 811 | u'阳江': 'http://yj.58.com/chuzu/', 812 | u'韶关': 'http://sg.58.com/chuzu/', 813 | u'河源': 'http://heyuan.58.com/chuzu/', 814 | u'云浮': 'http://yf.58.com/chuzu/', 815 | u'汕尾': 'http://sw.58.com/chuzu/', 816 | u'潮州': 'http://chaozhou.58.com/chuzu/', 817 | u'台山': 'http://taishan.58.com/chuzu/', 818 | u'阳春': 'http://yangchun.58.com/chuzu/', 819 | u'顺德': 'http://sd.58.com/chuzu/', 820 | u'惠东': 'http://huidong.58.com/chuzu/', 821 | u'博罗': 'http://boluo.58.com/chuzu/', 822 | u'海丰': 'http://haifengxian.58.com/chuzu/', 823 | u'开平': 'http://kaipingshi.58.com/chuzu/', 824 | u'陆丰': 'http://lufengshi.58.com/chuzu/', 825 | 826 | u'南宁': 'http://nn.58.com/chuzu/', 827 | u'柳州': 'http://liuzhou.58.com/chuzu/', 828 | u'桂林': 
'http://gl.58.com/chuzu/', 829 | u'玉林': 'http://yulin.58.com/chuzu/', 830 | u'梧州': 'http://wuzhou.58.com/chuzu/', 831 | u'北海': 'http://bh.58.com/chuzu/', 832 | u'贵港': 'http://gg.58.com/chuzu/', 833 | u'钦州': 'http://qinzhou.58.com/chuzu/', 834 | u'百色': 'http://baise.58.com/chuzu/', 835 | u'河池': 'http://hc.58.com/chuzu/', 836 | u'来宾': 'http://lb.58.com/chuzu/', 837 | u'贺州': 'http://hezhou.58.com/chuzu/', 838 | u'防城港': 'http://fcg.58.com/chuzu/', 839 | u'崇左': 'http://chongzuo.58.com/chuzu/', 840 | u'桂平': 'http://guipingqu.58.com/chuzu/', 841 | u'北流': 'http://beiliushi.58.com/chuzu/', 842 | u'博白': 'http://bobaixian.58.com/chuzu/', 843 | u'岑溪': 'http://cenxi.58.com/chuzu/', 844 | 845 | u'贵阳': 'http://gy.58.com/chuzu/', 846 | u'遵义': 'http://zunyi.58.com/chuzu/', 847 | u'黔东南': 'http://qdn.58.com/chuzu/', 848 | u'黔南': 'http://qn.58.com/chuzu/', 849 | u'六盘水': 'http://lps.58.com/chuzu/', 850 | u'毕节': 'http://bijie.58.com/chuzu/', 851 | u'铜仁': 'http://tr.58.com/chuzu/', 852 | u'安顺': 'http://anshun.58.com/chuzu/', 853 | u'黔西南': 'http://qxn.58.com/chuzu/', 854 | u'仁怀': 'http://renhuaishi.58.com/chuzu/', 855 | u'清镇': 'http://qingzhen.58.com/chuzu/', 856 | 857 | u'兰州': 'http://lz.58.com/chuzu/', 858 | u'天水': 'http://tianshui.58.com/chuzu/', 859 | u'白银': 'http://by.58.com/chuzu/', 860 | u'庆阳': 'http://qingyang.58.com/chuzu/', 861 | u'平凉': 'http://pl.58.com/chuzu/', 862 | u'酒泉': 'http://jq.58.com/chuzu/', 863 | u'张掖': 'http://zhangye.58.com/chuzu/', 864 | u'武威': 'http://wuwei.58.com/chuzu/', 865 | u'定西': 'http://dx.58.com/chuzu/', 866 | u'金昌': 'http://jinchang.58.com/chuzu/', 867 | u'陇南': 'http://ln.58.com/chuzu/', 868 | u'临夏': 'http://linxia.58.com/chuzu/', 869 | u'嘉峪关': 'http://jyg.58.com/chuzu/', 870 | u'甘南': 'http://gn.58.com/chuzu/', 871 | u'敦煌': 'http://dunhuang.58.com/chuzu/', 872 | 873 | # H 874 | 875 | u'海口': 'http://haikou.58.com/chuzu/', 876 | u'三亚': 'http://sanya.58.com/chuzu/', 877 | u'五指山': 'http://wzs.58.com/chuzu/', 878 | u'三沙': 'http://sansha.58.com/chuzu/', 879 | 
u'琼海': 'http://qh.58.com/chuzu/', 880 | u'文昌': 'http://wenchang.58.com/chuzu/', 881 | u'万宁': 'http://wanning.58.com/chuzu/', 882 | u'屯昌': 'http://tunchang.58.com/chuzu/', 883 | u'琼中': 'http://qiongzhong.58.com/chuzu/', 884 | u'陵水': 'http://lingshui.58.com/chuzu/', 885 | u'东方': 'http://df.58.com/chuzu/', 886 | u'定安': 'http://da.58.com/chuzu/', 887 | u'澄迈': 'http://cm.58.com/chuzu/', 888 | u'保亭': 'http://baoting.58.com/chuzu/', 889 | u'白沙': 'http://baish.58.com/chuzu/', 890 | u'儋州': 'http://danzhou.58.com/chuzu/', 891 | 892 | u'郑州': 'http://zz.58.com/chuzu/', 893 | u'洛阳': 'http://luoyang.58.com/chuzu/', 894 | u'新乡': 'http://xx.58.com/chuzu/', 895 | u'南阳': 'http://ny.58.com/chuzu/', 896 | u'许昌': 'http://xc.58.com/chuzu/', 897 | u'平顶山': 'http://pds.58.com/chuzu/', 898 | u'安阳': 'http://ay.58.com/chuzu/', 899 | u'焦作': 'http://jiaozuo.58.com/chuzu/', 900 | u'商丘': 'http://sq.58.com/chuzu/', 901 | u'开封': 'http://kaifeng.58.com/chuzu/', 902 | u'濮阳': 'http://puyang.58.com/chuzu/', 903 | u'周口': 'http://zk.58.com/chuzu/', 904 | u'信阳': 'http://xy.58.com/chuzu/', 905 | u'驻马店': 'http://zmd.58.com/chuzu/', 906 | u'漯河': 'http://luohe.58.com/chuzu/', 907 | u'三门峡': 'http://smx.58.com/chuzu/', 908 | u'鹤壁': 'http://hb.58.com/chuzu/', 909 | u'济源': 'http://jiyuan.58.com/chuzu/', 910 | u'明港': 'http://mg.58.com/chuzu/', 911 | u'鄢陵': 'http://yanling.58.com/chuzu/', 912 | u'禹州': 'http://yuzhou.58.com/chuzu/', 913 | u'长葛': 'http://changge.58.com/chuzu/', 914 | u'灵宝': 'http://lingbaoshi.58.com/chuzu/', 915 | u'杞县': 'http://qixianqu.58.com/chuzu/', 916 | u'汝州': 'http://ruzhou.58.com/chuzu/', 917 | u'项城': 'http://xiangchengshi.58.com/chuzu/', 918 | u'偃师': 'http://yanshiqu.58.com/chuzu/', 919 | u'长垣': 'http://changyuan.58.com/chuzu/', 920 | u'滑县': 'http://huaxian.58.com/chuzu/', 921 | u'林州': 'http://linzhou.58.com/chuzu/', 922 | u'沁阳': 'http://qinyang.58.com/chuzu/', 923 | u'孟州': 'http://mengzhou.58.com/chuzu/', 924 | u'温县': 'http://wenxian.58.com/chuzu/', 925 | u'尉氏': 
'http://weishixian.58.com/chuzu/', 926 | u'兰考': 'http://lankaoxian.58.com/chuzu/', 927 | u'通许': 'http://tongxuxian.58.com/chuzu/', 928 | u'新安': 'http://lyxinan.58.com/chuzu/', 929 | u'伊川': 'http://yichuan.58.com/chuzu/', 930 | u'孟津': 'http://mengjinqu.58.com/chuzu/', 931 | u'宜阳': 'http://lyyiyang.58.com/chuzu/', 932 | u'舞钢': 'http://wugang.58.com/chuzu/', 933 | u'永城': 'http://yongcheng.58.com/chuzu/', 934 | u'睢县': 'http://suixian.58.com/chuzu/', 935 | u'鹿邑': 'http://luyi.58.com/chuzu/', 936 | u'渑池': 'http://yingchixian.58.com/chuzu/', 937 | u'沈丘': 'http://shenqiu.58.com/chuzu/', 938 | u'太康': 'http://taikang.58.com/chuzu/', 939 | u'商水': 'http://shangshui.58.com/chuzu/', 940 | u'淇县': 'http://qixianq.58.com/chuzu/', 941 | u'浚县': 'http://junxian.58.com/chuzu/', 942 | u'范县': 'http://fanxian.58.com/chuzu/', 943 | u'固始': 'http://gushixian.58.com/chuzu/', 944 | u'淮滨': 'http://huaibinxian.58.com/chuzu/', 945 | u'邓州': 'http://dengzhou.58.com/chuzu/', 946 | u'新野': 'http://xinye.58.com/chuzu/', 947 | 948 | u'哈尔滨': 'http://hrb.58.com/chuzu/', 949 | u'大庆': 'http://dq.58.com/chuzu/', 950 | u'齐齐哈尔': 'http://qqhr.58.com/chuzu/', 951 | u'牡丹江': 'http://mdj.58.com/chuzu/', 952 | u'绥化': 'http://suihua.58.com/chuzu/', 953 | u'佳木斯': 'http://jms.58.com/chuzu/', 954 | u'鸡西': 'http://jixi.58.com/chuzu/', 955 | u'双鸭山': 'http://sys.58.com/chuzu/', 956 | u'鹤岗': 'http://hegang.58.com/chuzu/', 957 | u'黑河': 'http://heihe.58.com/chuzu/', 958 | u'伊春': 'http://yich.58.com/chuzu/', 959 | u'七台河': 'http://qth.58.com/chuzu/', 960 | u'大兴安岭': 'http://dxal.58.com/chuzu/', 961 | u'安达': 'http://shanda.58.com/chuzu/', 962 | u'肇东': 'http://shzhaodong.58.com/chuzu/', 963 | u'肇州': 'http://zhaozhou.58.com/chuzu/', 964 | 965 | u'武汉': 'http://wh.58.com/chuzu/', 966 | u'宜昌': 'http://yc.58.com/chuzu/', 967 | u'襄阳': 'http://xf.58.com/chuzu/', 968 | u'荆州': 'http://jingzhou.58.com/chuzu/', 969 | u'十堰': 'http://shiyan.58.com/chuzu/', 970 | u'黄石': 'http://hshi.58.com/chuzu/', 971 | u'孝感': 'http://xiaogan.58.com/chuzu/', 
972 | u'黄冈': 'http://hg.58.com/chuzu/', 973 | u'恩施': 'http://es.58.com/chuzu/', 974 | u'荆门': 'http://jingmen.58.com/chuzu/', 975 | u'咸宁': 'http://xianning.58.com/chuzu/', 976 | u'鄂州': 'http://ez.58.com/chuzu/', 977 | u'随州': 'http://suizhou.58.com/chuzu/', 978 | u'潜江': 'http://qianjiang.58.com/chuzu/', 979 | u'天门': 'http://tm.58.com/chuzu/', 980 | u'仙桃': 'http://xiantao.58.com/chuzu/', 981 | u'神农架': 'http://snj.58.com/chuzu/', 982 | u'宜都': 'http://yidou.58.com/chuzu/', 983 | u'汉川': 'http://hanchuan.58.com/chuzu/', 984 | u'枣阳': 'http://zaoyang.58.com/chuzu/', 985 | u'武穴': 'http://wuxueshi.58.com/chuzu/', 986 | u'钟祥': 'http://zhongxiangshi.58.com/chuzu/', 987 | u'京山': 'http://jingshanxian.58.com/chuzu/', 988 | u'沙洋': 'http://shayangxian.58.com/chuzu/', 989 | u'松滋': 'http://songzi.58.com/chuzu/', 990 | u'广水': 'http://guangshuishi.58.com/chuzu/', 991 | u'赤壁': 'http://chibishi.58.com/chuzu/', 992 | u'老河口': 'http://laohekou.58.com/chuzu/', 993 | u'谷城': 'http://gucheng.58.com/chuzu/', 994 | u'宜城': 'http://yichengshi.58.com/chuzu/', 995 | u'南漳': 'http://nanzhang.58.com/chuzu/', 996 | u'云梦': 'http://yunmeng.58.com/chuzu/', 997 | u'安陆': 'http://anlu.58.com/chuzu/', 998 | u'大悟': 'http://dawu.58.com/chuzu/', 999 | u'孝昌': 'http://xiaochang.58.com/chuzu/', 1000 | u'当阳': 'http://dangyang.58.com/chuzu/', 1001 | u'枝江': 'http://zhijiang.58.com/chuzu/', 1002 | u'嘉鱼': 'http://jiayuxian.58.com/chuzu/', 1003 | u'随县': 'http://suixia.58.com/chuzu/', 1004 | 1005 | u'长沙': 'http://cs.58.com/chuzu/', 1006 | u'株洲': 'http://zhuzhou.58.com/chuzu/', 1007 | u'益阳': 'http://yiyang.58.com/chuzu/', 1008 | u'常德': 'http://changde.58.com/chuzu/', 1009 | u'衡阳': 'http://hy.58.com/chuzu/', 1010 | u'湘潭': 'http://xiangtan.58.com/chuzu/', 1011 | u'岳阳': 'http://yy.58.com/chuzu/', 1012 | u'郴州': 'http://chenzhou.58.com/chuzu/', 1013 | u'邵阳': 'http://shaoyang.58.com/chuzu/', 1014 | u'怀化': 'http://hh.58.com/chuzu/', 1015 | u'永州': 'http://yongzhou.58.com/chuzu/', 1016 | u'娄底': 'http://ld.58.com/chuzu/', 1017 | u'湘西': 
'http://xiangxi.58.com/chuzu/', 1018 | u'张家界': 'http://zjj.58.com/chuzu/', 1019 | u'醴陵': 'http://liling.58.com/chuzu/', 1020 | u'澧县': 'http://lixian.58.com/chuzu/', 1021 | u'桂阳': 'http://czguiyang.58.com/chuzu/', 1022 | u'资兴': 'http://zixing.58.com/chuzu/', 1023 | u'永兴': 'http://yongxing.58.com/chuzu/', 1024 | u'常宁': 'http://changningshi.58.com/chuzu/', 1025 | u'祁东': 'http://qidongxian.58.com/chuzu/', 1026 | u'衡东': 'http://hengdong.58.com/chuzu/', 1027 | u'冷水江': 'http://lengshuijiangshi.58.com/chuzu/', 1028 | u'涟源': 'http://lianyuanshi.58.com/chuzu/', 1029 | u'双峰': 'http://shuangfengxian.58.com/chuzu/', 1030 | u'邵阳县': 'http://shaoyangxian.58.com/chuzu/', 1031 | u'邵东': 'http://shaodongxian.58.com/chuzu/', 1032 | u'沅江': 'http://yuanjiangs.58.com/chuzu/', 1033 | u'南县': 'http://nanxian.58.com/chuzu/', 1034 | u'祁阳': 'http://qiyang.58.com/chuzu/', 1035 | u'湘阴': 'http://xiangyin.58.com/chuzu/', 1036 | u'华容': 'http://huarong.58.com/chuzu/', 1037 | u'慈利': 'http://cilixian.58.com/chuzu/', 1038 | u'攸县': 'http://zzyouxian.58.com/chuzu/', 1039 | 1040 | u'石家庄': 'http://sjz.58.com/chuzu/', 1041 | u'保定': 'http://bd.58.com/chuzu/', 1042 | u'唐山': 'http://ts.58.com/chuzu/', 1043 | u'廊坊': 'http://lf.58.com/chuzu/', 1044 | u'邯郸': 'http://hd.58.com/chuzu/', 1045 | u'秦皇岛': 'http://qhd.58.com/chuzu/', 1046 | u'沧州': 'http://cangzhou.58.com/chuzu/', 1047 | u'邢台': 'http://xt.58.com/chuzu/', 1048 | u'衡水': 'http://hs.58.com/chuzu/', 1049 | u'张家口': 'http://zjk.58.com/chuzu/', 1050 | u'承德': 'http://chengde.58.com/chuzu/', 1051 | u'定州': 'http://dingzhou.58.com/chuzu/', 1052 | u'馆陶': 'http://gt.58.com/chuzu/', 1053 | u'张北': 'http://zhangbei.58.com/chuzu/', 1054 | u'赵县': 'http://zx.58.com/chuzu/', 1055 | u'正定': 'http://zd.58.com/chuzu/', 1056 | u'迁安市': 'http://qianan.58.com/chuzu/', 1057 | u'任丘': 'http://renqiu.58.com/chuzu/', 1058 | u'三河': 'http://sanhe.58.com/chuzu/', 1059 | u'武安': 'http://wuan.58.com/chuzu/', 1060 | u'雄安新区': 'http://xionganxinqu.58.com/chuzu/', 1061 | u'燕郊': 
'http://lfyanjiao.58.com/chuzu/', 1062 | u'涿州': 'http://zhuozhou.58.com/chuzu/', 1063 | u'河间': 'http://hejian.58.com/chuzu/', 1064 | u'黄骅': 'http://huanghua.58.com/chuzu/', 1065 | u'沧县': 'http://cangxian.58.com/chuzu/', 1066 | u'磁县': 'http://cixian.58.com/chuzu/', 1067 | u'涉县': 'http://shexian.58.com/chuzu/', 1068 | u'霸州': 'http://bazhou.58.com/chuzu/', 1069 | u'香河': 'http://xianghe.58.com/chuzu/', 1070 | u'固安': 'http://lfguan.58.com/chuzu/', 1071 | u'遵化市': 'http://zunhua.58.com/chuzu/', 1072 | u'迁西': 'http://qianxixian.58.com/chuzu/', 1073 | u'玉田': 'http://yutianxian.58.com/chuzu/', 1074 | u'滦南': 'http://luannanxian.58.com/chuzu/', 1075 | u'沙河': 'http://shaheshi.58.com/chuzu/', 1076 | # J 1077 | 1078 | u'苏州': 'http://su.58.com/chuzu/', 1079 | u'南京': 'http://nj.58.com/chuzu/', 1080 | u'无锡': 'http://wx.58.com/chuzu/', 1081 | u'常州': 'http://cz.58.com/chuzu/', 1082 | u'徐州': 'http://xz.58.com/chuzu/', 1083 | u'南通': 'http://nt.58.com/chuzu/', 1084 | u'扬州': 'http://yz.58.com/chuzu/', 1085 | u'盐城': 'http://yancheng.58.com/chuzu/', 1086 | u'淮安': 'http://ha.58.com/chuzu/', 1087 | u'连云港': 'http://lyg.58.com/chuzu/', 1088 | u'泰州': 'http://taizhou.58.com/chuzu/', 1089 | u'宿迁': 'http://suqian.58.com/chuzu/', 1090 | u'镇江': 'http://zj.58.com/chuzu/', 1091 | u'沭阳': 'http://shuyang.58.com/chuzu/', 1092 | u'大丰': 'http://dafeng.58.com/chuzu/', 1093 | u'如皋': 'http://rugao.58.com/chuzu/', 1094 | u'启东': 'http://qidong.58.com/chuzu/', 1095 | u'溧阳': 'http://liyang.58.com/chuzu/', 1096 | u'海门': 'http://haimen.58.com/chuzu/', 1097 | u'东海': 'http://donghai.58.com/chuzu/', 1098 | u'扬中': 'http://yangzhong.58.com/chuzu/', 1099 | u'兴化': 'http://xinghuashi.58.com/chuzu/', 1100 | u'新沂': 'http://xinyishi.58.com/chuzu/', 1101 | u'泰兴': 'http://taixing.58.com/chuzu/', 1102 | u'如东': 'http://rudong.58.com/chuzu/', 1103 | u'邳州': 'http://pizhou.58.com/chuzu/', 1104 | u'沛县': 'http://xzpeixian.58.com/chuzu/', 1105 | u'靖江': 'http://jingjiang.58.com/chuzu/', 1106 | u'建湖': 'http://jianhu.58.com/chuzu/', 1107 | 
u'海安': 'http://haian.58.com/chuzu/', 1108 | u'东台': 'http://dongtai.58.com/chuzu/', 1109 | u'丹阳': 'http://danyang.58.com/chuzu/', 1110 | u'宝应县': 'http://baoyingx.58.com/chuzu/', 1111 | u'灌南': 'http://guannan.58.com/chuzu/', 1112 | u'灌云': 'http://guanyun.58.com/chuzu/', 1113 | u'姜堰': 'http://jiangyan.58.com/chuzu/', 1114 | u'金坛': 'http://jintan.58.com/chuzu/', 1115 | u'昆山': 'http://szkunshan.58.com/chuzu/', 1116 | u'泗洪': 'http://sihong.58.com/chuzu/', 1117 | u'泗阳': 'http://siyang.58.com/chuzu/', 1118 | u'句容': 'http://jurong.58.com/chuzu/', 1119 | u'射阳': 'http://sheyang.58.com/chuzu/', 1120 | u'阜宁': 'http://funingxian.58.com/chuzu/', 1121 | u'响水': 'http://xiangshui.58.com/chuzu/', 1122 | u'盱眙': 'http://xuyi.58.com/chuzu/', 1123 | u'金湖': 'http://jinhu.58.com/chuzu/', 1124 | 1125 | u'南昌': 'http://nc.58.com/chuzu/', 1126 | u'赣州': 'http://ganzhou.58.com/chuzu/', 1127 | u'九江': 'http://jj.58.com/chuzu/', 1128 | u'宜春': 'http://yichun.58.com/chuzu/', 1129 | u'吉安': 'http://ja.58.com/chuzu/', 1130 | u'上饶': 'http://sr.58.com/chuzu/', 1131 | u'萍乡': 'http://px.58.com/chuzu/', 1132 | u'抚州': 'http://fuzhou.58.com/chuzu/', 1133 | u'景德镇': 'http://jdz.58.com/chuzu/', 1134 | u'新余': 'http://xinyu.58.com/chuzu/', 1135 | u'鹰潭': 'http://yingtan.58.com/chuzu/', 1136 | u'永新': 'http://yxx.58.com/chuzu/', 1137 | u'乐平': 'http://lepingshi.58.com/chuzu/', 1138 | u'进贤': 'http://jinxian.58.com/chuzu/', 1139 | u'分宜': 'http://fenyi.58.com/chuzu/', 1140 | u'丰城': 'http://fengchengshi.58.com/chuzu/', 1141 | u'樟树': 'http://zhangshu.58.com/chuzu/', 1142 | u'高安': 'http://gaoan.58.com/chuzu/', 1143 | u'余江': 'http://yujiang.58.com/chuzu/', 1144 | u'南城': 'http://nanchengx.58.com/chuzu/', 1145 | u'浮梁': 'http://fuliangxian.58.com/chuzu/', 1146 | 1147 | u'长春': 'http://cc.58.com/chuzu/', 1148 | u'吉林': 'http://jl.58.com/chuzu/', 1149 | u'四平': 'http://sp.58.com/chuzu/', 1150 | u'延边': 'http://yanbian.58.com/chuzu/', 1151 | u'松原': 'http://songyuan.58.com/chuzu/', 1152 | u'白城': 'http://bc.58.com/chuzu/', 1153 | u'通化': 
'http://th.58.com/chuzu/', 1154 | u'白山': 'http://baishan.58.com/chuzu/', 1155 | u'辽源': 'http://liaoyuan.58.com/chuzu/', 1156 | u'公主岭': 'http://gongzhuling.58.com/chuzu/', 1157 | u'梅河口': 'http://meihekou.58.com/chuzu/', 1158 | u'扶余': 'http://fuyuxian.58.com/chuzu/', 1159 | u'长岭': 'http://changlingxian.58.com/chuzu/', 1160 | u'桦甸': 'http://huadian.58.com/chuzu/', 1161 | u'磐石': 'http://panshi.58.com/chuzu/', 1162 | u'梨树县': 'http://lishu.58.com/chuzu/', 1163 | # L 1164 | 1165 | u'沈阳': 'http://sy.58.com/chuzu/', 1166 | u'大连': 'http://dl.58.com/chuzu/', 1167 | u'鞍山': 'http://as.58.com/chuzu/', 1168 | u'锦州': 'http://jinzhou.58.com/chuzu/', 1169 | u'抚顺': 'http://fushun.58.com/chuzu/', 1170 | u'营口': 'http://yk.58.com/chuzu/', 1171 | u'盘锦': 'http://pj.58.com/chuzu/', 1172 | u'朝阳': 'http://cy.58.com/chuzu/', 1173 | u'丹东': 'http://dandong.58.com/chuzu/', 1174 | u'辽阳': 'http://liaoyang.58.com/chuzu/', 1175 | u'本溪': 'http://benxi.58.com/chuzu/', 1176 | u'葫芦岛': 'http://hld.58.com/chuzu/', 1177 | u'铁岭': 'http://tl.58.com/chuzu/', 1178 | u'阜新': 'http://fx.58.com/chuzu/', 1179 | u'庄河': 'http://pld.58.com/chuzu/', 1180 | u'瓦房店': 'http://wfd.58.com/chuzu/', 1181 | u'灯塔': 'http://dengta.58.com/chuzu/', 1182 | u'凤城': 'http://fengcheng.58.com/chuzu/', 1183 | u'北票': 'http://beipiao.58.com/chuzu/', 1184 | u'开原': 'http://kaiyuan.58.com/chuzu/', 1185 | # N 1186 | 1187 | u'银川': 'http://yinchuan.58.com/chuzu/', 1188 | u'吴忠': 'http://wuzhong.58.com/chuzu/', 1189 | u'石嘴山': 'http://szs.58.com/chuzu/', 1190 | u'中卫': 'http://zw.58.com/chuzu/', 1191 | u'固原': 'http://guyuan.58.com/chuzu/', 1192 | 1193 | u'呼和浩特': 'http://hu.58.com/chuzu/', 1194 | u'包头': 'http://bt.58.com/chuzu/', 1195 | u'赤峰': 'http://chifeng.58.com/chuzu/', 1196 | u'鄂尔多斯': 'http://erds.58.com/chuzu/', 1197 | u'通辽': 'http://tongliao.58.com/chuzu/', 1198 | u'呼伦贝尔': 'http://hlbe.58.com/chuzu/', 1199 | u'巴彦淖尔市': 'http://bycem.58.com/chuzu/', 1200 | u'乌兰察布': 'http://wlcb.58.com/chuzu/', 1201 | u'锡林郭勒': 'http://xl.58.com/chuzu/', 1202 | 
u'兴安盟': 'http://xam.58.com/chuzu/', 1203 | u'乌海': 'http://wuhai.58.com/chuzu/', 1204 | u'阿拉善盟': 'http://alsm.58.com/chuzu/', 1205 | u'海拉尔': 'http://hlr.58.com/chuzu/', 1206 | # Q 1207 | 1208 | u'西宁': 'http://xn.58.com/chuzu/', 1209 | u'海西': 'http://hx.58.com/chuzu/', 1210 | u'海北': 'http://haibei.58.com/chuzu/', 1211 | u'果洛': 'http://guoluo.58.com/chuzu/', 1212 | u'海东': 'http://haidong.58.com/chuzu/', 1213 | u'黄南': 'http://huangnan.58.com/chuzu/', 1214 | u'玉树': 'http://ys.58.com/chuzu/', 1215 | u'海南': 'http://hainan.58.com/chuzu/', 1216 | u'格尔木': 'http://geermushi.58.com/chuzu/', 1217 | # S 1218 | 1219 | u'青岛': 'http://qd.58.com/chuzu/', 1220 | u'济南': 'http://jn.58.com/chuzu/', 1221 | u'烟台': 'http://yt.58.com/chuzu/', 1222 | u'潍坊': 'http://wf.58.com/chuzu/', 1223 | u'临沂': 'http://linyi.58.com/chuzu/', 1224 | u'淄博': 'http://zb.58.com/chuzu/', 1225 | u'济宁': 'http://jining.58.com/chuzu/', 1226 | u'泰安': 'http://ta.58.com/chuzu/', 1227 | u'聊城': 'http://lc.58.com/chuzu/', 1228 | u'威海': 'http://weihai.58.com/chuzu/', 1229 | u'枣庄': 'http://zaozhuang.58.com/chuzu/', 1230 | u'德州': 'http://dz.58.com/chuzu/', 1231 | u'日照': 'http://rizhao.58.com/chuzu/', 1232 | u'东营': 'http://dy.58.com/chuzu/', 1233 | u'菏泽': 'http://heze.58.com/chuzu/', 1234 | u'滨州': 'http://bz.58.com/chuzu/', 1235 | u'莱芜': 'http://lw.58.com/chuzu/', 1236 | u'章丘': 'http://zhangqiu.58.com/chuzu/', 1237 | u'垦利': 'http://kl.58.com/chuzu/', 1238 | u'诸城': 'http://zc.58.com/chuzu/', 1239 | u'寿光': 'http://shouguang.58.com/chuzu/', 1240 | u'龙口': 'http://longkou.58.com/chuzu/', 1241 | u'曹县': 'http://caoxian.58.com/chuzu/', 1242 | u'单县': 'http://shanxian.58.com/chuzu/', 1243 | u'肥城': 'http://feicheng.58.com/chuzu/', 1244 | u'高密': 'http://gaomi.58.com/chuzu/', 1245 | u'广饶': 'http://guangrao.58.com/chuzu/', 1246 | u'桓台': 'http://huantaixian.58.com/chuzu/', 1247 | u'莒县': 'http://juxian.58.com/chuzu/', 1248 | u'莱州': 'http://laizhou.58.com/chuzu/', 1249 | u'蓬莱': 'http://penglai.58.com/chuzu/', 1250 | u'青州': 
'http://qingzhou.58.com/chuzu/', 1251 | u'荣成': 'http://rongcheng.58.com/chuzu/', 1252 | u'乳山': 'http://rushan.58.com/chuzu/', 1253 | u'滕州': 'http://tengzhou.58.com/chuzu/', 1254 | u'新泰': 'http://xintai.58.com/chuzu/', 1255 | u'招远': 'http://zhaoyuan.58.com/chuzu/', 1256 | u'邹城': 'http://zoucheng.58.com/chuzu/', 1257 | u'邹平': 'http://zouping.58.com/chuzu/', 1258 | u'临清': 'http://linqing.58.com/chuzu/', 1259 | u'茌平': 'http://chiping.58.com/chuzu/', 1260 | u'郓城': 'http://hzyc.58.com/chuzu/', 1261 | u'博兴': 'http://boxing.58.com/chuzu/', 1262 | u'东明': 'http://dongming.58.com/chuzu/', 1263 | u'巨野': 'http://juye.58.com/chuzu/', 1264 | u'无棣': 'http://wudi.58.com/chuzu/', 1265 | u'齐河': 'http://qihe.58.com/chuzu/', 1266 | u'微山': 'http://weishan.58.com/chuzu/', 1267 | u'禹城': 'http://yuchengshi.58.com/chuzu/', 1268 | u'临邑': 'http://linyixianq.58.com/chuzu/', 1269 | u'乐陵': 'http://leling.58.com/chuzu/', 1270 | u'莱阳': 'http://laiyang.58.com/chuzu/', 1271 | u'宁津': 'http://ningjin.58.com/chuzu/', 1272 | u'高唐': 'http://gaotang.58.com/chuzu/', 1273 | u'莘县': 'http://shenxian.58.com/chuzu/', 1274 | u'阳谷': 'http://yanggu.58.com/chuzu/', 1275 | u'冠县': 'http://guanxian.58.com/chuzu/', 1276 | u'平邑': 'http://pingyi.58.com/chuzu/', 1277 | u'郯城': 'http://tancheng.58.com/chuzu/', 1278 | u'沂源': 'http://yiyuanxian.58.com/chuzu/', 1279 | u'汶上': 'http://wenshang.58.com/chuzu/', 1280 | u'梁山': 'http://liangshanx.58.com/chuzu/', 1281 | u'利津': 'http://lijin.58.com/chuzu/', 1282 | u'沂南': 'http://yinanxian.58.com/chuzu/', 1283 | u'栖霞': 'http://qixia.58.com/chuzu/', 1284 | u'宁阳': 'http://ningyang.58.com/chuzu/', 1285 | u'东平': 'http://dongping.58.com/chuzu/', 1286 | u'昌邑': 'http://changyishi.58.com/chuzu/', 1287 | u'安丘': 'http://anqiu.58.com/chuzu/', 1288 | u'昌乐': 'http://changle.58.com/chuzu/', 1289 | u'临朐': 'http://linqu.58.com/chuzu/', 1290 | u'鄄城': 'http://juancheng.58.com/chuzu/', 1291 | 1292 | u'太原': 'http://ty.58.com/chuzu/', 1293 | u'临汾': 'http://linfen.58.com/chuzu/', 1294 | u'大同': 
'http://dt.58.com/chuzu/', 1295 | u'运城': 'http://yuncheng.58.com/chuzu/', 1296 | u'晋中': 'http://jz.58.com/chuzu/', 1297 | u'长治': 'http://changzhi.58.com/chuzu/', 1298 | u'晋城': 'http://jincheng.58.com/chuzu/', 1299 | u'阳泉': 'http://yq.58.com/chuzu/', 1300 | u'吕梁': 'http://lvliang.58.com/chuzu/', 1301 | u'忻州': 'http://xinzhou.58.com/chuzu/', 1302 | u'朔州': 'http://shuozhou.58.com/chuzu/', 1303 | u'临猗': 'http://linyixian.58.com/chuzu/', 1304 | u'清徐': 'http://qingxu.58.com/chuzu/', 1305 | u'柳林': 'http://liulin.58.com/chuzu/', 1306 | u'高平': 'http://gaoping.58.com/chuzu/', 1307 | u'泽州': 'http://zezhou.58.com/chuzu/', 1308 | u'襄垣': 'http://xiangyuanxian.58.com/chuzu/', 1309 | u'孝义': 'http://xiaoyi.58.com/chuzu/', 1310 | 1311 | u'西安': 'http://xa.58.com/chuzu/', 1312 | u'咸阳': 'http://xianyang.58.com/chuzu/', 1313 | u'宝鸡': 'http://baoji.58.com/chuzu/', 1314 | u'渭南': 'http://wn.58.com/chuzu/', 1315 | u'汉中': 'http://hanzhong.58.com/chuzu/', 1316 | u'榆林': 'http://yl.58.com/chuzu/', 1317 | u'延安': 'http://yanan.58.com/chuzu/', 1318 | u'安康': 'http://ankang.58.com/chuzu/', 1319 | u'商洛': 'http://sl.58.com/chuzu/', 1320 | u'铜川': 'http://tc.58.com/chuzu/', 1321 | u'神木': 'http://shenmu.58.com/chuzu/', 1322 | u'韩城': 'http://hancheng.58.com/chuzu/', 1323 | u'府谷': 'http://fugu.58.com/chuzu/', 1324 | u'靖边': 'http://jingbian.58.com/chuzu/', 1325 | u'定边': 'http://dingbian.58.com/chuzu/', 1326 | 1327 | u'成都': 'http://cd.58.com/chuzu/', 1328 | u'绵阳': 'http://mianyang.58.com/chuzu/', 1329 | u'德阳': 'http://deyang.58.com/chuzu/', 1330 | u'南充': 'http://nanchong.58.com/chuzu/', 1331 | u'宜宾': 'http://yb.58.com/chuzu/', 1332 | u'自贡': 'http://zg.58.com/chuzu/', 1333 | u'乐山': 'http://ls.58.com/chuzu/', 1334 | u'泸州': 'http://luzhou.58.com/chuzu/', 1335 | u'达州': 'http://dazhou.58.com/chuzu/', 1336 | u'内江': 'http://scnj.58.com/chuzu/', 1337 | u'遂宁': 'http://suining.58.com/chuzu/', 1338 | u'攀枝花': 'http://panzhihua.58.com/chuzu/', 1339 | u'眉山': 'http://ms.58.com/chuzu/', 1340 | u'广安': 
'http://ga.58.com/chuzu/', 1341 | u'资阳': 'http://zy.58.com/chuzu/', 1342 | u'凉山': 'http://liangshan.58.com/chuzu/', 1343 | u'广元': 'http://guangyuan.58.com/chuzu/', 1344 | u'雅安': 'http://ya.58.com/chuzu/', 1345 | u'巴中': 'http://bazhong.58.com/chuzu/', 1346 | u'阿坝': 'http://ab.58.com/chuzu/', 1347 | u'甘孜': 'http://ganzi.58.com/chuzu/', 1348 | u'安岳': 'http://anyuexian.58.com/chuzu/', 1349 | u'广汉': 'http://guanghanshi.58.com/chuzu/', 1350 | u'简阳': 'http://jianyangshi.58.com/chuzu/', 1351 | u'仁寿': 'http://renshouxian.58.com/chuzu/', 1352 | u'射洪': 'http://shehongxian.58.com/chuzu/', 1353 | u'大竹': 'http://dazu.58.com/chuzu/', 1354 | u'宣汉': 'http://xuanhan.58.com/chuzu/', 1355 | u'渠县': 'http://qux.58.com/chuzu/', 1356 | u'长宁': 'http://changningx.58.com/chuzu/', 1357 | # X 1358 | 1359 | u'乌鲁木齐': 'http://xj.58.com/chuzu/', 1360 | u'昌吉': 'http://changji.58.com/chuzu/', 1361 | u'巴音郭楞': 'http://bygl.58.com/chuzu/', 1362 | u'伊犁': 'http://yili.58.com/chuzu/', 1363 | u'阿克苏': 'http://aks.58.com/chuzu/', 1364 | u'喀什': 'http://ks.58.com/chuzu/', 1365 | u'哈密': 'http://hami.58.com/chuzu/', 1366 | u'克拉玛依': 'http://klmy.58.com/chuzu/', 1367 | u'博尔塔拉': 'http://betl.58.com/chuzu/', 1368 | u'吐鲁番': 'http://tlf.58.com/chuzu/', 1369 | u'和田': 'http://ht.58.com/chuzu/', 1370 | u'石河子': 'http://shz.58.com/chuzu/', 1371 | u'克孜勒苏': 'http://kzls.58.com/chuzu/', 1372 | u'阿拉尔': 'http://ale.58.com/chuzu/', 1373 | u'五家渠': 'http://wjq.58.com/chuzu/', 1374 | u'图木舒克': 'http://tmsk.58.com/chuzu/', 1375 | u'库尔勒': 'http://kel.58.com/chuzu/', 1376 | u'阿勒泰': 'http://alt.58.com/chuzu/', 1377 | u'塔城': 'http://tac.58.com/chuzu/', 1378 | 1379 | u'拉萨': 'http://lasa.58.com/chuzu/', 1380 | u'日喀则': 'http://rkz.58.com/chuzu/', 1381 | u'山南': 'http://sn.58.com/chuzu/', 1382 | u'林芝': 'http://linzhi.58.com/chuzu/', 1383 | u'昌都': 'http://changdu.58.com/chuzu/', 1384 | u'那曲': 'http://nq.58.com/chuzu/', 1385 | u'阿里': 'http://al.58.com/chuzu/', 1386 | u'日土': 'http://rituxian.58.com/chuzu/', 1387 | u'改则': 
'http://gaizexian.58.com/chuzu/', 1388 | # Y 1389 | 1390 | u'昆明': 'http://km.58.com/chuzu/', 1391 | u'曲靖': 'http://qj.58.com/chuzu/', 1392 | u'大理': 'http://dali.58.com/chuzu/', 1393 | u'红河': 'http://honghe.58.com/chuzu/', 1394 | u'玉溪': 'http://yx.58.com/chuzu/', 1395 | u'丽江': 'http://lj.58.com/chuzu/', 1396 | u'文山': 'http://ws.58.com/chuzu/', 1397 | u'楚雄': 'http://cx.58.com/chuzu/', 1398 | u'西双版纳': 'http://bn.58.com/chuzu/', 1399 | u'昭通': 'http://zt.58.com/chuzu/', 1400 | u'德宏': 'http://dh.58.com/chuzu/', 1401 | u'普洱': 'http://pe.58.com/chuzu/', 1402 | u'保山': 'http://bs.58.com/chuzu/', 1403 | u'临沧': 'http://lincang.58.com/chuzu/', 1404 | u'迪庆': 'http://diqing.58.com/chuzu/', 1405 | u'怒江': 'http://nujiang.58.com/chuzu/', 1406 | u'弥勒': 'http://milexian.58.com/chuzu/', 1407 | u'安宁': 'http://anningshi.58.com/chuzu/', 1408 | u'宣威': 'http://xuanwushi.58.com/chuzu/', 1409 | # Z 1410 | 1411 | u'杭州': 'http://hz.58.com/chuzu/', 1412 | u'宁波': 'http://nb.58.com/chuzu/', 1413 | u'温州': 'http://wz.58.com/chuzu/', 1414 | u'金华': 'http://jh.58.com/chuzu/', 1415 | u'嘉兴': 'http://jx.58.com/chuzu/', 1416 | u'台州': 'http://tz.58.com/chuzu/', 1417 | u'绍兴': 'http://sx.58.com/chuzu/', 1418 | u'湖州': 'http://huzhou.58.com/chuzu/', 1419 | u'丽水': 'http://lishui.58.com/chuzu/', 1420 | u'衢州': 'http://quzhou.58.com/chuzu/', 1421 | u'舟山': 'http://zhoushan.58.com/chuzu/', 1422 | u'乐清': 'http://yueqingcity.58.com/chuzu/', 1423 | u'瑞安': 'http://ruiancity.58.com/chuzu/', 1424 | u'义乌': 'http://yiwu.58.com/chuzu/', 1425 | u'余姚': 'http://yuyao.58.com/chuzu/', 1426 | u'诸暨': 'http://zhuji.58.com/chuzu/', 1427 | u'象山': 'http://xiangshanxian.58.com/chuzu/', 1428 | u'温岭': 'http://wenling.58.com/chuzu/', 1429 | u'桐乡': 'http://tongxiang.58.com/chuzu/', 1430 | u'慈溪': 'http://cixi.58.com/chuzu/', 1431 | u'长兴': 'http://changxing.58.com/chuzu/', 1432 | u'嘉善': 'http://jiashanx.58.com/chuzu/', 1433 | u'海宁': 'http://haining.58.com/chuzu/', 1434 | u'德清': 'http://deqing.58.com/chuzu/', 1435 | u'东阳': 
'http://dongyang.58.com/chuzu/', 1436 | u'安吉': 'http://anji.58.com/chuzu/', 1437 | u'苍南': 'http://cangnanxian.58.com/chuzu/', 1438 | u'临海': 'http://linhai.58.com/chuzu/', 1439 | u'永康': 'http://yongkang.58.com/chuzu/', 1440 | u'玉环': 'http://yuhuan.58.com/chuzu/', 1441 | u'平湖': 'http://pinghushi.58.com/chuzu/', 1442 | u'海盐': 'http://haiyan.58.com/chuzu/', 1443 | u'武义县': 'http://wuyix.58.com/chuzu/', 1444 | u'嵊州': 'http://shengzhou.58.com/chuzu/', 1445 | u'新昌': 'http://xinchang.58.com/chuzu/', 1446 | u'江山': 'http://jiangshanshi.58.com/chuzu/', 1447 | u'平阳': 'http://pingyangxian.58.com/chuzu/', 1448 | 1449 | # 其他 1450 | u'香港': 'http://hk.58.com/chuzu/', 1451 | u'澳门': 'http://am.58.com/chuzu/', 1452 | u'台湾': 'http://tw.58.com/chuzu/', 1453 | u'全国': 'http://quanguo.58.com/chuzu/', 1454 | u'其他': 'http://cn.58.com/chuzu/', 1455 | 1456 | # 海外 1457 | u'洛杉矶': 'http://g.58.com/j-gllosangeles/', 1458 | u'旧金山': 'http://g.58.com/j-glsanfrancisco/', 1459 | u'纽约': 'http://g.58.com/j-glnewyork/', 1460 | u'多伦多': 'http://g.58.com/j-gltoronto/', 1461 | u'温哥华': 'http://g.58.com/j-glvancouver/', 1462 | u'伦敦': 'http://g.58.com/j-glgreaterlondon/', 1463 | u'莫斯科': 'http://g.58.com/j-glmoscow/', 1464 | u'首尔': 'http://g.58.com/j-glseoul/', 1465 | u'东京': 'http://g.58.com/j-gltokyo/', 1466 | u'新加坡': 'http://g.58.com/j-glsingapore/', 1467 | u'曼谷': 'http://g.58.com/j-glbangkok/', 1468 | u'清迈': 'http://g.58.com/j-glchiangmai/', 1469 | u'迪拜': 'http://g.58.com/j-gldubai/', 1470 | u'奥克兰': 'http://g.58.com/j-glauckland/', 1471 | u'悉尼': 'http://g.58.com/j-glsydney/', 1472 | u'墨尔本': 'http://g.58.com/j-glmelbourne/', 1473 | u'其他海外城市': 'http://g.58.com/city/', 1474 | } 1475 | --------------------------------------------------------------------------------