├── .idea ├── .name ├── encodings.xml ├── vcs.xml ├── modules.xml ├── ESRI2016-C280.iml └── misc.xml ├── ugc.hotel.web.esri ├── .idea │ ├── .name │ ├── copyright │ │ └── profiles_settings.xml │ ├── scopes │ │ └── scope_settings.xml │ ├── encodings.xml │ ├── vcs.xml │ ├── misc.xml │ ├── modules.xml │ ├── ugc.hotel.web.iml │ └── compiler.xml ├── .bowerrc ├── images │ ├── bg.jpg │ ├── down.png │ ├── menu.png │ ├── up1.png │ ├── noise.png │ ├── qunar.png │ ├── tuniu.png │ ├── yilong.png │ ├── xiecheng.png │ └── wood_pattern.jpg ├── font │ ├── fontawesome-webfont.eot │ ├── fontawesome-webfont.ttf │ └── fontawesome-webfont.woff ├── web.config.backup ├── js │ ├── dojiconfig.js │ ├── common.js │ ├── setting.js │ ├── login │ │ └── globalconfig.js │ ├── application.js │ └── review-monitor │ │ └── main.js ├── Web.config ├── package.json ├── LICENSE ├── css │ ├── loading.css │ ├── main.css │ ├── simple-sidebar.css │ ├── messages.css │ └── demo.css └── html │ ├── login.html │ ├── quality-testing.html │ ├── setting.html │ ├── public-opinion-monitor.html │ ├── review-monitor.html │ └── quality.html ├── ugc.aggregator.esri ├── .idea │ ├── .name │ ├── encodings.xml │ ├── vcs.xml │ ├── inspectionProfiles │ │ ├── profiles_settings.xml │ │ └── Project_Default.xml │ ├── modules.xml │ ├── ugc.aggregator.iml │ └── misc.xml ├── src │ └── main │ │ ├── python │ │ ├── dao │ │ │ ├── pms │ │ │ │ └── __init__.py │ │ │ ├── hotel │ │ │ │ ├── elong │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── ElongDao.py │ │ │ │ ├── xiechengdao │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── xiecheng.py │ │ │ │ └── __init__.py │ │ │ ├── weibo │ │ │ │ ├── __init__.py │ │ │ │ └── WeiboDAO.py │ │ │ ├── __init__.py │ │ │ └── SuperDAO.py │ │ ├── util │ │ │ ├── geo │ │ │ │ ├── __init__.py │ │ │ │ └── CoordTransor.py │ │ │ ├── http │ │ │ │ ├── __init__.py │ │ │ │ └── UniversalSDK.py │ │ │ ├── io │ │ │ │ ├── __init__.py │ │ │ │ ├── CSVFileUtil.py │ │ │ │ └── FileUtil.py │ │ │ └── common │ │ │ │ ├── DateHandler.py │ │ │ │ ├── 
CollectionUtil.py │ │ │ │ └── Decorators.py │ │ ├── service │ │ │ ├── pms │ │ │ │ └── __init__.py │ │ │ ├── hotel │ │ │ │ ├── elong │ │ │ │ │ └── __init__.py │ │ │ │ ├── xiecheng │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── XichengDataService.py │ │ │ │ ├── __init__.py │ │ │ │ ├── XieChengAPIClient.py │ │ │ │ ├── TuniuAPIClient.py │ │ │ │ └── SuperHotelService.py │ │ │ ├── map │ │ │ │ ├── baidu │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── baidu.py │ │ │ │ │ ├── APIService.py │ │ │ │ │ └── CoordinateTransferService.py │ │ │ │ └── tian │ │ │ │ │ └── APIService.py │ │ │ ├── nlp │ │ │ │ ├── __init__.py │ │ │ │ ├── sentiment.marshal │ │ │ │ ├── keywords.txt │ │ │ │ ├── Sentiment.py │ │ │ │ ├── HotelNLP.py │ │ │ │ ├── KeywordsHandler.py │ │ │ │ └── Bayes.py │ │ │ └── weibo │ │ │ │ └── APIService.py │ │ └── resource │ │ │ └── __init__.py │ │ └── scripts │ │ ├── Hotel │ │ ├── __init__.py │ │ ├── sightspot.txt │ │ ├── SightSpot.py │ │ ├── HotelGeocoding.py │ │ ├── HotelCatcher.py │ │ ├── MergeComment.py │ │ ├── HotelSentimentProcessor.py │ │ ├── TuniuCatcher.py │ │ └── XiechengCatcher.py │ │ ├── logging.ini │ │ ├── Map │ │ └── Geocoding.py │ │ ├── GeocodingServiceMultiProcess.py │ │ └── GeocodingService.py ├── docs │ └── ugc.aggregator │ │ ├── docs │ │ ├── about.md │ │ ├── mkdocs.md │ │ ├── pycharm.md │ │ ├── pybuilder.md │ │ ├── virtualvenv.md │ │ └── index.md │ │ ├── site │ │ ├── img │ │ │ └── favicon.ico │ │ ├── mkdocs │ │ │ ├── js │ │ │ │ ├── search-results-template.mustache │ │ │ │ └── search.js │ │ │ └── search_index.json │ │ ├── fonts │ │ │ ├── fontawesome-webfont.eot │ │ │ ├── fontawesome-webfont.ttf │ │ │ └── fontawesome-webfont.woff │ │ ├── sitemap.xml │ │ ├── license │ │ │ └── highlight.js │ │ │ │ └── LICENSE │ │ ├── js │ │ │ └── theme.js │ │ ├── css │ │ │ ├── highlight.css │ │ │ └── theme_extra.css │ │ ├── search.html │ │ ├── about │ │ │ └── index.html │ │ └── index.html │ │ └── mkdocs.yml ├── build.py ├── setting.py └── requirements.txt ├── README.md ├── Screenshots ├── 
3.2.2.png ├── 1 系统架构图.png ├── 2 爬虫系统架构.png ├── 2.1房价监控.png ├── 4 情感值分析.png ├── 4.4 运营质检.jpg ├── 6 服务发布框架.png ├── 1.1.1情感统计.png ├── 1.1.2观点统计.png ├── 1.2.1酒店对比.png ├── 1.3.1用户来源图.png ├── 1.3.4局部轨迹.png ├── 2.3订房蜂窝热度图.png ├── 3.2.1携程用户轨迹.png ├── 5 特征词提取流程.png ├── 1.4.2 客源流出计算.PNG ├── 3.1.2设施最远覆盖图.jpg └── 1.4.4酒店多级服务区分析.PNG ├── .gitattributes └── .gitignore /.idea/.name: -------------------------------------------------------------------------------- 1 | ESRI2016-C280 -------------------------------------------------------------------------------- /ugc.hotel.web.esri/.idea/.name: -------------------------------------------------------------------------------- 1 | ugc.hotel.web -------------------------------------------------------------------------------- /ugc.aggregator.esri/.idea/.name: -------------------------------------------------------------------------------- 1 | ugc.aggregator -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/dao/pms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/util/geo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/util/http/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/util/io/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/dao/hotel/elong/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/service/pms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ugc.hotel.web.esri/.bowerrc: -------------------------------------------------------------------------------- 1 | { 2 | "directory": "lib" 3 | } -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/service/hotel/elong/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/dao/hotel/xiechengdao/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/service/hotel/xiecheng/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/README.md -------------------------------------------------------------------------------- /ugc.aggregator.esri/docs/ugc.aggregator/docs/about.md: -------------------------------------------------------------------------------- 1 | # ugc.aggregator 2 | 众源时空信息聚合 3 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/dao/weibo/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 
'LiuYang' 2 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/service/map/baidu/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'geosmart' 2 | -------------------------------------------------------------------------------- /Screenshots/3.2.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/3.2.2.png -------------------------------------------------------------------------------- /Screenshots/1 系统架构图.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/1 系统架构图.png -------------------------------------------------------------------------------- /Screenshots/2 爬虫系统架构.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/2 爬虫系统架构.png -------------------------------------------------------------------------------- /Screenshots/2.1房价监控.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/2.1房价监控.png -------------------------------------------------------------------------------- /Screenshots/4 情感值分析.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/4 情感值分析.png -------------------------------------------------------------------------------- /Screenshots/4.4 运营质检.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/4.4 运营质检.jpg 
-------------------------------------------------------------------------------- /Screenshots/6 服务发布框架.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/6 服务发布框架.png -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/dao/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | __author__ = 'lizhen' 3 | -------------------------------------------------------------------------------- /Screenshots/1.1.1情感统计.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/1.1.1情感统计.png -------------------------------------------------------------------------------- /Screenshots/1.1.2观点统计.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/1.1.2观点统计.png -------------------------------------------------------------------------------- /Screenshots/1.2.1酒店对比.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/1.2.1酒店对比.png -------------------------------------------------------------------------------- /Screenshots/1.3.1用户来源图.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/1.3.1用户来源图.png -------------------------------------------------------------------------------- /Screenshots/1.3.4局部轨迹.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/1.3.4局部轨迹.png 
-------------------------------------------------------------------------------- /Screenshots/2.3订房蜂窝热度图.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/2.3订房蜂窝热度图.png -------------------------------------------------------------------------------- /Screenshots/3.2.1携程用户轨迹.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/3.2.1携程用户轨迹.png -------------------------------------------------------------------------------- /Screenshots/5 特征词提取流程.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/5 特征词提取流程.png -------------------------------------------------------------------------------- /Screenshots/1.4.2 客源流出计算.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/1.4.2 客源流出计算.PNG -------------------------------------------------------------------------------- /Screenshots/3.1.2设施最远覆盖图.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/3.1.2设施最远覆盖图.jpg -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/dao/hotel/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'DreamCathcer' 3 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/service/hotel/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'LiuYang' 3 | 
-------------------------------------------------------------------------------- /Screenshots/1.4.4酒店多级服务区分析.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/1.4.4酒店多级服务区分析.PNG -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/scripts/Hotel/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'DreamCathcer,LiuYang' 3 | -------------------------------------------------------------------------------- /ugc.hotel.web.esri/images/bg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/bg.jpg -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/service/nlp/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'DreamCathcer,LiuYang' 3 | -------------------------------------------------------------------------------- /ugc.hotel.web.esri/images/down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/down.png -------------------------------------------------------------------------------- /ugc.hotel.web.esri/images/menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/menu.png -------------------------------------------------------------------------------- /ugc.hotel.web.esri/images/up1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/up1.png -------------------------------------------------------------------------------- /ugc.hotel.web.esri/images/noise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/noise.png -------------------------------------------------------------------------------- /ugc.hotel.web.esri/images/qunar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/qunar.png -------------------------------------------------------------------------------- /ugc.hotel.web.esri/images/tuniu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/tuniu.png -------------------------------------------------------------------------------- /ugc.hotel.web.esri/images/yilong.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/yilong.png -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/resource/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'LiuYang,DreamCathcer,pengshaowei' 3 | -------------------------------------------------------------------------------- /ugc.hotel.web.esri/.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /ugc.hotel.web.esri/images/xiecheng.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/xiecheng.png -------------------------------------------------------------------------------- /ugc.hotel.web.esri/images/wood_pattern.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/wood_pattern.jpg -------------------------------------------------------------------------------- /ugc.hotel.web.esri/font/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/font/fontawesome-webfont.eot -------------------------------------------------------------------------------- /ugc.hotel.web.esri/font/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/font/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /ugc.hotel.web.esri/font/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/font/fontawesome-webfont.woff -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/scripts/Hotel/sightspot.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.aggregator.esri/src/main/scripts/Hotel/sightspot.txt -------------------------------------------------------------------------------- /ugc.aggregator.esri/docs/ugc.aggregator/site/img/favicon.ico: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.aggregator.esri/docs/ugc.aggregator/site/img/favicon.ico -------------------------------------------------------------------------------- /ugc.aggregator.esri/docs/ugc.aggregator/site/mkdocs/js/search-results-template.mustache: -------------------------------------------------------------------------------- 1 |
2 |

{{title}}

3 |

{{summary}}

4 |
5 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/service/nlp/sentiment.marshal: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.aggregator.esri/src/main/python/service/nlp/sentiment.marshal -------------------------------------------------------------------------------- /.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/util/common/DateHandler.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'LiuYang,geosmart' 3 | 4 | ''' 5 | 时间处理类 6 | ''' 7 | class ClassHandler(object): 8 | pass -------------------------------------------------------------------------------- /ugc.hotel.web.esri/web.config.backup: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/docs/ugc.aggregator/site/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.aggregator.esri/docs/ugc.aggregator/site/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /ugc.aggregator.esri/docs/ugc.aggregator/site/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.aggregator.esri/docs/ugc.aggregator/site/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- 
/ugc.hotel.web.esri/.idea/scopes/scope_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/docs/ugc.aggregator/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: ugc.aggregator 2 | pages: 3 | - Home: index.md 4 | - pybuilder: pybuilder.md 5 | - mkdocs: mkdocs.md 6 | - About: about.md 7 | 8 | #theme: readthedocs -------------------------------------------------------------------------------- /ugc.aggregator.esri/docs/ugc.aggregator/site/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.aggregator.esri/docs/ugc.aggregator/site/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /ugc.aggregator.esri/.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /ugc.hotel.web.esri/.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /ugc.hotel.web.esri/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 
| -------------------------------------------------------------------------------- /ugc.hotel.web.esri/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | -------------------------------------------------------------------------------- /ugc.hotel.web.esri/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/docs/ugc.aggregator/docs/mkdocs.md: -------------------------------------------------------------------------------- 1 | # mkdocs项目文档配置 2 | [mkdocs主页](http://www.mkdocs.org) 3 | ## 进入venvShel并安装mkdocs 4 | pip install mkdocs 5 | ## mkdocs 命令 6 | mkdocs help 7 | ## 新建项目 8 | >> cd docs 9 | >> mkdocs new pybuilder.helloworld 10 | >> cd pybuilder.helloworld 11 | ## 本机测试 12 | mkdocs serve 13 | ## 生成发布site 14 | mkdocs build 15 | ## 清空site 16 | mkdocs build --clean -------------------------------------------------------------------------------- /ugc.hotel.web.esri/.idea/ugc.hotel.web.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 
-------------------------------------------------------------------------------- /ugc.aggregator.esri/.idea/ugc.aggregator.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ugc.hotel.web.esri/js/dojiconfig.js: -------------------------------------------------------------------------------- 1 | var dojoConfig = { 2 | parseOnLoad: true, 3 | packages: [ 4 | { 5 | "name": "bdlib", 6 | "location": location.pathname.replace(/\/[^/]+$/, '') + "../../lib/bdlib" 7 | } 8 | , 9 | { 10 | "name": "tdtlib", 11 | "location": location.pathname.replace(/\/[^/]+$/, '') + "../../lib/tdtlib" 12 | } 13 | ] 14 | }; -------------------------------------------------------------------------------- /.idea/ESRI2016-C280.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/docs/ugc.aggregator/docs/pycharm.md: -------------------------------------------------------------------------------- 1 | pycharm相关配置 2 | --- 3 | 4 | # project intepreter配置 5 | 默认在pycharm中无法选择已有virtualEnv,只能新建,可通过add local手动完成虚拟环境导入: 6 | File>setting>Project Interpreter>add 
local>选择virtualEnv\Scripts\python.exe 7 | 8 | #svn配置 9 | svn下载:http://netcologne.dl.sourceforge.net/project/win32svn/1.8.14/Setup-Subversion-1.8.14.msi 10 | svn安装:注意安装路径不能带空格: 11 | pycharm配置svn:在version contro>svn>command line client设置为C:\Dev\SVN\bin\svn.exe 12 | 13 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/docs/ugc.aggregator/site/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | None/ 7 | 2016-01-18 8 | daily 9 | 10 | 11 | 12 | 13 | 14 | None/about/ 15 | 2016-01-18 16 | daily 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/util/io/CSVFileUtil.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'LiuYang,geosmart' 3 | 4 | import csv 5 | 6 | 7 | class CSVFileUtil(object): 8 | 9 | def reader(self, file): 10 | for line in csv.reader(file): 11 | yield line 12 | 13 | def writer(self,file): 14 | return csv.writer(file) 15 | 16 | if __name__ == "__main__": 17 | csv_file_util = CSVFileUtil() 18 | for line in csv_file_util.reader(file(r'C:\Users\kaipeng\Desktop\rent.csv','rb')): 19 | print line[2] 20 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/docs/ugc.aggregator/docs/pybuilder.md: -------------------------------------------------------------------------------- 1 | # pybuilder目录 2 | ## virtualenv路径 3 | E:\PythonWorkspace\ugc\ugc.venv 4 | 5 | ## pybuilder路径 6 | E:\PythonWorkspace\ugc\ugc.venv\Scripts\ 7 | 8 | # pybuilder脚本 9 | ## 进入venvShell 10 | workon ugc.venv 11 | 12 | ## 执行默认build文件 13 | pyb_.exe 14 | 15 | ## 执行默认build文件,并打印unittest错误详情 16 | pyb_.exe -v 17 | 18 | ## 新增测试项目 19 | pyb_.exe --start-project 20 | 21 | ## 发布 22 | pyb_.exe install_dependencies publish 23 | 24 | # pybuilder树状目录介绍 25 | src/main/python:源码 26 | 
src/main/scripts:可执行脚本 27 | src/main/unittest:单元测试 28 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/util/common/CollectionUtil.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | __author__ = 'LiuYang,geosmart' 4 | import math 5 | 6 | class CollectionUtil(object): 7 | 8 | #arr是被分割的list,n是每个chunk中含n元素。 9 | def chunksBySize(self,arr, n): 10 | return [arr[i:i+n] for i in range(0, len(arr), n)] 11 | 12 | #或者让一共有m块,自动分(尽可能平均) 13 | #split the arr into N chunks 14 | def chunksByAverage(self,arr, m): 15 | n = int(math.ceil(len(arr) / float(m))) 16 | return [arr[i:i + n] for i in range(0, len(arr), n)] -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/service/map/baidu/baidu.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from util.http.UniversalSDK import APIClient 4 | 5 | 6 | class BaiduCrawler(object): 7 | 8 | def __init__(self): 9 | self.client = APIClient("http://api.map.baidu.com") 10 | 11 | def place(self,query,bounds,ak): 12 | return self.client.place.v2.search.get(query=query,bounds=bounds,ak=ak,output="json") 13 | 14 | if __name__ == "__main__": 15 | baiduCrawler = BaiduCrawler() 16 | #print baiduCrawler.place("银行","39.915,116.404,39.975,116.414","WBw4kIepZzGp4kH5Gn3r0ACy") -------------------------------------------------------------------------------- /ugc.aggregator.esri/build.py: -------------------------------------------------------------------------------- 1 | from pybuilder.core import use_plugin, init 2 | 3 | use_plugin("python.core") 4 | use_plugin("python.install_dependencies") 5 | use_plugin("python.flake8") 6 | use_plugin("python.distutils") 7 | 8 | use_plugin("python.coverage") 9 | use_plugin("python.pycharm") 10 | # use_plugin("python.unittest") 11 | 
# Geocode every sight-spot name listed in sightspot.txt (one name per line)
# with the Baidu geocoding service and print the name followed by the result
# of CoordTransor.bd09togcj02 applied to the returned lng/lat.
baidu_api_service = BaiduMapAPIService("MviPFAcx5I6f1FkRQlq6iTxc")

# The context manager guarantees the file is closed even if a request fails;
# iterating the file object avoids loading every line into memory first.
with open('sightspot.txt', 'r') as f:
    for line in f:
        data = baidu_api_service.doGeocoding(addressText=line.strip(), city='南京')
        # Guard against an empty/None response before looking for the payload.
        if data and "result" in data:
            location = data["result"]["location"]
            # `line` still carries its trailing newline, so the coordinates
            # are printed on the line below the name (same as before).
            print(line + str(CoordTransor.bd09togcj02(location["lng"], location["lat"])))
dao_setting["db"], dao_setting["user"], dao_setting["password"]) 16 | 17 | def get_max_distance(self): 18 | data = self.dao.get_max_distance_data() 19 | return data 20 | 21 | def get_around_facilities(self): 22 | data = self.dao.get_around_facilities_data() 23 | return data 24 | -------------------------------------------------------------------------------- /ugc.hotel.web.esri/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "arcgis-echarts", 3 | "version": "0.1.1", 4 | "description": "A plugin for ArcGIS JS API to load echarts map and Make big data visualization easier.", 5 | "main": "src/EchartsLayer.js", 6 | "scripts": { 7 | "start": "http-server" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/wandergis/arcgis-echarts.git" 12 | }, 13 | "keywords": [ 14 | "ArcGIS", 15 | "javascript", 16 | "esri", 17 | "echarts", 18 | "visualization" 19 | ], 20 | "author": "wandergis", 21 | "license": "MIT", 22 | "bugs": { 23 | "url": "https://github.com/wandergis/arcgis-echarts/issues" 24 | }, 25 | "homepage": "https://github.com/wandergis/arcgis-echarts#readme" 26 | } -------------------------------------------------------------------------------- /ugc.aggregator.esri/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/setting.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | #baidu_map_uadb_setting = {"host":"192.168.1.161", "db":"standarddb", "user":"standarddb", "password":"standarddb"} 4 | baidu_map_uadb_setting = {"host":"localhost", "db":"ugc", "user":"root", "password":"1234"} 5 | 6 | # local_hotel_setting = {"host":"120.27.93.15", "db":"hotel", "user":"hotel", "password":"hotel"} 7 | 
local_hotel_setting = {"host":"localhost", "db":"hotel", "user":"root", "password":"1234"} 8 | 9 | local_weibo_setting = {"host":"localhost", "db":"weibo", "user":"root", "password":"1234"} 10 | 11 | lt_hotel_setting = {"host":"192.168.1.161", "db":"hotel", "user":"standarddb", "password":"standarddb"} 12 | 13 | setting = {"baidumap":baidu_map_uadb_setting, "hotel":local_hotel_setting, "weibo":local_weibo_setting} -------------------------------------------------------------------------------- /ugc.aggregator.esri/docs/ugc.aggregator/docs/virtualvenv.md: -------------------------------------------------------------------------------- 1 | virtualenvwrapper-win 2 | --- 3 | ## virtualenv配置 4 | 安装:pip install virtualenv 5 | 新建virtualEnv:virtualenv --no-site-packages venv 6 | 进入venvShel:E:\PythonWorkspace\ugc\ugc.venv\Scripts\activate 7 | 8 | ## virtualenvwrapper安装 9 | linux:pip install virtualenvwrapper 10 | windows:pip install virtualenvwrapper-win 11 | 12 | ## virtualenvwrapper配置 13 | 安装完毕过后在环境变量里面新建一个WORKON_HOME字段存储虚拟python环境, 14 | WORKON_HOME:E:\PythonWorkspace\venv 15 | 环境变量立即生效:cmd中运行set WORKON_HOME=E:\PythonWorkspace\venv 16 | 17 | ## 常用的一些命令 18 | 命令安装在C:\Python27\Scripts\*.bat 19 | *. 创建虚拟环境:mkvirtualenv VirtualenvName 20 | *. 列出所有虚拟环境:Lsvirtualenv 21 | *. 移除虚拟环境:rmvirtualenv VirtualenvName 22 | *. 切换到VirtualenvName环境:workon VirtualenvName 23 | *. 
class TianMapAPIService(object):
    """Thin wrapper around the Tianditu (map.tianditu.com) query endpoint."""

    def __init__(self):
        self.tiandituClient = APIClient("http://map.tianditu.com")

    def tdtGeocoding(self, address):
        """Geocode *address* through Tianditu.

        Endpoint: http://map.tianditu.com/query.shtml (POST).

        The request body is a fixed JSON template whose ``keyWord`` value is
        substituted with *address*; every other field (level, mapBound,
        queryType, count, start, queryTerminal) is hard-coded.
        NOTE(review): *address* is inserted verbatim, so a value containing
        a double quote would produce malformed JSON -- confirm callers only
        pass plain text.

        :param address: address text to look up
        :return: whatever the underlying API client returns for the POST
        """
        # JSON template; the literal word "address" is the placeholder.
        template = '{"keyWord":"address","level":"12","mapBound":"118.61107,31.90788,118.93449,32.18735","queryType":"1","count" :"20","start":"0","queryTerminal":"10000"}'
        payload = template.replace("address", address)
        endpoint = self.tiandituClient.query.addtrail(".shtml")
        return endpoint.post(postStr=payload, type="query")
/**
 * Deep-copy a plain object or array.
 *
 * Uses a JSON round trip when JSON is available (so functions and undefined
 * values are dropped, exactly as JSON.stringify does) and falls back to a
 * recursive copy of own enumerable properties otherwise.
 *
 * @param {*} obj value to clone
 * @returns {*} a deep copy for objects/arrays, null for null, and undefined
 *          for any non-object input (unchanged legacy contract)
 */
var deepClone = function (obj) {
    // Guard first: the old code read obj.constructor before this check and
    // therefore threw a TypeError on null/undefined input.
    if (typeof obj !== 'object') {
        return;
    }
    if (obj === null) {
        return null;
    }
    if (typeof JSON !== 'undefined' && JSON) {
        return JSON.parse(JSON.stringify(obj)); // serialize, then restore
    }
    var isArray = Object.prototype.toString.call(obj) === '[object Array]';
    var newobj = isArray ? [] : {};
    for (var key in obj) {
        if (!Object.prototype.hasOwnProperty.call(obj, key)) {
            continue;
        }
        var value = obj[key];
        // Recurse with deepClone itself (the old fallback called an
        // undefined `cloneObj`); null is copied as-is.
        newobj[key] = (value !== null && typeof value === 'object') ?
            deepClone(value) : value;
    }
    return newobj;
};
10 | * `mkdocs help` - Print this help message. 11 | 12 | ## Project layout 13 | 14 | mkdocs.yml # The configuration file. 15 | docs/ 16 | index.md # The documentation homepage. 17 | 18 | 19 | ## requirements 20 | logging 21 | gevent 22 | MySQLDB 23 | weibo 24 | selenium 25 | scrapy(依赖lxml) 26 | 27 | 28 | ## cmd运行配置 29 | 新增workspace.path文件到virtualenv目录(E:\PythonWorkspace\ugc\ugc_venv\Lib\site-packages) 30 | ``` 31 | E:\PythonWorkspace\ugc\ugc.aggregator 32 | E:\PythonWorkspace\ugc\ugc.aggregator\src\main\python 33 | ``` 34 | 注意path文件中的模块目录必须有__init__.py文件 35 | ## 进入virtualEnv 36 | E:\PythonWorkspace\ugc\ugc_venv\Scripts\activate 37 | ## 执行程序 38 | python E:\PythonWorkspace\ugc\ugc.aggregator\src\main\scripts\GeocodingService.py 39 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/scripts/logging.ini: -------------------------------------------------------------------------------- 1 | #Configuration for log output 2 | #Naiveloafer 3 | #2012-06-04 4 | 5 | [loggers] 6 | keys=root,ugc 7 | 8 | [handlers] 9 | keys=consoleHandler,fileHandler,rotatingFileHandler 10 | 11 | [formatters] 12 | keys=simpleFmt 13 | 14 | [logger_root] 15 | level=DEBUG 16 | handlers=rotatingFileHandler,consoleHandler 17 | #handlers=fileHandler 18 | #handlers=rotatingFileHandler 19 | 20 | [logger_ugc] 21 | level=DEBUG 22 | handlers=rotatingFileHandler,consoleHandler 23 | qualname=ugc 24 | propagate=0 25 | 26 | [handler_consoleHandler] 27 | class=StreamHandler 28 | level=DEBUG 29 | formatter=simpleFmt 30 | args=(sys.stdout,) 31 | 32 | [handler_fileHandler] 33 | class=FileHandler 34 | level=DEBUG 35 | formatter=simpleFmt 36 | args=("c:/log/ugc/run.log", "a") 37 | 38 | [handler_rotatingFileHandler] 39 | class=handlers.RotatingFileHandler 40 | level=DEBUG 41 | formatter=simpleFmt 42 | args=("c:/log/ugc/run.log", "a", 20*1024*1024, 10) 43 | 44 | 45 | [formatter_simpleFmt] 46 | format=%(asctime)s - %(name)s - %(levelname)s - %(message)s - 
# Database connection settings used by the hotel DAO.
dao_setting = local_hotel_setting

dao = TuniuDAO(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"])

map_service = BaiduMapAPIService("MviPFAcx5I6f1FkRQlq6iTxc")

hotellist = dao.get_hotelinfo()

# Collected geocoding results: one {"hotel_name", "x", "y"} dict per hotel.
hotel_location = []

# Walk the hotel rows and geocode each hotel by name (column 1 of the row).
for i, hotel in enumerate(hotellist):
    hotel_name = hotel[1]
    geocoding_info = map_service.doGeocoding(hotel_name)
    try:
        location = geocoding_info["result"]["location"]
        entry = {"hotel_name": hotel_name, "x": location["lng"], "y": location["lat"]}
    except (KeyError, TypeError):
        # Response without a usable result (missing key or a None payload):
        # log it and move on. A bare `except:` here used to swallow
        # KeyboardInterrupt and genuine programming errors as well.
        traceback.print_exc()
        continue
    hotel_location.append(entry)
    print("%d done" % i)

print(len(hotel_location))
# Persist every successfully geocoded hotel.
dao.save_hotels_location(hotel_location)
class TuniuAPIClient(object):
    """Client for the hotel list AJAX endpoint on hotel.tuniu.com."""

    def __init__(self):
        self.client = APIClient("http://hotel.tuniu.com")

    def get_hotel_list(self, page, cityCode, checkIn=None, checkOut=None):
        """Fetch one page of the Tuniu hotel list for a city.

        :param page: page number sent as the ``page`` query parameter
        :param cityCode: city code sent as ``search[cityCode]``
        :param checkIn: check-in date as a 'YYYY-MM-DD' string. When omitted,
            check-in is tomorrow and check-out the day after; any *checkOut*
            passed alongside is ignored (unchanged legacy behavior).
        :param checkOut: check-out date as a 'YYYY-MM-DD' string. When omitted
            but *checkIn* is given, it defaults to the day after *checkIn*
            instead of being sent as None.
        :return: whatever the underlying API client returns for the GET
        :raises ValueError: if *checkIn* is given without *checkOut* and is
            not a 'YYYY-MM-DD' string
        """
        date_format = '%Y-%m-%d'
        one_day = datetime.timedelta(days=1)
        if checkIn is None:
            check_in_day = datetime.datetime.now() + one_day
            checkIn = check_in_day.strftime(date_format)
            checkOut = (check_in_day + one_day).strftime(date_format)
        elif checkOut is None:
            # Previously a None check-out date was sent to the service.
            check_in_day = datetime.datetime.strptime(checkIn, date_format)
            checkOut = (check_in_day + one_day).strftime(date_format)
        query_param = {
            "r": "/hotel/ajax/list",
            "search[cityCode]": cityCode,
            "search[checkInDate]": checkIn,
            "search[checkOutDate]": checkOut,
            "sort[first][id]": "recommend",
            "sort[third]": "cash-back-after",
            "page": page,
            "returnFilter": 0
        }
        hotel_list = self.client.yii.addtrail(".php").get_by_dict(query_param)
        return hotel_list
/ugc.aggregator.esri/requirements.txt: -------------------------------------------------------------------------------- 1 | alabaster==0.7.7 2 | Babel==2.2.0 3 | backports-abc==0.4 4 | backports.ssl-match-hostname==3.5.0.1 5 | certifi==2015.11.20.1 6 | cffi==1.5.0 7 | characteristic==14.3.0 8 | click==6.2 9 | colorama==0.3.6 10 | coverage==4.0.3 11 | cryptography==1.2.1 12 | cssselect==0.9.1 13 | docutils==0.12 14 | enum34==1.1.2 15 | flake8==2.5.1 16 | funcsigs==0.4 17 | gevent==1.0.2 18 | greenlet==0.4.9 19 | idna==2.0 20 | ipaddress==1.0.16 21 | Jinja2==2.8 22 | livereload==2.4.0 23 | logging==0.4.9.6 24 | lxml==3.5.0 25 | Markdown==2.6.5 26 | MarkupSafe==0.23 27 | mccabe==0.3.1 28 | mkdocs==0.14.0 29 | mock==1.3.0 30 | mockito==0.5.2 31 | mysql-connector-python==2.1.3 32 | MySQL-python==1.2.3 33 | pbr==1.8.1 34 | pep8==1.7.0 35 | pyasn1==0.1.9 36 | pyasn1-modules==0.0.8 37 | PyBuilder==0.11.4 38 | pycparser==2.14 39 | pydash==3.4.1 40 | pyflakes==1.0.0 41 | Pygments==2.1 42 | pyOpenSSL==0.15.1 43 | pytz==2015.7 44 | PyYAML==3.11 45 | queuelib==1.4.2 46 | Scrapy==1.0.4 47 | selenium==2.49.0 48 | service-identity==14.0.0 49 | sinaweibopy==1.1.4 50 | singledispatch==3.4.0.3 51 | six==1.10.0 52 | snowballstemmer==1.2.1 53 | Sphinx==1.3.4 54 | sphinx-rtd-theme==0.1.9 55 | tblib==1.2.0 56 | tornado==4.3 57 | Twisted==15.5.0 58 | unittest-xml-reporting==1.13.0 59 | verify==1.1.0 60 | w3lib==1.13.0 61 | wheel==0.24.0 62 | zope.interface==4.1.3 63 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/service/nlp/keywords.txt: -------------------------------------------------------------------------------- 1 | 房间 2 | 酒店 3 | 设施 4 | 早餐 5 | 环境 6 | 前台 7 | 交通 8 | 价格 9 | 空调 10 | 服务员 11 | 床 12 | 隔音 13 | 卫生间 14 | 总体 15 | 地方 16 | 性价比 17 | 位置 18 | 地铁 19 | 态度 20 | 夫子庙 21 | 味道 22 | 窗户 23 | 电视 24 | 地铁站 25 | 餐厅 26 | 热水 27 | 整体 28 | 客房 29 | 宾馆 30 | 声音 31 | 被子 32 | 电梯 33 | 大堂 34 | 浴室 35 | 市中心 36 | 火车站 37 | 空间 38 | 购物 39 | 
品种 40 | 公交 41 | 对面 42 | 床单 43 | 饭店 44 | 步行 45 | 老板 46 | 用品 47 | 网络 48 | 硬件 49 | 电话 50 | 时间 51 | 门 52 | 枕头 53 | 浴缸 54 | 淋浴 55 | 厕所 56 | 商务 57 | 地段 58 | 餐饮 59 | 网速 60 | 大床 61 | 景点 62 | 电脑 63 | 停车场 64 | 面积 65 | 噪音 66 | 马路 67 | 地毯 68 | 工作人员 69 | 楼层 70 | 马桶 71 | 机场 72 | 大厅 73 | 住宿 74 | 拖鞋 75 | 电视机 76 | 条件 77 | 办理 78 | 走廊 79 | 小时 80 | 特色 81 | 质量 82 | 速度 83 | 价位 84 | 小吃 85 | 早饭 86 | 吹风机 87 | 洗手间 88 | 浴巾 89 | 风格 90 | 标间 91 | 布置 92 | 快捷酒店 93 | 地铁口 94 | 公交车 95 | 商场 96 | 行李 97 | 通风 98 | 信号 99 | 体验 100 | 情况 101 | 床房 102 | 灯光 103 | 无线 104 | 菜 105 | 套房 106 | 个人 107 | 客服 108 | 市区 109 | 床垫 110 | 冰箱 111 | 广场 112 | 景区 113 | 总台 114 | 特价 115 | 公寓 116 | 出租车 117 | 晚餐 118 | 家庭 119 | 口味 120 | 地点 121 | 阳台 122 | 种类 123 | 价钱 124 | 行政 125 | 细节 126 | 印象 127 | 房价 128 | 经济 129 | 无线网 130 | 窗帘 131 | 空气 132 | 房卡 133 | 地板 134 | 家具 135 | 气味 136 | 员工 137 | 宽带 138 | 评价 139 | 卫生条件 140 | 收费 141 | 风景 142 | 经理 143 | 玻璃 144 | 桌子 145 | 办事 146 | 样子 147 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/scripts/Map/Geocoding.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'DreamCathcer' 3 | 4 | import traceback 5 | import csv 6 | import re 7 | 8 | from util.io.CSVFileUtil import CSVFileUtil 9 | from service.map.baidu.APIService import BaiduMapAPIService 10 | 11 | input_file = r'C:\Users\kaipeng\Desktop\rent.csv' 12 | output_file = r"C:\Users\kaipeng\Desktop\rent_geocode.csv" 13 | have_title = True 14 | handle_row_index = 2 15 | 16 | def handle_text(text): 17 | return "广州市".decode("utf-8").encode("gbk")+re.sub("[\[\]]","",text) 18 | 19 | if __name__=="__main__": 20 | csv_file_util = CSVFileUtil() 21 | map_service = BaiduMapAPIService("WBw4kIepZzGp4kH5Gn3r0ACy") 22 | writer = csv.writer(file(output_file, "wb")) 23 | count = 0 24 | for line in csv_file_util.reader(file(input_file)): 25 | count += 1 26 | if have_title and count==1: 27 | continue 28 | geocoding_info = 
class FileUtil(object):
    """Small file helpers: pickle round-tripping, deletion and locating the
    logging configuration file relative to the running script."""

    def __init__(self):
        pass

    def writeObjToFile(self, fileName, obj):
        """Serialize *obj* with pickle into *fileName*, overwriting it.

        :param fileName: path of the file to write
        :param obj: any picklable Python object
        """
        with open(fileName, 'wb') as f:
            pickle.dump(obj, f)

    def readFileToObj(self, fileName):
        """Load and return the pickled object stored in *fileName*.

        :param fileName: path of the file to read
        :return: the unpickled object, or None when the file does not exist
        """
        if not os.path.exists(fileName):
            return None
        with open(fileName, 'rb') as f:
            # SECURITY: pickle.load can execute arbitrary code; only use this
            # on files written by writeObjToFile, never on untrusted input.
            return pickle.load(f)

    def deleteFile(self, fileName):
        """Delete *fileName* if it exists; do nothing otherwise."""
        if os.path.exists(fileName):
            os.remove(fileName)

    def cur_file_dir(self):
        """Return the directory of the running script.

        Based on ``sys.path[0]``: a directory is returned as-is; a file (the
        py2exe-compiled case) yields its containing directory. An empty
        ``sys.path[0]`` stands for the current directory, so the working
        directory is returned instead of None.

        :return: directory path, or None if ``sys.path[0]`` is a non-empty
                 path that is neither an existing directory nor a file
        """
        path = sys.path[0]
        if path == '':
            return os.getcwd()
        if os.path.isdir(path):
            return path
        if os.path.isfile(path):
            return os.path.dirname(path)
        return None

    def getLogConfigPath(self, rootFolder="ugc.aggregator"):
        """Return the path of ``src/main/scripts/logging.ini`` under the project root.

        The project root is the path component of the script directory that
        starts at the first occurrence of *rootFolder*. The whole component is
        kept, so a checkout named e.g. ``ugc.aggregator.esri`` resolves to
        itself rather than to a sibling ``ugc.aggregator``. If *rootFolder*
        does not occur in the script directory, it is appended to it
        (unchanged legacy behavior).

        :param rootFolder: name (or name prefix) of the project root folder
        :return: path to logging.ini
        :raises ValueError: if the script directory cannot be determined
        """
        suffix = "/src/main/scripts/logging.ini"
        base = self.cur_file_dir()
        if base is None:
            raise ValueError("cannot determine the script directory from sys.path[0]")
        start = base.find(rootFolder)
        if start < 0:
            return base + rootFolder + suffix
        end = start + len(rootFolder)
        # Extend to the end of the matching path component.
        while end < len(base) and base[end] not in "/\\":
            end += 1
        return base[:end] + suffix
25 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/docs/ugc.aggregator/site/mkdocs/search_index.json: -------------------------------------------------------------------------------- 1 | { 2 | "docs": [ 3 | { 4 | "location": "/", 5 | "text": "Welcome to MkDocs\n\n\nFor full documentation visit \nmkdocs.org\n.\n\n\nCommands\n\n\n\n\nmkdocs new [dir-name]\n - Create a new project.\n\n\nmkdocs serve\n - Start the live-reloading docs server.\n\n\nmkdocs build\n - Build the documentation site.\n\n\nmkdocs help\n - Print this help message.\n\n\n\n\nProject layout\n\n\nmkdocs.yml # The configuration file.\ndocs/\n index.md # The documentation homepage.\n ... # Other markdown pages, images and other files.", 6 | "title": "Home" 7 | }, 8 | { 9 | "location": "/#welcome-to-mkdocs", 10 | "text": "For full documentation visit mkdocs.org .", 11 | "title": "Welcome to MkDocs" 12 | }, 13 | { 14 | "location": "/#commands", 15 | "text": "mkdocs new [dir-name] - Create a new project. mkdocs serve - Start the live-reloading docs server. mkdocs build - Build the documentation site. mkdocs help - Print this help message.", 16 | "title": "Commands" 17 | }, 18 | { 19 | "location": "/#project-layout", 20 | "text": "mkdocs.yml # The configuration file.\ndocs/\n index.md # The documentation homepage.\n ... 
# Other markdown pages, images and other files.", 21 | "title": "Project layout" 22 | }, 23 | { 24 | "location": "/about/", 25 | "text": "", 26 | "title": "About" 27 | } 28 | ] 29 | } -------------------------------------------------------------------------------- /ugc.hotel.web.esri/css/loading.css: -------------------------------------------------------------------------------- 1 | .loader { 2 | width: 150px; 3 | margin: 50px auto 70px; 4 | position: relative; 5 | } 6 | .loader .loading-1 { 7 | position: relative; 8 | width: 100%; 9 | height: 10px; 10 | border: 1px solid #93B8EB; 11 | border-radius: 10px; 12 | animation: turn 4s linear 1.75s infinite; 13 | } 14 | .loader .loading-1:before { 15 | content: ""; 16 | display: block; 17 | position: absolute; 18 | width: 0%; 19 | height: 100%; 20 | background: #1ABC9C; 21 | box-shadow: 10px 0px 15px 0px #69d2e7; 22 | animation: load 2s linear infinite; 23 | } 24 | .loader .loading-2 { 25 | width: 100%; 26 | position: absolute; 27 | top: 10px; 28 | color: #FFAD00; 29 | font-size: 22px; 30 | text-align: center; 31 | animation: bounce 2s linear infinite; 32 | } 33 | @keyframes load { 34 | 0% { 35 | width: 0%; 36 | } 37 | 87.5%, 100% { 38 | width: 100%; 39 | } 40 | } 41 | @keyframes turn { 42 | 0% { 43 | transform: rotateY(0deg); 44 | } 45 | 6.25%, 50% { 46 | transform: rotateY(180deg); 47 | } 48 | 56.25%, 100% { 49 | transform: rotateY(360deg); 50 | } 51 | } 52 | @keyframes bounce { 53 | 0%,100% { 54 | top: 10px; 55 | } 56 | 12.5% { 57 | top: 30px; 58 | } 59 | } 60 | .htmleaf-container{ 61 | margin: 0 auto; 62 | } 63 | .container{width:1170px} 64 | .row{margin-left:-15px;margin-right:-15px} 65 | .col-md-12{width:100%;z-index:3;position:absolute} -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/service/weibo/APIService.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'lizhen' 
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    """Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    The function is called up to *tries* times in total. After each failed
    attempt except the last, the failure is reported (to *logger*, or printed
    when no logger is given) and the wrapper sleeps before trying again. The
    final attempt runs outside the handler, so its exception propagates to
    the caller unchanged.

    :param ExceptionToCheck: the exception to check. may be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int
    :param backoff: backoff multiplier e.g. value of 2 will double the delay
        each retry
    :type backoff: int
    :param logger: logger to use. If None, print
    :type logger: logging.Logger instance
    """
    def deco_retry(f):

        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            while mtries > 1:
                try:
                    return f(*args, **kwargs)
                # `except ... as` is valid on Python 2.6+ and 3; the former
                # `except X, e` form is a syntax error on Python 3.
                except ExceptionToCheck as e:
                    msg = "%s, Retrying in %d seconds..." % (str(e), mdelay)
                    if logger:
                        logger.warning(msg)
                    else:
                        # Parenthesised single argument prints identically
                        # on Python 2 and 3.
                        print(msg)
                    time.sleep(mdelay)
                    mtries -= 1
                    mdelay *= backoff
            # Last attempt: let any exception reach the caller.
            return f(*args, **kwargs)

        return f_retry  # true decorator

    return deco_retry
28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | 56 | # ========================= 57 | # Operating System Files 58 | # ========================= 59 | 60 | # OSX 61 | # ========================= 62 | 63 | .DS_Store 64 | .AppleDouble 65 | .LSOverride 66 | 67 | # Thumbnails 68 | ._* 69 | 70 | # Files that might appear on external disk 71 | .Spotlight-V100 72 | .Trashes 73 | 74 | # Directories potentially created on remote AFP share 75 | .AppleDB 76 | .AppleDesktop 77 | Network Trash Folder 78 | Temporary Items 79 | .apdisk 80 | 81 | # Windows 82 | # ========================= 83 | 84 | # Windows image file caches 85 | Thumbs.db 86 | ehthumbs.db 87 | 88 | # Folder config file 89 | Desktop.ini 90 | 91 | # Recycle Bin used on file shares 92 | $RECYCLE.BIN/ 93 | 94 | # Windows Installer files 95 | *.cab 96 | *.msi 97 | *.msm 98 | *.msp 99 | 100 | # Windows shortcuts 101 | *.lnk 102 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/docs/ugc.aggregator/site/js/theme.js: -------------------------------------------------------------------------------- 1 | $( document ).ready(function() { 2 | 3 | // Shift nav in mobile when clicking the menu. 4 | $(document).on('click', "[data-toggle='wy-nav-top']", function() { 5 | $("[data-toggle='wy-nav-shift']").toggleClass("shift"); 6 | $("[data-toggle='rst-versions']").toggleClass("shift"); 7 | }); 8 | 9 | // Close menu when you click a link. 
10 | $(document).on('click', ".wy-menu-vertical .current ul li a", function() { 11 | $("[data-toggle='wy-nav-shift']").removeClass("shift"); 12 | $("[data-toggle='rst-versions']").toggleClass("shift"); 13 | }); 14 | 15 | $(document).on('click', "[data-toggle='rst-current-version']", function() { 16 | $("[data-toggle='rst-versions']").toggleClass("shift-up"); 17 | }); 18 | 19 | // Make tables responsive 20 | $("table.docutils:not(.field-list)").wrap("
"); 21 | 22 | hljs.initHighlightingOnLoad(); 23 | 24 | $('table').addClass('docutils'); 25 | }); 26 | 27 | window.SphinxRtdTheme = (function (jquery) { 28 | var stickyNav = (function () { 29 | var navBar, 30 | win, 31 | stickyNavCssClass = 'stickynav', 32 | applyStickNav = function () { 33 | if (navBar.height() <= win.height()) { 34 | navBar.addClass(stickyNavCssClass); 35 | } else { 36 | navBar.removeClass(stickyNavCssClass); 37 | } 38 | }, 39 | enable = function () { 40 | applyStickNav(); 41 | win.on('resize', applyStickNav); 42 | }, 43 | init = function () { 44 | navBar = jquery('nav.wy-nav-side:first'); 45 | win = jquery(window); 46 | }; 47 | jquery(init); 48 | return { 49 | enable : enable 50 | }; 51 | }()); 52 | return { 53 | StickyNav : stickyNav 54 | }; 55 | }($)); 56 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/service/nlp/Sentiment.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'DreamCathcer,LiuYang' 3 | 4 | import os 5 | import codecs 6 | from thulac import thulac 7 | 8 | from service.nlp.Bayes import Bayes 9 | 10 | 11 | data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 12 | 'sentiment.marshal') 13 | 14 | class Sentiment(): 15 | 16 | def __init__(self): 17 | self.classifier = Bayes() 18 | self.thu = thulac("-seg_only") 19 | train_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'sentiment.marshal') 20 | self.load(train_file) 21 | 22 | ''' 23 | 保存训练结果 24 | ''' 25 | def save(self, fname, iszip=True): 26 | self.classifier.save(fname, iszip) 27 | 28 | ''' 29 | 加载训练结果 30 | ''' 31 | def load(self, fname=data_path, iszip=True): 32 | self.classifier.load(fname, iszip) 33 | 34 | ''' 35 | 分词并过滤停止词 36 | ''' 37 | def handle(self, doc): 38 | words = self.thu.cut(doc) 39 | words = filter_stop(words) 40 | return words 41 | 42 | ''' 43 | 语料训练 44 | 对输入正负语料进行训练,统计词频 45 | ''' 46 | def 
// Front-end configuration: base hosts plus the REST path of every endpoint.
// Each path below is a relative path; the login script visible alongside this
// file builds request URLs as `domain + <path>` (e.g. `domain + checkUserUrl`).
// NOTE(review): that login script also reads `getLocation`, which is not
// declared in this file -- confirm where it is defined.

// Alternative host for the hotel REST service, kept for reference.
//var domain = "http://192.168.1.123:5000";
var domain = "http://localhost:5000";
// Fetch the number of comments per comment type.
var getCommTypeNum = "/ugc.hotel/rest/v100/hotel/get/type_score/statics";
var getViewpoint = "/ugc.hotel/rest/v100/hotel/get/viewpoint";
var getAdjective = "/ugc.hotel/rest/v100/hotel/get/adjective";
var getComments = "/ugc.hotel/rest/v100/hotel/get/comments";
var getWeiboCome = "/ugc.hotel/rest/v100/weibo/get/nearby_timeline/statics";
var getArroudFacility = "/ugc.hotel/rest/v100/map/get/aroundfacilities";
var getMaxDistance = "/ugc.hotel/rest/v100/map/get/maxdistance";
var getBedpraise = "/ugc.hotel/rest/v100/map/get/hotelbedinfo";
var getWeiboTrace = "/ugc.hotel/rest/v100/weibo/get/user_trace";
var getroomnum = "/ugc.hotel/rest/v100/map/get/hotelroomnum";
var getBaseinfoUrl = "/ugc.hotel/rest/v100/hotel/get/baseinfo";
var getHotelTrace = "/ugc.hotel/rest/v100/hotel/get/user_trace";
var getViewpointTuniu = "/ugc.hotel/rest/v100/hotel/get/tuniu/viewpoint";
var checkUserUrl = "/ugc.hotel/rest/v100/hotel/get/check_user";
var getFlowToHtml = "/ugc.hotel/rest/v100/hotel/get/html/customer_to";


// Map server host and the ArcGIS REST service paths (feature layers and
// geoprocessing tasks); presumably appended to `serverDomain` by the callers.
var serverDomain = "http://localhost:6080";
var gpUrl = '/arcgis/rest/services/GP/HexagonAnalze/GPServer/HexagonAnalyze';
var hotelUrl = "/arcgis/rest/services/NJ_Hotel/FeatureServer/1";
var sightspotUrl = "/arcgis/rest/services/NJ_Hotel/FeatureServer/0";
var kernelDensityGPUrl = '/arcgis/rest/services/GP/kernelDensityAnalysis/GPServer/kernelDensityAnalysis';
var customerFlowUrl = "/arcgis/rest/services/Customer_Flow/FeatureServer/0";
var serviceAreaGPUrl = '/arcgis/rest/services/GP/serviceArea/GPServer/serviceArea';

// (Recommendation) fetch hotel room information from the PMS.
var getRoominfo = "/ugc.hotel/rest/v100/room/get/room_info";
var userLogin = "/ugc.hotel/rest/v100/user/login";

// (Quality inspection) fetch the review status of every room on a hotel
// floor, looked up by floor number.
var getRemarkstates = "/ugc.hotel/rest/v100/quality/floorstate";
var getRoomRemark = "/ugc.hotel/rest/v100/quality/getroomremark";
var getRemarkByPoints = "/ugc.hotel/rest/v100/quality/getRemarkByPoints";
# -*- coding:utf-8 -*-
"""Generic MySQL helpers shared by the concrete DAO classes."""
__author__ = 'DreamCathcer'

import contextlib
import traceback

import MySQLdb


class SuperDAO(object):
    """Base DAO that opens one short-lived MySQL connection per operation.

    Table and column names are interpolated into the SQL text because SQL
    identifiers cannot be bound as parameters, so they must come from trusted
    code and never from user input.  Row values are always bound through the
    driver's ``%s`` placeholders.
    """

    def __init__(self, host, db, user, password):
        """Store the connection settings; no connection is opened here."""
        self.host = host
        self.db = db
        self.user = user
        self.password = password

    def _connect(self):
        """Open a new UTF-8 connection using the stored settings."""
        return MySQLdb.connect(self.host, self.user, self.password, self.db, charset='utf8')

    @staticmethod
    def _build_insert(table_name, record):
        """Return ``(sql, values)`` for inserting the dict *record* into *table_name*."""
        columns = list(record.keys())
        # Read the values through the same key list so columns and values
        # are guaranteed to line up.
        values = [record[column] for column in columns]
        placeholders = ', '.join(['%s'] * len(columns))
        sql = "insert into %s( %s ) values ( %s )" % (table_name, ', '.join(columns), placeholders)
        return sql, values

    def save_record(self, table_name, record):
        """Insert a single record (a column -> value dict) into *table_name*.

        Best effort: a failure is printed rather than raised, matching the
        original behaviour callers rely on.
        """
        # contextlib.closing guarantees close() even when something raises.
        with contextlib.closing(self._connect()) as db:
            with contextlib.closing(db.cursor()) as cursor:
                try:
                    sql, values = self._build_insert(table_name, record)
                    cursor.execute(sql, values)
                except Exception as e:
                    print(e)
                db.commit()

    def save_records(self, table_name, records):
        """Insert several records into *table_name* over one connection.

        Stops at the first row that fails to insert; rows inserted before the
        failure are still committed.
        """
        with contextlib.closing(self._connect()) as db:
            with contextlib.closing(db.cursor()) as cursor:
                for record in records:
                    sql, values = self._build_insert(table_name, record)
                    try:
                        cursor.execute(sql, values)
                    except Exception:
                        # .get(): not every table has a senti_value column,
                        # and the diagnostic itself must not raise KeyError.
                        print(record.get('senti_value'))
                        traceback.print_exc()
                        break
                db.commit()

    def get_records(self, table_name):
        """Return every row of *table_name*, or an empty list if the query fails."""
        records = []
        with contextlib.closing(self._connect()) as db:
            with contextlib.closing(db.cursor()) as cursor:
                try:
                    cursor.execute("select * from %s" % table_name)
                    records = cursor.fetchall()
                except Exception as e:
                    print(e)
                db.commit()
        return records
18 |
19 |

酒店竞业市场 时空可视化和分析 系统

20 |

基于社会感知

21 |
22 | 对不起,浏览器不支持 23 |
24 |
25 |
26 |
27 |

28 | 29 | 30 |

31 |

32 | 33 | 34 |

35 |

36 | 37 |

38 |
39 |
40 |
41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /ugc.hotel.web.esri/css/main.css: -------------------------------------------------------------------------------- 1 | .slideMenu{ 2 | position: absolute; 3 | width: 48px; 4 | height: 100%; 5 | speak: none; 6 | font-style: normal; 7 | font-weight: normal; 8 | font-variant: normal; 9 | } 10 | 11 | .slide-content { 12 | position: absolute; 13 | left: 48px; 14 | width: 401px; 15 | height: 100%; 16 | border-right:1px solid #58D68D; 17 | background: #ECF0F1; 18 | } 19 | 20 | #sentiment-table { 21 | height: 300px; 22 | width: 100%; 23 | } 24 | 25 | #word-cloud { 26 | height: 200px; 27 | width: 100%; 28 | } 29 | 30 | #review-rate { 31 | height: 200px; 32 | width: 100%; 33 | } 34 | 35 | .seamless { 36 | margin: 0px; 37 | border: 0px; 38 | } 39 | 40 | #search-box { 41 | display: block; 42 | position: absolute; 43 | z-index: 2; 44 | top: 30px; 45 | right: 74px; 46 | width: 300px; 47 | } 48 | 49 | 50 | #tag-box { 51 | display: block; 52 | position: absolute; 53 | z-index: 2; 54 | top: 2px; 55 | left: 50px; 56 | display: none; 57 | } 58 | 59 | .comparison-chart { 60 | height: 200px; 61 | width: 100%; 62 | background: #ECF0F1; 63 | 64 | } 65 | 66 | .praisecontral_charts { 67 | height: 200px; 68 | width: 100%; 69 | background: #ECF0F1; 70 | } 71 | 72 | 73 | #comparison_buttons { 74 | position: fixed; 75 | bottom: 0px; 76 | left: 48px; 77 | } 78 | 79 | #parisecontrol_buttons { 80 | position: fixed; 81 | bottom: 0px; 82 | left: 48px; 83 | } 84 | 85 | 86 | #btn_compare_roomnum { 87 | width: 133px; 88 | } 89 | 90 | 91 | #customerMap { 92 | height: 500px; 93 | } 94 | 95 | #genderPie { 96 | height: 300px; 97 | } 98 | 99 | #hexagon_buttons { 100 | position: fixed; 101 | bottom: 0px; 102 | left: 48px; 103 | } 104 | 105 | .btn-two-group { 106 | width: 185px; 107 | } 108 | 109 | .btn-third { 110 | width: 133px; 111 | } 112 | 113 | .btn-fourth { 114 | width: 92px; 115 | } 116 | 117 | 
# -*- coding:utf-8 -*-
__author__ = 'DreamCathcer,LiuYang'

import re
import os

from service.nlp.Sentiment import Sentiment


class HotelNLP(object):
    """Keyword-level sentiment extraction for hotel reviews.

    A review is split into clauses; every keyword from ``keywords.txt`` that
    occurs in a clause is assigned that clause's sentiment score.
    """

    # Compiled once at class creation instead of on every subsentence() call.
    _LINE_BREAK = re.compile(u'[\r\n]')
    _DELIMITER = re.compile(u'[,。?!;,.?!;]')

    def __init__(self):
        """Create the sentiment classifier and load the keyword list."""
        self.sentiment_parser = Sentiment()
        keywords_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'keywords.txt')
        with open(keywords_file, "r") as f:
            stripped = (line.strip() for line in f)
            # Drop blank lines: an empty keyword is "in" every string and
            # would be reported for every clause.
            self.keywords = [keyword for keyword in stripped if keyword]

    def sentiment(self, sent):
        """Return the classifier's score for *sent*."""
        return self.sentiment_parser.classify(sent)

    def viewpoint(self, sent, decoding=None):
        """Map each keyword found in *sent* to a sentiment value.

        :param sent: the review text
        :param decoding: passed through to :meth:`subsentence`
        :return: dict of keyword (decoded from UTF-8) -> sentiment value; when
            a keyword occurs in several clauses the stored value is
            repeatedly averaged with the newest clause's value
        """
        # NOTE(review): keywords are read from the file as byte strings, so
        # the substring test presumably expects byte-string clauses (i.e. a
        # caller that passes ``decoding``) -- confirm against callers.
        viewpoint = {}
        for subsent in self.subsentence(sent, decoding):
            sentiment_value = None
            for keyword in self.keywords:
                if keyword not in subsent:
                    continue
                # Score the clause lazily, and only once per clause.
                if sentiment_value is None:
                    sentiment_value = self.sentiment(subsent)
                key = keyword.decode("utf-8")
                if key in viewpoint:
                    viewpoint[key] = (viewpoint[key] + sentiment_value) / 2
                else:
                    viewpoint[key] = sentiment_value
        return viewpoint

    def subsentence(self, sent, decoding=None):
        """Split *sent* into non-empty clauses.

        Input and output are unicode by default.  When *decoding* is given,
        *sent* is first decoded with that codec and every returned clause is
        encoded as UTF-8.

        :param sent: the text to split
        :param decoding: codec name of *sent*, or None if it is already unicode
        :return: list of clauses, split on line breaks and punctuation
        """
        if decoding is not None:
            sent = sent.decode(decoding)
        sentences = []
        for line in self._LINE_BREAK.split(sent):
            line = line.strip()
            if not line:
                continue
            for clause in self._DELIMITER.split(line):
                clause = clause.strip()
                if not clause:
                    continue
                if decoding is not None:
                    sentences.append(clause.encode("utf-8"))
                else:
                    sentences.append(clause)
        return sentences
.ruby .hljs-symbol .hljs-string, 89 | .lisp .hljs-keyword, 90 | .clojure .hljs-keyword, 91 | .scheme .hljs-keyword, 92 | .tex .hljs-special, 93 | .hljs-prompt { 94 | color: #990073; 95 | } 96 | 97 | .hljs-built_in { 98 | color: #0086b3; 99 | } 100 | 101 | .hljs-preprocessor, 102 | .hljs-pragma, 103 | .hljs-pi, 104 | .hljs-doctype, 105 | .hljs-shebang, 106 | .hljs-cdata { 107 | color: #999; 108 | font-weight: bold; 109 | } 110 | 111 | .hljs-deletion { 112 | background: #fdd; 113 | } 114 | 115 | .hljs-addition { 116 | background: #dfd; 117 | } 118 | 119 | .diff .hljs-change { 120 | background: #0086b3; 121 | } 122 | 123 | .hljs-chunk { 124 | color: #aaa; 125 | } 126 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/scripts/GeocodingServiceMultiProcess.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from util.geo.GeoUtil import GeoUtil 4 | 5 | __author__ = 'geosmart' 6 | import copy_reg 7 | import multiprocessing 8 | import types 9 | from multiprocessing import Pool 10 | from time import time 11 | 12 | from service.map.baidu.SnatcherService import BaiduMapSnatcherService 13 | from util.common.CollectionUtil import CollectionUtil 14 | 15 | 16 | def _pickle_method(m): 17 | if m.im_self is None: 18 | return getattr, (m.im_class, m.im_func.func_name) 19 | else: 20 | return getattr, (m.im_self, m.im_func.func_name) 21 | 22 | 23 | copy_reg.pickle(types.MethodType, _pickle_method) 24 | # "bmm9EcjvS4TnnRzoZoYXXcAF", 25 | akList = [] 26 | class GeocodingService(object): 27 | def __init__(self): 28 | print "Constructor ... %s" % multiprocessing.current_process().name 29 | 30 | def __del__(self): 31 | print "... 
# -*- coding:utf-8 -*-
__author__ = 'DreamCathcer,LiuYang'

import collections
import csv
import operator

from snownlp.sentiment import Sentiment
import jieba.posseg as pseg
from thulac import thulac
from snownlp import normal
from dao.hotel.TuniuDao import TuniuDAO
from setting import local_hotel_setting

dao_setting = local_hotel_setting


class KeywordsHandler(object):
    """Extract candidate keywords (frequent nouns) from stored hotel comments."""

    def __init__(self):
        """Create the comment DAO and the THULAC tagger."""
        self.dao = TuniuDAO(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"])
        self.thu = thulac("-input cs.txt")

    def _collect_sentences(self):
        """Load every stored comment and split it into sentences."""
        sents = []
        for comm in self.dao.get_hotel_comments():
            # presumably column 2 holds the comment text -- verify against the DAO
            sents.extend(normal.get_sentences(comm[2]))
        print("length of sentences:%d" % len(sents))
        return sents

    @staticmethod
    def _rank_nouns(tagged_sents):
        """Count words tagged ``"n"`` and return ``(word, count)`` pairs, most frequent first.

        :param tagged_sents: iterable of sentences, each an iterable of
            ``(word, tag)`` pairs
        """
        print("counting")
        counts = collections.Counter()
        for tagged_sent in tagged_sents:
            for word, tag in tagged_sent:
                if tag == "n":
                    counts[word] += 1
        return sorted(counts.items(), key=operator.itemgetter(1), reverse=True)

    def extract_keyword(self):
        """Rank nouns in the comments using jieba part-of-speech tagging.

        :return: list of ``(noun, count)`` tuples sorted by descending count
        """
        sents = self._collect_sentences()
        pos_sents = [pseg.cut(sent) for sent in sents]
        print("length of pos_sents:%d" % len(pos_sents))
        return self._rank_nouns(pos_sents)

    def extract_keyword_by_thulac(self):
        """Rank nouns in the comments using THULAC part-of-speech tagging.

        Sentences that THULAC fails on are printed and skipped.

        :return: list of ``(noun, count)`` tuples sorted by descending count
        """
        sents = self._collect_sentences()
        pos_sents = []
        for sent in sents:
            try:
                tokens = self.thu.cut(sent.encode("utf-8"))
                # Split on the LAST underscore so a word that itself contains
                # "_" keeps its tag in the second slot.
                tagged = [token.rsplit("_", 1) for token in tokens]
            except Exception:
                print(sent)
                continue
            # Skip tokens with no tag part; indexing them raised IndexError.
            pos_sents.append([pair for pair in tagged if len(pair) == 2])
        print("length of pos_sents:%d" % len(pos_sents))
        return self._rank_nouns(pos_sents)
-------------------------------------------------------------------------------- /ugc.hotel.web.esri/js/login/globalconfig.js: -------------------------------------------------------------------------------- 1 | (function() { 2 | window.entrypage_signin_btn_click = function () { 3 | var userName = $("#account").val(); 4 | var password = $("#password").val(); 5 | if (userName == null || userName == "" || password == null || password == "") { 6 | alert("账号密码不能为空"); 7 | } else { 8 | $.ajax({ 9 | url: domain + checkUserUrl + "?user_name=" + userName + "&password=" + password, 10 | type: 'get', 11 | async: true, 12 | success: function (json) { 13 | var datajson; 14 | 15 | if (typeof (json) == "object") { 16 | //为对象 17 | datajson = json; 18 | } 19 | else { 20 | //将字符串转换为对象 21 | datajson = JSON.parse(json); 22 | } 23 | if (datajson.status != 0) { 24 | sessionStorage.user = JSON.stringify(datajson.data[0]['user']); 25 | sessionStorage.baseinfo = JSON.stringify(datajson.data[0]['baseinfo']); 26 | sessionStorage.location = JSON.stringify(datajson.data[0]['location']); 27 | window.location.href = "../html/index.html"; 28 | // window.navigate("../../html/index.html"); 29 | console.log(window.location.href); 30 | } else { 31 | $("#confirm-dialog_info").html("登录失败"); 32 | window.location.href = "#confirm-dialog"; 33 | } 34 | }, 35 | error: function (errorMsg) { 36 | $("#confirm-dialog_info").html(errorMsg); 37 | window.location.href = "#confirm-dialog"; 38 | } 39 | }); 40 | } 41 | } 42 | 43 | /** 44 | * 请求location表信息 45 | * @param locationId String locationID 46 | */ 47 | function requestLocation(locationId) { 48 | var locationData = null; 49 | var paramStr = "?location_id=" + locationId; 50 | $.ajax({ 51 | type: "get", 52 | async: false, 53 | url: domain + getLocation + paramStr, 54 | dataType: "json", 55 | timeout: 5000, 56 | success: function (result) { 57 | locationData = result; 58 | }, 59 | error: function (errorMsg) { 60 | console.log(errorMsg); 61 | 
# -*- coding:utf-8 -*-
__author__ = 'DreamCathcer,LiuYang'


from service.hotel.TuniuService import TuniuService
from service.hotel.xiecheng.DriveServices import XiechengDriverService
import time


class HotelCatcher(object):
    """Drive a hotel crawling service: list pages first, then hotel details."""

    # A hotel is abandoned once its failure count exceeds this value.
    MAX_FAILURES = 3
    # Pause after a crawl attempt raised, before trying again.
    RETRY_DELAY_SECONDS = 10

    def __init__(self, service):
        """Wrap *service*, the site-specific crawler to drive."""
        self.service = service

    def startCrawlListPage(self, city):
        """Crawl the hotel list pages for *city* and store them.

        Retries until the service reports success, clearing the partially
        collected list before each new attempt.
        """
        self.service.set_city(city)
        while not self.service.crawlListPage():
            self.service.listPageInfo = []
        self.service.saveListPageInfo()
        # service.closeDriver()

    def _crawl_with_retry(self, target):
        """Crawl one hotel, retrying on failure.

        :param target: one list-page row; items 0 and 2 are printed as the
            guid and url when the hotel is abandoned
        :return: the service's result, or False once the hotel is abandoned
        """
        failures = 0
        while failures <= self.MAX_FAILURES:
            try:
                result = self.service.crawlHotelInfo(target)
            except Exception as e:
                failures += 1
                print(e)
                time.sleep(self.RETRY_DELAY_SECONDS)
                continue
            # Only an explicit False counts as a failed crawl; other falsy
            # results end the retries without saving (original behaviour).
            if result == False:
                print("Flase %d time" % failures)
                failures += 1
                continue
            return result
        print("False at guid:%s,url:%s" % (target[0], target[2]))
        return False

    def startCrawlDetail(self, city):
        """Crawl and store the detail page of every hotel listed for *city*."""
        self.service.set_city(city)
        listPageInfo = list(self.service.getListPageInfo())
        while listPageInfo:
            # Take the next hotel off the end of the list and crawl it.
            target = listPageInfo.pop()
            if self._crawl_with_retry(target):
                self.service.saveHotelInfo()

    def set_service(self, service):
        """Replace the crawling service used by this catcher."""
        self.service = service

    def exit(self):
        """Close the crawling driver."""
        self.service.closeDriver()


if __name__ == "__main__":
    hotel_service = TuniuService()
    # hotel_service = XiechengDriverService()
    hotelCatcher = HotelCatcher(hotel_service)
    try:
        hotelCatcher.startCrawlListPage("南京")
        # Configure what to crawl:
        #hotel_service.set_crawl_content(if_crawl_hotel_comment=False,if_crawl_hotel_info=False,if_crawl_hotel_price=True)
        # Start crawling the details:
        #hotelCatcher.startCrawlDetail("南京")
    finally:
        # Always release the driver, even when crawling raised.
        hotelCatcher.exit()
# -*- coding:utf-8 -*-
"""One-off script: copy crawled Xiecheng comments into the unified hotel remark table."""
__author__ = 'DreamCathcer'


import uuid
import re
import traceback

from dao.hotel.HotelDAO import HotelDAO
from dao.hotel.xiechengdao.xiecheng import xiechengDAO
from dao.hotel.TuniuDao import TuniuDAO
from setting import local_hotel_setting

# Database configuration.
dao_setting = local_hotel_setting

hotel_dao = HotelDAO(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"])
tuniu_dao = TuniuDAO(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"])
xiecheng_dao = xiechengDAO(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"])

# Disabled Tuniu merge, kept as in the original script for reference.
# tuniu_comm = tuniu_dao.get_remarks()
#
# hotel_comm = []
# i = 0
# for comm in tuniu_comm:
#     i+=1
#     print i
#     baseinfo = hotel_dao.get_baseinfo_by_hotelname(comm[10].encode('utf-8'), '南京')
#     for info in baseinfo:
#         if info[3].encode('utf-8') == '途牛':
#             hotel_comm.append({
#                 "guid":uuid.uuid1(),
#                 "username":comm[1],
#                 "remark":comm[2],
#                 "comm_time":comm[3],
#                 "comm_type":comm[6],
#                 "user_type":comm[4],
#                 "senti_value":comm[7],
#                 "viewpoint":comm[8],
#                 "word_freq":comm[9],
#                 "baseinfo_id":info[0],
#             })
# hotel_dao.save_remarks(hotel_comm)

print('=============Tuniu Done=================')

# Matches the parenthesised agent-service notice that is removed from the
# stay-date field; compiled once rather than on every row.
agent_notice = re.compile(u"\(本次服务由代理商提供\)")

xiecheng_comms = xiecheng_dao.get_comments()
print(len(xiecheng_comms))
hotel_name = ""
baseinfo_id = ""
hotel_comm = []
# Walk the comments; column meanings are inferred from the dict keys below.
for i, comm in enumerate(xiecheng_comms, 1):
    print(i)
    # When the hotel name changes, look up the baseinfo id for the new hotel.
    if comm[0] != hotel_name:
        baseinfo_id = ""
        hotel_name = comm[0]
        baseinfo = hotel_dao.get_baseinfo_by_hotelname(hotel_name.encode('utf-8'), '南京')
        for info in baseinfo:
            if info[3] == u'携程':
                baseinfo_id = info[0]
    # Comments whose hotel has no Xiecheng baseinfo row are skipped.
    if baseinfo_id != "":
        try:
            hotel_comm.append({
                "guid": uuid.uuid1(),
                "username": comm[1],
                "remark": comm[6],
                "intime": agent_notice.sub(u"", comm[3]),
                "comm_score": float(comm[2]) if comm[2] != u'' else None,
                "user_type": comm[4],
                "baseinfo_id": baseinfo_id,
                "senti_value": comm[7],
                "viewpoint": comm[8]
            })
        except Exception:
            # A malformed row (e.g. a non-numeric score) is reported and
            # skipped; Ctrl-C and SystemExit are no longer swallowed.
            traceback.print_exc()
            print(comm)
print(len(hotel_comm))
hotel_dao.save_remarks(hotel_comm)

print('=============XieCheng Done=================')
-------------------------------------------------------------------------------- /ugc.aggregator.esri/docs/ugc.aggregator/site/mkdocs/js/search.js: -------------------------------------------------------------------------------- 1 | require([ 2 | base_url + '/mkdocs/js/mustache.min.js', 3 | base_url + '/mkdocs/js/lunr-0.5.7.min.js', 4 | 'text!search-results-template.mustache', 5 | 'text!../search_index.json', 6 | ], function (Mustache, lunr, results_template, data) { 7 | "use strict"; 8 | 9 | function getSearchTerm() 10 | { 11 | var sPageURL = window.location.search.substring(1); 12 | var sURLVariables = sPageURL.split('&'); 13 | for (var i = 0; i < sURLVariables.length; i++) 14 | { 15 | var sParameterName = sURLVariables[i].split('='); 16 | if (sParameterName[0] == 'q') 17 | { 18 | return decodeURIComponent(sParameterName[1].replace(/\+/g, '%20')); 19 | } 20 | } 21 | } 22 | 23 | var index = lunr(function () { 24 | this.field('title', {boost: 10}); 25 | this.field('text'); 26 | this.ref('location'); 27 | }); 28 | 29 | data = JSON.parse(data); 30 | var documents = {}; 31 | 32 | for (var i=0; i < data.docs.length; i++){ 33 | var doc = data.docs[i]; 34 | doc.location = base_url + doc.location; 35 | index.add(doc); 36 | documents[doc.location] = doc; 37 | } 38 | 39 | var search = function(){ 40 | 41 | var query = document.getElementById('mkdocs-search-query').value; 42 | var search_results = document.getElementById("mkdocs-search-results"); 43 | while (search_results.firstChild) { 44 | search_results.removeChild(search_results.firstChild); 45 | } 46 | 47 | if(query === ''){ 48 | return; 49 | } 50 | 51 | var results = index.search(query); 52 | 53 | if (results.length > 0){ 54 | for (var i=0; i < results.length; i++){ 55 | var result = results[i]; 56 | doc = documents[result.ref]; 57 | doc.base_url = base_url; 58 | doc.summary = doc.text.substring(0, 200); 59 | var html = Mustache.to_html(results_template, doc); 60 | search_results.insertAdjacentHTML('beforeend', html); 
61 | } 62 | } else { 63 | search_results.insertAdjacentHTML('beforeend', "

No results found

"); 64 | } 65 | 66 | if(jQuery){ 67 | /* 68 | * We currently only automatically hide bootstrap models. This 69 | * requires jQuery to work. 70 | */ 71 | jQuery('#mkdocs_search_modal a').click(function(){ 72 | jQuery('#mkdocs_search_modal').modal('hide'); 73 | }) 74 | } 75 | 76 | }; 77 | 78 | var search_input = document.getElementById('mkdocs-search-query'); 79 | 80 | var term = getSearchTerm(); 81 | if (term){ 82 | search_input.value = term; 83 | search(); 84 | } 85 | 86 | search_input.addEventListener("keyup", search); 87 | 88 | }); 89 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/docs/ugc.aggregator/site/css/theme_extra.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Tweak the overal size to better match RTD. 3 | */ 4 | body { 5 | font-size: 90%; 6 | } 7 | 8 | h3, h4, h5, h6 { 9 | color: #2980b9; 10 | font-weight: 300 11 | } 12 | 13 | /* 14 | * Sphinx doesn't have support for section dividers like we do in 15 | * MkDocs, this styles the section titles in the nav 16 | * 17 | * https://github.com/mkdocs/mkdocs/issues/175 18 | */ 19 | .wy-menu-vertical span { 20 | line-height: 18px; 21 | padding: 0.4045em 1.618em; 22 | display: block; 23 | position: relative; 24 | font-size: 90%; 25 | color: #838383; 26 | } 27 | 28 | .wy-menu-vertical .subnav a { 29 | padding: 0.4045em 2.427em; 30 | } 31 | 32 | /* 33 | * Long navigations run off the bottom of the screen as the nav 34 | * area doesn't scroll. 35 | * 36 | * https://github.com/mkdocs/mkdocs/pull/202 37 | */ 38 | .wy-nav-side { 39 | height: 100%; 40 | overflow-y: auto; 41 | } 42 | 43 | /* 44 | * readthedocs theme hides nav items when the window height is 45 | * too small to contain them. 
46 | * 47 | * https://github.com/mkdocs/mkdocs/issues/#348 48 | */ 49 | .wy-menu-vertical ul { 50 | margin-bottom: 2em; 51 | } 52 | 53 | /* 54 | * Fix wrapping in the code highlighting 55 | * 56 | * https://github.com/mkdocs/mkdocs/issues/233 57 | */ 58 | code { 59 | white-space: pre; 60 | } 61 | 62 | /* 63 | * Wrap inline code samples otherwise they shoot of the side and 64 | * can't be read at all. 65 | * 66 | * https://github.com/mkdocs/mkdocs/issues/313 67 | */ 68 | p code { 69 | word-wrap: break-word; 70 | } 71 | 72 | /* 73 | * The CSS classes from highlight.js seem to clash with the 74 | * ReadTheDocs theme causing some code to be incorrectly made 75 | * bold and italic. 76 | * 77 | * https://github.com/mkdocs/mkdocs/issues/411 78 | */ 79 | code.cs, code.c { 80 | font-weight: inherit; 81 | font-style: inherit; 82 | } 83 | 84 | /* 85 | * Fix some issues with the theme and non-highlighted code 86 | * samples. Without and highlighting styles attached the 87 | * formatting is broken. 88 | * 89 | * https://github.com/mkdocs/mkdocs/issues/319 90 | */ 91 | .no-highlight { 92 | display: block; 93 | padding: 0.5em; 94 | color: #333; 95 | } 96 | 97 | 98 | /* 99 | * Additions specific to the search functionality provided by MkDocs 100 | */ 101 | 102 | #mkdocs-search-results article h3 103 | { 104 | margin-top: 23px; 105 | border-top: 1px solid #E1E4E5; 106 | padding-top: 24px; 107 | } 108 | 109 | #mkdocs-search-results article:first-child h3 { 110 | border-top: none; 111 | } 112 | 113 | #mkdocs-search-query{ 114 | width: 100%; 115 | border-radius: 50px; 116 | padding: 6px 12px; 117 | border-color: #D1D4D5; 118 | } 119 | 120 | .wy-menu-vertical li ul { 121 | display: inherit; 122 | } 123 | 124 | .wy-menu-vertical li ul.subnav ul.subnav{ 125 | padding-left: 1em; 126 | } 127 | -------------------------------------------------------------------------------- /ugc.hotel.web.esri/html/quality-testing.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 基于社会感知的酒店竞业市场时空可视化分析 10 | 11 | 12 | 13 | 14 | 15 | 16 | 21 | 48 | 49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
# -*- coding:utf-8 -*-
# Source: /ugc.aggregator.esri/src/main/python/service/nlp/Bayes.py
__author__ = 'DreamCathcer,LiuYang'


import sys
import gzip
import marshal
from math import log, exp


class Bayes(object):
    """Naive Bayes classifier over bags of words.

    ``self.d`` maps each class label to an ``AddOneProb`` word counter and
    ``self.total`` is the sum of the per-class totals (recomputed by
    ``train`` and restored by ``load``).
    """

    def __init__(self):
        self.d = {}
        self.total = 0

    @staticmethod
    def _read_all(opener, fname):
        """Read the whole file through *opener*, always closing the handle."""
        f = opener(fname, 'rb')
        try:
            return f.read()
        finally:
            f.close()

    def save(self, fname, iszip=True):
        """Serialize the model to *fname* with ``marshal``.

        :param fname: target path; on Python 3 the suffix ``.3`` is appended.
        :param iszip: write through gzip when true, as a plain file otherwise.
        """
        payload = {'total': self.total, 'd': {}}
        for label, counter in self.d.items():
            # Counters are stored as plain attribute dicts so that marshal
            # can serialize them.
            payload['d'][label] = counter.__dict__
        if sys.version_info[0] == 3:
            fname = fname + '.3'
        opener = gzip.open if iszip else open
        f = opener(fname, 'wb')
        try:
            f.write(marshal.dumps(payload))
        finally:
            # Close even when the write fails so the handle is never leaked.
            f.close()

    def load(self, fname, iszip=True):
        """Restore a model previously written by ``save``.

        :param fname: source path; on Python 3 the suffix ``.3`` is appended.
        :param iszip: try gzip first when true; if reading as gzip raises
            ``IOError`` the file is re-read as a plain file.
        """
        if sys.version_info[0] == 3:
            fname = fname + '.3'
        if not iszip:
            raw = self._read_all(open, fname)
        else:
            try:
                raw = self._read_all(gzip.open, fname)
            except IOError:
                # Not a gzip stream: fall back to the uncompressed layout.
                raw = self._read_all(open, fname)
        d = marshal.loads(raw)
        self.total = d['total']
        self.d = {}
        for label, attrs in d['d'].items():
            self.d[label] = AddOneProb()
            self.d[label].__dict__ = attrs

    def train(self, data):
        """Accumulate word counts from *data*.

        :param data: iterable of ``(words, label)`` pairs; every word in
            ``words`` is counted once for ``label``.
        """
        for sample in data:
            label = sample[1]
            if label not in self.d:
                self.d[label] = AddOneProb()
            for word in sample[0]:
                self.d[label].add(word, 1)
        self.total = sum(counter.getsum() for counter in self.d.values())

    def classify(self, x):
        """Return ``(label, probability)`` of the most likely class for *x*.

        :param x: iterable of words.
        :return: the best label and its normalized probability; ``(0, 0)``
            when no class reaches a probability above zero (for example
            when the model has no classes).
        """
        # Log-space score per class: log prior + sum of log word frequencies.
        scores = {}
        for label in self.d:
            scores[label] = log(self.d[label].getsum()) - log(self.total)
            for word in x:
                scores[label] += log(self.d[label].freq(word))
        ret, prob = 0, 0
        for label in self.d:
            now = 0
            try:
                # Normalize without leaving log space:
                # 1 / sum_j exp(score_j - score_label).
                for other in self.d:
                    now += exp(scores[other] - scores[label])
                now = 1 / now
            except OverflowError:
                # Another class dominates so strongly that exp() overflows.
                now = 0
            if now > prob:
                ret, prob = label, now
        return (ret, prob)


class BaseProb(object):
    """Frequency table: ``d`` holds per-key counts, ``total`` their mass."""

    def __init__(self):
        self.d = {}
        self.total = 0.0
        # Count reported by ``get`` for keys that were never seen.
        self.none = 0

    def exists(self, key):
        """Return True when *key* has been counted."""
        return key in self.d

    def getsum(self):
        """Return the accumulated total."""
        return self.total

    def get(self, key):
        """Return ``(found, count)``; unseen keys yield ``(False, self.none)``."""
        if not self.exists(key):
            return False, self.none
        return True, self.d[key]

    def freq(self, key):
        """Return the count of *key* divided by the total."""
        return float(self.get(key)[1]) / self.total

    def samples(self):
        """Return the counted keys."""
        return self.d.keys()


class AddOneProb(BaseProb):
    """Frequency table with add-one smoothing (unseen keys count as 1)."""

    def __init__(self):
        BaseProb.__init__(self)
        self.none = 1

    def add(self, key, value):
        """Add *value* occurrences of *key*.

        A key seen for the first time starts at 1 (and adds 1 to the total)
        before *value* is applied.
        """
        self.total += value
        if not self.exists(key):
            self.d[key] = 1
            self.total += 1
        self.d[key] += value
in comm_list: 33 | if comm[8] is None or comm[9] is None: 34 | sentiment_value = None 35 | viewpoint = None 36 | remark = re.sub(u"\@",u"",comm[2]) 37 | try: 38 | sentiment_value = self.hotelnlp.sentiment(remark.encode("utf-8")) 39 | sentiment_value = round(sentiment_value*1000)/1000 40 | print sentiment_value 41 | except: 42 | print comm[2] 43 | traceback.print_exc() 44 | try: 45 | viewpoint = self.hotelnlp.viewpoint(remark.encode("utf-8"),decoding="utf-8") 46 | viewpoint = json.dumps(viewpoint, ensure_ascii=False) 47 | except: 48 | print remark 49 | traceback.print_exc() 50 | comm = {"guid":comm[0], "senti_value":sentiment_value, "viewpoint":viewpoint} 51 | sentiment_comm_list.append(comm) 52 | if len(sentiment_comm_list)==10000: 53 | i+=1 54 | print "update %d time"%i 55 | self.hotel_dao.update_remarks(sentiment_comm_list) 56 | sentiment_comm_list = [] 57 | 58 | 59 | def count_word_frq(self): 60 | self.thu = thulac("-input cs.txt") 61 | comm_list = self.hotel_dao.get_remarks() 62 | sentiment_comm_list = [] 63 | i = 0 64 | for comm in comm_list: 65 | a_dict = {} 66 | try: 67 | cut_comm = map(lambda x: x.split("_"), self.thu.cut(comm[2].encode("utf-8"))) 68 | except: 69 | cut_comm = [] 70 | print comm[2] 71 | traceback.print_exc() 72 | for word in cut_comm: 73 | if word[1].decode("utf-8") == "a": 74 | if word[0].decode("utf-8") not in a_dict: 75 | a_dict[word[0].decode("utf-8")] = 1 76 | else: 77 | a_dict[word[0].decode("utf-8")] += 1 78 | comm = {"guid":comm[0], "word_freq":json.dumps(a_dict, ensure_ascii=False)} 79 | sentiment_comm_list.append(comm) 80 | if len(sentiment_comm_list)==10000: 81 | i+=1 82 | print "update %d time"%i 83 | self.hotel_dao.update_hotel_comm_word_freq(sentiment_comm_list) 84 | sentiment_comm_list = [] 85 | 86 | if __name__ == "__main__": 87 | HotelSentimentProcessor().count_word_frq() -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/dao/weibo/WeiboDAO.py: 
# -*- coding: utf-8 -*-
# Source: /ugc.aggregator.esri/src/main/python/dao/weibo/WeiboDAO.py
__author__ = 'LiuYang'

import MySQLdb
import uuid
import traceback

from dao.SuperDAO import SuperDAO


class WeiboDAO(SuperDAO):
    """MySQL data-access object for Weibo ids, comments, accounts and posts.

    Every method opens its own connection from ``self.host``, ``self.user``,
    ``self.password`` and ``self.db`` and closes it before returning, even
    when a statement raises.
    """

    def __init__(self, host, db, user, password):
        SuperDAO.__init__(self, host, db, user, password)

    def _connect(self):
        """Open a new utf8 connection with this DAO's credentials."""
        return MySQLdb.connect(self.host, self.user, self.password, self.db, charset='utf8')

    # Store Weibo ids
    def saveWeiboID(self, weiboIDSet, userID, pageNum):
        """Insert one ``weibo_id`` row per id in *weiboIDSet*, committing each."""
        db = self._connect()
        cursor = db.cursor()
        try:
            for weiboID in weiboIDSet:
                cursor.execute(
                    "insert into weibo_id(guid,userID,weiboID,pageNum)values(%s,%s,%s,%s)",
                    (uuid.uuid1(), userID, weiboID, pageNum))
                db.commit()
        finally:
            cursor.close()
            db.close()

    # Store Weibo comments
    def saveWeiboComment(self, items):
        """Insert each comment dict in *items* into ``weibo_comment``.

        Best effort: a row that fails to insert is reported on stderr and
        skipped; the remaining rows are still written.
        """
        db = self._connect()
        cursor = db.cursor()
        try:
            for item in items:
                try:
                    cursor.execute(
                        "insert into weibo_comment(guid,userID,weiboID,pageNum,commPeople,commentText,commentTime,crawlTime,likeNum)values(%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                        (uuid.uuid1(), item["userID"], item["weiboID"], item["pageNum"],
                         item["commPeople"], item["commentText"], item["commentTime"],
                         item["crawlTime"], item["likeNum"]))
                except Exception:
                    # Keep going, but do not hide why the row was rejected.
                    traceback.print_exc()
                    continue
                db.commit()
        finally:
            cursor.close()
            db.close()

    # Count the API accounts
    def countweiboaccountnumber(self):
        """Return the ``fetchone()`` row of ``select count(*) from api_account``."""
        db = self._connect()
        cursor = db.cursor()
        try:
            cursor.execute("select count(*) from api_account")
            return cursor.fetchone()
        finally:
            cursor.close()
            db.close()

    # Fetch the Weibo accounts from MySQL
    def get_weibo_accounts(self):
        """Return every row of ``api_account``, or ``None`` if the query fails."""
        weibo_accounts = None
        db = self._connect()
        cursor = db.cursor()
        try:
            cursor.execute("select * from api_account")
            weibo_accounts = cursor.fetchall()
        except Exception:
            # print_exc() already writes the traceback; printing its return
            # value would only add a stray "None" line.
            traceback.print_exc()
        finally:
            cursor.close()
            db.close()
        return weibo_accounts

    def saveWeibo_ByAPI(self, weiboid, text, lat, lon, title, userid, location, userdecription, gender, created_at, fax, localcity, formatted):
        """Insert a single row into ``weibo_content`` and commit it."""
        db = self._connect()
        cursor = db.cursor()
        try:
            cursor.execute(
                "insert into weibo_content(weiboid,text,lat,lon,title,userid,location,userdescription,gender,created_at,fax,locality,formatted)values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                (weiboid, text, lat, lon, title, userid, location, userdecription,
                 gender, created_at, fax, localcity, formatted))
            db.commit()
        finally:
            cursor.close()
            db.close()

    def get_location(self, city):
        """Return the ``city_location`` row for *city*, or ``None``.

        ``None`` is returned both when no row matches and when the query
        fails (the error is printed).
        """
        db = self._connect()
        cursor = db.cursor()
        location = None
        try:
            # Bind the city as a parameter instead of formatting it into the
            # SQL text, so quotes in the value cannot alter the statement.
            cursor.execute("select * from city_location where city=%s", (city,))
            location = cursor.fetchone()
        except Exception as e:
            print(e)
        finally:
            cursor.close()
            db.close()
        return location

    def save_location(self, location):
        """Store *location* in ``city_location`` via ``save_record``.

        NOTE(review): ``save_record`` is not defined in this class;
        presumably inherited from ``SuperDAO`` - confirm.
        """
        self.save_record("city_location", location)
.legend-info{z-index:2;position:absolute;bottom:70px;left:10px;padding:10px;border:2px solid #F1C40F;/*border-radius:4px*/} 8 | .button-one {height:60px;background-color:#EBEBEC} 9 | .button-two {height:60px;background-color:#C4C4C7} 10 | .button-three {height:60px;background-color:#D7D7DC} 11 | .button-img {width:22px;height:15px;margin:20px 18px} 12 | 13 | /*--------------------ͼ��------------------------*/ 14 | .legend-red{line-height:50px;height:50px;width:75px;border-radius:4px;background-color:#F3859B;margin-bottom:3px;color:#FFF;text-align:center;} 15 | .legend-yellow{line-height:50px;height:50px;width:75px;border-radius:4px;background-color:#FCDE89;margin-bottom:3px;color:#FFF;text-align:center} 16 | .legend-green{line-height:50px;height:50px;width:75px;border-radius:4px;background-color:#BBFF8B;margin-bottom:3px;color:#FFF;text-align:center} 17 | .legend-gray{line-height:50px;height:50px;width:75px;border-radius:4px;background-color:#D7D7DC;margin-bottom:3px;color:#FFF;text-align:center} 18 | 19 | /*--------------------�����б���Ϣ------------------------*/ 20 | .button-danger {background-color:red;color:#fff;font-family:'Microsoft YaHei';font-size:14px;position:relative;right:25px;padding:5px 10px;float:right;margin-top:5px;display:block} 21 | .button-require {background-color:#D7D7DC;color:#fff;font-family:'Microsoft YaHei';font-size:14px;position:relative;right:25px;padding:5px 10px;float:right;margin-top:5px;display:block} 22 | .button-warning {background-color:#ffd800;color:#fff;font-family:'Microsoft YaHei';font-size:14px;position:relative;right:25px;padding:5px 10px;float:right;margin-top:5px;display:block} 23 | .button-default {background-color:#29C1BF;color:#fff;font-family:'Microsoft YaHei';font-size:14px;position:relative;right:25px;padding:5px 10px;float:right;margin-top:5px;display:block} 24 | 25 | /*--------------------��ֵ����------------------------*/ 26 | .sliderdiv{z-index:3;position:absolute; right:50px;bottom:50px;} 27 | 28 | 
/*--------------------�Ҳ�------------------------*/ 29 | .right {background-color:#4A5A69;width:22%;height:100%;top:53px;left:78%;position:fixed;} 30 | .right-top {background-color:white;width:100%;height:95%;border-left:1px solid #B6CFD3} 31 | .right-top-title{background-color:#B6CFD3;text-align:center;} 32 | .right-mess {overflow:auto;height:90%;} 33 | .right-list-div{border-bottom:1px solid #808080;height:45px ;} 34 | .right-list-div:hover{cursor:pointer;background-color:gray;border-color:#fff;color:white} 35 | .list-roomnum {top:7px;left:25px;position:relative;padding:10px 0px;} 36 | .list-roomnum:hover{cursor:pointer;background-color:gray;border-color:#fff;color:white} 37 | .room-button {background: transparent;border-bottom:1px solid #ffd800;color:#1ABC9C;padding:5px 20px;font-family:'Microsoft YaHei';position:relative;top:5px;display:block} 38 | /*.room-button:hover {cursor:pointer;background-color:white;border-color:#fff;color:#ffd800}*/ 39 | 40 | /*-----------------------�Ҳ��·�---------------------------*/ 41 | .right-bottom {background-color:#C4CDE4;width:100%;height:40%;border-left:2px solid #B6CFD3} 42 | 43 | /*--------------------ģ̬����------------------------*/ 44 | #remarkbody {overflow:auto;height:400px;} 45 | /*--------------------��ͼ------------------------*/ 46 | #floormap {padding: 0;margin: 0;height: 100%;width: 100%;} -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/service/map/baidu/APIService.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'DreamCathcer' 3 | 4 | import logging 5 | 6 | from util.http.UniversalSDK import APIClient 7 | 8 | logger = logging.getLogger('ugc') 9 | 10 | 11 | class BaiduMapAPIService(object): 12 | def __init__(self, ak): 13 | self.baiduClient = APIClient("http://api.map.baidu.com") 14 | self.__ak = ak 15 | 16 | ''' 17 | 正向地理编码geocoding 18 | 
文档:http://lbsyun.baidu.com/index.php?title=webapi/guide/webservice-geocoding 19 | Rest地址:http://api.map.baidu.com/geocoder/v2/ 20 | 类型:get 21 | ''' 22 | def doGeocoding(self, addressText,city=None): 23 | if city==None: 24 | data = self.baiduClient.geocoder.v2.addtrail("/").get(ak=self.__ak, output="json", address=addressText) 25 | else: 26 | data = self.baiduClient.geocoder.v2.addtrail("/").get(ak=self.__ak, output="json", address=addressText, city=city) 27 | return data 28 | 29 | def reverseGeocodingBatch(self, locationList): 30 | ''' 31 | 逆向地理编码批量处理入口 32 | 地址:http://api.map.baidu.com/geocoder/v2/ 33 | 类型:get 34 | ''' 35 | respList = self.reverseGeocodingBatchHandler(locationList=locationList, respList=[], errorList=[]) 36 | return respList 37 | 38 | def reverseGeocodingBatchHandler(self, locationList, respList, errorList): 39 | ''' 40 | 逆向地理编码批量处理,处理timeout 41 | 地址:http://api.map.baidu.com/geocoder/v2/ 42 | 类型:get 43 | ''' 44 | for i in range(0, len(locationList), 1): 45 | location = locationList[i] 46 | resp = self.reverseGeocoding(location=location) 47 | if resp is not None and resp["status"] == 0: 48 | respList.append(resp) 49 | else: 50 | logging.debug("current token: %s " % self.__ak) 51 | logging.debug( resp) 52 | logging.debug("at point:%s",str(location)) 53 | if len(errorList) > 0: 54 | # http请求异常重新处理 55 | logging.debug("http exception ,rehandle size : " + str(len(errorList))) 56 | self.reverseGeocodingBatchHandler(locationList=errorList, respList=respList, errorList=[]) 57 | return respList 58 | 59 | def reverseGeocoding(self, location, coordtype='bd09ll', output="json", pois='0'): 60 | ''' 61 | 逆向地理编码request 62 | 地址:http://api.map.baidu.com/geocoder/v2/ 63 | 类型:get 64 | coordtype,默认bd09ll,坐标的类型,目前支持的坐标类型包括:bd09ll(百度经纬度坐标)、bd09mc(百度米制坐标)、gcj02ll(国测局经纬度坐标)、wgs84ll( GPS经纬度) 65 | ''' 66 | resp = self.baiduClient.geocoder.v2.addtrail("/").get(ak=self.__ak, output="json",pois=2, location=location) 67 | return resp 68 | 69 | def placeSearchBatch(self, query, 
# -*- coding: utf-8 -*-
# Source: /ugc.aggregator.esri/src/main/python/util/geo/CoordTransor.py
import json
import math

x_pi = 3.14159265358979324 * 3000.0 / 180.0
pi = 3.1415926535897932384626  # pi
a = 6378245.0  # semi-major axis
# Labelled "flattening" in the original comment.
# NOTE(review): the value looks like a squared eccentricity - confirm.
ee = 0.00669342162296594323


def gcj02tobd09(lng, lat):
    """Convert GCJ-02 ("Mars", used by Google/AMap) to BD-09 (Baidu).

    :param lng: GCJ-02 longitude
    :param lat: GCJ-02 latitude
    :return: ``[bd_lng, bd_lat]``
    """
    z = math.sqrt(lng * lng + lat * lat) + 0.00002 * math.sin(lat * x_pi)
    theta = math.atan2(lat, lng) + 0.000003 * math.cos(lng * x_pi)
    bd_lng = z * math.cos(theta) + 0.0065
    bd_lat = z * math.sin(theta) + 0.006
    return [bd_lng, bd_lat]


def bd09togcj02(bd_lon, bd_lat):
    """Convert BD-09 (Baidu) to GCJ-02 ("Mars", used by Google/AMap).

    :param bd_lon: BD-09 longitude
    :param bd_lat: BD-09 latitude
    :return: ``[gcj_lng, gcj_lat]``
    """
    x = bd_lon - 0.0065
    y = bd_lat - 0.006
    z = math.sqrt(x * x + y * y) - 0.00002 * math.sin(y * x_pi)
    theta = math.atan2(y, x) - 0.000003 * math.cos(x * x_pi)
    gg_lng = z * math.cos(theta)
    gg_lat = z * math.sin(theta)
    return [gg_lng, gg_lat]


def _gcj_offset(lng, lat):
    """Return ``(dlng, dlat)``, the offset in degrees applied at (lng, lat)."""
    dlat = transformlat(lng - 105.0, lat - 35.0)
    dlng = transformlng(lng - 105.0, lat - 35.0)
    radlat = lat / 180.0 * pi
    magic = math.sin(radlat)
    magic = 1 - ee * magic * magic
    sqrtmagic = math.sqrt(magic)
    dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi)
    dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi)
    return dlng, dlat


def wgs84togcj02(lng, lat):
    """Convert WGS84 to GCJ-02 ("Mars" coordinates).

    :param lng: WGS84 longitude
    :param lat: WGS84 latitude
    :return: ``[gcj_lng, gcj_lat]``; the input is returned unshifted (still
        as a list) when the point is outside China.
    """
    if out_of_china(lng, lat):
        # Same container type as the shifted branch so callers always get a list.
        return [lng, lat]
    dlng, dlat = _gcj_offset(lng, lat)
    mglat = lat + dlat
    mglng = lng + dlng
    return [mglng, mglat]


def gcj02towgs84(lng, lat):
    """Convert GCJ-02 ("Mars" coordinates) back to WGS84.

    The offset is evaluated at the GCJ-02 point itself and subtracted once.

    :param lng: GCJ-02 longitude
    :param lat: GCJ-02 latitude
    :return: ``[wgs_lng, wgs_lat]``; the input is returned unshifted (still
        as a list) when the point is outside China.
    """
    if out_of_china(lng, lat):
        return [lng, lat]
    dlng, dlat = _gcj_offset(lng, lat)
    mglat = lat + dlat
    mglng = lng + dlng
    return [lng * 2 - mglng, lat * 2 - mglat]


def transformlat(lng, lat):
    """Return the raw latitude offset term used by the GCJ-02 shift."""
    ret = -100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat + \
        0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng))
    ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
            math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(lat * pi) + 40.0 *
            math.sin(lat / 3.0 * pi)) * 2.0 / 3.0
    ret += (160.0 * math.sin(lat / 12.0 * pi) + 320 *
            math.sin(lat * pi / 30.0)) * 2.0 / 3.0
    return ret


def transformlng(lng, lat):
    """Return the raw longitude offset term used by the GCJ-02 shift."""
    ret = 300.0 + lng + 2.0 * lat + 0.1 * lng * lng + \
        0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng))
    ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
            math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(lng * pi) + 40.0 *
            math.sin(lng / 3.0 * pi)) * 2.0 / 3.0
    ret += (150.0 * math.sin(lng / 12.0 * pi) + 300.0 *
            math.sin(lng / 30.0 * pi)) * 2.0 / 3.0
    return ret


def out_of_china(lng, lat):
    """Return True when (lng, lat) is outside the China bounding box.

    Points outside the box are not shifted by the conversions above.
    """
    if lng < 72.004 or lng > 137.8347:
        return True
    if lat < 0.8293 or lat > 55.8271:
        return True
    return False


if __name__ == "__main__":
    print(bd09togcj02(bd_lon=119.053739, bd_lat=32.054493))
    print(gcj02towgs84(lng=118.805048, lat=31.975998))
35 | 36 | 37 | 68 | 69 |
70 | 71 | 72 | 76 | 77 | 78 |
79 |
80 |
81 |
    82 |
  • Docs »
  • 83 | 84 | 85 |
  • 86 | 87 |
  • 88 |
89 |
90 |
91 |
92 |
93 | 94 | 95 |

Search Results

96 | 97 | 101 | 102 |
103 | Sorry, page not found. 104 |
105 | 106 | 107 |
108 |
109 |
110 | 111 | 112 |
113 | 114 |
115 | 116 | 117 |
118 | 119 | Built with MkDocs using a theme provided by Read the Docs. 120 |
121 | 122 |
123 |
124 | 125 |
126 | 127 |
128 | 129 |
130 | 131 | 132 | 133 | 134 | 135 |
136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /ugc.hotel.web.esri/css/demo.css: -------------------------------------------------------------------------------- 1 | /* General Demo Style */ 2 | @import url(http://fonts.googleapis.com/css?family=Lato:300,400,700); 3 | 4 | html { height: 100%; } 5 | 6 | body { 7 | font-family: 'Lato', Calibri, Arial, sans-serif; 8 | background: #ddd url(../images/bg.jpg) repeat top left; 9 | font-weight: 300; 10 | font-size: 15px; 11 | color: #333; 12 | -webkit-font-smoothing: antialiased; 13 | overflow-y: scroll; 14 | overflow-x: hidden; 15 | } 16 | 17 | a { 18 | color: #555; 19 | text-decoration: none; 20 | } 21 | 22 | .container { 23 | width: 100%; 24 | position: relative; 25 | } 26 | 27 | .clr { 28 | clear: both; 29 | padding: 0; 30 | height: 0; 31 | margin: 0; 32 | } 33 | 34 | .main { 35 | width: 90%; 36 | margin: 0 auto; 37 | position: relative; 38 | } 39 | 40 | .container > header { 41 | margin: 10px; 42 | padding: 20px 10px 10px 10px; 43 | position: relative; 44 | display: block; 45 | text-shadow: 1px 1px 1px rgba(0,0,0,0.2); 46 | text-align: center; 47 | } 48 | 49 | .container > header h1 { 50 | font-size: 30px; 51 | line-height: 38px; 52 | margin: 0; 53 | position: relative; 54 | font-weight: 300; 55 | color: #666; 56 | text-shadow: 0 1px 1px rgba(255,255,255,0.6); 57 | } 58 | 59 | .container > header h2 { 60 | font-size: 14px; 61 | font-weight: 300; 62 | margin: 0; 63 | padding: 15px 0 5px 0; 64 | color: #666; 65 | font-family: Cambria, Georgia, serif; 66 | font-style: italic; 67 | text-shadow: 0 1px 1px rgba(255,255,255,0.6); 68 | } 69 | 70 | /* Header Style */ 71 | .codrops-top { 72 | line-height: 24px; 73 | font-size: 11px; 74 | background: #fff; 75 | background: rgba(255, 255, 255, 0.8); 76 | text-transform: uppercase; 77 | z-index: 9999; 78 | position: relative; 79 | font-family: Cambria, Georgia, serif; 80 | box-shadow: 1px 0px 2px rgba(0,0,0,0.2); 81 | } 82 
| 83 | /* Clearfix hack by Nicolas Gallagher: http://nicolasgallagher.com/micro-clearfix-hack/ */ 84 | 85 | .codrops-top:before, 86 | .codrops-top:after { 87 | content: " "; /* 1 */ 88 | display: table; /* 2 */ 89 | } 90 | 91 | .codrops-top:after { 92 | clear: both 93 | } 94 | 95 | .codrops-top a { 96 | padding: 0px 10px; 97 | letter-spacing: 1px; 98 | color: #333; 99 | display: inline-block; 100 | } 101 | 102 | .codrops-top a:hover { 103 | background: rgba(255,255,255,0.6); 104 | } 105 | 106 | .codrops-top span.right { 107 | float: right; 108 | } 109 | 110 | .codrops-top span.right a { 111 | float: left; 112 | display: block; 113 | } 114 | 115 | /* Demo Buttons Style */ 116 | .codrops-demos { 117 | text-align:center; 118 | display: block; 119 | line-height: 30px; 120 | padding: 5px 0px; 121 | } 122 | 123 | .codrops-demos a { 124 | display: inline-block; 125 | margin: 0px 4px; 126 | padding: 0px 6px; 127 | color: #8c8c8c; 128 | line-height: 20px; 129 | font-size: 12px; 130 | font-weight: 700; 131 | text-shadow: 1px 1px 1px #fff; 132 | border: 1px solid #fff; 133 | background: #ffffff; /* Old browsers */ 134 | background: -moz-linear-gradient(top, #ffffff 0%, #f6f6f6 47%, #ededed 100%); /* FF3.6+ */ 135 | background: -webkit-gradient(linear, left top, left bottom, color-stop(0%,#ffffff), color-stop(47%,#f6f6f6), color-stop(100%,#ededed)); /* Chrome,Safari4+ */ 136 | background: -webkit-linear-gradient(top, #ffffff 0%,#f6f6f6 47%,#ededed 100%); /* Chrome10+,Safari5.1+ */ 137 | background: -o-linear-gradient(top, #ffffff 0%,#f6f6f6 47%,#ededed 100%); /* Opera 11.10+ */ 138 | background: -ms-linear-gradient(top, #ffffff 0%,#f6f6f6 47%,#ededed 100%); /* IE10+ */ 139 | background: linear-gradient(top, #ffffff 0%,#f6f6f6 47%,#ededed 100%); /* W3C */ 140 | box-shadow: 0 1px 1px rgba(0, 0, 0, 0.1); 141 | } 142 | 143 | .codrops-demos a:hover { 144 | color: #333; 145 | background: #fff; 146 | } 147 | 148 | .codrops-demos a:active { 149 | background: #fff; 150 | } 151 | 152 | 
.codrops-demos a.current-demo, 153 | .codrops-demos a.current-demo:hover { 154 | background: #555; 155 | border-color: #555; 156 | color: #ddd; 157 | text-shadow: 0 1px 1px rgba(0,0,0,0.5); 158 | box-shadow: 0 1px 0 rgba(255,255,255,0.2), inset 0 1px 1px rgba(0,0,0,0.5); 159 | } 160 | 161 | .support-note span{ 162 | color: #ac375d; 163 | font-size: 16px; 164 | display: none; 165 | font-weight: bold; 166 | text-align: center; 167 | padding: 5px 0; 168 | } -------------------------------------------------------------------------------- /ugc.hotel.web.esri/js/application.js: -------------------------------------------------------------------------------- 1 | // Some general UI pack related JS 2 | // Extend JS String with repeat method 3 | String.prototype.repeat = function (num) { 4 | return new Array(Math.round(num) + 1).join(this); 5 | }; 6 | 7 | (function ($) { 8 | 9 | // Add segments to a slider 10 | $.fn.addSliderSegments = function () { 11 | return this.each(function () { 12 | var $this = $(this), 13 | option = $this.slider('option'), 14 | amount = (option.max - option.min)/option.step, 15 | orientation = option.orientation; 16 | if ( 'vertical' === orientation ) { 17 | var output = '', i; 18 | console.log(amount); 19 | for (i = 1; i <= amount - 1; i++) { 20 | output += '
'; 21 | } 22 | $this.prepend(output); 23 | } else { 24 | var segmentGap = 100 / (amount) + '%'; 25 | var segment = '
'; 26 | $this.prepend(segment.repeat(amount - 1)); 27 | } 28 | }); 29 | }; 30 | 31 | $(function () { 32 | 33 | // Todo list 34 | $('.todo').on('click', 'li', function () { 35 | $(this).toggleClass('todo-done'); 36 | }); 37 | 38 | // Custom Selects 39 | if ($('[data-toggle="select"]').length) { 40 | $('[data-toggle="select"]').select2(); 41 | } 42 | 43 | // Checkboxes and Radio buttons 44 | $('[data-toggle="checkbox"]').radiocheck(); 45 | $('[data-toggle="radio"]').radiocheck(); 46 | 47 | // Tooltips 48 | $('[data-toggle=tooltip]').tooltip('show'); 49 | 50 | // jQuery UI Sliders 51 | var $slider = $('#slider'); 52 | if ($slider.length > 0) { 53 | $slider.slider({ 54 | min: 0, 55 | max: 10, 56 | values: [1,4], 57 | orientation: 'horizontal', 58 | range: true 59 | }).addSliderSegments($slider.slider('option').max, 'vertical'); 60 | } 61 | 62 | var $verticalSlider = $('#vertical-slider'); 63 | if ($verticalSlider.length) { 64 | $verticalSlider.slider({ 65 | min: 0, 66 | max: 10, 67 | values: [4,5], 68 | orientation: 'horizontal', 69 | range: 'min' 70 | }).addSliderSegments($verticalSlider.slider('option').max, 'vertical'); 71 | } 72 | 73 | 74 | 75 | // Focus state for append/prepend inputs 76 | $('.input-group').on('focus', '.form-control', function () { 77 | $(this).closest('.input-group, .form-group').addClass('focus'); 78 | }).on('blur', '.form-control', function () { 79 | $(this).closest('.input-group, .form-group').removeClass('focus'); 80 | }); 81 | 82 | // Make pagination demo work 83 | $('.pagination').on('click', 'a', function () { 84 | $(this).parent().siblings('li').removeClass('active').end().addClass('active'); 85 | }); 86 | 87 | $('.btn-group').on('click', 'a', function () { 88 | $(this).siblings().removeClass('active').end().addClass('active'); 89 | }); 90 | 91 | // Disable link clicks to prevent page scrolling 92 | $(document).on('click', 'a[href="#fakelink"]', function (e) { 93 | e.preventDefault(); 94 | }); 95 | 96 | // Switches 97 | if 
($('[data-toggle="switch"]').length) { 98 | $('[data-toggle="switch"]').bootstrapSwitch(); 99 | } 100 | 101 | // Typeahead 102 | if ($('#typeahead-demo-01').length) { 103 | var states = new Bloodhound({ 104 | datumTokenizer: function (d) { return Bloodhound.tokenizers.whitespace(d.word); }, 105 | queryTokenizer: Bloodhound.tokenizers.whitespace, 106 | limit: 4, 107 | local: [ 108 | { word: 'Alabama' }, 109 | { word: 'Alaska' }, 110 | { word: 'Arizona' }, 111 | { word: 'Arkansas' }, 112 | { word: 'California' }, 113 | { word: 'Colorado' } 114 | ] 115 | }); 116 | 117 | states.initialize(); 118 | 119 | $('#typeahead-demo-01').typeahead(null, { 120 | name: 'states', 121 | displayKey: 'word', 122 | source: states.ttAdapter() 123 | }); 124 | } 125 | 126 | // make code pretty 127 | window.prettyPrint && prettyPrint(); 128 | 129 | }); 130 | 131 | })(jQuery); 132 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/docs/ugc.aggregator/site/about/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | About - pybuilder.helloworld 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 |
37 | 38 | 39 | 74 | 75 |
76 | 77 | 78 | 82 | 83 | 84 |
85 |
86 |
87 |
    88 |
  • Docs »
  • 89 | 90 | 91 | 92 |
  • About
  • 93 |
  • 94 | 95 |
  • 96 |
97 |
98 |
99 |
100 |
101 | 102 | 103 | 104 |
105 |
106 |
107 | 108 | 114 | 115 | 116 |
117 | 118 |
119 | 120 | 121 |
122 | 123 | Built with MkDocs using a theme provided by Read the Docs. 124 |
125 | 126 |
127 |
128 | 129 |
130 | 131 |
132 | 133 |
134 | 135 | 136 | 137 | « Previous 138 | 139 | 140 | 141 |
class HotelService(object):
    """Shared Selenium plumbing for the hotel crawlers.

    Owns one Chrome WebDriver session and three flags that say which parts of
    a hotel (info, comments, prices) should be crawled.  The crawl*/save*/get*
    methods are empty hooks in this class; presumably the OTA-specific
    services override them -- confirm against the subclasses.
    """

    # After every _REFRESH_EVERY unsuccessful attempts the scroll helpers jump
    # back to the starting edge of the page (optionally refreshing first).
    _REFRESH_EVERY = 500
    # Attempt number at which the scroll helpers print the URL and give up.
    _MAX_ATTEMPTS = 1501

    def __init__(self):
        """Start a Chrome WebDriver and enable all three crawl stages."""
        self.driver = webdriver.Chrome()
        self.if_crawl_hotel_info = True
        self.if_crawl_hotel_comment = True
        self.if_crawl_hotel_price = True
        self._city = None

    def openPage(self, url):
        """Load ``url`` in the driver, then maximize the browser window."""
        self.driver.get(url)
        self.driver.maximize_window()

    def wait(self, timeout):
        """Set the driver's implicit wait to ``timeout``."""
        self.driver.implicitly_wait(timeout)

    def closeDriver(self):
        """Close the driver's current window.

        NOTE(review): ``close()`` only closes the current window; if the
        intent is to end the whole WebDriver session, ``quit()`` is the call
        to use -- confirm with the callers before changing it.
        """
        self.driver.close()

    def crawlListPage(self):
        """Hook: walk the hotel list pages and collect detail-page links.

        The original note says it should return True on success and False on
        failure; this base implementation does nothing and returns None.
        """
        pass

    def saveListPageInfo(self):
        """Hook: persist the data crawled from the list pages (no-op here)."""
        pass

    def set_city(self, city):
        """Remember the city to crawl."""
        self._city = city

    def crawlHotelInfo(self, target):
        """Hook: crawl the details of one hotel (no-op here)."""
        pass

    def saveHotelInfo(self):
        """Hook: persist the crawled hotel details (no-op here)."""
        pass

    def getListPageInfo(self):
        """Hook: return the data crawled from the list pages (no-op here)."""
        pass

    def _jump_to_edge(self, from_bottom):
        """Send END or HOME to <body> and return the arrow key to scroll with.

        :param from_bottom: True jumps to the page end and returns ARROW_UP;
            False jumps to the page start and returns ARROW_DOWN.
        """
        body = self.driver.find_element_by_tag_name("body")
        if from_bottom:
            body.send_keys(Keys.END)
            return Keys.ARROW_UP
        body.send_keys(Keys.HOME)
        return Keys.ARROW_DOWN

    def scroll_and_click_by_partial_link_text(self, text, from_bottom=False):
        """Scroll one arrow-key press at a time until a link can be clicked.

        After each key press the link whose text contains ``text`` is looked
        up and clicked; the method returns on the first successful click.
        Every 500th attempt the page is refreshed (followed by a 2 second
        pause) and the scroll position is reset; on attempt 1501 the current
        URL is printed and the method gives up.

        :param text: partial link text of the element to click.
        :param from_bottom: start at the page end and scroll up instead of
            starting at the top and scrolling down.
        """
        key = self._jump_to_edge(from_bottom)
        attempts = 0
        while True:
            attempts += 1
            if attempts % self._REFRESH_EVERY == 0:
                self.driver.refresh()
                time.sleep(2)
                self._jump_to_edge(from_bottom)
            if attempts == self._MAX_ATTEMPTS:
                print(u"点击评论类型出错" + self.driver.current_url)
                break
            self.driver.find_element_by_tag_name("body").send_keys(key)
            try:
                self.driver.find_element_by_partial_link_text(text).click()
                break
            except Exception:
                # Lookup or click failed at this scroll position: keep
                # scrolling.  ``Exception`` (not a bare except) so Ctrl-C can
                # still interrupt the loop.
                continue

    def scroll_and_click_by_xpath(self, text, from_bottom=False, refresh_if_failed=True, sleep_time=0):
        """Scroll one arrow-key press at a time until an XPath can be clicked.

        Same loop as :meth:`scroll_and_click_by_partial_link_text`, but the
        element is located with the XPath expression ``text``.

        :param text: XPath expression of the element to click.
        :param from_bottom: start at the page end and scroll up.
        :param refresh_if_failed: refresh the page on every 500th attempt.
        :param sleep_time: seconds to pause after each jump or refresh.
        """
        key = self._jump_to_edge(from_bottom)
        time.sleep(sleep_time)
        attempts = 0
        while True:
            attempts += 1
            if attempts % self._REFRESH_EVERY == 0:
                # NOTE(review): the nesting below is the most plausible
                # reading of the original (refresh is optional, the jump back
                # to the page edge always happens) -- confirm.
                if refresh_if_failed:
                    self.driver.refresh()
                    time.sleep(sleep_time)
                self._jump_to_edge(from_bottom)
                time.sleep(sleep_time)
            if attempts == self._MAX_ATTEMPTS:
                print(u"点击评论类型出错" + self.driver.current_url)
                break
            self.driver.find_element_by_tag_name("body").send_keys(key)
            try:
                self.driver.find_element_by_xpath(text).click()
                break
            except Exception:
                # Lookup or click failed at this scroll position: keep going.
                continue

    def set_crawl_content(self, if_crawl_hotel_info, if_crawl_hotel_comment, if_crawl_hotel_price):
        """Choose which parts of a hotel are crawled (info, comments, prices)."""
        self.if_crawl_hotel_info = if_crawl_hotel_info
        self.if_crawl_hotel_comment = if_crawl_hotel_comment
        self.if_crawl_hotel_price = if_crawl_hotel_price
1 | (function () { 2 | 3 | var user = JSON.parse(sessionStorage.user); 4 | 5 | var location = JSON.parse(sessionStorage.location); 6 | 7 | //初始化页面 8 | (function () { 9 | // navbar菜单账号名 10 | if (user["user_name"] != null) 11 | document.getElementById("user_name").innerHTML = location["hotel_name"] + document.getElementById("user_name").innerHTML; 12 | })(); 13 | 14 | window.reviewMonitor = function(ota) { 15 | // initial 页脚和页面评论 16 | var commentsData = requestComments(location['hotel_name'], 1, ota); 17 | // 如果成功返回数据 18 | if (commentsData != null) { 19 | document.getElementById("page_list").innerHTML = generateFooterHtml(location['hotel_name'], 1, commentsData["pageNum"], ota); 20 | document.getElementById("comment_list").innerHTML = generateCommentsHtml(commentsData["comments_info"]); 21 | } 22 | }; 23 | 24 | /** 25 | * 请求酒店的评论 26 | * @param hotelName String 酒店名 27 | * @param text String 待查询文本 28 | * @param page Int 页下标 29 | * @param ota String OTA名称 30 | */ 31 | function requestComments(hotelName, page, ota) { 32 | var commentsData = null; 33 | var paramStr = "?hotel_name=" + hotelName + "&page=" + page + "&ota=" + ota; 34 | $.ajax({ 35 | type: "get", 36 | async: false, 37 | url: domain + getComments + paramStr, 38 | dataType: "json", 39 | timeout: 5000, 40 | success: function (result) { 41 | commentsData = result; 42 | }, 43 | error: function (errorMsg) { 44 | console.log(errorMsg); 45 | alert("你输入的值有误,请输入完整参数或者重试"); 46 | } 47 | }); 48 | return commentsData; 49 | } 50 | 51 | /** 52 | * 根据页数生成切页列表 53 | * @param hoteName String 酒店名 54 | * @param text Stirng 查询文本 55 | * @param origin int 起始页数 56 | * @param pageNum int 总页数 57 | */ 58 | function generateFooterHtml(hotelName, origin, pageNum, ota) { 59 | var pagination = ""; 60 | for (var i = origin; i <= pageNum; i++) { 61 | if (i < origin + 10) { 62 | // 对前10页做处理 63 | if (i == origin) { 64 | pagination += ''; 65 | } 66 | pagination += "
  • " + i + '
  • '; 67 | if (i == pageNum) { 68 | pagination += ''; 69 | } 70 | } else { 71 | for (var j = origin + 10; j <= pageNum; j += 10) { 72 | if (j == origin + 10) { 73 | // 开始li标签,放置一个三角形按钮 74 | pagination += '
  • '; 80 | } else { 81 | pagination += '
  • ' + j + '–' + (j + 10 - 1) + '
  • '; 82 | } 83 | } 84 | break; 85 | } 86 | } 87 | return pagination; 88 | } 89 | 90 | /** 91 | * 点击切页时执行 92 | * @param hotelName Stirng 酒店名 93 | */ 94 | window.pageIndexClick = function (hotelName, ota, e) { 95 | console.log(e); 96 | var commentsData = requestComments(hotelName, e.text, ota); 97 | // 如果成功返回数据 98 | if (commentsData != null) { 99 | document.getElementById("comment_list").innerHTML = generateCommentsHtml(commentsData["comments_info"]); 100 | } 101 | $('html, body').animate({ 102 | scrollTop: $("html").offset().top 103 | }, 500); 104 | } 105 | 106 | /** 107 | * 生成评论列 108 | * @param comments list 评论内容 109 | */ 110 | function generateCommentsHtml(comments) { 111 | var commentsHtml = ""; 112 | for (var i = 0; i < comments.length; i++) { 113 | commentsHtml += '
  • ' + comments[i][2] + '
  • '; 114 | } 115 | return commentsHtml; 116 | } 117 | 118 | })(); -------------------------------------------------------------------------------- /ugc.hotel.web.esri/html/setting.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 基于社会感知的酒店竞业市场时空可视化分析 10 | 11 | 12 | 13 | 14 | 15 | 16 | 21 | 48 | 49 |
    50 |
    51 |
    52 |
    53 |
    54 |
    55 | 携程 56 |

    携程网

    57 |

    100% convertible to HTML/CSS layout.

    58 | 查看评论 59 |
    60 |
    61 |
    62 |
    63 | 艺龙 64 |

    艺龙网

    65 |

    Vector-based shapes and minimum of layer styles.

    66 | 查看评论 67 |
    68 |
    69 | 70 |
    71 |
    72 | 途牛 73 |

    途牛网

    74 |

    Easy to add or change elements.

    75 | 查看评论 76 |
    77 |
    78 | 79 |
    80 |
    81 | 去哪儿 82 |

    去哪儿网

    83 |

    Your likes, shares and comments help us.

    84 | 查看评论 85 |
    86 |
    87 |
    88 |
    89 |
    90 |
    91 |
    92 |
    93 | 94 | 95 | -------------------------------------------------------------------------------- /ugc.hotel.web.esri/html/public-opinion-monitor.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 基于社会感知的酒店竞业市场时空可视化分析 10 | 11 | 12 | 13 | 14 | 15 | 16 | 21 | 48 | 49 |
    50 |
    51 |
    52 |
    53 |
    54 |
    55 | 携程 56 |

    携程网

    57 |

    100% convertible to HTML/CSS layout.

    58 | 查看评论 59 |
    60 |
    61 |
    62 |
    63 | 艺龙 64 |

    艺龙网

    65 |

    Vector-based shapes and minimum of layer styles.

    66 | 查看评论 67 |
    68 |
    69 | 70 |
    71 |
    72 | 途牛 73 |

    途牛网

    74 |

    Easy to add or change elements.

    75 | 查看评论 76 |
    77 |
    78 | 79 |
    80 |
    81 | 去哪儿 82 |

    去哪儿网

    83 |

    Your likes, shares and comments help us.

    84 | 查看评论 85 |
    86 |
    87 |
    88 |
    89 |
    90 |
    91 |
    92 |
    93 | 94 | 95 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/service/map/baidu/CoordinateTransferService.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import json 3 | import requests 4 | import math 5 | 6 | key = 'your key here' # 这里填写你的百度开放平台的key 7 | x_pi = 3.14159265358979324 * 3000.0 / 180.0 8 | pi = 3.1415926535897932384626 # π 9 | a = 6378245.0 # 长半轴 10 | ee = 0.00669342162296594323 # 扁率 11 | 12 | 13 | def geocode(address): 14 | """ 15 | 利用百度geocoding服务解析地址获取位置坐标 16 | :param address:需要解析的地址 17 | :return: 18 | """ 19 | geocoding = {'s': 'rsv3', 20 | 'key': key, 21 | 'city': '全国', 22 | 'address': address} 23 | res = requests.get( 24 | "http://restapi.amap.com/v3/geocode/geo", params=geocoding) 25 | if res.status_code == 200: 26 | json = res.json() 27 | status = json.get('status') 28 | count = json.get('count') 29 | if status == '1' and int(count) >= 1: 30 | geocodes = json.get('geocodes')[0] 31 | lng = float(geocodes.get('location').split(',')[0]) 32 | lat = float(geocodes.get('location').split(',')[1]) 33 | return [lng, lat] 34 | else: 35 | return None 36 | else: 37 | return None 38 | 39 | 40 | def gcj02tobd09(lng, lat): 41 | """ 42 | 火星坐标系(GCJ-02)转百度坐标系(BD-09) 43 | 谷歌、高德——>百度 44 | :param lng:火星坐标经度 45 | :param lat:火星坐标纬度 46 | :return: 47 | """ 48 | z = math.sqrt(lng * lng + lat * lat) + 0.00002 * math.sin(lat * x_pi) 49 | theta = math.atan2(lat, lng) + 0.000003 * math.cos(lng * x_pi) 50 | bd_lng = z * math.cos(theta) + 0.0065 51 | bd_lat = z * math.sin(theta) + 0.006 52 | return [bd_lng, bd_lat] 53 | 54 | 55 | def bd09togcj02(bd_lon, bd_lat): 56 | """ 57 | 百度坐标系(BD-09)转火星坐标系(GCJ-02) 58 | 百度——>谷歌、高德 59 | :param bd_lat:百度坐标纬度 60 | :param bd_lon:百度坐标经度 61 | :return:转换后的坐标列表形式 62 | """ 63 | x = bd_lon - 0.0065 64 | y = bd_lat - 0.006 65 | z = math.sqrt(x * x + y * y) - 0.00002 * math.sin(y * x_pi) 66 | theta = 
math.atan2(y, x) - 0.000003 * math.cos(x * x_pi) 67 | gg_lng = z * math.cos(theta) 68 | gg_lat = z * math.sin(theta) 69 | return [gg_lng, gg_lat] 70 | 71 | 72 | def wgs84togcj02(lng, lat): 73 | """ 74 | WGS84转GCJ02(火星坐标系) 75 | :param lng:WGS84坐标系的经度 76 | :param lat:WGS84坐标系的纬度 77 | :return: 78 | """ 79 | if out_of_china(lng, lat): # 判断是否在国内 80 | return lng, lat 81 | dlat = transformlat(lng - 105.0, lat - 35.0) 82 | dlng = transformlng(lng - 105.0, lat - 35.0) 83 | radlat = lat / 180.0 * pi 84 | magic = math.sin(radlat) 85 | magic = 1 - ee * magic * magic 86 | sqrtmagic = math.sqrt(magic) 87 | dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi) 88 | dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi) 89 | mglat = lat + dlat 90 | mglng = lng + dlng 91 | return [mglng, mglat] 92 | 93 | 94 | def gcj02towgs84(lng, lat): 95 | """ 96 | GCJ02(火星坐标系)转GPS84 97 | :param lng:火星坐标系的经度 98 | :param lat:火星坐标系纬度 99 | :return: 100 | """ 101 | if out_of_china(lng, lat): 102 | return lng, lat 103 | dlat = transformlat(lng - 105.0, lat - 35.0) 104 | dlng = transformlng(lng - 105.0, lat - 35.0) 105 | radlat = lat / 180.0 * pi 106 | magic = math.sin(radlat) 107 | magic = 1 - ee * magic * magic 108 | sqrtmagic = math.sqrt(magic) 109 | dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi) 110 | dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi) 111 | mglat = lat + dlat 112 | mglng = lng + dlng 113 | return [lng * 2 - mglng, lat * 2 - mglat] 114 | 115 | 116 | def transformlat(lng, lat): 117 | ret = -100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat + \ 118 | 0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng)) 119 | ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 * 120 | math.sin(2.0 * lng * pi)) * 2.0 / 3.0 121 | ret += (20.0 * math.sin(lat * pi) + 40.0 * 122 | math.sin(lat / 3.0 * pi)) * 2.0 / 3.0 123 | ret += (160.0 * math.sin(lat / 12.0 * pi) + 320 * 124 | math.sin(lat * pi / 30.0)) * 2.0 / 3.0 125 | return ret 126 | 127 | 128 | def 
transformlng(lng, lat): 129 | ret = 300.0 + lng + 2.0 * lat + 0.1 * lng * lng + \ 130 | 0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng)) 131 | ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 * 132 | math.sin(2.0 * lng * pi)) * 2.0 / 3.0 133 | ret += (20.0 * math.sin(lng * pi) + 40.0 * 134 | math.sin(lng / 3.0 * pi)) * 2.0 / 3.0 135 | ret += (150.0 * math.sin(lng / 12.0 * pi) + 300.0 * 136 | math.sin(lng / 30.0 * pi)) * 2.0 / 3.0 137 | return ret 138 | 139 | 140 | def out_of_china(lng, lat): 141 | """ 142 | 判断是否在国内,不在国内不做偏移 143 | :param lng: 144 | :param lat: 145 | :return: 146 | """ 147 | if lng < 72.004 or lng > 137.8347: 148 | return True 149 | if lat < 0.8293 or lat > 55.8271: 150 | return True 151 | return False 152 | 153 | 154 | if __name__ == '__main__': 155 | lng = 128.543 156 | lat = 37.065 157 | result1 = gcj02tobd09(lng, lat) 158 | result2 = bd09togcj02(lng, lat) 159 | result3 = wgs84togcj02(lng, lat) 160 | result4 = gcj02towgs84(lng, lat) 161 | result5 = geocode('北京市朝阳区朝阳公园') 162 | print result1, result2, result3, result4, result5 163 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/scripts/Hotel/TuniuCatcher.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'DreamCathcer' 3 | 4 | 5 | import time 6 | import uuid 7 | 8 | from service.hotel.TuniuAPIClient import TuniuAPIClient 9 | from dao.hotel.HotelDAO import HotelDAO 10 | from setting import local_hotel_setting 11 | from util.geo import CoordTransor 12 | 13 | # 配置数据库 14 | dao_setting = local_hotel_setting 15 | 16 | 17 | class TuniuCatcher(object): 18 | 19 | def __init__(self): 20 | self._city = None 21 | self.__ota_info = "途牛" 22 | self.tuniu_api_client = TuniuAPIClient() 23 | self.hotel_dao = HotelDAO(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"]) 24 | 25 | def setCity(self, city): 26 | self._city = city 27 | 28 | def 
class TuniuCatcher(object):
    """Fetch the Tuniu hotel list for one city and sync it into the hotel DB."""

    def __init__(self):
        """Create the Tuniu API client and the hotel DAO from ``dao_setting``."""
        self._city = None
        self.__ota_info = "途牛"
        self.tuniu_api_client = TuniuAPIClient()
        self.hotel_dao = HotelDAO(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"])

    def setCity(self, city):
        """Remember the city name used by the DAO queries and new rows."""
        self._city = city

    def getHotelList(self, city_code, max_attempts=3, page_delay=5):
        """Download the hotel list of ``city_code`` page by page.

        :param city_code: city code passed to the Tuniu API client.
        :param max_attempts: consecutive failed requests tolerated for one
            page before the crawl stops and returns what it has.  The
            original retried forever, without a pause, on any error.
        :param page_delay: seconds to sleep after each page and before each
            retry.
        :return: list of hotel records, or None when no city has been set.
        """
        if self._city is None:
            print("未设置城市,请先使用setCity方法")
            return
        hotel_list = []
        page_index = 1
        page_amount = 10000
        failures = 0
        # NOTE(review): the bound ``page_amount - 1`` combined with the floor
        # division below leaves the last (partial) page unfetched; kept as in
        # the original, whose comment says the reported total is unstable --
        # confirm this is intentional.
        while page_index <= page_amount - 1:
            try:
                page_data = self.tuniu_api_client.get_hotel_list(page_index, city_code)
                reported_pages = page_data["data"]["total"] // 20
                page_hotels = page_data["data"]["list"]
            except Exception:
                failures += 1
                print("Page_%d Fail" % page_index)
                if failures >= max_attempts:
                    print("Page_%d abandoned after %d attempts" % (page_index, failures))
                    break
                time.sleep(page_delay)
                continue
            failures = 0
            # The hotel total reported by the API is unstable, so the page
            # count only ever shrinks to the smallest value seen.
            if page_amount > reported_pages:
                page_amount = reported_pages
                print("page_amount=%d" % page_amount)
            hotel_list.extend(page_hotels)
            print("Page_%d Success" % page_index)
            time.sleep(page_delay)
            page_index += 1
        return hotel_list

    def saveHolteList(self, hotel_list):
        """Merge ``hotel_list`` into the location and baseinfo tables.

        Hotels whose name is not yet stored for the city get a new location
        row.  A hotel that already has a baseinfo row for this OTA has that
        row updated in place; otherwise a new baseinfo row is created.  Every
        pre-existing row not matched by ``hotel_list`` is written back with
        ``if_overtime`` set to 1.

        :param hotel_list: hotel dicts from the API; the keys read here are
            ``name``, ``pos`` (``lng``/``lat``), ``address``, ``url``,
            ``remarkCount``, ``snapshot`` and ``id``.
        """
        old_location_info = self.hotel_dao.get_locations(self._city)
        # Copy the rows into mutable lists and assume every row is stale until
        # the hotel is seen again (index 5 is written back as if_overtime).
        old_baseinfo = [list(row) for row in self.hotel_dao.get_baseinfo(self._city, self.__ota_info)]
        for row in old_baseinfo:
            row[5] = 1

        # Index the existing rows once; setdefault keeps the first match,
        # which is what the original linear scans with ``break`` returned.
        location_id_by_name = {}
        for location in old_location_info:
            location_id_by_name.setdefault(location[3], location[0])
        baseinfo_by_location = {}
        for row in old_baseinfo:
            baseinfo_by_location.setdefault(row[2], row)

        new_locations = []
        new_baseinfo = []
        for item in hotel_list:
            location_id = location_id_by_name.get(item["name"])
            if location_id is None:
                # Unknown hotel: create its location row.
                location_id = uuid.uuid1()
                trans_location = CoordTransor.gcj02towgs84(lng=float(item["pos"]["lng"]), lat=float(item["pos"]["lat"]))
                new_locations.append({
                    "guid": location_id,
                    # NOTE(review): assuming gcj02towgs84 returns [lng, lat],
                    # this stores latitude in "x" and longitude in "y" --
                    # confirm against the table's convention.
                    "x": trans_location[1],
                    "y": trans_location[0],
                    "hotel_name": item["name"],
                    "city": self._city,
                    "address": item["address"]
                })
            row = baseinfo_by_location.get(location_id)
            if row is not None:
                # Read the stored count BEFORE overwriting it; the original
                # overwrote it first, so the increment was always 0.
                previous_count = int(row[4] or 0)
                row[1] = item["url"]
                row[4] = item["remarkCount"]
                row[5] = 0
                row[6] = max(int(item["remarkCount"]) - previous_count, 0)
                row[7] = item["snapshot"]
                row[8] = item["id"]
            else:
                new_baseinfo.append({
                    "guid": uuid.uuid1(),
                    "url": item["url"],
                    "location_id": location_id,
                    "OTA": self.__ota_info,
                    "comm_num": item["remarkCount"],
                    "if_overtime": 0,
                    "incre_num": item["remarkCount"],
                    "img": item["snapshot"],
                    "id_in_ota": item["id"]
                })
        update_baseinfo = [{
            "guid": row[0],
            "url": row[1],
            "location_id": row[2],
            "OTA": row[3],
            "comm_num": row[4],
            "if_overtime": row[5],
            "incre_num": row[6],
            "img": row[7],
            "id_in_ota": row[8]
        } for row in old_baseinfo]
        print("%d %d %d" % (len(new_locations), len(new_baseinfo), len(update_baseinfo)))
        self.hotel_dao.save_locations(new_locations)
        self.hotel_dao.save_baseinfo(new_baseinfo)
        self.hotel_dao.update_baseinfo(update_baseinfo)


if __name__ == "__main__":
    starttime = time.time()
    tuniu_catcher = TuniuCatcher()
    tuniu_catcher.setCity("南京")
    hotel_list = tuniu_catcher.getHotelList(1602)
    tuniu_catcher.saveHolteList(hotel_list)
    endtime = time.time()
    print(endtime - starttime)
def frange2(x, y, step):
    """Generate x, x+step, x+2*step, ... and finish exactly on y.

    Values strictly below ``y`` are yielded as they are reached; the first
    value that reaches or passes ``y`` is clamped to ``y`` and yielded last.
    Nothing is yielded when ``x`` is not below ``y`` to begin with.
    """
    value = x
    while True:
        if not value < y:
            return
        yield value
        value += step
        if value >= y:
            # Overshoot (or exact hit): end the sequence on the upper bound.
            yield y
            return
= FileUtil().readFileToObj(file) 79 | logger.debug("read file %s,size %s" % (file, len(myList))) 80 | pointList.extend(myList) 81 | 82 | print "cut file %s to %s" % (file, path_bak) 83 | shutil.move(file, path_bak + "") 84 | else: 85 | break 86 | # 将所有点按goodAkList的数目分桶装载 87 | chunkPoints = CollectionUtil().chunksByAverage(pointList,len(goodAkList)) 88 | threads = [] 89 | threadSize = len(chunkPoints) 90 | logger.debug('thread size ...%s ' % threadSize) 91 | # 6个线程分发桶中的点数据 92 | for i in xrange(0, threadSize, 1): 93 | threadChunkPointsList = CollectionUtil().chunksBySize(chunkPoints[i], limitSize / 6) 94 | for j in xrange(0, len(threadChunkPointsList), 1): 95 | index = str(i) + "_" + str(j) 96 | logger.debug('current thread ...%s ' % index) 97 | threads.append(gevent.spawn(self.fetchAddressNodeByPoints, i, threadChunkPointsList[j])) 98 | gevent.joinall(threads) 99 | # 将地址节表中的空字符串设置为null 100 | snatcherService = BaiduMapSnatcherService(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"]) 101 | snatcherService.setNullStrToNull(self.addressNodeTableName) 102 | 103 | def concurrentRequest(self): 104 | # 测试 105 | # bounds = [113.149662, 23.038528, 113.15175, 23.039123] 106 | # 桂城街道 107 | bounds = [113.129391, 22.98257, 113.261335, 23.072904] 108 | # 狮山镇 109 | # bounds = [113.092391, 23.132011, 113.123293, 23.167699] 110 | # 南京 111 | # bounds = [118.710042, 31.960759, 118.905082, 32.134843] 112 | # 获取区域内点集 113 | points = GeoUtil().getPointByBounds(bounds, 1000) 114 | start = int(math.ceil(len(points) / 2)) 115 | # 点集拆分爬取 116 | # end = len(points) 117 | # points = points[start:end] 118 | points = points[0:start] 119 | 120 | logger.debug('points size %s' % len(points)) 121 | # 点集合子集 122 | subPoints = CollectionUtil().chunksByAverage(points, len(goodAkList)) 123 | 124 | threads = [] 125 | processSize = len(subPoints) 126 | logger.debug('process size ...%s ,per process data size...%s' % (processSize, len(subPoints))) 127 | for index in range(0, 
class XiechengCatcher(object):
    """Crawl the hotel list of one city from the 携程 (Xiecheng) API and store it.

    Typical use: ``setCity`` -> ``getHotelList`` -> ``saveHolteList``.
    """

    def __init__(self):
        self._city = None
        self.__ota_info = "携程"
        self.xiecheng_api_client = XieChengAPIClient()
        self.hotel_dao = HotelDAO(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"])

    def setCity(self, city):
        """Select the city that later crawl/save calls operate on."""
        self._city = city

    def getHotelList(self, cityId, max_retries=3):
        """Fetch the hotel list page by page.

        :param cityId: city identifier passed through to the API client.
        :param max_retries: number of consecutive failed attempts tolerated
            for a single page before the crawl is abandoned.
        :return: list of hotel dicts collected so far; an empty list when no
            city has been set.
        """
        if self._city is None:
            print("未设置城市,请先使用setCity方法")
            # Without a city there is nothing meaningful to request.
            return []
        hotel_list = []
        page_index = 1
        page_amount = 10000
        failures = 0
        while page_index <= page_amount - 1:
            try:
                page_data = self.xiecheng_api_client.get_hotel_list(page_index, cityId, self._city)
                # The hotel count reported by the API is unstable, so the
                # smallest page count seen so far bounds the crawl.
                reported_pages = page_data["hotelAmount"] // 25
                if page_amount > reported_pages:
                    page_amount = reported_pages
                print("page_amount=%d" % page_amount)
                hotel_list.extend(page_data["hotelPositionJSON"])
            except Exception:
                # "except Exception" keeps Ctrl-C working; the retry counter
                # keeps a permanently failing page from looping forever.
                failures += 1
                print("Page_%d Fail" % page_index)
                if failures >= max_retries:
                    print("Page_%d abandoned after %d attempts" % (page_index, failures))
                    break
                continue
            print("Page_%d Success" % page_index)
            failures = 0
            page_index += 1
        return hotel_list

    def saveHolteList(self, hotel_list):
        """Synchronise a crawled hotel list with the location and baseinfo tables.

        Known hotels (matched by name, then by location id) get their baseinfo
        row refreshed; unknown ones are queued as new location / baseinfo
        records. Stored baseinfo rows not present in ``hotel_list`` keep
        ``if_overtime = 1``.

        :param hotel_list: iterable of dicts with the keys ``name``,
            ``address``, ``lon``, ``lat``, ``url``, ``dpcount``, ``img``, ``id``.
        :return: None. Results are written through ``self.hotel_dao``.
        """
        old_location_info = self.hotel_dao.get_locations(self._city)
        # Row layout (see the dict built at the end): 0 guid, 1 url,
        # 2 location_id, 3 OTA, 4 comm_num, 5 if_overtime, 6 incre_num,
        # 7 img, 8 id_in_ota.
        old_baseinfo = [list(row) for row in self.hotel_dao.get_baseinfo(self._city, self.__ota_info)]
        # Assume every stored record is overdue until the crawl proves otherwise.
        for row in old_baseinfo:
            row[5] = 1

        # Index the stored data once; setdefault keeps the first match, like a
        # linear scan with an early break.
        location_ids = {}
        for location in old_location_info:
            location_ids.setdefault(location[3], location[0])
        # Keep the stored comment count beside the row so the increment is
        # always measured against the database value.
        baseinfo_by_location = {}
        for row in old_baseinfo:
            baseinfo_by_location.setdefault(row[2], (row, row[4]))

        new_locations = []
        new_baseinfo = []
        for item in hotel_list:
            location_id = location_ids.get(item["name"])
            if location_id is None:
                # Unknown hotel: convert BD-09 -> GCJ-02 -> WGS-84 and queue
                # a new location record.
                location_id = uuid.uuid1()
                trans_location = CoordTransor.bd09togcj02(bd_lon=float(item["lon"]), bd_lat=float(item["lat"]))
                trans_location = CoordTransor.gcj02towgs84(trans_location[1], trans_location[0])
                new_locations.append({
                    "guid": location_id,
                    "x": trans_location[1],
                    "y": trans_location[0],
                    "hotel_name": item["name"],
                    "city": self._city,
                    "address": item["address"]
                })

            match = baseinfo_by_location.get(location_id)
            if match is None:
                new_baseinfo.append({
                    "guid": uuid.uuid1(),
                    "url": item["url"],
                    "location_id": location_id,
                    "OTA": self.__ota_info,
                    "comm_num": item["dpcount"],
                    "if_overtime": 0,
                    "incre_num": item["dpcount"],
                    "img": item["img"],
                    "id_in_ota": item["id"]
                })
                continue

            row, stored_count = match
            # Compute the increment BEFORE overwriting comm_num; otherwise the
            # new count is compared with itself and the result is always 0.
            increment = int(item["dpcount"]) - int(stored_count or 0)
            row[1] = item["url"]
            row[4] = item["dpcount"]
            row[5] = 0
            row[6] = increment if increment > 0 else 0
            row[7] = item["img"]
            row[8] = item["id"]

        update_baseinfo = [{
            "guid": row[0],
            "url": row[1],
            "location_id": row[2],
            "OTA": row[3],
            "comm_num": row[4],
            "if_overtime": row[5],
            "incre_num": row[6],
            "img": row[7],
            "id_in_ota": row[8]
        } for row in old_baseinfo]
        print("%d %d %d" % (len(new_locations), len(new_baseinfo), len(update_baseinfo)))
        self.hotel_dao.save_locations(new_locations)
        self.hotel_dao.save_baseinfo(new_baseinfo)
        self.hotel_dao.update_baseinfo(update_baseinfo)
    37 | 38 | 39 | 81 | 82 |
    83 | 84 | 85 | 89 | 90 | 91 |
    92 |
    93 |
    94 |
      95 |
    • Docs »
    • 96 | 97 | 98 | 99 |
    • Home
    • 100 |
    • 101 | 102 |
    • 103 |
    104 |
    105 |
    106 |
    107 |
    108 | 109 |

    Welcome to MkDocs

    110 |

    For full documentation visit mkdocs.org.

    111 |

    Commands

    112 |
      113 |
    • mkdocs new [dir-name] - Create a new project.
    • 114 |
    • mkdocs serve - Start the live-reloading docs server.
    • 115 |
    • mkdocs build - Build the documentation site.
    • 116 |
    • mkdocs help - Print this help message.
    • 117 |
    118 |

    Project layout

    119 |
    mkdocs.yml    # The configuration file.
    120 | docs/
    121 |     index.md  # The documentation homepage.
    122 |     ...       # Other markdown pages, images and other files.
    123 | 
    124 | 125 |
    126 |
    127 |
    128 | 129 | 135 | 136 | 137 |
    138 | 139 |
    140 | 141 | 142 |
    143 | 144 | Built with MkDocs using a theme provided by Read the Docs. 145 |
    146 | 147 |
    148 |
    149 | 150 |
    151 | 152 |
    153 | 154 |
    155 | 156 | 157 | 158 | 159 | Next » 160 | 161 | 162 |
    163 | 164 | 165 | 166 | 167 | 171 | -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/dao/hotel/xiechengdao/xiecheng.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | __author__ = 'LiuYang' 3 | import MySQLdb 4 | import uuid 5 | import random 6 | import json 7 | 8 | from dao.SuperDAO import SuperDAO 9 | 10 | 11 | class xiechengDAO(SuperDAO): 12 | 13 | def __init__(self, host, db, user, password): 14 | SuperDAO.__init__(self, host, db, user, password) 15 | 16 | # 存储酒店基本信息 17 | def savehotelComment(self,items): 18 | db = MySQLdb.connect(self.host,self.user,self.password,self.db,charset='utf8') 19 | cursor = db.cursor() 20 | for item in items: 21 | try: 22 | cursor.execute("replace into hotelinfo(guid,city,title,price,score,recommend,area,havawifi,discussNum,common_facilities,activity_facilities,service_facilities,room_facilities)values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)" ,(item["guid"],item["city"],item["title"],item["price"],item["score"],item["recommend"],item["area"],item["havawifi"],item["discussNum"],item["common_facilities"],item["activity_facilities"],item["service_facilities"],item["room_facilities"])) 23 | except Exception, e: 24 | print e 25 | db.commit() 26 | cursor.close() 27 | db.close() 28 | 29 | 30 | # 存储所有酒店的链接 31 | def savehotellink(self,listPageInfo): 32 | db = MySQLdb.connect(self.host,self.user,self.password,self.db,charset='utf8') 33 | cursor = db.cursor() 34 | for hotel in listPageInfo: 35 | try: 36 | id = uuid.uuid1() 37 | cursor.execute("replace into hotellianjie(guid,lianjie,city,comm_num)values(%s,%s,%s,%s)" ,(id,hotel["url"],hotel["city"],hotel["comm_num"])) 38 | except Exception,e: 39 | print hotel["url"] 40 | db.commit() 41 | cursor.close() 42 | db.close() 43 | 44 | # 取出周围设施数据 45 | def get_around_facilities_data(self): 46 | db = 
MySQLdb.connect(self.host,self.user,self.password,self.db,charset='utf8') 47 | cursor = db.cursor() 48 | cursor.execute("SELECT facilities_lntandlang FROM around_facilities_distance") 49 | data = [] 50 | rows = cursor.fetchall() 51 | db.commit() 52 | cursor.close() 53 | db.close() 54 | for i in rows: 55 | data.append(json.loads(i[0])) 56 | return data 57 | 58 | 59 | 60 | # 取出周围设施中最远距离的设施和距离 61 | def get_max_distance_data(self): 62 | db = MySQLdb.connect(self.host,self.user,self.password,self.db,charset='utf8') 63 | cursor = db.cursor() 64 | cursor.execute("SELECT maxdistance FROM around_facilities_distance") 65 | data = [] 66 | rows = cursor.fetchall() 67 | 68 | for i in rows: 69 | data.append(json.loads(i[0])) 70 | return data 71 | db.commit() 72 | cursor.close() 73 | db.close() 74 | 75 | # 从数据库中读取链接数据 76 | def _return(self): 77 | db = MySQLdb.connect(self.host,self.user,self.password,self.db,charset='utf8') 78 | cursor = db.cursor() 79 | 80 | cursor.execute("SELECT * FROM hotellianjie") 81 | 82 | rows = cursor.fetchall() 83 | return rows 84 | 85 | db.commit() 86 | cursor.close() 87 | db.close() 88 | 89 | 90 | # 从数据库中读取评论数据 91 | def _returncommentinfo(self): 92 | db = MySQLdb.connect(self.host,self.user,self.password,self.db,charset='utf8') 93 | cursor = db.cursor() 94 | 95 | cursor.execute("SELECT * FROM hotelcommentinfo") 96 | 97 | rows = cursor.fetchall() 98 | db.commit() 99 | cursor.close() 100 | db.close() 101 | return rows 102 | 103 | # 存储酒店评论信息 104 | def savehotelCommentinfo(self,items): 105 | db = MySQLdb.connect(self.host,self.user,self.password,self.db,charset='utf8') 106 | cursor = db.cursor() 107 | for item in items: 108 | 109 | try: 110 | cursor.execute("insert into hotelcommentinfo(hotelname,username,commentscore,intime,tourstyle,praisenum,commenttime,comment)values(%s,%s,%s,%s,%s,%s,%s,%s)" ,(item["title"],item["username"],item["commentscore"],item["intime"],item["tourstyle"],item["praisenum"],item["commenttime"],item["comment"])) 111 | except : 112 
| print item 113 | db.commit() 114 | cursor.close() 115 | db.close() 116 | 117 | # 存储酒店评论信息(含好感度) 118 | def savehotelCommentinfosenti(self,items): 119 | db = MySQLdb.connect(self.host,self.user,self.password,self.db,charset='utf8') 120 | cursor = db.cursor() 121 | for item in items: 122 | 123 | try: 124 | cursor.execute("insert into xiechengcomment(hotelname,username,commentscore,intime,tourstyle,praisenum,comment,senti_value,viewpoint)values(%s,%s,%s,%s,%s,%s,%s,%s,%s)" ,(item["hotelname"],item["username"],item["commentscore"],item["intime"],item["tourstyle"],item["praisenum"],item["comment"],item["senti_value"],item["viewpoint"])) 125 | except Exception,e: 126 | print e 127 | db.commit() 128 | cursor.close() 129 | db.close() 130 | 131 | 132 | # 从数据库中读取评论数据 133 | def _returncomment(self): 134 | db = MySQLdb.connect(self.host,self.user,self.password,self.db,charset='utf8') 135 | cursor = db.cursor() 136 | cursor.execute("SELECT * FROM hotelcommentinfo") 137 | rows = cursor.fetchall() 138 | db.commit() 139 | cursor.close() 140 | db.close() 141 | return rows 142 | 143 | def get_comments(self): 144 | return self.get_records("xiechengcomment") -------------------------------------------------------------------------------- /ugc.aggregator.esri/src/main/python/util/http/UniversalSDK.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | __author__ = 'DreamCatcher' 4 | __version__ = '1.0.0' 5 | 6 | import json,urllib2,urllib,gzip,collections 7 | from util.common.Decorators import retry 8 | import logging 9 | logger = logging.getLogger('ugc') 10 | 11 | try: 12 | from cStringIO import StringIO 13 | except ImportError: 14 | from StringIO import StringIO 15 | 16 | 17 | class APIClient(object): 18 | domain = "" 19 | 20 | def __init__(self,domain): 21 | self.domain = domain 22 | 23 | def __getattr__(self,attr): 24 | return _Callable('%s/%s'%(self.domain,attr)) 25 | 26 | class 
_Callable(object): 27 | def __init__(self,client): 28 | self.client = client 29 | self.header = None 30 | 31 | def __getattr__(self,attr): 32 | @retry((urllib2.URLError,ValueError), tries=10, delay=1, backoff=2) 33 | def execute(**kw): 34 | params = '%s'%(_encode_params(**kw)) 35 | if len(params)!=0: 36 | http_url = '%s?%s'%(self.client,params) if self.method=='get' else self.client 37 | else: 38 | http_url = self.client 39 | http_body = None if self.method == 'get' else params 40 | # logging.info(http_url) 41 | req = urllib2.Request(http_url,data=http_body) 42 | req.add_header('Accept-Encoding', 'gzip') 43 | if self.header is not None: 44 | for key in self.header: 45 | req.add_header(key,self.header[key]) 46 | try: 47 | resp = urllib2.urlopen(req,timeout=200) 48 | body = _read_body(resp) 49 | r = _parse_json(body) 50 | return r 51 | except Exception as e: 52 | logging.error(e) 53 | pass 54 | def execute_by_dict(dict): 55 | params = '%s'%(_encode_params_by_dict(dict)) 56 | if len(params)!=0: 57 | http_url = '%s?%s'%(self.client,params) if self.method=='get' else self.client 58 | else: 59 | http_url = self.client 60 | http_body = None if self.method == 'get' else params 61 | 62 | req = urllib2.Request(http_url,data=http_body) 63 | req.add_header('Accept-Encoding', 'gzip') 64 | if self.header is not None: 65 | for key in self.header: 66 | req.add_header(key,self.header[key]) 67 | 68 | try: 69 | resp = urllib2.urlopen(req,timeout=200) 70 | body = _read_body(resp) 71 | r = _parse_json(body) 72 | return r 73 | except Exception as e: 74 | logging.error(e) 75 | pass 76 | # 添加尾巴 77 | def add_trail(trail): 78 | return _Callable('%s%s'%(self.client,trail)) 79 | 80 | def add_header(header): 81 | self.header = header 82 | return _Callable('%s'%self.client) 83 | 84 | if attr == 'get': 85 | self.method = 'get' 86 | return execute 87 | if attr == 'get_by_dict': 88 | self.method = 'get' 89 | return execute_by_dict 90 | if attr == 'post': 91 | self.method = 'post' 92 | return 
class JsonDict(dict):
    ' dict subclass whose entries can also be read and assigned as attributes '

    def __setattr__(self, attr, value):
        # Attribute assignment is stored as a regular dict entry.
        self[attr] = value

    def __getattr__(self, attr):
        # Only reached when normal attribute lookup fails: fall back to keys.
        if attr in self:
            return self[attr]
        raise AttributeError(r"'JsonDict' object has no attribute '%s'" % attr)
urllib.quote(qv))) 156 | else: 157 | qv = str(v) 158 | args.append('%s=%s' % (k, urllib.quote(qv))) 159 | return '&'.join(args) 160 | def _read_body(obj): 161 | using_gzip = obj.headers.get('Content-Encoding', '')=='gzip' 162 | body = obj.read() 163 | if using_gzip: 164 | gzipper = gzip.GzipFile(fileobj=StringIO(body)) 165 | fcontent = gzipper.read() 166 | gzipper.close() 167 | return fcontent 168 | return body 169 | -------------------------------------------------------------------------------- /ugc.hotel.web.esri/html/review-monitor.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 基于社会感知的酒店竞业市场时空可视化分析 10 | 11 | 12 | 13 | 14 | 15 | 16 | 21 | 46 | 47 |
    48 |
    49 |
    50 |
    51 |
    52 |
    53 | 携程 54 |

    携程网

    55 | 4.3/5分    ¥273起 56 |

    源自9253位住客点评

    57 | 查看评论 58 |
    59 |
    60 |
    61 |
    62 | 艺龙 63 |

    艺龙网

    64 | 4.6/5分    ¥203起 65 |

    源自6978位住客点评

    66 | 查看评论 67 |
    68 |
    69 | 70 |
    71 |
    72 | 途牛 73 |

    途牛网

    74 | 4.3/5分    ¥232起 75 |

    源自357位住客点评

    76 | 查看评论 77 |
    78 |
    79 | 80 |
    81 |
    82 | 去哪儿 83 |

    去哪儿网

    84 | 4.0/5分    ¥246起 85 |

    源自2372位住客点评

    86 | 查看评论 87 |
    88 |
    89 |
    90 |
    91 |
    92 |
    93 |
    94 |
      95 |
    96 |
    97 |
    98 |
      99 |
    100 |
    101 |
    102 |
    103 | 104 | 105 | 106 | 107 | 108 | 109 | -------------------------------------------------------------------------------- /ugc.hotel.web.esri/html/quality.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 基于社会感知的酒店竞业市场时空可视化分析 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 47 | 48 | 49 |
    50 | 51 |
    52 |
    53 |
    54 |
    Loading....
    55 |
    56 |
    57 | 58 | 59 |
    60 | 61 | 62 |
    63 | 64 |
    65 | 66 | 67 | 68 | 69 | 70 | 74 |
    75 | 76 | 87 |
    88 |
    89 | 90 | 91 |
    92 |
    93 |
    Room Message List
    94 |
    95 | 96 | 97 | 101 | 102 |
    103 |
    104 | 105 |
    106 | 107 | 108 | 130 | 131 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | --------------------------------------------------------------------------------