├── .idea
├── .name
├── encodings.xml
├── vcs.xml
├── modules.xml
├── ESRI2016-C280.iml
└── misc.xml
├── ugc.hotel.web.esri
├── .idea
│ ├── .name
│ ├── copyright
│ │ └── profiles_settings.xml
│ ├── scopes
│ │ └── scope_settings.xml
│ ├── encodings.xml
│ ├── vcs.xml
│ ├── misc.xml
│ ├── modules.xml
│ ├── ugc.hotel.web.iml
│ └── compiler.xml
├── .bowerrc
├── images
│ ├── bg.jpg
│ ├── down.png
│ ├── menu.png
│ ├── up1.png
│ ├── noise.png
│ ├── qunar.png
│ ├── tuniu.png
│ ├── yilong.png
│ ├── xiecheng.png
│ └── wood_pattern.jpg
├── font
│ ├── fontawesome-webfont.eot
│ ├── fontawesome-webfont.ttf
│ └── fontawesome-webfont.woff
├── web.config.backup
├── js
│ ├── dojiconfig.js
│ ├── common.js
│ ├── setting.js
│ ├── login
│ │ └── globalconfig.js
│ ├── application.js
│ └── review-monitor
│ │ └── main.js
├── Web.config
├── package.json
├── LICENSE
├── css
│ ├── loading.css
│ ├── main.css
│ ├── simple-sidebar.css
│ ├── messages.css
│ └── demo.css
└── html
│ ├── login.html
│ ├── quality-testing.html
│ ├── setting.html
│ ├── public-opinion-monitor.html
│ ├── review-monitor.html
│ └── quality.html
├── ugc.aggregator.esri
├── .idea
│ ├── .name
│ ├── encodings.xml
│ ├── vcs.xml
│ ├── inspectionProfiles
│ │ ├── profiles_settings.xml
│ │ └── Project_Default.xml
│ ├── modules.xml
│ ├── ugc.aggregator.iml
│ └── misc.xml
├── src
│ └── main
│ │ ├── python
│ │ ├── dao
│ │ │ ├── pms
│ │ │ │ └── __init__.py
│ │ │ ├── hotel
│ │ │ │ ├── elong
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── ElongDao.py
│ │ │ │ ├── xiechengdao
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── xiecheng.py
│ │ │ │ └── __init__.py
│ │ │ ├── weibo
│ │ │ │ ├── __init__.py
│ │ │ │ └── WeiboDAO.py
│ │ │ ├── __init__.py
│ │ │ └── SuperDAO.py
│ │ ├── util
│ │ │ ├── geo
│ │ │ │ ├── __init__.py
│ │ │ │ └── CoordTransor.py
│ │ │ ├── http
│ │ │ │ ├── __init__.py
│ │ │ │ └── UniversalSDK.py
│ │ │ ├── io
│ │ │ │ ├── __init__.py
│ │ │ │ ├── CSVFileUtil.py
│ │ │ │ └── FileUtil.py
│ │ │ └── common
│ │ │ │ ├── DateHandler.py
│ │ │ │ ├── CollectionUtil.py
│ │ │ │ └── Decorators.py
│ │ ├── service
│ │ │ ├── pms
│ │ │ │ └── __init__.py
│ │ │ ├── hotel
│ │ │ │ ├── elong
│ │ │ │ │ └── __init__.py
│ │ │ │ ├── xiecheng
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── XichengDataService.py
│ │ │ │ ├── __init__.py
│ │ │ │ ├── XieChengAPIClient.py
│ │ │ │ ├── TuniuAPIClient.py
│ │ │ │ └── SuperHotelService.py
│ │ │ ├── map
│ │ │ │ ├── baidu
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── baidu.py
│ │ │ │ │ ├── APIService.py
│ │ │ │ │ └── CoordinateTransferService.py
│ │ │ │ └── tian
│ │ │ │ │ └── APIService.py
│ │ │ ├── nlp
│ │ │ │ ├── __init__.py
│ │ │ │ ├── sentiment.marshal
│ │ │ │ ├── keywords.txt
│ │ │ │ ├── Sentiment.py
│ │ │ │ ├── HotelNLP.py
│ │ │ │ ├── KeywordsHandler.py
│ │ │ │ └── Bayes.py
│ │ │ └── weibo
│ │ │ │ └── APIService.py
│ │ └── resource
│ │ │ └── __init__.py
│ │ └── scripts
│ │ ├── Hotel
│ │ ├── __init__.py
│ │ ├── sightspot.txt
│ │ ├── SightSpot.py
│ │ ├── HotelGeocoding.py
│ │ ├── HotelCatcher.py
│ │ ├── MergeComment.py
│ │ ├── HotelSentimentProcessor.py
│ │ ├── TuniuCatcher.py
│ │ └── XiechengCatcher.py
│ │ ├── logging.ini
│ │ ├── Map
│ │ └── Geocoding.py
│ │ ├── GeocodingServiceMultiProcess.py
│ │ └── GeocodingService.py
├── docs
│ └── ugc.aggregator
│ │ ├── docs
│ │ ├── about.md
│ │ ├── mkdocs.md
│ │ ├── pycharm.md
│ │ ├── pybuilder.md
│ │ ├── virtualvenv.md
│ │ └── index.md
│ │ ├── site
│ │ ├── img
│ │ │ └── favicon.ico
│ │ ├── mkdocs
│ │ │ ├── js
│ │ │ │ ├── search-results-template.mustache
│ │ │ │ └── search.js
│ │ │ └── search_index.json
│ │ ├── fonts
│ │ │ ├── fontawesome-webfont.eot
│ │ │ ├── fontawesome-webfont.ttf
│ │ │ └── fontawesome-webfont.woff
│ │ ├── sitemap.xml
│ │ ├── license
│ │ │ └── highlight.js
│ │ │ │ └── LICENSE
│ │ ├── js
│ │ │ └── theme.js
│ │ ├── css
│ │ │ ├── highlight.css
│ │ │ └── theme_extra.css
│ │ ├── search.html
│ │ ├── about
│ │ │ └── index.html
│ │ └── index.html
│ │ └── mkdocs.yml
├── build.py
├── setting.py
└── requirements.txt
├── README.md
├── Screenshots
├── 3.2.2.png
├── 1 系统架构图.png
├── 2 爬虫系统架构.png
├── 2.1房价监控.png
├── 4 情感值分析.png
├── 4.4 运营质检.jpg
├── 6 服务发布框架.png
├── 1.1.1情感统计.png
├── 1.1.2观点统计.png
├── 1.2.1酒店对比.png
├── 1.3.1用户来源图.png
├── 1.3.4局部轨迹.png
├── 2.3订房蜂窝热度图.png
├── 3.2.1携程用户轨迹.png
├── 5 特征词提取流程.png
├── 1.4.2 客源流出计算.PNG
├── 3.1.2设施最远覆盖图.jpg
└── 1.4.4酒店多级服务区分析.PNG
├── .gitattributes
└── .gitignore
/.idea/.name:
--------------------------------------------------------------------------------
1 | ESRI2016-C280
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/.idea/.name:
--------------------------------------------------------------------------------
1 | ugc.hotel.web
--------------------------------------------------------------------------------
/ugc.aggregator.esri/.idea/.name:
--------------------------------------------------------------------------------
1 | ugc.aggregator
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/dao/pms/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/util/geo/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/util/http/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/util/io/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/dao/hotel/elong/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/pms/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/.bowerrc:
--------------------------------------------------------------------------------
1 | {
2 | "directory": "lib"
3 | }
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/hotel/elong/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/dao/hotel/xiechengdao/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/hotel/xiecheng/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/README.md
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/docs/about.md:
--------------------------------------------------------------------------------
1 | # ugc.aggregator
2 | 众源时空信息聚合
3 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/dao/weibo/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'LiuYang'
2 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/map/baidu/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'geosmart'
2 |
--------------------------------------------------------------------------------
/Screenshots/3.2.2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/3.2.2.png
--------------------------------------------------------------------------------
/Screenshots/1 系统架构图.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/1 系统架构图.png
--------------------------------------------------------------------------------
/Screenshots/2 爬虫系统架构.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/2 爬虫系统架构.png
--------------------------------------------------------------------------------
/Screenshots/2.1房价监控.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/2.1房价监控.png
--------------------------------------------------------------------------------
/Screenshots/4 情感值分析.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/4 情感值分析.png
--------------------------------------------------------------------------------
/Screenshots/4.4 运营质检.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/4.4 运营质检.jpg
--------------------------------------------------------------------------------
/Screenshots/6 服务发布框架.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/6 服务发布框架.png
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/dao/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __author__ = 'lizhen'
3 |
--------------------------------------------------------------------------------
/Screenshots/1.1.1情感统计.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/1.1.1情感统计.png
--------------------------------------------------------------------------------
/Screenshots/1.1.2观点统计.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/1.1.2观点统计.png
--------------------------------------------------------------------------------
/Screenshots/1.2.1酒店对比.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/1.2.1酒店对比.png
--------------------------------------------------------------------------------
/Screenshots/1.3.1用户来源图.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/1.3.1用户来源图.png
--------------------------------------------------------------------------------
/Screenshots/1.3.4局部轨迹.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/1.3.4局部轨迹.png
--------------------------------------------------------------------------------
/Screenshots/2.3订房蜂窝热度图.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/2.3订房蜂窝热度图.png
--------------------------------------------------------------------------------
/Screenshots/3.2.1携程用户轨迹.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/3.2.1携程用户轨迹.png
--------------------------------------------------------------------------------
/Screenshots/5 特征词提取流程.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/5 特征词提取流程.png
--------------------------------------------------------------------------------
/Screenshots/1.4.2 客源流出计算.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/1.4.2 客源流出计算.PNG
--------------------------------------------------------------------------------
/Screenshots/3.1.2设施最远覆盖图.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/3.1.2设施最远覆盖图.jpg
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/dao/hotel/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer'
3 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/hotel/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'LiuYang'
3 |
--------------------------------------------------------------------------------
/Screenshots/1.4.4酒店多级服务区分析.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/Screenshots/1.4.4酒店多级服务区分析.PNG
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/scripts/Hotel/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer,LiuYang'
3 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/images/bg.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/bg.jpg
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/nlp/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer,LiuYang'
3 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/images/down.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/down.png
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/images/menu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/menu.png
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/images/up1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/up1.png
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/images/noise.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/noise.png
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/images/qunar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/qunar.png
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/images/tuniu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/tuniu.png
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/images/yilong.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/yilong.png
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/resource/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'LiuYang,DreamCathcer,pengshaowei'
3 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/images/xiecheng.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/xiecheng.png
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/images/wood_pattern.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/images/wood_pattern.jpg
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/font/fontawesome-webfont.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/font/fontawesome-webfont.eot
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/font/fontawesome-webfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/font/fontawesome-webfont.ttf
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/font/fontawesome-webfont.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.hotel.web.esri/font/fontawesome-webfont.woff
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/scripts/Hotel/sightspot.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.aggregator.esri/src/main/scripts/Hotel/sightspot.txt
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/site/img/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.aggregator.esri/docs/ugc.aggregator/site/img/favicon.ico
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/site/mkdocs/js/search-results-template.mustache:
--------------------------------------------------------------------------------
1 |
2 |
3 | {{summary}}
4 |
5 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/nlp/sentiment.marshal:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.aggregator.esri/src/main/python/service/nlp/sentiment.marshal
--------------------------------------------------------------------------------
/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/util/common/DateHandler.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'LiuYang,geosmart'
3 |
4 | '''
5 | 时间处理类
6 | '''
class ClassHandler(object):
    """Empty placeholder class; it defines no attributes or methods yet.

    The note at the top of this file labels it a time-handling class.

    NOTE(review): the class is called ``ClassHandler`` although the file is
    ``DateHandler.py`` -- confirm the name is intentional before adding
    members.
    """
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/web.config.backup:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/site/fonts/fontawesome-webfont.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.aggregator.esri/docs/ugc.aggregator/site/fonts/fontawesome-webfont.eot
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/site/fonts/fontawesome-webfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.aggregator.esri/docs/ugc.aggregator/site/fonts/fontawesome-webfont.ttf
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/.idea/scopes/scope_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: ugc.aggregator
2 | pages:
3 | - Home: index.md
4 | - pybuilder: pybuilder.md
5 | - mkdocs: mkdocs.md
6 | - About: about.md
7 |
8 | #theme: readthedocs
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/site/fonts/fontawesome-webfont.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlbertRui/ESRI2016-C280/HEAD/ugc.aggregator.esri/docs/ugc.aggregator/site/fonts/fontawesome-webfont.woff
--------------------------------------------------------------------------------
/ugc.aggregator.esri/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/docs/mkdocs.md:
--------------------------------------------------------------------------------
1 | # mkdocs项目文档配置
2 | [mkdocs主页](http://www.mkdocs.org)
3 | ## 进入venvShell并安装mkdocs
4 | pip install mkdocs
5 | ## mkdocs 命令
6 | mkdocs help
7 | ## 新建项目
8 | >> cd docs
9 | >> mkdocs new pybuilder.helloworld
10 | >> cd pybuilder.helloworld
11 | ## 本机测试
12 | mkdocs serve
13 | ## 生成发布site
14 | mkdocs build
15 | ## 清空site
16 | mkdocs build --clean
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/.idea/ugc.hotel.web.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/.idea/ugc.aggregator.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/js/dojiconfig.js:
--------------------------------------------------------------------------------
// Dojo loader configuration: registers the "bdlib" and "tdtlib" packages at
// locations derived from the path of the page that loads this script.
var dojoConfig = (function () {
    // Path of the current page with its final "/segment" stripped off.
    var pageDir = location.pathname.replace(/\/[^/]+$/, '');

    // NOTE(review): no "/" is inserted between pageDir and "../../lib/...",
    // so the last directory name is fused with the first ".." (for example
    // "/app/html../../lib/bdlib"). Adding a slash would change which
    // directory the path resolves to -- confirm against the deployed layout
    // before "fixing" it.
    return {
        parseOnLoad: true,
        packages: [
            {
                "name": "bdlib",
                "location": pageDir + "../../lib/bdlib"
            },
            {
                "name": "tdtlib",
                "location": pageDir + "../../lib/tdtlib"
            }
        ]
    };
}());
--------------------------------------------------------------------------------
/.idea/ESRI2016-C280.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
4 | # Custom for Visual Studio
5 | *.cs diff=csharp
6 |
7 | # Standard to msysgit
8 | *.doc diff=astextplain
9 | *.DOC diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot diff=astextplain
13 | *.DOT diff=astextplain
14 | *.pdf diff=astextplain
15 | *.PDF diff=astextplain
16 | *.rtf diff=astextplain
17 | *.RTF diff=astextplain
18 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/docs/pycharm.md:
--------------------------------------------------------------------------------
1 | pycharm相关配置
2 | ---
3 |
4 | # project interpreter配置
5 | 默认在pycharm中无法选择已有virtualEnv,只能新建,可通过add local手动完成虚拟环境导入:
6 | File>setting>Project Interpreter>add local>选择virtualEnv\Scripts\python.exe
7 |
8 | # svn配置
9 | svn下载:http://netcologne.dl.sourceforge.net/project/win32svn/1.8.14/Setup-Subversion-1.8.14.msi
10 | svn安装:注意安装路径不能带空格:
11 | pycharm配置svn:在version control>svn>command line client设置为C:\Dev\SVN\bin\svn.exe
12 |
13 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/site/sitemap.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | None/
7 | 2016-01-18
8 | daily
9 |
10 |
11 |
12 |
13 |
14 | None/about/
15 | 2016-01-18
16 | daily
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/util/io/CSVFileUtil.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'LiuYang,geosmart'
3 |
4 | import csv
5 |
6 |
class CSVFileUtil(object):
    """Thin convenience wrapper around the standard ``csv`` module."""

    def reader(self, file):
        """Lazily yield each row of ``file`` as parsed by ``csv.reader``.

        :param file: an open file-like object (or any iterable of lines
            accepted by ``csv.reader``).
        :return: a generator producing one list of column strings per row.
        """
        for row in csv.reader(file):
            yield row

    def writer(self, file):
        """Return a ``csv.writer`` bound to the open file-like object ``file``."""
        return csv.writer(file)


if __name__ == "__main__":
    # Manual smoke test: print the third column of a developer-local CSV file.
    csv_file_util = CSVFileUtil()
    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes the handle, which the original never did. The 'rb' mode is kept
    # because this code base targets Python 2, whose csv module expects
    # binary-mode files.
    with open(r'C:\Users\kaipeng\Desktop\rent.csv', 'rb') as csv_file:
        for line in csv_file_util.reader(csv_file):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(line[2])
20 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/docs/pybuilder.md:
--------------------------------------------------------------------------------
1 | # pybuilder目录
2 | ## virtualenv路径
3 | E:\PythonWorkspace\ugc\ugc.venv
4 |
5 | ## pybuilder路径
6 | E:\PythonWorkspace\ugc\ugc.venv\Scripts\
7 |
8 | # pybuilder脚本
9 | ## 进入venvShell
10 | workon ugc.venv
11 |
12 | ## 执行默认build文件
13 | pyb_.exe
14 |
15 | ## 执行默认build文件,并打印unittest错误详情
16 | pyb_.exe -v
17 |
18 | ## 新增测试项目
19 | pyb_.exe --start-project
20 |
21 | ## 发布
22 | pyb_.exe install_dependencies publish
23 |
24 | # pybuilder树状目录介绍
25 | src/main/python:源码
26 | src/main/scripts:可执行脚本
27 | src/main/unittest:单元测试
28 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/util/common/CollectionUtil.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | __author__ = 'LiuYang,geosmart'
4 | import math
5 |
class CollectionUtil(object):
    """Helpers for splitting a sequence into consecutive chunks."""

    def chunksBySize(self, arr, n):
        """Split ``arr`` into consecutive chunks of ``n`` elements each.

        :param arr: the sliceable sequence to split.
        :param n: number of elements per chunk; the final chunk is shorter
            when ``len(arr)`` is not a multiple of ``n``.
        :return: a list of slices of ``arr``.
        """
        return [arr[i:i + n] for i in range(0, len(arr), n)]

    def chunksByAverage(self, arr, m):
        """Split ``arr`` into chunks sized as evenly as possible for ``m`` parts.

        The chunk size is ``ceil(len(arr) / m)``, so the result can contain
        fewer than ``m`` chunks (e.g. 5 items with ``m=4`` give 3 chunks).

        :param arr: the sliceable sequence to split.
        :param m: the requested number of chunks.
        :return: a list of slices of ``arr``; ``[]`` when ``arr`` is empty.
        :raises ZeroDivisionError: if ``m`` is 0 and ``arr`` is not empty.
        """
        # An empty input would give a chunk size of 0, and range() rejects a
        # zero step with ValueError -- return early instead.
        if len(arr) == 0:
            return []
        n = int(math.ceil(len(arr) / float(m)))
        return [arr[i:i + n] for i in range(0, len(arr), n)]
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/map/baidu/baidu.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 |
3 | from util.http.UniversalSDK import APIClient
4 |
5 |
class BaiduCrawler(object):
    """Holds an ``APIClient`` rooted at the Baidu Map API host."""

    def __init__(self):
        self.client = APIClient("http://api.map.baidu.com")

    def place(self, query, bounds, ak):
        """Call ``get`` on the client's ``place.v2.search`` attribute path.

        :param query: forwarded as the ``query`` keyword argument.
        :param bounds: forwarded as the ``bounds`` keyword argument.
        :param ak: forwarded as the ``ak`` keyword argument (presumably the
            Baidu API key -- confirm against the Baidu documentation).
        :return: whatever the client's ``get`` call returns; ``output="json"``
            is always passed along with the arguments above.
        """
        search_endpoint = self.client.place.v2.search
        params = dict(query=query, bounds=bounds, ak=ak, output="json")
        return search_endpoint.get(**params)


if __name__ == "__main__":
    baiduCrawler = BaiduCrawler()
    # Example call (Python 2 print statement), left disabled:
    #print baiduCrawler.place("银行","39.915,116.404,39.975,116.414","WBw4kIepZzGp4kH5Gn3r0ACy")
--------------------------------------------------------------------------------
/ugc.aggregator.esri/build.py:
--------------------------------------------------------------------------------
1 | from pybuilder.core import use_plugin, init
2 |
3 | use_plugin("python.core")
4 | use_plugin("python.install_dependencies")
5 | use_plugin("python.flake8")
6 | use_plugin("python.distutils")
7 |
8 | use_plugin("python.coverage")
9 | use_plugin("python.pycharm")
10 | # use_plugin("python.unittest")
11 |
12 | name = "ugc.aggregator"
13 | default_task = "publish"
14 |
15 |
@init
def set_properties(project):
    """PyBuilder initializer: declare build dependencies and project settings.

    :param project: the PyBuilder project object passed to ``@init`` hooks
    """
    # mockito is needed at build time only.
    project.build_depends_on('mockito')
    # Glob for unit-test modules: files ending in "_test".
    project.set_property('unittest_module_glob', '*_test')
    project.version = "1.0"
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/scripts/Hotel/SightSpot.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer'
3 |
4 | from service.map.baidu.APIService import BaiduMapAPIService
5 | from util.geo import CoordTransor
6 |
7 |
# NOTE(review): the API key is hard-coded; consider loading it from configuration.
baidu_api_service = BaiduMapAPIService("MviPFAcx5I6f1FkRQlq6iTxc")

# Geocode every line of sightspot.txt within the city and print the result.
# The ``with`` block guarantees the input file is closed, and iterating the
# handle streams lines instead of loading the whole file via readlines().
with open('sightspot.txt', 'r') as f:
    for line in f:
        data = baidu_api_service.doGeocoding(addressText=line.strip(), city='南京')
        if "result" in data:
            location = data["result"]["location"]
            # ``line`` still carries its trailing newline, so the converted
            # coordinates are printed on the line below the name.
            print(line + str(CoordTransor.bd09togcj02(location["lng"], location["lat"])))
            # print line+str(data["result"]["location"]["lat"])+","+str(data["result"]["location"]["lng"])
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/Web.config:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/hotel/xiecheng/XichengDataService.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer'
3 |
4 | import re
5 |
6 | from dao.hotel.xiechengdao.xiecheng import xiechengDAO
7 | from setting import local_hotel_setting
8 |
9 | # 配置数据库
10 | dao_setting = local_hotel_setting
11 |
class XichengDataService(object):
    """Service layer that forwards read queries to ``xiechengDAO``."""

    def __init__(self):
        # Connection parameters come from the module-level ``dao_setting`` mapping.
        host, db = dao_setting["host"], dao_setting["db"]
        user, password = dao_setting["user"], dao_setting["password"]
        self.dao = xiechengDAO(host, db, user, password)

    def get_max_distance(self):
        """Return the DAO's ``get_max_distance_data()`` result unchanged."""
        return self.dao.get_max_distance_data()

    def get_around_facilities(self):
        """Return the DAO's ``get_around_facilities_data()`` result unchanged."""
        return self.dao.get_around_facilities_data()
24 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "arcgis-echarts",
3 | "version": "0.1.1",
4 | "description": "A plugin for ArcGIS JS API to load echarts map and Make big data visualization easier.",
5 | "main": "src/EchartsLayer.js",
6 | "scripts": {
7 | "start": "http-server"
8 | },
9 | "repository": {
10 | "type": "git",
11 | "url": "git+https://github.com/wandergis/arcgis-echarts.git"
12 | },
13 | "keywords": [
14 | "ArcGIS",
15 | "javascript",
16 | "esri",
17 | "echarts",
18 | "visualization"
19 | ],
20 | "author": "wandergis",
21 | "license": "MIT",
22 | "bugs": {
23 | "url": "https://github.com/wandergis/arcgis-echarts/issues"
24 | },
25 | "homepage": "https://github.com/wandergis/arcgis-echarts#readme"
26 | }
--------------------------------------------------------------------------------
/ugc.aggregator.esri/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/setting.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 |
# Database connection settings, one dict per data source. Every dict carries
# the same four keys: "host", "db", "user" and "password".
# NOTE(review): credentials are committed in plain text -- consider loading
# them from the environment or from an untracked local file.

# Alternative (remote) connection for the Baidu map database, kept for reference.
#baidu_map_uadb_setting = {"host":"192.168.1.161", "db":"standarddb", "user":"standarddb", "password":"standarddb"}
baidu_map_uadb_setting = {"host":"localhost", "db":"ugc", "user":"root", "password":"1234"}

# Alternative (remote) connection for the hotel database, kept for reference.
# local_hotel_setting = {"host":"120.27.93.15", "db":"hotel", "user":"hotel", "password":"hotel"}
local_hotel_setting = {"host":"localhost", "db":"hotel", "user":"root", "password":"1234"}

local_weibo_setting = {"host":"localhost", "db":"weibo", "user":"root", "password":"1234"}

# Not referenced by the ``setting`` lookup table below.
lt_hotel_setting = {"host":"192.168.1.161", "db":"hotel", "user":"standarddb", "password":"standarddb"}

# Lookup table from data-source name to its connection settings.
setting = {"baidumap":baidu_map_uadb_setting, "hotel":local_hotel_setting, "weibo":local_weibo_setting}
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/docs/virtualvenv.md:
--------------------------------------------------------------------------------
1 | virtualenvwrapper-win
2 | ---
3 | ## virtualenv配置
4 | 安装:pip install virtualenv
5 | 新建virtualEnv:virtualenv --no-site-packages venv
6 | 进入venv shell:E:\PythonWorkspace\ugc\ugc.venv\Scripts\activate
7 |
8 | ## virtualenvwrapper安装
9 | linux:pip install virtualenvwrapper
10 | windows:pip install virtualenvwrapper-win
11 |
12 | ## virtualenvwrapper配置
13 | 安装完毕过后在环境变量里面新建一个WORKON_HOME字段存储虚拟python环境,
14 | WORKON_HOME:E:\PythonWorkspace\venv
15 | 环境变量立即生效:cmd中运行set WORKON_HOME=E:\PythonWorkspace\venv
16 |
17 | ## 常用的一些命令
18 | 命令安装在C:\Python27\Scripts\*.bat
19 | *. 创建虚拟环境:mkvirtualenv VirtualenvName
20 | *. 列出所有虚拟环境:lsvirtualenv
21 | *. 移除虚拟环境:rmvirtualenv VirtualenvName
22 | *. 切换到VirtualenvName环境:workon VirtualenvName
23 | *. 退出当前虚拟环境:deactivate
24 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/map/tian/APIService.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 |
3 | from util.http.UniversalSDK import APIClient
4 |
5 |
class TianMapAPIService(object):
    """Client for the query endpoint of map.tianditu.com."""

    def __init__(self):
        self.tiandituClient = APIClient("http://map.tianditu.com")

    # Tianditu geocoding
    # URL: http://map.tianditu.com/query.shtml
    # Method: POST
    def tdtGeocoding(self, address):
        """Geocode ``address`` through the Tianditu query service.

        :param address: address text to look up; must be a string
        :return: whatever the underlying ``APIClient`` ``post`` call returns
        :raises TypeError: if ``address`` is not a string
        """
        import json  # local import keeps this block self-contained

        # JSON request template; the literal word "address" is the placeholder
        # for the search keyword.
        postStr = "{\"keyWord\":\"address\",\"level\":\"12\",\"mapBound\":\"118.61107,31.90788,118.93449,32.18735\",\"queryType\":\"1\",\"count\" :\"20\",\"start\":\"0\",\"queryTerminal\":\"10000\"}"
        # Escape quotes, backslashes and control characters so the payload
        # stays valid JSON. ensure_ascii=False leaves ordinary (including
        # non-ASCII) text exactly as it was sent before this fix.
        encoded = json.dumps(address, ensure_ascii=False)
        if not encoded.startswith('"'):
            raise TypeError("address must be a string")
        # json.dumps wraps the value in quotes; the template already has them.
        postStr = postStr.replace("address", encoded[1:-1])
        data = self.tiandituClient.query.addtrail(".shtml").post(postStr=postStr, type="query")
        return data
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/hotel/XieChengAPIClient.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'LiuYang'
3 |
4 | from util.http.UniversalSDK import APIClient
5 | import datetime
6 |
7 |
class XieChengAPIClient(object):
    """Client for the hotel-list endpoint of hotels.ctrip.com."""

    def __init__(self):
        self.client = APIClient("http://hotels.ctrip.com")

    def get_hotel_list(self, page, cityId, cityName, checkIn=None, checkOut=None):
        """Fetch one page of the Ctrip hotel list.

        :param page: page number to request
        :param cityId: city id, forwarded as-is
        :param cityName: city name, forwarded as-is
        :param checkIn: check-in date as ``YYYY-MM-DD``; when omitted the stay
            defaults to tomorrow -> the day after (``checkOut`` is then ignored)
        :param checkOut: check-out date as ``YYYY-MM-DD``; when omitted it
            defaults to the day after ``checkIn``
        :return: whatever the underlying ``APIClient`` ``post`` call returns
        """
        date_format = '%Y-%m-%d'
        one_day = datetime.timedelta(days=1)
        if checkIn is None:
            tomorrow = datetime.datetime.now() + one_day
            checkIn = tomorrow.strftime(date_format)
            checkOut = (tomorrow + one_day).strftime(date_format)
        elif checkOut is None:
            # Previously a lone checkIn sent checkOut=None to the server;
            # default to a one-night stay instead.
            try:
                arrival = datetime.datetime.strptime(checkIn, date_format)
            except (TypeError, ValueError):
                # Unrecognised date format: keep the legacy behaviour and
                # forward checkOut unchanged.
                pass
            else:
                checkOut = (arrival + one_day).strftime(date_format)
        header = {"Content-Type": "application/x-www-form-urlencoded"}
        endpoint = self.client.Domestic.Tool.AjaxHotelList.addtrail(".aspx").addheader(header)
        hotel_list = endpoint.post(checkIn=checkIn, checkOut=checkOut, page=page,
                                   cityId=cityId, cityName=cityName)
        return hotel_list
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/js/common.js:
--------------------------------------------------------------------------------
/**
 * Read one query-string parameter from a URL.
 *
 * Only the text between the first and second "?" is examined. When the
 * parameter occurs several times the last occurrence wins. Values are
 * returned raw (no URI decoding).
 *
 * @param {string} sHref    URL to inspect
 * @param {string} sArgName parameter name to look up
 * @returns {string} the parameter value, or "" when the URL has no query
 *                   string or the parameter is absent / has no "="
 */
function GetArgsFromHref(sHref, sArgName) {
    var retval = "";
    var parts = sHref.split("?");

    if (parts[0] == sHref) { /* no "?" in the URL: nothing to parse */
        return retval;
    }

    var pairs = parts[1].split("&");
    for (var i = 0; i < pairs.length; i++) {
        var pair = pairs[i];
        var eq = pair.indexOf("=");
        if (eq < 0) continue; /* bare flag without a value */
        /* Split on the FIRST "=" only, so values that themselves contain
           "=" (e.g. base64 padding) are returned in full. */
        if (pair.substring(0, eq) == sArgName) retval = pair.substring(eq + 1);
    }
    return retval;
}
21 |
/**
 * Deep-copy an object or array.
 *
 * Uses a JSON round trip when JSON is available (so functions, undefined
 * members and non-JSON types are dropped, as before); otherwise falls back
 * to a recursive for-in copy.
 *
 * @param {*} obj value to clone
 * @returns {*} the copy; null for null; undefined for non-object input
 */
var deepClone = function (obj) {
    if (typeof obj !== 'object') {
        return; // primitives and functions: undefined, as before
    }
    if (obj === null) {
        // typeof null is 'object'; reading obj.constructor used to throw here.
        return null;
    }
    if (typeof JSON !== 'undefined') {
        // Serialize, then parse back into a fresh structure.
        return JSON.parse(JSON.stringify(obj));
    }
    var newobj = obj.constructor === Array ? [] : {};
    for (var i in obj) {
        // Recurse through deepClone itself (the original called the
        // undefined name cloneObj and threw a ReferenceError).
        newobj[i] = typeof obj[i] === 'object' ?
            deepClone(obj[i]) : obj[i];
    }
    return newobj;
};
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/docs/index.md:
--------------------------------------------------------------------------------
1 | # pybuilder入门项目
2 |
3 | [pybuilder官方文档](http://pybuilder.github.io/documentation/tutorial.html)
4 |
5 | ## pybuilder.helloworld
6 |
7 | * `mkdocs new [dir-name]` - Create a new project.
8 | * `mkdocs serve` - Start the live-reloading docs server.
9 | * `mkdocs build` - Build the documentation site.
10 | * `mkdocs help` - Print this help message.
11 |
12 | ## Project layout
13 |
14 | mkdocs.yml # The configuration file.
15 | docs/
16 | index.md # The documentation homepage.
17 |
18 |
19 | ## requirements
20 | logging
21 | gevent
22 | MySQLdb
23 | weibo
24 | selenium
25 | scrapy(依赖lxml)
26 |
27 |
28 | ## cmd运行配置
29 | 新增workspace.path文件到virtualenv目录(E:\PythonWorkspace\ugc\ugc_venv\Lib\site-packages)
30 | ```
31 | E:\PythonWorkspace\ugc\ugc.aggregator
32 | E:\PythonWorkspace\ugc\ugc.aggregator\src\main\python
33 | ```
34 | 注意path文件中的模块目录必须有__init__.py文件
35 | ## 进入virtualEnv
36 | E:\PythonWorkspace\ugc\ugc_venv\Scripts\activate
37 | ## 执行程序
38 | python E:\PythonWorkspace\ugc\ugc.aggregator\src\main\scripts\GeocodingService.py
39 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/scripts/logging.ini:
--------------------------------------------------------------------------------
1 | #Configuration for log output
2 | #Naiveloafer
3 | #2012-06-04
4 |
5 | [loggers]
6 | keys=root,ugc
7 |
8 | [handlers]
9 | keys=consoleHandler,fileHandler,rotatingFileHandler
10 |
11 | [formatters]
12 | keys=simpleFmt
13 |
14 | [logger_root]
15 | level=DEBUG
16 | handlers=rotatingFileHandler,consoleHandler
17 | #handlers=fileHandler
18 | #handlers=rotatingFileHandler
19 |
20 | [logger_ugc]
21 | level=DEBUG
22 | handlers=rotatingFileHandler,consoleHandler
23 | qualname=ugc
24 | propagate=0
25 |
26 | [handler_consoleHandler]
27 | class=StreamHandler
28 | level=DEBUG
29 | formatter=simpleFmt
30 | args=(sys.stdout,)
31 |
32 | [handler_fileHandler]
33 | class=FileHandler
34 | level=DEBUG
35 | formatter=simpleFmt
36 | args=("c:/log/ugc/run.log", "a")
37 |
38 | [handler_rotatingFileHandler]
39 | class=handlers.RotatingFileHandler
40 | level=DEBUG
41 | formatter=simpleFmt
42 | args=("c:/log/ugc/run.log", "a", 20*1024*1024, 10)
43 |
44 |
45 | [formatter_simpleFmt]
46 | format=%(asctime)s - %(name)s - %(levelname)s - %(message)s - [%(filename)s:%(lineno)s]
47 | datefmt=
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/scripts/Hotel/HotelGeocoding.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer'
3 |
4 | import traceback
5 |
6 | from dao.hotel.TuniuDao import TuniuDAO
7 | from setting import local_hotel_setting
8 | from service.map.baidu.APIService import BaiduMapAPIService
9 |
10 | # 配置数据库
11 | dao_setting = local_hotel_setting
12 |
13 | dao = TuniuDAO(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"])
14 |
15 | map_service = BaiduMapAPIService("MviPFAcx5I6f1FkRQlq6iTxc")
16 |
17 | hotellist = dao.get_hotelinfo()
18 |
# Collected geocoding results, one dict per hotel that resolved successfully
hotel_location = []

# Walk the hotel rows and geocode each hotel by its name (column 1)
for i, hotel in enumerate(hotellist):
    geocoding_info = map_service.doGeocoding(hotel[1])
    try:
        location = geocoding_info["result"]["location"]
        geocoding_info = {"hotel_name": hotel[1], "x": location["lng"], "y": location["lat"]}
    except Exception:
        # Response without a usable location: report it and move on.
        # ``except Exception`` (not a bare except) lets Ctrl-C and
        # SystemExit stop the run.
        traceback.print_exc()
        continue
    hotel_location.append(geocoding_info)
    print("%d done" % i)

print(len(hotel_location))
# Persist the results to the database
dao.save_hotels_location(hotel_location)
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 记忆的残骸
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/hotel/TuniuAPIClient.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer'
3 |
4 | from util.http.UniversalSDK import APIClient
5 | import datetime
6 |
7 |
class TuniuAPIClient(object):
    """Client for the hotel-list endpoint of hotel.tuniu.com."""

    def __init__(self):
        self.client = APIClient("http://hotel.tuniu.com")

    def get_hotel_list(self, page, cityCode, checkIn=None, checkOut=None):
        """Fetch one page of the Tuniu hotel list.

        :param page: page number to request
        :param cityCode: city code, forwarded as ``search[cityCode]``
        :param checkIn: check-in date as ``YYYY-MM-DD``; when omitted the stay
            defaults to tomorrow -> the day after (``checkOut`` is then ignored)
        :param checkOut: check-out date as ``YYYY-MM-DD``; when omitted it
            defaults to the day after ``checkIn``
        :return: whatever the underlying ``APIClient`` ``get_by_dict`` returns
        """
        date_format = '%Y-%m-%d'
        one_day = datetime.timedelta(days=1)
        if checkIn is None:
            tomorrow = datetime.datetime.now() + one_day
            checkIn = tomorrow.strftime(date_format)
            checkOut = (tomorrow + one_day).strftime(date_format)
        elif checkOut is None:
            # Previously a lone checkIn sent checkOutDate=None to the server;
            # default to a one-night stay instead.
            try:
                arrival = datetime.datetime.strptime(checkIn, date_format)
            except (TypeError, ValueError):
                # Unrecognised date format: keep the legacy behaviour and
                # forward checkOut unchanged.
                pass
            else:
                checkOut = (arrival + one_day).strftime(date_format)
        query_param = {
            "r": "/hotel/ajax/list",
            "search[cityCode]": cityCode,
            "search[checkInDate]": checkIn,
            "search[checkOutDate]": checkOut,
            "sort[first][id]": "recommend",
            "sort[third]": "cash-back-after",
            "page": page,
            "returnFilter": 0
        }
        hotel_list = self.client.yii.addtrail(".php").get_by_dict(query_param)
        return hotel_list
--------------------------------------------------------------------------------
/ugc.aggregator.esri/requirements.txt:
--------------------------------------------------------------------------------
1 | alabaster==0.7.7
2 | Babel==2.2.0
3 | backports-abc==0.4
4 | backports.ssl-match-hostname==3.5.0.1
5 | certifi==2015.11.20.1
6 | cffi==1.5.0
7 | characteristic==14.3.0
8 | click==6.2
9 | colorama==0.3.6
10 | coverage==4.0.3
11 | cryptography==1.2.1
12 | cssselect==0.9.1
13 | docutils==0.12
14 | enum34==1.1.2
15 | flake8==2.5.1
16 | funcsigs==0.4
17 | gevent==1.0.2
18 | greenlet==0.4.9
19 | idna==2.0
20 | ipaddress==1.0.16
21 | Jinja2==2.8
22 | livereload==2.4.0
23 | logging==0.4.9.6
24 | lxml==3.5.0
25 | Markdown==2.6.5
26 | MarkupSafe==0.23
27 | mccabe==0.3.1
28 | mkdocs==0.14.0
29 | mock==1.3.0
30 | mockito==0.5.2
31 | mysql-connector-python==2.1.3
32 | MySQL-python==1.2.3
33 | pbr==1.8.1
34 | pep8==1.7.0
35 | pyasn1==0.1.9
36 | pyasn1-modules==0.0.8
37 | PyBuilder==0.11.4
38 | pycparser==2.14
39 | pydash==3.4.1
40 | pyflakes==1.0.0
41 | Pygments==2.1
42 | pyOpenSSL==0.15.1
43 | pytz==2015.7
44 | PyYAML==3.11
45 | queuelib==1.4.2
46 | Scrapy==1.0.4
47 | selenium==2.49.0
48 | service-identity==14.0.0
49 | sinaweibopy==1.1.4
50 | singledispatch==3.4.0.3
51 | six==1.10.0
52 | snowballstemmer==1.2.1
53 | Sphinx==1.3.4
54 | sphinx-rtd-theme==0.1.9
55 | tblib==1.2.0
56 | tornado==4.3
57 | Twisted==15.5.0
58 | unittest-xml-reporting==1.13.0
59 | verify==1.1.0
60 | w3lib==1.13.0
61 | wheel==0.24.0
62 | zope.interface==4.1.3
63 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/nlp/keywords.txt:
--------------------------------------------------------------------------------
1 | 房间
2 | 酒店
3 | 设施
4 | 早餐
5 | 环境
6 | 前台
7 | 交通
8 | 价格
9 | 空调
10 | 服务员
11 | 床
12 | 隔音
13 | 卫生间
14 | 总体
15 | 地方
16 | 性价比
17 | 位置
18 | 地铁
19 | 态度
20 | 夫子庙
21 | 味道
22 | 窗户
23 | 电视
24 | 地铁站
25 | 餐厅
26 | 热水
27 | 整体
28 | 客房
29 | 宾馆
30 | 声音
31 | 被子
32 | 电梯
33 | 大堂
34 | 浴室
35 | 市中心
36 | 火车站
37 | 空间
38 | 购物
39 | 品种
40 | 公交
41 | 对面
42 | 床单
43 | 饭店
44 | 步行
45 | 老板
46 | 用品
47 | 网络
48 | 硬件
49 | 电话
50 | 时间
51 | 门
52 | 枕头
53 | 浴缸
54 | 淋浴
55 | 厕所
56 | 商务
57 | 地段
58 | 餐饮
59 | 网速
60 | 大床
61 | 景点
62 | 电脑
63 | 停车场
64 | 面积
65 | 噪音
66 | 马路
67 | 地毯
68 | 工作人员
69 | 楼层
70 | 马桶
71 | 机场
72 | 大厅
73 | 住宿
74 | 拖鞋
75 | 电视机
76 | 条件
77 | 办理
78 | 走廊
79 | 小时
80 | 特色
81 | 质量
82 | 速度
83 | 价位
84 | 小吃
85 | 早饭
86 | 吹风机
87 | 洗手间
88 | 浴巾
89 | 风格
90 | 标间
91 | 布置
92 | 快捷酒店
93 | 地铁口
94 | 公交车
95 | 商场
96 | 行李
97 | 通风
98 | 信号
99 | 体验
100 | 情况
101 | 床房
102 | 灯光
103 | 无线
104 | 菜
105 | 套房
106 | 个人
107 | 客服
108 | 市区
109 | 床垫
110 | 冰箱
111 | 广场
112 | 景区
113 | 总台
114 | 特价
115 | 公寓
116 | 出租车
117 | 晚餐
118 | 家庭
119 | 口味
120 | 地点
121 | 阳台
122 | 种类
123 | 价钱
124 | 行政
125 | 细节
126 | 印象
127 | 房价
128 | 经济
129 | 无线网
130 | 窗帘
131 | 空气
132 | 房卡
133 | 地板
134 | 家具
135 | 气味
136 | 员工
137 | 宽带
138 | 评价
139 | 卫生条件
140 | 收费
141 | 风景
142 | 经理
143 | 玻璃
144 | 桌子
145 | 办事
146 | 样子
147 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/scripts/Map/Geocoding.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer'
3 |
4 | import traceback
5 | import csv
6 | import re
7 |
8 | from util.io.CSVFileUtil import CSVFileUtil
9 | from service.map.baidu.APIService import BaiduMapAPIService
10 |
11 | input_file = r'C:\Users\kaipeng\Desktop\rent.csv'
12 | output_file = r"C:\Users\kaipeng\Desktop\rent_geocode.csv"
13 | have_title = True
14 | handle_row_index = 2
15 |
def handle_text(text):
    """Build the address string sent to the geocoder.

    Prefixes ``text`` with the city name re-encoded as GBK and removes every
    square bracket from ``text``. Python 2 only: relies on ``str.decode``.

    :param text: raw address cell (byte string)
    :return: GBK city prefix + ``text`` without "[" and "]"
    """
    return "广州市".decode("utf-8").encode("gbk") + re.sub(r"[\[\]]", "", text)


if __name__ == "__main__":
    csv_file_util = CSVFileUtil()
    # NOTE(review): the API key is hard-coded; consider loading it from configuration.
    map_service = BaiduMapAPIService("WBw4kIepZzGp4kH5Gn3r0ACy")
    # The context managers close both files (flushing buffered output rows)
    # even when the run aborts half-way; file() handles were never closed.
    with open(input_file) as source, open(output_file, "wb") as target:
        writer = csv.writer(target)
        count = 0
        for line in csv_file_util.reader(source):
            count += 1
            # The title row is skipped and not copied to the output.
            if have_title and count == 1:
                continue
            geocoding_info = map_service.doGeocoding(handle_text(line[handle_row_index]))
            try:
                location = geocoding_info["result"]["location"]
                coord = str(location["lng"]) + ',' + str(location["lat"])
                line[3] = coord
                print("Success:count:%d" % count)
            except Exception:
                traceback.print_exc()
                print("Error:count:%d" % count)
            finally:
                # The row is written whether or not geocoding succeeded.
                writer.writerow(line)
39 |
40 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 | Buildout
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/util/io/FileUtil.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'geosmart'
3 | import sys, os
4 | import pickle
5 | import io
6 |
class FileUtil(object):
    """File helpers: pickle round-tripping, deletion and script-path lookup."""

    def __init__(self):
        pass

    def writeObjToFile(self, fileName, obj):
        """Pickle ``obj`` into ``fileName``, opened in binary write mode."""
        with open(fileName, 'wb') as handle:
            pickle.dump(obj, handle)

    def readFileToObj(self, fileName):
        """Unpickle and return the object stored in ``fileName``.

        Returns None when the file does not exist.
        """
        if not os.path.exists(fileName):
            return None
        # NOTE(review): pickle.load can execute arbitrary code; only use this
        # on files the application wrote itself.
        with open(fileName, 'rb') as handle:
            return pickle.load(handle)

    def deleteFile(self, fileName):
        """Remove ``fileName`` if it exists; do nothing otherwise."""
        if not os.path.exists(fileName):
            return
        os.remove(fileName)

    def cur_file_dir(self):
        """Return the directory derived from ``sys.path[0]``.

        If that entry is a directory (plain script run) it is returned as-is;
        if it is a file (e.g. a py2exe bundle) its parent directory is
        returned; otherwise None.
        """
        script_path = sys.path[0]
        if os.path.isdir(script_path):
            return script_path
        if os.path.isfile(script_path):
            return os.path.dirname(script_path)
        return None

    def getLogConfigPath(self, rootFolder="ugc.aggregator"):
        """Return the path of logging.ini below the ``rootFolder`` checkout.

        Everything in the current script directory up to the first occurrence
        of ``rootFolder`` is kept, then ``rootFolder`` and the fixed relative
        path of the logging configuration are appended.
        """
        prefix = self.cur_file_dir().split(rootFolder, 1)[0]
        # print logPath
        return prefix + rootFolder + "/src/main/scripts/logging.ini"
54 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/site/license/highlight.js/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2006, Ivan Sagalaev
2 | All rights reserved.
3 | Redistribution and use in source and binary forms, with or without
4 | modification, are permitted provided that the following conditions are met:
5 |
6 | * Redistributions of source code must retain the above copyright
7 | notice, this list of conditions and the following disclaimer.
8 | * Redistributions in binary form must reproduce the above copyright
9 | notice, this list of conditions and the following disclaimer in the
10 | documentation and/or other materials provided with the distribution.
11 | * Neither the name of highlight.js nor the names of its contributors
12 | may be used to endorse or promote products derived from this software
13 | without specific prior written permission.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
16 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/site/mkdocs/search_index.json:
--------------------------------------------------------------------------------
1 | {
2 | "docs": [
3 | {
4 | "location": "/",
5 | "text": "Welcome to MkDocs\n\n\nFor full documentation visit \nmkdocs.org\n.\n\n\nCommands\n\n\n\n\nmkdocs new [dir-name]\n - Create a new project.\n\n\nmkdocs serve\n - Start the live-reloading docs server.\n\n\nmkdocs build\n - Build the documentation site.\n\n\nmkdocs help\n - Print this help message.\n\n\n\n\nProject layout\n\n\nmkdocs.yml # The configuration file.\ndocs/\n index.md # The documentation homepage.\n ... # Other markdown pages, images and other files.",
6 | "title": "Home"
7 | },
8 | {
9 | "location": "/#welcome-to-mkdocs",
10 | "text": "For full documentation visit mkdocs.org .",
11 | "title": "Welcome to MkDocs"
12 | },
13 | {
14 | "location": "/#commands",
15 | "text": "mkdocs new [dir-name] - Create a new project. mkdocs serve - Start the live-reloading docs server. mkdocs build - Build the documentation site. mkdocs help - Print this help message.",
16 | "title": "Commands"
17 | },
18 | {
19 | "location": "/#project-layout",
20 | "text": "mkdocs.yml # The configuration file.\ndocs/\n index.md # The documentation homepage.\n ... # Other markdown pages, images and other files.",
21 | "title": "Project layout"
22 | },
23 | {
24 | "location": "/about/",
25 | "text": "",
26 | "title": "About"
27 | }
28 | ]
29 | }
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/css/loading.css:
--------------------------------------------------------------------------------
1 | .loader {
2 | width: 150px;
3 | margin: 50px auto 70px;
4 | position: relative;
5 | }
6 | .loader .loading-1 {
7 | position: relative;
8 | width: 100%;
9 | height: 10px;
10 | border: 1px solid #93B8EB;
11 | border-radius: 10px;
12 | animation: turn 4s linear 1.75s infinite;
13 | }
14 | .loader .loading-1:before {
15 | content: "";
16 | display: block;
17 | position: absolute;
18 | width: 0%;
19 | height: 100%;
20 | background: #1ABC9C;
21 | box-shadow: 10px 0px 15px 0px #69d2e7;
22 | animation: load 2s linear infinite;
23 | }
24 | .loader .loading-2 {
25 | width: 100%;
26 | position: absolute;
27 | top: 10px;
28 | color: #FFAD00;
29 | font-size: 22px;
30 | text-align: center;
31 | animation: bounce 2s linear infinite;
32 | }
33 | @keyframes load {
34 | 0% {
35 | width: 0%;
36 | }
37 | 87.5%, 100% {
38 | width: 100%;
39 | }
40 | }
41 | @keyframes turn {
42 | 0% {
43 | transform: rotateY(0deg);
44 | }
45 | 6.25%, 50% {
46 | transform: rotateY(180deg);
47 | }
48 | 56.25%, 100% {
49 | transform: rotateY(360deg);
50 | }
51 | }
52 | @keyframes bounce {
53 | 0%,100% {
54 | top: 10px;
55 | }
56 | 12.5% {
57 | top: 30px;
58 | }
59 | }
60 | .htmleaf-container{
61 | margin: 0 auto;
62 | }
63 | .container{width:1170px}
64 | .row{margin-left:-15px;margin-right:-15px}
65 | .col-md-12{width:100%;z-index:3;position:absolute}
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/weibo/APIService.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'lizhen'
3 |
4 | from weibo import APIClient
5 |
class WeiboAPIService(object):
    """Convenience wrapper around the ``weibo`` SDK ``APIClient``."""

    # NOTE(review): real-looking credentials are baked into the defaults; they
    # are kept for backward compatibility but should come from configuration.
    def __init__(self,appKey="1268278335",appSecret = "204dfdc6e50ea33fe282445f4f0a3b0e",token = "2.005jCfXFLIZp4Bd42d17a3dbC3fmaB"):
        self.appKey = appKey
        self.appSecret = appSecret
        self.token = token
        self.client = APIClient(self.appKey, self.appSecret, redirect_uri='')
        self.client.set_access_token(self.token, 0)

    # Fetch user information
    # API reference: http://open.weibo.com/wiki/2/users/show
    def getUserInfo(self, screen_name=None, uid=None):
        """Look a user up by screen name or, failing that, by uid.

        :param screen_name: user screen name; takes precedence over ``uid``
        :param uid: user id, used only when ``screen_name`` is None
        :return: response of ``client.users.show.get``
        :raises ValueError: if neither ``screen_name`` nor ``uid`` is given
        """
        if screen_name is not None:
            return self.client.users.show.get(screen_name=screen_name)
        if uid is not None:
            return self.client.users.show.get(uid=uid)
        # ValueError subclasses Exception, so callers that caught the old
        # message-less Exception keep working.
        raise ValueError("either screen_name or uid must be provided")

    # Fetch statuses posted around a location
    # API reference: http://open.weibo.com/wiki/2/place/nearby_timeline
    def getWeibo_nearbyline(self, lat, lon, starttime, endtime, range=3000, count=50, offset=0):
        """Return the response of ``client.place.nearby_timeline.get``.

        ``lon`` is sent under the API parameter name ``long``; every other
        argument is forwarded under its own name.
        """
        data = self.client.place.nearby_timeline.get(lat=lat, long=lon, starttime=starttime, endtime=endtime, range=range, count=count, offset=offset)
        return data

    def get_weibo_user_timeline(self, uid, count=50):
        """Return the response of ``client.place.user_timeline.get``."""
        return self.client.place.user_timeline.get(uid=uid, count=count)

    def get_poi_timeline(self, poiid, count=50, page=1):
        """Return the response of ``client.place.poi_timeline.get``."""
        return self.client.place.poi_timeline.get(poiid=poiid, count=count, page=page)

    def get_address_to_geo(self, address):
        """Return the response of ``client.location.geo.address_to_geo.get``."""
        return self.client.location.geo.address_to_geo.get(address=address)
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/util/common/Decorators.py:
--------------------------------------------------------------------------------
1 | import time
2 | from functools import wraps
3 |
4 |
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    """Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    :param ExceptionToCheck: the exception to check. may be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int
    :param backoff: backoff multiplier e.g. value of 2 will double the delay
        each retry
    :type backoff: int
    :param logger: logger to use. If None, print
    :type logger: logging.Logger instance
    :return: a decorator; the wrapped function returns the first successful
        result, or lets the exception of the final attempt propagate
    """
    def deco_retry(f):

        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            # All attempts but the last run inside the guarded loop.
            while mtries > 1:
                try:
                    return f(*args, **kwargs)
                # "except ... as" and print(...) are valid on Python 2.6+
                # and 3, unlike the old "except X, e" / "print msg" forms.
                except ExceptionToCheck as e:
                    msg = "%s, Retrying in %d seconds..." % (str(e), mdelay)
                    if logger:
                        logger.warning(msg)
                    else:
                        print(msg)
                    time.sleep(mdelay)
                    mtries -= 1
                    mdelay *= backoff
            # Final attempt is unguarded so its exception reaches the caller.
            return f(*args, **kwargs)

        return f_retry  # true decorator

    return deco_retry
46 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # C extensions
6 | *.so
7 |
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 |
25 | # PyInstaller
26 | # Usually these files are written by a python script from a template
27 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
28 | *.manifest
29 | *.spec
30 |
31 | # Installer logs
32 | pip-log.txt
33 | pip-delete-this-directory.txt
34 |
35 | # Unit test / coverage reports
36 | htmlcov/
37 | .tox/
38 | .coverage
39 | .cache
40 | nosetests.xml
41 | coverage.xml
42 |
43 | # Translations
44 | *.mo
45 | *.pot
46 |
47 | # Django stuff:
48 | *.log
49 |
50 | # Sphinx documentation
51 | docs/_build/
52 |
53 | # PyBuilder
54 | target/
55 |
56 | # =========================
57 | # Operating System Files
58 | # =========================
59 |
60 | # OSX
61 | # =========================
62 |
63 | .DS_Store
64 | .AppleDouble
65 | .LSOverride
66 |
67 | # Thumbnails
68 | ._*
69 |
70 | # Files that might appear on external disk
71 | .Spotlight-V100
72 | .Trashes
73 |
74 | # Directories potentially created on remote AFP share
75 | .AppleDB
76 | .AppleDesktop
77 | Network Trash Folder
78 | Temporary Items
79 | .apdisk
80 |
81 | # Windows
82 | # =========================
83 |
84 | # Windows image file caches
85 | Thumbs.db
86 | ehthumbs.db
87 |
88 | # Folder config file
89 | Desktop.ini
90 |
91 | # Recycle Bin used on file shares
92 | $RECYCLE.BIN/
93 |
94 | # Windows Installer files
95 | *.cab
96 | *.msi
97 | *.msm
98 | *.msp
99 |
100 | # Windows shortcuts
101 | *.lnk
102 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/site/js/theme.js:
--------------------------------------------------------------------------------
/**
 * Page bootstrap for the documentation theme: wires the mobile navigation
 * toggles, wraps tables so they can scroll horizontally, and starts
 * highlight.js.
 */
$( document ).ready(function() {

    // Shift nav in mobile when clicking the menu.
    $(document).on('click', "[data-toggle='wy-nav-top']", function() {
        $("[data-toggle='wy-nav-shift']").toggleClass("shift");
        $("[data-toggle='rst-versions']").toggleClass("shift");
    });

    // Close menu when you click a link.
    $(document).on('click', ".wy-menu-vertical .current ul li a", function() {
        $("[data-toggle='wy-nav-shift']").removeClass("shift");
        $("[data-toggle='rst-versions']").toggleClass("shift");
    });

    // Expand / collapse the version selector.
    $(document).on('click', "[data-toggle='rst-current-version']", function() {
        $("[data-toggle='rst-versions']").toggleClass("shift-up");
    });

    // Make tables responsive: each table gets its own wrapper element.
    // The wrapper markup must be a complete element on a single line; a
    // string literal broken across lines is a syntax error.
    $("table.docutils:not(.field-list)").wrap("<div class='wy-table-responsive'></div>");

    hljs.initHighlightingOnLoad();

    // Tag every table so the theme's table styling applies to it.
    $('table').addClass('docutils');
});
26 |
/**
 * Public theme namespace. Exposes `StickyNav.enable()`, which pins the side
 * navigation while it is no taller than the window and re-evaluates that on
 * every window resize.
 */
window.SphinxRtdTheme = (function (jquery) {

    function buildStickyNav() {
        var STICKY_CLASS = 'stickynav';
        var sidebar;
        var viewport;

        // Add the sticky class when the sidebar fits in the window,
        // otherwise remove it.
        function refresh() {
            var fits = sidebar.height() <= viewport.height();
            if (fits) {
                sidebar.addClass(STICKY_CLASS);
            } else {
                sidebar.removeClass(STICKY_CLASS);
            }
        }

        function enable() {
            refresh();
            viewport.on('resize', refresh);
        }

        // Element lookups are deferred until the DOM is ready.
        jquery(function () {
            sidebar = jquery('nav.wy-nav-side:first');
            viewport = jquery(window);
        });

        return {
            enable : enable
        };
    }

    return {
        StickyNav : buildStickyNav()
    };
}($));
56 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/nlp/Sentiment.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer,LiuYang'
3 |
4 | import os
5 | import codecs
6 | from thulac import thulac
7 |
8 | from service.nlp.Bayes import Bayes
9 |
10 |
11 | data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
12 | 'sentiment.marshal')
13 |
14 | class Sentiment():
15 |
16 | def __init__(self):
17 | self.classifier = Bayes()
18 | self.thu = thulac("-seg_only")
19 | train_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'sentiment.marshal')
20 | self.load(train_file)
21 |
22 | '''
23 | 保存训练结果
24 | '''
25 | def save(self, fname, iszip=True):
26 | self.classifier.save(fname, iszip)
27 |
28 | '''
29 | 加载训练结果
30 | '''
31 | def load(self, fname=data_path, iszip=True):
32 | self.classifier.load(fname, iszip)
33 |
34 | '''
35 | 分词并过滤停止词
36 | '''
37 | def handle(self, doc):
38 | words = self.thu.cut(doc)
39 | words = filter_stop(words)
40 | return words
41 |
42 | '''
43 | 语料训练
44 | 对输入正负语料进行训练,统计词频
45 | '''
46 | def train(self, neg_docs, pos_docs):
47 | data = []
48 | for sent in neg_docs:
49 | data.append([self.handle(sent), 'neg'])
50 | for sent in pos_docs:
51 | data.append([self.handle(sent), 'pos'])
52 | self.classifier.train(data)
53 |
54 | '''
55 | 分类
56 | 将输入的文本进行使用NB分类,通过拉布拉斯平滑得到归一化结果
57 | '''
58 | def classify(self, sent):
59 | ret, prob = self.classifier.classify(self.handle(sent))
60 | if ret == 'pos':
61 | return prob
62 | return 1-prob
63 |
# Module-level stop-word set, read once at import time from 'stopwords.txt'
# located next to this module.
stop_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         'stopwords.txt')
stop = set()
# The with-statement guarantees the handle is closed even if reading or
# encoding a line raises.
with codecs.open(stop_path, 'r', 'utf-8') as fr:
    for word in fr:
        # Entries are stored UTF-8 encoded and stripped of whitespace.
        stop.add(word.encode("utf-8").strip())
71 |
def filter_stop(words):
    """Return the items of `words`, in order, that are not in the
    module-level `stop` set."""
    return [token for token in words if token not in stop]
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/js/setting.js:
--------------------------------------------------------------------------------
// Global endpoint configuration shared by the pages of the hotel web client.
// REST paths below are relative; callers prepend `domain` (see login/globalconfig.js).
//var domain = "http://192.168.1.123:5000";
var domain = "http://localhost:5000";
// Fetch the number of comments per comment type
var getCommTypeNum = "/ugc.hotel/rest/v100/hotel/get/type_score/statics";
var getViewpoint = "/ugc.hotel/rest/v100/hotel/get/viewpoint";
var getAdjective = "/ugc.hotel/rest/v100/hotel/get/adjective";
var getComments = "/ugc.hotel/rest/v100/hotel/get/comments";
var getWeiboCome = "/ugc.hotel/rest/v100/weibo/get/nearby_timeline/statics";
var getArroudFacility = "/ugc.hotel/rest/v100/map/get/aroundfacilities";
var getMaxDistance = "/ugc.hotel/rest/v100/map/get/maxdistance";
var getBedpraise = "/ugc.hotel/rest/v100/map/get/hotelbedinfo";
var getWeiboTrace = "/ugc.hotel/rest/v100/weibo/get/user_trace";
var getroomnum = "/ugc.hotel/rest/v100/map/get/hotelroomnum";
var getBaseinfoUrl = "/ugc.hotel/rest/v100/hotel/get/baseinfo";
var getHotelTrace = "/ugc.hotel/rest/v100/hotel/get/user_trace";
var getViewpointTuniu = "/ugc.hotel/rest/v100/hotel/get/tuniu/viewpoint";
var checkUserUrl = "/ugc.hotel/rest/v100/hotel/get/check_user";
var getFlowToHtml = "/ugc.hotel/rest/v100/hotel/get/html/customer_to";


// ArcGIS service paths (geoprocessing and feature services).
// NOTE(review): presumably resolved against `serverDomain` — confirm in the callers.
var serverDomain = "http://localhost:6080";
var gpUrl = '/arcgis/rest/services/GP/HexagonAnalze/GPServer/HexagonAnalyze';
var hotelUrl = "/arcgis/rest/services/NJ_Hotel/FeatureServer/1";
var sightspotUrl = "/arcgis/rest/services/NJ_Hotel/FeatureServer/0";
var kernelDensityGPUrl = '/arcgis/rest/services/GP/kernelDensityAnalysis/GPServer/kernelDensityAnalysis';
var customerFlowUrl = "/arcgis/rest/services/Customer_Flow/FeatureServer/0";
var serviceAreaGPUrl = '/arcgis/rest/services/GP/serviceArea/GPServer/serviceArea';

// (Recommendation) fetch hotel room information from the PMS
var getRoominfo = "/ugc.hotel/rest/v100/room/get/room_info";
var userLogin = "/ugc.hotel/rest/v100/user/login";

// (Quality inspection) fetch the comment state of every room on a floor, by floor number
var getRemarkstates = "/ugc.hotel/rest/v100/quality/floorstate";
var getRoomRemark = "/ugc.hotel/rest/v100/quality/getroomremark";
var getRemarkByPoints = "/ugc.hotel/rest/v100/quality/getRemarkByPoints";
37 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/dao/hotel/elong/ElongDao.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'pengshaowei'
3 |
4 | import MySQLdb
5 | from dao.SuperDAO import SuperDAO
6 |
class ElongDAO(SuperDAO):
    """DAO for the eLong crawler tables (baseinfo, elong_hotelinfo,
    elong_roominfo, remark).

    Each public method opens its own connection, commits, and closes it.
    Database errors are printed rather than raised (best-effort behaviour).
    """

    def __init__(self, host, db, user, password):
        SuperDAO.__init__(self, host, db, user, password)

    def _elong_session(self, work):
        """Run work(cursor) on a fresh connection, commit, and return its result.

        Cursor and connection are closed even when cursor creation, `work`
        or the commit raises, so no connection is leaked.
        """
        db = MySQLdb.connect(self.host, self.user, self.password, self.db, charset='utf8')
        try:
            cursor = db.cursor()
            try:
                result = work(cursor)
                db.commit()
                return result
            finally:
                cursor.close()
        finally:
            db.close()

    def _elong_insert(self, sql_template, rows):
        """Insert each dict in `rows` using `sql_template`, which takes
        (column list, placeholder list).

        Values are bound as query parameters. Column names are interpolated
        into the statement and therefore must come from trusted code.
        Inserting stops at the first failing row; rows executed before the
        failure are still committed.
        """
        def work(cursor):
            try:
                for row in rows:
                    placeholders = ', '.join(['%s'] * len(row))
                    columns = ', '.join(row.keys())
                    cursor.execute(sql_template % (columns, placeholders), list(row.values()))
            except Exception as e:
                print(e)
        self._elong_session(work)

    def getAllUrl(self):
        """Return every row of `baseinfo`, or an empty list if the query fails."""
        def work(cursor):
            try:
                cursor.execute("select * from baseinfo")
                return cursor.fetchall()
            except Exception as e:
                print(e)
                return []
        return self._elong_session(work)

    def saveHotelInfo(self, hotelItem):
        """Insert one hotel record (dict of column -> value) into `elong_hotelinfo`."""
        self._elong_insert("insert into elong_hotelinfo( %s ) values ( %s )", [hotelItem])

    def save_room_info(self, room_list):
        """Insert every room dict of `room_list` into `elong_roominfo`."""
        self._elong_insert("insert into elong_roominfo( %s ) values ( %s )", room_list)

    def saveComments(self, commList):
        """Insert every comment dict of `commList` into `remark`."""
        self._elong_insert("insert into remark ( %s ) values ( %s )", commList)
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/dao/SuperDAO.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer'
3 |
4 | import traceback
5 | import MySQLdb
6 |
class SuperDAO(object):
    """Base DAO that stores MySQL connection settings and offers generic
    insert and select helpers.

    Every public method opens its own connection, commits, and closes it.
    Database errors are reported on stdout instead of being raised.
    """

    def __init__(self, host, db, user, password):
        self.host = host
        self.db = db
        self.user = user
        self.password = password

    def _with_cursor(self, work):
        """Run work(cursor) on a fresh connection, commit, and return its result.

        Cursor and connection are closed even when cursor creation, `work`
        or the commit raises, so no connection is leaked.
        """
        db = MySQLdb.connect(self.host, self.user, self.password, self.db, charset='utf8')
        try:
            cursor = db.cursor()
            try:
                result = work(cursor)
                db.commit()
                return result
            finally:
                cursor.close()
        finally:
            db.close()

    @staticmethod
    def _insert_sql(table_name, record):
        """Build the INSERT statement for `record`.

        Values are bound as query parameters by the caller. Table and
        column names are interpolated into the statement and therefore
        must come from trusted code, never from user input.
        """
        placeholders = ', '.join(['%s'] * len(record))
        columns = ', '.join(record.keys())
        return "insert into %s( %s ) values ( %s )" % (table_name, columns, placeholders)

    def save_record(self, table_name, record):
        """Save one record (dict of column -> value) into `table_name`."""
        def work(cursor):
            try:
                cursor.execute(self._insert_sql(table_name, record), list(record.values()))
            except Exception as e:
                print(e)
        self._with_cursor(work)

    def save_records(self, table_name, records):
        """Save several records into `table_name`.

        Stops at the first failing record and prints it together with the
        traceback; records executed before the failure are still committed.
        """
        def work(cursor):
            for record in records:
                try:
                    cursor.execute(self._insert_sql(table_name, record), list(record.values()))
                except Exception:
                    # Report the offending record itself. Indexing one
                    # specific column here would raise KeyError for any
                    # table that does not have that column.
                    print(record)
                    traceback.print_exc()
                    break
        self._with_cursor(work)

    def get_records(self, table_name):
        """Return all rows of `table_name`, or an empty list if the query fails.

        `table_name` is interpolated into the statement (identifiers cannot
        be bound as parameters), so it must come from trusted code.
        """
        def work(cursor):
            try:
                cursor.execute("select * from %s" % table_name)
                return cursor.fetchall()
            except Exception as e:
                print(e)
                return []
        return self._with_cursor(work)
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/html/login.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | 酒店分析系统登录界面
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 | 酒店竞业市场 时空可视化和分析 系统
20 | 基于社会感知
21 |
22 | 对不起,浏览器不支持
23 |
24 |
25 |
40 |
41 |
42 |
43 |
44 |
45 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/css/main.css:
--------------------------------------------------------------------------------
1 | .slideMenu{
2 | position: absolute;
3 | width: 48px;
4 | height: 100%;
5 | speak: none;
6 | font-style: normal;
7 | font-weight: normal;
8 | font-variant: normal;
9 | }
10 |
11 | .slide-content {
12 | position: absolute;
13 | left: 48px;
14 | width: 401px;
15 | height: 100%;
16 | border-right:1px solid #58D68D;
17 | background: #ECF0F1;
18 | }
19 |
20 | #sentiment-table {
21 | height: 300px;
22 | width: 100%;
23 | }
24 |
25 | #word-cloud {
26 | height: 200px;
27 | width: 100%;
28 | }
29 |
30 | #review-rate {
31 | height: 200px;
32 | width: 100%;
33 | }
34 |
35 | .seamless {
36 | margin: 0px;
37 | border: 0px;
38 | }
39 |
40 | #search-box {
41 | display: block;
42 | position: absolute;
43 | z-index: 2;
44 | top: 30px;
45 | right: 74px;
46 | width: 300px;
47 | }
48 |
49 |
/* Tag box: positioned over the page at the top-left and hidden by default.
   The earlier `display: block` was dead, since the later `display: none`
   in the same rule always won. */
#tag-box {
    position: absolute;
    z-index: 2;
    top: 2px;
    left: 50px;
    display: none;
}
58 |
59 | .comparison-chart {
60 | height: 200px;
61 | width: 100%;
62 | background: #ECF0F1;
63 |
64 | }
65 |
66 | .praisecontral_charts {
67 | height: 200px;
68 | width: 100%;
69 | background: #ECF0F1;
70 | }
71 |
72 |
73 | #comparison_buttons {
74 | position: fixed;
75 | bottom: 0px;
76 | left: 48px;
77 | }
78 |
79 | #parisecontrol_buttons {
80 | position: fixed;
81 | bottom: 0px;
82 | left: 48px;
83 | }
84 |
85 |
86 | #btn_compare_roomnum {
87 | width: 133px;
88 | }
89 |
90 |
91 | #customerMap {
92 | height: 500px;
93 | }
94 |
95 | #genderPie {
96 | height: 300px;
97 | }
98 |
99 | #hexagon_buttons {
100 | position: fixed;
101 | bottom: 0px;
102 | left: 48px;
103 | }
104 |
105 | .btn-two-group {
106 | width: 185px;
107 | }
108 |
109 | .btn-third {
110 | width: 133px;
111 | }
112 |
113 | .btn-fourth {
114 | width: 92px;
115 | }
116 |
117 | .btn-onehalf {
118 | width: 200px;
119 | }
120 |
121 | #sight_buttons {
122 | position: fixed;
123 | bottom: 0px;
124 | left: 48px;
125 | }
126 |
127 | #relatedChart {
128 | height: 400px;
129 | }
130 |
131 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/nlp/HotelNLP.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer,LiuYang'
3 |
4 | import re
5 | import os
6 |
7 | from service.nlp.Sentiment import Sentiment
8 |
9 |
class HotelNLP(object):
    """Keyword-level sentiment extraction for hotel reviews."""

    def __init__(self):
        """Create the sentiment parser and load the keyword list from
        'keywords.txt' next to this module (one keyword per line)."""
        self.sentiment_parser = Sentiment()
        keywords_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'keywords.txt')
        with open(keywords_file, "r") as f:
            # Drop blank lines: an empty keyword is a substring of every
            # sub-sentence and would show up in every result.
            self.keywords = [kw for kw in (line.strip() for line in f) if kw]

    def sentiment(self, sent):
        """Return the sentiment score of `sent` from the sentiment parser."""
        return self.sentiment_parser.classify(sent)

    def viewpoint(self, sent, decoding=None):
        """Map each keyword found in `sent` to a sentiment value.

        The text is split into sub-sentences. Each sub-sentence containing
        at least one keyword is scored once, and that score is assigned to
        every keyword it contains. When a keyword appears again, the stored
        value is replaced by the average of the stored value and the new
        score.

        :param sent: review text
        :param decoding: passed through to `subsentence`
        :return: dict of keyword (decoded from UTF-8) -> sentiment value
        """
        viewpoint = {}
        for subsent in self.subsentence(sent, decoding):
            sentiment_value = None
            for keyword in self.keywords:
                # Skip empty keywords and keywords absent from this sub-sentence.
                if not keyword or keyword not in subsent:
                    continue
                # Score the sub-sentence lazily and only once.
                if sentiment_value is None:
                    sentiment_value = self.sentiment(subsent)
                key = keyword.decode("utf-8")
                if key in viewpoint:
                    viewpoint[key] = (viewpoint[key] + sentiment_value) / 2
                else:
                    viewpoint[key] = sentiment_value
        return viewpoint

    def subsentence(self, sent, decoding=None):
        """Split `sent` into non-empty, stripped sub-sentences.

        Splitting happens on line breaks and on sentence punctuation, both
        full-width CJK and ASCII forms. With `decoding` set, `sent` is
        decoded with that codec first and every sub-sentence is returned
        UTF-8 encoded; otherwise `sent` is used as given and the pieces are
        returned unchanged.
        """
        if decoding is not None:
            sent = sent.decode(decoding)
        line_break = re.compile(u'[\r\n]')
        # Full-width comma, ideographic full stop, full-width ? ! ; followed
        # by their ASCII counterparts. Written as escapes so the pattern
        # does not depend on the source file's encoding.
        delimiter = re.compile(u'[\uff0c\u3002\uff1f\uff01\uff1b,.?!;]')
        sentences = []
        for line in line_break.split(sent):
            line = line.strip()
            if not line:
                continue
            for piece in delimiter.split(line):
                piece = piece.strip()
                if not piece:
                    continue
                if decoding is not None:
                    sentences.append(piece.encode("utf-8"))
                else:
                    sentences.append(piece)
        return sentences
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/site/css/highlight.css:
--------------------------------------------------------------------------------
1 | /*
2 | This is the GitHub theme for highlight.js
3 |
4 | github.com style (c) Vasily Polovnyov
5 |
6 | */
7 |
8 | .hljs {
9 | display: block;
10 | overflow-x: auto;
11 | padding: 0.5em;
12 | color: #333;
13 | -webkit-text-size-adjust: none;
14 | }
15 |
16 | .hljs-comment,
17 | .diff .hljs-header,
18 | .hljs-javadoc {
19 | color: #998;
20 | font-style: italic;
21 | }
22 |
23 | .hljs-keyword,
24 | .css .rule .hljs-keyword,
25 | .hljs-winutils,
26 | .nginx .hljs-title,
27 | .hljs-subst,
28 | .hljs-request,
29 | .hljs-status {
30 | color: #333;
31 | font-weight: bold;
32 | }
33 |
34 | .hljs-number,
35 | .hljs-hexcolor,
36 | .ruby .hljs-constant {
37 | color: #008080;
38 | }
39 |
40 | .hljs-string,
41 | .hljs-tag .hljs-value,
42 | .hljs-phpdoc,
43 | .hljs-dartdoc,
44 | .tex .hljs-formula {
45 | color: #d14;
46 | }
47 |
48 | .hljs-title,
49 | .hljs-id,
50 | .scss .hljs-preprocessor {
51 | color: #900;
52 | font-weight: bold;
53 | }
54 |
55 | .hljs-list .hljs-keyword,
56 | .hljs-subst {
57 | font-weight: normal;
58 | }
59 |
60 | .hljs-class .hljs-title,
61 | .hljs-type,
62 | .vhdl .hljs-literal,
63 | .tex .hljs-command {
64 | color: #458;
65 | font-weight: bold;
66 | }
67 |
68 | .hljs-tag,
69 | .hljs-tag .hljs-title,
70 | .hljs-rule .hljs-property,
71 | .django .hljs-tag .hljs-keyword {
72 | color: #000080;
73 | font-weight: normal;
74 | }
75 |
76 | .hljs-attribute,
77 | .hljs-variable,
78 | .lisp .hljs-body,
79 | .hljs-name {
80 | color: #008080;
81 | }
82 |
83 | .hljs-regexp {
84 | color: #009926;
85 | }
86 |
87 | .hljs-symbol,
88 | .ruby .hljs-symbol .hljs-string,
89 | .lisp .hljs-keyword,
90 | .clojure .hljs-keyword,
91 | .scheme .hljs-keyword,
92 | .tex .hljs-special,
93 | .hljs-prompt {
94 | color: #990073;
95 | }
96 |
97 | .hljs-built_in {
98 | color: #0086b3;
99 | }
100 |
101 | .hljs-preprocessor,
102 | .hljs-pragma,
103 | .hljs-pi,
104 | .hljs-doctype,
105 | .hljs-shebang,
106 | .hljs-cdata {
107 | color: #999;
108 | font-weight: bold;
109 | }
110 |
111 | .hljs-deletion {
112 | background: #fdd;
113 | }
114 |
115 | .hljs-addition {
116 | background: #dfd;
117 | }
118 |
119 | .diff .hljs-change {
120 | background: #0086b3;
121 | }
122 |
123 | .hljs-chunk {
124 | color: #aaa;
125 | }
126 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/scripts/GeocodingServiceMultiProcess.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | from util.geo.GeoUtil import GeoUtil
4 |
5 | __author__ = 'geosmart'
6 | import copy_reg
7 | import multiprocessing
8 | import types
9 | from multiprocessing import Pool
10 | from time import time
11 |
12 | from service.map.baidu.SnatcherService import BaiduMapSnatcherService
13 | from util.common.CollectionUtil import CollectionUtil
14 |
15 |
16 | def _pickle_method(m):
17 | if m.im_self is None:
18 | return getattr, (m.im_class, m.im_func.func_name)
19 | else:
20 | return getattr, (m.im_self, m.im_func.func_name)
21 |
22 |
23 | copy_reg.pickle(types.MethodType, _pickle_method)
24 | # "bmm9EcjvS4TnnRzoZoYXXcAF",
25 | akList = []
class GeocodingService(object):
    """Fans reverse-geocoding of a bounding box out over a process pool."""

    def __init__(self):
        print("Constructor ... %s" % multiprocessing.current_process().name)

    def __del__(self):
        print("... Destructor %s" % multiprocessing.current_process().name)

    def fetchAddressNodeByPoints(self, index, points):
        """Worker entry point: fetch address nodes for one chunk of points.

        :param index: chunk number, used only for logging
        :param points: the chunk of points handed to the snatcher service
        """
        print('current index %s,points %s' % (index, str(len(points))))
        # NOTE(review): every worker uses the first key although chunks are
        # sized by len(akList) -- confirm whether one key per chunk was intended.
        snatcherService = BaiduMapSnatcherService(akList[0])
        snatcherService.fetchAddressNode(points)
        print('Process %s done' % index)
        # TODO multiprocessing implementation: the last process does not run
        # the code inside fetchAddressNode.

    def run(self):
        """Split the points of the bounding box into chunks and process them
        concurrently with multiprocessing.

        :raises ValueError: if the module-level `akList` holds no API key
        """
        if not akList:
            # Fail with a clear message instead of a ZeroDivisionError below
            # or an IndexError inside the workers.
            raise ValueError("akList is empty: configure at least one Baidu Map API key")

        bounds = [113.149662, 23.038528, 113.15175, 23.039123]
        # bounds = [113.129391, 22.98257, 113.261335, 23.072904]
        step = 1
        # Collect the points inside the region.
        points = GeoUtil().getPointByBounds(bounds, step)
        # At least one point per chunk, even with fewer points than keys.
        regionSize = max(1, len(points) // len(akList))
        # Split the point set into subsets.
        subPoints = CollectionUtil().chunksBySize(points, regionSize)

        processSize = len(subPoints)
        if processSize == 0:
            # Pool(processes=0) raises; nothing to do without points.
            print('no points to process')
            return

        pool = Pool(processes=processSize)
        try:
            results = []
            print('process size %s ,per process data sizes %s' % (processSize, regionSize))
            for index in range(0, processSize, 1):
                print('current process %s ' % index)
                r = pool.apply_async(self.fetchAddressNodeByPoints, args=(index, subPoints[index]))
                results.append(r)

            for r in results:
                r.wait()
            print('successful')
        finally:
            # Release the worker processes whether or not dispatch succeeded.
            pool.close()
            pool.join()
64 |
# Script entry point: run the geocoding job once and report the elapsed time.
if __name__ == '__main__':
    # Example invocation from the original development machine:
    # python E:\PythonWorkspace\sta\UGC_Agrregator\service\map\map\GeocodingService.py
    ts = time()
    service = GeocodingService()
    # service.asynchronous()
    service.run()
    print 'Took %s' % format(time() - ts)
    # Wait for Enter so the console window stays open.
    raw_input()
73 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/nlp/KeywordsHandler.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer,LiuYang'
3 |
4 | import csv
5 | from snownlp.sentiment import Sentiment
6 | import jieba.posseg as pseg
7 | from thulac import thulac
8 | from snownlp import normal
9 | from dao.hotel.TuniuDao import TuniuDAO
10 | from setting import local_hotel_setting
11 |
12 | dao_setting = local_hotel_setting
13 |
14 |
class KeywordsHandler(object):
    """Extracts candidate keywords (nouns ranked by frequency) from hotel comments."""

    def __init__(self):
        self.dao = TuniuDAO(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"])
        self.thu = thulac("-input cs.txt")

    def _load_sentences(self):
        """Read all hotel comments and split each one into sentences.

        The comment text is taken from index 2 of every row returned by
        the DAO.
        """
        sents = []
        for comm in self.dao.get_hotel_comments():
            sents.extend(normal.get_sentences(comm[2]))
        print("length of sentences:%d" % len(sents))
        return sents

    @staticmethod
    def _rank(noun_counts):
        """Return (noun, count) pairs sorted by count, highest first."""
        return sorted(noun_counts.items(), key=lambda item: item[1], reverse=True)

    def extract_keyword(self):
        """Rank nouns using the jieba part-of-speech tagger.

        :return: list of (noun, count) tuples, most frequent first
        """
        sents = self._load_sentences()
        # Tag every sentence with parts of speech.
        pos_sents = [pseg.cut(sent) for sent in sents]
        print("length of pos_sents:%d" % len(pos_sents))
        # Pick out the nouns and count them.
        print("counting")
        noun_counts = {}
        for pos_sent in pos_sents:
            for word, flag in pos_sent:
                if flag == "n":
                    noun_counts[word] = noun_counts.get(word, 0) + 1
        return self._rank(noun_counts)

    def extract_keyword_by_thulac(self):
        """Rank nouns using the thulac tagger.

        Tokens are split as 'word_tag' on the LAST underscore, so a word
        that itself contains '_' keeps its tag, and tokens without a tag
        are skipped rather than raising IndexError. Sentences the tagger
        fails on are printed and skipped.

        :return: list of (noun, count) tuples, most frequent first
        """
        sents = self._load_sentences()
        # Tag every sentence with parts of speech.
        pos_sents = []
        for sent in sents:
            try:
                tokens = self.thu.cut(sent.encode("utf-8"))
                pos_sents.append([token.rsplit("_", 1) for token in tokens])
            except Exception:
                print(sent)
                continue
        print("length of pos_sents:%d" % len(pos_sents))
        # Pick out the nouns and count them.
        print("counting")
        noun_counts = {}
        for pos_sent in pos_sents:
            for word in pos_sent:
                if len(word) != 2:
                    # Token carried no tag.
                    continue
                if word[1] == "n":
                    noun_counts[word[0]] = noun_counts.get(word[0], 0) + 1
        return self._rank(noun_counts)
74 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/js/login/globalconfig.js:
--------------------------------------------------------------------------------
(function() {
    /**
     * Show `message` as plain text in the confirm dialog and navigate to it.
     * @param message String text to display
     */
    function showDialog(message) {
        $("#confirm-dialog_info").text(message);
        window.location.href = "#confirm-dialog";
    }

    /**
     * Click handler of the sign-in button. Sends the account and password
     * to the check-user endpoint; on success stores the returned user,
     * baseinfo and location in sessionStorage and redirects to the index
     * page, otherwise shows the failure dialog.
     */
    window.entrypage_signin_btn_click = function () {
        var userName = $("#account").val();
        var password = $("#password").val();
        if (userName == null || userName == "" || password == null || password == "") {
            alert("账号密码不能为空");
            return;
        }

        $.ajax({
            // Encode both values so characters such as '&', '#', '+' or
            // spaces cannot truncate or corrupt the query string.
            url: domain + checkUserUrl +
                "?user_name=" + encodeURIComponent(userName) +
                "&password=" + encodeURIComponent(password),
            type: 'get',
            async: true,
            success: function (json) {
                var datajson;
                try {
                    // The response may arrive already parsed or as a JSON string.
                    datajson = (typeof (json) == "object") ? json : JSON.parse(json);
                } catch (e) {
                    showDialog("登录失败");
                    return;
                }

                // Guard against a missing payload before dereferencing it.
                var entry = datajson && datajson.data && datajson.data[0];
                if (datajson && datajson.status != 0 && entry) {
                    sessionStorage.user = JSON.stringify(entry['user']);
                    sessionStorage.baseinfo = JSON.stringify(entry['baseinfo']);
                    sessionStorage.location = JSON.stringify(entry['location']);
                    window.location.href = "../html/index.html";
                    // window.navigate("../../html/index.html");
                    console.log(window.location.href);
                } else {
                    showDialog("登录失败");
                }
            },
            error: function (xhr, textStatus, errorThrown) {
                // The first argument is the jqXHR object, not a message;
                // build readable text from the status arguments instead.
                console.log(xhr);
                showDialog("登录失败" + (errorThrown ? ": " + errorThrown : (textStatus ? ": " + textStatus : "")));
            }
        });
    };

    /**
     * Request a row of the location table (synchronous request).
     * NOTE(review): `getLocation` is not among the endpoints declared in
     * js/setting.js, and this function is not exported from the closure --
     * confirm whether it is still needed.
     * @param locationId String locationID
     * @return the parsed response, or null when the request fails
     */
    function requestLocation(locationId) {
        var locationData = null;
        var paramStr = "?location_id=" + encodeURIComponent(locationId);
        $.ajax({
            type: "get",
            async: false,
            url: domain + getLocation + paramStr,
            dataType: "json",
            timeout: 5000,
            success: function (result) {
                locationData = result;
            },
            error: function (errorMsg) {
                console.log(errorMsg);
                alert("你输入的值有误,请输入完整参数或者重试");
            }
        });
        return locationData;
    }

})();
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/scripts/Hotel/HotelCatcher.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer,LiuYang'
3 |
4 |
5 | from service.hotel.TuniuService import TuniuService
6 | from service.hotel.xiecheng.DriveServices import XiechengDriverService
7 | import time
8 |
9 |
class HotelCatcher(object):
    """Drives a hotel crawling service: list pages first, then detail pages."""

    def __init__(self, service):
        self.service = service

    def startCrawlListPage(self, city):
        """Crawl the hotel list page of `city`, retrying until it succeeds,
        then store the result."""
        self.service.set_city(city)
        while 1:
            if self.service.crawlListPage():
                # Crawl succeeded: store the data.
                self.service.saveListPageInfo()
                # service.closeDriver()
                break
            else:
                # Discard partial results before retrying.
                self.service.listPageInfo = []

    def _crawl_with_retry(self, target, max_attempts, retry_delay):
        """Crawl one hotel, trying at most `max_attempts` times.

        An attempt fails when the service returns False or raises; after an
        exception the method sleeps `retry_delay` seconds before retrying.

        :return: the service's result of the first non-failing attempt, or
                 False once all attempts have failed
        """
        for attempt in range(max_attempts):
            try:
                result = self.service.crawlHotelInfo(target)
            except Exception as e:
                print(e)
                time.sleep(retry_delay)
                continue
            # The crawl reported a bad result: count it and crawl again.
            if result == False:
                print("Flase %d time" % attempt)
                continue
            return result
        print("False at guid:%s,url:%s" % (target[0], target[2]))
        return False

    def startCrawlDetail(self, city, max_attempts=4, retry_delay=10):
        """Crawl the detail page of every hotel listed for `city`.

        Hotels are taken from the end of the service's list-page data; each
        successfully crawled hotel is saved immediately.

        :param city: city name passed to the service
        :param max_attempts: attempts per hotel before it is given up
        :param retry_delay: seconds to sleep after an attempt that raised
        """
        self.service.set_city(city)
        pending = list(self.service.getListPageInfo())
        while len(pending) > 0:
            # Pop one hotel and crawl its information.
            target = pending.pop()
            if self._crawl_with_retry(target, max_attempts, retry_delay):
                self.service.saveHotelInfo()

    def set_service(self, service):
        """Replace the crawling service used by this catcher."""
        self.service = service

    def exit(self):
        """Close the crawling driver of the current service."""
        self.service.closeDriver()
76 |
77 |
# Script entry point: crawl the hotel list page for one city with the Tuniu
# service, then close the crawler driver.
if __name__ == "__main__":
    hotel_service = TuniuService()
    # hotel_service = XiechengDriverService()
    hotelCatcher = HotelCatcher(hotel_service)
    hotelCatcher.startCrawlListPage("南京")
    # Choose what to crawl
    #hotel_service.set_crawl_content(if_crawl_hotel_comment=False,if_crawl_hotel_info=False,if_crawl_hotel_price=True)
    # # Start crawling the detail pages
    #hotelCatcher.startCrawlDetail("南京")
    hotelCatcher.exit()
88 |
89 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/css/simple-sidebar.css:
--------------------------------------------------------------------------------
1 | /*!
2 | * Start Bootstrap - Simple Sidebar HTML Template (http://startbootstrap.com)
3 | * Code licensed under the Apache License v2.0.
4 | * For details, see http://www.apache.org/licenses/LICENSE-2.0.
5 | */
6 |
7 | /* Toggle Styles */
8 |
9 | #wrapper {
10 | padding-left: 0;
11 | -webkit-transition: all 0.5s ease;
12 | -moz-transition: all 0.5s ease;
13 | -o-transition: all 0.5s ease;
14 | transition: all 0.5s ease;
15 | }
16 |
17 | #wrapper.toggled {
18 | padding-left: 450px;
19 | }
20 |
21 | #sidebar-wrapper {
22 | z-index: 1000;
23 | position: fixed;
24 | left: 450px;
25 | width: 0;
26 | height: 100%;
27 | margin-left: -450px;
28 | overflow-y: auto;
29 | background: white;
30 | -webkit-transition: all 0.5s ease;
31 | -moz-transition: all 0.5s ease;
32 | -o-transition: all 0.5s ease;
33 | transition: all 0.5s ease;
34 | }
35 |
36 | #wrapper.toggled #sidebar-wrapper {
37 | width: 450px;
38 | }
39 |
40 | #page-content-wrapper {
41 | width: 100%;
42 | position: absolute;
43 | padding: 15px;
44 | }
45 |
46 | #wrapper.toggled #page-content-wrapper {
47 | position: absolute;
48 | margin-right: -450px;
49 | }
50 |
51 | /* Sidebar Styles */
52 |
53 | .sidebar-nav {
54 | position: absolute;
55 | top: 0;
56 | width: 450px;
57 | margin: 0;
58 | padding: 0;
59 | list-style: none;
60 | }
61 |
62 | .sidebar-nav li {
63 | text-indent: 20px;
64 | line-height: 40px;
65 | }
66 |
67 | .sidebar-nav li a {
68 | display: block;
69 | text-decoration: none;
70 | color: #999999;
71 | }
72 |
73 | .sidebar-nav li a:hover {
74 | text-decoration: none;
75 | color: #fff;
76 | background: rgba(255,255,255,0.2);
77 | }
78 |
79 | .sidebar-nav li a:active,
80 | .sidebar-nav li a:focus {
81 | text-decoration: none;
82 | }
83 |
84 | .sidebar-nav > .sidebar-brand {
85 | height: 65px;
86 | font-size: 18px;
87 | line-height: 60px;
88 | }
89 |
90 | .sidebar-nav > .sidebar-brand a {
91 | color: #999999;
92 | }
93 |
94 | .sidebar-nav > .sidebar-brand a:hover {
95 | color: #fff;
96 | background: none;
97 | }
98 |
99 | @media(min-width:768px) {
100 | #wrapper {
101 | padding-left: 450px;
102 | }
103 |
104 | #wrapper.toggled {
105 | padding-left: 0;
106 | }
107 |
108 | #sidebar-wrapper {
109 | width: 450px;
110 | }
111 |
112 | #wrapper.toggled #sidebar-wrapper {
113 | width: 0;
114 | }
115 |
116 | #page-content-wrapper {
117 | padding: 20px;
118 | position: relative;
119 | }
120 |
121 | #wrapper.toggled #page-content-wrapper {
122 | position: relative;
123 | margin-right: 0;
124 | }
125 | }
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/scripts/Hotel/MergeComment.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer'
3 |
4 |
5 | import uuid
6 | import re
7 | import traceback
8 |
9 | from dao.hotel.HotelDAO import HotelDAO
10 | from dao.hotel.xiechengdao.xiecheng import xiechengDAO
11 | from dao.hotel.TuniuDao import TuniuDAO
12 | from setting import local_hotel_setting
13 |
14 | # 配置数据库
15 | dao_setting = local_hotel_setting
16 |
17 | hotel_dao = HotelDAO(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"])
18 | tuniu_dao = TuniuDAO(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"])
19 | xiecheng_dao = xiechengDAO(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"])
20 |
21 | # tuniu_comm = tuniu_dao.get_remarks()
22 | #
23 | # hotel_comm = []
24 | # i = 0
25 | # for comm in tuniu_comm:
26 | # i+=1
27 | # print i
28 | # baseinfo = hotel_dao.get_baseinfo_by_hotelname(comm[10].encode('utf-8'), '南京')
29 | # for info in baseinfo:
30 | # if info[3].encode('utf-8') == '途牛':
31 | # hotel_comm.append({
32 | # "guid":uuid.uuid1(),
33 | # "username":comm[1],
34 | # "remark":comm[2],
35 | # "comm_time":comm[3],
36 | # "comm_type":comm[6],
37 | # "user_type":comm[4],
38 | # "senti_value":comm[7],
39 | # "viewpoint":comm[8],
40 | # "word_freq":comm[9],
41 | # "baseinfo_id":info[0],
42 | # })
43 | # hotel_dao.save_remarks(hotel_comm)
44 |
45 | print '=============Tuniu Done================='
46 |
# Merge Ctrip comments into the unified remark table: resolve each comment's
# hotel to its baseinfo id, convert the row, and save everything in one batch.
xiecheng_comms = xiecheng_dao.get_comments()
print(len(xiecheng_comms))
hotel_name = ""
baseinfo_id = ""
hotel_comm = []
# Walk through the comments.
i = 0
for i, comm in enumerate(xiecheng_comms, 1):
    print(i)
    # When the hotel name changes, look up the baseinfo id again.
    if comm[0] != hotel_name:
        baseinfo_id = ""
        hotel_name = comm[0]
        baseinfo = hotel_dao.get_baseinfo_by_hotelname(hotel_name.encode('utf-8'), '南京')
        for info in baseinfo:
            if info[3] == u'携程':
                baseinfo_id = info[0]
    # Comments whose hotel has no matching baseinfo row are skipped.
    if baseinfo_id != "":
        try:
            # A missing score (NULL or empty string) is stored as None
            # instead of failing in float() and dropping the whole comment.
            raw_score = comm[2]
            comm_score = float(raw_score) if raw_score not in (None, u'') else None
            hotel_comm.append({
                "guid":uuid.uuid1(),
                "username":comm[1],
                "remark":comm[6],
                # Strip the agency-service note from the stay date.
                "intime":re.sub(u"\(本次服务由代理商提供\)",u"",comm[3]),
                "comm_score":comm_score,
                "user_type":comm[4],
                "baseinfo_id":baseinfo_id,
                "senti_value":comm[7],
                "viewpoint":comm[8]
            })
        except Exception:
            # Report the offending row and carry on with the next comment.
            traceback.print_exc()
            print(comm)
print(len(hotel_comm))
hotel_dao.save_remarks(hotel_comm)
83 |
84 | print '=============XieCheng Done================='
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/site/mkdocs/js/search.js:
--------------------------------------------------------------------------------
require([
    base_url + '/mkdocs/js/mustache.min.js',
    base_url + '/mkdocs/js/lunr-0.5.7.min.js',
    'text!search-results-template.mustache',
    'text!../search_index.json',
], function (Mustache, lunr, results_template, data) {
    "use strict";

    /**
     * Read the `q` parameter from the page's query string.
     *
     * @returns {string|undefined} the decoded search term, an empty string
     *     when `q` is present without a value, or undefined when `q` is
     *     absent.
     */
    function getSearchTerm()
    {
        var sPageURL = window.location.search.substring(1);
        var sURLVariables = sPageURL.split('&');
        for (var i = 0; i < sURLVariables.length; i++)
        {
            var sParameterName = sURLVariables[i].split('=');
            if (sParameterName[0] == 'q')
            {
                // `?q` without `=` leaves index 1 undefined; fall back to ''.
                var raw = (sParameterName[1] || '').replace(/\+/g, '%20');
                try {
                    return decodeURIComponent(raw);
                } catch (e) {
                    // Malformed percent-escape: use the raw text rather
                    // than aborting the whole search page.
                    return raw;
                }
            }
        }
    }

    // Full-text index over title (boosted) and text, keyed by location.
    var index = lunr(function () {
        this.field('title', {boost: 10});
        this.field('text');
        this.ref('location');
    });

    data = JSON.parse(data);
    var documents = {};

    for (var i = 0; i < data.docs.length; i++) {
        var entry = data.docs[i];
        entry.location = base_url + entry.location;
        index.add(entry);
        documents[entry.location] = entry;
    }

    /**
     * Run the query currently typed in #mkdocs-search-query and render the
     * hits (or a "no results" note) into #mkdocs-search-results.
     */
    var search = function () {

        var query = document.getElementById('mkdocs-search-query').value;
        var search_results = document.getElementById("mkdocs-search-results");
        // Clear whatever the previous search rendered.
        while (search_results.firstChild) {
            search_results.removeChild(search_results.firstChild);
        }

        if (query === '') {
            return;
        }

        var results = index.search(query);

        if (results.length > 0) {
            for (var i = 0; i < results.length; i++) {
                var result = results[i];
                var doc = documents[result.ref];
                doc.base_url = base_url;
                doc.summary = doc.text.substring(0, 200);
                var html = Mustache.to_html(results_template, doc);
                search_results.insertAdjacentHTML('beforeend', html);
            }
        } else {
            search_results.insertAdjacentHTML('beforeend', "<p>No results found</p>");
        }

        // `typeof` is required here: referencing an undeclared `jQuery`
        // directly would throw a ReferenceError when jQuery is not loaded.
        if (typeof jQuery !== 'undefined') {
            /*
             * We currently only automatically hide bootstrap modals. This
             * requires jQuery to work.
             */
            jQuery('#mkdocs_search_modal a').click(function () {
                jQuery('#mkdocs_search_modal').modal('hide');
            });
        }

    };

    var search_input = document.getElementById('mkdocs-search-query');

    // Pre-fill and run the search when the page was opened with ?q=...
    var term = getSearchTerm();
    if (term) {
        search_input.value = term;
        search();
    }

    search_input.addEventListener("keyup", search);

});
89 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/site/css/theme_extra.css:
--------------------------------------------------------------------------------
1 | /*
2 | * Tweak the overall size to better match RTD.
3 | */
4 | body {
5 | font-size: 90%;
6 | }
7 |
8 | h3, h4, h5, h6 {
9 | color: #2980b9;
10 | font-weight: 300
11 | }
12 |
13 | /*
14 | * Sphinx doesn't have support for section dividers like we do in
15 | * MkDocs, this styles the section titles in the nav
16 | *
17 | * https://github.com/mkdocs/mkdocs/issues/175
18 | */
19 | .wy-menu-vertical span {
20 | line-height: 18px;
21 | padding: 0.4045em 1.618em;
22 | display: block;
23 | position: relative;
24 | font-size: 90%;
25 | color: #838383;
26 | }
27 |
28 | .wy-menu-vertical .subnav a {
29 | padding: 0.4045em 2.427em;
30 | }
31 |
32 | /*
33 | * Long navigations run off the bottom of the screen as the nav
34 | * area doesn't scroll.
35 | *
36 | * https://github.com/mkdocs/mkdocs/pull/202
37 | */
38 | .wy-nav-side {
39 | height: 100%;
40 | overflow-y: auto;
41 | }
42 |
43 | /*
44 | * readthedocs theme hides nav items when the window height is
45 | * too small to contain them.
46 | *
47 | * https://github.com/mkdocs/mkdocs/issues/#348
48 | */
49 | .wy-menu-vertical ul {
50 | margin-bottom: 2em;
51 | }
52 |
53 | /*
54 | * Fix wrapping in the code highlighting
55 | *
56 | * https://github.com/mkdocs/mkdocs/issues/233
57 | */
58 | code {
59 | white-space: pre;
60 | }
61 |
62 | /*
63 | * Wrap inline code samples otherwise they shoot off the side and
64 | * can't be read at all.
65 | *
66 | * https://github.com/mkdocs/mkdocs/issues/313
67 | */
68 | p code {
69 | word-wrap: break-word;
70 | }
71 |
72 | /*
73 | * The CSS classes from highlight.js seem to clash with the
74 | * ReadTheDocs theme causing some code to be incorrectly made
75 | * bold and italic.
76 | *
77 | * https://github.com/mkdocs/mkdocs/issues/411
78 | */
79 | code.cs, code.c {
80 | font-weight: inherit;
81 | font-style: inherit;
82 | }
83 |
84 | /*
85 | * Fix some issues with the theme and non-highlighted code
86 | * samples. Without any highlighting styles attached the
87 | * formatting is broken.
88 | *
89 | * https://github.com/mkdocs/mkdocs/issues/319
90 | */
91 | .no-highlight {
92 | display: block;
93 | padding: 0.5em;
94 | color: #333;
95 | }
96 |
97 |
98 | /*
99 | * Additions specific to the search functionality provided by MkDocs
100 | */
101 |
102 | #mkdocs-search-results article h3
103 | {
104 | margin-top: 23px;
105 | border-top: 1px solid #E1E4E5;
106 | padding-top: 24px;
107 | }
108 |
109 | #mkdocs-search-results article:first-child h3 {
110 | border-top: none;
111 | }
112 |
113 | #mkdocs-search-query{
114 | width: 100%;
115 | border-radius: 50px;
116 | padding: 6px 12px;
117 | border-color: #D1D4D5;
118 | }
119 |
120 | .wy-menu-vertical li ul {
121 | display: inherit;
122 | }
123 |
124 | .wy-menu-vertical li ul.subnav ul.subnav{
125 | padding-left: 1em;
126 | }
127 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/html/quality-testing.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | 基于社会感知的酒店竞业市场时空可视化分析
10 |
11 |
12 |
13 |
14 |
15 |
16 |
21 |
48 |
49 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/nlp/Bayes.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer,LiuYang'
3 |
4 |
5 | import sys
6 | import gzip
7 | import marshal
8 | from math import log, exp
9 |
class Bayes(object):
    """Naive Bayes classifier over bags of words.

    ``self.d`` maps each class label to an ``AddOneProb`` holding that
    class's word counts; ``self.total`` is the sum of ``getsum()`` over all
    classes and is refreshed by ``train``.
    """

    def __init__(self):
        self.d = {}
        self.total = 0

    @staticmethod
    def _read_marshal(opener, fname):
        """Open ``fname`` with ``opener`` in binary mode, unmarshal its whole
        content and always close the handle."""
        f = opener(fname, 'rb')
        try:
            return marshal.loads(f.read())
        finally:
            f.close()

    def save(self, fname, iszip=True):
        """Serialise the model to ``fname`` with ``marshal``.

        :param fname: target path; ``'.3'`` is appended under Python 3.
        :param iszip: gzip-compress the payload when true (default).
        """
        d = {}
        d['total'] = self.total
        d['d'] = {}
        for k, v in self.d.items():
            d['d'][k] = v.__dict__
        if sys.version_info[0] == 3:
            fname = fname + '.3'
        # try/finally so the handle is released even if marshalling fails.
        if not iszip:
            f = open(fname, 'wb')
            try:
                marshal.dump(d, f)
            finally:
                f.close()
        else:
            f = gzip.open(fname, 'wb')
            try:
                f.write(marshal.dumps(d))
            finally:
                f.close()

    def load(self, fname, iszip=True):
        """Restore a model previously written by ``save``.

        :param fname: source path; ``'.3'`` is appended under Python 3.
        :param iszip: when true, read as gzip first and fall back to a plain
            read if that raises ``IOError``.
        """
        if sys.version_info[0] == 3:
            fname = fname + '.3'
        if not iszip:
            d = self._read_marshal(open, fname)
        else:
            try:
                d = self._read_marshal(gzip.open, fname)
            except IOError:
                # Not gzip data after all: retry as an uncompressed file.
                d = self._read_marshal(open, fname)
        self.total = d['total']
        self.d = {}
        for k, v in d['d'].items():
            self.d[k] = AddOneProb()
            self.d[k].__dict__ = v

    def train(self, data):
        """Accumulate counts from ``data``.

        :param data: iterable of items where ``item[0]`` is an iterable of
            words and ``item[1]`` is the class label.
        """
        for d in data:
            c = d[1]
            if c not in self.d:
                self.d[c] = AddOneProb()
            for word in d[0]:
                self.d[c].add(word, 1)
        self.total = sum(map(lambda x: self.d[x].getsum(), self.d.keys()))

    def classify(self, x):
        """Score the words in ``x`` against every trained class.

        :param x: iterable of words.
        :returns: ``(label, probability)`` of the best class, or ``(0, 0)``
            when no class scores above zero.
        """
        # Log prior plus summed log word frequencies per class.
        tmp = {}
        for k in self.d:
            tmp[k] = log(self.d[k].getsum()) - log(self.total)
            for word in x:
                tmp[k] += log(self.d[k].freq(word))
        ret, prob = 0, 0
        for k in self.d:
            now = 0
            try:
                # Normalise in log space: 1 / sum(exp(score_other - score_k)).
                for otherk in self.d:
                    now += exp(tmp[otherk]-tmp[k])
                now = 1/now
            except OverflowError:
                now = 0
            if now > prob:
                ret, prob = k, now
        return (ret, prob)




class BaseProb(object):
    """Frequency table: ``d`` holds per-key counts, ``total`` their running
    sum and ``none`` the count reported for unseen keys."""

    def __init__(self):
        self.d = {}
        self.total = 0.0
        self.none = 0

    def exists(self, key):
        """Return True when ``key`` has been recorded."""
        return key in self.d

    def getsum(self):
        """Return the running total."""
        return self.total

    def get(self, key):
        """Return ``(found, count)``; unseen keys yield ``(False, self.none)``."""
        if not self.exists(key):
            return False, self.none
        return True, self.d[key]

    def freq(self, key):
        """Return the count of ``key`` divided by the running total."""
        return float(self.get(key)[1])/self.total

    def samples(self):
        """Return the recorded keys."""
        return self.d.keys()

class AddOneProb(BaseProb):
    """Frequency table with add-one smoothing: unseen keys count as 1 and
    every newly seen key adds one extra unit to its count and the total."""

    def __init__(self):
        super(AddOneProb, self).__init__()
        self.none = 1

    def add(self, key, value):
        """Add ``value`` occurrences of ``key``."""
        self.total += value
        if not self.exists(key):
            # First sighting: seed the smoothing unit.
            self.d[key] = 1
            self.total += 1
        self.d[key] += value
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/scripts/Hotel/HotelSentimentProcessor.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | import re
3 |
4 | __author__ = 'DreamCathcer'
5 |
6 |
7 | import traceback
8 | import json
9 | from thulac import thulac
10 |
11 | from setting import local_hotel_setting
12 | from service.nlp.HotelNLP import HotelNLP
13 | from dao.hotel.TuniuDao import TuniuDAO
14 | from dao.hotel.HotelDAO import HotelDAO
15 |
16 | dao_setting = local_hotel_setting
17 |
18 |
class HotelSentimentProcessor(object):
    """Batch jobs that annotate stored hotel remarks and write the results
    back through ``HotelDAO`` in chunks of ``BATCH_SIZE`` rows."""

    # Number of annotated remarks buffered before each DAO update call.
    BATCH_SIZE = 10000

    def __init__(self):
        self.hotel_dao = HotelDAO(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"])
        self.dao = TuniuDAO(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"])

    def _flush(self, updater, batch, batch_no):
        """Pass ``batch`` to ``updater`` unless it is empty.

        :returns: the number of batches written so far.
        """
        if not batch:
            return batch_no
        batch_no += 1
        print("update %d time" % batch_no)
        updater(batch)
        return batch_no

    def set_sentiment_and_viewpoint(self):
        """Compute sentiment value and viewpoint for every remark where
        column 8 or column 9 is still ``None`` and store them by guid.

        NOTE(review): columns 8/9 are presumably the stored senti_value and
        viewpoint -- confirm against ``HotelDAO.get_remarks``.
        """
        self.hotelnlp = HotelNLP()
        self.thu = thulac("")
        comm_list = self.hotel_dao.get_remarks()
        print(len(comm_list))
        sentiment_comm_list = []
        i = 0
        for comm in comm_list:
            if comm[8] is None or comm[9] is None:
                sentiment_value = None
                viewpoint = None
                remark = re.sub(u"\@",u"",comm[2])
                try:
                    sentiment_value = self.hotelnlp.sentiment(remark.encode("utf-8"))
                    # Keep three decimal places.
                    sentiment_value = round(sentiment_value*1000)/1000
                    print(sentiment_value)
                except Exception:
                    print(comm[2])
                    traceback.print_exc()
                try:
                    viewpoint = self.hotelnlp.viewpoint(remark.encode("utf-8"),decoding="utf-8")
                    viewpoint = json.dumps(viewpoint, ensure_ascii=False)
                except Exception:
                    print(remark)
                    traceback.print_exc()
                sentiment_comm_list.append(
                    {"guid":comm[0], "senti_value":sentiment_value, "viewpoint":viewpoint})
                if len(sentiment_comm_list) == self.BATCH_SIZE:
                    i = self._flush(self.hotel_dao.update_remarks, sentiment_comm_list, i)
                    sentiment_comm_list = []
        # Write the trailing partial batch; without this the last
        # (< BATCH_SIZE) remarks were never updated.
        self._flush(self.hotel_dao.update_remarks, sentiment_comm_list, i)


    def count_word_frq(self):
        """Count, per remark, how often each word tagged ``"a"`` occurs and
        store the counts as JSON in ``word_freq``.

        NOTE(review): ``"a"`` is presumably THULAC's adjective tag -- confirm.
        """
        self.thu = thulac("-input cs.txt")
        comm_list = self.hotel_dao.get_remarks()
        sentiment_comm_list = []
        i = 0
        for comm in comm_list:
            a_dict = {}
            try:
                # Each segmented token is split on "_" into [word, tag].
                cut_comm = [x.split("_") for x in self.thu.cut(comm[2].encode("utf-8"))]
            except Exception:
                cut_comm = []
                print(comm[2])
                traceback.print_exc()
            for word in cut_comm:
                if len(word) < 2:
                    # Token without a tag part: nothing to classify.
                    continue
                if word[1].decode("utf-8") == "a":
                    key = word[0].decode("utf-8")
                    a_dict[key] = a_dict.get(key, 0) + 1
            sentiment_comm_list.append(
                {"guid":comm[0], "word_freq":json.dumps(a_dict, ensure_ascii=False)})
            if len(sentiment_comm_list) == self.BATCH_SIZE:
                i = self._flush(self.hotel_dao.update_hotel_comm_word_freq, sentiment_comm_list, i)
                sentiment_comm_list = []
        # Write the trailing partial batch as well.
        self._flush(self.hotel_dao.update_hotel_comm_word_freq, sentiment_comm_list, i)
85 |
86 | if __name__ == "__main__":
87 | HotelSentimentProcessor().count_word_frq()
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/dao/weibo/WeiboDAO.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __author__ = 'LiuYang'
3 |
4 | import MySQLdb
5 | import uuid
6 | import traceback
7 |
8 | from dao.SuperDAO import SuperDAO
9 |
10 |
class WeiboDAO(SuperDAO):
    """MySQL access for the Weibo tables (``weibo_id``, ``weibo_comment``,
    ``weibo_content``, ``api_account``, ``city_location``).

    Every call opens its own connection and closes it again, also when the
    statement raises.
    """

    def __init__(self, host, db, user, password):
        SuperDAO.__init__(self, host, db, user, password)

    def _with_cursor(self, work, commit=True):
        """Run ``work(cursor)`` on a fresh connection.

        :param work: callable receiving the open cursor.
        :param commit: commit after ``work`` returns normally.
        :returns: whatever ``work`` returns.
        """
        db = MySQLdb.connect(self.host,self.user,self.password,self.db,charset='utf8')
        try:
            cursor = db.cursor()
            try:
                result = work(cursor)
                if commit:
                    db.commit()
                return result
            finally:
                cursor.close()
        finally:
            db.close()

    # Store Weibo ids
    def saveWeiboID(self,weiboIDSet,userID,pageNum):
        """Insert one ``weibo_id`` row per id in ``weiboIDSet``."""
        def work(cursor):
            for weiboID in weiboIDSet:
                cursor.execute("insert into weibo_id(guid,userID,weiboID,pageNum)values(%s,%s,%s,%s)" ,(uuid.uuid1(),userID,weiboID,pageNum))
        self._with_cursor(work)

    # Store Weibo comments
    def saveWeiboComment(self,items):
        """Insert one ``weibo_comment`` row per item; a failing item is
        reported and skipped so the remaining items are still stored."""
        def work(cursor):
            for item in items:
                try:
                    cursor.execute("insert into weibo_comment(guid,userID,weiboID,pageNum,commPeople,commentText,commentTime,crawlTime,likeNum)values(%s,%s,%s,%s,%s,%s,%s,%s,%s)" ,(uuid.uuid1(),item["userID"],item["weiboID"],item["pageNum"],item["commPeople"],item["commentText"],item["commentTime"],item["crawlTime"],item["likeNum"]))
                except Exception:
                    # Best effort: log instead of silently dropping the row.
                    traceback.print_exc()
                    continue
        self._with_cursor(work)

    # Count API accounts
    def countweiboaccountnumber(self):
        """Return the single result row of ``count(*)`` on ``api_account``."""
        def work(cursor):
            cursor.execute("select count(*) from api_account")
            return cursor.fetchone()
        return self._with_cursor(work, commit=False)

    # Fetch Weibo accounts from MySQL
    def get_weibo_accounts(self):
        """Return all ``api_account`` rows, or ``None`` if the query fails."""
        def work(cursor):
            try:
                cursor.execute("select * from api_account")
                return cursor.fetchall()
            except Exception:
                traceback.print_exc()
                return None
        return self._with_cursor(work)

    def saveWeibo_ByAPI(self,weiboid,text,lat,lon,title,userid,location,userdecription,gender,created_at,fax,localcity,formatted):
        """Insert one ``weibo_content`` row built from the arguments."""
        def work(cursor):
            cursor.execute("insert into weibo_content(weiboid,text,lat,lon,title,userid,location,userdescription,gender,created_at,fax,locality,formatted)values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
                           ,(weiboid,text,lat,lon,title,userid,location,userdecription,gender,created_at,fax,localcity,formatted))
        self._with_cursor(work)

    def get_location(self, city):
        """Return the first ``city_location`` row for ``city``, or ``None``
        when there is none or the query fails."""
        def work(cursor):
            try:
                # Bound parameter instead of string interpolation, so a
                # quote in ``city`` cannot alter the statement.
                cursor.execute("select * from city_location where city=%s", (city,))
                return cursor.fetchone()
            except Exception as e:
                print(e)
                return None
        return self._with_cursor(work)

    def save_location(self, location):
        """Store ``location`` in ``city_location`` via the inherited
        ``save_record``."""
        self.save_record("city_location", location)
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/css/messages.css:
--------------------------------------------------------------------------------
1 |
2 | /*--------------------Left side (map area)------------------------*/
3 | .map-div {background-color:white;width:78%;height:100%;position:fixed;top:53px;left:0;}
4 | nav {z-index:2; background-color:#7A6868;width:59px;height:180px;right:8px;position:absolute;margin:140px 10px;cursor:pointer}
5 | .floor-info{right:25px;margin-top:75px;position:absolute;font-size:30px;font-weight:bold;z-index:2;}
6 | .query-info{right:20px;margin-top:20px;position:absolute;z-index:2;}
7 | .legend-info{z-index:2;position:absolute;bottom:70px;left:10px;padding:10px;border:2px solid #F1C40F;/*border-radius:4px*/}
8 | .button-one {height:60px;background-color:#EBEBEC}
9 | .button-two {height:60px;background-color:#C4C4C7}
10 | .button-three {height:60px;background-color:#D7D7DC}
11 | .button-img {width:22px;height:15px;margin:20px 18px}
12 |
13 | /*--------------------Legend------------------------*/
14 | .legend-red{line-height:50px;height:50px;width:75px;border-radius:4px;background-color:#F3859B;margin-bottom:3px;color:#FFF;text-align:center;}
15 | .legend-yellow{line-height:50px;height:50px;width:75px;border-radius:4px;background-color:#FCDE89;margin-bottom:3px;color:#FFF;text-align:center}
16 | .legend-green{line-height:50px;height:50px;width:75px;border-radius:4px;background-color:#BBFF8B;margin-bottom:3px;color:#FFF;text-align:center}
17 | .legend-gray{line-height:50px;height:50px;width:75px;border-radius:4px;background-color:#D7D7DC;margin-bottom:3px;color:#FFF;text-align:center}
18 |
19 | /*--------------------List information (status buttons)------------------------*/
20 | .button-danger {background-color:red;color:#fff;font-family:'Microsoft YaHei';font-size:14px;position:relative;right:25px;padding:5px 10px;float:right;margin-top:5px;display:block}
21 | .button-require {background-color:#D7D7DC;color:#fff;font-family:'Microsoft YaHei';font-size:14px;position:relative;right:25px;padding:5px 10px;float:right;margin-top:5px;display:block}
22 | .button-warning {background-color:#ffd800;color:#fff;font-family:'Microsoft YaHei';font-size:14px;position:relative;right:25px;padding:5px 10px;float:right;margin-top:5px;display:block}
23 | .button-default {background-color:#29C1BF;color:#fff;font-family:'Microsoft YaHei';font-size:14px;position:relative;right:25px;padding:5px 10px;float:right;margin-top:5px;display:block}
24 |
25 | /*--------------------Value slider------------------------*/
26 | .sliderdiv{z-index:3;position:absolute; right:50px;bottom:50px;}
27 |
28 | /*--------------------Right side------------------------*/
29 | .right {background-color:#4A5A69;width:22%;height:100%;top:53px;left:78%;position:fixed;}
30 | .right-top {background-color:white;width:100%;height:95%;border-left:1px solid #B6CFD3}
31 | .right-top-title{background-color:#B6CFD3;text-align:center;}
32 | .right-mess {overflow:auto;height:90%;}
33 | .right-list-div{border-bottom:1px solid #808080;height:45px ;}
34 | .right-list-div:hover{cursor:pointer;background-color:gray;border-color:#fff;color:white}
35 | .list-roomnum {top:7px;left:25px;position:relative;padding:10px 0px;}
36 | .list-roomnum:hover{cursor:pointer;background-color:gray;border-color:#fff;color:white}
37 | .room-button {background: transparent;border-bottom:1px solid #ffd800;color:#1ABC9C;padding:5px 20px;font-family:'Microsoft YaHei';position:relative;top:5px;display:block}
38 | /*.room-button:hover {cursor:pointer;background-color:white;border-color:#fff;color:#ffd800}*/
39 |
40 | /*-----------------------Right side, bottom---------------------------*/
41 | .right-bottom {background-color:#C4CDE4;width:100%;height:40%;border-left:2px solid #B6CFD3}
42 |
43 | /*--------------------Modal dialog------------------------*/
44 | #remarkbody {overflow:auto;height:400px;}
45 | /*--------------------Map------------------------*/
46 | #floormap {padding: 0;margin: 0;height: 100%;width: 100%;}
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/map/baidu/APIService.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer'
3 |
4 | import logging
5 |
6 | from util.http.UniversalSDK import APIClient
7 |
8 | logger = logging.getLogger('ugc')
9 |
10 |
class BaiduMapAPIService(object):
    """Thin wrapper around the Baidu Map web API (geocoding, reverse
    geocoding and place search) using one access key."""

    # The project's 'ugc' logger; the calls below previously went to the
    # root logger by mistake.
    _log = logging.getLogger('ugc')

    def __init__(self, ak):
        self.baiduClient = APIClient("http://api.map.baidu.com")
        self.__ak = ak

    def doGeocoding(self, addressText,city=None):
        '''
        Forward geocoding.
        Docs: http://lbsyun.baidu.com/index.php?title=webapi/guide/webservice-geocoding
        REST endpoint: http://api.map.baidu.com/geocoder/v2/
        Method: get

        :param addressText: address to geocode.
        :param city: optional city sent along with the address; omitted from
            the request when None.
        :return: whatever the API client returns for the request.
        '''
        params = dict(ak=self.__ak, output="json", address=addressText)
        if city is not None:
            params["city"] = city
        return self.baiduClient.geocoder.v2.addtrail("/").get(**params)

    def reverseGeocodingBatch(self, locationList):
        '''
        Entry point for batch reverse geocoding.
        Endpoint: http://api.map.baidu.com/geocoder/v2/
        Method: get

        :return: list of the responses whose "status" is 0.
        '''
        respList = self.reverseGeocodingBatchHandler(locationList=locationList, respList=[], errorList=[])
        return respList

    def reverseGeocodingBatchHandler(self, locationList, respList, errorList, maxRetries=3):
        '''
        Batch reverse geocoding with retry of failed requests.
        Endpoint: http://api.map.baidu.com/geocoder/v2/
        Method: get

        :param locationList: locations to resolve.
        :param respList: list that successful responses are appended to.
        :param errorList: list that locations with no response (None) are
            appended to; these are retried.
        :param maxRetries: how many more retry rounds may follow this call,
            so a persistently failing location cannot recurse forever.
        :return: respList.
        '''
        for location in locationList:
            resp = self.reverseGeocoding(location=location)
            if resp is not None and resp["status"] == 0:
                respList.append(resp)
            else:
                self._log.debug("current token: %s " % self.__ak)
                self._log.debug( resp)
                self._log.debug("at point:%s",str(location))
                if resp is None:
                    # No response at all: queue for another attempt. A
                    # response with a non-zero status is not retried.
                    errorList.append(location)
        if len(errorList) > 0 and maxRetries > 0:
            # Re-handle requests that produced no response.
            self._log.debug("http exception ,rehandle size : " + str(len(errorList)))
            self.reverseGeocodingBatchHandler(locationList=errorList, respList=respList, errorList=[],
                                              maxRetries=maxRetries - 1)
        return respList

    def reverseGeocoding(self, location, coordtype='bd09ll', output="json", pois='0'):
        '''
        Single reverse-geocoding request.
        Endpoint: http://api.map.baidu.com/geocoder/v2/
        Method: get
        coordtype: defaults to bd09ll. Supported coordinate types: bd09ll
        (Baidu lat/lng), bd09mc (Baidu metric), gcj02ll (GCJ-02 lat/lng),
        wgs84ll (GPS lat/lng).

        NOTE(review): coordtype, output and pois are accepted but not
        forwarded; the request always sends output="json" and pois=2.
        '''
        resp = self.baiduClient.geocoder.v2.addtrail("/").get(ak=self.__ak, output="json",pois=2, location=location)
        return resp

    def placeSearchBatch(self, query, bounds, pageNumber="0"):
        '''
        Place search that repeats the request until a response arrives.
        Endpoint: http://api.map.baidu.com/place/v2/search
        Method: get

        NOTE(review): the retry loop is unbounded, as before.
        '''
        resp = self.placeSearch(query=query, bounds=bounds, pageNumber=pageNumber)
        if resp is None:
            self._log.debug("http exception ,rehandle...")
            # TODO: find out why the client returns None
            while resp is None:
                resp = self.placeSearch(query=query, bounds=bounds, pageNumber=pageNumber)
        return resp

    # coord_type: 1 (wgs84ll), 2 (gcj02ll), 3 (bd09ll), 4 (bd09mc)
    def placeSearch(self, query, bounds, output="json", pageSize="20", pageNumber="0", coord_type="1", scope=2):
        '''
        Place search.
        Endpoint: http://api.map.baidu.com/place/v2/search
        Method: get
        '''
        data = self.baiduClient.place.v2.search.get(ak=self.__ak, query=query, bounds=bounds, output=output,
                                                    coord_type=coord_type, page_size=pageSize, page_num=pageNumber,
                                                    scope=scope)
        return data
97 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
57 |
58 |
59 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/util/geo/CoordTransor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import json
3 | import math
4 |
x_pi = 3.14159265358979324 * 3000.0 / 180.0  # angular factor used by the BD-09 <-> GCJ-02 formulas
pi = 3.1415926535897932384626  # π
a = 6378245.0  # semi-major axis of the reference ellipsoid
ee = 0.00669342162296594323  # eccentricity squared (used as e^2 in the radius formulas below)



def gcj02tobd09(lng, lat):
    """
    Convert GCJ-02 ("Mars") coordinates to Baidu BD-09.
    Google / AMap -> Baidu
    :param lng: GCJ-02 longitude
    :param lat: GCJ-02 latitude
    :return: [bd_lng, bd_lat]
    """
    z = math.sqrt(lng * lng + lat * lat) + 0.00002 * math.sin(lat * x_pi)
    theta = math.atan2(lat, lng) + 0.000003 * math.cos(lng * x_pi)
    bd_lng = z * math.cos(theta) + 0.0065
    bd_lat = z * math.sin(theta) + 0.006
    return [bd_lng, bd_lat]


def bd09togcj02(bd_lon, bd_lat):
    """
    Convert Baidu BD-09 coordinates to GCJ-02 ("Mars").
    Baidu -> Google / AMap
    :param bd_lat: BD-09 latitude
    :param bd_lon: BD-09 longitude
    :return: [gcj_lng, gcj_lat]
    """
    x = bd_lon - 0.0065
    y = bd_lat - 0.006
    z = math.sqrt(x * x + y * y) - 0.00002 * math.sin(y * x_pi)
    theta = math.atan2(y, x) - 0.000003 * math.cos(x * x_pi)
    gg_lng = z * math.cos(theta)
    gg_lat = z * math.sin(theta)
    return [gg_lng, gg_lat]


def _gcj_offset(lng, lat):
    """Return (dlng, dlat), the GCJ-02 offset in degrees evaluated at
    (lng, lat); shared by both WGS-84 <-> GCJ-02 conversions."""
    dlat = transformlat(lng - 105.0, lat - 35.0)
    dlng = transformlng(lng - 105.0, lat - 35.0)
    radlat = lat / 180.0 * pi
    magic = math.sin(radlat)
    magic = 1 - ee * magic * magic
    sqrtmagic = math.sqrt(magic)
    dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi)
    dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi)
    return dlng, dlat


def wgs84togcj02(lng, lat):
    """
    Convert WGS-84 to GCJ-02 ("Mars").
    :param lng: WGS-84 longitude
    :param lat: WGS-84 latitude
    :return: [lng, lat] in GCJ-02; the input unchanged (as a list) when the
        point is outside China
    """
    if out_of_china(lng, lat):  # no offset is applied outside China
        return [lng, lat]
    dlng, dlat = _gcj_offset(lng, lat)
    mglat = lat + dlat
    mglng = lng + dlng
    return [mglng, mglat]


def gcj02towgs84(lng, lat):
    """
    Convert GCJ-02 ("Mars") to WGS-84.
    :param lng: GCJ-02 longitude
    :param lat: GCJ-02 latitude
    :return: [lng, lat] in WGS-84; the input unchanged (as a list) when the
        point is outside China
    """
    if out_of_china(lng, lat):
        return [lng, lat]
    dlng, dlat = _gcj_offset(lng, lat)
    mglat = lat + dlat
    mglng = lng + dlng
    # The offset is evaluated at the GCJ-02 point itself and subtracted,
    # i.e. input - (mg - input).
    return [lng * 2 - mglng, lat * 2 - mglat]


def transformlat(lng, lat):
    """Latitude component of the raw GCJ-02 offset polynomial; callers pass
    coordinates already shifted by (105, 35)."""
    ret = -100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat + \
          0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng))
    ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
            math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(lat * pi) + 40.0 *
            math.sin(lat / 3.0 * pi)) * 2.0 / 3.0
    ret += (160.0 * math.sin(lat / 12.0 * pi) + 320 *
            math.sin(lat * pi / 30.0)) * 2.0 / 3.0
    return ret


def transformlng(lng, lat):
    """Longitude component of the raw GCJ-02 offset polynomial; callers pass
    coordinates already shifted by (105, 35)."""
    ret = 300.0 + lng + 2.0 * lat + 0.1 * lng * lng + \
          0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng))
    ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
            math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(lng * pi) + 40.0 *
            math.sin(lng / 3.0 * pi)) * 2.0 / 3.0
    ret += (150.0 * math.sin(lng / 12.0 * pi) + 300.0 *
            math.sin(lng / 30.0 * pi)) * 2.0 / 3.0
    return ret


def out_of_china(lng, lat):
    """
    Tell whether a point lies outside the bounding box used for China;
    such points are not offset.
    :param lng: longitude
    :param lat: latitude
    :return: True when outside the box, False otherwise
    """
    if lng < 72.004 or lng > 137.8347:
        return True
    if lat < 0.8293 or lat > 55.8271:
        return True
    return False
124 |
125 | if __name__=="__main__":
126 | print bd09togcj02(bd_lon=119.053739,bd_lat=32.054493)
127 | print gcj02towgs84(lng=118.805048,lat=31.975998)
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/site/search.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | pybuilder.helloworld
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
68 |
69 |
70 |
71 |
72 |
76 |
77 |
78 |
79 |
80 |
81 |
82 | - Docs »
83 |
84 |
85 | -
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
Search Results
96 |
97 |
101 |
102 |
103 | Sorry, page not found.
104 |
105 |
106 |
107 |
108 |
109 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/css/demo.css:
--------------------------------------------------------------------------------
1 | /* General Demo Style */
2 | @import url(http://fonts.googleapis.com/css?family=Lato:300,400,700);
3 |
4 | html { height: 100%; }
5 |
6 | body {
7 | font-family: 'Lato', Calibri, Arial, sans-serif;
8 | background: #ddd url(../images/bg.jpg) repeat top left;
9 | font-weight: 300;
10 | font-size: 15px;
11 | color: #333;
12 | -webkit-font-smoothing: antialiased;
13 | overflow-y: scroll;
14 | overflow-x: hidden;
15 | }
16 |
17 | a {
18 | color: #555;
19 | text-decoration: none;
20 | }
21 |
22 | .container {
23 | width: 100%;
24 | position: relative;
25 | }
26 |
27 | .clr {
28 | clear: both;
29 | padding: 0;
30 | height: 0;
31 | margin: 0;
32 | }
33 |
34 | .main {
35 | width: 90%;
36 | margin: 0 auto;
37 | position: relative;
38 | }
39 |
40 | .container > header {
41 | margin: 10px;
42 | padding: 20px 10px 10px 10px;
43 | position: relative;
44 | display: block;
45 | text-shadow: 1px 1px 1px rgba(0,0,0,0.2);
46 | text-align: center;
47 | }
48 |
49 | .container > header h1 {
50 | font-size: 30px;
51 | line-height: 38px;
52 | margin: 0;
53 | position: relative;
54 | font-weight: 300;
55 | color: #666;
56 | text-shadow: 0 1px 1px rgba(255,255,255,0.6);
57 | }
58 |
59 | .container > header h2 {
60 | font-size: 14px;
61 | font-weight: 300;
62 | margin: 0;
63 | padding: 15px 0 5px 0;
64 | color: #666;
65 | font-family: Cambria, Georgia, serif;
66 | font-style: italic;
67 | text-shadow: 0 1px 1px rgba(255,255,255,0.6);
68 | }
69 |
70 | /* Header Style */
71 | .codrops-top {
72 | line-height: 24px;
73 | font-size: 11px;
74 | background: #fff;
75 | background: rgba(255, 255, 255, 0.8);
76 | text-transform: uppercase;
77 | z-index: 9999;
78 | position: relative;
79 | font-family: Cambria, Georgia, serif;
80 | box-shadow: 1px 0px 2px rgba(0,0,0,0.2);
81 | }
82 |
83 | /* Clearfix hack by Nicolas Gallagher: http://nicolasgallagher.com/micro-clearfix-hack/ */
84 |
85 | .codrops-top:before,
86 | .codrops-top:after {
87 | content: " "; /* 1 */
88 | display: table; /* 2 */
89 | }
90 |
91 | .codrops-top:after {
92 | clear: both
93 | }
94 |
95 | .codrops-top a {
96 | padding: 0px 10px;
97 | letter-spacing: 1px;
98 | color: #333;
99 | display: inline-block;
100 | }
101 |
102 | .codrops-top a:hover {
103 | background: rgba(255,255,255,0.6);
104 | }
105 |
106 | .codrops-top span.right {
107 | float: right;
108 | }
109 |
110 | .codrops-top span.right a {
111 | float: left;
112 | display: block;
113 | }
114 |
115 | /* Demo Buttons Style */
116 | .codrops-demos {
117 | text-align:center;
118 | display: block;
119 | line-height: 30px;
120 | padding: 5px 0px;
121 | }
122 |
/* Demo navigation links: small pill-shaped buttons with a soft top-to-bottom gradient. */
.codrops-demos a {
	display: inline-block;
	margin: 0px 4px;
	padding: 0px 6px;
	color: #8c8c8c;
	line-height: 20px;
	font-size: 12px;
	font-weight: 700;
	text-shadow: 1px 1px 1px #fff;
	border: 1px solid #fff;
	background: #ffffff; /* Old browsers */
	background: -moz-linear-gradient(top, #ffffff 0%, #f6f6f6 47%, #ededed 100%); /* FF3.6+ */
	background: -webkit-gradient(linear, left top, left bottom, color-stop(0%,#ffffff), color-stop(47%,#f6f6f6), color-stop(100%,#ededed)); /* Chrome,Safari4+ */
	background: -webkit-linear-gradient(top, #ffffff 0%,#f6f6f6 47%,#ededed 100%); /* Chrome10+,Safari5.1+ */
	background: -o-linear-gradient(top, #ffffff 0%,#f6f6f6 47%,#ededed 100%); /* Opera 11.10+ */
	background: -ms-linear-gradient(top, #ffffff 0%,#f6f6f6 47%,#ededed 100%); /* IE10+ */
	/* W3C: the unprefixed function takes "to <side>"; a bare "top" is invalid and the declaration would be dropped */
	background: linear-gradient(to bottom, #ffffff 0%,#f6f6f6 47%,#ededed 100%);
	box-shadow: 0 1px 1px rgba(0, 0, 0, 0.1);
}
142 |
143 | .codrops-demos a:hover {
144 | color: #333;
145 | background: #fff;
146 | }
147 |
148 | .codrops-demos a:active {
149 | background: #fff;
150 | }
151 |
152 | .codrops-demos a.current-demo,
153 | .codrops-demos a.current-demo:hover {
154 | background: #555;
155 | border-color: #555;
156 | color: #ddd;
157 | text-shadow: 0 1px 1px rgba(0,0,0,0.5);
158 | box-shadow: 0 1px 0 rgba(255,255,255,0.2), inset 0 1px 1px rgba(0,0,0,0.5);
159 | }
160 |
161 | .support-note span{
162 | color: #ac375d;
163 | font-size: 16px;
164 | display: none;
165 | font-weight: bold;
166 | text-align: center;
167 | padding: 5px 0;
168 | }
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/js/application.js:
--------------------------------------------------------------------------------
1 | // Some general UI pack related JS
// Extend JS String with a repeat method.
// Returns the string concatenated `num` times, with `num` rounded to the
// nearest integer. Counts that round to zero or less, and non-numeric counts,
// yield an empty string instead of throwing.
String.prototype.repeat = function (num) {
  var count = Math.round(num);
  // `new Array(n)` throws a RangeError for negative or NaN lengths, so
  // anything that is not a positive count short-circuits to ''.
  if (!(count > 0)) {
    return '';
  }
  return new Array(count + 1).join(this);
};
6 |
7 | (function ($) {
8 |
// jQuery plugin: prepend segment markup to every slider in the set.
// The segment count is derived from the slider's own min/max/step options;
// any arguments passed by callers are ignored. Returns the jQuery set.
$.fn.addSliderSegments = function () {
  return this.each(function (_, element) {
    var $slider = $(element);
    var settings = $slider.slider('option');
    var segmentCount = (settings.max - settings.min) / settings.step;

    if (settings.orientation !== 'vertical') {
      // Horizontal: one template repeated (segmentCount - 1) times.
      var segmentGap = 100 / (segmentCount) + '%';
      var segment = '';
      $slider.prepend(segment.repeat(segmentCount - 1));
      return;
    }

    // Vertical: markup is accumulated one segment at a time.
    console.log(segmentCount);
    var markup = '';
    for (var k = 1; k <= segmentCount - 1; k++) {
      markup += '';
    }
    $slider.prepend(markup);
  });
};
30 |
// DOM-ready wiring for the UI widgets used across the pages.
$(function () {

  // Todo list: clicking an item toggles its done state
  $('.todo').on('click', 'li', function () {
    $(this).toggleClass('todo-done');
  });

  // Custom Selects (only when the page contains any)
  var $selects = $('[data-toggle="select"]');
  if ($selects.length) {
    $selects.select2();
  }

  // Checkboxes and Radio buttons
  $('[data-toggle="checkbox"]').radiocheck();
  $('[data-toggle="radio"]').radiocheck();

  // Tooltips
  $('[data-toggle=tooltip]').tooltip('show');

  // jQuery UI Sliders: both use a 0..10 horizontal track and differ only in
  // their initial values and range mode.
  function setUpSlider($target, initialValues, rangeMode) {
    if (!$target.length) {
      return;
    }
    $target.slider({
      min: 0,
      max: 10,
      values: initialValues,
      orientation: 'horizontal',
      range: rangeMode
    });
    $target.addSliderSegments($target.slider('option').max, 'vertical');
  }

  setUpSlider($('#slider'), [1, 4], true);
  setUpSlider($('#vertical-slider'), [4, 5], 'min');

  // Focus state for append/prepend inputs
  var focusScope = '.input-group, .form-group';
  $('.input-group')
    .on('focus', '.form-control', function () {
      $(this).closest(focusScope).addClass('focus');
    })
    .on('blur', '.form-control', function () {
      $(this).closest(focusScope).removeClass('focus');
    });

  // Make pagination demo work: the clicked link's <li> becomes the only active one
  $('.pagination').on('click', 'a', function () {
    var $item = $(this).parent();
    $item.siblings('li').removeClass('active');
    $item.addClass('active');
  });

  // Button groups: the clicked link becomes the only active one
  $('.btn-group').on('click', 'a', function () {
    var $button = $(this);
    $button.siblings().removeClass('active');
    $button.addClass('active');
  });

  // Disable link clicks to prevent page scrolling
  $(document).on('click', 'a[href="#fakelink"]', function (e) {
    e.preventDefault();
  });

  // Switches
  var $switches = $('[data-toggle="switch"]');
  if ($switches.length) {
    $switches.bootstrapSwitch();
  }

  // Typeahead demo backed by a small local word list
  var $typeahead = $('#typeahead-demo-01');
  if ($typeahead.length) {
    var words = ['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado'];
    var states = new Bloodhound({
      datumTokenizer: function (d) { return Bloodhound.tokenizers.whitespace(d.word); },
      queryTokenizer: Bloodhound.tokenizers.whitespace,
      limit: 4,
      local: $.map(words, function (w) { return { word: w }; })
    });

    states.initialize();

    $typeahead.typeahead(null, {
      name: 'states',
      displayKey: 'word',
      source: states.ttAdapter()
    });
  }

  // make code pretty (only when the prettify script is loaded)
  if (window.prettyPrint) {
    prettyPrint();
  }

});
130 |
131 | })(jQuery);
132 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/site/about/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | About - pybuilder.helloworld
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
74 |
75 |
76 |
77 |
78 |
82 |
83 |
84 |
85 |
86 |
87 |
88 | - Docs »
89 |
90 |
91 |
92 | - About
93 | -
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 | « Previous
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/hotel/SuperHotelService.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer,LiuYang'
3 |
4 | from selenium import webdriver
5 | from selenium.webdriver.common.keys import Keys
6 | import time
7 | import traceback
8 |
9 |
class HotelService(object):
    """Base class for Selenium-driven hotel crawlers.

    Owns the Chrome driver, the three "what to crawl" flags and the current
    city, and provides scroll-and-click helpers. The crawl/save/get methods
    are empty hooks.
    """

    # Every this many attempts the viewport is moved back to its starting
    # edge (optionally after a page refresh).
    _RESET_EVERY = 500
    # Attempt number at which the search for the element is abandoned.
    _GIVE_UP_AT = 1501

    def __init__(self):
        self.driver = webdriver.Chrome()

        self.if_crawl_hotel_info = True

        self.if_crawl_hotel_comment = True

        self.if_crawl_hotel_price = True

        self._city = None

    def openPage(self, url):
        """Open ``url`` in the driver and maximize the browser window."""
        self.driver.get(url)
        self.driver.maximize_window()

    def wait(self, timeout):
        """Set the driver's implicit wait to ``timeout``."""
        self.driver.implicitly_wait(timeout)

    def closeDriver(self):
        """Close the driver via ``driver.close()``.

        NOTE(review): if the whole browser session should end here,
        ``driver.quit()`` may be the intended call -- confirm with callers.
        """
        self.driver.close()

    def crawlListPage(self):
        """Hook: walk the hotel list pages and collect detail-page links.

        Intended to return True on success and False on failure; this base
        implementation does nothing.
        """
        pass

    def saveListPageInfo(self):
        """Hook: persist the crawled hotel list page data (no-op here)."""
        pass

    def set_city(self, city):
        """Remember the city being crawled."""
        self._city = city

    def crawlHotelInfo(self, target):
        """Hook: crawl the information of one hotel (no-op here)."""
        pass

    def saveHotelInfo(self):
        """Hook: persist the crawled hotel information (no-op here)."""
        pass

    def getListPageInfo(self):
        """Hook: return the hotel list page data (no-op here)."""
        pass

    def _press_on_body(self, key):
        """Send one key press to the page's <body> element."""
        self.driver.find_element_by_tag_name("body").send_keys(key)

    def _scroll_and_click(self, locate, from_bottom, refresh,
                          first_pause, refresh_pause, reset_pause):
        """Scroll one arrow-key step at a time until ``locate().click()`` works.

        :param locate: zero-argument callable returning the element to click
        :param from_bottom: start at the page end and scroll up when true,
            otherwise start at the top and scroll down
        :param refresh: refresh the page on every reset when true
        :param first_pause: seconds to sleep after the initial jump, or None
        :param refresh_pause: seconds to sleep after a refresh
        :param reset_pause: seconds to sleep after a reset jump, or None
        :return: None; a message with the current URL is printed on give-up
        """
        if from_bottom:
            edge_key, step_key = Keys.END, Keys.ARROW_UP
        else:
            edge_key, step_key = Keys.HOME, Keys.ARROW_DOWN
        self._press_on_body(edge_key)
        if first_pause is not None:
            time.sleep(first_pause)
        attempt = 0
        while True:
            attempt += 1
            if attempt % self._RESET_EVERY == 0:
                if refresh:
                    self.driver.refresh()
                    time.sleep(refresh_pause)
                self._press_on_body(edge_key)
                if reset_pause is not None:
                    time.sleep(reset_pause)
            if attempt == self._GIVE_UP_AT:
                print(u"点击评论类型出错" + self.driver.current_url)
                return
            self._press_on_body(step_key)
            try:
                locate().click()
                return
            except Exception:
                # Element missing or not clickable yet: scroll one more step.
                # Only Exception is caught so Ctrl-C / SystemExit still stop
                # the loop.
                continue

    def scroll_and_click_by_partial_link_text(self, text, from_bottom=False):
        """Scroll until a link whose text contains ``text`` can be clicked.

        The page is refreshed (followed by a 2 second pause) every 500
        attempts, and the search is abandoned at attempt 1501.
        """
        self._scroll_and_click(
            lambda: self.driver.find_element_by_partial_link_text(text),
            from_bottom, True, None, 2, None)

    def scroll_and_click_by_xpath(self, text, from_bottom=False, refresh_if_failed=True, sleep_time=0):
        """Scroll until the element matching XPath ``text`` can be clicked.

        :param text: XPath expression of the element to click
        :param from_bottom: search upwards from the page end when true
        :param refresh_if_failed: refresh the page every 500 failed attempts
        :param sleep_time: seconds to pause after each jump and each refresh

        NOTE(review): the source's indentation was lost; this assumes the
        jump back to the starting edge every 500 attempts happens whether or
        not ``refresh_if_failed`` is set -- confirm against the original file.
        """
        self._scroll_and_click(
            lambda: self.driver.find_element_by_xpath(text),
            from_bottom, refresh_if_failed, sleep_time, sleep_time, sleep_time)

    def set_crawl_content(self, if_crawl_hotel_info, if_crawl_hotel_comment, if_crawl_hotel_price):
        """Choose which parts (info, comments, prices) should be crawled."""
        self.if_crawl_hotel_info = if_crawl_hotel_info

        self.if_crawl_hotel_comment = if_crawl_hotel_comment

        self.if_crawl_hotel_price = if_crawl_hotel_price
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/js/review-monitor/main.js:
--------------------------------------------------------------------------------
1 | (function () {
2 |
3 | var user = JSON.parse(sessionStorage.user);
4 |
5 | var location = JSON.parse(sessionStorage.location);
6 |
// Initialise the page
(function () {
  // Account name in the navbar menu: prefix it with the hotel name
  if (user["user_name"] == null) {
    return;
  }
  var nameNode = document.getElementById("user_name");
  nameNode.innerHTML = location["hotel_name"] + nameNode.innerHTML;
})();
13 |
/**
 * Load the first page of comments of the current hotel for one OTA and
 * render both the pager and the comment list.
 * @param ota String OTA name
 */
window.reviewMonitor = function (ota) {
  var hotelName = location['hotel_name'];
  var firstPage = requestComments(hotelName, 1, ota);
  // Nothing to render when the request failed
  if (firstPage == null) {
    return;
  }
  document.getElementById("page_list").innerHTML = generateFooterHtml(hotelName, 1, firstPage["pageNum"], ota);
  document.getElementById("comment_list").innerHTML = generateCommentsHtml(firstPage["comments_info"]);
};
23 |
/**
 * Request one page of a hotel's comments (synchronous call).
 * @param hotelName String hotel name
 * @param page Int|String page index
 * @param ota String OTA name
 * @return Object|null the parsed JSON response, or null when the request failed
 */
function requestComments(hotelName, page, ota) {
  var commentsData = null;
  // Values are escaped so that names containing '&', '#', '+', '=' or
  // spaces cannot corrupt the query string.
  var paramStr = "?hotel_name=" + encodeURIComponent(hotelName) +
    "&page=" + encodeURIComponent(page) +
    "&ota=" + encodeURIComponent(ota);
  $.ajax({
    type: "get",
    // Kept synchronous: callers use the return value immediately.
    async: false,
    url: domain + getComments + paramStr,
    dataType: "json",
    timeout: 5000,
    success: function (result) {
      commentsData = result;
    },
    error: function (errorMsg) {
      console.log(errorMsg);
      alert("你输入的值有误,请输入完整参数或者重试");
    }
  });
  return commentsData;
}
50 |
/**
 * Build the pager markup for a page range.
 * The first ten pages starting at `origin` are listed one by one; every
 * further block of ten is emitted as a range entry ("21–30", ...), with the
 * first such block producing the opening entry instead of a label.
 * @param hotelName String hotel name (not referenced by the string building below)
 * @param origin int first page number
 * @param pageNum int total number of pages
 * @param ota String OTA name (not referenced by the string building below)
 * @return String the concatenated pager markup
 */
function generateFooterHtml(hotelName, origin, pageNum, ota) {
  var html = "";
  var page = origin;

  // Individually listed pages: origin .. origin + 9 (bounded by pageNum)
  while (page <= pageNum && page < origin + 10) {
    if (page == origin) {
      html += '';
    }
    html += "" + page + '';
    if (page == pageNum) {
      html += '';
    }
    page++;
  }

  // Remaining pages, grouped in blocks of ten
  for (var start = origin + 10; start <= pageNum; start += 10) {
    if (start == origin + 10) {
      // Opening entry of the range list (holds a triangle button)
      html += '';
    } else {
      html += '' + start + '–' + (start + 10 - 1) + '';
    }
  }
  return html;
}
89 |
/**
 * Click handler of a pager entry: load that page of comments, replace the
 * comment list and scroll back to the top of the document.
 * @param hotelName String hotel name
 * @param ota String OTA name
 * @param e the clicked element; its `text` is used as the page index
 */
window.pageIndexClick = function (hotelName, ota, e) {
  console.log(e);
  var pageData = requestComments(hotelName, e.text, ota);
  // Only re-render when the request succeeded
  if (pageData != null) {
    var listNode = document.getElementById("comment_list");
    listNode.innerHTML = generateCommentsHtml(pageData["comments_info"]);
  }
  var scrollTarget = { scrollTop: $("html").offset().top };
  $('html, body').animate(scrollTarget, 500);
};
105 |
/**
 * Build the markup of the comment list.
 * @param comments list of comment rows; the text of each row is read from index 2
 * @return String the concatenated markup, '' for an empty list
 */
function generateCommentsHtml(comments) {
  var pieces = [];
  for (var idx = 0; idx < comments.length; idx += 1) {
    var row = comments[idx];
    pieces.push('' + row[2] + '');
  }
  return pieces.join('');
}
117 |
118 | })();
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/html/setting.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | 基于社会感知的酒店竞业市场时空可视化分析
10 |
11 |
12 |
13 |
14 |
15 |
16 |
21 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |

56 |
携程网
57 |
100% convertible to HTML/CSS layout.
58 |
查看评论
59 |
60 |
61 |
62 |
63 |

64 |
艺龙网
65 |
Vector-based shapes and minimum of layer styles.
66 |
查看评论
67 |
68 |
69 |
70 |
71 |
72 |

73 |
途牛网
74 |
Easy to add or change elements.
75 |
查看评论
76 |
77 |
78 |
79 |
80 |
81 |

82 |
去哪儿网
83 |
Your likes, shares and comments help us.
84 |
查看评论
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/html/public-opinion-monitor.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | 基于社会感知的酒店竞业市场时空可视化分析
10 |
11 |
12 |
13 |
14 |
15 |
16 |
21 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |

56 |
携程网
57 |
100% convertible to HTML/CSS layout.
58 |
查看评论
59 |
60 |
61 |
62 |
63 |

64 |
艺龙网
65 |
Vector-based shapes and minimum of layer styles.
66 |
查看评论
67 |
68 |
69 |
70 |
71 |
72 |

73 |
途牛网
74 |
Easy to add or change elements.
75 |
查看评论
76 |
77 |
78 |
79 |
80 |
81 |

82 |
去哪儿网
83 |
Your likes, shares and comments help us.
84 |
查看评论
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/service/map/baidu/CoordinateTransferService.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import json
3 | import requests
4 | import math
5 |
6 | key = 'your key here' # 这里填写你的百度开放平台的key
7 | x_pi = 3.14159265358979324 * 3000.0 / 180.0
8 | pi = 3.1415926535897932384626 # π
9 | a = 6378245.0 # 长半轴
10 | ee = 0.00669342162296594323 # 扁率
11 |
12 |
def geocode(address):
    """
    Resolve an address to coordinates with a geocoding web service.

    The request is sent to the AMap endpoint (restapi.amap.com), so the
    module-level ``key`` must be valid for that service.
    NOTE(review): the original docstring named Baidu geocoding; the URL says
    otherwise -- confirm which provider is intended.

    :param address: the address to resolve
    :return: [lng, lat] (floats) of the first match, or None when the HTTP
        status is not 200, the service status is not '1', or nothing matched
    """
    params = {'s': 'rsv3',
              'key': key,
              'city': '全国',
              'address': address}
    res = requests.get(
        "http://restapi.amap.com/v3/geocode/geo", params=params)
    if res.status_code != 200:
        return None
    # Not named `json`, which would shadow the imported module in this scope.
    payload = res.json()
    if payload.get('status') != '1' or int(payload.get('count')) < 1:
        return None
    # "location" is a "<lng>,<lat>" string; split it once.
    parts = payload.get('geocodes')[0].get('location').split(',')
    return [float(parts[0]), float(parts[1])]
38 |
39 |
def gcj02tobd09(lng, lat):
    """
    Convert GCJ-02 ("Mars") coordinates to BD-09 (Baidu) coordinates.
    Direction: Google / AMap -> Baidu.
    :param lng: GCJ-02 longitude
    :param lat: GCJ-02 latitude
    :return: [bd_lng, bd_lat]
    """
    # Polar form of the input, each component perturbed by a small periodic term
    radius = math.sqrt(lng * lng + lat * lat) + 0.00002 * math.sin(lat * x_pi)
    angle = math.atan2(lat, lng) + 0.000003 * math.cos(lng * x_pi)
    # Back to cartesian, plus the constant BD-09 shift
    return [radius * math.cos(angle) + 0.0065,
            radius * math.sin(angle) + 0.006]
53 |
54 |
def bd09togcj02(bd_lon, bd_lat):
    """
    Convert BD-09 (Baidu) coordinates to GCJ-02 ("Mars") coordinates.
    Direction: Baidu -> Google / AMap.
    :param bd_lon: BD-09 longitude
    :param bd_lat: BD-09 latitude
    :return: [lng, lat] in GCJ-02
    """
    # Remove the constant BD-09 shift first
    dx = bd_lon - 0.0065
    dy = bd_lat - 0.006
    # Polar form with the small periodic terms subtracted
    radius = math.sqrt(dx * dx + dy * dy) - 0.00002 * math.sin(dy * x_pi)
    angle = math.atan2(dy, dx) - 0.000003 * math.cos(dx * x_pi)
    return [radius * math.cos(angle), radius * math.sin(angle)]
70 |
71 |
def _gcj02_offset(lng, lat):
    """Return (dlng, dlat), the offset in degrees computed for the point (lng, lat)."""
    dlat = transformlat(lng - 105.0, lat - 35.0)
    dlng = transformlng(lng - 105.0, lat - 35.0)
    radlat = lat / 180.0 * pi
    magic = math.sin(radlat)
    magic = 1 - ee * magic * magic
    sqrtmagic = math.sqrt(magic)
    dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi)
    dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi)
    return dlng, dlat


def wgs84togcj02(lng, lat):
    """
    Convert WGS84 coordinates to GCJ-02 ("Mars") coordinates.
    :param lng: WGS84 longitude
    :param lat: WGS84 latitude
    :return: [lng, lat] in GCJ-02; the input is returned unchanged (also as a
        list) when out_of_china() reports the point as outside the covered box
    """
    if out_of_china(lng, lat):
        # A list, like the converted result, so callers see one return type.
        return [lng, lat]
    dlng, dlat = _gcj02_offset(lng, lat)
    mglat = lat + dlat
    mglng = lng + dlng
    return [mglng, mglat]


def gcj02towgs84(lng, lat):
    """
    Convert GCJ-02 ("Mars") coordinates to WGS84.

    The offset is evaluated at the GCJ-02 point itself and subtracted, so the
    result is an approximation of the inverse, not an exact one.
    :param lng: GCJ-02 longitude
    :param lat: GCJ-02 latitude
    :return: [lng, lat] in WGS84; the input is returned unchanged (also as a
        list) when out_of_china() reports the point as outside the covered box
    """
    if out_of_china(lng, lat):
        return [lng, lat]
    dlng, dlat = _gcj02_offset(lng, lat)
    mglat = lat + dlat
    mglng = lng + dlng
    return [lng * 2 - mglng, lat * 2 - mglat]
114 |
115 |
def transformlat(lng, lat):
    """
    Latitude component of the offset polynomial used by the WGS84 <-> GCJ-02
    conversions. Callers in this module pass coordinates relative to (105, 35).
    :param lng: longitude argument
    :param lat: latitude argument
    :return: raw latitude offset term (scaled to degrees by the callers)
    """
    polynomial = -100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat + \
        0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng))
    lng_wave = (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
                math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    lat_wave_fine = (20.0 * math.sin(lat * pi) + 40.0 *
                     math.sin(lat / 3.0 * pi)) * 2.0 / 3.0
    lat_wave_coarse = (160.0 * math.sin(lat / 12.0 * pi) + 320 *
                       math.sin(lat * pi / 30.0)) * 2.0 / 3.0
    # Summed left to right, matching the original accumulation order.
    return polynomial + lng_wave + lat_wave_fine + lat_wave_coarse
126 |
127 |
def transformlng(lng, lat):
    """
    Longitude component of the offset polynomial used by the WGS84 <-> GCJ-02
    conversions. Callers in this module pass coordinates relative to (105, 35).
    :param lng: longitude argument
    :param lat: latitude argument
    :return: raw longitude offset term (scaled to degrees by the callers)
    """
    polynomial = 300.0 + lng + 2.0 * lat + 0.1 * lng * lng + \
        0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng))
    wave_fast = (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
                 math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    wave_fine = (20.0 * math.sin(lng * pi) + 40.0 *
                 math.sin(lng / 3.0 * pi)) * 2.0 / 3.0
    wave_coarse = (150.0 * math.sin(lng / 12.0 * pi) + 300.0 *
                   math.sin(lng / 30.0 * pi)) * 2.0 / 3.0
    # Summed left to right, matching the original accumulation order.
    return polynomial + wave_fast + wave_fine + wave_coarse
138 |
139 |
def out_of_china(lng, lat):
    """
    Tell whether a point lies outside the bounding box used for China;
    points outside it are not offset by the conversions.
    :param lng: longitude
    :param lat: latitude
    :return: True when the point is outside the box, False otherwise
    """
    outside_lng = lng < 72.004 or lng > 137.8347
    outside_lat = lat < 0.8293 or lat > 55.8271
    return bool(outside_lng or outside_lat)
152 |
153 |
154 | if __name__ == '__main__':
155 | lng = 128.543
156 | lat = 37.065
157 | result1 = gcj02tobd09(lng, lat)
158 | result2 = bd09togcj02(lng, lat)
159 | result3 = wgs84togcj02(lng, lat)
160 | result4 = gcj02towgs84(lng, lat)
161 | result5 = geocode('北京市朝阳区朝阳公园')
162 | print result1, result2, result3, result4, result5
163 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/scripts/Hotel/TuniuCatcher.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'DreamCathcer'
3 |
4 |
5 | import time
6 | import uuid
7 |
8 | from service.hotel.TuniuAPIClient import TuniuAPIClient
9 | from dao.hotel.HotelDAO import HotelDAO
10 | from setting import local_hotel_setting
11 | from util.geo import CoordTransor
12 |
13 | # 配置数据库
14 | dao_setting = local_hotel_setting
15 |
16 |
class TuniuCatcher(object):
    """Fetches the Tuniu hotel list of one city and syncs it into the hotel database.

    Rows returned by ``HotelDAO.get_baseinfo`` are handled positionally:
    [0] guid, [1] url, [2] location_id, [3] OTA, [4] comm_num,
    [5] if_overtime, [6] incre_num, [7] img, [8] id_in_ota.
    Location rows use [0] as the id and [3] as the hotel name.
    """

    def __init__(self):
        self._city = None
        self.__ota_info = "途牛"
        self.tuniu_api_client = TuniuAPIClient()
        self.hotel_dao = HotelDAO(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"])

    def setCity(self, city):
        """Set the city whose hotels are fetched and saved."""
        self._city = city

    def getHotelList(self, city_code):
        """Download every list page for ``city_code``.

        A failing page is retried until it succeeds.

        :param city_code: city identifier passed to the API client
        :return: list of hotel items, or None when no city has been set
        """
        if self._city is None:
            print("未设置城市,请先使用setCity方法")
            return
        hotel_list = []
        page_index = 1
        page_amount = 10000
        while page_index <= page_amount - 1:
            try:
                page_data = self.tuniu_api_client.get_hotel_list(page_index, city_code)
                # The reported total is unstable, so the smallest page count
                # seen so far is used as the bound (20 hotels per page).
                reported_pages = page_data["data"]["total"] // 20
                if page_amount > reported_pages:
                    page_amount = reported_pages
                    print("page_amount=%d" % page_amount)
                hotel_list.extend(page_data["data"]["list"])
                print("Page_%d Success" % page_index)
                time.sleep(5)
                page_index += 1
            except Exception:
                # Only Exception is caught, so Ctrl-C / SystemExit can still
                # stop an endlessly failing retry loop.
                print("Page_%d Fail" % page_index)
                continue
        return hotel_list

    def saveHolteList(self, hotel_list):
        """Merge ``hotel_list`` into the location and baseinfo tables.

        Hotels whose name is unknown get a new location row; baseinfo rows
        are updated when one exists for the location and inserted otherwise.
        Existing baseinfo rows that are not matched by any item stay flagged
        with if_overtime = 1.

        :param hotel_list: items as returned by getHotelList()
        """
        old_location_info = self.hotel_dao.get_locations(self._city)
        # Rows are copied into lists so they can be edited in place, and are
        # all assumed outdated until an item matches them.
        old_baseinfo = [list(row) for row in self.hotel_dao.get_baseinfo(self._city, self.__ota_info)]
        for row in old_baseinfo:
            row[5] = 1
        new_locations = []
        new_baseinfo = []
        update_baseinfo = []
        for item in hotel_list:
            location_id = None
            # Is the hotel already present in the location table?
            for location in old_location_info:
                if item["name"] == location[3]:
                    location_id = location[0]
                    break
            # If not, queue a new location row
            if location_id is None:
                location_id = uuid.uuid1()
                trans_location = CoordTransor.gcj02towgs84(lng=float(item["pos"]["lng"]), lat=float(item["pos"]["lat"]))
                # NOTE(review): if gcj02towgs84 returns [lng, lat], "x" below
                # receives the latitude -- confirm the column convention.
                new_locations.append({
                    "guid": location_id,
                    "x": trans_location[1],
                    "y": trans_location[0],
                    "hotel_name": item["name"],
                    "city": self._city,
                    "address": item["address"]
                })
            # Update the baseinfo row of this location, or queue a new one
            if_exist = False
            for baseinfo in old_baseinfo:
                if location_id == baseinfo[2]:
                    if_exist = True
                    # The increment must be taken against the stored count
                    # BEFORE it is overwritten; otherwise it is always 0.
                    increment = int(item["remarkCount"]) - int(baseinfo[4] or 0)
                    baseinfo[1] = item["url"]
                    baseinfo[4] = item["remarkCount"]
                    baseinfo[5] = 0
                    baseinfo[6] = increment if increment > 0 else 0
                    baseinfo[7] = item["snapshot"]
                    baseinfo[8] = item["id"]
                    break
            if not if_exist:
                new_baseinfo.append({
                    "guid": uuid.uuid1(),
                    "url": item["url"],
                    "location_id": location_id,
                    "OTA": self.__ota_info,
                    "comm_num": item["remarkCount"],
                    "if_overtime": 0,
                    "incre_num": item["remarkCount"],
                    "img": item["snapshot"],
                    "id_in_ota": item["id"]
                })
        for baseinfo in old_baseinfo:
            update_baseinfo.append({
                "guid": baseinfo[0],
                "url": baseinfo[1],
                "location_id": baseinfo[2],
                "OTA": baseinfo[3],
                "comm_num": baseinfo[4],
                "if_overtime": baseinfo[5],
                "incre_num": baseinfo[6],
                "img": baseinfo[7],
                "id_in_ota": baseinfo[8]
            })
        print("%d %d %d" % (len(new_locations), len(new_baseinfo), len(update_baseinfo)))
        self.hotel_dao.save_locations(new_locations)
        self.hotel_dao.save_baseinfo(new_baseinfo)
        self.hotel_dao.update_baseinfo(update_baseinfo)
123 |
124 |
125 |
126 | if __name__ == "__main__":
127 | starttime = time.time()
128 | tuniu_catcher = TuniuCatcher()
129 | tuniu_catcher.setCity("南京")
130 | hotel_list = tuniu_catcher.getHotelList(1602)
131 | tuniu_catcher.saveHolteList(hotel_list)
132 | endtime = time.time()
133 | print endtime-starttime
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/scripts/GeocodingService.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import math
4 | from time import time, strftime, localtime
5 | import os
6 | import shutil
7 |
8 | from util.geo.GeoUtil import GeoUtil
9 | from util.common.CollectionUtil import CollectionUtil
10 | from util.io.FileUtil import FileUtil
11 |
12 | import gevent
13 | import gevent.monkey
14 |
15 | gevent.monkey.patch_socket()
16 |
17 | import logging
18 | import logging.config
19 | from setting import baidu_map_uadb_setting
20 |
21 | logging.config.fileConfig(FileUtil().getLogConfigPath())
22 | logger = logging.getLogger("ugc")
23 |
24 | from service.map.baidu.SnatcherService import BaiduMapSnatcherService
25 |
26 | # 数据库配置
27 | dao_setting = baidu_map_uadb_setting
28 |
29 | def frange2(x, y, step):
30 | while x < y:
31 | yield x
32 | x += step
33 | if x >= y:
34 | x = y
35 | yield x
36 |
37 |
38 | # 每个Token正向编码100万,企业号300万
39 | goodAkList = [你的ak ]
40 |
41 | path = "c:/data/point_cache/"
42 | path_bak = "c:/data/point_cache_bak/" + strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
43 | if os.path.exists(path_bak) == False:
44 | os.makedirs(path_bak)
45 |
46 |
# Crawls data through the Baidu Geocoding API
class GeocodingService(object):
    """Distributes cached points over the available API keys and crawls them with gevent."""

    # Name of the address-node table
    addressNodeTableName = 'AddressNode_Xuzhou'
    # Name of the POI table
    placeTableName = 'Place_Xuzhou'

    def fetchAddressNodeByPoints(self, index, points):
        """Crawl ``points`` with the key selected by ``index``.

        :param index: position in goodAkList; indices past the end reuse the last key
        :param points: the points handed to the snatcher service
        """
        token = goodAkList[min(index, len(goodAkList) - 1)]

        logger.info('current index %s,points %s' % (index, str(len(points))))
        snatcherService = BaiduMapSnatcherService(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"], token)
        snatcherService.fetchAddressNode(index, points, self.addressNodeTableName, placeTableName=self.placeTableName)
        logger.debug('Process %s done' % index)
        # TODO multiprocessing implementation: the last process does not
        # execute the code inside fetchAddressNode

    def run(self):
        """Read cached point files, crawl them concurrently, then normalise empty strings.

        Files are read from ``path`` until roughly limitSize * len(goodAkList)
        points are loaded; each file that was read is moved to ``path_bak``.
        """
        pointList = []
        limitSize = 950000
        # TODO reading limitSize points from the file cache is still to be tested
        fileNames = os.listdir(path)
        for fileName in fileNames:
            print(len(pointList))
            if len(pointList) >= limitSize * len(goodAkList):
                break
            # Not named `file`, which would shadow the Python 2 builtin
            cachePath = path + fileName
            myList = FileUtil().readFileToObj(cachePath)
            logger.debug("read file %s,size %s" % (cachePath, len(myList)))
            pointList.extend(myList)

            print("cut file %s to %s" % (cachePath, path_bak))
            shutil.move(cachePath, path_bak)
        # One bucket of points per API key
        chunkPoints = CollectionUtil().chunksByAverage(pointList, len(goodAkList))
        threads = []
        threadSize = len(chunkPoints)
        logger.debug('thread size ...%s ' % threadSize)
        # Each bucket is split into chunks of limitSize // 6 points, one greenlet per chunk
        for i in range(threadSize):
            threadChunkPointsList = CollectionUtil().chunksBySize(chunkPoints[i], limitSize // 6)
            for j in range(len(threadChunkPointsList)):
                index = str(i) + "_" + str(j)
                logger.debug('current thread ...%s ' % index)
                threads.append(gevent.spawn(self.fetchAddressNodeByPoints, i, threadChunkPointsList[j]))
        gevent.joinall(threads)
        # Turn empty strings in the address-node table into NULL
        snatcherService = BaiduMapSnatcherService(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"])
        snatcherService.setNullStrToNull(self.addressNodeTableName)

    def concurrentRequest(self):
        """Crawl the first half of the points inside a hard-coded bounding box."""
        # Test area
        # bounds = [113.149662, 23.038528, 113.15175, 23.039123]
        # Guicheng sub-district
        bounds = [113.129391, 22.98257, 113.261335, 23.072904]
        # Shishan town
        # bounds = [113.092391, 23.132011, 113.123293, 23.167699]
        # Nanjing
        # bounds = [118.710042, 31.960759, 118.905082, 32.134843]
        # Points inside the area
        points = GeoUtil().getPointByBounds(bounds, 1000)
        # Float division: with integer division (Python 2) the value is
        # already floored and math.ceil has no effect.
        start = int(math.ceil(len(points) / 2.0))
        # The point set is crawled in two halves
        # end = len(points)
        # points = points[start:end]
        points = points[0:start]

        logger.debug('points size %s' % len(points))
        # One subset of points per API key
        subPoints = CollectionUtil().chunksByAverage(points, len(goodAkList))

        threads = []
        processSize = len(subPoints)
        logger.debug('process size ...%s ,per process data size...%s' % (processSize, len(subPoints)))
        for index in range(processSize):
            logger.debug('current process ...%s ' % index)
            threads.append(gevent.spawn(self.fetchAddressNodeByPoints, index, subPoints[index]))
        gevent.joinall(threads)
131 |
132 |
133 | if __name__ == '__main__':
134 | # python E:\PythonWorkspace\ugc\ugc.aggregator\src\main\scripts\GeocodingService.py
135 | ts = time()
136 | service = GeocodingService()
137 | # service.concurrentRequest()
138 | service.run()
139 |
140 | logger.debug('Took %s' % format(time() - ts))
141 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/scripts/Hotel/XiechengCatcher.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'LiuYang'
3 |
4 |
5 | import time
6 | import uuid
7 |
8 | from service.hotel.XieChengAPIClient import XieChengAPIClient
9 | from dao.hotel.HotelDAO import HotelDAO
10 | from setting import local_hotel_setting
11 | from util.geo import CoordTransor
12 |
13 | # 配置数据库
14 | dao_setting = local_hotel_setting
15 |
16 |
class XiechengCatcher(object):
    """Fetches the Ctrip (Xiecheng) hotel list of one city and syncs it into the hotel database.

    Rows returned by ``HotelDAO.get_baseinfo`` are handled positionally:
    [0] guid, [1] url, [2] location_id, [3] OTA, [4] comm_num,
    [5] if_overtime, [6] incre_num, [7] img, [8] id_in_ota.
    Location rows use [0] as the id and [3] as the hotel name.
    """

    def __init__(self):
        self._city = None
        self.__ota_info = "携程"
        self.xiecheng_api_client = XieChengAPIClient()
        self.hotel_dao = HotelDAO(dao_setting["host"], dao_setting["db"], dao_setting["user"], dao_setting["password"])

    def setCity(self, city):
        """Set the city whose hotels are fetched and saved."""
        self._city = city

    def getHotelList(self, cityId):
        """Download every list page for ``cityId``.

        A failing page is retried until it succeeds.

        :param cityId: city identifier passed to the API client
        :return: list of hotel items, or None when no city has been set
        """
        if self._city is None:
            print("未设置城市,请先使用setCity方法")
            # Stop here, as the message says; continuing would query the
            # API with no city.
            return
        hotel_list = []
        page_index = 1
        page_amount = 10000
        while page_index <= page_amount - 1:
            try:
                page_data = self.xiecheng_api_client.get_hotel_list(page_index, cityId, self._city)
                # The reported total is unstable, so the smallest page count
                # seen so far is used as the bound (25 hotels per page).
                reported_pages = page_data["hotelAmount"] // 25
                if page_amount > reported_pages:
                    page_amount = reported_pages
                    print("page_amount=%d" % page_amount)
                hotel_list.extend(page_data["hotelPositionJSON"])
                print("Page_%d Success" % page_index)
                page_index += 1
            except Exception:
                # Only Exception is caught, so Ctrl-C / SystemExit can still
                # stop an endlessly failing retry loop.
                print("Page_%d Fail" % page_index)
                continue
        return hotel_list

    def saveHolteList(self, hotel_list):
        """Merge ``hotel_list`` into the location and baseinfo tables.

        Hotels whose name is unknown get a new location row; baseinfo rows
        are updated when one exists for the location and inserted otherwise.
        Existing baseinfo rows that are not matched by any item stay flagged
        with if_overtime = 1.

        :param hotel_list: items as returned by getHotelList()
        """
        old_location_info = self.hotel_dao.get_locations(self._city)
        # Rows are copied into lists so they can be edited in place, and are
        # all assumed outdated until an item matches them.
        old_baseinfo = [list(row) for row in self.hotel_dao.get_baseinfo(self._city, self.__ota_info)]
        for row in old_baseinfo:
            row[5] = 1
        new_locations = []
        new_baseinfo = []
        update_baseinfo = []
        for item in hotel_list:
            location_id = None
            # Is the hotel already present in the location table?
            for location in old_location_info:
                if item["name"] == location[3]:
                    location_id = location[0]
                    break
            # If not, queue a new location row
            if location_id is None:
                location_id = uuid.uuid1()
                trans_location = CoordTransor.bd09togcj02(bd_lon=float(item["lon"]), bd_lat=float(item["lat"]))
                # NOTE(review): the result is passed on as ([1], [0]). If
                # bd09togcj02 returns [lng, lat] and gcj02towgs84 expects
                # (lng, lat), the arguments are swapped here -- confirm
                # against CoordTransor before changing stored data.
                trans_location = CoordTransor.gcj02towgs84(trans_location[1], trans_location[0])
                new_locations.append({
                    "guid": location_id,
                    "x": trans_location[1],
                    "y": trans_location[0],
                    "hotel_name": item["name"],
                    "city": self._city,
                    "address": item["address"]
                })
            # Update the baseinfo row of this location, or queue a new one
            if_exist = False
            for baseinfo in old_baseinfo:
                if location_id == baseinfo[2]:
                    if_exist = True
                    # The increment must be taken against the stored count
                    # BEFORE it is overwritten; otherwise it is always 0.
                    increment = int(item["dpcount"]) - int(baseinfo[4] or 0)
                    baseinfo[1] = item["url"]
                    baseinfo[4] = item["dpcount"]
                    baseinfo[5] = 0
                    baseinfo[6] = increment if increment > 0 else 0
                    baseinfo[7] = item["img"]
                    baseinfo[8] = item["id"]
                    break
            if not if_exist:
                new_baseinfo.append({
                    "guid": uuid.uuid1(),
                    "url": item["url"],
                    "location_id": location_id,
                    "OTA": self.__ota_info,
                    "comm_num": item["dpcount"],
                    "if_overtime": 0,
                    "incre_num": item["dpcount"],
                    "img": item["img"],
                    "id_in_ota": item["id"]
                })
        for baseinfo in old_baseinfo:
            update_baseinfo.append({
                "guid": baseinfo[0],
                "url": baseinfo[1],
                "location_id": baseinfo[2],
                "OTA": baseinfo[3],
                "comm_num": baseinfo[4],
                "if_overtime": baseinfo[5],
                "incre_num": baseinfo[6],
                "img": baseinfo[7],
                "id_in_ota": baseinfo[8]
            })
        print("%d %d %d" % (len(new_locations), len(new_baseinfo), len(update_baseinfo)))
        self.hotel_dao.save_locations(new_locations)
        self.hotel_dao.save_baseinfo(new_baseinfo)
        self.hotel_dao.update_baseinfo(update_baseinfo)
122 |
123 |
124 |
if __name__ == "__main__":
    # Manual smoke run: fetch the hotel list for one city, persist it, then
    # print the wall-clock duration in seconds.
    # NOTE(review): the meaning of the argument 12 passed to getHotelList is
    # not visible from here -- confirm against its definition.
    started_at = time.time()
    catcher = XiechengCatcher()
    catcher.setCity("南京")
    fetched_hotels = catcher.getHotelList(12)
    catcher.saveHolteList(fetched_hotels)
    finished_at = time.time()
    print(finished_at - started_at)
--------------------------------------------------------------------------------
/ugc.aggregator.esri/docs/ugc.aggregator/site/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | pybuilder.helloworld
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
81 |
82 |
83 |
84 |
85 |
89 |
90 |
91 |
92 |
93 |
94 |
95 | - Docs »
96 |
97 |
98 |
99 | - Home
100 | -
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
Welcome to MkDocs
110 |
For full documentation visit mkdocs.org.
111 |
Commands
112 |
113 | mkdocs new [dir-name] - Create a new project.
114 | mkdocs serve - Start the live-reloading docs server.
115 | mkdocs build - Build the documentation site.
116 | mkdocs help - Print this help message.
117 |
118 |
Project layout
119 |
mkdocs.yml # The configuration file.
120 | docs/
121 | index.md # The documentation homepage.
122 | ... # Other markdown pages, images and other files.
123 |
124 |
125 |
126 |
127 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 | Next »
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
171 |
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/dao/hotel/xiechengdao/xiecheng.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __author__ = 'LiuYang'
3 | import MySQLdb
4 | import uuid
5 | import random
6 | import json
7 |
8 | from dao.SuperDAO import SuperDAO
9 |
10 |
class xiechengDAO(SuperDAO):
    """MySQL access object for the xiecheng hotel tables.

    Each public method opens its own connection from ``self.host``,
    ``self.user``, ``self.password`` and ``self.db`` and always closes the
    cursor and the connection before returning, including when a statement
    raises.
    """

    def __init__(self, host, db, user, password):
        SuperDAO.__init__(self, host, db, user, password)

    # ------------------------------------------------------------------
    # Private helpers (name-mangled so they cannot clash with SuperDAO).
    # ------------------------------------------------------------------

    def __connect(self):
        """Open a new utf8 connection using the credentials on the instance."""
        return MySQLdb.connect(self.host, self.user, self.password, self.db, charset='utf8')

    def __query_all(self, sql):
        """Run one statement and return every row; always releases resources."""
        db = self.__connect()
        try:
            cursor = db.cursor()
            try:
                cursor.execute(sql)
                rows = cursor.fetchall()
                db.commit()
                return rows
            finally:
                cursor.close()
        finally:
            db.close()

    def __write_each(self, sql, items, to_params, report):
        """Execute ``sql`` once per item, then commit a single time.

        ``to_params(item)`` builds the parameter tuple; it runs inside the
        per-item ``try`` so a missing key only skips that item.  A failing
        item is passed to ``report(item, error)`` and the loop carries on
        (best-effort bulk write).
        """
        db = self.__connect()
        try:
            cursor = db.cursor()
            try:
                for item in items:
                    try:
                        cursor.execute(sql, to_params(item))
                    except Exception as e:
                        report(item, e)
                db.commit()
            finally:
                cursor.close()
        finally:
            db.close()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    # Store basic hotel information.
    def savehotelComment(self, items):
        """Upsert (``replace into``) one ``hotelinfo`` row per dict in ``items``.

        A failing item has its error printed and is skipped.
        """
        keys = ("guid", "city", "title", "price", "score", "recommend", "area",
                "havawifi", "discussNum", "common_facilities",
                "activity_facilities", "service_facilities", "room_facilities")

        def report(item, error):
            print(error)

        self.__write_each(
            "replace into hotelinfo(guid,city,title,price,score,recommend,area,havawifi,discussNum,common_facilities,activity_facilities,service_facilities,room_facilities)values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
            items,
            lambda item: tuple(item[key] for key in keys),
            report)

    # Store the links of all hotels.
    def savehotellink(self, listPageInfo):
        """Write one ``hotellianjie`` row per hotel, each with a fresh uuid1 guid.

        A failing hotel has its url printed and is skipped.
        """
        def report(hotel, error):
            print(hotel["url"])

        self.__write_each(
            "replace into hotellianjie(guid,lianjie,city,comm_num)values(%s,%s,%s,%s)",
            listPageInfo,
            lambda hotel: (uuid.uuid1(), hotel["url"], hotel["city"], hotel["comm_num"]),
            report)

    # Fetch the surrounding-facilities data.
    def get_around_facilities_data(self):
        """Return the JSON-decoded ``facilities_lntandlang`` value of every row."""
        rows = self.__query_all("SELECT facilities_lntandlang FROM around_facilities_distance")
        return [json.loads(row[0]) for row in rows]

    # Fetch the farthest facility and its distance.
    def get_max_distance_data(self):
        """Return the JSON-decoded ``maxdistance`` value of every row."""
        rows = self.__query_all("SELECT maxdistance FROM around_facilities_distance")
        return [json.loads(row[0]) for row in rows]

    # Read the link data from the database.
    def _return(self):
        """Return every row of ``hotellianjie``."""
        return self.__query_all("SELECT * FROM hotellianjie")

    # Read the comment data from the database.
    def _returncommentinfo(self):
        """Return every row of ``hotelcommentinfo``."""
        return self.__query_all("SELECT * FROM hotelcommentinfo")

    # Store hotel comment information.
    def savehotelCommentinfo(self, items):
        """Insert one ``hotelcommentinfo`` row per dict in ``items``.

        The ``hotelname`` column is filled from ``item["title"]``.  A failing
        item is printed and skipped.
        """
        keys = ("title", "username", "commentscore", "intime", "tourstyle",
                "praisenum", "commenttime", "comment")

        def report(item, error):
            print(item)

        self.__write_each(
            "insert into hotelcommentinfo(hotelname,username,commentscore,intime,tourstyle,praisenum,commenttime,comment)values(%s,%s,%s,%s,%s,%s,%s,%s)",
            items,
            lambda item: tuple(item[key] for key in keys),
            report)

    # Store hotel comment information (including sentiment value).
    def savehotelCommentinfosenti(self, items):
        """Insert one ``xiechengcomment`` row per dict in ``items``.

        A failing item has its error printed and is skipped.
        """
        keys = ("hotelname", "username", "commentscore", "intime", "tourstyle",
                "praisenum", "comment", "senti_value", "viewpoint")

        def report(item, error):
            print(error)

        self.__write_each(
            "insert into xiechengcomment(hotelname,username,commentscore,intime,tourstyle,praisenum,comment,senti_value,viewpoint)values(%s,%s,%s,%s,%s,%s,%s,%s,%s)",
            items,
            lambda item: tuple(item[key] for key in keys),
            report)

    # Read the comment data from the database.
    def _returncomment(self):
        """Return every row of ``hotelcommentinfo`` (same query as ``_returncommentinfo``)."""
        return self.__query_all("SELECT * FROM hotelcommentinfo")

    def get_comments(self):
        """Return the records of ``xiechengcomment`` via the inherited ``get_records``."""
        return self.get_records("xiechengcomment")
--------------------------------------------------------------------------------
/ugc.aggregator.esri/src/main/python/util/http/UniversalSDK.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | __author__ = 'DreamCatcher'
4 | __version__ = '1.0.0'
5 |
6 | import json,urllib2,urllib,gzip,collections
7 | from util.common.Decorators import retry
8 | import logging
9 | logger = logging.getLogger('ugc')
10 |
11 | try:
12 | from cStringIO import StringIO
13 | except ImportError:
14 | from StringIO import StringIO
15 |
16 |
class APIClient(object):
    """Root of the fluent HTTP client.

    Reading any attribute that is not defined on the object yields a
    ``_Callable`` whose URL is ``<domain>/<attribute name>``.
    """

    # Class-level default; each instance overrides it in __init__.
    domain = ""

    def __init__(self, domain):
        self.domain = domain

    def __getattr__(self, attr):
        # Only invoked when normal attribute lookup fails, so ``domain`` itself
        # never reaches this point.
        first_segment_url = '%s/%s' % (self.domain, attr)
        return _Callable(first_segment_url)
25 |
26 | class _Callable(object):
27 | def __init__(self,client):
28 | self.client = client
29 | self.header = None
30 |
31 | def __getattr__(self,attr):
32 | @retry((urllib2.URLError,ValueError), tries=10, delay=1, backoff=2)
33 | def execute(**kw):
34 | params = '%s'%(_encode_params(**kw))
35 | if len(params)!=0:
36 | http_url = '%s?%s'%(self.client,params) if self.method=='get' else self.client
37 | else:
38 | http_url = self.client
39 | http_body = None if self.method == 'get' else params
40 | # logging.info(http_url)
41 | req = urllib2.Request(http_url,data=http_body)
42 | req.add_header('Accept-Encoding', 'gzip')
43 | if self.header is not None:
44 | for key in self.header:
45 | req.add_header(key,self.header[key])
46 | try:
47 | resp = urllib2.urlopen(req,timeout=200)
48 | body = _read_body(resp)
49 | r = _parse_json(body)
50 | return r
51 | except Exception as e:
52 | logging.error(e)
53 | pass
54 | def execute_by_dict(dict):
55 | params = '%s'%(_encode_params_by_dict(dict))
56 | if len(params)!=0:
57 | http_url = '%s?%s'%(self.client,params) if self.method=='get' else self.client
58 | else:
59 | http_url = self.client
60 | http_body = None if self.method == 'get' else params
61 |
62 | req = urllib2.Request(http_url,data=http_body)
63 | req.add_header('Accept-Encoding', 'gzip')
64 | if self.header is not None:
65 | for key in self.header:
66 | req.add_header(key,self.header[key])
67 |
68 | try:
69 | resp = urllib2.urlopen(req,timeout=200)
70 | body = _read_body(resp)
71 | r = _parse_json(body)
72 | return r
73 | except Exception as e:
74 | logging.error(e)
75 | pass
76 | # 添加尾巴
77 | def add_trail(trail):
78 | return _Callable('%s%s'%(self.client,trail))
79 |
80 | def add_header(header):
81 | self.header = header
82 | return _Callable('%s'%self.client)
83 |
84 | if attr == 'get':
85 | self.method = 'get'
86 | return execute
87 | if attr == 'get_by_dict':
88 | self.method = 'get'
89 | return execute_by_dict
90 | if attr == 'post':
91 | self.method = 'post'
92 | return execute
93 | if attr == 'addtrail':
94 | return add_trail
95 | if attr == 'addheader':
96 | return add_header
97 | return _Callable('%s/%s'%(self.client,attr))
98 |
99 |
def _parse_json(s):
    """Decode the JSON text ``s``, turning every JSON object into a JsonDict."""

    def _to_json_dict(pairs):
        # The json module hands each decoded object over as a mapping; copy it
        # into a JsonDict, passing every key through str() on the way.
        converted = JsonDict()
        for key in pairs:
            converted[str(key)] = pairs[key]
        return converted

    return json.loads(s, object_hook=_to_json_dict)
110 |
class JsonDict(dict):
    """A dict whose entries can also be read and written as attributes."""

    def __setattr__(self, attr, value):
        # Attribute assignment is stored as a regular dict entry.
        self[attr] = value

    def __getattr__(self, attr):
        # Reached only when normal attribute lookup fails; fall back to the
        # dict contents and report a missing key as a missing attribute.
        try:
            found = self[attr]
        except KeyError:
            raise AttributeError(r"'JsonDict' object has no attribute '%s'" % attr)
        return found
122 |
123 | def _encode_params(**kw):
124 | '''
125 | do url-encode parameters
126 |
127 | >>> _encode_params(a=1, b='R&D')
128 | 'a=1&b=R%26D'
129 | >>> _encode_params(a=u'\u4e2d\u6587', b=['A', 'B', 123])
130 | 'a=%E4%B8%AD%E6%96%87&b=A&b=B&b=123'
131 | '''
132 | args = []
133 | for k, v in kw.iteritems():
134 | if isinstance(v, basestring):
135 | qv = v.encode('utf-8') if isinstance(v, unicode) else v
136 | args.append('%s=%s' % (k, urllib.quote(qv)))
137 | elif isinstance(v, collections.Iterable):
138 | for i in v:
139 | qv = i.encode('utf-8') if isinstance(i, unicode) else str(i)
140 | args.append('%s=%s' % (k, urllib.quote(qv)))
141 | else:
142 | qv = str(v)
143 | args.append('%s=%s' % (k, urllib.quote(qv)))
144 | return '&'.join(args)
145 |
146 | def _encode_params_by_dict(dict):
147 | args = []
148 | for k, v in dict.iteritems():
149 | if isinstance(v, basestring):
150 | qv = v.encode('utf-8') if isinstance(v, unicode) else v
151 | args.append('%s=%s' % (k, urllib.quote(qv)))
152 | elif isinstance(v, collections.Iterable):
153 | for i in v:
154 | qv = i.encode('utf-8') if isinstance(i, unicode) else str(i)
155 | args.append('%s=%s' % (k, urllib.quote(qv)))
156 | else:
157 | qv = str(v)
158 | args.append('%s=%s' % (k, urllib.quote(qv)))
159 | return '&'.join(args)
160 | def _read_body(obj):
161 | using_gzip = obj.headers.get('Content-Encoding', '')=='gzip'
162 | body = obj.read()
163 | if using_gzip:
164 | gzipper = gzip.GzipFile(fileobj=StringIO(body))
165 | fcontent = gzipper.read()
166 | gzipper.close()
167 | return fcontent
168 | return body
169 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/html/review-monitor.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | 基于社会感知的酒店竞业市场时空可视化分析
10 |
11 |
12 |
13 |
14 |
15 |
16 |
21 |
46 |
47 |
48 |
49 |
50 |
51 |
60 |
69 |
70 |
79 |
80 |
89 |
90 |
91 |
92 |
93 |
94 |
96 |
97 |
98 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
--------------------------------------------------------------------------------
/ugc.hotel.web.esri/html/quality.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | 基于社会感知的酒店竞业市场时空可视化分析
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
Loading....
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
74 |
75 |
76 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
Room Message List
94 |
95 |
96 |
97 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
119 |
120 |
123 |
124 |
127 |
128 |
129 |
130 |
131 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
--------------------------------------------------------------------------------