├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── config.py ├── cus_exception.py ├── font └── DroidSansFallback.ttf ├── jd ├── README.md ├── __init__.py ├── admin.py ├── analysis_jd_item.py ├── apps.py ├── items.py ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ ├── _private.py │ │ ├── clear_running.py │ │ ├── full_analysis.py │ │ ├── rand_item_analysis.py │ │ ├── real_time_analysis.py │ │ └── run_spider.py ├── middleware.py ├── middlewares │ ├── __init__.py │ └── middlewares.py ├── migrations │ ├── 0001_initial.py │ └── __init__.py ├── models.py ├── pipelines.py ├── proxymanager.py ├── send_email.py ├── settings.py ├── spiders │ ├── __init__.py │ ├── jd.py │ ├── jd_comment.py │ └── jd_item_info.py ├── static │ ├── assets │ │ ├── css │ │ │ ├── font-awesome.min.css │ │ │ ├── ie8.css │ │ │ ├── images │ │ │ │ └── overlay.png │ │ │ └── main.css │ │ ├── fonts │ │ │ ├── FontAwesome.otf │ │ │ ├── fontawesome-webfont.eot │ │ │ ├── fontawesome-webfont.svg │ │ │ ├── fontawesome-webfont.ttf │ │ │ ├── fontawesome-webfont.woff │ │ │ └── fontawesome-webfont.woff2 │ │ ├── js │ │ │ ├── ie │ │ │ │ ├── PIE.htc │ │ │ │ ├── backgroundsize.min.htc │ │ │ │ ├── html5shiv.js │ │ │ │ └── respond.min.js │ │ │ ├── jquery.cookie.js │ │ │ ├── jquery.min.js │ │ │ ├── jquery.poptrox.min.js │ │ │ ├── main.js │ │ │ ├── skel.min.js │ │ │ └── util.js │ │ └── sass │ │ │ ├── ie8.scss │ │ │ ├── libs │ │ │ ├── _functions.scss │ │ │ ├── _mixins.scss │ │ │ ├── _skel.scss │ │ │ └── _vars.scss │ │ │ └── main.scss │ ├── images │ │ ├── 10060984607_channel.png │ │ ├── 10060984607_color.png │ │ ├── 10060984607_creation_time.png │ │ ├── 10060984607_days.png │ │ ├── 10060984607_general.png │ │ ├── 10060984607_good.png │ │ ├── 10060984607_mobile.png │ │ ├── 10060984607_poor.png │ │ ├── 10060984607_province.png │ │ ├── 10060984607_size.png │ │ ├── 10060984607_time.png │ │ ├── 10060984607_user_level.png │ │ ├── 11476089321_channel.png │ │ ├── 11476089321_color.png │ │ ├── 
11476089321_creation_time.png │ │ ├── 11476089321_days.png │ │ ├── 11476089321_general.png │ │ ├── 11476089321_good.png │ │ ├── 11476089321_mobile.png │ │ ├── 11476089321_poor.png │ │ ├── 11476089321_province.png │ │ ├── 11476089321_size.png │ │ ├── 11476089321_time.png │ │ ├── 11476089321_user_level.png │ │ ├── 11943216519_channel.png │ │ ├── 11943216519_color.png │ │ ├── 11943216519_creation_time.png │ │ ├── 11943216519_days.png │ │ ├── 11943216519_good.png │ │ ├── 11943216519_mobile.png │ │ ├── 11943216519_province.png │ │ ├── 11943216519_size.png │ │ ├── 11943216519_time.png │ │ ├── 11943216519_user_level.png │ │ ├── 2359205_channel.png │ │ ├── 2359205_color.png │ │ ├── 2359205_creation_time.png │ │ ├── 2359205_days.png │ │ ├── 2359205_general.png │ │ ├── 2359205_good.png │ │ ├── 2359205_mobile.png │ │ ├── 2359205_poor.png │ │ ├── 2359205_province.png │ │ ├── 2359205_size.png │ │ ├── 2359205_time.png │ │ ├── 2359205_user_level.png │ │ ├── 3652063_channel.png │ │ ├── 3652063_color.png │ │ ├── 3652063_creation_time.png │ │ ├── 3652063_days.png │ │ ├── 3652063_general.png │ │ ├── 3652063_good.png │ │ ├── 3652063_mobile.png │ │ ├── 3652063_poor.png │ │ ├── 3652063_province.png │ │ ├── 3652063_size.png │ │ ├── 3652063_time.png │ │ ├── 3652063_user_level.png │ │ ├── 987091_channel.png │ │ ├── 987091_color.png │ │ ├── 987091_creation_time.png │ │ ├── 987091_days.png │ │ ├── 987091_general.png │ │ ├── 987091_good.png │ │ ├── 987091_mobile.png │ │ ├── 987091_poor.png │ │ ├── 987091_province.png │ │ ├── 987091_size.png │ │ ├── 987091_time.png │ │ ├── 987091_user_level.png │ │ ├── avatar.jpg │ │ ├── bg.jpg │ │ ├── fulls │ │ │ ├── 01.jpg │ │ │ ├── 02.jpg │ │ │ ├── 03.jpg │ │ │ ├── 04.jpg │ │ │ ├── 05.jpg │ │ │ └── 06.jpg │ │ ├── job.png │ │ ├── thumbs │ │ │ ├── 01.jpg │ │ │ ├── 02.jpg │ │ │ ├── 03.jpg │ │ │ ├── 04.jpg │ │ │ ├── 05.jpg │ │ │ └── 06.jpg │ │ ├── weixin.png │ │ ├── xiaomi5s_comment_count.png │ │ └── xiaomi5s_comment_full_count.png │ ├── jquery │ │ └── 
jquery-3.2.0.js │ ├── js │ │ ├── jquery_cookie.js │ │ └── switchemail.js │ ├── loader.gif │ ├── showdownjs │ │ ├── script.js │ │ └── style.css │ └── style │ │ ├── Clearness Dark.css │ │ ├── Clearness.css │ │ ├── GitHub.css │ │ ├── GitHub2.css │ │ └── style_base.css ├── templates │ ├── 404.html │ ├── article.html │ ├── base.html │ ├── custom_editor_page.html │ ├── full_index.html │ ├── full_result.html │ ├── index.html │ ├── jd_index.html │ └── left_nav.html ├── tests.py ├── urls.py └── views.py ├── jd_analysis ├── __init__.py ├── settings.py ├── urls.py └── wsgi.py ├── killport.py ├── manage.py ├── media ├── 10267425905_time.png ├── 10866679001_province.png ├── 11374491518_size.png ├── 11430756607_channel.png ├── 11718220593_color.png ├── 11718220593_good.png ├── 11718220593_size.png ├── jd_analysis_show_image │ ├── 3133851_channel.png │ ├── 3133851_color.png │ ├── 3133851_general.png │ ├── 3133851_good.png │ ├── 3133851_poor.png │ ├── 3133851_province.png │ ├── 3133851_time.png │ ├── 3995645_channel.png │ ├── 3995645_color.png │ ├── 3995645_general.png │ ├── 3995645_good.png │ ├── 3995645_poor.png │ ├── 3995645_province.png │ └── 3995645_time.png ├── mask.png └── weixin.png ├── scrapy.cfg ├── sqlhelper.py └── utils.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.js linguist-language=Python 2 | *.css linguist-language=Python 3 | *.html linguist-language=Python 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # 
PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | # custom 92 | .idea 93 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 
18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 
48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 
90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 
129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 
160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 京东商城商品评价数据分析 2 | 附上使用地址 3 | 体验地址: 4 | 体验示例地址: 5 | 6 | ## 项目来源 7 | 互联网购物现在已经是非常普遍的购物方式,在互联网上购买商品并且使用之后,很多人都会回过头来对自己购买的商品进行一些评价,以此来表达自己对于该商品使用后的看法。商品评价的好坏对于一个商品的重要性显而易见,大部分消费者都以此作为快速评判该商品质量优劣的方式。所以,与此同时,有些商家为了获得好评,还会做一些 "好评优惠" 或者 "返点" 活动来刺激消费者评价商品。
8 | 既然商品评价对于消费者选购商品而言至关重要,那么我想试试可以从这些评价信息中获取到怎样的价值,来帮助消费者快速获取到关于该商品的一些重要信息,给他们的购物带来更加可靠的保证?
9 | 所以,我认为,一种快速、全面、高提炼度和高对比度的信息获取和展示方式将会非常必要。 于是,我采用分布式快速抓取京东的评价信息,然后使用 pandas 对抓取到的数据进行分析。 10 | 11 | 12 | ## 项目依赖 13 | * python 2.7.12 14 | * Django 15 | * django-crontab 16 | * scrapy 17 | * requests 18 | * pymysql 19 | * pandas 20 | * numpy 21 | * matplotlib 22 | * wordcloud 23 | * Markdown2 24 | * chardet 25 | * redis 26 | * redis 数据库 27 | * mysql 数据库 28 | 29 | 30 | 安装命令: 31 | 32 | ``` 33 | $ pip install Django django-crontab Scrapy requests pymysql pandas numpy wordcloud Markdown2 redis chardet 34 | ``` 35 | 安装 matplotlib 请参考:[matplotlib github](https://github.com/ehmatthes/pcc/blob/master/chapter_15/README.md#installing-matplotlib) 36 | 37 | ## 克隆使用 38 | 将项目克隆到本地 39 | 40 | ``` 41 | $ git clone https://github.com/awolfly9/jd_analysis.git 42 | ``` 43 | 44 | 进入工程目录 45 | 46 | ``` 47 | $ cd jd_analysis 48 | ``` 49 | 进入mysql命令界面 50 | 51 | ``` 52 | $ mysql -u root -p 53 | ``` 54 | 55 | 创建 Django 使用的数据库 56 | 57 | ``` 58 | $ create database jd_analysis default character set utf8; 59 | ``` 60 | 61 | 修改 Django 配置 62 | 63 | ``` 64 | $ vim jd_analysis/settings.py 65 | ---------- 66 | DATABASES = { 67 | 'default': { 68 | 'ENGINE': 'django.db.backends.mysql', 69 | 'NAME': 'jd_analysis', 70 | 'USER': 'root', 71 | 'PASSWORD': '123456', 72 | 'HOST': '', 73 | 'PORT': '', 74 | } 75 | } 76 | ``` 77 | 78 | 修改配置文件中连接数据库配置 79 | 80 | ``` 81 | $ vim config.py 82 | ---------- 83 | # local 84 | database_config = { 85 | 'host': 'localhost', 86 | 'port': 3306, 87 | 'user': 'root', 88 | 'password': '123456', 89 | 'charset': 'utf8', 90 | } 91 | ``` 92 | 93 | 修改 redis 的连接用户名和密码 94 | 95 | ``` 96 | $ vim config.py 97 | ---------- 98 | redis_pass = '' 99 | redis_host = 'localhost' 100 | redis_part = '6379' 101 | redis_db = 10 102 | ``` 103 | 104 | 部分设置参数说明: 105 | 106 | | param | Description | 默认值 | 107 | | ----| ---- | ---- | 108 | | is_distributed | 是否分布式抓取 | False | 109 | | is_proxy | 是否使用代理 | False | 110 | | proxy_address | 代理地址 | | 111 | | email_type | 使用哪个邮箱发送邮件 | gmail | 112 | | 
self_email | 邮箱地址 | 填写自己的邮箱地址 | 113 | | self_password | 邮箱密码 | 填写自己的邮箱密码 | 114 | 115 | 116 | 生成 Django 数据库 117 | 118 | ``` 119 | $ python manage.py makemigrations 120 | $ python manage.py migrate 121 | ``` 122 | 123 | 运行 Django 服务器 124 | 125 | ``` 126 | $ python manage.py runserver 127 | ``` 128 | 129 | 在浏览器中访问 进行测试 130 | 131 | 132 | ## 项目说明 133 | 完整流程介绍,请见 134 | 135 | 136 | 如果在使用过程中有任何问题,欢迎提 Issues,也可联系我的微信进入微信群和大伙一起学习。(在我博客中可以找到我的微信) 137 | 138 | 139 | ## 常见问题 140 | #### mysql 版本过低 141 | 在创建表的时候可能会由于数据库版本过低创建失败,解决方式请参考:[stackoverflow](http://stackoverflow.com/questions/23054394/mysql-error-there-can-be-only-one-timestamp-column-with-current-timestamp-in-de) 142 | 143 | #### win 用户出现 文件名、目录名或卷标语法不正确 144 | python subprocess 的路径问题。解决方法是更改 jd/views 中 runspider 方法。 145 | 146 | ``` 147 | cmd = 'python manage.py real_time_analysis -a name={name} -a guid={guid} ' \ 148 | '-a product_id={product_id} -a url={url};'. \ 149 | format(url = str(url), name = name, dir = settings.BASE_DIR, guid = data.get('guid'), 150 | product_id = product_id) 151 | subprocess.Popen(cmd, shell = True, cwd=settings.BASE_DIR) 152 | ``` 153 | 154 | 155 | 156 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | # local 4 | database_config = { 5 | 'host': 'localhost', 6 | 'port': 3306, 7 | 'user': 'root', 8 | 'password': '123456', 9 | 'charset': 'utf8', 10 | } 11 | 12 | database = 'jd' 13 | 14 | jd_item_table = 'item' 15 | analysis_item_table = 'analysis' 16 | 17 | redis_pass = '' 18 | redis_host = 'localhost' 19 | redis_part = '6379' 20 | redis_db = 10 21 | 22 | is_distributed = False 23 | 24 | is_proxy = False 25 | proxy_address = 'http://127.0.0.1:8000/' 26 | 27 | email_type = 'gmail' 28 | 29 | # gmail 30 | if email_type == 'gmail': 31 | self_email = '******' 32 | self_password = '******' 33 | elif email_type == 'qq': # qq 34 | self_email = 
'******@qq.com' 35 | self_password = '******' 36 | -------------------------------------------------------------------------------- /cus_exception.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | 3 | import logging 4 | import datetime 5 | 6 | 7 | class CusException(Exception): 8 | def __init__(self, name, error_msg): 9 | super(CusException, self).__init__(name, error_msg) 10 | self.name = name 11 | 12 | if type(error_msg) == CusException: 13 | self.error_msg = error_msg.error_msg 14 | else: 15 | self.error_msg = error_msg 16 | 17 | self.error_time = str(datetime.datetime.now()) 18 | -------------------------------------------------------------------------------- /font/DroidSansFallback.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/font/DroidSansFallback.ttf -------------------------------------------------------------------------------- /jd/README.md: -------------------------------------------------------------------------------- 1 | # 分布式抓取京东商城评价并且使用 pandas 进行数据分析 2 | 3 | 现在互联网购物已经非常成熟而且很多人在网上购买商品后都会留下评论信息,而且有些商家为了获得好评还有一些好评优惠或者返点,那么我想试试可以从这些评价信息中获取到怎样的价值呢,我采用分布式快速抓取京东的评价信息,然后使用 pandas 对抓取到的数据进行分析。话不多说先附上使用地址
4 | 体验地址:
5 | 体验示例地址:
6 | 7 | 8 | **想要分析京东商城的商品评价信息,那么需要做些什么呢**
9 | 10 | * 采用分布式抓取,尽量在短时间内抓取需要分析的商品足够多的评价信息
11 | * 将抓取到的评价信息都存储到数据库 12 | * 从数据库中取出所有数据进行数据分析 13 | * 生成好评的词云,并且获取关键字 14 | * 生成中评的词云,并且获取关键字 15 | * 生成差评的词云,并且获取关键字 16 | * 分析购买该商品不同颜色的比例,生成柱状图 17 | * 分析购买该商品不同配置的比例,生成柱状图 18 | * 分析该商品的销售数量和评论数量和时间的关系,生成时间折线图 19 | * 分析该商品不同省份购买的比例,生成柱状图 20 | * 分析该商品不同渠道的销售比例,生成柱状图 21 | 22 | * 利用 Django 搭建后台,将数据抓取和数据分析连起来 23 | * 前端显示数据抓取和分析结果 24 | 25 | 26 | ### 分布式抓取京东商城的评价信息 27 | 采用分布式抓取的目的是快速的在短时间内尽量抓取足够多的商品评价 28 | 29 | 1. 以 [iPhone7](https://item.jd.com/3995645.html) 为例,通过 Chrome 抓包分析出京东商城的评价请求 URL 30 | 2. 找出评价请求 URL 规律,获取到如下 URL 组合链接 31 | 3. 利用 Chrome 插件 Postman 测试链接是否可用,发现京东获取评价信息并没有验证 Cookie 之类的反爬措施 32 | 4. 开始编码利用 scrapy 抓取京东商城的商品评价信息并存入数据库以备使用 33 | 34 | ### 数据分析 35 | 1. 从数据库中取出相应数据,开始分析 36 | 2. 使用 python 的扩展库 wordcloud 分别提取好评、中评、差评的关键字,并且生成相应的词云图片 37 | 3. 分析该商品不同颜色的销量占比,并且生成柱状图,例如 iphone7 的不同颜色金色、玫瑰金色、银色、黑色、亮黑色、还有最新出的红色的占比 38 | 4. 分析该商品不同配置的销量占比,并且生成柱状图,例如 iphone7 32G 、 64G、128G 存储 39 | 5. 分析该商品销售和评论时间并且生成折线图,分析出商品在什么时间最畅销 40 | 6. 分析用户购买该商品的渠道,例如用户通过京东 Android 客户端、微信京东购物、京东 iPhone 客户端购物的比例,并且生成柱状图 41 | 7. 分析购买该商品的用户的地域省份。例如北京、上海、广州哪个城市在京东上购买 iPhone7 的人更多 42 | 8. 将以上分析结果都存储保留 43 | 44 | ### Django 后台 WEB 45 | 使用 Django 搭建一个简易的后台 jd_analysis,将分布式抓取数据和数据分析连起来,并且将分析结果返回前端显示。 46 | 47 | 1. jd_analysis 提供一个接口接受用户请求分析的京东商城商品的 URL 链接 48 | 2. jd_analysis 接收到商品链接后开启爬虫进程开始抓取需要分析的商品的名称和评价数量 49 | 3. 组合出完整的评价链接插入到 redis 中,实现分布式爬虫抓取,尽可能在短时间内抓取足够多的该商品评价信息(我现在是 30s 时间大概可以抓取 3000 条评价信息) 50 | 4. 主服务器等待一定的抓取时间,例如主服务器等待 30s,30s 后一定要给前端返回分析结果,所以等 30s 后清空 redis 中该商品的链接,从服务器读取不到需要抓取的链接也就自动关闭 51 | 5. 开启分析进程,开始分析抓取到的所有数据,并且生成图表等信息 52 | 53 | ### 前端展示 54 | 在客户端第一次请求时,生成一个 GUID,并且存储在 cookie 中。然后开启一个定时器,带上 GUID 不断的向 jd_analysis 后台请求结果。jd_analysis 后台利用请求的 GUID 从 redis 中获取抓取信息和分析结果的所有内容,返回给前端。前端显示请求到的结果。 55 | 56 | ### 最后附上两张效果图 57 | **购买和评论时间折线图
** 58 | ![](http://i.imgur.com/dYShBOB.png) 59 | **购买渠道柱状图
** 60 | ![](http://i.imgur.com/6PKeOOX.png) 61 | 62 | ### 大功告成 63 | 以上就是完整的抓取京东商品的评价信息并且使用 pandas 分析评价然后利用 Django 搭建后台前端显示抓取和分析结果的所有步骤。
64 | 65 | 再次贴上使用地址: 欢迎多多尝试,多挑毛病~
66 | 如果你对这个项目感兴趣欢迎和我交流沟通,我也建立了这个项目和数据分析的微信群,也可以加我好友进微信群,我的个人微信
67 | ![](http://awolfly9.com/static/images/weixin.png) 68 | 69 | 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /jd/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/__init__.py -------------------------------------------------------------------------------- /jd/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | # Register your models here. 4 | -------------------------------------------------------------------------------- /jd/apps.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from django.apps import AppConfig 4 | 5 | 6 | class JdConfig(AppConfig): 7 | name = 'jd' 8 | -------------------------------------------------------------------------------- /jd/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class CommentItem(scrapy.Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | pass 15 | -------------------------------------------------------------------------------- /jd/management/__init__.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | 3 | -------------------------------------------------------------------------------- /jd/management/commands/__init__.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | 3 | 
-------------------------------------------------------------------------------- /jd/management/commands/_private.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | 3 | -------------------------------------------------------------------------------- /jd/management/commands/clear_running.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | import json 3 | import os 4 | import sys 5 | 6 | import datetime 7 | import redis 8 | from django.core.management.base import BaseCommand 9 | 10 | # python manage.py run_analysis 11 | import config 12 | import utils 13 | 14 | 15 | class Command(BaseCommand): 16 | help = 'clear running' 17 | 18 | def add_arguments(self, parser): 19 | parser.add_argument('-a', action = 'append', dest = 'spargs', default = [], 20 | help = 'set spider argument (may be repeated)') 21 | 22 | #必须实现的方法 23 | def handle(self, *args, **options): 24 | reload(sys) 25 | sys.setdefaultencoding('utf-8') 26 | 27 | spargs = utils.arglist_to_dict(options['spargs']) 28 | key = spargs.get('key', 'running') 29 | os.chdir(sys.path[0]) 30 | 31 | red = redis.StrictRedis(host = config.redis_host, port = config.redis_part, db = config.redis_db, 32 | password = config.redis_pass) 33 | res = red.get(key) 34 | if res != None: 35 | info = json.loads(res) 36 | info['name'] = 'clear_running' 37 | info['error_msg'] = 'interrupt error' 38 | red.lpush('retry_list', info) 39 | red.delete(key) 40 | -------------------------------------------------------------------------------- /jd/management/commands/full_analysis.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | 3 | import json 4 | import logging 5 | import os 6 | import datetime 7 | import config 8 | import utils 9 | import redis 10 | import sys 11 | 12 | from scrapy.utils.project import get_project_settings 13 | from scrapy.utils.log import 
configure_logging 14 | from django.core.management.base import BaseCommand 15 | from sqlhelper import SqlHelper 16 | from jd.models import JDCommentAnalysis 17 | from jd.spiders.jd_comment import JDCommentSpider 18 | from jd.spiders.jd_item_info import JDItemInfoSpider 19 | from scrapy.crawler import CrawlerRunner 20 | from twisted.internet import reactor, defer 21 | from cus_exception import CusException 22 | from jd.analysis_jd_item import Analysis 23 | from jd.send_email import send_email 24 | 25 | 26 | # python manage.py run_analysis 27 | class Command(BaseCommand): 28 | help = 'run analysis' 29 | 30 | def add_arguments(self, parser): 31 | parser.add_argument('-a', action = 'append', dest = 'spargs', default = [], 32 | help = 'set spider argument (may be repeated)') 33 | 34 | #必须实现的方法 35 | def handle(self, *args, **options): 36 | reload(sys) 37 | sys.setdefaultencoding('utf-8') 38 | 39 | spargs = utils.arglist_to_dict(options['spargs']) 40 | key = spargs.get('key', 'running') 41 | os.chdir(sys.path[0]) 42 | 43 | red = redis.StrictRedis(host = config.redis_host, port = config.redis_part, db = config.redis_db, 44 | password = config.redis_pass) 45 | run = red.get(key) 46 | if run != None: # 如果有正在运行的进程则等待 47 | print('have running waiting time:%s' % str(datetime.datetime.now())) 48 | return 49 | 50 | count = red.llen('analysis_users') 51 | if count <= 0: # 如果 redis 中没有需要查询的数据 52 | print('not data waiting time:%s' % str(datetime.datetime.now())) 53 | return 54 | 55 | user = red.lpop('analysis_users') 56 | red.set(key, user) 57 | print('running... 
user:%s' % user) 58 | info = json.loads(user) 59 | try: 60 | run_als = RunAnalysis(red, key, user) 61 | run_als.run() 62 | except CusException, e: 63 | info['name'] = e.name 64 | info['error_msg'] = e.error_msg 65 | red.lpush('retry_list', info) 66 | except Exception, e: 67 | info['name'] = 'unknown' 68 | info['error_msg'] = e 69 | logging.exception('RunAnalysis Exception msg:%s' % e) 70 | red.lpush('retry_list', info) 71 | finally: 72 | red.delete(key) 73 | 74 | print ('finish time:%s' % datetime.datetime.now()) 75 | 76 | 77 | class RunAnalysis(object): 78 | def __init__(self, red, key, user): 79 | self.key = key 80 | self.red = red 81 | 82 | data = json.loads(user) 83 | self.product_id = data.get('product_id') 84 | self.url = data.get('url') 85 | self.email = data.get('email') 86 | self.guid = data.get('guid') 87 | self.spider_name = 'jd_comment' 88 | self.spargs = data 89 | 90 | self.sql = SqlHelper() 91 | self.spargs['red'] = self.red 92 | self.spargs['sql'] = self.sql 93 | 94 | if not os.path.exists('log'): 95 | os.makedirs('log') 96 | 97 | configure_logging(install_root_handler = False) 98 | logging.basicConfig( 99 | filename = 'log/%s.log' % self.product_id, 100 | format = '%(levelname)s %(asctime)s: %(message)s', 101 | level = logging.DEBUG 102 | ) 103 | 104 | def run(self): 105 | self.runspider() 106 | self.analysis() 107 | self.send_notice() 108 | self.clear_cache() 109 | 110 | # 运行抓取程序,使用代理抓取所有的商品评价 111 | def runspider(self): 112 | configure_logging(install_root_handler = False) 113 | s = get_project_settings() 114 | runner = CrawlerRunner(settings = s) 115 | 116 | @defer.inlineCallbacks 117 | def crawl(**spargs): 118 | yield runner.crawl(JDItemInfoSpider, **spargs) 119 | yield runner.crawl(JDCommentSpider, **spargs) 120 | reactor.stop() 121 | 122 | crawl(**self.spargs) 123 | reactor.run() # the script will block here until the last crawl call is finished 124 | 125 | # 调度分析 126 | def analysis(self): 127 | analysis = Analysis(**self.spargs) 128 | result = 
analysis.run() 129 | 130 | jd_comment = JDCommentAnalysis(id = None, guid = self.guid, product_id = self.product_id, item_name = 'name', 131 | content = result, email = self.email, create_time = datetime.datetime.now()) 132 | jd_comment.save() 133 | 134 | # 向用户预留邮箱发送邮件 135 | def send_notice(self): 136 | subject = '京东商城 - 商品评价分析结果展示' 137 | 138 | blog_url = '%sjd/full_result/%s' % ('http://127.0.0.1:8000/', self.guid) 139 | 140 | command = "SELECT name FROM {0} WHERE id={1}".format(config.jd_item_table, self.product_id) 141 | (item_name,) = self.sql.query_one(command) 142 | 143 | body = ''' 144 | 您好~ 145 | 您订阅的京东商城商品评价信息分析服务已经完成。商品名称:{item_name},商品链接:{jd_url},分析结果请见:{blog_url} 146 | '''.format(jd_url = self.url, blog_url = blog_url, item_name = item_name) 147 | 148 | send_email(to_email = self.email, subject = subject, body = body) 149 | 150 | def clear_cache(self): 151 | data = self.red.delete(self.key) 152 | logging.debug('clear_cacha data:%s' % data) 153 | -------------------------------------------------------------------------------- /jd/management/commands/rand_item_analysis.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | 3 | import os 4 | import subprocess 5 | import requests 6 | import config 7 | import utils 8 | import re 9 | import random 10 | 11 | from django.core.management.base import BaseCommand, CommandError 12 | from django.conf import settings 13 | from sqlhelper import SqlHelper 14 | 15 | 16 | class Command(BaseCommand): 17 | help = 'randitem' 18 | 19 | def add_arguments(self, parser): 20 | parser.add_argument('-a', action = 'append', dest = 'spargs', default = [], 21 | help = 'set spider argument (may be repeated)') 22 | 23 | #必须实现的方法 24 | def handle(self, *args, **options): 25 | spargs = arglist_to_dict(options['spargs']) 26 | randitem(spargs) 27 | 28 | 29 | def randitem(spargs): 30 | guid = spargs.get('guid', 0) 31 | utils.push_redis(guid, 0, '正在随机产生商品链接', save_to_mysql = False) 
32 | 33 | url = 'https://diviner.jd.com/diviner?p=610009&callback=jsonpCallbackMoreGood&lid=1&uuid=122270672' \ 34 | '.1492415671516609876050.1492415672.1492415672.1492415672.1&pin=&lim=100&ec=utf-8&_=1492415813682' 35 | headers = { 36 | 'Host': 'diviner.jd.com', 37 | 'Referer': 'https://www.jd.com/', 38 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:52.0) Gecko/20100101 Firefox/52.0' 39 | } 40 | cookies = { 41 | '__jda': '122270672.1492415671516609876050.1492415672.1492415672.1492415672.1', 42 | '__jdb': '122270672.1.1492415671516609876050|1.1492415672', 43 | '__jdc': '122270672', 44 | '__jdv': '122270672|direct|-|none|-|1492415671524', 45 | '__jdu': '1492415671516609876050', 46 | } 47 | 48 | r = requests.get(url = url, headers = headers, cookies = cookies, timeout = 20) 49 | pattern = re.compile('"sku":(\d+),', re.S) 50 | ids = re.findall(pattern, r.text) 51 | id = random.choice(ids) 52 | 53 | url = 'https://item.jd.com/%s.html' % str(id) 54 | utils.push_redis(guid, 0, '生成商品链接:%s' % (url, url), save_to_mysql = False) 55 | 56 | sql = SqlHelper() 57 | command = "SELECT id FROM {table} WHERE id={product_id}". \ 58 | format(table = config.jd_item_table, product_id = id) 59 | result = sql.query_one(command) 60 | 61 | # 如果数据库中没有,则重新抓取 62 | if result == None: 63 | cmd = 'cd {dir};python manage.py real_time_analysis -a name={name} -a guid={guid} ' \ 64 | '-a product_id={product_id} -a url={url};'. \ 65 | format(url = str(url), name = 'jd', dir = settings.BASE_DIR, guid = guid, 66 | product_id = id) 67 | subprocess.Popen(cmd, shell = True) 68 | else: 69 | # 如果数据库中存在则,直接读取数据库中数据 70 | command = "SELECT * FROM {0} WHERE product_id={1} ORDER BY id". \ 71 | format(config.analysis_item_table, id) 72 | result = sql.query(command) 73 | for res in result: 74 | utils.push_redis(guid, res[1], res[2], res[3], save_to_mysql = False) 75 | 76 | 77 | def arglist_to_dict(arglist): 78 | """Convert a list of arguments like ['arg1=val1', 'arg2=val2', ...] 
to a 79 | dict 80 | """ 81 | return dict(x.split('=', 1) for x in arglist) 82 | -------------------------------------------------------------------------------- /jd/management/commands/real_time_analysis.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | 3 | import logging 4 | import sys 5 | import matplotlib 6 | import time 7 | 8 | matplotlib.use('Agg') 9 | 10 | import os 11 | import config 12 | import utils 13 | import redis 14 | import markdown2 15 | 16 | from scrapy.utils.log import configure_logging 17 | from django.core.management.base import BaseCommand 18 | from wordcloud import WordCloud 19 | from sqlhelper import SqlHelper 20 | from django.conf import settings 21 | from pandas import Series, DataFrame 22 | from cus_exception import CusException 23 | from jd.analysis_jd_item import Analysis 24 | from scrapy.crawler import CrawlerProcess 25 | from scrapy.utils.project import get_project_settings 26 | 27 | 28 | # python manage.py 29 | class Command(BaseCommand): 30 | help = 'analysis jd comment data' 31 | 32 | def add_arguments(self, parser): 33 | parser.add_argument('-a', action = 'append', dest = 'spargs', default = [], 34 | help = 'set spider argument (may be repeated)') 35 | 36 | #必须实现的方法 37 | def handle(self, *args, **options): 38 | reload(sys) 39 | sys.setdefaultencoding('utf-8') 40 | os.chdir(sys.path[0]) 41 | 42 | spargs = utils.arglist_to_dict(options['spargs']) 43 | 44 | if not os.path.exists('log'): 45 | os.makedirs('log') 46 | 47 | configure_logging(install_root_handler = False) 48 | logging.basicConfig( 49 | filename = 'log/%s.log' % spargs.get('product_id'), 50 | format = '%(levelname)s %(asctime)s: %(message)s', 51 | level = logging.ERROR 52 | ) 53 | 54 | guid = spargs.get('guid', '0') 55 | product_id = spargs.get('product_id', '0') 56 | 57 | if guid == '0' or product_id == '0': 58 | utils.log('分析数据传入参数不对,接收到的参数为: spargs:%s' % spargs) 59 | utils.push_redis(guid = guid, product_id 
= product_id, info = '分析数据传入参数不对,接收到的参数为:%s' % spargs) 60 | utils.push_redis(guid = guid, product_id = product_id, info = 'finish') 61 | return 62 | 63 | utils.log('开始分析:%s' % spargs) 64 | sql = SqlHelper() 65 | red = redis.StrictRedis(host = config.redis_host, port = config.redis_part, db = config.redis_db, 66 | password = config.redis_pass) 67 | spargs['sql'] = sql 68 | spargs['red'] = red 69 | 70 | # 运行爬虫 71 | runspider(spargs) 72 | 73 | # 开启分析 74 | analysis = RealTimeAnalysis(**spargs) 75 | analysis.run() 76 | 77 | 78 | def runspider(spargs): 79 | url = spargs.get('url') 80 | name = spargs.get('name', 'jd') 81 | 82 | if not os.path.exists('log'): 83 | os.makedirs('log') 84 | 85 | configure_logging(install_root_handler = False) 86 | logging.basicConfig( 87 | filename = 'log/%s.log' % name, 88 | format = '%(levelname)s %(asctime)s: %(message)s', 89 | level = logging.ERROR 90 | ) 91 | print "get_project_settings().attributes:", get_project_settings().attributes['SPIDER_MODULES'] 92 | process = CrawlerProcess(get_project_settings()) 93 | start_time = time.time() 94 | try: 95 | logging.info('进入爬虫') 96 | process.crawl(name, **spargs) 97 | process.start() 98 | except Exception, e: 99 | process.stop() 100 | logging.error("url:%s, errorMsg:%s" % (url, e.message)) 101 | finally: 102 | logging.error("url:%s, errorMsg:%s" % (url, "爬虫终止")) 103 | 104 | utils.log('spider crawl time:%s' % str(time.time() - start_time)) 105 | 106 | 107 | # 注意考虑到多个商品对比的情况 108 | class RealTimeAnalysis(Analysis): 109 | def __init__(self, **kwargs): 110 | super(RealTimeAnalysis, self).__init__(**kwargs) 111 | 112 | def record_result(self, result, color = 'default', font_size = 16, strong = False, type = 'word', 113 | br = True, default = False, new_line = False): 114 | self.full_result = '' 115 | if type == 'word' and default == False: 116 | if strong: 117 | result = '%s' % (color, font_size, result) 118 | else: 119 | result = '%s' % (color, font_size, result) 120 | elif type == 'image': 121 | 
result = markdown2.markdown(result) 122 | 123 | self.full_result += result 124 | 125 | if br: 126 | self.full_result += '
' 127 | if new_line: 128 | self.full_result += '\n' 129 | 130 | utils.push_redis(guid = self.guid, product_id = self.product_id, info = self.full_result, type = type) 131 | 132 | # 提取商品的基本信息 133 | def analysis_item_info(self): 134 | pass 135 | 136 | # 分析购买渠道并生成柱状图 137 | def analysis_buy_channel(self): 138 | self.record_result('正在分析商品的购买渠道占比...', color = 'black', font_size = 24, strong = True) 139 | super(RealTimeAnalysis, self).analysis_buy_channel() 140 | 141 | # 分析购买的商品颜色 142 | def analysis_color(self): 143 | self.record_result('正在分析该商品不同颜色的购买量...', color = 'black', font_size = 24, strong = True) 144 | super(RealTimeAnalysis, self).analysis_color() 145 | 146 | # 分析购买的商品大小分类 147 | def analysis_size(self): 148 | self.record_result('正在分析该商品不同配置的购买量...', color = 'black', font_size = 24, strong = True) 149 | super(RealTimeAnalysis, self).analysis_size() 150 | 151 | # 分析购买该商品的地域占比 152 | def analysis_province(self): 153 | self.record_result('正在分析该商品不同省份的购买量...', color = 'black', font_size = 24, strong = True) 154 | super(RealTimeAnalysis, self).analysis_province() 155 | 156 | # 分析商品购买、评论和时间关系图 157 | def analysis_sell_time(self): 158 | self.record_result('正在分析商品购买、评论和时间关系图...', color = 'black', font_size = 24, strong = True) 159 | super(RealTimeAnalysis, self).analysis_sell_time() 160 | 161 | # 分析移动端购买占比 162 | def analysis_mobile(self): 163 | self.record_result('正在分析移动端购买占比...', color = 'black', font_size = 24, strong = True) 164 | super(RealTimeAnalysis, self).analysis_mobile() 165 | 166 | # 分析购买后评论的时间分布 167 | def analysis_buy_days(self): 168 | self.record_result('正在分析该商品购买后用户评论的时间', color = 'black', font_size = 24, strong = True) 169 | super(RealTimeAnalysis, self).analysis_buy_days() 170 | 171 | # 分析购买的用户的等级分布 172 | def analysis_user_level(self): 173 | self.record_result('正在分析购买该商品用户的等级...', color = 'black', font_size = 24, strong = True) 174 | super(RealTimeAnalysis, self).analysis_user_level() 175 | 176 | # 分析 24 小时分布 177 | def analysis_hour(self): 178 | 
self.record_result('正在分析用户购买该商品 24 小时占比...', color = 'black', font_size = 24, strong = True) 179 | super(RealTimeAnalysis, self).analysis_hour() 180 | 181 | def finish(self): 182 | self.record_result('finish', default = True, br = False) 183 | -------------------------------------------------------------------------------- /jd/management/commands/run_spider.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | 3 | import logging 4 | import os 5 | import time 6 | import utils 7 | 8 | from scrapy.crawler import CrawlerProcess 9 | from django.core.management.base import BaseCommand, CommandError 10 | from scrapy.utils.log import configure_logging 11 | from scrapy.utils.project import get_project_settings 12 | 13 | 14 | # python manage.py runspider -a url=https://item.jd.com/4142680.html -a name=jd 15 | class Command(BaseCommand): 16 | help = 'run spider' 17 | 18 | def add_arguments(self, parser): 19 | parser.add_argument('-a', action = 'append', dest = 'spargs', default = [], 20 | help = 'set spider argument (may be repeated)') 21 | 22 | #必须实现的方法 23 | def handle(self, *args, **options): 24 | spargs = arglist_to_dict(options['spargs']) 25 | print('spargs:%s' % spargs) 26 | print os.getcwd() 27 | runspider(spargs = spargs) 28 | 29 | 30 | def runspider(spargs): 31 | url = spargs.get('url') 32 | name = spargs.get('name', 'jd') 33 | guid = spargs.get('guid') 34 | product_id = spargs.get('product_id') 35 | 36 | if not os.path.exists('log'): 37 | os.makedirs('log') 38 | 39 | configure_logging(install_root_handler = False) 40 | logging.basicConfig( 41 | filename = 'log/%s.log' % name, 42 | format = '%(levelname)s %(asctime)s: %(message)s', 43 | level = logging.ERROR 44 | ) 45 | print "get_project_settings().attributes:", get_project_settings().attributes['SPIDER_MODULES'] 46 | process = CrawlerProcess(get_project_settings()) 47 | start_time = time.time() 48 | try: 49 | logging.info('进入爬虫') 50 | process.crawl(name, 
**spargs) 51 | process.start() 52 | except Exception, e: 53 | process.stop() 54 | logging.error("url:%s, errorMsg:%s" % (url, e.message)) 55 | finally: 56 | logging.error("url:%s, errorMsg:%s" % (url, "爬虫终止")) 57 | 58 | utils.log('spider crawl time:%s' % str(time.time() - start_time)) 59 | 60 | 61 | def arglist_to_dict(arglist): 62 | """Convert a list of arguments like ['arg1=val1', 'arg2=val2', ...] to a 63 | dict 64 | """ 65 | return dict(x.split('=', 1) for x in arglist) 66 | -------------------------------------------------------------------------------- /jd/middleware.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | 3 | import datetime 4 | import utils 5 | 6 | from django.utils.deprecation import MiddlewareMixin 7 | from jd.models import JDVisit 8 | 9 | 10 | class JDVisitMiddleware(MiddlewareMixin): 11 | def process_request(self, request): 12 | page = request.path 13 | if 'runspider' in page and request.method == 'POST': 14 | ip = utils.get_visiter_ip(request) 15 | user_agent = request.META.get('HTTP_USER_AGENT', '') 16 | vt = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') 17 | jd_url = request.POST.get('url') 18 | visit = JDVisit(id = None, ip = ip, ip_address = '', visit_time = vt, user_agent = user_agent, 19 | jd_url = jd_url, ip_hight_success = '', ip_hight_address = '') 20 | 21 | visit.save() 22 | elif 'randitem' in page and request.method == 'POST': 23 | ip = utils.get_visiter_ip(request) 24 | user_agent = request.META.get('HTTP_USER_AGENT', '') 25 | vt = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') 26 | jd_url = 'randitem' 27 | visit = JDVisit(id = None, ip = ip, ip_address = '', visit_time = vt, user_agent = user_agent, 28 | jd_url = jd_url, ip_hight_success = '', ip_hight_address = '') 29 | 30 | visit.save() 31 | -------------------------------------------------------------------------------- /jd/middlewares/__init__.py: 
-------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | 3 | -------------------------------------------------------------------------------- /jd/middlewares/middlewares.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import logging 4 | 5 | from twisted.internet import defer 6 | from twisted.internet.error import TimeoutError, DNSLookupError, \ 7 | ConnectionRefusedError, ConnectionDone, ConnectError, \ 8 | ConnectionLost, TCPTimedOutError 9 | 10 | from scrapy.exceptions import NotConfigured 11 | from scrapy.utils.response import response_status_message 12 | from scrapy.xlib.tx import ResponseFailed 13 | from scrapy.core.downloader.handlers.http11 import TunnelError 14 | from jd.proxymanager import proxymng 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class ProxyMiddleware(object): 20 | def process_request(self, request, spider): 21 | try: 22 | request.meta['req_count'] = request.meta.get('req_count', 0) + 1 23 | 24 | if request.meta.get('is_proxy', False): 25 | request.meta['proxy'] = proxymng.get_proxy() 26 | except Exception, e: 27 | logging.warning('ProxyMiddleware Exception:%s' % str(e)) 28 | 29 | def process_exception(self, request, exception, spider): 30 | logging.error('process_exception error_request request exception:%s url:%s proxy:%s' % ( 31 | exception, request.url, str(request.meta))) 32 | 33 | if request.meta.get('is_proxy', False): 34 | proxymng.delete_proxy(request.meta.get('proxy')) 35 | request.meta['proxy'] = proxymng.get_proxy() 36 | 37 | return request 38 | 39 | 40 | class CustomRetryMiddleware(object): 41 | # IOError is raised by the HttpCompression middleware when trying to 42 | # decompress an empty response 43 | EXCEPTIONS_TO_RETRY = (defer.TimeoutError, TimeoutError, DNSLookupError, 44 | ConnectionRefusedError, ConnectionDone, ConnectError, 45 | ConnectionLost, TCPTimedOutError, ResponseFailed, 46 | 
IOError, TunnelError) 47 | 48 | def __init__(self, settings): 49 | if not settings.getbool('RETRY_ENABLED'): 50 | raise NotConfigured 51 | self.max_retry_times = settings.getint('RETRY_TIMES') 52 | self.retry_http_codes = set(int(x) for x in settings.getlist('RETRY_HTTP_CODES')) 53 | # self.priority_adjust = settings.getint('RETRY_PRIORITY_ADJUST') 54 | self.priority_adjust = 1 55 | 56 | @classmethod 57 | def from_crawler(cls, crawler): 58 | return cls(crawler.settings) 59 | 60 | def process_response(self, request, response, spider): 61 | if request.meta.get('dont_retry', False): 62 | return response 63 | 64 | if response.status in self.retry_http_codes: 65 | reason = response_status_message(response.status) 66 | return self._retry(request, reason, spider) or response 67 | return response 68 | 69 | def process_exception(self, request, exception, spider): 70 | if isinstance(exception, self.EXCEPTIONS_TO_RETRY) and not request.meta.get('dont_retry', False): 71 | return self._retry(request, exception, spider) 72 | 73 | def _retry(self, request, reason, spider): 74 | retries = request.meta.get('retry_times', 0) + 1 75 | 76 | if retries <= self.max_retry_times: 77 | logger.debug("Retrying %(request)s (failed %(retries)d times): %(reason)s", 78 | {'request': request, 'retries': retries, 'reason': reason}, 79 | extra = {'spider': spider}) 80 | retryreq = request.copy() 81 | retryreq.meta['retry_times'] = retries 82 | retryreq.dont_filter = True 83 | retryreq.priority = request.priority + self.priority_adjust 84 | 85 | request.meta['req_count'] = request.meta.get('req_count', 0) + 1 86 | 87 | if retries == self.max_retry_times: 88 | if request.meta.get('is_proxy', False): 89 | proxymng.delete_proxy(retryreq.meta.get('proxy')) 90 | retryreq.meta['proxy'] = proxymng.get_proxy() 91 | 92 | return retryreq 93 | else: 94 | logger.debug("Gave up retrying %(request)s (failed %(retries)d times): %(reason)s", 95 | {'request': request, 'retries': retries, 'reason': reason}, 96 | 
extra = {'spider': spider}) 97 | -------------------------------------------------------------------------------- /jd/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2017-05-10 01:37 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | initial = True 11 | 12 | dependencies = [ 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='AnalysisUser', 18 | fields=[ 19 | ('id', models.AutoField(primary_key=True, serialize=False)), 20 | ('url', models.CharField(max_length=200, verbose_name='\u6587\u7ae0 url \u5730\u5740')), 21 | ('email', models.EmailField(max_length=254, verbose_name='\u7528\u6237\u7559\u4e0b\u7684 email')), 22 | ('guid', models.CharField(max_length=100, verbose_name='GUID')), 23 | ('ip', models.CharField(max_length=20, verbose_name='\u8bbf\u95ee\u8005\u7684 IP \u5730\u5740')), 24 | ('product_id', models.CharField(max_length=100, verbose_name='\u6839\u636e URL \u63d0\u53d6\u7684 id')), 25 | ('create_time', models.DateTimeField(auto_now=True)), 26 | ], 27 | options={ 28 | 'ordering': ['-create_time'], 29 | 'db_table': 'jd_comment_analysis_user', 30 | }, 31 | ), 32 | migrations.CreateModel( 33 | name='JDCommentAnalysis', 34 | fields=[ 35 | ('id', models.AutoField(primary_key=True, serialize=False)), 36 | ('guid', models.CharField(max_length=100, verbose_name='GUID')), 37 | ('email', models.EmailField(default='', max_length=50, verbose_name='\u7528\u6237\u7559\u4e0b\u7684 email')), 38 | ('product_id', models.BigIntegerField(verbose_name='\u4eac\u4e1c\u5546\u54c1\u7684 id')), 39 | ('item_name', models.CharField(max_length=200, verbose_name='\u4eac\u4e1c\u5546\u57ce\u5546\u54c1\u7684\u540d\u79f0')), 40 | ('content', models.TextField(verbose_name='\u5b8c\u6574\u7684\u5206\u6790\u7ed3\u679c\u5c55\u793a')), 41 | ('create_time', 
models.DateTimeField(auto_now=True)), 42 | ], 43 | options={ 44 | 'ordering': ['-create_time'], 45 | 'db_table': 'jd_comment_analysis_result', 46 | }, 47 | ), 48 | migrations.CreateModel( 49 | name='JDVisit', 50 | fields=[ 51 | ('id', models.AutoField(primary_key=True, serialize=False)), 52 | ('jd_url', models.CharField(default='', max_length=200, verbose_name='\u4eac\u4e1c\u5546\u57ce\u5546\u54c1\u7684 url \u94fe\u63a5')), 53 | ('ip', models.CharField(max_length=20, verbose_name='\u8bbf\u95ee\u8005\u7684 IP \u5730\u5740')), 54 | ('ip_address', models.CharField(default=None, max_length=200, verbose_name='IP \u5bf9\u5e94\u7684\u5730\u5740')), 55 | ('visit_time', models.DateTimeField(verbose_name='\u8bbf\u95ee\u7684\u65f6\u95f4')), 56 | ('user_agent', models.TextField(default='', max_length=1000, verbose_name='\u8bbf\u95ee\u8005\u7684 HTTP_USER_AGENT')), 57 | ('ip_hight_success', models.CharField(default='', max_length=10, verbose_name='\u67e5\u8be2 IP \u9ad8\u7cbe\u5ea6\u5b9a\u4f4d\u662f\u5426\u6210\u529f')), 58 | ('ip_hight_address', models.CharField(default='', max_length=200, verbose_name='IP \u5bf9\u5e94\u7684\u9ad8\u7cbe\u5ea6\u5730\u5740')), 59 | ('ip_confidence', models.FloatField(default=0, verbose_name='IP \u9ad8\u7cbe\u5ea6\u67e5\u8be2\u7ed3\u679c\u7684\u53ef\u4fe1\u5ea6')), 60 | ('ip_hight_radius', models.IntegerField(default=-1, verbose_name='IP \u9ad8\u7cbe\u5ea6\u67e5\u8be2\u7ed3\u679c\u7684\u504f\u79fb\u534a\u5f84')), 61 | ('ip_hight_lat', models.FloatField(default=-1, verbose_name='IP \u9ad8\u7cbe\u5ea6\u67e5\u8be2\u7ecf\u5ea6')), 62 | ('ip_hight_long', models.FloatField(default=-1, verbose_name='IP \u9ad8\u7cbe\u5ea6\u67e5\u8be2\u7eac\u5ea6')), 63 | ], 64 | options={ 65 | 'db_table': 'jd_visit', 66 | }, 67 | ), 68 | ] 69 | -------------------------------------------------------------------------------- /jd/migrations/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/migrations/__init__.py -------------------------------------------------------------------------------- /jd/models.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | from django.db import models 5 | 6 | 7 | # Create your models here. 8 | class JDVisit(models.Model): 9 | id = models.AutoField(primary_key = True, name = 'id') 10 | jd_url = models.CharField(max_length = 200, name = 'jd_url', verbose_name = '京东商城商品的 url 链接', default = '') 11 | ip = models.CharField(max_length = 20, name = 'ip', verbose_name = '访问者的 IP 地址') 12 | ip_address = models.CharField(max_length = 200, name = 'ip_address', verbose_name = 'IP 对应的地址', default = None) 13 | visit_time = models.DateTimeField(name = 'visit_time', verbose_name = '访问的时间') 14 | user_agent = models.TextField(max_length = 1000, name = 'user_agent', verbose_name = '访问者的 HTTP_USER_AGENT', 15 | default = '') 16 | 17 | ip_hight_success = models.CharField(max_length = 10, name = 'ip_hight_success', verbose_name = '查询 IP 高精度定位是否成功', 18 | default = '') 19 | ip_hight_address = models.CharField(max_length = 200, name = 'ip_hight_address', verbose_name = 'IP 对应的高精度地址', 20 | default = '') 21 | ip_confidence = models.FloatField(name = 'ip_confidence', verbose_name = 'IP 高精度查询结果的可信度', default = 0) 22 | ip_hight_radius = models.IntegerField(name = 'ip_hight_radius', verbose_name = 'IP 高精度查询结果的偏移半径', default = -1) 23 | ip_hight_lat = models.FloatField(name = 'ip_hight_lat', verbose_name = 'IP 高精度查询经度', default = -1) 24 | ip_hight_long = models.FloatField(name = 'ip_hight_long', verbose_name = 'IP 高精度查询纬度', default = -1) 25 | 26 | class Meta: 27 | db_table = 'jd_visit' 28 | 29 | 30 | # Create your models here. 
31 | class AnalysisUser(models.Model): 32 | id = models.AutoField(primary_key = True, name = 'id') 33 | url = models.CharField(max_length = 200, name = 'url', verbose_name = '文章 url 地址') 34 | email = models.EmailField(name = 'email', verbose_name = '用户留下的 email') 35 | guid = models.CharField(max_length = 100, name = 'guid', verbose_name = 'GUID') 36 | ip = models.CharField(max_length = 20, name = 'ip', verbose_name = '访问者的 IP 地址') 37 | product_id = models.CharField(max_length = 100, name = 'product_id', verbose_name = '根据 URL 提取的 id') 38 | create_time = models.DateTimeField(name = 'create_time', auto_now = True) 39 | 40 | class Meta: 41 | db_table = 'jd_comment_analysis_user' 42 | ordering = ['-create_time'] 43 | 44 | 45 | class JDCommentAnalysis(models.Model): 46 | id = models.AutoField(primary_key = True, name = 'id') 47 | guid = models.CharField(max_length = 100, name = 'guid', verbose_name = 'GUID') 48 | email = models.EmailField(max_length = 50, name = 'email', verbose_name = '用户留下的 email', default = '') 49 | product_id = models.BigIntegerField(name = 'product_id', verbose_name = '京东商品的 id') 50 | item_name = models.CharField(max_length = 200, name = 'item_name', verbose_name = '京东商城商品的名称') 51 | content = models.TextField(name = 'content', verbose_name = '完整的分析结果展示') 52 | create_time = models.DateTimeField(name = 'create_time', auto_now = True) 53 | 54 | class Meta: 55 | db_table = 'jd_comment_analysis_result' 56 | ordering = ['-create_time'] 57 | -------------------------------------------------------------------------------- /jd/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | 8 | 9 | class CommentPipeline(object): 10 | def process_item(self, item, spider): 11 | return item 12 | 
-------------------------------------------------------------------------------- /jd/proxymanager.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | 3 | import logging 4 | import requests 5 | import json 6 | import time 7 | import utils 8 | import config 9 | 10 | 11 | class ProxyManager(object): 12 | def __init__(self): 13 | self.proxy_key = 'proxies' 14 | 15 | self.address = config.proxy_address 16 | self.db_name = 'jd' 17 | self.update_time = 0 18 | 19 | self.red = None 20 | 21 | def update_proxy(self, count = 100): 22 | try: 23 | r = requests.get(url = '%sselect?name=%s&order=save_time&sort=desc&count=%s' % 24 | (self.address, self.db_name, count), timeout = 20) 25 | data = json.loads(r.text) 26 | for item in data: 27 | proxy = 'http://%s:%s' % (item.get('ip'), item.get('port')) 28 | self.red.rpush(self.proxy_key, proxy) 29 | 30 | self.update_time = time.time() 31 | utils.log('*****************proxy manager proxys:****************\n%s' % (r.text)) 32 | except Exception, e: 33 | logging.exception('proxymanager update_proxy msg:%s' % e) 34 | 35 | def push_proxy(self, proxy): 36 | self.red.rpush(self.proxy_key, proxy) 37 | 38 | def get_proxy(self): 39 | if self.red.llen(self.proxy_key) <= 10: 40 | self.update_proxy() 41 | 42 | # 十分钟换一拨 IP 43 | if time.time() - self.update_time >= 600: 44 | self.update_proxy(count = 50) 45 | 46 | proxy = self.red.lpop(self.proxy_key) 47 | return proxy 48 | 49 | def delete_proxy(self, proxy): 50 | try: 51 | rets = proxy.split(':') 52 | ip = rets[1] 53 | ip = ip[2:] 54 | 55 | utils.log('--------------delete ip:%s-----------' % ip) 56 | r = requests.get(url = '%sdelete?name=%s&ip=%s' % (self.address, self.db_name, ip)) 57 | return r.text 58 | except: 59 | return False 60 | 61 | 62 | proxymng = ProxyManager() 63 | -------------------------------------------------------------------------------- /jd/send_email.py: 
-------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | 3 | import smtplib 4 | import logging 5 | import config 6 | 7 | from cus_exception import CusException 8 | from email.mime.text import MIMEText 9 | from email.mime.multipart import MIMEMultipart 10 | 11 | 12 | def send_email(to_email, subject, body): 13 | try: 14 | logging.debug('send_email start...') 15 | 16 | msg = MIMEMultipart() 17 | msg['From'] = config.self_email 18 | msg['To'] = to_email 19 | msg['Subject'] = subject 20 | 21 | msg.attach(MIMEText(body, 'plain')) 22 | 23 | if config.email_type == 'gmail': # gmail send 24 | server = smtplib.SMTP('smtp.gmail.com:587') 25 | elif config.email_type == 'qq': # qq send 26 | server = smtplib.SMTP_SSL('smtp.qq.com', 465) 27 | else: # default gmail 28 | server = smtplib.SMTP('smtp.gmail.com:587') 29 | 30 | server.set_debuglevel(1) 31 | server.ehlo() 32 | server.starttls() 33 | server.login(config.self_email, config.self_password) 34 | server.sendmail(config.self_email, to_email, msg.as_string()) 35 | server.quit() 36 | logging.debug('send_email success...') 37 | return True 38 | except Exception, e: 39 | logging.exception('send_email exception msg:%s' % e) 40 | raise CusException('send_email', 'send_email error msg:%s' % e) 41 | -------------------------------------------------------------------------------- /jd/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Scrapy settings for jd project 4 | # 5 | # For simplicity, this file contains only settings considered important or 6 | # commonly used. 
You can find more settings consulting the documentation: 7 | # 8 | # http://doc.scrapy.org/en/latest/topics/settings.html 9 | # http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 10 | # http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 11 | 12 | BOT_NAME = 'jd' 13 | 14 | SPIDER_MODULES = ['jd.spiders'] 15 | NEWSPIDER_MODULE = 'jd.spiders' 16 | 17 | # Crawl responsibly by identifying yourself (and your website) on the user-agent 18 | #USER_AGENT = 'jd (+http://www.yourdomain.com)' 19 | 20 | # Obey robots.txt rules 21 | ROBOTSTXT_OBEY = False 22 | 23 | # Configure maximum concurrent requests performed by Scrapy (default: 16) 24 | #CONCURRENT_REQUESTS = 32 25 | 26 | # Configure a delay for requests for the same website (default: 0) 27 | # See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay 28 | # See also autothrottle settings and docs 29 | # DOWNLOAD_DELAY = 3 30 | # The download delay setting will honor only one of: 31 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16 32 | #CONCURRENT_REQUESTS_PER_IP = 16 33 | 34 | # Disable cookies (enabled by default) 35 | COOKIES_ENABLED = False 36 | 37 | # Disable Telnet Console (enabled by default) 38 | #TELNETCONSOLE_ENABLED = False 39 | 40 | # Override the default request headers: 41 | #DEFAULT_REQUEST_HEADERS = { 42 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 43 | # 'Accept-Language': 'en', 44 | #} 45 | 46 | # Enable or disable spider middlewares 47 | # See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 48 | # SPIDER_MIDDLEWARES = { 49 | # 'jd.middlewares.cookies.CookiesMiddleware': 543, 50 | # } 51 | 52 | # Enable or disable downloader middlewares 53 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 54 | DOWNLOADER_MIDDLEWARES = { 55 | # 'jd.middlewares.middlewares.ProxyMiddleware': 543, 56 | 'scrapy.contrib.downloadermiddleware.retry.RetryMiddleware': None, 57 | 
'jd.middlewares.middlewares.ProxyMiddleware': 100, 58 | 'jd.middlewares.middlewares.CustomRetryMiddleware': 500, 59 | } 60 | 61 | # Enable or disable extensions 62 | # See http://scrapy.readthedocs.org/en/latest/topics/extensions.html 63 | #EXTENSIONS = { 64 | # 'scrapy.extensions.telnet.TelnetConsole': None, 65 | #} 66 | 67 | # Configure item pipelines 68 | # See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html 69 | #ITEM_PIPELINES = { 70 | # 'jd.pipelines.SomePipeline': 300, 71 | #} 72 | 73 | # Enable and configure the AutoThrottle extension (disabled by default) 74 | # See http://doc.scrapy.org/en/latest/topics/autothrottle.html 75 | #AUTOTHROTTLE_ENABLED = True 76 | # The initial download delay 77 | #AUTOTHROTTLE_START_DELAY = 5 78 | # The maximum download delay to be set in case of high latencies 79 | # AUTOTHROTTLE_MAX_DELAY = 60 80 | # The average number of requests Scrapy should be sending in parallel to 81 | # each remote server 82 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 83 | # Enable showing throttling stats for every response received: 84 | #AUTOTHROTTLE_DEBUG = False 85 | 86 | # Enable and configure HTTP caching (disabled by default) 87 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings 88 | #HTTPCACHE_ENABLED = True 89 | #HTTPCACHE_EXPIRATION_SECS = 0 90 | #HTTPCACHE_DIR = 'httpcache' 91 | #HTTPCACHE_IGNORE_HTTP_CODES = [] 92 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' 93 | 94 | # COOKIES_ENABLED = True 95 | # COOKIES_DEBUG = True 96 | 97 | 98 | INNER_CRAWL_PAGE = 50 99 | DOWNLOAD_TIMEOUT = 20 100 | 101 | LOG_ENABLED = True 102 | # LOG_FILE = 'jd_comment.log' 103 | 104 | RETRY_HTTP_CODES = [500, 502, 503, 504, 408, 403] 105 | RETRY_TIMES = 1 106 | -------------------------------------------------------------------------------- /jd/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This 
# -*- coding: utf-8 -*-
"""Scrapy spider that downloads the comment pages of one JD product.

Page descriptors (JSON strings) are popped from a Redis list, fetched from
JD's JSONP comment endpoint, and every comment found is written to a
per-product SQL table.
"""

import sys
import chardet
import re
import json
import datetime
import logging
import config
import utils

from scrapy import Spider
from scrapy import Request
from ..proxymanager import proxymng

reload(sys)
sys.setdefaultencoding('utf-8')

# Endpoint template; the placeholders are filled from a queued page descriptor.
_COMMENT_URL = (
    'https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98vv'
    '{comment_version}&productId={product_id}&score=0&sortType={sort_type}&page={page}&'
    'pageSize=10&isShadowSku=0'
)

# The endpoint answers with JSONP: ``callback({...});``.  The match is greedy
# on purpose: a ");" inside a user's comment text must not cut the JSON short.
_JSONP_PATTERN = re.compile(r'\((.*)\);', re.S)

# (table column, key in JD's comment JSON) for the fields copied verbatim.
# A missing key is stored as '' for every one of them.
_COMMENT_FIELDS = (
    ('creation_time', 'creationTime'),          # comment creation time
    ('reply_count', 'replyCount'),              # number of replies
    ('score', 'score'),                         # star rating
    ('useful_vote_count', 'usefulVoteCount'),   # "useful" votes from other users
    ('useless_vote_count', 'uselessVoteCount'),  # "useless" votes from other users
    ('user_level_id', 'userLevelId'),           # id of the commenter's level
    ('user_province', 'userProvince'),          # commenter's province
    ('nickname', 'nickname'),                   # commenter's nickname
    ('product_color', 'productColor'),          # product color
    ('product_size', 'productSize'),            # product size
    ('user_level_name', 'userLevelName'),       # name of the commenter's level
    ('user_client', 'userClient'),              # client platform (code)
    ('user_client_show', 'userClientShow'),     # client platform (display text)
    ('is_mobile', 'isMobile'),                  # whether posted from a mobile client
    ('days', 'days'),                           # days between purchase and comment
    ('reference_time', 'referenceTime'),        # purchase time
    ('after_days', 'afterDays'),                # days between purchase and follow-up
)


def _comment_to_row(comment):
    """Convert one comment object of the JD response into a table row dict."""
    # JD may send null for the nested follow-up objects, so normalise to {}.
    follow_up = comment.get('afterUserComment') or {}
    follow_up_body = follow_up.get('hAfterUserComment') or {}

    row = {
        'id': comment.get('id'),
        # Apostrophes are stripped from free text, as the original code did
        # (presumably because the SQL helper builds statements by string
        # formatting -- TODO confirm against sqlhelper).
        'content': (comment.get('content') or '').replace('\'', ''),
        'images_count': len(comment.get('images') or []),
        'ip': follow_up.get('ip', ''),  # IP address of the follow-up comment
        'after_content': (follow_up_body.get('content') or '').replace('\'', ''),
        'save_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    }
    for column, key in _COMMENT_FIELDS:
        row[column] = comment.get(key, '')
    return row


# python manage.py runspider -a url=https://item.jd.com/11478178241.html -a name=jd
class JDCommentSpider(Spider):
    """Fetch queued comment pages of a product and store every comment."""

    name = 'jd_comment'

    def __init__(self, name=None, **kwargs):
        """Read the spider arguments and make sure the comment table exists.

        Keyword arguments used: ``url``, ``guid``, ``product_id``, ``sql``
        (database helper) and ``red`` (Redis client).
        """
        super(JDCommentSpider, self).__init__(name, **kwargs)
        self.url = kwargs.get("url")
        self.guid = kwargs.get('guid', 'guid')
        self.product_id = kwargs.get('product_id')
        self.log('product_id:%s' % self.product_id)
        self.item_table = 'item_%s' % self.product_id
        self.urls_key = '%s_urls' % self.product_id

        self.log_dir = 'log/%s' % self.product_id
        self.is_record_page = False  # set to True to dump every response to log_dir

        self.sql = kwargs.get('sql')
        self.red = kwargs.get('red')
        proxymng.red = self.red  # the proxy manager shares this Redis client

        if self.is_record_page:
            utils.make_dir(self.log_dir)

        self.init()

    def init(self):
        """Create the per-product comment table if it does not exist yet."""
        command = (
            "CREATE TABLE IF NOT EXISTS {} ("
            "`id` BIGINT (15) NOT NULL AUTO_INCREMENT,"  # comment id
            "`content` TEXT NOT NULL,"  # comment text
            "`creation_time` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,"  # comment creation time
            "`reply_count` INT(4) DEFAULT NULL ,"  # number of replies
            "`score` INT(2) DEFAULT NULL,"  # star rating
            "`useful_vote_count` INT(5) DEFAULT NULL,"  # "useful" votes from other users
            "`useless_vote_count` INT(4) DEFAULT NULL,"  # "useless" votes from other users
            "`user_level_id` INT(4) DEFAULT NULL,"  # id of the commenter's level
            '`user_province` CHAR(8) DEFAULT NULL,'  # commenter's province
            '`nickname` CHAR(20) DEFAULT NULL,'  # commenter's nickname
            '`product_color` CHAR(50) DEFAULT NULL,'  # product color
            "`product_size` CHAR(50) DEFAULT NULL,"  # product size
            "`user_level_name` CHAR(20) DEFAULT NULL,"  # name of the commenter's level
            "`user_client` INT(5) DEFAULT NULL,"  # client platform (code)
            "`user_client_show` CHAR(20) DEFAULT NULL,"  # client platform (display text)
            "`is_mobile` INT (3) DEFAULT NULL,"  # whether posted from a mobile client
            "`days` INT(3) DEFAULT NULL,"  # days between purchase and comment
            "`reference_time` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,"  # purchase time
            "`after_days` INT(3) DEFAULT NULL,"  # days between purchase and follow-up comment
            "`images_count` INT(3) DEFAULT NULL,"  # number of images in the comment
            "`ip` CHAR(20) DEFAULT NULL,"  # IP address of the follow-up comment
            "`after_content` TEXT DEFAULT NULL,"  # follow-up comment text
            "`save_time` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,"  # time the row was scraped
            "PRIMARY KEY(id)"
            ") ENGINE=InnoDB".format(self.item_table))
        self.sql.create_table(command)

    def start_requests(self):
        """Yield one request per page descriptor until the Redis list is empty.

        The generator is consumed lazily, so descriptors pushed back by
        ``parse_comment`` / ``error_request`` while the crawl is running are
        picked up again.
        """
        while True:
            info = self.red.lpop(self.urls_key)
            if info is None:
                break

            try:
                data = json.loads(info)
            except ValueError:
                # A malformed entry must not abort the whole crawl.
                self.logger.error('start_requests drop malformed queue entry:%s' % info)
                continue

            url = _COMMENT_URL.format(
                product_id=data.get('product_id'),
                comment_version=data.get('comment_version'),
                sort_type=data.get('sort_type'),
                page=data.get('page'),
            )

            yield Request(
                url=url,
                headers={
                    'Accept': '*/*',
                    'Accept-Encoding': 'gzip, deflate, br',
                    'Accept-Language': 'en-US,en;q=0.5',
                    'Connection': 'keep-alive',
                    'Host': 'club.jd.com',
                    'Referer': 'https://item.jd.com/%s.html' % self.product_id,
                    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:52.0) Gecko/20100101 '
                                  'Firefox/52.0',
                },
                method='GET',
                meta={
                    'page': data.get('page'),
                    'is_proxy': config.is_proxy,
                    'handle_httpstatus_list': [302, 403, 301, 404, 500, 405, 502],
                    # Raw descriptor, kept so the page can be re-queued on failure.
                    'info': info,
                },
                dont_filter=True,
                callback=self.parse_comment,
                errback=self.error_request,
            )

    def parse_comment(self, response):
        """Store every comment of one response; re-queue the page on failure."""
        info = response.meta.get('info')
        self.save_page('%s_%s.html' % (self.product_id, response.meta.get('page')), response.body)
        if not response.body:
            self.red.rpush(self.urls_key, info)
            self.log('parse_comment parse NULL DATA:%s' % response.url)
            return

        try:
            # chardet reports encoding=None when it cannot decide.
            encoding = chardet.detect(response.body).get('encoding') or 'utf-8'
            body = response.body.decode(encoding, 'ignore')

            match = _JSONP_PATTERN.search(body)
            if match is not None:
                data = json.loads(match.group(1))
                for comment in data.get('comments') or []:
                    self.sql.insert_json(_comment_to_row(comment), self.item_table)
            else:
                self.logger.warning('parse_comment no JSONP payload url:%s' % response.url)

            self.sql.commit()
            proxymng.push_proxy(response.meta.get('proxy'))
        except Exception as e:
            # Best effort: put the page back so it is fetched again.
            # NOTE(review): a page that fails deterministically is re-queued
            # forever; consider a retry cap.
            self.red.rpush(self.urls_key, info)
            self.logger.error('parse_comment parse Exception msg:%s url:%s' % (e, response.url))

    def error_request(self, failure):
        """Errback: re-queue the page descriptor of a failed request."""
        request = failure.request
        proxy = request.meta.get('proxy')

        # Re-queue the JSON descriptor, not the URL: start_requests
        # json-decodes every entry it pops from the list.
        info = request.meta.get('info')
        if info is not None:
            self.red.rpush(self.urls_key, info)
        self.logger.error('error_request proxy:%s url:%s error:%r' % (proxy, request.url, failure.value))

    def save_page(self, filename, data):
        """Write the raw response body to ``log_dir`` when page recording is on."""
        if self.is_record_page:
            with open('%s/%s' % (self.log_dir, filename), 'wb') as f:
                f.write(data)

    def close(spider, reason):
        """Commit pending rows when the spider closes.

        The first parameter keeps the name ``spider`` from the original code;
        it receives the spider instance.
        """
        # commit the transaction
        spider.sql.commit()
# -*- coding: utf-8 -*-
"""Scrapy spider that records a JD product's comment summary.

It loads the product page, reads the comment summary from JD's JSONP comment
endpoint, stores it, and fills a Redis list with one descriptor per comment
page that still has to be crawled.
"""

import sys
import chardet
import re
import json
import datetime
import config
import utils
import redis
import time

from scrapy.http.cookies import CookieJar
from scrapy.utils.project import get_project_settings
from scrapy import Spider
from scrapy import Request
from sqlhelper import SqlHelper

reload(sys)
sys.setdefaultencoding('utf-8')

_COMMENT_VERSION_PATTERN = re.compile(r"commentVersion:'(\d+)'", re.S)

# The endpoint answers with JSONP: ``callback({...});``.  The match is greedy
# on purpose: a ");" inside the payload must not cut the JSON short.
_JSONP_PATTERN = re.compile(r'\((.*)\);', re.S)

# (table column, key in productCommentSummary) for the fields copied verbatim.
_SUMMARY_FIELDS = (
    ('good_rate_show', 'goodRateShow'),
    ('poor_rate_show', 'poorRateShow'),
    ('average_score', 'averageScore'),
    ('good_count', 'goodCount'),
    ('general_rate', 'generalRate'),
    ('general_count', 'generalCount'),
    ('poor_rate', 'poorRate'),
    ('after_count', 'afterCount'),
    ('good_rate_style', 'goodRateStyle'),
    ('poor_count', 'poorCount'),
    ('poor_rate_style', 'poorRateStyle'),
    ('general_rate_style', 'generalRateStyle'),
    ('comment_count', 'commentCount'),
    ('product_id', 'productId'),
    ('good_rate', 'goodRate'),
    ('general_rate_show', 'generalRateShow'),
)


# python manage.py runspider -a url=https://item.jd.com/11478178241.html -a name=jd
class JDItemInfoSpider(Spider):
    """Store a product's comment summary and queue its comment pages."""

    name = 'jd_item_info'

    def __init__(self, name=None, **kwargs):
        """Read the spider arguments.

        Keyword arguments used: ``url``, ``guid``, ``product_id``, ``sql``
        (database helper) and ``red`` (Redis client).
        """
        super(JDItemInfoSpider, self).__init__(name, **kwargs)
        self.url = kwargs.get("url")
        self.guid = kwargs.get('guid', 'guid')
        self.product_id = kwargs.get('product_id')
        self.log('product_id:%s' % self.product_id)
        self.item_table = 'item_%s' % self.product_id
        self.urls_key = '%s_urls' % self.product_id

        self.log_dir = 'log/%s' % self.product_id
        self.is_record_page = False  # set to True to dump every response to log_dir

        self.sql = kwargs.get('sql')
        self.red = kwargs.get('red')

        # Filled by get_all_comment; close() must cope with a crawl that
        # never got that far.
        self.product_msg = None

        if self.is_record_page:
            utils.make_dir(self.log_dir)

    def start_requests(self):
        """Request the product page."""
        yield Request(
            url=self.url,
            headers={
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Encoding': 'gzip, deflate, br',
                'Accept-Language': 'en-US,en;q=0.5',
                'Connection': 'keep-alive',
                'Host': 'item.jd.com',
                'Upgrade-Insecure-Requests': '1',
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:52.0) Gecko/20100101 '
                              'Firefox/52.0',
            },
            method='GET',
            meta={
                'dont_merge_cookies': True,
                'cookiejar': CookieJar(),
            },
            dont_filter=True,
            callback=self.get_comment_count,
        )

    def get_comment_count(self, response):
        """Read name, SKU ids and comment version; request comment page 0."""
        self.save_page('%s.html' % self.product_id, response.body)

        name = response.xpath('//div[@class="p-img"]/a/img/@alt').extract_first()
        self.log('name:%s' % name)

        ids = response.xpath('//div[@class="dd"]/div/@data-sku').extract()
        item_ids = ','.join(ids)
        self.log('item_ids:%s' % item_ids)

        version_match = _COMMENT_VERSION_PATTERN.search(response.body)
        if version_match is None:
            # Without the version the comment endpoint URL cannot be built.
            self.logger.error('get_comment_count commentVersion not found url:%s' % response.url)
            return
        comment_version = version_match.group(1)

        # sort type 5: recommended order, 6: chronological order
        url = 'https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98vv' \
              '{comment_version}&productId={product_id}&score=0&sortType={sort_type}&page=0&pageSize=10' \
              '&isShadowSku=0'. \
            format(product_id=self.product_id, comment_version=comment_version, sort_type='6')

        yield Request(
            url=url,
            headers={
                'Accept': '*/*',
                'Accept-Encoding': 'gzip, deflate, br',
                'Accept-Language': 'en-US,en;q=0.5',
                'Connection': 'keep-alive',
                'Host': 'club.jd.com',
                'Referer': 'https://item.jd.com/%s.html' % self.product_id,
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:52.0) Gecko/20100101 '
                              'Firefox/52.0',
            },
            method='GET',
            meta={
                'name': name,
                'comment_version': comment_version,
                'item_ids': item_ids
            },
            dont_filter=True,
            callback=self.get_all_comment
        )

    def get_all_comment(self, response):
        """Store the comment summary and queue the comment pages to crawl."""
        self.save_page('%s_all_comment.html' % self.product_id, response.body)

        # chardet reports encoding=None when it cannot decide.
        encoding = chardet.detect(response.body).get('encoding') or 'utf-8'
        body = response.body.decode(encoding, 'ignore')

        match = _JSONP_PATTERN.search(body)
        if match is None:
            self.logger.error('get_all_comment no JSONP payload url:%s' % response.url)
            return

        # productCommentSummary
        pcs = json.loads(match.group(1)).get('productCommentSummary')
        if not pcs:
            self.logger.error('get_all_comment no productCommentSummary url:%s' % response.url)
            return

        product_msg = {
            'id': self.product_id,
            'name': response.meta.get('name'),
            'url': self.url,
            'item_ids': response.meta.get('item_ids'),
            'save_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        }
        for column, key in _SUMMARY_FIELDS:
            product_msg[column] = pcs.get(key)
        self.product_msg = product_msg

        self.sql.insert_json(self.product_msg, config.jd_item_table, commit=True)

        comment_version = response.meta.get('comment_version')
        comment_count = int(pcs.get('commentCount') or 0)

        page_count = self._pages_to_crawl(comment_count)
        self.log('page_count:%s' % page_count)

        for page in range(page_count):
            descriptor = {
                'product_id': self.product_id,
                'page': page,
                'comment_version': comment_version,
                'sort_type': 6,  # sort type 5: recommended order, 6: chronological order
            }
            self.red.rpush(self.urls_key, json.dumps(descriptor))

    def _pages_to_crawl(self, comment_count):
        """Return how many comment pages (10 comments each) must be queued."""
        # The original author's note asks why 10 / 100 is added here;
        # presumably head-room for comments posted during the crawl
        # -- TODO confirm.
        full_crawl = comment_count // 10 + (10 if comment_count < 10000 else 100)

        if not self.sql.is_exists(self.item_table):
            return full_crawl

        # The table exists and Redis still holds pages: the previous crawl was
        # interrupted, so nothing new has to be queued.
        if self.red.llen(self.urls_key) > 0:
            return 0

        # Redis is empty: compare the stored rows with the reported total.
        command = "SELECT COUNT(*) FROM {}".format(self.item_table)
        (count,) = self.sql.query_one(command, commit=False)
        self.log('count:%s comment_count:%s' % (count, comment_count))

        if count >= comment_count:
            return 0  # everything is already stored

        if count <= 3000:
            # Few rows stored: crawl everything again.
            self.log('count <= 3000 and comment_count > count')
            return full_crawl

        # Otherwise only fetch the increment.
        self.log('comment_count > count')
        return (comment_count - count) // 10 + 1

    def save_page(self, filename, data):
        """Write the raw response body to ``log_dir`` when page recording is on."""
        if self.is_record_page:
            with open('%s/%s' % (self.log_dir, filename), 'wb') as f:
                f.write(data)

    def close(spider, reason):
        """Store the summary (if one was fetched) and commit on spider close.

        The first parameter keeps the name ``spider`` from the original code;
        it receives the spider instance.
        """
        if getattr(spider, 'product_msg', None) is not None:
            spider.sql.insert_json(spider.product_msg, config.jd_item_table)

        # commit the transaction
        spider.sql.commit()
-ms-behavior: url("assets/js/ie/PIE.htc"); 46 | } 47 | 48 | .image:before, .image:after { 49 | display: none !important; 50 | } 51 | 52 | .image img { 53 | position: relative; 54 | -ms-behavior: url("assets/js/ie/PIE.htc"); 55 | } 56 | 57 | /* Header */ 58 | 59 | #header { 60 | background-image: url("../../images/bg.jpg"); 61 | background-repeat: no-repeat; 62 | background-size: cover; 63 | -ms-behavior: url("assets/js/ie/backgroundsize.min.htc"); 64 | } 65 | 66 | #header h1 { 67 | color: #ffffff; 68 | } 69 | 70 | /* Footer */ 71 | 72 | #footer .icons a { 73 | color: #ffffff; 74 | } -------------------------------------------------------------------------------- /jd/static/assets/css/images/overlay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/assets/css/images/overlay.png -------------------------------------------------------------------------------- /jd/static/assets/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/assets/fonts/FontAwesome.otf -------------------------------------------------------------------------------- /jd/static/assets/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/assets/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /jd/static/assets/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/assets/fonts/fontawesome-webfont.ttf 
-------------------------------------------------------------------------------- /jd/static/assets/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/assets/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /jd/static/assets/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/assets/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /jd/static/assets/js/ie/backgroundsize.min.htc: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /jd/static/assets/js/ie/html5shiv.js: -------------------------------------------------------------------------------- 1 | /* 2 | HTML5 Shiv v3.6.2 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed 3 | */ 4 | (function(l,f){function m(){var a=e.elements;return"string"==typeof a?a.split(" "):a}function i(a){var b=n[a[o]];b||(b={},h++,a[o]=h,n[h]=b);return b}function p(a,b,c){b||(b=f);if(g)return b.createElement(a);c||(c=i(b));b=c.cache[a]?c.cache[a].cloneNode():r.test(a)?(c.cache[a]=c.createElem(a)).cloneNode():c.createElem(a);return b.canHaveChildren&&!s.test(a)?c.frag.appendChild(b):b}function t(a,b){if(!b.cache)b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag(); 5 | a.createElement=function(c){return!e.shivMethods?b.createElem(c):p(c,a,b)};a.createDocumentFragment=Function("h,f","return function(){var 
n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+m().join().replace(/\w+/g,function(a){b.createElem(a);b.frag.createElement(a);return'c("'+a+'")'})+");return n}")(e,b.frag)}function q(a){a||(a=f);var b=i(a);if(e.shivCSS&&!j&&!b.hasCSS){var c,d=a;c=d.createElement("p");d=d.getElementsByTagName("head")[0]||d.documentElement;c.innerHTML="x"; 6 | c=d.insertBefore(c.lastChild,d.firstChild);b.hasCSS=!!c}g||t(a,b);return a}var k=l.html5||{},s=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,r=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,j,o="_html5shiv",h=0,n={},g;(function(){try{var a=f.createElement("a");a.innerHTML="";j="hidden"in a;var b;if(!(b=1==a.childNodes.length)){f.createElement("a");var c=f.createDocumentFragment();b="undefined"==typeof c.cloneNode|| 7 | "undefined"==typeof c.createDocumentFragment||"undefined"==typeof c.createElement}g=b}catch(d){g=j=!0}})();var e={elements:k.elements||"abbr article aside audio bdi canvas data datalist details figcaption figure footer header hgroup main mark meter nav output progress section summary time video",version:"3.6.2",shivCSS:!1!==k.shivCSS,supportsUnknownElements:g,shivMethods:!1!==k.shivMethods,type:"default",shivDocument:q,createElement:p,createDocumentFragment:function(a,b){a||(a=f);if(g)return a.createDocumentFragment(); 8 | for(var b=b||i(a),c=b.frag.cloneNode(),d=0,e=m(),h=e.length;d #mq-test-1 { width: 42px; }',c.insertBefore(e,d),b=42===f.offsetWidth,c.removeChild(e),{matches:b,media:a}}}(a.document)}(this),function(a){"use strict";function b(){v(!0)}var c={};a.respond=c,c.update=function(){};var d=[],e=function(){var b=!1;try{b=new a.XMLHttpRequest}catch(c){b=new a.ActiveXObject("Microsoft.XMLHTTP")}return function(){return b}}(),f=function(a,b){var 
c=e();c&&(c.open("GET",a,!0),c.onreadystatechange=function(){4!==c.readyState||200!==c.status&&304!==c.status||b(c.responseText)},4!==c.readyState&&c.send(null))},g=function(a){return a.replace(c.regex.minmaxwh,"").match(c.regex.other)};if(c.ajax=f,c.queue=d,c.unsupportedmq=g,c.regex={media:/@media[^\{]+\{([^\{\}]*\{[^\}\{]*\})+/gi,keyframes:/@(?:\-(?:o|moz|webkit)\-)?keyframes[^\{]+\{(?:[^\{\}]*\{[^\}\{]*\})+[^\}]*\}/gi,comments:/\/\*[^*]*\*+([^/][^*]*\*+)*\//gi,urls:/(url\()['"]?([^\/\)'"][^:\)'"]+)['"]?(\))/g,findStyles:/@media *([^\{]+)\{([\S\s]+?)$/,only:/(only\s+)?([a-zA-Z]+)\s?/,minw:/\(\s*min\-width\s*:\s*(\s*[0-9\.]+)(px|em)\s*\)/,maxw:/\(\s*max\-width\s*:\s*(\s*[0-9\.]+)(px|em)\s*\)/,minmaxwh:/\(\s*m(in|ax)\-(height|width)\s*:\s*(\s*[0-9\.]+)(px|em)\s*\)/gi,other:/\([^\)]*\)/g},c.mediaQueriesSupported=a.matchMedia&&null!==a.matchMedia("only all")&&a.matchMedia("only all").matches,!c.mediaQueriesSupported){var h,i,j,k=a.document,l=k.documentElement,m=[],n=[],o=[],p={},q=30,r=k.getElementsByTagName("head")[0]||l,s=k.getElementsByTagName("base")[0],t=r.getElementsByTagName("link"),u=function(){var a,b=k.createElement("div"),c=k.body,d=l.style.fontSize,e=c&&c.style.fontSize,f=!1;return b.style.cssText="position:absolute;font-size:1em;width:1em",c||(c=f=k.createElement("body"),c.style.background="none"),l.style.fontSize="100%",c.style.fontSize="100%",c.appendChild(b),f&&l.insertBefore(c,l.firstChild),a=b.offsetWidth,f?l.removeChild(c):c.removeChild(b),l.style.fontSize=d,e&&(c.style.fontSize=e),a=j=parseFloat(a)},v=function(b){var c="clientWidth",d=l[c],e="CSS1Compat"===k.compatMode&&d||k.body[c]||d,f={},g=t[t.length-1],p=(new Date).getTime();if(b&&h&&q>p-h)return a.clearTimeout(i),i=a.setTimeout(v,q),void 0;h=p;for(var s in m)if(m.hasOwnProperty(s)){var 
w=m[s],x=w.minw,y=w.maxw,z=null===x,A=null===y,B="em";x&&(x=parseFloat(x)*(x.indexOf(B)>-1?j||u():1)),y&&(y=parseFloat(y)*(y.indexOf(B)>-1?j||u():1)),w.hasquery&&(z&&A||!(z||e>=x)||!(A||y>=e))||(f[w.media]||(f[w.media]=[]),f[w.media].push(n[w.rules]))}for(var C in o)o.hasOwnProperty(C)&&o[C]&&o[C].parentNode===r&&r.removeChild(o[C]);o.length=0;for(var D in f)if(f.hasOwnProperty(D)){var E=k.createElement("style"),F=f[D].join("\n");E.type="text/css",E.media=D,r.insertBefore(E,g.nextSibling),E.styleSheet?E.styleSheet.cssText=F:E.appendChild(k.createTextNode(F)),o.push(E)}},w=function(a,b,d){var e=a.replace(c.regex.comments,"").replace(c.regex.keyframes,"").match(c.regex.media),f=e&&e.length||0;b=b.substring(0,b.lastIndexOf("/"));var h=function(a){return a.replace(c.regex.urls,"$1"+b+"$2$3")},i=!f&&d;b.length&&(b+="/"),i&&(f=1);for(var j=0;f>j;j++){var k,l,o,p;i?(k=d,n.push(h(a))):(k=e[j].match(c.regex.findStyles)&&RegExp.$1,n.push(RegExp.$2&&h(RegExp.$2))),o=k.split(","),p=o.length;for(var q=0;p>q;q++)l=o[q],g(l)||m.push({media:l.split("(")[0].match(c.regex.only)&&RegExp.$2||"all",rules:n.length-1,hasquery:l.indexOf("(")>-1,minw:l.match(c.regex.minw)&&parseFloat(RegExp.$1)+(RegExp.$2||""),maxw:l.match(c.regex.maxw)&&parseFloat(RegExp.$1)+(RegExp.$2||"")})}v()},x=function(){if(d.length){var b=d.shift();f(b.href,function(c){w(c,b.href,b.media),p[b.href]=!0,a.setTimeout(function(){x()},0)})}},y=function(){for(var b=0;b 1 && !$.isFunction(value)) { 63 | options = $.extend({}, config.defaults, options); 64 | 65 | if (typeof options.expires === 'number') { 66 | var days = options.expires, t = options.expires = new Date(); 67 | t.setMilliseconds(t.getMilliseconds() + days * 864e+5); 68 | } 69 | 70 | return (document.cookie = [ 71 | encode(key), '=', stringifyCookieValue(value), 72 | options.expires ? '; expires=' + options.expires.toUTCString() : '', // use expires attribute, max-age is not supported by IE 73 | options.path ? 
'; path=' + options.path : '', 74 | options.domain ? '; domain=' + options.domain : '', 75 | options.secure ? '; secure' : '' 76 | ].join('')); 77 | } 78 | 79 | // Read 80 | 81 | var result = key ? undefined : {}, 82 | // To prevent the for loop in the first place assign an empty array 83 | // in case there are no cookies at all. Also prevents odd result when 84 | // calling $.cookie(). 85 | cookies = document.cookie ? document.cookie.split('; ') : [], 86 | i = 0, 87 | l = cookies.length; 88 | 89 | for (; i < l; i++) { 90 | var parts = cookies[i].split('='), 91 | name = decode(parts.shift()), 92 | cookie = parts.join('='); 93 | 94 | if (key === name) { 95 | // If second argument (value) is a function it's a converter... 96 | result = read(cookie, value); 97 | break; 98 | } 99 | 100 | // Prevent storing a cookie that we couldn't decode. 101 | if (!key && (cookie = read(cookie)) !== undefined) { 102 | result[name] = cookie; 103 | } 104 | } 105 | 106 | return result; 107 | }; 108 | 109 | config.defaults = {}; 110 | 111 | $.removeCookie = function (key, options) { 112 | // Must not alter options, thus extending a fresh object... 113 | $.cookie(key, '', $.extend({}, options, { expires: -1 })); 114 | return !$.cookie(key); 115 | }; 116 | 117 | })); -------------------------------------------------------------------------------- /jd/static/assets/js/main.js: -------------------------------------------------------------------------------- 1 | /* 2 | Strata by HTML5 UP 3 | html5up.net | @ajlkn 4 | Free for personal and commercial use under the CCA 3.0 license (html5up.net/license) 5 | */ 6 | 7 | (function($) { 8 | 9 | var settings = { 10 | 11 | // Parallax background effect? 12 | parallax: true, 13 | 14 | // Parallax factor (lower = more intense, higher = less intense). 
15 | parallaxFactor: 20 16 | 17 | }; 18 | 19 | skel.breakpoints({ 20 | xlarge: '(max-width: 1800px)', 21 | large: '(max-width: 1280px)', 22 | medium: '(max-width: 980px)', 23 | small: '(max-width: 736px)', 24 | xsmall: '(max-width: 480px)' 25 | }); 26 | 27 | $(function() { 28 | 29 | var $window = $(window), 30 | $body = $('body'), 31 | $header = $('#header'), 32 | $footer = $('#footer'), 33 | $main = $('#main'); 34 | 35 | // Disable animations/transitions until the page has loaded. 36 | $body.addClass('is-loading'); 37 | 38 | $window.on('load', function() { 39 | $body.removeClass('is-loading'); 40 | }); 41 | 42 | // Touch? 43 | if (skel.vars.mobile) { 44 | 45 | // Turn on touch mode. 46 | $body.addClass('is-touch'); 47 | 48 | // Height fix (mostly for iOS). 49 | window.setTimeout(function() { 50 | $window.scrollTop($window.scrollTop() + 1); 51 | }, 0); 52 | 53 | } 54 | 55 | // Fix: Placeholder polyfill. 56 | $('form').placeholder(); 57 | 58 | // Prioritize "important" elements on medium. 59 | skel.on('+medium -medium', function() { 60 | $.prioritize( 61 | '.important\\28 medium\\29', 62 | skel.breakpoint('medium').active 63 | ); 64 | }); 65 | 66 | // Footer. 67 | skel.on('+medium', function() { 68 | $footer.insertAfter($main); 69 | }); 70 | 71 | skel.on('-medium !medium', function() { 72 | $footer.appendTo($header); 73 | }); 74 | 75 | // Header. 76 | 77 | // Parallax background. 78 | 79 | // Disable parallax on IE (smooth scrolling is jerky), and on mobile platforms (= better performance). 
80 | if (skel.vars.browser == 'ie' 81 | || skel.vars.mobile) 82 | settings.parallax = false; 83 | 84 | if (settings.parallax) { 85 | 86 | skel.on('change', function() { 87 | 88 | if (skel.breakpoint('medium').active) { 89 | 90 | $window.off('scroll.strata_parallax'); 91 | $header.css('background-position', 'top left, center center'); 92 | 93 | } 94 | else { 95 | 96 | $header.css('background-position', 'left 0px'); 97 | 98 | $window.on('scroll.strata_parallax', function() { 99 | $header.css('background-position', 'left ' + (-1 * (parseInt($window.scrollTop()) / settings.parallaxFactor)) + 'px'); 100 | }); 101 | 102 | } 103 | 104 | }); 105 | 106 | $window.on('load', function() { 107 | $window.triggerHandler('scroll'); 108 | }); 109 | 110 | } 111 | 112 | // Main Sections: Two. 113 | 114 | // Lightbox gallery. 115 | $window.on('load', function() { 116 | 117 | $('#two').poptrox({ 118 | caption: function($a) { return $a.next('h3').text(); }, 119 | overlayColor: '#2c2c2c', 120 | overlayOpacity: 0.85, 121 | popupCloserText: '', 122 | popupLoaderText: '', 123 | selector: '.work-item a.image', 124 | usePopupCaption: true, 125 | usePopupDefaultStyling: false, 126 | usePopupEasyClose: false, 127 | usePopupNav: true, 128 | windowMargin: (skel.breakpoint('small').active ? 
0 : 50) 129 | }); 130 | 131 | }); 132 | 133 | }); 134 | 135 | })(jQuery); -------------------------------------------------------------------------------- /jd/static/assets/js/skel.min.js: -------------------------------------------------------------------------------- 1 | /* skel.js v3.0.1 | (c) skel.io | MIT licensed */ 2 | var skel=function(){"use strict";var t={breakpointIds:null,events:{},isInit:!1,obj:{attachments:{},breakpoints:{},head:null,states:{}},sd:"/",state:null,stateHandlers:{},stateId:"",vars:{},DOMReady:null,indexOf:null,isArray:null,iterate:null,matchesMedia:null,extend:function(e,n){t.iterate(n,function(i){t.isArray(n[i])?(t.isArray(e[i])||(e[i]=[]),t.extend(e[i],n[i])):"object"==typeof n[i]?("object"!=typeof e[i]&&(e[i]={}),t.extend(e[i],n[i])):e[i]=n[i]})},newStyle:function(t){var e=document.createElement("style");return e.type="text/css",e.innerHTML=t,e},_canUse:null,canUse:function(e){t._canUse||(t._canUse=document.createElement("div"));var n=t._canUse.style,i=e.charAt(0).toUpperCase()+e.slice(1);return e in n||"Moz"+i in n||"Webkit"+i in n||"O"+i in n||"ms"+i in n},on:function(e,n){var i=e.split(/[\s]+/);return t.iterate(i,function(e){var a=i[e];if(t.isInit){if("init"==a)return void n();if("change"==a)n();else{var r=a.charAt(0);if("+"==r||"!"==r){var o=a.substring(1);if(o in t.obj.breakpoints)if("+"==r&&t.obj.breakpoints[o].active)n();else if("!"==r&&!t.obj.breakpoints[o].active)return void n()}}}t.events[a]||(t.events[a]=[]),t.events[a].push(n)}),t},trigger:function(e){return t.events[e]&&0!=t.events[e].length?(t.iterate(t.events[e],function(n){t.events[e][n]()}),t):void 0},breakpoint:function(e){return t.obj.breakpoints[e]},breakpoints:function(e){function n(t,e){this.name=this.id=t,this.media=e,this.active=!1,this.wasActive=!1}return n.prototype.matches=function(){return t.matchesMedia(this.media)},n.prototype.sync=function(){this.wasActive=this.active,this.active=this.matches()},t.iterate(e,function(i){t.obj.breakpoints[i]=new 
n(i,e[i])}),window.setTimeout(function(){t.poll()},0),t},addStateHandler:function(e,n){t.stateHandlers[e]=n},callStateHandler:function(e){var n=t.stateHandlers[e]();t.iterate(n,function(e){t.state.attachments.push(n[e])})},changeState:function(e){t.iterate(t.obj.breakpoints,function(e){t.obj.breakpoints[e].sync()}),t.vars.lastStateId=t.stateId,t.stateId=e,t.breakpointIds=t.stateId===t.sd?[]:t.stateId.substring(1).split(t.sd),t.obj.states[t.stateId]?t.state=t.obj.states[t.stateId]:(t.obj.states[t.stateId]={attachments:[]},t.state=t.obj.states[t.stateId],t.iterate(t.stateHandlers,t.callStateHandler)),t.detachAll(t.state.attachments),t.attachAll(t.state.attachments),t.vars.stateId=t.stateId,t.vars.state=t.state,t.trigger("change"),t.iterate(t.obj.breakpoints,function(e){t.obj.breakpoints[e].active?t.obj.breakpoints[e].wasActive||t.trigger("+"+e):t.obj.breakpoints[e].wasActive&&t.trigger("-"+e)})},generateStateConfig:function(e,n){var i={};return t.extend(i,e),t.iterate(t.breakpointIds,function(e){t.extend(i,n[t.breakpointIds[e]])}),i},getStateId:function(){var e="";return t.iterate(t.obj.breakpoints,function(n){var i=t.obj.breakpoints[n];i.matches()&&(e+=t.sd+i.id)}),e},poll:function(){var e="";e=t.getStateId(),""===e&&(e=t.sd),e!==t.stateId&&t.changeState(e)},_attach:null,attach:function(e){var n=t.obj.head,i=e.element;return i.parentNode&&i.parentNode.tagName?!1:(t._attach||(t._attach=n.firstChild),n.insertBefore(i,t._attach.nextSibling),e.permanent&&(t._attach=i),!0)},attachAll:function(e){var n=[];t.iterate(e,function(t){n[e[t].priority]||(n[e[t].priority]=[]),n[e[t].priority].push(e[t])}),n.reverse(),t.iterate(n,function(e){t.iterate(n[e],function(i){t.attach(n[e][i])})})},detach:function(t){var e=t.element;return t.permanent||!e.parentNode||e.parentNode&&!e.parentNode.tagName?!1:(e.parentNode.removeChild(e),!0)},detachAll:function(e){var n={};t.iterate(e,function(t){n[e[t].id]=!0}),t.iterate(t.obj.attachments,function(e){e in 
n||t.detach(t.obj.attachments[e])})},attachment:function(e){return e in t.obj.attachments?t.obj.attachments[e]:null},newAttachment:function(e,n,i,a){return t.obj.attachments[e]={id:e,element:n,priority:i,permanent:a}},init:function(){t.initMethods(),t.initVars(),t.initEvents(),t.obj.head=document.getElementsByTagName("head")[0],t.isInit=!0,t.trigger("init")},initEvents:function(){t.on("resize",function(){t.poll()}),t.on("orientationChange",function(){t.poll()}),t.DOMReady(function(){t.trigger("ready")}),window.onload&&t.on("load",window.onload),window.onload=function(){t.trigger("load")},window.onresize&&t.on("resize",window.onresize),window.onresize=function(){t.trigger("resize")},window.onorientationchange&&t.on("orientationChange",window.onorientationchange),window.onorientationchange=function(){t.trigger("orientationChange")}},initMethods:function(){document.addEventListener?!function(e,n){t.DOMReady=n()}("domready",function(){function t(t){for(r=1;t=n.shift();)t()}var e,n=[],i=document,a="DOMContentLoaded",r=/^loaded|^c/.test(i.readyState);return i.addEventListener(a,e=function(){i.removeEventListener(a,e),t()}),function(t){r?t():n.push(t)}}):!function(e,n){t.DOMReady=n()}("domready",function(t){function e(t){for(h=1;t=i.shift();)t()}var n,i=[],a=!1,r=document,o=r.documentElement,s=o.doScroll,c="DOMContentLoaded",d="addEventListener",u="onreadystatechange",l="readyState",f=s?/^loaded|^c/:/^loaded|c/,h=f.test(r[l]);return r[d]&&r[d](c,n=function(){r.removeEventListener(c,n,a),e()},a),s&&r.attachEvent(u,n=function(){/^c/.test(r[l])&&(r.detachEvent(u,n),e())}),t=s?function(e){self!=top?h?e():i.push(e):function(){try{o.doScroll("left")}catch(n){return setTimeout(function(){t(e)},50)}e()}()}:function(t){h?t():i.push(t)}}),Array.prototype.indexOf?t.indexOf=function(t,e){return t.indexOf(e)}:t.indexOf=function(t,e){if("string"==typeof t)return t.indexOf(e);var n,i,a=e?e:0;if(!this)throw new 
TypeError;if(i=this.length,0===i||a>=i)return-1;for(0>a&&(a=i-Math.abs(a)),n=a;i>n;n++)if(this[n]===t)return n;return-1},Array.isArray?t.isArray=function(t){return Array.isArray(t)}:t.isArray=function(t){return"[object Array]"===Object.prototype.toString.call(t)},Object.keys?t.iterate=function(t,e){if(!t)return[];var n,i=Object.keys(t);for(n=0;i[n]&&e(i[n],t[i[n]])!==!1;n++);}:t.iterate=function(t,e){if(!t)return[];var n;for(n in t)if(Object.prototype.hasOwnProperty.call(t,n)&&e(n,t[n])===!1)break},window.matchMedia?t.matchesMedia=function(t){return""==t?!0:window.matchMedia(t).matches}:window.styleMedia||window.media?t.matchesMedia=function(t){if(""==t)return!0;var e=window.styleMedia||window.media;return e.matchMedium(t||"all")}:window.getComputedStyle?t.matchesMedia=function(t){if(""==t)return!0;var e=document.createElement("style"),n=document.getElementsByTagName("script")[0],i=null;e.type="text/css",e.id="matchmediajs-test",n.parentNode.insertBefore(e,n),i="getComputedStyle"in window&&window.getComputedStyle(e,null)||e.currentStyle;var a="@media "+t+"{ #matchmediajs-test { width: 1px; } }";return e.styleSheet?e.styleSheet.cssText=a:e.textContent=a,"1px"===i.width}:t.matchesMedia=function(t){if(""==t)return!0;var e,n,i,a,r={"min-width":null,"max-width":null},o=!1;for(i=t.split(/\s+and\s+/),e=0;er["max-width"]||null!==r["min-height"]&&cr["max-height"]?!1:!0},navigator.userAgent.match(/MSIE ([0-9]+)/)&&RegExp.$1<9&&(t.newStyle=function(t){var e=document.createElement("span");return e.innerHTML=' ",e})},initVars:function(){var e,n,i,a=navigator.userAgent;e="other",n=0,i=[["firefox",/Firefox\/([0-9\.]+)/],["bb",/BlackBerry.+Version\/([0-9\.]+)/],["bb",/BB[0-9]+.+Version\/([0-9\.]+)/],["opera",/OPR\/([0-9\.]+)/],["opera",/Opera\/([0-9\.]+)/],["edge",/Edge\/([0-9\.]+)/],["safari",/Version\/([0-9\.]+).+Safari/],["chrome",/Chrome\/([0-9\.]+)/],["ie",/MSIE ([0-9]+)/],["ie",/Trident\/.+rv:([0-9]+)/]],t.iterate(i,function(t,i){return 
a.match(i[1])?(e=i[0],n=parseFloat(RegExp.$1),!1):void 0}),t.vars.browser=e,t.vars.browserVersion=n,e="other",n=0,i=[["ios",/([0-9_]+) like Mac OS X/,function(t){return t.replace("_",".").replace("_","")}],["ios",/CPU like Mac OS X/,function(t){return 0}],["wp",/Windows Phone ([0-9\.]+)/,null],["android",/Android ([0-9\.]+)/,null],["mac",/Macintosh.+Mac OS X ([0-9_]+)/,function(t){return t.replace("_",".").replace("_","")}],["windows",/Windows NT ([0-9\.]+)/,null],["bb",/BlackBerry.+Version\/([0-9\.]+)/,null],["bb",/BB[0-9]+.+Version\/([0-9\.]+)/,null]],t.iterate(i,function(t,i){return a.match(i[1])?(e=i[0],n=parseFloat(i[2]?i[2](RegExp.$1):RegExp.$1),!1):void 0}),t.vars.os=e,t.vars.osVersion=n,t.vars.IEVersion="ie"==t.vars.browser?t.vars.browserVersion:99,t.vars.touch="wp"==t.vars.os?navigator.msMaxTouchPoints>0:!!("ontouchstart"in window),t.vars.mobile="wp"==t.vars.os||"android"==t.vars.os||"ios"==t.vars.os||"bb"==t.vars.os}};return t.init(),t}();!function(t,e){"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?module.exports=e():t.skel=e()}(this,function(){return skel}); 3 | -------------------------------------------------------------------------------- /jd/static/assets/sass/ie8.scss: -------------------------------------------------------------------------------- 1 | @import 'libs/vars'; 2 | @import 'libs/functions'; 3 | @import 'libs/mixins'; 4 | @import 'libs/skel'; 5 | 6 | /* 7 | Strata by HTML5 UP 8 | html5up.net | @ajlkn 9 | Free for personal and commercial use under the CCA 3.0 license (html5up.net/license) 10 | */ 11 | 12 | /* Button */ 13 | 14 | input[type="submit"], 15 | input[type="reset"], 16 | input[type="button"], 17 | .button { 18 | position: relative; 19 | -ms-behavior: url('assets/js/ie/PIE.htc'); 20 | } 21 | 22 | /* Form */ 23 | 24 | input[type="text"], 25 | input[type="password"], 26 | input[type="email"], 27 | select, 28 | textarea { 29 | position: relative; 30 | -ms-behavior: url('assets/js/ie/PIE.htc'); 31 | } 32 
| 33 | input[type="text"], 34 | input[type="password"], 35 | input[type="email"], 36 | select { 37 | height: _size(element-height); 38 | line-height: _size(element-height); 39 | } 40 | 41 | input[type="checkbox"], 42 | input[type="radio"] { 43 | & + label { 44 | &:before { 45 | display: none; 46 | } 47 | } 48 | } 49 | 50 | /* Image */ 51 | 52 | .image { 53 | position: relative; 54 | -ms-behavior: url('assets/js/ie/PIE.htc'); 55 | 56 | &:before, &:after { 57 | display: none !important; 58 | } 59 | 60 | img { 61 | position: relative; 62 | -ms-behavior: url('assets/js/ie/PIE.htc'); 63 | } 64 | } 65 | 66 | /* Header */ 67 | 68 | #header { 69 | background-image: url('../../images/bg.jpg'); 70 | background-repeat: no-repeat; 71 | background-size: cover; 72 | -ms-behavior: url('assets/js/ie/backgroundsize.min.htc'); 73 | 74 | h1 { 75 | color: _palette(accent2, fg-bold); 76 | } 77 | } 78 | 79 | /* Footer */ 80 | 81 | #footer { 82 | .icons { 83 | a { 84 | color: _palette(accent2, fg-bold); 85 | } 86 | } 87 | } -------------------------------------------------------------------------------- /jd/static/assets/sass/libs/_functions.scss: -------------------------------------------------------------------------------- 1 | /// Gets a duration value from $duration. 2 | /// @param {string} $keys Key(s), passed through unchanged to val(). 3 | /// @return {string} Value returned by val() for $duration and the given key(s). 4 | @function _duration($keys...) { 5 | @return val($duration, $keys...); 6 | } 7 | 8 | /// Gets a font value from $font. 9 | /// @param {string} $keys Key(s), passed through unchanged to val(). 10 | /// @return {string} Value returned by val() for $font and the given key(s). 11 | @function _font($keys...) { 12 | @return val($font, $keys...); 13 | } 14 | 15 | /// Gets a misc value from $misc. 16 | /// @param {string} $keys Key(s), passed through unchanged to val(). 17 | /// @return {string} Value returned by val() for $misc and the given key(s). 18 | @function _misc($keys...) { 19 | @return val($misc, $keys...); 20 | } 21 | 22 | /// Gets a palette value from $palette. 23 | /// @param {string} $keys Key(s), passed through unchanged to val(). 24 | /// @return {string} Value returned by val() for $palette and the given key(s). 25 | @function _palette($keys...) { 26 | @return val($palette, $keys...); 27 | } 28 | 29 | /// Gets a size value from $size.
30 | /// @param {string} $keys Key(s), passed through unchanged to val(). 31 | /// @return {string} Value returned by val() for $size and the given key(s). 32 | @function _size($keys...) { 33 | @return val($size, $keys...); 34 | } -------------------------------------------------------------------------------- /jd/static/assets/sass/libs/_mixins.scss: -------------------------------------------------------------------------------- 1 | /// Makes an element's :before (or, per $where, :after) pseudoelement a FontAwesome icon. 2 | /// @param {string} $content Optional content value to use; the content declaration is emitted only when this is truthy. 3 | /// @param {string} $where Optional pseudoelement to target (before or after). 4 | @mixin icon($content: false, $where: before) { 5 | 6 | text-decoration: none; 7 | 8 | &:#{$where} { 9 | 10 | @if $content { 11 | content: $content; 12 | } 13 | 14 | -moz-osx-font-smoothing: grayscale; 15 | -webkit-font-smoothing: antialiased; 16 | font-family: FontAwesome; 17 | font-style: normal; 18 | font-weight: normal; 19 | text-transform: none !important; 20 | 21 | } 22 | 23 | } 24 | 25 | /// Applies padding to an element, taking the current element-margin value into account: _size(element-margin) is subtracted from the bottom padding, which never goes below 0.1em. 26 | /// @param {mixed} $tb Top/bottom padding. 27 | /// @param {mixed} $lr Left/right padding. 28 | /// @param {list} $pad Optional extra padding, in the order top, right, bottom, left. 29 | /// @param {bool} $important If truthy, appends !important to the padding declaration. 30 | @mixin padding($tb, $lr, $pad: (0,0,0,0), $important: null) { 31 | 32 | @if $important { 33 | $important: '!important'; 34 | } 35 | 36 | padding: ($tb + nth($pad,1)) ($lr + nth($pad,2)) max(0.1em, $tb - _size(element-margin) + nth($pad,3)) ($lr + nth($pad,4)) #{$important}; 37 | 38 | } 39 | 40 | /// Encodes SVG source into a data URL so IE doesn't choke (via codepen.io/jakob-e/pen/YXXBrp). 41 | /// @param {string} $svg SVG source to embed; the function itself adds the data:image/svg+xml prefix. 42 | /// @return {string} url() value holding the encoded SVG data URL.
43 | @function svg-url($svg) { 44 | // NOTE(review): str-replace() is not defined in this file; it has to be supplied by another partial. Verify. 45 | $svg: str-replace($svg, '"', '\''); 46 | $svg: str-replace($svg, '<', '%3C'); 47 | $svg: str-replace($svg, '>', '%3E'); 48 | $svg: str-replace($svg, '&', '%26'); 49 | $svg: str-replace($svg, '#', '%23'); 50 | $svg: str-replace($svg, '{', '%7B'); 51 | $svg: str-replace($svg, '}', '%7D'); 52 | $svg: str-replace($svg, ';', '%3B'); 53 | // NOTE(review): a literal '%' in $svg is left as-is (there is no '%25' step); confirm callers never pass one. 54 | @return url("data:image/svg+xml;charset=utf8,#{$svg}"); 55 | 56 | } -------------------------------------------------------------------------------- /jd/static/assets/sass/libs/_vars.scss: -------------------------------------------------------------------------------- 1 | // Misc. Looked up through _misc(). 2 | $misc: ( 3 | z-index-base: 10000 4 | ); 5 | 6 | // Duration. Looked up through _duration(). 7 | $duration: ( 8 | transition: 0.2s 9 | ); 10 | 11 | // Size. Looked up through _size(). 12 | $size: ( 13 | border-radius: 0.35em, 14 | element-height: 2.75em, 15 | element-margin: 2em 16 | ); 17 | 18 | // Font. Looked up through _font(). NOTE(review): weight-bold is the same as weight (400); confirm that is intended. 19 | $font: ( 20 | family: ('Source Sans Pro', Helvetica, sans-serif), 21 | family-fixed: ('Courier New', monospace), 22 | weight: 400, 23 | weight-bold: 400 24 | ); 25 | 26 | // Palette. Looked up through _palette().
27 | $palette: ( 28 | bg: #fff, 29 | fg: #a2a2a2, 30 | fg-bold: #787878, 31 | fg-light: #b2b2b2, 32 | border: #efefef, 33 | border-bg: #f7f7f7, 34 | border2: #dfdfdf, 35 | border2-bg: #e7e7e7, 36 | 37 | accent1: ( 38 | bg: #49bf9d, 39 | fg: mix(#49bf9d, #ffffff, 25%), 40 | fg-bold: #ffffff, 41 | fg-light: mix(#49bf9d, #ffffff, 40%), 42 | border: rgba(255,255,255,0.25), 43 | border-bg: rgba(255,255,255,0.075), 44 | border2: rgba(255,255,255,0.5), 45 | border2-bg: rgba(255,255,255,0.2) 46 | ), 47 | 48 | accent2: ( 49 | bg: #1f1815, 50 | fg: rgba(255,255,255,0.5), 51 | fg-bold: #ffffff, 52 | fg-light: rgba(255,255,255,0.4), 53 | border: rgba(255,255,255,0.25), 54 | border-bg: rgba(255,255,255,0.075), 55 | border2: rgba(255,255,255,0.5), 56 | border2-bg: rgba(255,255,255,0.2) 57 | ) 58 | ); -------------------------------------------------------------------------------- /jd/static/images/10060984607_channel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/10060984607_channel.png -------------------------------------------------------------------------------- /jd/static/images/10060984607_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/10060984607_color.png -------------------------------------------------------------------------------- /jd/static/images/10060984607_creation_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/10060984607_creation_time.png -------------------------------------------------------------------------------- /jd/static/images/10060984607_days.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/10060984607_days.png -------------------------------------------------------------------------------- /jd/static/images/10060984607_general.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/10060984607_general.png -------------------------------------------------------------------------------- /jd/static/images/10060984607_good.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/10060984607_good.png -------------------------------------------------------------------------------- /jd/static/images/10060984607_mobile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/10060984607_mobile.png -------------------------------------------------------------------------------- /jd/static/images/10060984607_poor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/10060984607_poor.png -------------------------------------------------------------------------------- /jd/static/images/10060984607_province.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/10060984607_province.png -------------------------------------------------------------------------------- 
/jd/static/images/10060984607_size.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/10060984607_size.png -------------------------------------------------------------------------------- /jd/static/images/10060984607_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/10060984607_time.png -------------------------------------------------------------------------------- /jd/static/images/10060984607_user_level.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/10060984607_user_level.png -------------------------------------------------------------------------------- /jd/static/images/11476089321_channel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11476089321_channel.png -------------------------------------------------------------------------------- /jd/static/images/11476089321_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11476089321_color.png -------------------------------------------------------------------------------- /jd/static/images/11476089321_creation_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11476089321_creation_time.png 
-------------------------------------------------------------------------------- /jd/static/images/11476089321_days.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11476089321_days.png -------------------------------------------------------------------------------- /jd/static/images/11476089321_general.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11476089321_general.png -------------------------------------------------------------------------------- /jd/static/images/11476089321_good.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11476089321_good.png -------------------------------------------------------------------------------- /jd/static/images/11476089321_mobile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11476089321_mobile.png -------------------------------------------------------------------------------- /jd/static/images/11476089321_poor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11476089321_poor.png -------------------------------------------------------------------------------- /jd/static/images/11476089321_province.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11476089321_province.png -------------------------------------------------------------------------------- /jd/static/images/11476089321_size.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11476089321_size.png -------------------------------------------------------------------------------- /jd/static/images/11476089321_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11476089321_time.png -------------------------------------------------------------------------------- /jd/static/images/11476089321_user_level.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11476089321_user_level.png -------------------------------------------------------------------------------- /jd/static/images/11943216519_channel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11943216519_channel.png -------------------------------------------------------------------------------- /jd/static/images/11943216519_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11943216519_color.png -------------------------------------------------------------------------------- /jd/static/images/11943216519_creation_time.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11943216519_creation_time.png -------------------------------------------------------------------------------- /jd/static/images/11943216519_days.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11943216519_days.png -------------------------------------------------------------------------------- /jd/static/images/11943216519_good.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11943216519_good.png -------------------------------------------------------------------------------- /jd/static/images/11943216519_mobile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11943216519_mobile.png -------------------------------------------------------------------------------- /jd/static/images/11943216519_province.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11943216519_province.png -------------------------------------------------------------------------------- /jd/static/images/11943216519_size.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11943216519_size.png -------------------------------------------------------------------------------- 
/jd/static/images/11943216519_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11943216519_time.png -------------------------------------------------------------------------------- /jd/static/images/11943216519_user_level.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/11943216519_user_level.png -------------------------------------------------------------------------------- /jd/static/images/2359205_channel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/2359205_channel.png -------------------------------------------------------------------------------- /jd/static/images/2359205_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/2359205_color.png -------------------------------------------------------------------------------- /jd/static/images/2359205_creation_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/2359205_creation_time.png -------------------------------------------------------------------------------- /jd/static/images/2359205_days.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/2359205_days.png 
-------------------------------------------------------------------------------- /jd/static/images/2359205_general.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/2359205_general.png -------------------------------------------------------------------------------- /jd/static/images/2359205_good.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/2359205_good.png -------------------------------------------------------------------------------- /jd/static/images/2359205_mobile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/2359205_mobile.png -------------------------------------------------------------------------------- /jd/static/images/2359205_poor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/2359205_poor.png -------------------------------------------------------------------------------- /jd/static/images/2359205_province.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/2359205_province.png -------------------------------------------------------------------------------- /jd/static/images/2359205_size.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/2359205_size.png 
-------------------------------------------------------------------------------- /jd/static/images/2359205_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/2359205_time.png -------------------------------------------------------------------------------- /jd/static/images/2359205_user_level.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/2359205_user_level.png -------------------------------------------------------------------------------- /jd/static/images/3652063_channel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/3652063_channel.png -------------------------------------------------------------------------------- /jd/static/images/3652063_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/3652063_color.png -------------------------------------------------------------------------------- /jd/static/images/3652063_creation_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/3652063_creation_time.png -------------------------------------------------------------------------------- /jd/static/images/3652063_days.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/3652063_days.png -------------------------------------------------------------------------------- /jd/static/images/3652063_general.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/3652063_general.png -------------------------------------------------------------------------------- /jd/static/images/3652063_good.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/3652063_good.png -------------------------------------------------------------------------------- /jd/static/images/3652063_mobile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/3652063_mobile.png -------------------------------------------------------------------------------- /jd/static/images/3652063_poor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/3652063_poor.png -------------------------------------------------------------------------------- /jd/static/images/3652063_province.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/3652063_province.png -------------------------------------------------------------------------------- /jd/static/images/3652063_size.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/3652063_size.png -------------------------------------------------------------------------------- /jd/static/images/3652063_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/3652063_time.png -------------------------------------------------------------------------------- /jd/static/images/3652063_user_level.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/3652063_user_level.png -------------------------------------------------------------------------------- /jd/static/images/987091_channel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/987091_channel.png -------------------------------------------------------------------------------- /jd/static/images/987091_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/987091_color.png -------------------------------------------------------------------------------- /jd/static/images/987091_creation_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/987091_creation_time.png -------------------------------------------------------------------------------- /jd/static/images/987091_days.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/987091_days.png -------------------------------------------------------------------------------- /jd/static/images/987091_general.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/987091_general.png -------------------------------------------------------------------------------- /jd/static/images/987091_good.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/987091_good.png -------------------------------------------------------------------------------- /jd/static/images/987091_mobile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/987091_mobile.png -------------------------------------------------------------------------------- /jd/static/images/987091_poor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/987091_poor.png -------------------------------------------------------------------------------- /jd/static/images/987091_province.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/987091_province.png -------------------------------------------------------------------------------- /jd/static/images/987091_size.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/987091_size.png -------------------------------------------------------------------------------- /jd/static/images/987091_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/987091_time.png -------------------------------------------------------------------------------- /jd/static/images/987091_user_level.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/987091_user_level.png -------------------------------------------------------------------------------- /jd/static/images/avatar.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/avatar.jpg -------------------------------------------------------------------------------- /jd/static/images/bg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/bg.jpg -------------------------------------------------------------------------------- /jd/static/images/fulls/01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/fulls/01.jpg -------------------------------------------------------------------------------- /jd/static/images/fulls/02.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/fulls/02.jpg -------------------------------------------------------------------------------- /jd/static/images/fulls/03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/fulls/03.jpg -------------------------------------------------------------------------------- /jd/static/images/fulls/04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/fulls/04.jpg -------------------------------------------------------------------------------- /jd/static/images/fulls/05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/fulls/05.jpg -------------------------------------------------------------------------------- /jd/static/images/fulls/06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/fulls/06.jpg -------------------------------------------------------------------------------- /jd/static/images/job.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/job.png -------------------------------------------------------------------------------- /jd/static/images/thumbs/01.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/thumbs/01.jpg -------------------------------------------------------------------------------- /jd/static/images/thumbs/02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/thumbs/02.jpg -------------------------------------------------------------------------------- /jd/static/images/thumbs/03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/thumbs/03.jpg -------------------------------------------------------------------------------- /jd/static/images/thumbs/04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/thumbs/04.jpg -------------------------------------------------------------------------------- /jd/static/images/thumbs/05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/thumbs/05.jpg -------------------------------------------------------------------------------- /jd/static/images/thumbs/06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/thumbs/06.jpg -------------------------------------------------------------------------------- /jd/static/images/weixin.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/weixin.png -------------------------------------------------------------------------------- /jd/static/images/xiaomi5s_comment_count.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/xiaomi5s_comment_count.png -------------------------------------------------------------------------------- /jd/static/images/xiaomi5s_comment_full_count.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/images/xiaomi5s_comment_full_count.png -------------------------------------------------------------------------------- /jd/static/js/jquery_cookie.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by lgq on 17/5/4. 3 | */ 4 | 5 | // using jQuery 6 | function getCookie(name) { 7 | var cookieValue = null; 8 | if (document.cookie && document.cookie != '') { 9 | var cookies = document.cookie.split(';'); 10 | for (var i = 0; i < cookies.length; i++) { 11 | var cookie = jQuery.trim(cookies[i]); 12 | // Does this cookie string begin with the name we want? 
13 | if (cookie.substring(0, name.length + 1) == (name + '=')) { 14 | cookieValue = decodeURIComponent(cookie.substring(name.length + 1)); 15 | break; 16 | } 17 | } 18 | } 19 | return cookieValue; 20 | } 21 | 22 | var csrftoken = getCookie('csrftoken'); 23 | function csrfSafeMethod(method) { 24 | // these HTTP methods do not require CSRF protection 25 | return (/^(GET|HEAD|OPTIONS|TRACE)$/.test(method)); 26 | } 27 | 28 | $.ajaxSetup({ 29 | beforeSend: function (xhr, settings) { 30 | if (!csrfSafeMethod(settings.type) && !this.crossDomain) { 31 | xhr.setRequestHeader("X-CSRFToken", csrftoken); 32 | } 33 | } 34 | }); -------------------------------------------------------------------------------- /jd/static/js/switchemail.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by lgq on 17/5/3. 3 | */ 4 | 5 | function switchemail() { 6 | var text = document.getElementById('email'); 7 | if (text.style.display === 'none') { 8 | text.style.display = 'inline'; 9 | // text.style.color = '#FFFFFF'; 10 | } 11 | else { 12 | text.style.display = 'none'; 13 | } 14 | } -------------------------------------------------------------------------------- /jd/static/loader.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/jd/static/loader.gif -------------------------------------------------------------------------------- /jd/static/showdownjs/script.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by lgq on 17/2/27. 
3 | */ 4 | 5 | 6 | function run() { 7 | var text = document.getElementById('sourceTA').value, 8 | target = document.getElementById('targetDiv'), 9 | converter = new showdown.Converter({ 10 | 'omitExtraWLInCodeBlocks': 'true', 11 | 'parseImgDimensions': 'true', 12 | 'noHeaderId': 'false', 13 | // 'prefixHeaderId': 'true', 14 | 'simplifiedAutoLink': 'true', 15 | 'literalMidWordUnderscores': 'true', 16 | 'strikethrough': 'true', 17 | 'tables': 'true', 18 | 'tablesHeaderId': 'true', 19 | 'ghCodeBlocks': 'true', 20 | 'tasklists': 'true', 21 | 'smoothLivePreview': 'true', 22 | 'prefixHeaderId': 'false', 23 | 'disableForced4SpacesIndentedSublists': 'true', 24 | 'ghCompatibleHeaderId': 'true', 25 | 'smartIndentationFix': 'false', 26 | 'excludeTrailingPunctuationFromURLs': 'false', 27 | 'simpleLineBreaks': 'false', 28 | 'requireSpaceBeforeHeadingText': 'false', 29 | 'ghMentions': 'false' 30 | }), 31 | 32 | html = converter.makeHtml(text); 33 | 34 | target.innerHTML = html; 35 | } -------------------------------------------------------------------------------- /jd/static/showdownjs/style.css: -------------------------------------------------------------------------------- 1 | #sourceTA { 2 | display: block; 3 | } 4 | #targetDiv { 5 | border: 1px dashed #333333; 6 | width: 600px; 7 | height: 400px; 8 | } -------------------------------------------------------------------------------- /jd/static/style/Clearness Dark.css: -------------------------------------------------------------------------------- 1 | h1, 2 | h2, 3 | h3, 4 | h4, 5 | h5, 6 | h6, 7 | p, 8 | blockquote { 9 | margin: 0; 10 | padding: 0; 11 | } 12 | body { 13 | font-family: "Helvetica Neue", Helvetica, "Hiragino Sans GB", Arial, sans-serif; 14 | font-size: 13px; 15 | line-height: 18px; 16 | color: #fff; 17 | background-color: #282a36; 18 | margin: 10px 13px 10px 13px; 19 | } 20 | table { 21 | margin: 10px 0 15px 0; 22 | border-collapse: collapse; 23 | } 24 | td,th { 25 | border: 1px solid #ddd; 26 | padding: 3px 
10px; 27 | } 28 | th { 29 | padding: 5px 10px; 30 | } 31 | a { 32 | color: #59acf3; 33 | } 34 | a:hover { 35 | color: #a7d8ff; 36 | text-decoration: none; 37 | } 38 | a img { 39 | border: none; 40 | } 41 | p { 42 | margin-bottom: 9px; 43 | } 44 | h1, 45 | h2, 46 | h3, 47 | h4, 48 | h5, 49 | h6 { 50 | color: #fff; 51 | line-height: 36px; 52 | } 53 | h1 { 54 | margin-bottom: 18px; 55 | font-size: 30px; 56 | } 57 | h2 { 58 | font-size: 24px; 59 | } 60 | h3 { 61 | font-size: 18px; 62 | } 63 | h4 { 64 | font-size: 16px; 65 | } 66 | h5 { 67 | font-size: 14px; 68 | } 69 | h6 { 70 | font-size: 13px; 71 | } 72 | hr { 73 | margin: 0 0 19px; 74 | border: 0; 75 | border-bottom: 1px solid #ccc; 76 | } 77 | blockquote { 78 | padding: 13px 13px 21px 15px; 79 | margin-bottom: 18px; 80 | font-family:georgia,serif; 81 | font-style: italic; 82 | } 83 | blockquote:before { 84 | content:"\201C"; 85 | font-size:40px; 86 | margin-left:-10px; 87 | font-family:georgia,serif; 88 | color:#eee; 89 | } 90 | blockquote p { 91 | font-size: 14px; 92 | font-weight: 300; 93 | line-height: 18px; 94 | margin-bottom: 0; 95 | font-style: italic; 96 | } 97 | code, pre { 98 | font-family: Monaco, Andale Mono, Courier New, monospace; 99 | } 100 | code { 101 | color: #ff4a14; 102 | padding: 1px 3px; 103 | font-size: 12px; 104 | -webkit-border-radius: 3px; 105 | -moz-border-radius: 3px; 106 | border-radius: 3px; 107 | } 108 | pre { 109 | display: block; 110 | padding: 14px; 111 | margin: 0 0 18px; 112 | line-height: 16px; 113 | font-size: 11px; 114 | border: 1px solid #bf370f; 115 | white-space: pre; 116 | white-space: pre-wrap; 117 | word-wrap: break-word; 118 | } 119 | pre code { 120 | background-color: #282a36; 121 | color: #ff4a14; 122 | font-size: 11px; 123 | padding: 0; 124 | } 125 | sup { 126 | font-size: 0.83em; 127 | vertical-align: super; 128 | line-height: 0; 129 | } 130 | * { 131 | -webkit-print-color-adjust: exact; 132 | } 133 | @media screen and (min-width: 914px) { 134 | body { 135 | width: 
854px; 136 | margin:10px auto; 137 | } 138 | } 139 | @media print { 140 | body,code,pre code,h1,h2,h3,h4,h5,h6 { 141 | color: black; 142 | } 143 | table, pre { 144 | page-break-inside: avoid; 145 | } 146 | } -------------------------------------------------------------------------------- /jd/static/style/Clearness.css: -------------------------------------------------------------------------------- 1 | h1, 2 | h2, 3 | h3, 4 | h4, 5 | h5, 6 | h6, 7 | p, 8 | blockquote { 9 | margin: 0; 10 | padding: 0; 11 | } 12 | body { 13 | font-family: "Helvetica Neue", Helvetica, "Hiragino Sans GB", Arial, sans-serif; 14 | font-size: 13px; 15 | line-height: 18px; 16 | color: #737373; 17 | background-color: white; 18 | margin: 10px 13px 10px 13px; 19 | } 20 | table { 21 | margin: 10px 0 15px 0; 22 | border-collapse: collapse; 23 | } 24 | td,th { 25 | border: 1px solid #ddd; 26 | padding: 3px 10px; 27 | } 28 | th { 29 | padding: 5px 10px; 30 | } 31 | 32 | a { 33 | color: #0069d6; 34 | } 35 | a:hover { 36 | color: #0050a3; 37 | text-decoration: none; 38 | } 39 | a img { 40 | border: none; 41 | } 42 | p { 43 | margin-bottom: 9px; 44 | } 45 | h1, 46 | h2, 47 | h3, 48 | h4, 49 | h5, 50 | h6 { 51 | color: #404040; 52 | line-height: 36px; 53 | } 54 | h1 { 55 | margin-bottom: 18px; 56 | font-size: 30px; 57 | } 58 | h2 { 59 | font-size: 24px; 60 | } 61 | h3 { 62 | font-size: 18px; 63 | } 64 | h4 { 65 | font-size: 16px; 66 | } 67 | h5 { 68 | font-size: 14px; 69 | } 70 | h6 { 71 | font-size: 13px; 72 | } 73 | hr { 74 | margin: 0 0 19px; 75 | border: 0; 76 | border-bottom: 1px solid #ccc; 77 | } 78 | blockquote { 79 | padding: 13px 13px 21px 15px; 80 | margin-bottom: 18px; 81 | font-family:georgia,serif; 82 | font-style: italic; 83 | } 84 | blockquote:before { 85 | content:"\201C"; 86 | font-size:40px; 87 | margin-left:-10px; 88 | font-family:georgia,serif; 89 | color:#eee; 90 | } 91 | blockquote p { 92 | font-size: 14px; 93 | font-weight: 300; 94 | line-height: 18px; 95 | margin-bottom: 0; 
96 | font-style: italic; 97 | } 98 | code, pre { 99 | font-family: Monaco, Andale Mono, Courier New, monospace; 100 | } 101 | code { 102 | background-color: #fee9cc; 103 | color: rgba(0, 0, 0, 0.75); 104 | padding: 1px 3px; 105 | font-size: 12px; 106 | -webkit-border-radius: 3px; 107 | -moz-border-radius: 3px; 108 | border-radius: 3px; 109 | } 110 | pre { 111 | display: block; 112 | padding: 14px; 113 | margin: 0 0 18px; 114 | line-height: 16px; 115 | font-size: 11px; 116 | border: 1px solid #d9d9d9; 117 | white-space: pre-wrap; 118 | word-wrap: break-word; 119 | } 120 | pre code { 121 | background-color: #fff; 122 | color:#737373; 123 | font-size: 11px; 124 | padding: 0; 125 | } 126 | sup { 127 | font-size: 0.83em; 128 | vertical-align: super; 129 | line-height: 0; 130 | } 131 | * { 132 | -webkit-print-color-adjust: exact; 133 | } 134 | @media screen and (min-width: 914px) { 135 | body { 136 | width: 854px; 137 | margin:10px auto; 138 | } 139 | } 140 | @media print { 141 | body,code,pre code,h1,h2,h3,h4,h5,h6 { 142 | color: black; 143 | } 144 | table, pre { 145 | page-break-inside: avoid; 146 | } 147 | } -------------------------------------------------------------------------------- /jd/static/style/GitHub.css: -------------------------------------------------------------------------------- 1 | *{margin:0;padding:0;} 2 | body { 3 | font:13.34px helvetica,arial,freesans,clean,sans-serif; 4 | color:black; 5 | line-height:1.4em; 6 | background-color: #F8F8F8; 7 | padding: 0.7em; 8 | } 9 | p { 10 | margin:1em 0; 11 | line-height:1.5em; 12 | } 13 | table { 14 | font-size:inherit; 15 | font:100%; 16 | margin:1em; 17 | } 18 | table th{border-bottom:1px solid #bbb;padding:.2em 1em;} 19 | table td{border-bottom:1px solid #ddd;padding:.2em 1em;} 20 | input[type=text],input[type=password],input[type=image],textarea{font:99% helvetica,arial,freesans,sans-serif;} 21 | select,option{padding:0 .25em;} 22 | optgroup{margin-top:.5em;} 23 | pre,code{font:12px Monaco,"Courier 
New","DejaVu Sans Mono","Bitstream Vera Sans Mono",monospace;} 24 | pre { 25 | margin:1em 0; 26 | font-size:12px; 27 | background-color:#eee; 28 | border:1px solid #ddd; 29 | padding:5px; 30 | line-height:1.5em; 31 | color:#444; 32 | overflow:auto; 33 | -webkit-box-shadow:rgba(0,0,0,0.07) 0 1px 2px inset; 34 | -webkit-border-radius:3px; 35 | -moz-border-radius:3px;border-radius:3px; 36 | } 37 | pre code { 38 | padding:0; 39 | font-size:12px; 40 | background-color:#eee; 41 | border:none; 42 | } 43 | code { 44 | font-size:12px; 45 | background-color:#f8f8ff; 46 | color:#444; 47 | padding:0 .2em; 48 | border:1px solid #dedede; 49 | } 50 | img{border:0;max-width:100%;} 51 | abbr{border-bottom:none;} 52 | a{color:#4183c4;text-decoration:none;} 53 | a:hover{text-decoration:underline;} 54 | a code,a:link code,a:visited code{color:#4183c4;} 55 | h2,h3{margin:1em 0;} 56 | h1,h2,h3,h4,h5,h6{border:0;} 57 | h1{font-size:170%;border-top:4px solid #aaa;padding-top:.5em;margin-top:1.5em;} 58 | h1:first-child{margin-top:0;padding-top:.25em;border-top:none;} 59 | h2{font-size:150%;margin-top:1.5em;border-top:4px solid #e0e0e0;padding-top:.5em;} 60 | h3{margin-top:1em;} 61 | hr{border:1px solid #ddd;} 62 | ul{margin:1em 0 1em 2em;} 63 | ol{margin:1em 0 1em 2em;} 64 | ul li,ol li{margin-top:.5em;margin-bottom:.5em;} 65 | ul ul,ul ol,ol ol,ol ul{margin-top:0;margin-bottom:0;} 66 | blockquote{margin:1em 0;border-left:5px solid #ddd;padding-left:.6em;color:#555;} 67 | dt{font-weight:bold;margin-left:1em;} 68 | dd{margin-left:2em;margin-bottom:1em;} 69 | sup { 70 | font-size: 0.83em; 71 | vertical-align: super; 72 | line-height: 0; 73 | } 74 | * { 75 | -webkit-print-color-adjust: exact; 76 | } 77 | @media screen and (min-width: 914px) { 78 | body { 79 | width: 854px; 80 | margin:0 auto; 81 | } 82 | } 83 | @media print { 84 | table, pre { 85 | page-break-inside: avoid; 86 | } 87 | pre { 88 | word-wrap: break-word; 89 | } 90 | } 
-------------------------------------------------------------------------------- /jd/static/style/GitHub2.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: Helvetica, arial, sans-serif; 3 | font-size: 14px; 4 | line-height: 1.6; 5 | padding-top: 10px; 6 | padding-bottom: 10px; 7 | background-color: white; 8 | padding: 30px; } 9 | 10 | body > *:first-child { 11 | margin-top: 0 !important; } 12 | body > *:last-child { 13 | margin-bottom: 0 !important; } 14 | 15 | a { 16 | color: #4183C4; } 17 | a.absent { 18 | color: #cc0000; } 19 | a.anchor { 20 | display: block; 21 | padding-left: 30px; 22 | margin-left: -30px; 23 | cursor: pointer; 24 | position: absolute; 25 | top: 0; 26 | left: 0; 27 | bottom: 0; } 28 | 29 | h1, h2, h3, h4, h5, h6 { 30 | margin: 20px 0 10px; 31 | padding: 0; 32 | font-weight: bold; 33 | -webkit-font-smoothing: antialiased; 34 | cursor: text; 35 | position: relative; } 36 | 37 | h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor, h5:hover a.anchor, h6:hover a.anchor { 38 | background: 
url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAA09pVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADw/eHBhY2tldCBiZWdpbj0i77u/IiBpZD0iVzVNME1wQ2VoaUh6cmVTek5UY3prYzlkIj8+IDx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6bWV0YS8iIHg6eG1wdGs9IkFkb2JlIFhNUCBDb3JlIDUuMy1jMDExIDY2LjE0NTY2MSwgMjAxMi8wMi8wNi0xNDo1NjoyNyAgICAgICAgIj4gPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1zeW50YXgtbnMjIj4gPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIgeG1sbnM6eG1wPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvIiB4bWxuczp4bXBNTT0iaHR0cDovL25zLmFkb2JlLmNvbS94YXAvMS4wL21tLyIgeG1sbnM6c3RSZWY9Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9zVHlwZS9SZXNvdXJjZVJlZiMiIHhtcDpDcmVhdG9yVG9vbD0iQWRvYmUgUGhvdG9zaG9wIENTNiAoMTMuMCAyMDEyMDMwNS5tLjQxNSAyMDEyLzAzLzA1OjIxOjAwOjAwKSAgKE1hY2ludG9zaCkiIHhtcE1NOkluc3RhbmNlSUQ9InhtcC5paWQ6OUM2NjlDQjI4ODBGMTFFMTg1ODlEODNERDJBRjUwQTQiIHhtcE1NOkRvY3VtZW50SUQ9InhtcC5kaWQ6OUM2NjlDQjM4ODBGMTFFMTg1ODlEODNERDJBRjUwQTQiPiA8eG1wTU06RGVyaXZlZEZyb20gc3RSZWY6aW5zdGFuY2VJRD0ieG1wLmlpZDo5QzY2OUNCMDg4MEYxMUUxODU4OUQ4M0REMkFGNTBBNCIgc3RSZWY6ZG9jdW1lbnRJRD0ieG1wLmRpZDo5QzY2OUNCMTg4MEYxMUUxODU4OUQ4M0REMkFGNTBBNCIvPiA8L3JkZjpEZXNjcmlwdGlvbj4gPC9yZGY6UkRGPiA8L3g6eG1wbWV0YT4gPD94cGFja2V0IGVuZD0iciI/PsQhXeAAAABfSURBVHjaYvz//z8DJYCRUgMYQAbAMBQIAvEqkBQWXI6sHqwHiwG70TTBxGaiWwjCTGgOUgJiF1J8wMRAIUA34B4Q76HUBelAfJYSA0CuMIEaRP8wGIkGMA54bgQIMACAmkXJi0hKJQAAAABJRU5ErkJggg==) no-repeat 10px center; 39 | text-decoration: none; } 40 | 41 | h1 tt, h1 code { 42 | font-size: inherit; } 43 | 44 | h2 tt, h2 code { 45 | font-size: inherit; } 46 | 47 | h3 tt, h3 code { 48 | font-size: inherit; } 49 | 50 | h4 tt, h4 code { 51 | font-size: inherit; } 52 | 53 | h5 tt, h5 code { 54 | font-size: inherit; } 55 | 56 | h6 tt, h6 code { 57 | font-size: inherit; } 58 | 59 | h1 { 60 | font-size: 28px; 61 | color: black; } 62 | 63 | h2 { 64 | font-size: 24px; 65 | border-bottom: 1px solid #cccccc; 66 | color: black; } 67 | 68 | h3 { 69 | font-size: 18px; } 
70 | 71 | h4 { 72 | font-size: 16px; } 73 | 74 | h5 { 75 | font-size: 14px; } 76 | 77 | h6 { 78 | color: #777777; 79 | font-size: 14px; } 80 | 81 | p, blockquote, ul, ol, dl, li, table, pre { 82 | margin: 15px 0; } 83 | 84 | hr { 85 | background: transparent url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAYAAAAECAYAAACtBE5DAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAyJpVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADw/eHBhY2tldCBiZWdpbj0i77u/IiBpZD0iVzVNME1wQ2VoaUh6cmVTek5UY3prYzlkIj8+IDx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6bWV0YS8iIHg6eG1wdGs9IkFkb2JlIFhNUCBDb3JlIDUuMC1jMDYwIDYxLjEzNDc3NywgMjAxMC8wMi8xMi0xNzozMjowMCAgICAgICAgIj4gPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1zeW50YXgtbnMjIj4gPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIgeG1sbnM6eG1wPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvIiB4bWxuczp4bXBNTT0iaHR0cDovL25zLmFkb2JlLmNvbS94YXAvMS4wL21tLyIgeG1sbnM6c3RSZWY9Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9zVHlwZS9SZXNvdXJjZVJlZiMiIHhtcDpDcmVhdG9yVG9vbD0iQWRvYmUgUGhvdG9zaG9wIENTNSBNYWNpbnRvc2giIHhtcE1NOkluc3RhbmNlSUQ9InhtcC5paWQ6OENDRjNBN0E2NTZBMTFFMEI3QjRBODM4NzJDMjlGNDgiIHhtcE1NOkRvY3VtZW50SUQ9InhtcC5kaWQ6OENDRjNBN0I2NTZBMTFFMEI3QjRBODM4NzJDMjlGNDgiPiA8eG1wTU06RGVyaXZlZEZyb20gc3RSZWY6aW5zdGFuY2VJRD0ieG1wLmlpZDo4Q0NGM0E3ODY1NkExMUUwQjdCNEE4Mzg3MkMyOUY0OCIgc3RSZWY6ZG9jdW1lbnRJRD0ieG1wLmRpZDo4Q0NGM0E3OTY1NkExMUUwQjdCNEE4Mzg3MkMyOUY0OCIvPiA8L3JkZjpEZXNjcmlwdGlvbj4gPC9yZGY6UkRGPiA8L3g6eG1wbWV0YT4gPD94cGFja2V0IGVuZD0iciI/PqqezsUAAAAfSURBVHjaYmRABcYwBiM2QSA4y4hNEKYDQxAEAAIMAHNGAzhkPOlYAAAAAElFTkSuQmCC) repeat-x 0 0; 86 | border: 0 none; 87 | color: #cccccc; 88 | height: 4px; 89 | padding: 0; 90 | } 91 | 92 | body > h2:first-child { 93 | margin-top: 0; 94 | padding-top: 0; } 95 | body > h1:first-child { 96 | margin-top: 0; 97 | padding-top: 0; } 98 | body > h1:first-child + h2 { 99 | margin-top: 0; 100 | padding-top: 0; } 101 | body > h3:first-child, body > h4:first-child, body > h5:first-child, body > h6:first-child { 102 | margin-top: 0; 103 | 
padding-top: 0; } 104 | 105 | a:first-child h1, a:first-child h2, a:first-child h3, a:first-child h4, a:first-child h5, a:first-child h6 { 106 | margin-top: 0; 107 | padding-top: 0; } 108 | 109 | h1 p, h2 p, h3 p, h4 p, h5 p, h6 p { 110 | margin-top: 0; } 111 | 112 | li p.first { 113 | display: inline-block; } 114 | li { 115 | margin: 0; } 116 | ul, ol { 117 | padding-left: 30px; } 118 | 119 | ul :first-child, ol :first-child { 120 | margin-top: 0; } 121 | 122 | dl { 123 | padding: 0; } 124 | dl dt { 125 | font-size: 14px; 126 | font-weight: bold; 127 | font-style: italic; 128 | padding: 0; 129 | margin: 15px 0 5px; } 130 | dl dt:first-child { 131 | padding: 0; } 132 | dl dt > :first-child { 133 | margin-top: 0; } 134 | dl dt > :last-child { 135 | margin-bottom: 0; } 136 | dl dd { 137 | margin: 0 0 15px; 138 | padding: 0 15px; } 139 | dl dd > :first-child { 140 | margin-top: 0; } 141 | dl dd > :last-child { 142 | margin-bottom: 0; } 143 | 144 | blockquote { 145 | border-left: 4px solid #dddddd; 146 | padding: 0 15px; 147 | color: #777777; } 148 | blockquote > :first-child { 149 | margin-top: 0; } 150 | blockquote > :last-child { 151 | margin-bottom: 0; } 152 | 153 | table { 154 | padding: 0;border-collapse: collapse; } 155 | table tr { 156 | border-top: 1px solid #cccccc; 157 | background-color: white; 158 | margin: 0; 159 | padding: 0; } 160 | table tr:nth-child(2n) { 161 | background-color: #f8f8f8; } 162 | table tr th { 163 | font-weight: bold; 164 | border: 1px solid #cccccc; 165 | margin: 0; 166 | padding: 6px 13px; } 167 | table tr td { 168 | border: 1px solid #cccccc; 169 | margin: 0; 170 | padding: 6px 13px; } 171 | table tr th :first-child, table tr td :first-child { 172 | margin-top: 0; } 173 | table tr th :last-child, table tr td :last-child { 174 | margin-bottom: 0; } 175 | 176 | img { 177 | max-width: 100%; } 178 | 179 | span.frame { 180 | display: block; 181 | overflow: hidden; } 182 | span.frame > span { 183 | border: 1px solid #dddddd; 184 | 
display: block; 185 | float: left; 186 | overflow: hidden; 187 | margin: 13px 0 0; 188 | padding: 7px; 189 | width: auto; } 190 | span.frame span img { 191 | display: block; 192 | float: left; } 193 | span.frame span span { 194 | clear: both; 195 | color: #333333; 196 | display: block; 197 | padding: 5px 0 0; } 198 | span.align-center { 199 | display: block; 200 | overflow: hidden; 201 | clear: both; } 202 | span.align-center > span { 203 | display: block; 204 | overflow: hidden; 205 | margin: 13px auto 0; 206 | text-align: center; } 207 | span.align-center span img { 208 | margin: 0 auto; 209 | text-align: center; } 210 | span.align-right { 211 | display: block; 212 | overflow: hidden; 213 | clear: both; } 214 | span.align-right > span { 215 | display: block; 216 | overflow: hidden; 217 | margin: 13px 0 0; 218 | text-align: right; } 219 | span.align-right span img { 220 | margin: 0; 221 | text-align: right; } 222 | span.float-left { 223 | display: block; 224 | margin-right: 13px; 225 | overflow: hidden; 226 | float: left; } 227 | span.float-left span { 228 | margin: 13px 0 0; } 229 | span.float-right { 230 | display: block; 231 | margin-left: 13px; 232 | overflow: hidden; 233 | float: right; } 234 | span.float-right > span { 235 | display: block; 236 | overflow: hidden; 237 | margin: 13px auto 0; 238 | text-align: right; } 239 | 240 | code, tt { 241 | margin: 0 2px; 242 | padding: 0 5px; 243 | white-space: nowrap; 244 | border: 1px solid #eaeaea; 245 | background-color: #f8f8f8; 246 | border-radius: 3px; } 247 | 248 | pre code { 249 | margin: 0; 250 | padding: 0; 251 | white-space: pre; 252 | border: none; 253 | background: transparent; } 254 | 255 | .highlight pre { 256 | background-color: #f8f8f8; 257 | border: 1px solid #cccccc; 258 | font-size: 13px; 259 | line-height: 19px; 260 | overflow: auto; 261 | padding: 6px 10px; 262 | border-radius: 3px; } 263 | 264 | pre { 265 | background-color: #f8f8f8; 266 | border: 1px solid #cccccc; 267 | font-size: 13px; 268 | 
line-height: 19px; 269 | overflow: auto; 270 | padding: 6px 10px; 271 | border-radius: 3px; } 272 | pre code, pre tt { 273 | background-color: transparent; 274 | border: none; } 275 | 276 | sup { 277 | font-size: 0.83em; 278 | vertical-align: super; 279 | line-height: 0; 280 | } 281 | * { 282 | -webkit-print-color-adjust: exact; 283 | } 284 | @media screen and (min-width: 914px) { 285 | body { 286 | width: 854px; 287 | margin:0 auto; 288 | } 289 | } 290 | @media print { 291 | table, pre { 292 | page-break-inside: avoid; 293 | } 294 | pre { 295 | word-wrap: break-word; 296 | } 297 | } -------------------------------------------------------------------------------- /jd/templates/404.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %} 4 | awolfly9 blog 5 | {% endblock %} 6 | 7 | {% block style %} 8 | 9 | 10 | {% endblock %} 11 | 12 | 13 | {% block context %} 14 | {% include "left_nav.html" %} 15 | 16 |
17 |
18 |
    19 | {# TODO... 做一个炫酷的 404 #} 20 |

    你访问的页面不存在~

    21 |
22 |
23 |
24 | {% endblock %} 25 | -------------------------------------------------------------------------------- /jd/templates/article.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %} 4 | {% if article %} 5 | {{ article.title }} 6 | {% endif %} 7 | {% endblock %} 8 | 9 | {% block style %} 10 | {# #} 11 | {# #} 12 | {# #} 13 | {# #} 14 | 15 | 16 | {% endblock %} 17 | 18 | {% block context %} 19 | {% if article %} 20 | 21 |
22 |

{{ article.title }} 23 |

24 | 25 |
26 | {{ article.content | safe }} 27 |
28 |
29 | {% endif %} 30 | {% endblock %} -------------------------------------------------------------------------------- /jd/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | {% block title %} 13 | awolfly9 blog 14 | {% endblock %} 15 | 16 | {% block style %} 17 | 18 | {% endblock %} 19 | 20 | 21 | 22 | {# 所有的博客标题 #} 23 | {% block context %} 24 | {% endblock %} 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 34 | 35 | 36 | 37 | {# 加载单独的脚本 #} 38 | {% block script %} 39 | {% endblock script %} 40 | 41 | -------------------------------------------------------------------------------- /jd/templates/custom_editor_page.html: -------------------------------------------------------------------------------- 1 | {#{% extends "admin/base_site.html" %}#} 2 | 3 | {% extends "admin/change_form.html" %} 4 | {% load i18n admin_urls static admin_modify %} 5 | 6 | {% block content %} 7 | 8 |
9 | {% block object-tools %} 10 | {% if change %}{% if not is_popup %} 11 |
    12 | {% block object-tools-items %} 13 |
  • 14 | {% url opts|admin_urlname:'history' original.pk|admin_urlquote as history_url %} 15 | {% trans "History" %} 16 |
  • 17 | {% if has_absolute_url %} 18 |
  • {% trans "View on site" %} 19 |
  • {% endif %} 20 | {% endblock %} 21 |
22 | {% endif %}{% endif %} 23 | {% endblock %} 24 |
25 | {% csrf_token %}{% block form_top %}{% endblock %} 26 |
27 | {% if is_popup %}{% endif %} 28 | {% if to_field %} 29 | {% endif %} 30 | {% if save_on_top %}{% block submit_buttons_top %}{% submit_row %}{% endblock %}{% endif %} 31 | {% if errors %} 32 |

33 | {% if errors|length == 1 %}{% trans "Please correct the error below." %}{% else %} 34 | {% trans "Please correct the errors below." %}{% endif %} 35 |

36 | {{ adminform.form.non_field_errors }} 37 | {% endif %} 38 | 39 | {% block field_sets %} 40 | {% for fieldset in adminform %} 41 | {% include "admin/includes/fieldset.html" %} 42 | {% endfor %} 43 | {% endblock %} 44 | 45 | {% if article %} 46 | 47 |
48 |

49 | {{ article.title }} 50 |

51 | 52 |
53 | {{ content | safe }} 54 |
55 |
56 | {% endif %} 57 | 58 | {% block after_field_sets %}{% endblock %} 59 | 60 | {% block inline_field_sets %} 61 | {% for inline_admin_formset in inline_admin_formsets %} 62 | {% include inline_admin_formset.opts.template %} 63 | {% endfor %} 64 | {% endblock %} 65 | 66 | {% block after_related_objects %}{% endblock %} 67 | 68 | {% block submit_buttons_bottom %}{% submit_row %}{% endblock %} 69 | 70 | {% block admin_change_form_document_ready %} 71 | 78 | {% endblock %} 79 | 80 | JavaScript for prepopulated fields {% prepopulated_fields_js %} 81 | 82 |
83 |
84 |
85 | 86 | {% endblock %} 87 | -------------------------------------------------------------------------------- /jd/templates/full_index.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %} 4 | 京东商品评价分析-订阅式服务 5 | {% endblock %} 6 | 7 | 8 | {% block context %} 9 | {% csrf_token %} 10 | {% include "left_nav.html" %} 11 | 12 |
13 |
14 |
15 |
16 |

订阅式服务:请输入京东商城商品链接(URL)和您的邮箱地址,当抓取所有评价数据并且分析完成后,会通过邮件将分析结果发送到您的邮箱

17 | {# #} 18 | 19 | 20 | 21 |
22 |
23 |
    24 |
25 |
26 |
27 |
28 |
29 |
30 | {% endblock %} 31 | 32 | {% block script %} 33 | 34 | 60 | {% endblock %} -------------------------------------------------------------------------------- /jd/templates/full_result.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %} 4 | 京东商品评价分析-订阅式服务 5 | {% endblock %} 6 | 7 | 8 | {% block context %} 9 | {% csrf_token %} 10 | {% include "left_nav.html" %} 11 |
12 |
13 |
14 | {{ article.content | safe }} 15 |
16 |
17 |
18 | {% endblock %} 19 | 20 | -------------------------------------------------------------------------------- /jd/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %} 4 | awolfly9 blog 5 | {% endblock %} 6 | 7 | {% block style %} 8 | {# #} 9 | {# #} 10 | {% endblock %} 11 | 12 | {% block context %} 13 | {% include "left_nav.html" %} 14 | 15 |
16 |
17 |
    18 | {% if articles %} 19 | {% for article in articles %} 20 |
  • 21 | {{ article.title }} 22 |
    {{ article.abstract | safe }} 23 |
    24 |
  • 25 | {% endfor %} 26 | {% endif %} 27 |
28 |
29 |
30 | {% endblock %} -------------------------------------------------------------------------------- /jd/templates/jd_index.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %} 4 | 京东评价信息数据分析 5 | {% endblock %} 6 | 7 | {% block context %} 8 | {% include "left_nav.html" %} 9 | {% csrf_token %} 10 | 11 |
12 |
13 |
14 |
15 |
16 |
17 | {#
#} 18 | {# #} 19 | {# #} {#
#} 20 | 21 | 22 |
23 |
24 |
    25 |
26 |
27 |
28 |
29 | 30 |
31 |
32 | {% endblock %} 33 | 34 | {% block script %} 35 | 36 | 118 | {% endblock %} -------------------------------------------------------------------------------- /jd/templates/left_nav.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block style %} 4 | 5 | 6 | {% endblock %} 7 | 8 | {% block context %} 9 |
10 |
11 |
12 |

awolfly9

13 |
14 | 20 |
21 |
22 |
23 |
24 |
# URL routes of the `jd` app. The project-level URLconf mounts this module
# under the `jd/` prefix, so every pattern below matches the remainder of the
# request path.
urlpatterns = [
    url(r'^runspider$', views.runspider, name = 'runspider'),
    url(r'^randitem', views.randitem, name = 'randitem'),
    url(r'^analysis$', views.analysis, name = 'analysis'),
    url(r'^register_spider$', views.register_spider, name = 'register_spider'),
    url(r'^delete_spider$', views.delete_spider, name = 'delete_spider'),
    url(r'^full$', views.FullView.as_view(), name = 'full'),
    url(r'^full_comment', views.full_comment, name = 'full_comment'),
    # The capture has to be a *named* group: AnalysisResultView.get() receives
    # it as the `param` keyword argument. A bare `(?P.*)` is not a valid
    # regular expression and fails as soon as the pattern is compiled.
    url(r'^full_result/(?P<param>.*)', views.AnalysisResultView.as_view(), name = 'result'),
    url(r'^$', views.IndexView.as_view(), name = 'index'),
]
def _spawn_manage_command(command, options):
    """Start `python manage.py <command>` in the background from BASE_DIR.

    `options` is a sequence of `(key, value)` pairs; each one is passed as
    `-a key=value`, in the given order. The arguments are handed over as a
    list (no shell), so characters such as `;`, `&` or `?` inside a
    user-supplied URL are never interpreted by a shell.
    """
    args = ['python', 'manage.py', command]
    for key, value in options:
        args.extend(['-a', '%s=%s' % (key, value)])
    subprocess.Popen(args, cwd = settings.BASE_DIR)


def runspider(request):
    """Start (or replay) the real-time comment analysis for one JD product.

    Reads the POST fields `url` (a product page on item.jd.com) and `force`
    ('false' by default). The first run of digits in the URL is used as the
    product id.

    * Product not yet in the item table: a background `real_time_analysis`
      management command is started.
    * Product known and `force == 'false'`: the stored analysis rows are
      pushed to redis again under the new guid.
    * Product known and `force` is anything else: the stored analysis rows
      are deleted and a background re-analysis is started.

    Returns a JSON `HttpResponse` with `status`, `guid` and `info`, and
    mirrors `status` and `guid` into cookies of the same names.
    """
    data = {
        'status': 'failure',
        'guid': '0',
        'info': '',
    }

    try:
        # Production clients send these values as POST fields.
        url = request.POST.get('url') or ''
        force = request.POST.get('force', 'false')
        match = re.search(r'\d+', url)

        if 'item.jd.com' in url and match is not None:
            product_id = match.group()
            utils.log('product_id:%s' % product_id)

            data['status'] = 'success'
            data['guid'] = str(uuid.uuid4())
            # No trailing comma here: it would turn the message into a
            # one-element tuple and the client would receive a JSON list.
            data['info'] = '成功接收数据,正在为您抓取并分析数据,精彩稍候呈现'

            # product_id consists of digits only (see the regex above), so it
            # is safe to place it directly into the statements below.
            sql = SqlHelper()
            command = "SELECT id FROM {table} WHERE id={product_id}".format(
                table = config.jd_item_table, product_id = product_id)
            result = sql.query_one(command)

            if result is None:
                _spawn_manage_command('real_time_analysis', [
                    ('name', 'jd'),
                    ('guid', data['guid']),
                    ('product_id', product_id),
                    ('url', url),
                ])
            elif force == 'false':
                # The product was analysed before: replay the stored result.
                utils.log('数据库中存在数据,从数据库中取出分析结果')
                command = "SELECT * FROM {0} WHERE product_id={1} ORDER BY id".format(
                    config.analysis_item_table, product_id)
                result = sql.query(command)
                for res in result:
                    utils.push_redis(data['guid'], res[1], res[2], res[3], save_to_mysql = False)
            else:
                # Forced refresh: drop the stored result, then analyse again.
                # The column is `product_id`, as in the SELECT just above.
                command = "DELETE FROM {0} WHERE product_id={1}".format(
                    config.analysis_item_table, product_id)
                sql.execute(command)
                # NOTE(review): the command name `analysis` is kept from the
                # original code; confirm that a management command with this
                # name actually exists.
                _spawn_manage_command('analysis', [
                    ('url', url),
                    ('name', 'jd'),
                    ('guid', data['guid']),
                    ('product_id', product_id),
                ])
        else:
            data['info'] = '传入网址有误,请检查后重新输入,请输入以下格式的网址:\n%s' % 'https://item.jd.com/3995645.html'
    except Exception as e:
        logging.error('run spider exception:%s' % e)
        data['info'] = '出现错误,错误原因:%s' % e

    response = HttpResponse(json.dumps(data), content_type = "application/json")
    response.set_cookie('status', data.get('status'))
    response.set_cookie('guid', data.get('guid'))
    return response
def analysis(request):
    """Return the next queued analysis message for the caller's job.

    The job is identified by the `status` and `guid` cookies. When `status`
    is 'success' and `guid` is not '0', one message is popped from the redis
    list named after the guid, decoded from JSON, tagged with the status and
    returned. In every other case (no message yet, bad cookies, any error)
    the response body is `{"status": "failure"}`.
    """
    data = {
        'status': 'failure'
    }

    try:
        status = request.COOKIES.get('status', '')
        guid = request.COOKIES.get('guid', '0')
        if status == 'success' and guid != '0':
            msg = red.lpop(guid)
            if msg is not None:
                # Prepare the payload completely before replacing `data`, so
                # a message that is not a JSON object cannot leak out without
                # a `status` field when the assignment below fails.
                payload = json.loads(msg)
                payload['status'] = status
                data = payload
                utils.log('info:%s' % data.get('info'))
    except Exception as e:
        logging.error('analysis data exception:%s' % e)

    return HttpResponse(json.dumps(data), content_type = "application/json")
def full_comment(request):
    """Register a subscription-style full analysis request.

    For a POST request the fields `url` (a product page on item.jd.com) and
    `email` are validated. When both are acceptable, the request is appended
    to the redis list `analysis_users`, stored as an `AnalysisUser` row, and
    the status becomes 'success'. Otherwise `info` carries the reason.

    Returns a JSON `HttpResponse` with `status`, `guid` and `info`, and
    mirrors `status` and `guid` into cookies of the same names.
    """
    data = {
        'status': 'failure',
        'guid': str(uuid.uuid4()),
        'info': '',
    }

    try:
        if request.method == 'POST':
            url = request.POST.get('url')
            email = request.POST.get('email')
            # The first run of digits in the URL is used as the product id.
            match = re.search(r'\d+', url) if url else None

            # Validate the URL and the e-mail address before queueing anything.
            if not url or 'item.jd.com' not in url:
                data['info'] = 'URL 格式不正确,请重新输入'
            elif not email or '@' not in email:
                data['info'] = '邮箱格式不正确,请重新输入'
            elif match is None:
                # A JD URL without any digits has no product id. Calling
                # .group() on the missing match used to raise instead of
                # reaching this message.
                data['info'] = '输入参数不符合规范,请重新输入'
            else:
                product_id = match.group()
                user = {
                    'url': url,
                    'product_id': product_id,
                    'email': email,
                    'guid': data.get('guid')
                }
                red.rpush('analysis_users', json.dumps(user))

                # NOTE(review): datetime.now() is naive; if the project runs
                # with USE_TZ enabled, confirm whether an aware timestamp is
                # expected here.
                user = AnalysisUser(id = None, url = url, email = email, product_id = product_id,
                                    guid = data.get('guid'),
                                    ip = utils.get_visiter_ip(request), create_time = datetime.datetime.now())
                user.save()

                data['status'] = 'success'
                data['info'] = '已经收到信息,正在开始分析'
    except Exception as e:
        # Log like the other views do, so failures are visible server-side.
        logging.error('full comment exception:%s' % e)
        data['info'] = '出现错误:%s' % e

    response = HttpResponse(json.dumps(data), content_type = "application/json")
    response.set_cookie('status', data.get('status'))
    response.set_cookie('guid', data.get('guid'))
    return response
"""
Django settings for jd_analysis project.

Generated by 'django-admin startproject' using Django 1.11.

For more information on this file, see
https://docs.djangoproject.com/en/1.11/topics/settings/

For the full list of settings and their values, see
https://docs.djangoproject.com/en/1.11/ref/settings/

Deployment-specific values (secret key, debug flag, allowed hosts and the
database connection) can be overridden with JD_ANALYSIS_* environment
variables. When a variable is not set, the value that used to be hard-coded
here is used, so existing development setups behave exactly as before.
"""

import os


def _env_flag(name, default):
    """Return environment variable *name* as a bool, or *default* if unset."""
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in ('1', 'true', 'yes', 'on')


def _env_list(name, default):
    """Return comma-separated environment variable *name* as a list."""
    raw = os.environ.get(name)
    if raw is None:
        return default
    return [item.strip() for item in raw.split(',') if item.strip()]


# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# The fallback below is public (it is committed to the repository); set
# JD_ANALYSIS_SECRET_KEY on any real deployment.
SECRET_KEY = os.environ.get(
    'JD_ANALYSIS_SECRET_KEY',
    '!=6ve76(w#_0r_b8ic#mo-*s2#xf^*&(lfn1$#c=0-u=icl(8@',
)

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = _env_flag('JD_ANALYSIS_DEBUG', True)

# Comma-separated host names, e.g. "example.com,www.example.com".
ALLOWED_HOSTS = _env_list('JD_ANALYSIS_ALLOWED_HOSTS', [])

# Application definition

INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'django_crontab',
    'jd',
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'jd_analysis.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'jd_analysis.wsgi.application'

# Database
# https://docs.djangoproject.com/en/1.11/ref/settings/#databases

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.mysql',
        'NAME': os.environ.get('JD_ANALYSIS_DB_NAME', 'jd_analysis'),
        'USER': os.environ.get('JD_ANALYSIS_DB_USER', 'root'),
        'PASSWORD': os.environ.get('JD_ANALYSIS_DB_PASSWORD', '123456'),
        'HOST': os.environ.get('JD_ANALYSIS_DB_HOST', ''),
        'PORT': os.environ.get('JD_ANALYSIS_DB_PORT', ''),
    }
}

# Password validation
# https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]

# django_crontab job: runs the `full_analysis` management command on the
# schedule below and appends its output to log/full_analysis.log.
CRONJOBS = [
    ('*/1 */1 * * *', 'django.core.management.call_command', ['full_analysis'], {},
     '>> ' + BASE_DIR + '/log/full_analysis.log')
]

# Internationalization
# https://docs.djangoproject.com/en/1.11/topics/i18n/

LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'Asia/Shanghai'

USE_I18N = True

USE_L10N = True

USE_TZ = True

# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.11/howto/static-files/

STATIC_URL = '/static/'
STATIC_ROOT = os.path.join(BASE_DIR, 'static/')
def _pids_using_port(port):
    """Return the PIDs (as strings) that `lsof` reports for *port*."""
    try:
        # `-t` makes lsof print bare process ids, one per line, without the
        # table header. Parsing the full table for "any number" also picks up
        # port numbers and IP address octets, which must never be killed.
        popen = subprocess.Popen(['lsof', '-t', '-i:%s' % port], stdout = subprocess.PIPE)
        (data, err) = popen.communicate()
    except OSError as e:
        # lsof is missing or cannot be executed: nothing can be looked up.
        print('kill_ports exception:%s' % e)
        return []

    print('data:\n%s \nerr:\n%s' % (data, err))

    # Pipes deliver bytes on Python 3; normalise to text before parsing.
    if isinstance(data, bytes):
        data = data.decode('utf-8', 'replace')
    return [pid for pid in data.split() if pid.isdigit()]


def kill_ports(ports):
    """Kill (SIGKILL) every process that holds one of *ports* open.

    Server-side helper for freeing occupied ports. Progress is printed to
    stdout. Failures for a single process are reported and do not stop the
    remaining work.

    ports -- iterable of port numbers (ints or numeric strings).
    """
    for port in ports:
        print('kill %s start' % port)

        pids = _pids_using_port(port)
        print('pids:%s' % str(pids))

        for pid in pids:
            try:
                print('pid:%s' % pid)
                popen = subprocess.Popen(['kill', '-9', pid], stdout = subprocess.PIPE)
                (data, err) = popen.communicate()
                print('data:\n%s \nerr:\n%s' % (data, err))
            except Exception as e:
                print('kill_ports exception:%s' % e)

        print('kill %s finish' % port)

        # Brief pause after each port before moving on.
        time.sleep(1)


if __name__ == '__main__':
    # argv[0] is the script name, not a port.
    ports = sys.argv[1:]
    kill_ports(ports = ports)
20 | ) 21 | raise 22 | execute_from_command_line(sys.argv) 23 | -------------------------------------------------------------------------------- /media/10267425905_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/10267425905_time.png -------------------------------------------------------------------------------- /media/10866679001_province.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/10866679001_province.png -------------------------------------------------------------------------------- /media/11374491518_size.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/11374491518_size.png -------------------------------------------------------------------------------- /media/11430756607_channel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/11430756607_channel.png -------------------------------------------------------------------------------- /media/11718220593_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/11718220593_color.png -------------------------------------------------------------------------------- /media/11718220593_good.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/11718220593_good.png 
-------------------------------------------------------------------------------- /media/11718220593_size.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/11718220593_size.png -------------------------------------------------------------------------------- /media/jd_analysis_show_image/3133851_channel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/jd_analysis_show_image/3133851_channel.png -------------------------------------------------------------------------------- /media/jd_analysis_show_image/3133851_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/jd_analysis_show_image/3133851_color.png -------------------------------------------------------------------------------- /media/jd_analysis_show_image/3133851_general.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/jd_analysis_show_image/3133851_general.png -------------------------------------------------------------------------------- /media/jd_analysis_show_image/3133851_good.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/jd_analysis_show_image/3133851_good.png -------------------------------------------------------------------------------- /media/jd_analysis_show_image/3133851_poor.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/jd_analysis_show_image/3133851_poor.png -------------------------------------------------------------------------------- /media/jd_analysis_show_image/3133851_province.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/jd_analysis_show_image/3133851_province.png -------------------------------------------------------------------------------- /media/jd_analysis_show_image/3133851_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/jd_analysis_show_image/3133851_time.png -------------------------------------------------------------------------------- /media/jd_analysis_show_image/3995645_channel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/jd_analysis_show_image/3995645_channel.png -------------------------------------------------------------------------------- /media/jd_analysis_show_image/3995645_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/jd_analysis_show_image/3995645_color.png -------------------------------------------------------------------------------- /media/jd_analysis_show_image/3995645_general.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/jd_analysis_show_image/3995645_general.png 
-------------------------------------------------------------------------------- /media/jd_analysis_show_image/3995645_good.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/jd_analysis_show_image/3995645_good.png -------------------------------------------------------------------------------- /media/jd_analysis_show_image/3995645_poor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/jd_analysis_show_image/3995645_poor.png -------------------------------------------------------------------------------- /media/jd_analysis_show_image/3995645_province.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/jd_analysis_show_image/3995645_province.png -------------------------------------------------------------------------------- /media/jd_analysis_show_image/3995645_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/jd_analysis_show_image/3995645_time.png -------------------------------------------------------------------------------- /media/mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/mask.png -------------------------------------------------------------------------------- /media/weixin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awolfly9/jd_analysis/c96b54fbe182dd98747e8e6ee8fe0a7e07ea8d71/media/weixin.png 
-------------------------------------------------------------------------------- /scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html 5 | 6 | [settings] 7 | default = jd.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = jd 12 | -------------------------------------------------------------------------------- /sqlhelper.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | 3 | import logging 4 | import pymysql 5 | import utils 6 | import config 7 | 8 | 9 | class SqlHelper(object): 10 | def __init__(self): 11 | self.conn = pymysql.connect(**config.database_config) 12 | self.cursor = self.conn.cursor() 13 | 14 | try: 15 | self.conn.select_db(config.database) 16 | except: 17 | self.create_database(config.database) 18 | self.conn.select_db(config.database) 19 | self.init() 20 | 21 | def init(self): 22 | # 创建商品抓取记录表 23 | command = ( 24 | "CREATE TABLE IF NOT EXISTS {} (" 25 | "`id` BIGINT (15) NOT NULL AUTO_INCREMENT," # 商品 id 26 | "`name` CHAR(200) NOT NULL," # 商品名称 27 | "`average_score` INT(2) DEFAULT NULL," # 综合评分星级 28 | "`good_count` INT(7) DEFAULT NULL ," # 好评数量 29 | "`good_rate` FLOAT DEFAULT NULL," # 好评的比例 30 | "`general_count` INT(4) DEFAULT NULL," # 中评数量 31 | "`general_rate` FLOAT DEFAULT NULL," # 中评比例 32 | "`poor_count` INT(4) DEFAULT NULL," # 差评数量 33 | '`poor_rate` FLOAT DEFAULT NULL,' # 差评比例 34 | '`after_count` INT(5) DEFAULT NULL,' # 追评数量 35 | '`good_rate_style` INT(7) DEFAULT NULL,' # 36 | "`poor_rate_style` INT(5) DEFAULT NULL," # 37 | "`general_rate_style` INT(5) DEFAULT NULL," # 38 | "`comment_count` INT(7) DEFAULT NULL," # 总共评论数量 39 | "`product_id` BIGINT(15) DEFAULT NULL," # 商品 id 40 | "`good_rate_show` INT(3) DEFAULT NULL," # 显示的好评百分比 41 | 
"`poor_rate_show` INT(3) DEFAULT NULL," # 显示的差评百分比 42 | "`general_rate_show` INT(7) DEFAULT NULL," # 显示中评的百分比 43 | "`url` TEXT NOT NULL," # 网站 44 | "`item_ids` TEXT DEFAULT NULL," # 同一个商品的多个 ids 45 | "`save_time` TIMESTAMP NOT NULL," # 抓取数据的时间 46 | "PRIMARY KEY(id)" 47 | ") ENGINE=InnoDB".format(config.jd_item_table)) 48 | self.create_table(command) 49 | 50 | # 创建分析商品评论结果表 51 | command = ( 52 | "CREATE TABLE IF NOT EXISTS {} (" 53 | "`id` INT(5) NOT NULL AUTO_INCREMENT," # 自增 id 54 | "`product_id` BIGINT(15) DEFAULT NULL ," # 商品 id 55 | "`info` CHAR(255) DEFAULT NULL," # 分析结果的信息 56 | "`type` CHAR(10) DEFAULT NULL," # 分析结果类型 57 | "`guid` CHAR(40) NOT NULL," # guid 58 | "`save_time` TIMESTAMP NOT NULL," # 分析数据的时间 59 | "PRIMARY KEY(id)" 60 | ") ENGINE=InnoDB".format(config.analysis_item_table)) 61 | self.create_table(command) 62 | 63 | def create_database(self, database_name): 64 | try: 65 | command = 'CREATE DATABASE IF NOT EXISTS %s DEFAULT CHARACTER SET \'utf8\' ' % database_name 66 | # utils.log('sql helper create_database command:%s' % command) 67 | self.cursor.execute(command) 68 | except Exception, e: 69 | utils.log('sql helper create_database exception:%s' % str(e), logging.WARNING) 70 | 71 | def create_table(self, command): 72 | try: 73 | # utils.log('sql helper create_table command:%s' % command) 74 | self.cursor.execute(command) 75 | self.conn.commit() 76 | except Exception, e: 77 | utils.log('sql helper create_table exception:%s' % str(e), logging.WARNING) 78 | 79 | def insert_data(self, command, data, commit = False): 80 | try: 81 | # utils.log('insert_data command:%s, data:%s' % (command, data)) 82 | 83 | self.cursor.execute(command, data) 84 | if commit: 85 | self.conn.commit() 86 | except Exception, e: 87 | utils.log('sql helper insert_data exception msg:%s' % e, logging.WARNING) 88 | 89 | def insert_json(self, data = {}, table_name = None, commit = False): 90 | try: 91 | keys = [] 92 | vals = [] 93 | for k, v in data.items(): 94 | keys.append(k) 95 | 
vals.append(v) 96 | val_str = ','.join(['%s'] * len(vals)) 97 | key_str = ','.join(keys) 98 | 99 | command = "INSERT IGNORE INTO {table} ({keys}) VALUES({values})". \ 100 | format(keys = key_str, values = val_str, table = table_name) 101 | # utils.log('insert_json command:%s' % command) 102 | self.cursor.execute(command, tuple(vals)) 103 | 104 | if commit: 105 | self.conn.commit() 106 | except Exception, e: 107 | utils.log('sql helper insert_json exception msg:%s' % e, logging.WARNING) 108 | 109 | def commit(self): 110 | self.conn.commit() 111 | 112 | def execute(self, command, commit = True): 113 | try: 114 | # utils.log('sql helper execute command:%s' % command) 115 | data = self.cursor.execute(command) 116 | self.conn.commit() 117 | return data 118 | except Exception, e: 119 | utils.log('sql helper execute exception msg:%s' % str(e)) 120 | return None 121 | 122 | def is_exists(self, table_name): 123 | try: 124 | command = "SHOW TABLES LIKE '%s'" % table_name 125 | utils.log('sql helper is_exists command:%s' % command) 126 | data = self.cursor.execute(command) 127 | return True if data == 1 else False 128 | except Exception, e: 129 | logging.exception('sql helper is_exists exception msg:%s' % e) 130 | 131 | def query(self, command, commit = False, cursor_type = 'tuple'): 132 | try: 133 | utils.log('sql helper execute command:%s' % command) 134 | 135 | cursor = None 136 | if cursor_type == 'dict': 137 | cursor = self.conn.cursor(pymysql.cursors.DictCursor) 138 | else: 139 | cursor = self.cursor 140 | 141 | cursor.execute(command) 142 | data = cursor.fetchall() 143 | if commit: 144 | self.conn.commit() 145 | return data 146 | except Exception, e: 147 | utils.log('sql helper execute exception msg:%s' % str(e)) 148 | return None 149 | 150 | def query_one(self, command, commit = False, cursor_type = 'tuple'): 151 | try: 152 | utils.log('sql helper execute command:%s' % command) 153 | 154 | cursor = None 155 | if cursor_type == 'dict': 156 | cursor = 
self.conn.cursor(pymysql.cursors.DictCursor) 157 | else: 158 | cursor = self.cursor 159 | 160 | cursor.execute(command) 161 | data = cursor.fetchone() 162 | if commit: 163 | self.conn.commit() 164 | return data 165 | except Exception, e: 166 | utils.log('sql helper execute exception msg:%s' % str(e)) 167 | return None 168 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | 3 | import json 4 | import logging 5 | import os 6 | import re 7 | import subprocess 8 | import traceback 9 | import time 10 | import datetime 11 | import redis 12 | 13 | import config 14 | from jd_analysis import settings 15 | from sqlhelper import SqlHelper 16 | 17 | 18 | # 自定义的日志输出 19 | def log(msg, level = logging.DEBUG): 20 | logging.log(level, msg) 21 | print('%s [%s], msg:%s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), level, msg)) 22 | 23 | # if level == logging.WARNING or level == logging.ERROR: 24 | # for line in traceback.format_stack(): 25 | # print(line.strip()) 26 | # 27 | # for line in traceback.format_stack(): 28 | # logging.log(level, line.strip()) 29 | 30 | 31 | # 服务器使用,清理端口占用 32 | def kill_ports(ports): 33 | for port in ports: 34 | log('kill %s start' % port) 35 | popen = subprocess.Popen('lsof -i:%s' % port, shell = True, stdout = subprocess.PIPE) 36 | (data, err) = popen.communicate() 37 | log('data:\n%s \nerr:\n%s' % (data, err)) 38 | 39 | pattern = re.compile(r'\b\d+\b', re.S) 40 | pids = re.findall(pattern, data) 41 | 42 | log('pids:%s' % str(pids)) 43 | 44 | for pid in pids: 45 | if pid != '' and pid != None: 46 | try: 47 | log('pid:%s' % pid) 48 | popen = subprocess.Popen('kill -9 %s' % pid, shell = True, stdout = subprocess.PIPE) 49 | (data, err) = popen.communicate() 50 | log('data:\n%s \nerr:\n%s' % (data, err)) 51 | except Exception, e: 52 | log('kill_ports exception:%s' % e) 53 | 54 | log('kill %s 
finish' % port) 55 | 56 | time.sleep(1) 57 | 58 | 59 | # 创建文件夹 60 | def make_dir(dir): 61 | log('make dir:%s' % dir) 62 | if not os.path.exists(dir): 63 | os.makedirs(dir) 64 | 65 | 66 | def arglist_to_dict(arglist): 67 | """Convert a list of arguments like ['arg1=val1', 'arg2=val2', ...] to a 68 | dict 69 | """ 70 | return dict(x.split('=', 1) for x in arglist) 71 | 72 | 73 | def get_visiter_ip(request): 74 | if request.META.has_key('HTTP_X_FORWARDED_FOR'): 75 | ip = request.META['HTTP_X_FORWARDED_FOR'] 76 | else: 77 | ip = request.META['REMOTE_ADDR'] 78 | 79 | return ip 80 | 81 | 82 | def get_save_image_path(): 83 | if settings.DEBUG == False: 84 | return '%s/media/images' % settings.BASE_DIR 85 | else: 86 | return '%s/jd/static/images' % settings.BASE_DIR 87 | 88 | 89 | def get_image_src(filename): 90 | if settings.DEBUG == False: 91 | result = '![](/media/images/%s)' % filename 92 | else: 93 | result = '![](/static/images/%s)' % filename 94 | 95 | return result 96 | 97 | 98 | red = redis.StrictRedis(host = config.redis_host, port = config.redis_part, db = config.redis_db, 99 | password = config.redis_pass) 100 | sql = SqlHelper() 101 | 102 | 103 | def push_redis(guid, product_id, info, type = 'word', save_to_mysql = True): 104 | data = { 105 | 'id': None, 106 | 'product_id': product_id, 107 | 'info': info, 108 | 'type': type, 109 | 'guid': guid, 110 | 'save_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 111 | } 112 | 113 | if save_to_mysql: 114 | sql.insert_json(data, config.analysis_item_table, commit = True) 115 | 116 | red.rpush(guid, json.dumps(data)) 117 | 118 | 119 | def create_analysis_table(product_id): 120 | # 创建分析商品评论结果表 121 | command = ( 122 | "CREATE TABLE IF NOT EXISTS {} (" 123 | "`id` INT(5) NOT NULL AUTO_INCREMENT," # 自增 id 124 | "`product_id` BIGINT(15) DEFAULT NULL ," # 商品 id 125 | "`info` CHAR(255) DEFAULT NULL," # 分析结果的信息 126 | "`type` CHAR(10) DEFAULT NULL," # 分析结果类型 127 | "`guid` CHAR(40) NOT NULL," # guid 128 | "`save_time` 
TIMESTAMP NOT NULL," # 分析数据的时间 129 | "PRIMARY KEY(id)" 130 | ") ENGINE=InnoDB".format(config.analysis_item_table + '_' + product_id)) 131 | sql.create_table(command) 132 | --------------------------------------------------------------------------------