├── .gitignore
├── LICENSE
├── README.md
├── comment_analysis
│   ├── .idea
│   │   ├── comment_analysis.iml
│   │   ├── dataSources.local.xml
│   │   ├── dataSources.xml
│   │   ├── dataSources
│   │   │   └── 45308517-00ac-4b16-8c84-9590de05cab2.xml
│   │   ├── dictionaries
│   │   │   └── ASUS.xml
│   │   ├── misc.xml
│   │   ├── modules.xml
│   │   └── workspace.xml
│   ├── analysis.py
│   ├── dict
│   │   ├── adverb_dict.txt
│   │   ├── conjunction_dict.txt
│   │   ├── denial_dict.txt
│   │   ├── negative_dict.txt
│   │   ├── phrase_dict.txt
│   │   ├── positive_dict.txt
│   │   ├── punctuation_dict.txt
│   │   └── user_dict.txt
│   ├── judge_polarity.py
│   └── log.txt
├── comment_spider
│   ├── .idea
│   │   ├── comment_spider.iml
│   │   ├── dataSources.local.xml
│   │   ├── dataSources.xml
│   │   ├── dataSources
│   │   │   └── 56b1a419-7a84-4661-a69b-42b7df13cf8b.xml
│   │   ├── dictionaries
│   │   │   └── ASUS.xml
│   │   ├── misc.xml
│   │   ├── modules.xml
│   │   └── workspace.xml
│   ├── comment_spider
│   │   ├── __init__.py
│   │   ├── items.py
│   │   ├── middlewares.py
│   │   ├── pipelines.py
│   │   ├── settings.py
│   │   └── spiders
│   │       ├── __init__.py
│   │       └── commentspider.py
│   ├── run_spider.py
│   ├── scrapy.cfg
│   └── tools
│       ├── jieba_content.py
│       ├── sql_tools.py
│       ├── stop_words.txt
│       └── user_words.txt
├── computer_analysis
│   ├── .idea
│   │   ├── computer_analysis.iml
│   │   ├── dataSources.local.xml
│   │   ├── dataSources.xml
│   │   ├── dataSources
│   │   │   └── 6ce9a5dc-7a40-4f48-a105-d36e592d5e6e.xml
│   │   ├── dictionaries
│   │   │   └── ASUS.xml
│   │   ├── misc.xml
│   │   ├── modules.xml
│   │   └── workspace.xml
│   ├── api
│   │   ├── __init__.py
│   │   ├── admin.py
│   │   ├── apps.py
│   │   ├── migrations
│   │   │   ├── 0001_initial.py
│   │   │   └── __init__.py
│   │   ├── models.py
│   │   ├── search_indexes.py
│   │   ├── tests.py
│   │   └── views.py
│   ├── computer_analysis
│   │   ├── __init__.py
│   │   ├── settings.py
│   │   ├── urls.py
│   │   └── wsgi.py
│   ├── manage.py
│   ├── static
│   │   ├── bar
│   │   │   ├── 100002368328.svg
│   │   │   ├── 11528184498.svg
│   │   │   ├── 11547179910.svg
│   │   │   ├── 15019918741.svg
│   │   │   ├── 20167878769.svg
│   │   │   ├── 3714545.svg
│   │   │   ├── 39987003288.svg
│   │   │   └── 5520838.svg
│   │   ├── jieba_top10_bar
│   │   │   ├── 100002368328.svg
│   │   │   ├── 11528184498.svg
│   │   │   ├── 11547179910.svg
│   │   │   ├── 15019918741.svg
│   │   │   ├── 20167878769.svg
│   │   │   ├── 3714545.svg
│   │   │   ├── 39987003288.svg
│   │   │   └── 5520838.svg
│   │   ├── pie
│   │   │   ├── 100002368328.svg
│   │   │   ├── 11528184498.svg
│   │   │   ├── 11547179910.svg
│   │   │   ├── 15019918741.svg
│   │   │   ├── 20167878769.svg
│   │   │   ├── 3714545.svg
│   │   │   ├── 39987003288.svg
│   │   │   └── 5520838.svg
│   │   └── wordcloud
│   │       ├── 100002368328.png
│   │       ├── 11528184498.png
│   │       ├── 11547179910.png
│   │       ├── 15019918741.png
│   │       ├── 20167878769.png
│   │       ├── 3714545.png
│   │       ├── 39987003288.png
│   │       └── 5520838.png
│   ├── templates
│   │   └── search
│   │       └── indexes
│   │           └── api
│   │               └── computer_text.txt
│   ├── tools
│   │   ├── STXINGKA.TTF
│   │   ├── decorator.py
│   │   ├── orm2json.py
│   │   ├── pygal_process.py
│   │   └── searchresult2json.py
│   └── whoosh_index
│       ├── MAIN_6vchd7acq93n4dv5.seg
│       ├── MAIN_WRITELOCK
│       ├── MAIN_agm2z7e75evl86bh.seg
│       └── _MAIN_2.toc
├── computer_spider
│   ├── .idea
│   │   ├── computer_spider.iml
│   │   ├── dataSources.local.xml
│   │   ├── dataSources.xml
│   │   ├── dataSources
│   │   │   └── 42a1e165-b6b3-4fce-94e9-b34a7f891498.xml
│   │   ├── dictionaries
│   │   │   └── ASUS.xml
│   │   ├── misc.xml
│   │   ├── modules.xml
│   │   └── workspace.xml
│   ├── computer_spider
│   │   ├── __init__.py
│   │   ├── items.py
│   │   ├── middlewares.py
│   │   ├── pipelines.py
│   │   ├── settings.py
│   │   └── spiders
│   │       ├── __init__.py
│   │       └── cpt_spider.py
│   ├── run_spider.py
│   └── scrapy.cfg
└── html
    ├── .idea
    │   ├── dictionaries
    │   │   └── ASUS.xml
    │   ├── html.iml
    │   ├── misc.xml
    │   ├── modules.xml
    │   └── workspace.xml
    ├── css
    │   ├── bootstrap-theme.css
    │   ├── bootstrap-theme.css.map
    │   ├── bootstrap-theme.min.css
    │   ├── bootstrap-theme.min.css.map
    │   ├── bootstrap.css
    │   ├── bootstrap.css.map
    │   ├── bootstrap.min.css
    │   ├── bootstrap.min.css.map
    │   └── detail.css
    ├── detail.html
    ├── fonts
    │   ├── glyphicons-halflings-regular.eot
    │   ├── glyphicons-halflings-regular.svg
    │   ├── glyphicons-halflings-regular.ttf
    │   ├── glyphicons-halflings-regular.woff
    │   └── glyphicons-halflings-regular.woff2
    ├── index.html
    └── js
        ├── bootstrap.js
        ├── bootstrap.min.js
        ├── detail.js
        ├── index.js
        ├── jquery-3.3.1.min.js
        └── npm.js
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # jd-spider
2 | Graduation project: crawling and analysing JD.com product comments
3 | ###
4 | Crawls JD.com product comments and presents the results as word clouds
5 | The backend exposes the data through the Django framework
6 | The frontend fetches that data with ajax requests
7 | If you need the database dump, contact me via QQ 614303219
8 |
--------------------------------------------------------------------------------
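For orientation, here is a minimal sketch of how the two analysis building blocks that appear later in this dump fit together. It is illustrative only: it assumes both `comment_spider/` and `comment_analysis/` are on the Python path, and that `judge_polarity.DictClassifier` is the classifier used by `comment_analysis/analysis.py` (its source is not included in this section).

```python
# Illustrative only: segment a raw JD comment, then score it with the
# dictionary-based classifier used by comment_analysis/analysis.py.
from tools.jieba_content import get_jieba_comment   # from comment_spider/tools
from judge_polarity import DictClassifier           # from comment_analysis

comment = '电脑很快就收到了,性价比不错'
print(get_jieba_comment(comment))                   # space-separated tokens, stop words removed
print(DictClassifier().analyse_sentence(comment))   # sentiment result fed into good_rate
```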
/comment_analysis/.idea/comment_analysis.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/comment_analysis/.idea/dataSources.local.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | #@
7 | `
8 |
9 |
10 | master_key
11 | root
12 | *:jd_computer
13 |
14 |
15 |
--------------------------------------------------------------------------------
/comment_analysis/.idea/dataSources.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | mysql
6 | true
7 | com.mysql.jdbc.Driver
8 | jdbc:mysql://localhost:3309/jd_computer
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/comment_analysis/.idea/dictionaries/ASUS.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/comment_analysis/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/comment_analysis/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/comment_analysis/analysis.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | '''
4 | @author: taiyc
5 | @file: comment_analysis
6 | @time: 2019/4/2 21:00
7 | '''
8 |
9 | import pymysql
10 | from judge_polarity import DictClassifier
11 |
12 | class Analysis(object):
13 | def __init__(self):
14 | self.connect = None
15 | self.cursor = None
16 | self.score = 0
17 | self.total_score = 0
18 |
19 | def connect_sql(self):
20 | """
21 |         Connect to the database
22 | :return:
23 | """
24 | self.connect = pymysql.connect(
25 | host='localhost',
26 | port=3309,
27 | user='root',
28 | password='123456',
29 | db='jd_computer'
30 | )
31 | self.cursor = self.connect.cursor()
32 |
33 | def close_sql(self):
34 | """
35 |         Close the database connection
36 | :return:
37 | """
38 |         self.connect.commit()
39 |         self.cursor.close()
40 |         self.connect.close()
41 |
42 | def get_goods_id(self):
43 | """
44 |         Fetch every computer_id from the computer table
45 | :return:
46 | """
47 | self.connect_sql()
48 | sql = 'SELECT computer_id FROM computer'
49 | self.cursor.execute(sql)
50 | results = self.cursor.fetchall()
51 | self.close_sql()
52 | for goods_id in results:
53 | yield goods_id[0]
54 |
55 | def select_comment(self, computer_id):
56 | """
57 |         Fetch all comments for the given computer_id from the comment table
58 |         :param computer_id:
59 | :return:
60 | """
61 | self.connect_sql()
62 | sql = f'SELECT content FROM computer_comment WHERE computer_id={computer_id}'
63 | self.cursor.execute(sql)
64 |         # fetch the content result set for this computer_id
65 | content_data = self.cursor.fetchall()
66 |         # only analyse products with more than 99 comments
67 | # print(content_data)
68 | if len(content_data) > 99:
69 | self.close_sql()
70 | return content_data
71 |
72 | else:
73 | print(f'------{computer_id}评论不足,仅有{len(content_data)}------')
74 | del_tags_sql = f'DELETE FROM computer_tag WHERE computer_id={computer_id}'
75 | self.cursor.execute(del_tags_sql)
76 | print('删除便签成功')
77 | del_comment_sql = f'DELETE FROM computer_comment WHERE computer_id={computer_id}'
78 | self.cursor.execute(del_comment_sql)
79 | print('删除评论成功')
80 | del_computer_sql = f'DELETE FROM computer WHERE computer_id={computer_id}'
81 | self.cursor.execute(del_computer_sql)
82 | print(f'---------删除笔记本{computer_id}所有信息成功。---------')
83 | self.close_sql()
84 | return None
85 |         # with fewer comments, skip the analysis
86 | # else:
87 | # self.close_sql()
88 | # return None
89 |
90 |
91 | def analysis_content(self, content):
92 | """
93 |         Analyse one comment's sentiment and add its score to total_score
94 | :param content:
95 | :return:
96 | """
97 | obj = DictClassifier()
98 | result = obj.analyse_sentence(content)
99 | print(f'{result}: {content}')
100 | self.total_score += result
101 |
102 | def save_result(self, goods_id, good_rate):
103 | """
104 | :param goods_id:
105 | :return:
106 | """
107 | print(good_rate)
108 | if int(good_rate) == 1:
109 | good_rate = 0.99
110 | self.connect_sql()
111 | print(f'{goods_id}评论情感分析好评率:{good_rate}')
112 | sql = f'UPDATE computer SET good_rate={good_rate} WHERE computer_id={goods_id}'
113 | self.cursor.execute(sql)
114 | self.close_sql()
115 |
116 |
117 | if __name__ == '__main__':
118 | obj = Analysis()
119 | for good_id in obj.get_goods_id():
120 | content_data = obj.select_comment(good_id)
121 | # if content_data:
122 | # total_num = len(content_data)
123 | # obj.total_score = 0
124 | # for content in content_data:
125 | # content = content[0]
126 | # obj.analysis_content(content)
127 | # good_rate = round(obj.total_score / total_num, 4)
128 | # obj.save_result(good_id, good_rate)
129 |
--------------------------------------------------------------------------------
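The `__main__` block above keeps the per-product scoring loop commented out; the following sketch simply restates those commented lines as runnable code, with no change in logic.

```python
# Intended analysis flow (mirrors the commented-out __main__ loop in analysis.py).
obj = Analysis()
for good_id in obj.get_goods_id():
    content_data = obj.select_comment(good_id)    # None when a product has < 100 comments
    if content_data:
        obj.total_score = 0
        for row in content_data:
            obj.analysis_content(row[0])          # each row is a 1-tuple holding the comment text
        good_rate = round(obj.total_score / len(content_data), 4)
        obj.save_result(good_id, good_rate)
```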
/comment_analysis/dict/adverb_dict.txt:
--------------------------------------------------------------------------------
1 | 超级 2
2 | 超 2
3 | 都 1.75
4 | 还 1.5
5 | 实在 1.75
6 | 越来越 2
7 | 再也 2
8 | 完全 2
9 | 真是 1.75
10 | 足足 1.75
11 | 大大的 1.75
12 | 巨 2
13 | 最最 2
14 | 老是 1.75
15 | 压根 1.75
16 | 明显 1.5
17 |
18 | 好//1.75
19 | 老//1.75
20 | 过//2
21 |
22 | 最 2
23 | 最为 2
24 | 太 2
25 | 极 2
26 | 极为 2
27 | 极其 2
28 | 极度 2
29 | 极端 2
30 | 至 2
31 | 至为 2
32 | 顶 2
33 | 过于 2
34 | 过分 2
35 | 分外 2
36 | 万分 2
37 | 根本 2
38 | 更 1.75
39 | 更加 1.75
40 | 更其 1.75
41 | 越 1.75
42 | 越发 1.75
43 | 备加 1.75
44 | 愈 1.75
45 | 愈加 1.75
46 | 愈发 1.75
47 | 愈为 1.75
48 | 愈益 1.75
49 | 越加 1.75
50 | 格外 1.75
51 | 益发 1.75
52 | 很 1.75
53 | 挺 1.75
54 | 怪 1.75
55 | 非常 1.75
56 | 特别 1.75
57 | 相当 1.75
58 | 十分 1.75
59 | 好不 1.75
60 | 甚 1.75
61 | 甚为 1.75
62 | 颇 1.75
63 | 颇为 1.75
64 | 异常 1.75
65 | 深为 1.75
66 | 满 1.75
67 | 蛮 1.75
68 | 够 1.75
69 | 多 1.75
70 | 多么 1.75
71 | 殊特 1.75
72 | 大 1.75
73 | 大为 1.75
74 | 何等 1.75
75 | 何其 1.75
76 | 尤其 1.75
77 | 无比尤为 1.75
78 | 不胜 1.75
79 | 较 1.5
80 | 蛮 1.5
81 | 比较 1.5
82 | 较比 1.5
83 | 较为 1.5
84 | 不大 1.5
85 | 不太 1.5
86 | 不很 1.5
87 | 不甚 1.5
88 | 稍 0.8
89 | 稍稍 0.8
90 | 稍微 0.8
91 | 稍为 0.8
92 | 稍许 0.8
93 | 略 0.8
94 | 略略 0.8
95 | 略微 0.8
96 | 略为 0.8
97 | 些微 0.8
98 | 多少 0.8
99 | 有点 0.8
100 | 有些 0.8
--------------------------------------------------------------------------------
/comment_analysis/dict/conjunction_dict.txt:
--------------------------------------------------------------------------------
1 | 并 1.2
2 | 且 1.2
3 | 而 1.2
4 | 虽然 1.2
5 | 不过 1.2
6 | 至于 1.2
7 | 致 1.2
8 | 不料 1.2
9 | 岂知 1.2
10 |
11 | 也 1.5
12 | 不但 1.5
13 | 其次 1.5
14 | 不仅 1.5
15 | 就是 1.5
16 |
17 | 但是 2
18 | 偏偏 2
19 | 而且 2
20 | 何况 2
21 | 况且 2
22 | 乃至 2
23 | 但 2
24 | 却 2
25 | 然而 2
26 | 只是 2
27 |
28 | 甚至 3
29 | 尤其 3
30 | 居然 3
31 |
--------------------------------------------------------------------------------
/comment_analysis/dict/denial_dict.txt:
--------------------------------------------------------------------------------
1 | 没敢 1
2 | 不是 1
3 |
4 | 不 1
5 | 没 1
6 | 无 1
7 | 非 1
8 | 莫 1
9 | 弗 1
10 | 毋 1
11 | 勿 1
12 | 未 1
13 | 否 1
14 | 别 1
15 | 休 1
16 | 無 1
17 | 不曾 1
18 | 未必 1
19 | 没有 1
20 | 不要 1
21 | 难以 1
22 | 未曾 1
23 | 并非 1
24 | 绝不 1
25 | 不可 1
26 |
--------------------------------------------------------------------------------
/comment_analysis/dict/phrase_dict.txt:
--------------------------------------------------------------------------------
1 | 电脑……差 -2
2 | 虽然……但 -1.5
3 | 希望……提高……质量 -1
4 | ……蓝屏 -2
5 | 再也不…… -3
6 | 不会再…… -3
7 |
8 | 没有……特点 -1
9 | 像素……好 -1
10 | 没……好 -1 start:1 end:6
11 | 没……便宜 -1 start:1 end:6
12 |
13 | 比……贵 -1
14 | 没……值 -1
15 | 没……实用 -1
16 | 玩……卡 -2
17 | 玩……黑屏 -2
18 | 等……很久 -2
19 |
20 | (和|跟)……不(同|一样) -1
21 |
22 | 物流……快 2
23 | 物流……慢 -2
--------------------------------------------------------------------------------
/comment_analysis/dict/punctuation_dict.txt:
--------------------------------------------------------------------------------
1 | ! 2
2 | ! 2
3 | ~ 1.2
4 | ~ 1.2
5 | … 1.2
6 | .. 1.1
7 | ... 1.1
8 | .... 1.2
9 | ..... 1.2
10 | ...... 1.2
11 | ....... 1.2
12 | ........ 1.2
13 | ......... 1.3
14 | .......... 1.3
15 | ........... 1.3
16 | ............ 1.3
17 | ............. 1.3
18 | .............. 1.3
19 | ............... 1.3
20 | ................ 1.3
--------------------------------------------------------------------------------
/comment_analysis/dict/user_dict.txt:
--------------------------------------------------------------------------------
1 | 超薄
2 | 携带方便
3 | 特别喜欢
4 | 系统稳定
5 | 轻薄小巧
6 | 携带方便
7 | verygood
8 | 还好
9 | 还可以
10 | 色彩逼真
11 | 音质无敌
12 | 轻薄
13 | 物流好
14 | 五星好评
15 | 神机
16 | 美美哒
17 | 流畅度佳
18 | 小巧玲珑
19 | 方便携带
20 | 轻薄精巧
21 | 功能齐全
22 | 十分流畅
23 | 质量上乘
24 | 稳定可靠
25 | 反应灵敏
26 | 十分结实
27 | 电量充足
28 | 画质清晰
29 | 运行稳定
30 | 反应灵敏
31 | 方便快捷
32 | 时尚大气
33 | 运行超快
34 | 尺寸合适
35 | 极其省电
36 | 不好
37 | 好
38 | 不好用
39 | 散热差
40 | 散热好
41 | 真垃圾
42 | 垃圾
43 | 电脑不错
44 | 耐用
45 | 挺耐用
46 | 真耐用
47 | 手感好
48 | 屏幕大
49 | 手感极差
50 | 手感差
51 | 太贵了
52 | 贵
53 | 买贵了
54 | 不值
55 | 后悔
56 | 不后悔
57 | 太坑了
58 | 难看
59 | 高兴
60 | 上档次
61 | 颜值高
62 | 亏
63 | 真亏
64 | 太卡了
65 | 太卡
66 | 满意
67 | 物流快
68 | 颜值爆炸
69 | 超好玩
70 | 好评
71 | 易发烫
72 | 操作流畅
73 | 反应快速
74 | 游戏不卡顿
75 | 游戏卡顿
76 | 跑分也不低
77 | 跑分不低
78 | 质量有保证
79 | 售后服务好
80 | 颜值无敌
81 | 手感很好
82 | 一点都不卡
83 | 质量不错
84 | 手感极佳
85 | 好喜欢
86 | 很好用
87 | 高大上
88 | 好高大上
89 | 好惊喜
90 | 不会卡
91 | 科技感
92 | 一流
93 | 科技感一流
94 | 物超所值
95 | 有品位
96 | 低调奢华
97 | 做工好
98 | 很时尚
99 | 性价比真的高
100 | 性价比高
101 | 性价比低
102 | 掉漆
103 | 真棒
104 | 独一无二
105 | 别具匠心
106 | 发热严重
107 | 超级精美
108 | 蛮好的
109 | 特别帅
110 | 帅炸了
111 | 最满意
112 | 最喜欢
113 | 非常高清
114 | 最牛逼
115 | 非常好用
116 | 可以指纹
117 | 便宜
118 | 外观好看
119 | 没有卡顿
120 | 很轻
121 | 爱不释手
122 | 功能强大
123 | 配置高
124 | 分辨率低
125 | 分辨率高
126 | 反应也很快
127 | 屏幕艳丽
128 | 屏幕顺滑
129 | 大小合适
130 | 好太多
131 | 很灵活
132 | 音质效果很好
133 | 音质效果好
134 | 音质效果很不好
135 | 音质效果差
136 | 很溜
137 | 屏幕色彩艳丽
138 | 性价比很高
139 | 很值得
140 | 很不错
141 | 完美
142 | 产品质量高
143 | 价格合理
144 | 民族品牌
145 | 产品不错
146 | 值得信赖
147 | 有点卡
148 | 挺不错
149 | 一切满意
150 | 网速快
151 | 网速慢
152 | 有质感
153 | 有手感
154 | 续航很不错
155 | 操作很流畅
156 | 值得购买
157 | 玩游戏爽
158 | 玩游戏不行
159 | 反应很快
160 | 屏幕很大
161 | 值得购买
162 | 非常优越
163 | 使用顺手
164 | 非常满意
165 | 正品
166 | 吃鸡无压力
167 | 都能满足
168 | 性能好
169 | 屏幕够大
170 | 非常完美
171 | 性能不错
172 | 一点小瑕疵
173 | 有瑕疵
174 | 有点小瑕疵
175 | 稳定
176 | 很稳定
177 | 很快
178 | 不错的选择
179 | 超级好
180 | 反应超快
181 | 快递给力
182 | 不好
183 | 好
184 | 十分不错
185 | 送货速度
186 | 实用性强
187 | 特别流畅
188 | 特方便
189 | 很好上手
190 | 质量挺好
191 | 真心不错
192 | 价格实惠
193 | 质量超级好
194 | 快递速度
195 | 快递给力
196 | 性能优良
197 | 非常不错
198 | 好评
199 | 必须好评
200 | 容易上手
201 | 没问题
202 | 杠杠地
203 | 无瑕疵
204 | 特别快
205 | 非常流畅
206 | 帧数超高
207 | 得心应手
208 | 很惊喜
209 | 很失望
210 | 还行
211 | 商品不错
212 | 差评
213 | 挺好的
214 | 挺好
215 | 很棒
216 | 很惊艳
217 | 惊艳
218 | 超级棒
219 | 可以
220 | 使用流畅
221 | 价钱可以
222 | 太喜欢了
223 | 太好了
224 | 耐用
225 | 实用
226 | 大品牌
227 |
228 |
--------------------------------------------------------------------------------
/comment_analysis/log.txt:
--------------------------------------------------------------------------------
1 | 电脑很快就收到了,
2 | [pair('电脑', 'n'), pair('很快', 'd'), pair('就', 'd'), pair('收到', 'v'), pair('了', 'ul'), pair(',', 'x')]
3 | 性价比不错,
4 | [pair('性价比', 'n'), pair('不错', 'a'), pair(',', 'x')]
5 | 非常好用的一个电脑,
6 | [pair('非常好用', 'x'), pair('的', 'uj'), pair('一个', 'm'), pair('电脑', 'n'), pair(',', 'x')]
7 | 整体效果也挺好,
8 | [pair('整体', 'n'), pair('效果', 'n'), pair('也', 'd'), pair('挺好', 'x'), pair(',', 'x')]
9 | 颜值高!
10 | [pair('颜值高', 'x'), pair('!', 'x')]
11 |
12 | 电脑很快就收到了,性价比不错,非常好用的一个电脑,整体效果也挺好,颜值高!
13 | Score:10.375
14 | Sub-clause0: positive:很快
15 | Sub-clause1: positive:不错
16 | Sub-clause2: positive:非常好用
17 | Sub-clause3: conjunction:也 positive:挺好
18 | Sub-clause4: positive:颜值高 punctuation:!
19 | {'score': 10.375, 'su-clause0': {'score': 1.75, 'positive': [{'key': '很快', 'adverb': [], 'denial': [], 'value': 1.75, 'score': 1.75}], 'negative': [], 'conjunction': [], 'punctuation': [], 'pattern': []}, 'su-clause1': {'score': 1.0, 'positive': [{'key': '不错', 'adverb': [], 'denial': [], 'value': 1.0, 'score': 1.0}], 'negative': [], 'conjunction': [], 'punctuation': [], 'pattern': []}, 'su-clause2': {'score': 2.0, 'positive': [{'key': '非常好用', 'adverb': [], 'denial': [], 'value': 2.0, 'score': 2.0}], 'negative': [], 'conjunction': [], 'punctuation': [], 'pattern': []}, 'su-clause3': {'score': 2.625, 'positive': [{'key': '挺好', 'adverb': [], 'denial': [], 'value': 1.75, 'score': 1.75}], 'negative': [], 'conjunction': [{'key': '也', 'value': 1.5}], 'punctuation': [], 'pattern': []}, 'su-clause4': {'score': 3.0, 'positive': [{'key': '颜值高', 'adverb': [], 'denial': [], 'value': 1.5, 'score': 1.5}], 'negative': [], 'conjunction': [], 'punctuation': [{'key': '!', 'value': 2.0}], 'pattern': []}}
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/comment_spider/.idea/comment_spider.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/comment_spider/.idea/dataSources.local.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | #@
7 | `
8 |
9 |
10 | master_key
11 | root
12 | *:jd_computer
13 |
14 |
15 |
--------------------------------------------------------------------------------
/comment_spider/.idea/dataSources.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | mysql
6 | true
7 | com.mysql.jdbc.Driver
8 | jdbc:mysql://localhost:3309/jd_computer
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/comment_spider/.idea/dictionaries/ASUS.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/comment_spider/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/comment_spider/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/comment_spider/comment_spider/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/comment_spider/comment_spider/__init__.py
--------------------------------------------------------------------------------
/comment_spider/comment_spider/items.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Define here the models for your scraped items
4 | #
5 | # See documentation in:
6 | # https://doc.scrapy.org/en/latest/topics/items.html
7 |
8 | import scrapy
9 |
10 |
11 | class CommentSpiderItem(scrapy.Item):
12 | # define the fields for your item here like:
13 | # name = scrapy.Field()
14 | pass
15 |
--------------------------------------------------------------------------------
/comment_spider/comment_spider/middlewares.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Define here the models for your spider middleware
4 | #
5 | # See documentation in:
6 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html
7 |
8 | from scrapy import signals
9 |
10 |
11 | class CommentSpiderSpiderMiddleware(object):
12 | # Not all methods need to be defined. If a method is not defined,
13 | # scrapy acts as if the spider middleware does not modify the
14 | # passed objects.
15 |
16 | @classmethod
17 | def from_crawler(cls, crawler):
18 | # This method is used by Scrapy to create your spiders.
19 | s = cls()
20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
21 | return s
22 |
23 | def process_spider_input(self, response, spider):
24 | # Called for each response that goes through the spider
25 | # middleware and into the spider.
26 |
27 | # Should return None or raise an exception.
28 | return None
29 |
30 | def process_spider_output(self, response, result, spider):
31 | # Called with the results returned from the Spider, after
32 | # it has processed the response.
33 |
34 | # Must return an iterable of Request, dict or Item objects.
35 | for i in result:
36 | yield i
37 |
38 | def process_spider_exception(self, response, exception, spider):
39 | # Called when a spider or process_spider_input() method
40 | # (from other spider middleware) raises an exception.
41 |
42 | # Should return either None or an iterable of Response, dict
43 | # or Item objects.
44 | pass
45 |
46 | def process_start_requests(self, start_requests, spider):
47 | # Called with the start requests of the spider, and works
48 | # similarly to the process_spider_output() method, except
49 | # that it doesn’t have a response associated.
50 |
51 | # Must return only requests (not items).
52 | for r in start_requests:
53 | yield r
54 |
55 | def spider_opened(self, spider):
56 | spider.logger.info('Spider opened: %s' % spider.name)
57 |
58 |
59 | class CommentSpiderDownloaderMiddleware(object):
60 | # Not all methods need to be defined. If a method is not defined,
61 | # scrapy acts as if the downloader middleware does not modify the
62 | # passed objects.
63 |
64 | @classmethod
65 | def from_crawler(cls, crawler):
66 | # This method is used by Scrapy to create your spiders.
67 | s = cls()
68 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
69 | return s
70 |
71 | def process_request(self, request, spider):
72 | # Called for each request that goes through the downloader
73 | # middleware.
74 |
75 | # Must either:
76 | # - return None: continue processing this request
77 | # - or return a Response object
78 | # - or return a Request object
79 | # - or raise IgnoreRequest: process_exception() methods of
80 | # installed downloader middleware will be called
81 | return None
82 |
83 | def process_response(self, request, response, spider):
84 | # Called with the response returned from the downloader.
85 |
86 | # Must either;
87 | # - return a Response object
88 | # - return a Request object
89 | # - or raise IgnoreRequest
90 | return response
91 |
92 | def process_exception(self, request, exception, spider):
93 | # Called when a download handler or a process_request()
94 | # (from other downloader middleware) raises an exception.
95 |
96 | # Must either:
97 | # - return None: continue processing this exception
98 | # - return a Response object: stops process_exception() chain
99 | # - return a Request object: stops process_exception() chain
100 | pass
101 |
102 | def spider_opened(self, spider):
103 | spider.logger.info('Spider opened: %s' % spider.name)
104 |
--------------------------------------------------------------------------------
/comment_spider/comment_spider/pipelines.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Define your item pipelines here
4 | #
5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
6 | # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
7 |
8 |
9 | class CommentSpiderPipeline(object):
10 | def process_item(self, item, spider):
11 | return item
12 |
--------------------------------------------------------------------------------
/comment_spider/comment_spider/settings.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Scrapy settings for comment_spider project
4 | #
5 | # For simplicity, this file contains only settings considered important or
6 | # commonly used. You can find more settings consulting the documentation:
7 | #
8 | # https://doc.scrapy.org/en/latest/topics/settings.html
9 | # https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
10 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html
11 | from tools.sql_tools import get_exists_comments
12 | BOT_NAME = 'comment_spider'
13 |
14 | SPIDER_MODULES = ['comment_spider.spiders']
15 | NEWSPIDER_MODULE = 'comment_spider.spiders'
16 |
17 |
18 | # Crawl responsibly by identifying yourself (and your website) on the user-agent
19 | #USER_AGENT = 'comment_spider (+http://www.yourdomain.com)'
20 |
21 | # Obey robots.txt rules
22 | ROBOTSTXT_OBEY = False
23 |
24 | # Configure maximum concurrent requests performed by Scrapy (default: 16)
25 | CONCURRENT_REQUESTS = 5
26 |
27 | # Configure a delay for requests for the same website (default: 0)
28 | # See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
29 | # See also autothrottle settings and docs
30 | # DOWNLOAD_DELAY = 0.1
31 | # The download delay setting will honor only one of:
32 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16
33 | #CONCURRENT_REQUESTS_PER_IP = 16
34 |
35 | # Disable cookies (enabled by default)
36 | #COOKIES_ENABLED = False
37 |
38 | # Disable Telnet Console (enabled by default)
39 | #TELNETCONSOLE_ENABLED = False
40 |
41 | # Override the default request headers:
42 | DEFAULT_REQUEST_HEADERS = {
43 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
44 | 'Accept-Language': 'en',
45 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36'
46 | }
47 |
48 | # Enable or disable spider middlewares
49 | # See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
50 | #SPIDER_MIDDLEWARES = {
51 | # 'comment_spider.middlewares.CommentSpiderSpiderMiddleware': 543,
52 | #}
53 |
54 | # Enable or disable downloader middlewares
55 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
56 | #DOWNLOADER_MIDDLEWARES = {
57 | # 'comment_spider.middlewares.CommentSpiderDownloaderMiddleware': 543,
58 | #}
59 |
60 | # Enable or disable extensions
61 | # See https://doc.scrapy.org/en/latest/topics/extensions.html
62 | #EXTENSIONS = {
63 | # 'scrapy.extensions.telnet.TelnetConsole': None,
64 | #}
65 |
66 | # Configure item pipelines
67 | # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
68 | ITEM_PIPELINES = {
69 | 'comment_spider.pipelines.CommentSpiderPipeline': 1,
70 | }
71 |
72 | # Enable and configure the AutoThrottle extension (disabled by default)
73 | # See https://doc.scrapy.org/en/latest/topics/autothrottle.html
74 | #AUTOTHROTTLE_ENABLED = True
75 | # The initial download delay
76 | #AUTOTHROTTLE_START_DELAY = 5
77 | # The maximum download delay to be set in case of high latencies
78 | #AUTOTHROTTLE_MAX_DELAY = 60
79 | # The average number of requests Scrapy should be sending in parallel to
80 | # each remote server
81 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
82 | # Enable showing throttling stats for every response received:
83 | #AUTOTHROTTLE_DEBUG = False
84 |
85 | # Enable and configure HTTP caching (disabled by default)
86 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
87 | #HTTPCACHE_ENABLED = True
88 | #HTTPCACHE_EXPIRATION_SECS = 0
89 | #HTTPCACHE_DIR = 'httpcache'
90 | #HTTPCACHE_IGNORE_HTTP_CODES = []
91 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
92 |
93 |
94 |
95 |
96 |
97 |
98 | EXISTS_CONMENTS = get_exists_comments()
99 |
--------------------------------------------------------------------------------
/comment_spider/comment_spider/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 |
--------------------------------------------------------------------------------
/comment_spider/comment_spider/spiders/commentspider.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import scrapy
3 | import json, re
4 | from tools.sql_tools import *
5 | from tools.jieba_content import get_jieba_comment
6 | from ..items import CommentSpiderItem
7 | from ..settings import EXISTS_CONMENTS
8 |
9 | class CommentspiderSpider(scrapy.Spider):
10 | name = 'commentspider'
11 | allowed_domains = ['jd.com']
12 | start_urls = get_start_urls()
13 |
14 |
15 | def parse(self, response):
16 | if response.text:
17 | json_obj = json.loads(response.text)
18 | if json_obj:
19 | tag_data = json_obj['hotCommentTagStatistics']
20 | tags = '|'.join([tag['name'] for tag in tag_data])
21 | count = '|'.join([str(tag['count']) for tag in tag_data])
22 |                 url = response.url
23 | page_num = int(url.split('&')[4].split('=')[1])
24 | computer_id = int(url.split('&')[1].split('=')[1])
25 | comments = json_obj['comments']
26 |                 # save the data
27 | if page_num == 1:
28 | save_tags(tags, count, computer_id)
29 | if 0 < len(comments) < 10:
30 | for comment in comments:
31 | comment_id = str(computer_id) + str(comment['id'])
32 | content = re.sub(r"…|\.| |~|'", '', comment['content'])
33 | print(content)
34 | jieba_content = get_jieba_comment(content)
35 | print(jieba_content)
36 | create_time = comment['creationTime']
37 | score = comment['score']
38 | print(comment_id, content, jieba_content, score, create_time, computer_id)
39 | if comment_id in EXISTS_CONMENTS:
40 | print(f'{comment_id} 评论已存在')
41 | else:
42 | save_comment(comment_id, content, jieba_content, score, create_time, computer_id)
43 |                     # all comments for this product crawled; update its if_spider flag
44 | update_if_spider(computer_id)
45 |
46 | elif len(comments) == 10:
47 | for comment in comments:
48 | comment_id = str(computer_id) + str(comment['id'])
49 | content = comment['content'].replace(' ', '')
50 | jieba_content = get_jieba_comment(content)
51 | create_time = comment['creationTime']
52 | score = comment['score']
53 | print(comment_id, content, jieba_content, score, create_time, computer_id)
54 | if comment_id in EXISTS_CONMENTS:
55 | print(f'{comment_id} 评论已存在')
56 | else:
57 | save_comment(comment_id, content, jieba_content, score, create_time, computer_id)
58 | page_num += 1
59 | if page_num == 101:
60 |                         # all comments for this product crawled; update its if_spider flag
61 | update_if_spider(computer_id)
62 |                     # request the next page
63 | if page_num < 101:
64 | next_url = f'https://club.jd.com/comment/skuProductPageComments.action?&productId={computer_id}&score=0&sortType=5&page={page_num}&pageSize=10&isShadowSku=0&rid=0&fold=1%27'
65 | yield scrapy.Request(url=next_url, callback=self.parse)
66 | else:
67 | update_if_spider(computer_id)
68 |                         # move on to the next product: its start URL is already in start_urls,
69 |                         # so there is nothing further to yield for this one
70 |
--------------------------------------------------------------------------------
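`parse()` above recovers `productId` and `page` by splitting the URL on `&` at fixed positions, which silently breaks if the parameter order ever changes. A hedged alternative sketch using only the standard library:

```python
# Alternative to the positional url.split('&') indexing in parse(): read the
# productId and page query parameters by name.
from urllib.parse import urlparse, parse_qs

def extract_product_and_page(url):
    params = parse_qs(urlparse(url).query)
    computer_id = int(params['productId'][0])
    page_num = int(params['page'][0])
    return computer_id, page_num
```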
/comment_spider/run_spider.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | '''
4 | @author: taiyc
5 | @file: run_spider
6 | @time: 2019/3/29 21:44
7 | '''
8 |
9 | from scrapy.cmdline import execute
10 |
11 | execute(['scrapy', 'crawl', 'commentspider'])
--------------------------------------------------------------------------------
/comment_spider/scrapy.cfg:
--------------------------------------------------------------------------------
1 | # Automatically created by: scrapy startproject
2 | #
3 | # For more information about the [deploy] section see:
4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html
5 |
6 | [settings]
7 | default = comment_spider.settings
8 |
9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = comment_spider
12 |
--------------------------------------------------------------------------------
/comment_spider/tools/jieba_content.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | '''
4 | @author: taiyc
5 | @file: jieba_content
6 | @time: 2019/3/30 10:30
7 | '''
8 |
9 | import jieba
10 |
11 | # load the user dictionary
12 | jieba.load_userdict('tools\\user_words.txt')
13 |
14 |
15 | # load the stop-word list
16 | def get_stop_words():
17 | words = []
18 |     with open('tools\\stop_words.txt', 'r', encoding='utf-8') as f:
19 | for line in f.readlines():
20 | words.append(line.strip())
21 | return words
22 |
23 |
24 | # segment a comment with jieba and drop stop words
25 | def get_jieba_comment(string):
26 | result = jieba.cut(string)
27 | jieba_content = ''
28 | stop_words = get_stop_words()
29 | for x in result:
30 | if x not in stop_words:
31 | jieba_content += " " + x
32 | return jieba_content
--------------------------------------------------------------------------------
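A small usage sketch for the helper above. Note that `get_jieba_comment()` re-reads `stop_words.txt` on every call, so a caller processing many comments may want to cache the stop-word list; the relative `tools\\...` paths also mean the module must be imported with the comment_spider project root as the working directory.

```python
# Usage sketch (assumes the working directory is the comment_spider project root
# so that the relative 'tools\\...' dictionary paths resolve).
from tools.jieba_content import get_jieba_comment

for comment in ('电脑很快就收到了,性价比不错', '散热差,太卡了'):
    print(get_jieba_comment(comment))    # space-joined tokens with stop words removed
```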
/comment_spider/tools/sql_tools.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | '''
4 | @author: taiyc
5 | @file: sql_tools
6 | @time: 2019/3/29 22:22
7 | '''
8 |
9 | import pymysql
10 |
11 | def connect_mysql():
12 | """
13 | 连接数据库
14 | :return: connect, cursor
15 | """
16 | connect = pymysql.Connect(host='localhost', user='root', password='123456', port=3309,
17 | db='jd_computer'
18 | )
19 | cursor = connect.cursor()
20 | return connect, cursor
21 |
22 | def close_mysql(connect, cursor):
23 | """
24 |     Close the database connection
25 | :param connect:
26 | :return:
27 | """
28 | connect.commit()
29 | cursor.close()
30 | connect.close()
31 |
32 | # build start_urls for products that have not been crawled yet
33 | def get_start_urls():
34 | """
35 |     Build the list of start URLs
36 | :return:
37 | """
38 | connect, cursor = connect_mysql()
39 | select_sql = 'SELECT computer_id FROM computer WHERE if_spider=0'
40 | cursor.execute(select_sql)
41 | result = cursor.fetchall()
42 | close_mysql(connect, cursor)
43 | print(len(result))
44 | return [f'https://club.jd.com/comment/skuProductPageComments.action?&productId={url[0]}&score=0&sortType=5&page=1&pageSize=10&isShadowSku=0&rid=0&fold=1%27' for url in result]
45 |
46 | # save hot-comment tags
47 | def save_tags(*args):
48 | """
49 |     Save tag data to the computer_tag table
50 | :param args:
51 | :return:
52 | """
53 | connect, cursor = connect_mysql()
54 | try:
55 | insert_sql = 'INSERT INTO computer_tag(tags, count, computer_id) VALUES (%s, %s, %s)'
56 | if_exists_sql = 'SELECT COUNT(*) FROM computer_tag WHERE computer_id={}'.format(args[2])
57 | cursor.execute(if_exists_sql)
58 | if not cursor.fetchall()[0][0]:
59 | cursor.execute(insert_sql, args)
60 | print(f'{args[2]}的评论标签数据添加成功。')
61 | else:
62 | print(f'{args[2]}的评论标签数据已存在!')
63 | except Exception as e:
64 | print(f'添加{args[2]}评论标签数据时数据库出现错误!!')
65 | print(e)
66 | finally:
67 | close_mysql(connect, cursor)
68 |
69 | # once a product's comments are fully crawled, set its if_spider flag to 1 in the computer table
70 | def update_if_spider(computer_id):
71 | connect, cursor = connect_mysql()
72 |     update_sql = f'UPDATE computer SET if_spider=1 WHERE computer_id={computer_id}'
73 |     cursor.execute(update_sql)
74 | print(f'{computer_id} 评论爬取完成!')
75 | close_mysql(connect, cursor)
76 |
77 | # save one comment
78 | def save_comment(*args):
79 | connect, cursor = connect_mysql()
80 | # if_exists_sql = 'SELECT COUNT(*) FROM computer_comment WHERE comment_id={}'.format(args[0])
81 | insert_sql = 'INSERT INTO computer_comment(comment_id, content, jieba_content, score, create_time, computer_id) VALUES (%s, %s,%s,%s,%s,%s)'
82 | # cursor.execute(if_exists_sql)
83 | # if cursor.fetchall()[0][0]:
84 | # update_sql = 'UPDATE computer_comment SET content=%s,jieba_content=%s WHERE comment_id = %s'
85 | # cursor.execute(update_sql, (args[1], args[2], args[0]))
86 | # close_mysql(connect, cursor)
87 | # print(f'{args[0]}评论已更新!')
88 | # else:
89 | cursor.execute(insert_sql, args)
90 | close_mysql(connect, cursor)
91 | print(f'{args[0]}评论添加成功!')
92 |
93 | def get_exists_comments():
94 | connect, cursor = connect_mysql()
95 | select_sql = 'SELECT comment_id FROM computer_comment'
96 | cursor.execute(select_sql)
97 | exists_comments = cursor.fetchall()
98 | close_mysql(connect, cursor)
99 | return [x[0] for x in exists_comments]
100 |
101 |
--------------------------------------------------------------------------------
/comment_spider/tools/user_words.txt:
--------------------------------------------------------------------------------
1 | 超薄
2 | 携带方便
3 | 特别喜欢
4 | 系统稳定
5 | 轻薄小巧
6 | 携带方便
7 | verygood
8 | 还好
9 | 还可以
10 | 色彩逼真
11 | 音质无敌
12 | 轻薄
13 | 物流好
14 | 五星好评
15 | 神机
16 | 美美哒
17 | 流畅度佳
18 | 小巧玲珑
19 | 方便携带
20 | 轻薄精巧
21 | 功能齐全
22 | 十分流畅
23 | 质量上乘
24 | 稳定可靠
25 | 反应灵敏
26 | 十分结实
27 | 电量充足
28 | 画质清晰
29 | 运行稳定
30 | 反应灵敏
31 | 方便快捷
32 | 时尚大气
33 | 运行超快
34 | 尺寸合适
35 | 极其省电
36 | 不好
37 | 好
38 | 不好用
39 | 散热差
40 | 散热好
41 | 真垃圾
42 | 垃圾
43 | 电脑不错
44 | 耐用
45 | 挺耐用
46 | 真耐用
47 | 手感好
48 | 屏幕大
49 | 手感极差
50 | 手感差
51 | 太贵了
52 | 贵
53 | 买贵了
54 | 不值
55 | 后悔
56 | 不后悔
57 | 太坑了
58 | 难看
59 | 高兴
60 | 上档次
61 | 颜值高
62 | 亏
63 | 真亏
64 | 太卡了
65 | 太卡
66 | 满意
67 | 物流快
68 | 颜值爆炸
69 | 超好玩
70 | 好评
71 | 易发烫
72 | 操作流畅
73 | 反应快速
74 | 游戏不卡顿
75 | 游戏卡顿
76 | 跑分也不低
77 | 跑分不低
78 | 质量有保证
79 | 售后服务好
80 | 颜值无敌
81 | 手感很好
82 | 一点都不卡
83 | 质量不错
84 | 手感极佳
85 | 好喜欢
86 | 很好用
87 | 高大上
88 | 好高大上
89 | 好惊喜
90 | 不会卡
91 | 科技感
92 | 一流
93 | 科技感一流
94 | 物超所值
95 | 有品位
96 | 低调奢华
97 | 做工好
98 | 很时尚
99 | 性价比真的高
100 | 性价比高
101 | 性价比低
102 | 掉漆
103 | 独一无二
104 | 别具匠心
105 | 发热严重
106 | 超级精美
107 | 蛮好的
108 | 特别帅
109 | 帅炸了
110 | 最满意
111 | 最喜欢
112 | 非常高清
113 | 最牛逼
114 | 非常好用
115 | 可以指纹
116 | 便宜
117 | 外观好看
118 | 没有卡顿
119 | 很轻
120 | 爱不释手
121 | 功能强大
122 | 配置高
123 | 分辨率低
124 | 分辨率高
125 | 反应也很快
126 | 屏幕艳丽
127 | 屏幕顺滑
128 | 大小合适
129 | 好太多
130 | 很灵活
131 | 音质效果很好
132 | 音质效果好
133 | 音质效果很不好
134 | 音质效果差
135 | 很溜
136 | 屏幕色彩艳丽
137 | 性价比很高
138 | 很值得
139 | 很不错
140 | 完美
141 | 产品质量高
142 | 价格合理
143 | 民族品牌
144 | 产品不错
145 | 值得信赖
146 | 有点卡
147 | 挺不错
148 | 一切满意
149 | 网速快
150 | 网速慢
151 | 有质感
152 | 有手感
153 | 续航很不错
154 | 操作很流畅
155 | 值得购买
156 | 玩游戏爽
157 | 玩游戏不行
158 | 反应很快
159 | 屏幕很大
160 | 值得购买
161 | 非常优越
162 | 使用顺手
163 | 非常满意
164 | 正品
165 | 吃鸡无压力
166 | 都能满足
167 | 性能好
168 | 屏幕够大
169 | 非常完美
170 | 性能不错
171 | 一点小瑕疵
172 | 有瑕疵
173 | 有点小瑕疵
174 | 稳定
175 | 很稳定
176 | 很快
177 | 不错的选择
178 | 超级好
179 | 反应超快
180 | 快递给力
181 | 不好
182 | 好
183 | 十分不错
184 | 送货速度
185 | 实用性强
186 | 特别流畅
187 | 特方便
188 | 很好上手
189 | 质量挺好
190 | 真心不错
191 | 价格实惠
192 | 质量超级好
193 | 快递速度
194 | 快递给力
195 | 性能优良
196 | 非常不错
197 | 好评
198 | 必须好评
199 | 容易上手
200 | 没问题
201 | 杠杠地
202 | 无瑕疵
203 | 特别快
204 | 非常流畅
205 | 帧数超高
206 | 得心应手
207 | 很惊喜
208 | 很失望
209 | 还行
210 | 商品不错
211 | 差评
212 | 挺好的
213 | 挺好
214 | 很棒
215 | 很惊艳
216 | 惊艳
217 | 超级棒
218 | 可以
219 | 使用流畅
220 | 价钱可以
221 | 太喜欢了
222 | 太好了
223 | 耐用
224 | 实用
225 | 大品牌
226 |
227 |
--------------------------------------------------------------------------------
/computer_analysis/.idea/computer_analysis.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/computer_analysis/.idea/dataSources.local.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | #@
7 | `
8 |
9 |
10 | master_key
11 | root
12 | *:jd_computer
13 |
14 |
15 |
--------------------------------------------------------------------------------
/computer_analysis/.idea/dataSources.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | mysql
6 | true
7 | com.mysql.jdbc.Driver
8 | jdbc:mysql://localhost:3309/jd_computer
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/computer_analysis/.idea/dictionaries/ASUS.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/computer_analysis/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/computer_analysis/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/computer_analysis/api/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/computer_analysis/api/__init__.py
--------------------------------------------------------------------------------
/computer_analysis/api/admin.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 |
3 | # Register your models here.
4 |
--------------------------------------------------------------------------------
/computer_analysis/api/apps.py:
--------------------------------------------------------------------------------
1 | from django.apps import AppConfig
2 |
3 |
4 | class CptAnalysisConfig(AppConfig):
5 | name = 'api'
6 |
--------------------------------------------------------------------------------
/computer_analysis/api/migrations/0001_initial.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 2.1.4 on 2019-03-29 05:14
2 |
3 | from django.db import migrations, models
4 | import django.db.models.deletion
5 |
6 |
7 | class Migration(migrations.Migration):
8 |
9 | initial = True
10 |
11 | dependencies = [
12 | ]
13 |
14 | operations = [
15 | migrations.CreateModel(
16 | name='Comment',
17 | fields=[
18 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
19 | ('comment_id', models.CharField(max_length=40, unique=True)),
20 | ('content', models.TextField()),
21 | ('jieba_content', models.TextField()),
22 | ('score', models.BigIntegerField()),
23 | ('create_time', models.CharField(max_length=30)),
24 | ],
25 | options={
26 | 'db_table': 'computer_comment',
27 | },
28 | ),
29 | migrations.CreateModel(
30 | name='Computer',
31 | fields=[
32 | ('computer_id', models.BigIntegerField(primary_key=True, serialize=False)),
33 | ('brand', models.CharField(max_length=30)),
34 | ('title', models.CharField(max_length=255)),
35 | ('price', models.IntegerField()),
36 | ('img_url', models.CharField(max_length=255)),
37 | ('param', models.TextField()),
38 | ('if_spider', models.BooleanField(default=0)),
39 | ('good_rate', models.FloatField(default=0.0)),
40 | ],
41 | options={
42 | 'db_table': 'computer',
43 | },
44 | ),
45 | migrations.CreateModel(
46 | name='Tag',
47 | fields=[
48 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
49 | ('tags', models.CharField(max_length=255)),
50 | ('count', models.CharField(max_length=200)),
51 | ('computer', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to='api.Computer')),
52 | ],
53 | options={
54 | 'db_table': 'computer_tag',
55 | },
56 | ),
57 | migrations.AddField(
58 | model_name='comment',
59 | name='computer',
60 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='api.Computer'),
61 | ),
62 | ]
63 |
--------------------------------------------------------------------------------
/computer_analysis/api/migrations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/computer_analysis/api/migrations/__init__.py
--------------------------------------------------------------------------------
/computer_analysis/api/models.py:
--------------------------------------------------------------------------------
1 | from django.db import models
2 |
3 |
4 | # Create your models here.
5 |
6 | # Computer information table
7 | class Computer(models.Model):
8 | computer_id = models.BigIntegerField(primary_key=True)
9 | brand = models.CharField(max_length=30, null=False)
10 | title = models.CharField(max_length=255)
11 | price = models.IntegerField()
12 | img_url = models.CharField(max_length=255)
13 | param = models.TextField()
14 | if_spider = models.BooleanField(default=False)
15 | good_rate = models.FloatField(default=0.0)
16 |
17 | class Meta:
18 | db_table = 'computer'
19 |
20 | # Comment tags
21 | class Tag(models.Model):
22 | tags = models.CharField(max_length=255, null=False)
23 | count = models.CharField(max_length=200, null=False)
24 | computer = models.OneToOneField(Computer, on_delete=models.CASCADE)
25 |
26 | class Meta:
27 | db_table = 'computer_tag'
28 |
29 |
30 | # Comment content
31 | class Comment(models.Model):
32 | comment_id = models.CharField(max_length=40, unique=True)
33 | content = models.TextField()
34 | jieba_content = models.TextField()
35 | score = models.BigIntegerField(null=False)
36 | create_time = models.CharField(max_length=30)
37 | computer = models.ForeignKey(Computer, on_delete=models.CASCADE)
38 |
39 | class Meta:
40 | db_table = 'computer_comment'
41 |
--------------------------------------------------------------------------------
/computer_analysis/api/search_indexes.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | '''
4 | @author: taiyc
5 | @file: search_indexes
6 | @time: 2019/4/3 13:40
7 | '''
8 | from haystack import indexes
9 | from api.models import Computer
10 |
11 |
12 | class GoodsInfoIndex(indexes.SearchIndex, indexes.Indexable):
13 | text = indexes.CharField(document=True, use_template=True)
14 |
15 | good_rate = indexes.FloatField(model_attr='good_rate')
16 | computer_id = indexes.CharField(model_attr='computer_id')
17 | price = indexes.CharField(model_attr='price')
18 | img_url = indexes.CharField(model_attr='img_url')
19 | title = indexes.CharField(model_attr='title')
20 |
21 | def get_model(self):
22 | return Computer
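Note: the text document field above is rendered from the template at /computer_analysis/templates/search/indexes/api/computer_text.txt (included later in this listing), and the Whoosh index stored under /computer_analysis/whoosh_index/ is normally built or refreshed with haystack's rebuild_index / update_index management commands.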
--------------------------------------------------------------------------------
/computer_analysis/api/tests.py:
--------------------------------------------------------------------------------
1 | from django.test import TestCase
2 |
3 | # Create your tests here.
4 |
--------------------------------------------------------------------------------
/computer_analysis/api/views.py:
--------------------------------------------------------------------------------
1 | import os
2 | from django.views.generic import View
3 | from tools.decorator import allow_origin
4 | from django.utils.decorators import method_decorator
5 | from tools.pygal_process import create_wordcloud, create_pie, create_bar, jieba_top10_bar
6 | # Import the pagination module
7 | from django.core.paginator import Paginator, PageNotAnInteger, InvalidPage, EmptyPage
8 | # Import the models
9 | from .models import Computer
10 | from tools.orm2json import object_to_json
11 | # Import the SearchView from the haystack search framework
12 | from haystack.views import SearchView
13 | from django.http import JsonResponse
14 |
15 | from tools.searchresult2json import sea_result2json
16 |
17 | class ComputerView(View):
18 | @method_decorator(allow_origin)
19 | def get(self, request):
20 | # ?&brand=huawei&page=1&page_size=5
21 | # 1. Get the request parameters
22 | brand = request.GET.get('brand', 'huawei')
23 | page = request.GET.get('page', '1')
24 | page_size = request.GET.get('page_size', '5')
25 | results = Computer.objects.filter(brand=brand).order_by('-good_rate')
26 | # isdigit(): check that page and page_size consist only of digits
27 | page = int(page) if page.isdigit() else 1
28 | page_size = int(page_size) if page_size.isdigit() else 5
29 | # print(page_size)
30 | # 2. Paginate the results
31 | paginator = Paginator(results, page_size)
32 | error = 0
33 | try:
34 | # Get the current page object
35 | current_page = paginator.page(page)
36 | print(current_page.object_list)
37 | except (EmptyPage, InvalidPage, PageNotAnInteger) as e:
38 | error = '请求参数异常,默认已返回最后一页'
39 | current_page = paginator.page(paginator.num_pages)
40 | # Also reset page to the last page
41 | page = paginator.num_pages
42 | # 3. Previous/next page links and the total page count
43 | # next_url = f'http://127.0.0.1:8000/phone/?brand={brand}&page={page+1}&page_size={page_size}' if current_page.has_next() else ""
44 | # pre_url = f'http://127.0.0.1:8000/phone/?brand={brand}&page={page-1}&page_size={page_size}' if current_page.has_previous() else ""
45 | total_page = paginator.num_pages
46 | # 4. Compute the page-number list
47 | page_numbers = []
48 | if total_page <= 5:
49 | page_numbers = [x for x in range(1, total_page+1)]
50 | else:
51 | # Sliding window around the current page
52 | if 3 < page < total_page-3:
53 | page_numbers = [x for x in range(page-2, page+3)]
54 | # Fixed window at the start
55 | elif page <= 3:
56 | page_numbers = [x for x in range(1, 6)]
57 | # Fixed window at the end
58 | elif page >= total_page-3:
59 | page_numbers = [x for x in range(page-5, total_page+1)]
60 | data = {
61 | 'status': 1,
62 | 'error': error,
63 | 'if_has_pre_page': current_page.has_previous(),
64 | 'if_has_next_page': current_page.has_next(),
65 | 'page_numbers': page_numbers,
66 | 'current_page_data': object_to_json(current_page.object_list)
67 | }
68 | # response = JsonResponse(data)
69 | # response['Access-Control-Allow-Origin'] = '*'
70 | return data
71 |
72 |
73 | class DetailView(View):
74 | @method_decorator(allow_origin)
75 | def get(self, request):
76 | computer_id = request.GET.get('computer_id', '')
77 | computer = 0
78 | status = 1
79 | wordcloud_path = 0
80 | pie_path = 0
81 | bar_path = 0
82 | top10_bar_path = 0
83 | try:
84 | computer_obj = Computer.objects.filter(computer_id=computer_id)
85 | computer = object_to_json(computer_obj)[0]
86 | # Word cloud
87 | if not os.path.exists(f'static\wordcloud\{computer_id}.png'):
88 | if_success = create_wordcloud(computer_id)
89 | if if_success:
90 | wordcloud_path = f'..\computer_analysis\static\wordcloud\{computer_id}.png'
91 | else:
92 | wordcloud_path = f'..\computer_analysis\static\wordcloud\{computer_id}.png'
93 | # Pie chart
94 | if not os.path.exists(f'..\static\pie\{computer_id}.svg'):
95 | if_success = create_pie(computer_id)
96 | if if_success:
97 | pie_path = f'..\computer_analysis\static\pie\{computer_id}.svg'
98 | else:
99 | pie_path = f'..\computer_analysis\static\pie\{computer_id}.svg'
100 | # Bar chart
101 | if not os.path.exists(f'..\static\\bar\{computer_id}.svg'):
102 | if_success = create_bar(computer_id)
103 | if if_success:
104 | bar_path = f'..\computer_analysis\static\\bar\{computer_id}.svg'
105 | else:
106 | bar_path = f'..\computer_analysis\static\\bar\{computer_id}.svg'
107 | # Bar chart of the top-10 jieba words
108 | if not os.path.exists(f'..\static\jieba_top10_bar\{computer_id}.svg'):
109 | if_success = jieba_top10_bar(computer_id)
110 | if if_success:
111 | top10_bar_path = f'..\computer_analysis\static\jieba_top10_bar\{computer_id}.svg'
112 | else:
113 | top10_bar_path = f'..\computer_analysis\static\jieba_top10_bar\{computer_id}.svg'
114 | except Exception as e:
115 | print(e)
116 | status = 0
117 | data = {
118 | 'status': status,
119 | 'computer': computer,
120 | 'wordcloud': wordcloud_path,
121 | 'pie': pie_path,
122 | 'bar': bar_path,
123 | 'top10_bar': top10_bar_path
124 | }
125 | # response = JsonResponse(data)
126 | # response['Access-Control-Allow-Origin'] = '*'
127 | return data
128 |
129 |
130 |
131 | class ComputerSearchView(SearchView):
132 |
133 | def extra_context(self):
134 | context = super(ComputerSearchView, self).extra_context()
135 | key = self.request.GET.get('q')
136 | context['q'] = key
137 | return context
138 |
139 | def build_page(self):
140 | data = {
141 | 'status': 1,
142 | 'error': 0,
143 | 'if_has_pre_page': 0,
144 | 'if_has_next_page': 0,
145 | 'page_numbers': 0,
146 | 'current_page_data': 0
147 | }
148 | try:
149 | page = int(self.request.GET.get('page', 1))
150 | except (TypeError, ValueError):
151 | data['error'] = '页码值非法!'
152 | page = 0
153 | data['status'] = 0
154 | if page < 1:
155 | data['error'] = '页码值非法!'
156 | data['status'] = 0
157 | # start_offset = (page - 1) * self.results_per_page
158 | if data['status']:
159 | self.results[:]  # force the lazy SearchQuerySet to execute (see the note below)
160 | self.results = self.results.order_by('-good_rate')
161 | paginator = Paginator(self.results, self.results_per_page)
162 | # self.results[start_offset: start_offset + self.results_per_page]
163 | # self.results only gets values after the slice self.results[start_offset:start_offset + self.results_per_page] above has run; before that it is empty, so the order_by sorting must be placed after that line
164 | # haystack's pagination of search results does not paginate the complete result set.
165 | # paginator = Paginator(self.results, self.results_per_page)
166 | try:
167 | # Get the current page object
168 | current_page = paginator.page(page)
169 | data['current_page_data'] = sea_result2json(current_page.object_list)
170 | data['if_has_next_page'] = current_page.has_next()
171 | data['if_has_pre_page'] = current_page.has_previous()
172 | except (EmptyPage, InvalidPage, PageNotAnInteger) as e:
173 | data['error'] = '请求页码超出范围,默认返回最后一页'
174 | data['current_page_data'] = sea_result2json(paginator.page(paginator.num_pages).object_list)
175 | # Also reset page to the last page
176 | page = paginator.num_pages
177 | data['if_has_pre_page'] = True
178 | total_page = paginator.num_pages
179 | # Compute the page-number list
180 | page_numbers = []
181 | if total_page <= 5:
182 | page_numbers = [x for x in range(1, total_page + 1)]
183 | else:
184 | # Sliding window around the current page
185 | if 3 < page < total_page - 3:
186 | page_numbers = [x for x in range(page - 2, page + 3)]
187 | # Fixed window at the start
188 | elif page <= 3:
189 | page_numbers = [x for x in range(1, 6)]
190 | # Fixed window at the end
191 | elif page >= total_page - 3:
192 | page_numbers = [x for x in range(page - 5, total_page + 1)]
193 | data['page_numbers'] = page_numbers
194 | return data
195 | return data
196 |
197 | def get_context(self):
198 | data = self.build_page()
199 | response = JsonResponse(data)
200 | # Allow cross-origin requests (CORS)
201 | response['Access-Control-Allow-Origin'] = '*'
202 | return response
203 |
204 | def create_response(self):
205 | response = self.get_context()
206 | return response
207 |
208 |
209 |
210 |
211 |
--------------------------------------------------------------------------------
/computer_analysis/computer_analysis/__init__.py:
--------------------------------------------------------------------------------
1 | import pymysql
2 |
3 | pymysql.install_as_MySQLdb()
--------------------------------------------------------------------------------
/computer_analysis/computer_analysis/settings.py:
--------------------------------------------------------------------------------
1 | """
2 | Django settings for computer_analysis project.
3 |
4 | Generated by 'django-admin startproject' using Django 2.1.4.
5 |
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/2.1/topics/settings/
8 |
9 | For the full list of settings and their values, see
10 | https://docs.djangoproject.com/en/2.1/ref/settings/
11 | """
12 |
13 | import os
14 |
15 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
16 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
17 |
18 |
19 | # Quick-start development settings - unsuitable for production
20 | # See https://docs.djangoproject.com/en/2.1/howto/deployment/checklist/
21 |
22 | # SECURITY WARNING: keep the secret key used in production secret!
23 | SECRET_KEY = 'f213_o#bb=_g+j(2r&d!0&%e%b(u4a!w94b2!tp8(wt!$w$mld'
24 |
25 | # SECURITY WARNING: don't run with debug turned on in production!
26 | DEBUG = True
27 |
28 | ALLOWED_HOSTS = []
29 |
30 |
31 | # Application definition
32 |
33 | INSTALLED_APPS = [
34 | 'django.contrib.admin',
35 | 'django.contrib.auth',
36 | 'django.contrib.contenttypes',
37 | 'django.contrib.sessions',
38 | 'django.contrib.messages',
39 | 'django.contrib.staticfiles',
40 | 'api',
41 | 'haystack'
42 | ]
43 |
44 | MIDDLEWARE = [
45 | 'django.middleware.security.SecurityMiddleware',
46 | 'django.contrib.sessions.middleware.SessionMiddleware',
47 | 'django.middleware.common.CommonMiddleware',
48 | 'django.middleware.csrf.CsrfViewMiddleware',
49 | 'django.contrib.auth.middleware.AuthenticationMiddleware',
50 | 'django.contrib.messages.middleware.MessageMiddleware',
51 | 'django.middleware.clickjacking.XFrameOptionsMiddleware',
52 | ]
53 |
54 | ROOT_URLCONF = 'computer_analysis.urls'
55 |
56 | TEMPLATES = [
57 | {
58 | 'BACKEND': 'django.template.backends.django.DjangoTemplates',
59 | 'DIRS': ['templates'],
60 | 'APP_DIRS': True,
61 | 'OPTIONS': {
62 | 'context_processors': [
63 | 'django.template.context_processors.debug',
64 | 'django.template.context_processors.request',
65 | 'django.contrib.auth.context_processors.auth',
66 | 'django.contrib.messages.context_processors.messages',
67 | ],
68 | },
69 | },
70 | ]
71 |
72 | WSGI_APPLICATION = 'computer_analysis.wsgi.application'
73 |
74 |
75 | # Database
76 | # https://docs.djangoproject.com/en/2.1/ref/settings/#databases
77 |
78 | DATABASES = {
79 | 'default': {
80 | 'ENGINE': 'django.db.backends.mysql',
81 | 'NAME': 'jd_computer',
82 | 'HOST': 'localhost',
83 | 'USER': 'root',
84 | 'PASSWORD': '123456',
85 | 'PORT': 3309,
86 | }
87 | }
88 |
89 |
90 | # Password validation
91 | # https://docs.djangoproject.com/en/2.1/ref/settings/#auth-password-validators
92 |
93 | AUTH_PASSWORD_VALIDATORS = [
94 | {
95 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
96 | },
97 | {
98 | 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
99 | },
100 | {
101 | 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
102 | },
103 | {
104 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
105 | },
106 | ]
107 |
108 |
109 | # Internationalization
110 | # https://docs.djangoproject.com/en/2.1/topics/i18n/
111 |
112 | LANGUAGE_CODE = 'zh-hans'
113 |
114 | TIME_ZONE = 'Asia/Shanghai'
115 |
116 | USE_I18N = True
117 |
118 | USE_L10N = True
119 |
120 | USE_TZ = True
121 |
122 |
123 | # Static files (CSS, JavaScript, Images)
124 | # https://docs.djangoproject.com/en/2.1/howto/static-files/
125 |
126 | STATIC_URL = '/static/'
127 | # Configure the search-engine backend that haystack uses
128 | HAYSTACK_CONNECTIONS = {
129 | 'default': {
130 | 'ENGINE': 'haystack.backends.whoosh_cn_backend.WhooshEngine',
131 | 'PATH': os.path.join(BASE_DIR, 'whoosh_index'),
132 | },
133 | }
134 | HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
135 |
136 | HAYSTACK_SEARCH_RESULTS_PER_PAGE = 5
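Note: 'haystack.backends.whoosh_cn_backend' is not shipped with django-haystack; it is presumably a local copy of the stock Whoosh backend adapted to use a Chinese analyzer (for example jieba's ChineseAnalyzer) so that product titles are tokenised sensibly. The MySQL connection (database jd_computer on port 3309) matches the one used by the spider pipelines.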
--------------------------------------------------------------------------------
/computer_analysis/computer_analysis/urls.py:
--------------------------------------------------------------------------------
1 | """computer_analysis URL Configuration
2 |
3 | The `urlpatterns` list routes URLs to views. For more information please see:
4 | https://docs.djangoproject.com/en/2.1/topics/http/urls/
5 | Examples:
6 | Function views
7 | 1. Add an import: from my_app import views
8 | 2. Add a URL to urlpatterns: path('', views.home, name='home')
9 | Class-based views
10 | 1. Add an import: from other_app.views import Home
11 | 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
12 | Including another URLconf
13 | 1. Import the include() function: from django.urls import include, path
14 | 2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
15 | """
16 | from django.contrib import admin
17 | from django.urls import path
18 | from api.views import *
19 |
20 | urlpatterns = [
21 | path('admin/', admin.site.urls),
22 | path('api/v1/computer/list/', ComputerView.as_view()),
23 | path('api/v1/computer/detail/', DetailView.as_view()),
24 | path('api/v1/computer/search/', ComputerSearchView()),
25 | ]
26 |
--------------------------------------------------------------------------------
/computer_analysis/computer_analysis/wsgi.py:
--------------------------------------------------------------------------------
1 | """
2 | WSGI config for computer_analysis project.
3 |
4 | It exposes the WSGI callable as a module-level variable named ``application``.
5 |
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/2.1/howto/deployment/wsgi/
8 | """
9 |
10 | import os
11 |
12 | from django.core.wsgi import get_wsgi_application
13 |
14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'computer_analysis.settings')
15 |
16 | application = get_wsgi_application()
17 |
--------------------------------------------------------------------------------
/computer_analysis/manage.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import sys
4 |
5 | if __name__ == '__main__':
6 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'computer_analysis.settings')
7 | try:
8 | from django.core.management import execute_from_command_line
9 | except ImportError as exc:
10 | raise ImportError(
11 | "Couldn't import Django. Are you sure it's installed and "
12 | "available on your PYTHONPATH environment variable? Did you "
13 | "forget to activate a virtual environment?"
14 | ) from exc
15 | execute_from_command_line(sys.argv)
16 |
--------------------------------------------------------------------------------
/computer_analysis/static/pie/100002368328.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/computer_analysis/static/pie/11547179910.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/computer_analysis/static/pie/15019918741.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/computer_analysis/static/pie/20167878769.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/computer_analysis/static/pie/3714545.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/computer_analysis/static/pie/39987003288.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/computer_analysis/static/wordcloud/100002368328.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/computer_analysis/static/wordcloud/100002368328.png
--------------------------------------------------------------------------------
/computer_analysis/static/wordcloud/11528184498.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/computer_analysis/static/wordcloud/11528184498.png
--------------------------------------------------------------------------------
/computer_analysis/static/wordcloud/11547179910.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/computer_analysis/static/wordcloud/11547179910.png
--------------------------------------------------------------------------------
/computer_analysis/static/wordcloud/15019918741.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/computer_analysis/static/wordcloud/15019918741.png
--------------------------------------------------------------------------------
/computer_analysis/static/wordcloud/20167878769.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/computer_analysis/static/wordcloud/20167878769.png
--------------------------------------------------------------------------------
/computer_analysis/static/wordcloud/3714545.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/computer_analysis/static/wordcloud/3714545.png
--------------------------------------------------------------------------------
/computer_analysis/static/wordcloud/39987003288.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/computer_analysis/static/wordcloud/39987003288.png
--------------------------------------------------------------------------------
/computer_analysis/static/wordcloud/5520838.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/computer_analysis/static/wordcloud/5520838.png
--------------------------------------------------------------------------------
/computer_analysis/templates/search/indexes/api/computer_text.txt:
--------------------------------------------------------------------------------
1 | {{ object.title }}
2 | {{ object.computer_id }}
3 | {{ object.brand }}
--------------------------------------------------------------------------------
/computer_analysis/tools/STXINGKA.TTF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/computer_analysis/tools/STXINGKA.TTF
--------------------------------------------------------------------------------
/computer_analysis/tools/decorator.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | '''
4 | @author: taiyc
5 | @file: decorator
6 | @time: 2019/3/14 16:00
7 | '''
8 |
9 | from django.http import JsonResponse
10 |
11 | def allow_origin(func):
12 | def _func(*args, **kwargs):
13 | data = func(*args, **kwargs)
14 | response = JsonResponse(data)
15 | response['Access-Control-Allow-Origin'] = '*'
16 | return response
17 | return _func
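A minimal sketch of how allow_origin is applied to a class-based view in this project (this mirrors api/views.py; ExampleView is a hypothetical name used only for illustration):

    from django.views.generic import View
    from django.utils.decorators import method_decorator
    from tools.decorator import allow_origin

    class ExampleView(View):  # hypothetical view
        @method_decorator(allow_origin)
        def get(self, request):
            # the decorator wraps this dict in a JsonResponse and adds Access-Control-Allow-Origin: *
            return {'status': 1}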
--------------------------------------------------------------------------------
/computer_analysis/tools/orm2json.py:
--------------------------------------------------------------------------------
1 | from django.db.models.query import QuerySet
2 |
3 |
4 | def object_to_json(model, ignore=None):
5 | if ignore is None:
6 | ignore = []
7 | if type(model) in [QuerySet, list]:
8 | json = []
9 | for element in model:
10 | json.append(_django_single_object_to_json(element, ignore))
11 | return json
12 | else:
13 | return _django_single_object_to_json(model, ignore)
14 |
15 |
16 | def _django_single_object_to_json(element, ignore=None):
17 | return dict([(attr, getattr(element, attr)) for attr in [f.name for f in element._meta.fields if f.name not in ignore]])
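A minimal usage sketch, assuming a configured Django environment; this mirrors how api/views.py serialises querysets:

    from api.models import Computer
    from tools.orm2json import object_to_json

    # one dict per Computer row, keyed by field name (computer_id, brand, title, price, ...)
    rows = object_to_json(Computer.objects.filter(brand='huawei'))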
--------------------------------------------------------------------------------
/computer_analysis/tools/pygal_process.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | '''
4 | @author: taiyc
5 | @file: pygal_process
6 | @time: 2019/4/3 13:55
7 | '''
8 |
9 | import pygal, pymysql
10 | from pygal.style import BlueStyle
11 | from wordcloud import WordCloud
12 |
13 |
14 | def connect_sql():
15 | connect = pymysql.connect(
16 | user='root',
17 | password='123456',
18 | host='localhost',
19 | port=3309,
20 | db='jd_computer'
21 | )
22 | cursor = connect.cursor()
23 | return connect, cursor
24 |
25 |
26 | def close_sql(connect, cursor):
27 | connect.commit()
28 | cursor.close()
29 | connect.close()
30 |
31 |
32 | def create_wordcloud(goods_id):
33 | """
34 | Generate a word cloud from the product's comments
35 | :param goods_id:
36 | :return:
37 | """
38 | connect, cursor = connect_sql()
39 | sql = f'SELECT jieba_content FROM computer_comment WHERE computer_id={goods_id}'
40 | cursor.execute(sql)
41 | result = cursor.fetchall()
42 | close_sql(connect, cursor)
43 | # Only generate a word cloud when there are at least 50 comments
44 | if len(result) >= 50:
45 | jieba_str = ''.join([x[0] for x in result]).replace(' ', '').replace('\n', '')
46 | wc = WordCloud(width=500,
47 | height=500,
48 | background_color='white', # background color
49 | max_words=50, # maximum number of words to display
50 | max_font_size=100, # maximum font size
51 | font_path='STXINGKA.TTF',
52 | random_state=200, # random state used to pick a PIL color for each word
53 | )
54 | wc.generate(jieba_str)
55 | wc.to_file(f'static\wordcloud\{goods_id}.png')
56 | return True
57 | else:
58 | return False
59 |
60 |
61 | def create_pie(goods_id):
62 | """
63 | Generate a pie chart of the users' rating scores
64 | :param goods_id:
65 | :return:
66 | """
67 | pie_chart = pygal.Pie(style=BlueStyle)
68 | connect, cursor = connect_sql()
69 | sql = f'SELECT score,count(score) FROM computer_comment WHERE computer_id={goods_id} GROUP BY score'
70 | cursor.execute(sql)
71 | result = cursor.fetchall()
72 | close_sql(connect, cursor)
73 | total = sum([x[1] for x in result])
74 | if total >= 5:
75 | pie_chart.title = f'{total}位买家评分分析饼状图(%)'
76 | for score_group in result:
77 | pie_chart.add(str(score_group[0])+'分', round(score_group[1]/total, 4)*100)
78 | pie_chart.render_to_file(f'static\pie\{goods_id}.svg')
79 | return True
80 | else:
81 | return False
82 |
83 |
84 | def create_bar(goods_id):
85 | """
86 | Generate a horizontal bar chart of the comment tags
87 | :param goods_id:
88 | :return:
89 | """
90 | bar_chart = pygal.HorizontalBar(style=BlueStyle)
91 | bar_chart.title = '标签分析条形图'
92 | connect, cursor = connect_sql()
93 | sql = f'SELECT tags,count FROM computer_tag WHERE computer_id={goods_id}'
94 | cursor.execute(sql)
95 | result = cursor.fetchall()
96 | close_sql(connect, cursor)
97 | # Check whether any tags exist
98 | if result:
99 | tags = result[0][0].split('|')
100 | count = result[0][1].split('|')
101 | for index in range(len(tags)):
102 | bar_chart.add(tags[index], int(count[index]))
103 | bar_chart.render_to_file(f'static\\bar\{goods_id}.svg')
104 | return True
105 | # Only generate the chart when there are at least 3 tags
106 | # if len(tags) >= 3:
107 | # for index in range(len(tags)):
108 | # bar_chart.add(tags[index], int(count[index]))
109 | # bar_chart.render_to_file(f'..\static\bar\{good_id}.svg')
110 | # return 'success'
111 | # else:
112 | # # Return the tags as a dict
113 | # tags_dict = {}
114 | # for index in range(len(tags)):
115 | # tags_dict[tags[index]] = count[index]
116 | # print(tags_dict)
117 | # return tags_dict
118 | else:
119 | return False
120 |
121 |
122 | def jieba_top10_bar(goods_id):
123 | bar_chart = pygal.Bar(style=BlueStyle)
124 | bar_chart.title = '分词比重top10'
125 | connect, cursor = connect_sql()
126 | sql = f'SELECT jieba_content FROM computer_comment WHERE computer_id={goods_id}'
127 | cursor.execute(sql)
128 | result = cursor.fetchall()
129 | close_sql(connect, cursor)
130 | # Only generate when there are more than 50 comments
131 | if len(result) > 50:
132 | jieba_list = ''.join([x[0] for x in result]).replace(' ', '').replace('\n', '').split(' ')
133 | topn_dict = {}
134 | for word in jieba_list:
135 | if word:
136 | if word not in topn_dict:
137 | topn_dict[word] = 1
138 | else:
139 | topn_dict[word] = topn_dict[word] + 1
140 | stop = ['物流', '运行', '电脑', '客服', '收到', '开机', '东西', '质量', '购物']
141 | for s in stop:
142 | if s in topn_dict:
143 | topn_dict.pop(s)
144 | top10_list = sorted(topn_dict.items(), key=lambda item: item[1], reverse=True)[1:11]  # skip the single most frequent word, then keep the next 10
145 | for heat_word in top10_list:
146 | bar_chart.add(heat_word[0], int(heat_word[1]))
147 | bar_chart.render_to_file(f'static\jieba_top10_bar\{goods_id}.svg')
148 | return True
149 | else:
150 | return False
151 |
152 |
153 |
154 | # create_bar(6099496)
155 | # print(create_bar(6099496))
156 | # jieba_top10_bar(29196113704)
157 |
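The chart helpers write to relative Windows-style paths such as static\pie\<goods_id>.svg, so they appear to assume the Django project root as the current working directory (matching the existence checks in api/views.py). A minimal usage sketch with a product id taken from the static/ directory above:

    from tools.pygal_process import create_pie

    # returns True and writes static\pie\100002368328.svg when the product has at least 5 rated comments
    ok = create_pie(100002368328)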
--------------------------------------------------------------------------------
/computer_analysis/tools/searchresult2json.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | '''
4 | @author: taiyc
5 | @file: searchresult2json
6 | @time: 2019/3/28 21:17
7 | '''
8 | # from haystack.models import SearchResult
9 |
10 | def sea_result2json(list_obj):
11 | json = [sr.get_additional_fields() for sr in list_obj]  # one dict per result, holding the fields stored in the search index
12 | return json
--------------------------------------------------------------------------------
/computer_analysis/whoosh_index/MAIN_6vchd7acq93n4dv5.seg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/computer_analysis/whoosh_index/MAIN_6vchd7acq93n4dv5.seg
--------------------------------------------------------------------------------
/computer_analysis/whoosh_index/MAIN_WRITELOCK:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/computer_analysis/whoosh_index/MAIN_WRITELOCK
--------------------------------------------------------------------------------
/computer_analysis/whoosh_index/MAIN_agm2z7e75evl86bh.seg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/computer_analysis/whoosh_index/MAIN_agm2z7e75evl86bh.seg
--------------------------------------------------------------------------------
/computer_analysis/whoosh_index/_MAIN_2.toc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/computer_analysis/whoosh_index/_MAIN_2.toc
--------------------------------------------------------------------------------
/computer_spider/.idea/computer_spider.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/computer_spider/.idea/dataSources.local.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | #@
7 | `
8 |
9 |
10 | master_key
11 | root
12 | *:jd_computer
13 |
14 |
15 |
--------------------------------------------------------------------------------
/computer_spider/.idea/dataSources.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | mysql
6 | true
7 | com.mysql.jdbc.Driver
8 | jdbc:mysql://localhost:3309/jd_computer
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/computer_spider/.idea/dictionaries/ASUS.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/computer_spider/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/computer_spider/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/computer_spider/computer_spider/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/computer_spider/computer_spider/__init__.py
--------------------------------------------------------------------------------
/computer_spider/computer_spider/items.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Define here the models for your scraped items
4 | #
5 | # See documentation in:
6 | # https://doc.scrapy.org/en/latest/topics/items.html
7 |
8 | import scrapy
9 |
10 |
11 | class ComputerSpiderItem(scrapy.Item):
12 | # define the fields for your item here like:
13 | # name = scrapy.Field()
14 | goods_id = scrapy.Field()
15 | brand = scrapy.Field()
16 | title = scrapy.Field()
17 | price = scrapy.Field()
18 | img_url = scrapy.Field()
19 | param = scrapy.Field()
20 |
--------------------------------------------------------------------------------
/computer_spider/computer_spider/middlewares.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Define here the models for your spider middleware
4 | #
5 | # See documentation in:
6 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html
7 |
8 | from scrapy import signals
9 |
10 |
11 | class ComputerSpiderSpiderMiddleware(object):
12 | # Not all methods need to be defined. If a method is not defined,
13 | # scrapy acts as if the spider middleware does not modify the
14 | # passed objects.
15 |
16 | @classmethod
17 | def from_crawler(cls, crawler):
18 | # This method is used by Scrapy to create your spiders.
19 | s = cls()
20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
21 | return s
22 |
23 | def process_spider_input(self, response, spider):
24 | # Called for each response that goes through the spider
25 | # middleware and into the spider.
26 |
27 | # Should return None or raise an exception.
28 | return None
29 |
30 | def process_spider_output(self, response, result, spider):
31 | # Called with the results returned from the Spider, after
32 | # it has processed the response.
33 |
34 | # Must return an iterable of Request, dict or Item objects.
35 | for i in result:
36 | yield i
37 |
38 | def process_spider_exception(self, response, exception, spider):
39 | # Called when a spider or process_spider_input() method
40 | # (from other spider middleware) raises an exception.
41 |
42 | # Should return either None or an iterable of Response, dict
43 | # or Item objects.
44 | pass
45 |
46 | def process_start_requests(self, start_requests, spider):
47 | # Called with the start requests of the spider, and works
48 | # similarly to the process_spider_output() method, except
49 | # that it doesn’t have a response associated.
50 |
51 | # Must return only requests (not items).
52 | for r in start_requests:
53 | yield r
54 |
55 | def spider_opened(self, spider):
56 | spider.logger.info('Spider opened: %s' % spider.name)
57 |
58 |
59 | class ComputerSpiderDownloaderMiddleware(object):
60 | # Not all methods need to be defined. If a method is not defined,
61 | # scrapy acts as if the downloader middleware does not modify the
62 | # passed objects.
63 |
64 | @classmethod
65 | def from_crawler(cls, crawler):
66 | # This method is used by Scrapy to create your spiders.
67 | s = cls()
68 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
69 | return s
70 |
71 | def process_request(self, request, spider):
72 | # Called for each request that goes through the downloader
73 | # middleware.
74 |
75 | # Must either:
76 | # - return None: continue processing this request
77 | # - or return a Response object
78 | # - or return a Request object
79 | # - or raise IgnoreRequest: process_exception() methods of
80 | # installed downloader middleware will be called
81 | return None
82 |
83 | def process_response(self, request, response, spider):
84 | # Called with the response returned from the downloader.
85 |
86 | # Must either;
87 | # - return a Response object
88 | # - return a Request object
89 | # - or raise IgnoreRequest
90 | return response
91 |
92 | def process_exception(self, request, exception, spider):
93 | # Called when a download handler or a process_request()
94 | # (from other downloader middleware) raises an exception.
95 |
96 | # Must either:
97 | # - return None: continue processing this exception
98 | # - return a Response object: stops process_exception() chain
99 | # - return a Request object: stops process_exception() chain
100 | pass
101 |
102 | def spider_opened(self, spider):
103 | spider.logger.info('Spider opened: %s' % spider.name)
104 |
--------------------------------------------------------------------------------
/computer_spider/computer_spider/pipelines.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Define your item pipelines here
4 | #
5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
6 | # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
7 | import pymysql
8 |
9 |
10 | class ComputerSpiderPipeline(object):
11 | def __init__(self):
12 | self.connect = pymysql.connect(
13 | host='localhost',
14 | port=3309,
15 | user='root',
16 | password='123456',
17 | db='jd_computer'
18 | )
19 | self.cursor = self.connect.cursor()
20 |
21 | def process_item(self, item, spider):
22 | sql = 'INSERT INTO computer(computer_id, brand, title, price, img_url, param) values (%s, %s, %s, %s, %s, %s)'
23 | if_exists_sql = f'SELECT count(*) FROM computer WHERE computer_id={item["goods_id"]}'
24 | self.cursor.execute(if_exists_sql)
25 | if self.cursor.fetchall()[0][0]:
26 | print(f'{item["goods_id"]}已存在')
27 | print(item['price'])
28 | elif round(float(item['price']), 2) < 1000:
29 | print(f'价格获取有误!放弃收集该产品!')
30 | else:
31 | self.cursor.execute(sql, (
32 | item['goods_id'], item['brand'], item['title'], item['price'], item['img_url'], item['param']))
33 | self.connect.commit()
34 | print(f'{item["goods_id"]}入库成功。')
35 | return item
36 |
--------------------------------------------------------------------------------
/computer_spider/computer_spider/settings.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Scrapy settings for computer_spider project
4 | #
5 | # For simplicity, this file contains only settings considered important or
6 | # commonly used. You can find more settings consulting the documentation:
7 | #
8 | # https://doc.scrapy.org/en/latest/topics/settings.html
9 | # https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
10 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html
11 |
12 | BOT_NAME = 'computer_spider'
13 |
14 | SPIDER_MODULES = ['computer_spider.spiders']
15 | NEWSPIDER_MODULE = 'computer_spider.spiders'
16 |
17 |
18 | # Crawl responsibly by identifying yourself (and your website) on the user-agent
19 | #USER_AGENT = 'computer_spider (+http://www.yourdomain.com)'
20 |
21 | # Obey robots.txt rules
22 | ROBOTSTXT_OBEY = False
23 |
24 | # Configure maximum concurrent requests performed by Scrapy (default: 16)
25 | #CONCURRENT_REQUESTS = 32
26 |
27 | # Configure a delay for requests for the same website (default: 0)
28 | # See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
29 | # See also autothrottle settings and docs
30 | #DOWNLOAD_DELAY = 3
31 | # The download delay setting will honor only one of:
32 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16
33 | #CONCURRENT_REQUESTS_PER_IP = 16
34 |
35 | # Disable cookies (enabled by default)
36 | #COOKIES_ENABLED = False
37 |
38 | # Disable Telnet Console (enabled by default)
39 | #TELNETCONSOLE_ENABLED = False
40 |
41 | # Override the default request headers:
42 | DEFAULT_REQUEST_HEADERS = {
43 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
44 | 'Accept-Language': 'en',
45 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36'
46 | }
47 |
48 | # Enable or disable spider middlewares
49 | # See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
50 | #SPIDER_MIDDLEWARES = {
51 | # 'computer_spider.middlewares.ComputerSpiderSpiderMiddleware': 543,
52 | #}
53 |
54 | # Enable or disable downloader middlewares
55 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
56 | #DOWNLOADER_MIDDLEWARES = {
57 | # 'computer_spider.middlewares.ComputerSpiderDownloaderMiddleware': 543,
58 | #}
59 |
60 | # Enable or disable extensions
61 | # See https://doc.scrapy.org/en/latest/topics/extensions.html
62 | #EXTENSIONS = {
63 | # 'scrapy.extensions.telnet.TelnetConsole': None,
64 | #}
65 |
66 | # Configure item pipelines
67 | # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
68 | ITEM_PIPELINES = {
69 | 'computer_spider.pipelines.ComputerSpiderPipeline': 1,
70 | }
71 |
72 | # Enable and configure the AutoThrottle extension (disabled by default)
73 | # See https://doc.scrapy.org/en/latest/topics/autothrottle.html
74 | #AUTOTHROTTLE_ENABLED = True
75 | # The initial download delay
76 | #AUTOTHROTTLE_START_DELAY = 5
77 | # The maximum download delay to be set in case of high latencies
78 | #AUTOTHROTTLE_MAX_DELAY = 60
79 | # The average number of requests Scrapy should be sending in parallel to
80 | # each remote server
81 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
82 | # Enable showing throttling stats for every response received:
83 | #AUTOTHROTTLE_DEBUG = False
84 |
85 | # Enable and configure HTTP caching (disabled by default)
86 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
87 | #HTTPCACHE_ENABLED = True
88 | #HTTPCACHE_EXPIRATION_SECS = 0
89 | #HTTPCACHE_DIR = 'httpcache'
90 | #HTTPCACHE_IGNORE_HTTP_CODES = []
91 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
92 |
--------------------------------------------------------------------------------
/computer_spider/computer_spider/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 |
--------------------------------------------------------------------------------
/computer_spider/computer_spider/spiders/cpt_spider.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import scrapy
3 | import json, re
4 | from ..items import ComputerSpiderItem
5 |
6 | class CptSpiderSpider(scrapy.Spider):
7 | name = 'cpt_spider'
8 | allowed_domains = ['jd.com', 'p.3.cn']
9 | start_urls = ['https://list.jd.com/list.html?cat=670%2C671%2C672']
10 |
11 | def parse(self, response):
12 | """
13 | Collect the listing URL for each laptop brand on JD
14 | :param response:
15 | :return:
16 | """
17 | brand_urls = response.xpath('//ul[@id="brandsArea"]/li/a/@href').extract()
18 | brands = response.xpath('//ul[@id="brandsArea"]/li/a/@title').extract()
19 | print(len(brand_urls), len(brands))
20 | for index, brand_url in enumerate(brand_urls):
21 | brand_url = 'https://list.jd.com' + brand_url
22 | brand = brands[index]
23 | print(brand)
24 | if '华为' in brand:
25 | brand = 'huawei'
26 | elif '联想' in brand:
27 | brand = 'Lenovo'
28 | elif 'ThinkPad' in brand:
29 | brand = 'ThinkPad'
30 | elif 'Apple' in brand:
31 | brand = 'apple'
32 | elif '戴尔' in brand:
33 | brand = 'DELL'
34 | elif '三星' in brand:
35 | brand = 'samsung'
36 | elif '华硕' in brand:
37 | brand = 'ASUS'
38 | elif '惠普' in brand:
39 | brand = 'HP'
40 | elif '宏碁' in brand:
41 | brand = 'acer'
42 | elif '小米' in brand:
43 | brand = 'xiaomi'
44 | elif '微软' in brand:
45 | brand = 'Microsoft'
46 | elif '外星人' in brand:
47 | brand = 'Alienware'
48 | elif '机械革命' in brand:
49 | brand = 'MECHREVO'
50 | elif '神舟' in brand:
51 | brand = 'HASEE'
52 | elif '微星' in brand:
53 | brand = 'MSI'
54 | elif '雷蛇' in brand:
55 | brand = 'Razer'
56 | elif '戴睿' in brand:
57 | brand = 'dere'
58 | elif '海尔' in brand:
59 | brand = 'Haier'
60 | response.meta['brand'] = brand
61 | print(brand)
62 | yield scrapy.Request(url=brand_url, callback=self.parse_list_page, meta=response.meta)
63 |
64 | def parse_list_page(self, response):
65 | """
66 | Parse a brand listing page
67 | :param response:
68 | :return:
69 | """
70 | detail_urls = response.xpath('//li[@class="gl-item"]/div/div[@class="p-img"]/a/@href').extract()
71 | src_img_urls = response.xpath(
72 | '//li[@class="gl-item"]/div/div[@class="p-img"]/a[@target="_blank"]/img/@src').extract()
73 | data_lazy_img_urls = response.xpath(
74 | '//li[@class="gl-item"]/div/div[@class="p-img"]/a[@target="_blank"]/img/@data-lazy-img').extract()
75 | img_urls = src_img_urls + data_lazy_img_urls
76 | next_url = response.xpath('//a[contains(text(), "下一页")]/@href').extract_first()
77 | print(len(detail_urls), len(img_urls))
78 | for index, detail_url in enumerate(detail_urls):
79 | detail_url = 'https:' + detail_url
80 | goods_id = detail_url.split('/')[-1].split('.')[0]
81 | response.meta['goods_id'] = goods_id
82 | img_url = 'https:' + img_urls[index]
83 | response.meta['img_url'] = img_url
84 | yield scrapy.Request(url=detail_url, callback=self.parse_detail_page, meta=response.meta)
85 | if next_url:
86 | next_url = 'https://list.jd.com' + next_url
87 | yield scrapy.Request(url=next_url, callback=self.parse_list_page, meta=response.meta)
88 |
89 | def parse_detail_page(self, response):
90 | """
91 | Parse a product detail page
92 | :param response:
93 | :return:
94 | """
95 | title = response.xpath('//title/text()').extract_first()
96 | title = re.sub('【.*?】|-京东', '', title.strip())
97 | print(title)
98 | param = response.xpath('//div[@class="Ptable"]').extract()
99 | if len(param):
100 | param = param[0]
101 | else:
102 | param = response.xpath('//table[@class="Ptable"]').extract()[0]
103 | # Clean up the parameter block (strip whitespace characters)
104 | param = re.sub(' |\t|\r|\n', '', param)
105 | # print(param)
106 | response.meta['title'] = title
107 | response.meta['param'] = param
108 | price_url = 'https://p.3.cn/prices/mgets?skuIds=J_' + response.meta['goods_id']
109 | yield scrapy.Request(url=price_url, callback=self.get_goods_price, meta=response.meta)
110 |
111 | def get_goods_price(self, response):
112 | """
113 | Fetch the product price
114 | :param response:
115 | :return:
116 | """
117 | json_obj = json.loads(response.text)
118 | price = json_obj[0]['p']
119 | response.meta['price'] = price
120 | item = ComputerSpiderItem()
121 | item['goods_id'] = int(response.meta['goods_id'])
122 | item['brand'] = response.meta['brand']
123 | item['title'] = response.meta['title']
124 | item['img_url'] = response.meta['img_url']
125 | item['param'] = response.meta['param']
126 | item['price'] = response.meta['price']
127 | yield item
128 |
--------------------------------------------------------------------------------
/computer_spider/run_spider.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | '''
4 | @author: taiyc
5 | @file: run_spider
6 | @time: 2019/3/29 13:56
7 | '''
8 | from scrapy.cmdline import execute
9 |
10 | execute(['scrapy', 'crawl', 'cpt_spider'])
--------------------------------------------------------------------------------
/computer_spider/scrapy.cfg:
--------------------------------------------------------------------------------
1 | # Automatically created by: scrapy startproject
2 | #
3 | # For more information about the [deploy] section see:
4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html
5 |
6 | [settings]
7 | default = computer_spider.settings
8 |
9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = computer_spider
12 |
--------------------------------------------------------------------------------
/html/.idea/dictionaries/ASUS.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/html/.idea/html.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/html/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/html/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/html/css/detail.css:
--------------------------------------------------------------------------------
1 |
2 | #goods {
3 | margin-left: 35%;
4 | margin-top: 5%;
5 | }
6 |
7 | .self-dl{
8 | display: flex;
9 | margin-top: 37%;
10 | font-size: 20px
11 | }
12 |
13 | .self-dl > dd{
14 | color: red;
15 | }
16 | #wordcloud{
17 | margin-top: 5%;
18 | text-align: center;
19 | }
20 | .svg{
21 | margin-top: 5%;
22 | text-align: center;
23 | }
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 | #param{
37 | margin-top: 5%;
38 | /*background-color: beige;*/
39 | }
40 |
41 | .Ptable {
42 | margin: 10px
43 | }
44 |
45 | .Ptable-item {
46 | padding-top: 0px;
47 | padding: 12px;
48 | line-height: 220%;
49 | color: #999;
50 | font-size: 12px;
51 | }
52 |
53 | .Ptable-item:after {
54 | content: "";
55 | height: 0;
56 | visibility: hidden;
57 | display: block;
58 | clear: both
59 | }
60 |
61 | .Ptable-item h3 {
62 | align: center;
63 | padding-top:0px;
64 | padding-bottom:0px;
65 | margin-top: 0px;
66 | width: 150px;
67 | text-align: center
68 | }
69 |
70 | .Ptable-item dl {
71 | margin-left: 110px;
72 | margin-bottom:0px;
73 | }
74 |
75 | .Ptable-item dt {
76 | width: 370px;
77 | float: left;
78 | text-align: right;
79 | padding-right: 5px
80 | }
81 |
82 | .Ptable-item dd {
83 | margin-left: 210px
84 | }
85 |
86 | .Ptable-item .Ptable-tips {
87 | position: relative;
88 | float: left;
89 | width: auto;
90 | margin-left: 0
91 | }
92 |
93 | .Ptable-item .Ptable-tips:hover {
94 | z-index: 2
95 | }
96 |
97 | .Ptable-item .Ptable-sprite-question {
98 | display: inline-block;
99 | margin-left: 4px;
100 | width: 16px;
101 | height: 16px;
102 | vertical-align: -3px;
103 | }
104 |
105 | .Ptable-tips .tips {
106 | display: none;
107 | position: absolute;
108 | left: -10px;
109 | top: 27px;
110 | width: 300px
111 | }
112 |
113 | .Ptable-tips:hover .tips {
114 | display: block
115 | }
116 |
117 | .Ptable-tips .content {
118 | padding: 8px 10px;
119 | background: #fff;
120 | border: 1px solid #cecbce;
121 | box-shadow: 0 0 2px 2px #eee
122 | }
123 |
124 | .Ptable-tips p {
125 | font-family: "microsoft yahei";
126 | color: #999;
127 | line-height: 160%;
128 | text-align: left
129 | }
130 |
131 | .Ptable-tips .Ptable-sprite-arrow {
132 | position: absolute;
133 | overflow: hidden;
134 | left: 15px;
135 | top: -5px;
136 | width: 11px;
137 | height: 6px;
138 | }
139 |
140 |
141 |
142 | .Ptable td,.Ptable th {
143 | font-size: 12px
144 | }
145 |
146 | .Ptable th {
147 | background: #EEF7FE;
148 | text-align: right;
149 | padding: 5px
150 | }
151 |
152 | .Ptable td {
153 | padding: 2px 5px;
154 | background: #fff
155 | }
156 |
157 | .Ptable th.tdTitle {
158 | text-align: center
159 | }
160 |
161 | .Ptable .tdTitle {
162 | text-align: right;
163 | width: 500px;
164 | background: #F5FAFE
165 | }
166 | .Ptable tr {
167 | text-align: center
168 | }
169 |
170 |
171 | .Ptable-tips {
172 | display: inline-block;
173 | position: relative;
174 | *display: inline;
175 | *zoom:1}
176 |
177 | .Ptable-tips:hover {
178 | z-index: 2
179 | }
180 |
181 | .Ptable-sprite-question {
182 | display: inline-block;
183 | margin-left: 4px;
184 | width: 16px;
185 | height: 16px;
186 | vertical-align: -3px;
187 | }
188 |
189 | .Ptable-tips .tips {
190 | display: none;
191 | position: absolute;
192 | left: -10px;
193 | top: 27px;
194 | width: 215px
195 | }
196 |
197 | .Ptable-tips:hover .tips {
198 | display: block
199 | }
200 |
201 | .Ptable-tips .content {
202 | padding: 8px 10px;
203 | background: #fff;
204 | border: 1px solid #cecbce;
205 | box-shadow: 0 0 2px 2px #eee
206 | }
207 |
208 | .Ptable-tips p {
209 | font-family: "microsoft yahei";
210 | color: #999;
211 | line-height: 160%;
212 | text-align: left
213 | }
214 |
215 | .Ptable-tips .Ptable-sprite-arrow {
216 | position: absolute;
217 | overflow: hidden;
218 | left: 15px;
219 | top: -5px;
220 | width: 11px;
221 | height: 6px;
222 | }
223 |
224 |
225 |
226 |
--------------------------------------------------------------------------------
/html/detail.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | 详情页
6 |
7 |
8 |
9 |
10 |
11 |
12 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/html/fonts/glyphicons-halflings-regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/html/fonts/glyphicons-halflings-regular.eot
--------------------------------------------------------------------------------
/html/fonts/glyphicons-halflings-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/html/fonts/glyphicons-halflings-regular.ttf
--------------------------------------------------------------------------------
/html/fonts/glyphicons-halflings-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/html/fonts/glyphicons-halflings-regular.woff
--------------------------------------------------------------------------------
/html/fonts/glyphicons-halflings-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moxi255/jd-spider/8f92f7e7a9a27c2f83f6432f17e1f0bb920b4213/html/fonts/glyphicons-halflings-regular.woff2
--------------------------------------------------------------------------------
/html/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | 首页
6 |
7 |
8 |
9 |