├── README.md
└── wallstreetcnScrapy
├── .project
├── .pydevproject
├── .settings
└── org.eclipse.core.resources.prefs
├── db
└── database.sql
├── log
└── ws.log
├── scrapy.cfg
└── wallstreetcnScrapy
├── .gitignore
├── __init__.py
├── commands.pyc
├── crawls
├── CommentarySpider-1
│ ├── requests.seen
│ └── spider.state
└── CommentarySpider-2
│ ├── requests.queue
│ └── p0
│ └── requests.seen
├── items.py
├── main.py
├── middlewares.py
├── pipelines.py
├── settings.py
└── spiders
├── .gitignore
├── CommentarySpider.py
├── __init__.py
├── jqkaCommentarySpider.py
└── sinaCommentarySpider.py
/README.md:
--------------------------------------------------------------------------------
1 | # wallstreetcnScrapy
2 | A crawler for wallstreetcn, finance.sina, and 10jqka, built with Scrapy.
3 |
4 | This is the crawler part of my graduation project, which is a financial information search engine.
5 | It uses Scrapy to crawl the following three websites:
6 | http://wallstreetcn.com/news,
7 | http://finance.sina.com.cn/,
8 | http://news.10jqka.com.cn/
9 | It then sends the data to a Solr server to build the index, and also stores the data in MySQL as a backup.
10 |
--------------------------------------------------------------------------------
/wallstreetcnScrapy/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | wallstreetcnScrapy
4 |
5 |
6 |
7 |
8 |
9 | org.python.pydev.PyDevBuilder
10 |
11 |
12 |
13 |
14 |
15 | org.python.pydev.pythonNature
16 |
17 |
18 |
--------------------------------------------------------------------------------
/wallstreetcnScrapy/.pydevproject:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | /${PROJECT_DIR_NAME}
5 |
6 | python 2.7
7 | python27
8 |
9 |
--------------------------------------------------------------------------------
/wallstreetcnScrapy/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding//wallstreetcnScrapy/items.py=utf-8
3 | encoding//wallstreetcnScrapy/pipelines.py=utf-8
4 | encoding//wallstreetcnScrapy/settings.py=utf-8
5 | encoding//wallstreetcnScrapy/spiders/jqkaCommentarySpider.py=UTF-8
6 | encoding//wallstreetcnScrapy/spiders/sinaCommentarySpider.py=UTF-8
7 | encoding/=UTF-8
8 |
--------------------------------------------------------------------------------
/wallstreetcnScrapy/db/database.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE `wstable` ( -- presumably the MySQL backup table for crawled items; confirm against pipelines.py
2 | `id` CHAR(32) NOT NULL COMMENT 'uri+title+author md5编码id', -- column comment reads: MD5-encoded id of uri+title+author
3 | `uri` VARCHAR(255) COMMENT 'uri',
4 | `title` VARCHAR(255) COMMENT '标题', -- column comment reads: title
5 | `author` VARCHAR(255) COMMENT '作者', -- column comment reads: author
6 | `time1` DATETIME COMMENT '时间', -- column comment reads: time
7 | `description` TEXT COMMENT '描述', -- column comment reads: description
8 | `content` TEXT COMMENT '内容', -- column comment reads: content
9 | `images` TEXT COMMENT 'json uris',
10 | `view1` TEXT COMMENT 'view',
11 | PRIMARY KEY (`id`)
12 | ) ENGINE=MyISAM DEFAULT CHARSET=utf8;
13 | select * from `wstable`; -- NOTE(review): the statements from here on look like ad-hoc maintenance queries, not a script; confirm
14 | DROP TABLE `wstable`; -- removes the table created above
15 | TRUNCATE TABLE `wstable`; -- NOTE(review): if run top to bottom this follows the DROP, so the table no longer exists
16 | select count(*) from `wstable`
--------------------------------------------------------------------------------
/wallstreetcnScrapy/log/ws.log:
--------------------------------------------------------------------------------
1 | 2016-04-29 17:27:04 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/)
2 | Traceback (most recent call last):
3 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
4 | yield next(it)
5 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
6 | for x in result:
7 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
8 | return (_set_referer(r) for r in result or ())
9 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
10 | return (r for r in result or () if _filter(r))
11 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
12 | return (r for r in result or () if _filter(r))
13 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
14 | cb_res = callback(response, **cb_kwargs) or ()
15 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
16 | _ = _.extract()[0]
17 | IndexError: list index out of range
18 | 2016-04-29 17:27:09 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160428/c589704962.shtml)
19 | Traceback (most recent call last):
20 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
21 | yield next(it)
22 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
23 | for x in result:
24 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
25 | return (_set_referer(r) for r in result or ())
26 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
27 | return (r for r in result or () if _filter(r))
28 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
29 | return (r for r in result or () if _filter(r))
30 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
31 | cb_res = callback(response, **cb_kwargs) or ()
32 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
33 | _ = _.extract()[0]
34 | IndexError: list index out of range
35 | 2016-04-29 17:27:13 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160428/c589706632.shtml)
36 | Traceback (most recent call last):
37 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
38 | yield next(it)
39 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
40 | for x in result:
41 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
42 | return (_set_referer(r) for r in result or ())
43 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
44 | return (r for r in result or () if _filter(r))
45 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
46 | return (r for r in result or () if _filter(r))
47 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
48 | cb_res = callback(response, **cb_kwargs) or ()
49 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
50 | _ = _.extract()[0]
51 | IndexError: list index out of range
52 | 2016-04-29 17:27:13 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160428/c589706632.shtml)
53 | Traceback (most recent call last):
54 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
55 | yield next(it)
56 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
57 | for x in result:
58 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
59 | return (_set_referer(r) for r in result or ())
60 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
61 | return (r for r in result or () if _filter(r))
62 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
63 | return (r for r in result or () if _filter(r))
64 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
65 | cb_res = callback(response, **cb_kwargs) or ()
66 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
67 | _ = _.extract()[0]
68 | IndexError: list index out of range
69 | 2016-04-29 17:27:13 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160428/c589706632.shtml)
70 | Traceback (most recent call last):
71 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
72 | yield next(it)
73 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
74 | for x in result:
75 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
76 | return (_set_referer(r) for r in result or ())
77 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
78 | return (r for r in result or () if _filter(r))
79 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
80 | return (r for r in result or () if _filter(r))
81 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
82 | cb_res = callback(response, **cb_kwargs) or ()
83 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
84 | _ = _.extract()[0]
85 | IndexError: list index out of range
86 | 2016-04-29 17:27:14 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160429/c589802622.shtml)
87 | Traceback (most recent call last):
88 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
89 | yield next(it)
90 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
91 | for x in result:
92 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
93 | return (_set_referer(r) for r in result or ())
94 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
95 | return (r for r in result or () if _filter(r))
96 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
97 | return (r for r in result or () if _filter(r))
98 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
99 | cb_res = callback(response, **cb_kwargs) or ()
100 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
101 | _ = _.extract()[0]
102 | IndexError: list index out of range
103 | 2016-04-29 17:27:21 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160427/c589681503.shtml)
104 | Traceback (most recent call last):
105 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
106 | yield next(it)
107 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
108 | for x in result:
109 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
110 | return (_set_referer(r) for r in result or ())
111 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
112 | return (r for r in result or () if _filter(r))
113 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
114 | return (r for r in result or () if _filter(r))
115 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
116 | cb_res = callback(response, **cb_kwargs) or ()
117 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
118 | _ = _.extract()[0]
119 | IndexError: list index out of range
120 | 2016-04-29 17:27:48 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160317/c588573874.shtml)
121 | Traceback (most recent call last):
122 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
123 | yield next(it)
124 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
125 | for x in result:
126 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
127 | return (_set_referer(r) for r in result or ())
128 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
129 | return (r for r in result or () if _filter(r))
130 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
131 | return (r for r in result or () if _filter(r))
132 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
133 | cb_res = callback(response, **cb_kwargs) or ()
134 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
135 | _ = _.extract()[0]
136 | IndexError: list index out of range
137 | 2016-04-29 17:27:58 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151229/c586749894.shtml)
138 | Traceback (most recent call last):
139 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
140 | yield next(it)
141 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
142 | for x in result:
143 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
144 | return (_set_referer(r) for r in result or ())
145 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
146 | return (r for r in result or () if _filter(r))
147 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
148 | return (r for r in result or () if _filter(r))
149 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
150 | cb_res = callback(response, **cb_kwargs) or ()
151 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
152 | _ = _.extract()[0]
153 | IndexError: list index out of range
154 | 2016-04-29 17:27:58 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160323/c588693373.shtml)
155 | Traceback (most recent call last):
156 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
157 | yield next(it)
158 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
159 | for x in result:
160 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
161 | return (_set_referer(r) for r in result or ())
162 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
163 | return (r for r in result or () if _filter(r))
164 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
165 | return (r for r in result or () if _filter(r))
166 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
167 | cb_res = callback(response, **cb_kwargs) or ()
168 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
169 | _ = _.extract()[0]
170 | IndexError: list index out of range
171 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml)
172 | Traceback (most recent call last):
173 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
174 | yield next(it)
175 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
176 | for x in result:
177 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
178 | return (_set_referer(r) for r in result or ())
179 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
180 | return (r for r in result or () if _filter(r))
181 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
182 | return (r for r in result or () if _filter(r))
183 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
184 | cb_res = callback(response, **cb_kwargs) or ()
185 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
186 | _ = _.extract()[0]
187 | IndexError: list index out of range
188 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml)
189 | Traceback (most recent call last):
190 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
191 | yield next(it)
192 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
193 | for x in result:
194 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
195 | return (_set_referer(r) for r in result or ())
196 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
197 | return (r for r in result or () if _filter(r))
198 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
199 | return (r for r in result or () if _filter(r))
200 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
201 | cb_res = callback(response, **cb_kwargs) or ()
202 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
203 | _ = _.extract()[0]
204 | IndexError: list index out of range
205 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml)
206 | Traceback (most recent call last):
207 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
208 | yield next(it)
209 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
210 | for x in result:
211 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
212 | return (_set_referer(r) for r in result or ())
213 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
214 | return (r for r in result or () if _filter(r))
215 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
216 | return (r for r in result or () if _filter(r))
217 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
218 | cb_res = callback(response, **cb_kwargs) or ()
219 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
220 | _ = _.extract()[0]
221 | IndexError: list index out of range
222 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml)
223 | Traceback (most recent call last):
224 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
225 | yield next(it)
226 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
227 | for x in result:
228 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
229 | return (_set_referer(r) for r in result or ())
230 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
231 | return (r for r in result or () if _filter(r))
232 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
233 | return (r for r in result or () if _filter(r))
234 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
235 | cb_res = callback(response, **cb_kwargs) or ()
236 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
237 | _ = _.extract()[0]
238 | IndexError: list index out of range
239 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml)
240 | Traceback (most recent call last):
241 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
242 | yield next(it)
243 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
244 | for x in result:
245 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
246 | return (_set_referer(r) for r in result or ())
247 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
248 | return (r for r in result or () if _filter(r))
249 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
250 | return (r for r in result or () if _filter(r))
251 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
252 | cb_res = callback(response, **cb_kwargs) or ()
253 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
254 | _ = _.extract()[0]
255 | IndexError: list index out of range
256 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml)
257 | Traceback (most recent call last):
258 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
259 | yield next(it)
260 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
261 | for x in result:
262 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
263 | return (_set_referer(r) for r in result or ())
264 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
265 | return (r for r in result or () if _filter(r))
266 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
267 | return (r for r in result or () if _filter(r))
268 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
269 | cb_res = callback(response, **cb_kwargs) or ()
270 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
271 | _ = _.extract()[0]
272 | IndexError: list index out of range
273 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml)
274 | Traceback (most recent call last):
275 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
276 | yield next(it)
277 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
278 | for x in result:
279 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
280 | return (_set_referer(r) for r in result or ())
281 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
282 | return (r for r in result or () if _filter(r))
283 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
284 | return (r for r in result or () if _filter(r))
285 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
286 | cb_res = callback(response, **cb_kwargs) or ()
287 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
288 | _ = _.extract()[0]
289 | IndexError: list index out of range
290 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml)
291 | Traceback (most recent call last):
292 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
293 | yield next(it)
294 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
295 | for x in result:
296 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
297 | return (_set_referer(r) for r in result or ())
298 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
299 | return (r for r in result or () if _filter(r))
300 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
301 | return (r for r in result or () if _filter(r))
302 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
303 | cb_res = callback(response, **cb_kwargs) or ()
304 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
305 | _ = _.extract()[0]
306 | IndexError: list index out of range
307 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml)
308 | Traceback (most recent call last):
309 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
310 | yield next(it)
311 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
312 | for x in result:
313 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
314 | return (_set_referer(r) for r in result or ())
315 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
316 | return (r for r in result or () if _filter(r))
317 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
318 | return (r for r in result or () if _filter(r))
319 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
320 | cb_res = callback(response, **cb_kwargs) or ()
321 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
322 | _ = _.extract()[0]
323 | IndexError: list index out of range
324 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml)
325 | Traceback (most recent call last):
326 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
327 | yield next(it)
328 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
329 | for x in result:
330 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
331 | return (_set_referer(r) for r in result or ())
332 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
333 | return (r for r in result or () if _filter(r))
334 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
335 | return (r for r in result or () if _filter(r))
336 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
337 | cb_res = callback(response, **cb_kwargs) or ()
338 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
339 | _ = _.extract()[0]
340 | IndexError: list index out of range
341 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml)
342 | Traceback (most recent call last):
343 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
344 | yield next(it)
345 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
346 | for x in result:
347 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
348 | return (_set_referer(r) for r in result or ())
349 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
350 | return (r for r in result or () if _filter(r))
351 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
352 | return (r for r in result or () if _filter(r))
353 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
354 | cb_res = callback(response, **cb_kwargs) or ()
355 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
356 | _ = _.extract()[0]
357 | IndexError: list index out of range
358 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml)
359 | Traceback (most recent call last):
360 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
361 | yield next(it)
362 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
363 | for x in result:
364 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
365 | return (_set_referer(r) for r in result or ())
366 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
367 | return (r for r in result or () if _filter(r))
368 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
369 | return (r for r in result or () if _filter(r))
370 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
371 | cb_res = callback(response, **cb_kwargs) or ()
372 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
373 | _ = _.extract()[0]
374 | IndexError: list index out of range
375 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml)
376 | Traceback (most recent call last):
377 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
378 | yield next(it)
379 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
380 | for x in result:
381 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
382 | return (_set_referer(r) for r in result or ())
383 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
384 | return (r for r in result or () if _filter(r))
385 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
386 | return (r for r in result or () if _filter(r))
387 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
388 | cb_res = callback(response, **cb_kwargs) or ()
389 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
390 | _ = _.extract()[0]
391 | IndexError: list index out of range
392 | 2016-04-29 17:28:25 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160330/c588893392.shtml)
393 | Traceback (most recent call last):
394 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
395 | yield next(it)
396 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
397 | for x in result:
398 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
399 | return (_set_referer(r) for r in result or ())
400 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
401 | return (r for r in result or () if _filter(r))
402 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
403 | return (r for r in result or () if _filter(r))
404 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
405 | cb_res = callback(response, **cb_kwargs) or ()
406 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
407 | _ = _.extract()[0]
408 | IndexError: list index out of range
409 | 2016-04-29 17:28:33 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160308/c588398925.shtml)
410 | Traceback (most recent call last):
411 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
412 | yield next(it)
413 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
414 | for x in result:
415 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
416 | return (_set_referer(r) for r in result or ())
417 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
418 | return (r for r in result or () if _filter(r))
419 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
420 | return (r for r in result or () if _filter(r))
421 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
422 | cb_res = callback(response, **cb_kwargs) or ()
423 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
424 | _ = _.extract()[0]
425 | IndexError: list index out of range
426 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml)
427 | Traceback (most recent call last):
428 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
429 | yield next(it)
430 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
431 | for x in result:
432 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
433 | return (_set_referer(r) for r in result or ())
434 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
435 | return (r for r in result or () if _filter(r))
436 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
437 | return (r for r in result or () if _filter(r))
438 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
439 | cb_res = callback(response, **cb_kwargs) or ()
440 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
441 | _ = _.extract()[0]
442 | IndexError: list index out of range
443 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml)
444 | Traceback (most recent call last):
445 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
446 | yield next(it)
447 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
448 | for x in result:
449 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
450 | return (_set_referer(r) for r in result or ())
451 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
452 | return (r for r in result or () if _filter(r))
453 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
454 | return (r for r in result or () if _filter(r))
455 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
456 | cb_res = callback(response, **cb_kwargs) or ()
457 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
458 | _ = _.extract()[0]
459 | IndexError: list index out of range
460 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml)
461 | Traceback (most recent call last):
462 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
463 | yield next(it)
464 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
465 | for x in result:
466 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
467 | return (_set_referer(r) for r in result or ())
468 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
469 | return (r for r in result or () if _filter(r))
470 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
471 | return (r for r in result or () if _filter(r))
472 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
473 | cb_res = callback(response, **cb_kwargs) or ()
474 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
475 | _ = _.extract()[0]
476 | IndexError: list index out of range
477 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml)
478 | Traceback (most recent call last):
479 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
480 | yield next(it)
481 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
482 | for x in result:
483 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
484 | return (_set_referer(r) for r in result or ())
485 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
486 | return (r for r in result or () if _filter(r))
487 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
488 | return (r for r in result or () if _filter(r))
489 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
490 | cb_res = callback(response, **cb_kwargs) or ()
491 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
492 | _ = _.extract()[0]
493 | IndexError: list index out of range
494 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml)
495 | Traceback (most recent call last):
496 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
497 | yield next(it)
498 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
499 | for x in result:
500 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
501 | return (_set_referer(r) for r in result or ())
502 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
503 | return (r for r in result or () if _filter(r))
504 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
505 | return (r for r in result or () if _filter(r))
506 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
507 | cb_res = callback(response, **cb_kwargs) or ()
508 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
509 | _ = _.extract()[0]
510 | IndexError: list index out of range
511 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml)
512 | Traceback (most recent call last):
513 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
514 | yield next(it)
515 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
516 | for x in result:
517 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
518 | return (_set_referer(r) for r in result or ())
519 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
520 | return (r for r in result or () if _filter(r))
521 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
522 | return (r for r in result or () if _filter(r))
523 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
524 | cb_res = callback(response, **cb_kwargs) or ()
525 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
526 | _ = _.extract()[0]
527 | IndexError: list index out of range
528 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml)
529 | Traceback (most recent call last):
530 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
531 | yield next(it)
532 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
533 | for x in result:
534 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
535 | return (_set_referer(r) for r in result or ())
536 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
537 | return (r for r in result or () if _filter(r))
538 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
539 | return (r for r in result or () if _filter(r))
540 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
541 | cb_res = callback(response, **cb_kwargs) or ()
542 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
543 | _ = _.extract()[0]
544 | IndexError: list index out of range
545 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml)
546 | Traceback (most recent call last):
547 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
548 | yield next(it)
549 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
550 | for x in result:
551 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
552 | return (_set_referer(r) for r in result or ())
553 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
554 | return (r for r in result or () if _filter(r))
555 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
556 | return (r for r in result or () if _filter(r))
557 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
558 | cb_res = callback(response, **cb_kwargs) or ()
559 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
560 | _ = _.extract()[0]
561 | IndexError: list index out of range
562 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml)
563 | Traceback (most recent call last):
564 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
565 | yield next(it)
566 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
567 | for x in result:
568 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
569 | return (_set_referer(r) for r in result or ())
570 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
571 | return (r for r in result or () if _filter(r))
572 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
573 | return (r for r in result or () if _filter(r))
574 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
575 | cb_res = callback(response, **cb_kwargs) or ()
576 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
577 | _ = _.extract()[0]
578 | IndexError: list index out of range
579 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml)
580 | Traceback (most recent call last):
581 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
582 | yield next(it)
583 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
584 | for x in result:
585 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
586 | return (_set_referer(r) for r in result or ())
587 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
588 | return (r for r in result or () if _filter(r))
589 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
590 | return (r for r in result or () if _filter(r))
591 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
592 | cb_res = callback(response, **cb_kwargs) or ()
593 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
594 | _ = _.extract()[0]
595 | IndexError: list index out of range
596 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml)
597 | Traceback (most recent call last):
598 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
599 | yield next(it)
600 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
601 | for x in result:
602 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
603 | return (_set_referer(r) for r in result or ())
604 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
605 | return (r for r in result or () if _filter(r))
606 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
607 | return (r for r in result or () if _filter(r))
608 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
609 | cb_res = callback(response, **cb_kwargs) or ()
610 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
611 | _ = _.extract()[0]
612 | IndexError: list index out of range
613 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml)
614 | Traceback (most recent call last):
615 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
616 | yield next(it)
617 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
618 | for x in result:
619 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
620 | return (_set_referer(r) for r in result or ())
621 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
622 | return (r for r in result or () if _filter(r))
623 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
624 | return (r for r in result or () if _filter(r))
625 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
626 | cb_res = callback(response, **cb_kwargs) or ()
627 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
628 | _ = _.extract()[0]
629 | IndexError: list index out of range
630 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml)
631 | Traceback (most recent call last):
632 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
633 | yield next(it)
634 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
635 | for x in result:
636 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
637 | return (_set_referer(r) for r in result or ())
638 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
639 | return (r for r in result or () if _filter(r))
640 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
641 | return (r for r in result or () if _filter(r))
642 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
643 | cb_res = callback(response, **cb_kwargs) or ()
644 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
645 | _ = _.extract()[0]
646 | IndexError: list index out of range
647 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml)
648 | Traceback (most recent call last):
649 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
650 | yield next(it)
651 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
652 | for x in result:
653 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
654 | return (_set_referer(r) for r in result or ())
655 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
656 | return (r for r in result or () if _filter(r))
657 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
658 | return (r for r in result or () if _filter(r))
659 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
660 | cb_res = callback(response, **cb_kwargs) or ()
661 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
662 | _ = _.extract()[0]
663 | IndexError: list index out of range
664 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml)
665 | Traceback (most recent call last):
666 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
667 | yield next(it)
668 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
669 | for x in result:
670 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
671 | return (_set_referer(r) for r in result or ())
672 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
673 | return (r for r in result or () if _filter(r))
674 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
675 | return (r for r in result or () if _filter(r))
676 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
677 | cb_res = callback(response, **cb_kwargs) or ()
678 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
679 | _ = _.extract()[0]
680 | IndexError: list index out of range
681 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml)
682 | Traceback (most recent call last):
683 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
684 | yield next(it)
685 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
686 | for x in result:
687 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
688 | return (_set_referer(r) for r in result or ())
689 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
690 | return (r for r in result or () if _filter(r))
691 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
692 | return (r for r in result or () if _filter(r))
693 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
694 | cb_res = callback(response, **cb_kwargs) or ()
695 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
696 | _ = _.extract()[0]
697 | IndexError: list index out of range
698 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml)
699 | Traceback (most recent call last):
700 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
701 | yield next(it)
702 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
703 | for x in result:
704 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
705 | return (_set_referer(r) for r in result or ())
706 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
707 | return (r for r in result or () if _filter(r))
708 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
709 | return (r for r in result or () if _filter(r))
710 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
711 | cb_res = callback(response, **cb_kwargs) or ()
712 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
713 | _ = _.extract()[0]
714 | IndexError: list index out of range
715 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml)
716 | Traceback (most recent call last):
717 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
718 | yield next(it)
719 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
720 | for x in result:
721 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
722 | return (_set_referer(r) for r in result or ())
723 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
724 | return (r for r in result or () if _filter(r))
725 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
726 | return (r for r in result or () if _filter(r))
727 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
728 | cb_res = callback(response, **cb_kwargs) or ()
729 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
730 | _ = _.extract()[0]
731 | IndexError: list index out of range
732 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml)
733 | Traceback (most recent call last):
734 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
735 | yield next(it)
736 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
737 | for x in result:
738 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
739 | return (_set_referer(r) for r in result or ())
740 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
741 | return (r for r in result or () if _filter(r))
742 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
743 | return (r for r in result or () if _filter(r))
744 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
745 | cb_res = callback(response, **cb_kwargs) or ()
746 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
747 | _ = _.extract()[0]
748 | IndexError: list index out of range
749 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml)
750 | Traceback (most recent call last):
751 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
752 | yield next(it)
753 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
754 | for x in result:
755 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
756 | return (_set_referer(r) for r in result or ())
757 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
758 | return (r for r in result or () if _filter(r))
759 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
760 | return (r for r in result or () if _filter(r))
761 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
762 | cb_res = callback(response, **cb_kwargs) or ()
763 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
764 | _ = _.extract()[0]
765 | IndexError: list index out of range
766 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml)
767 | Traceback (most recent call last):
768 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
769 | yield next(it)
770 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
771 | for x in result:
772 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
773 | return (_set_referer(r) for r in result or ())
774 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
775 | return (r for r in result or () if _filter(r))
776 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
777 | return (r for r in result or () if _filter(r))
778 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
779 | cb_res = callback(response, **cb_kwargs) or ()
780 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
781 | _ = _.extract()[0]
782 | IndexError: list index out of range
783 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml)
784 | Traceback (most recent call last):
785 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
786 | yield next(it)
787 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
788 | for x in result:
789 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
790 | return (_set_referer(r) for r in result or ())
791 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
792 | return (r for r in result or () if _filter(r))
793 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
794 | return (r for r in result or () if _filter(r))
795 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
796 | cb_res = callback(response, **cb_kwargs) or ()
797 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
798 | _ = _.extract()[0]
799 | IndexError: list index out of range
800 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml)
801 | Traceback (most recent call last):
802 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
803 | yield next(it)
804 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
805 | for x in result:
806 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
807 | return (_set_referer(r) for r in result or ())
808 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
809 | return (r for r in result or () if _filter(r))
810 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
811 | return (r for r in result or () if _filter(r))
812 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
813 | cb_res = callback(response, **cb_kwargs) or ()
814 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
815 | _ = _.extract()[0]
816 | IndexError: list index out of range
817 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml)
818 | Traceback (most recent call last):
819 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
820 | yield next(it)
821 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output
822 | for x in result:
823 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in
824 | return (_set_referer(r) for r in result or ())
825 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in
826 | return (r for r in result or () if _filter(r))
827 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in
828 | return (r for r in result or () if _filter(r))
829 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response
830 | cb_res = callback(response, **cb_kwargs) or ()
831 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary
832 | _ = _.extract()[0]
833 | IndexError: list index out of range
834 |
--------------------------------------------------------------------------------
/wallstreetcnScrapy/scrapy.cfg:
--------------------------------------------------------------------------------
1 | # Automatically created by: scrapy startproject
2 | #
3 | # For more information about the [deploy] section see:
4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html
5 |
6 | [settings]
7 | default = wallstreetcnScrapy.settings
8 |
9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = wallstreetcnScrapy
12 |
--------------------------------------------------------------------------------
/wallstreetcnScrapy/wallstreetcnScrapy/.gitignore:
--------------------------------------------------------------------------------
1 | /__init__.pyc
2 | /items.pyc
3 | /middlewares.pyc
4 | /pipelines.pyc
5 | /settings.pyc
6 |
--------------------------------------------------------------------------------
/wallstreetcnScrapy/wallstreetcnScrapy/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jianzhichun/wallstreetcnScrapy/e6124d2dc4b26def96cd070a87eb5158d9435e4e/wallstreetcnScrapy/wallstreetcnScrapy/__init__.py
--------------------------------------------------------------------------------
/wallstreetcnScrapy/wallstreetcnScrapy/commands.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jianzhichun/wallstreetcnScrapy/e6124d2dc4b26def96cd070a87eb5158d9435e4e/wallstreetcnScrapy/wallstreetcnScrapy/commands.pyc
--------------------------------------------------------------------------------
/wallstreetcnScrapy/wallstreetcnScrapy/crawls/CommentarySpider-1/spider.state:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jianzhichun/wallstreetcnScrapy/e6124d2dc4b26def96cd070a87eb5158d9435e4e/wallstreetcnScrapy/wallstreetcnScrapy/crawls/CommentarySpider-1/spider.state
--------------------------------------------------------------------------------
/wallstreetcnScrapy/wallstreetcnScrapy/crawls/CommentarySpider-2/requests.queue/p0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jianzhichun/wallstreetcnScrapy/e6124d2dc4b26def96cd070a87eb5158d9435e4e/wallstreetcnScrapy/wallstreetcnScrapy/crawls/CommentarySpider-2/requests.queue/p0
--------------------------------------------------------------------------------
/wallstreetcnScrapy/wallstreetcnScrapy/crawls/CommentarySpider-2/requests.seen:
--------------------------------------------------------------------------------
1 | 8aebb558bc4b0334eab8ad89792ccbe0a97725ab
2 | added85ef9dd4e673e68715d8904dda064b8f7b7
3 | 731aba78419f9810eae6f5f7546c6094699eb1ae
4 | 8d1c74de525c1c8d2ec96aee420cc6fc84b27d86
5 | cb176cc78e26600726d0d25a3f9fa6a511865e18
6 | 2e0c21d9a3764dacb9ba2a134fca2cb213af7b2a
7 | 90650bd5ae9386880c361f02145672aca82c0b93
8 | 091d395c743fddd07f0026d7bd35b4b1738bf4ee
9 | b400280df54ac4d8239264b584ffa04df7c29045
10 | 88062c9896fb793cc0ec4c7dc93085b41997ed64
11 | 5cbfd4b4d4b14b63b86e405e885b2aa43f0ec13a
12 | ac37cc4cdf9a9133a0e39353b8e9eb560238929c
13 | 6e682298aa5da7ef58e74d2f5c4beb8c95acbac0
14 | 8cddfc89ebec5c4beda10e4d714fb0aae4aaab26
15 | cb061d4fa4207105a45e11d1ca7b75da3ed7deee
16 | 1c9b2705d0a03ef8dbea9b4e869d00e23abdb9b5
17 | 40ae640fdd89ee351fc0ceea9ee7228635cbde52
18 | dbc3a382075adf2827d7cdb12844ac72125a5034
19 | af122d81afcd49e342dee53ea992136310034563
20 | be5416a2f521087a147a6dd6d731fa387d9a19ae
21 | 5cf5ee79c69dbec99122e169a6be84a5a7a1ed3a
22 | 94dcc41140da785b46b57cf531d22aab7eb921f3
23 | ca892a7f17fe2f0f207b09c1d11bf589218f6167
24 | 83dcb5c1b88b67b2bf7956502c2dddc50e4f41e3
25 | 851d288c904cf600e4db17f63cea4f38120b5d2e
26 | 57017c756c4e504b650f201d1a407b136118f6f1
27 | 1c909005ae12d591d9c6722e9ffa17296928a972
28 | 417c7ab1ad59575c52c3cb9721a0a0717e61eb68
29 | e06ec62b002bce90b61aba3545719f23c336c187
30 | e2e732d544fd5de3ec49752cebd8045ea765c996
31 | 273c4159a0536bcd14d1848cdce58baf96027f54
32 | 3b47ff9e7de313c6ddd33163f9f474ac143fd92b
33 | eada7e64ce22556eb9757aa6e9ec06223450c299
34 | a9516bee191db986256f1cc49b6291ca63c19792
35 | 2cd246937a1ed88daae413271a5465d939c4cbe0
36 | 80a84189cd872560645ae5af11851a04a14a1f3a
37 | 0c2b8edfd6df91cafc3eccc8599260d93b268d2a
38 | c3c995c0f501864b470d0da559613279c7462652
39 | bb656708350857a1862df1e795dde7d983d88a1f
40 | a34475ef90ee6f47cad81e9a6c2412a411986aa7
41 | b7e77e55573947c2aca27401995e48dfe1622d5b
42 | edb4fd917b2936f6f389bf3ab2601ed69659000a
43 | 51ec37f9681f1c4ed4be0ea685d85f07fad47e68
44 | d50522c133c4ff181dca39d1f48d710eb94107ed
45 | 7121b83f43d010208d29048ad9c0a0538858bb46
46 | 9de69255a864d2451f9e66ef698318c0d26a8f7e
47 | 1ccc5008b4a042e653f5a05f85b107f471d96387
48 | 9459bcc7f6dadb607c847cc4729903a05c74a0f5
49 | b52775ce8d7d324ada9ee043c614bba296a92de5
50 | 8d2b4dd9fdd0171085591fb72b287eb47b5f85b6
51 | 80c93e56e3a85f0d9eb6d5e3bcd26c9cdd5c07d6
52 | 6996696418a6040b5a1626d69fd6c1868b1c5385
53 | 16f43e6b0f2183f15e911018220980d761e85ba6
54 | 2c0ecabe88dd71c1960b084235a4650d89352387
55 | 5d57efd2aab1cebc63792a46ff393e5e800adb5f
56 | 6271b716c1928eceaa2f7b72bbbbcc83a118d3fb
57 | 981f2f94e204abf20308cea91498cca14697d639
58 | f75ab2c4855128ef22ac76e42e9f91e8f58efa63
59 | ace38f61d056ec33ddcad73a2b4db3b318ab84ef
60 | a7cc447c1137e49fe54240d52245015fd936e7b4
61 | 770f3c48995da6bd6361b17e982d6d6c8c534921
62 | 0b60fb4d2f17d27e8faf7cdc64837b21a4edf8ce
63 | 63f3a4cc1bf3fe5860cacb6e8ca842edafd66ae9
64 | ad77c12868c31affda88dcb35ec5a6580c39713a
65 | d0d9fce1e22d2479ecb370b37b3db74a7fbe7b1e
66 | b410467bb3852ff8b9df418425befe52beb13db7
67 | 364d1e2996ddd522a1bfe652e8f7f2e06db84e3b
68 | f3c176cbc6267428714c8d59b8b07b813d4e56e9
69 | 3bf30cff105f5ad48306f670892063ebd72f0371
70 | 5a4c9ca63a615d6067411038f8331ddf313c5152
71 | f33e6f988ba620b8559bed8187e444c50b99912a
72 | 2a0c94ab37275668667ed8c7d52577924a25a2d7
73 | 405e4b1e023c15ea7583dd7569885d2ca0b0bc1c
74 | 216987c13420b53556677d7dd2388bda7d348ae9
75 | 6e17d5ab874e80532c937d5ea4677d11fcc12701
76 | d548162422a6a37bc7b8db2a897b793f11862eb2
77 | 67a65d07e6969c5a2d261649d6a91356e5dee721
78 | c3e3ac0fd8579d0848e359a7b44a18529bcc8ad9
79 | 993a881bd51323d9c7c92b77ecf4fa8b4791b4be
80 | 6f2c7d1ed1cedc9978229acfd44ccb0093669dbb
81 | d62474dc5da163f03f0fca123181a5ce860d20a9
82 | 3f11334ad2c1fdb219e76aa409877a121211fc1f
83 | 47d94662880466a572df3b3fc12146b6ee8c7d2d
84 | 88f157c7b449f3a076b6f002b0d3b435b1efc298
85 | 289fdb8537bbb767a5bd33e57693f81e43ae981f
86 | 5601018aa4a291181c772997510771e85f16fb92
87 | eb903ce6543099874989a50f1dbce25c07ab3dda
88 | 9b430039f2afa7050f81fa2780f3b9a01c92cc82
89 | 7b5e343f4a11843454a3e7a15984027fbc32be78
90 | 7064527f1ac61e00e562d4c4126ec3e4f177c6a3
91 | befdab2b9f5f5751f2aa32bdffc174501b9012b3
92 | 741d57bb513a669e18d67bf7881f9e6c0bf12e31
93 | 9100cb7044a55d0c82d44f03c2acaf8c42d0afcd
94 | f3fe07fe6867880e3f43be568e7b059bb1a3bf75
95 | ffbf6761185ef4533f0a90f2176b5730329eaec6
96 | d4d332466a8ecc57dc1d9e673b260fe6d8e8611e
97 | f721f684621a089570f8648215a4921f648b65dc
98 | be96bd9ae8ef76e10adb2f8c5e82d001c5448535
99 | 761ae2519b6567cfd88989e573eff23780a2b5bc
100 | 5ce9315486033abe4e700e3c53b1613c4e875f79
101 | 6a0e9f87dbb0b5f6539883e183017e3bb848a510
102 | d6e5d51afa980caa7e7c5ec74c503216d874ee27
103 | 7e933b7e65902636486612b61728745dfdd4f5be
104 | 77d2fe9290a9de4a592a793125f3465a1d2c8d9d
105 | 7ccb3fbe40f908dd457cce248599011649d0744a
106 | b169d85b4e88199e602e919acccfaac3d8546c94
107 | edce20e76413054c42df17b96e4ec35160203c40
108 | 75c8d36cc13308eb9be90a97407369551e365469
109 | b7a3fbcd61bc47127986a43c50f637d903a524f0
110 | e6b4afdf8566b29f4f599b8cc3e6e5f7fa50c0fc
111 | af0c553ca7243192ef890f4590538871c5f87770
112 | 00bf089068a64d7865f07e4d628f89eaca7ca367
113 | cc39a7deba9f373f11a44605e47fb807f7169e51
114 | 2db10f629c3fabc9df104c24d62f3b7836b83cc0
115 | b431c3e1c243461c962d40d81315f382d7bc2034
116 | c3ec3b1ed02a5dc90072725acd77366ad5684d4e
117 | ef66b887d778206f3c3a0c5988dc186e2c9ef7d4
118 | e23a8df6d469fadb279e07c9710b35c09b805746
119 | 0eca410e2befddfb0b849a66843a1b282ec919b0
120 | e7982fd8a49a98f4a738649e9e2ed0b06706e08d
121 | 88b04cec0c56f8d7eee9bd5185e0011e5e253b5f
122 | fee0a1bbfdf33c5bb3fe54eecaa2fd7ae36740c2
123 | afa6048155b2f7a08853b7b74d081f5b32393380
124 | 25f24b081dc3f8c9ec6bad5ffec7f5657b7ee587
125 | db72dd5769c613c7753b9cb7a37452c1d8710e09
126 | 40ac28b030036e23bd2ea279db64c224c3bcc08e
127 | 037224950fb57390954bd09f83fda5616fb9fcbe
128 | 0f6b30ca0d326649463f9ce06187ecb67057ad01
129 | d4f85df8c6c2a741e605688f62825400e49c987b
130 | 691838ffbd7e606faad24ebcdb73c7d1c27f488b
131 | bb20f211f364412b8c7365656f4cbae8bff9347e
132 | f951a6a7362c6cb627e1ba702d076ae5f1088669
133 | 67f68446d452d38f1c5c07275c8832e2f694792d
134 | 2fbcb821ed3efe2e07458f91167973912d5ec16c
135 | f9a21da8d4035cc0c17a6818020d32c2380769da
136 | 5a2b664dfdb2492d20eee870857a8bbe07dd349f
137 | 901c1144f788189dd45e93be4145c73f9058f3a6
138 | 64828da864c79f4bbc9c4c30669076a44053cd37
139 | 17cf2fe8ee9587fa18e4f47abed13563d6e5b561
140 | 9e3af1dd861b9fe5cf047bf7a826a271184667ce
141 | 513c8446fcf4e15cca4c8a1f667423cb0931c780
142 | 0f23ce1dcbee08bb26d9e76c858e3169c12b8074
143 | 4f914f10d7d3060bc707533c41b3684cb135cc2d
144 | 0c8e4feb0e41255a200da404f9851f3c4d593a20
145 | 8948002471ce79e6d0278aba8d48e4ead5b483f6
146 | 6c2b0f8226a1ce712ba1ed1a02fbbac724d45c9b
147 | 7eff25ca7b71b75717bababd0bd85837cd62908a
148 | 1dcfcbdb5763aa0b27953df2cfc774c03e166765
149 | 09778ddf8927aadd2eb751cb21ef302d11d2817b
150 | df934e8febda106658d4ab6a381e02a14c3b090c
151 | 877c1f4acf13452dd84125d72e77518724da0c9b
152 | 6c1f29817725711506d980c9602b3bc73180006e
153 | ecf8b8f9a08aab683cbdafd3bbbe6ccab67d1c2e
154 | 2bce08d27c4fc4af759a2bf91b75f12d3b627588
155 | e2885d106445e29d2d7573d04453f1bd86d9fbe1
156 | d6e70a475505c4445f2ab2b0df0d90619c75dd5d
157 | ff48264df39079fb9814923dc656f9f5cfe47a35
158 | 5c9b9f50c80e79b6c3c08bea7e6bc3556d98d30c
159 | bc4051a5dd62d52039cae458548bdaaec70f0300
160 | fc0207271c9b1d559a3416fd8f35bbfb227e187f
161 | dae577c51a5ddcfc5e1836667247f9d0133195ba
162 | 4d0f07118c697ac7c7c2ff2a4f93471b3e843f31
163 | 2970c9e29a78f796ef0a875e31149f5d45ee90f7
164 | e82541f847c1b5283f55937c5f420a056a3a7377
165 | 2521c367eb5739446c6e1a69a96738771eb777eb
166 | bf1cf6079d7b2d879288aeecad40582e8e36fa4c
167 | 727c0a64d15039ddc820e7e8aafb4a5b5b99f5ce
168 | 3d2899372ce6092fa3b484bc71eeebb8b8dd9e2a
169 | bddbb614b866ff1d375b0bb2d167ec07ec5066db
170 | 9478150f9f515fe30bf4c7397e60c97a0d6d1743
171 | 7c8e71ffd5284991478c70ea62937442d5b6d8ea
172 | 80f3f1d08903ffce68cf7ecac9d6ccec9de3bfe4
173 | 3e900608bef9e299a2946c28e8006aa1a52527de
174 | e52a2d1b0fb39d125da970372a7883517d306c9a
175 | 77e1dfa9660a91be1b151479c6d0004307f9e518
176 | 106b55b52ee8b18b1f33d278a475876da61f6851
177 | dd7df5315ebcd2b3ca343ef2359ef7398a854d71
178 | 7bcdd92593b2500194fa164f5066d5ed6de07bf9
179 | 68ee60013085a7730cad0dc1755d8646cab6985d
180 | 7be8e6b3b477988bead4de545b41ae07f96ff482
181 | f03947c0bdbe68019f441fcb63a12aac8f3c8e24
182 | 5adfc7622a2d97248fdbc3f42d85da39d2b88052
183 | 5650660db430e8580346cfd43519f79c368ab597
184 | b6682c520898c7217d34374bd34634a89b79d6e7
185 | c5b4bc387bbc8ed0f247a41a7f8851347d8cd013
186 | 046b8febb81e076faf3c63a1778852d24d9a667a
187 | 256eb65918fcfb738f010293868fce22694d7968
188 | be699bdfefb9197b4fe083a69138ae9e67566610
189 | 97ad074b9114625620a671f65a10a1f2cb26cb56
190 | 458822cfa8a425cf6e27e20f6cd1a73d2a890aa3
191 | e76dbd2cf97834c338d1b7e7e4ec878b697233ad
192 | 8f4d0a37acb0f8838ee8aa5cdbd129607aff8a47
193 | e28665f3a45c63da8b63a226364ede5db2233cc3
194 | c962852c5f3f8df8e049fdbd30a2c53c13ce1e28
195 | 19b7191ca9b5b42977a62e127a1bc8d490b83552
196 | 869344fbeb64ac41aefd8be0628de987e1ee141d
197 | e56fbe1917d06161eba1e7e004565fe3de606f9e
198 | 8b5a30051f4157eb43145ded0dab5f33cab9e5ba
199 | 0d2c776b9bda4ae8d8e4af9590eb87739956c6a8
200 | 34da778896aca39b923d0405cd9372c88aec75d0
201 | cf23f2c9f74cb416f0af984de668e3669e9696ca
202 | f8b5d6154d7530f205486573918a3dae1c0c9be7
203 | a79aadd407f2cd9bdc89a4afb3fac7aa40e65716
204 | df960e082c94609670913431c3d4c2c9cb437009
205 | 7e97e0dc3760e51246f315b8e143ebe33a15ef35
206 | decc8ceba1a3737fd60467ec4ce9dab4ab2fb104
207 | ff3c80f5df0f1b8ddac728875ee5c45f7c62f076
208 | a69c61e827e3beb3febd7e2d742dde87f4e76184
209 | 700fbced740c6fcfa7e1ffeeb747ad1e2ad75b27
210 | aab13ce295b0be93d808139a12c1664be70c41da
211 | e71735db0732b84840e0f8bdf89d790d37cbd2db
212 | b38e934550e069fcc5a36c8f7c87d58b0538f6e4
213 | ec51a6a9bd0574f434dfb69f185be8cd1bcd3319
214 | 68077ed84cb0115fd4b0754239c3080a9867e628
215 | d63b378b4bc2fcaf4c2eb4966587ea8142196dd4
216 | 21853e35df8d600c4fecc0b4973dff11397dc7c2
217 | 27182af3ca5747460aa95f231ab33a67093124de
218 | 1dfc4e9f7e0712115d3867085e279824c4d276a0
219 | a079f5848f3a36b5ba27bbe38eb5cf338efb66bc
220 | 16605ebe9be47846f9cad54f91ca6dfe4908e853
221 | 3283651219d1afc91ba0769fc1db0ffd06742f8a
222 | 14814cc244e0b19256c10a804e12b58de60d108c
223 | 2b16b50d551d2f5c108a07fc71c7d253a4858b1b
224 | 1616b7246494a777ee32ab097db9f58938268c27
225 | 6ea65ceec712473a4f356e5312553f21e0e62219
226 | b507ed35dc4779d4f1f474103e1b15b01006b06c
227 | 27ae04d11f5179c0e1a3908590b63dbe05fcc0ae
228 | eae8c9d441d8af80b9b303875dc84c8c33ab7ea9
229 | c37d06aa7bafa30a3a2371c780adb828e7342a26
230 | 05a7b39bdf7fe58223d95335b3d94e3520e37268
231 | 44a7da99ab89512fe23dc96d4bf6233549f04900
232 | 5517435add61a9bd8098af5d192b5bb0dd8c43e6
233 | 7d484c0754a8389436cf63b18b0eb000a582b95c
234 | 19459cee34c7349242cd4308eb71678c56ec2d9e
235 | b4f7bc2e6249844a4b9220be02535c42f733b6c2
236 | 4543efb5f2b33c78811d2de51220843a0b150c6d
237 | 0d832adda6ef5b5e3aa49049a67485b0511a11f4
238 | 1759868d04546a83cee501a3613184b27940666b
239 | 0201a6176da93ce2469b356f488147fcf09a533f
240 | 800c2a0152345bb9a30b6a7616c0e22883b0dd8c
241 | a7f3b2d593b9f6d635d6b464f76e1b416d17d834
242 | db36320ad6cb76837016e1a2c1efaa31c47f8137
243 | 7ef4b60a20e4244d616f5a8b53544bbe9cbfef9b
244 | 98d4262fac9873fc4c1586488ad1a8c7b93c8df1
245 | 7381505235e920e549c09afe8239cecb56f343ec
246 | c98f8da85e23d7d5ecbbbe60f87f089c3baf20ec
247 | e247aa7ce186c93e8064901d55feea88c733c65b
248 | 76db8900ed3c1f7daaff79b4d889f942f621a7ca
249 | f2eaec825ddd9d622d9de32cf09adde52f3aaa36
250 | 67be55c711c4efa7584e46f682787cb28ae68a6c
251 | 04dc027c5f12ea87a07f2eae9e41b35a04ba2d04
252 | 03e075910193e4501da6f4bbdf2906ad3a515223
253 | a4577682a4b8157350c2520bc9501d9c7fa6ce2a
254 | 1e45f6e848ea16afdadde88116aff40207647ebc
255 | 4ba17602f6c135456298e6a1ee0690f6b67a6e96
256 | eefc648101f429dcdd910ee4c5d70933116fbefc
257 | 6b28aaa593239a9605c21d4d8ff9f0ff17a1c329
258 | 67640f9f6b782e094c12d5e004bda4e741b226c7
259 | c824f5c55cb83946e215d875ddf00ac2c9aae61f
260 | 5d3f732badff84379cd11453ffd5f40df8ef9ffb
261 | b41a3f3647661bc47995c640e7cfbf0cd892689d
262 | 9fa7edbac6ad9bc88e3b5af676209a7a5b7a2054
263 | 56b4ec2a0446d10b12da39f13c6d9911747b4347
264 | 5acc93253f78aea7b894419f246d735459062e9e
265 | bb3883bc92d8fc6693c18c422c622d6c32e0494c
266 | 4cda6a7e35444bb23fa03ae2765d53f69ee66c17
267 | 7bc95627fed6f1e8b3c2543d2a91199d62caea91
268 | 14ffbc0813eeb43bcbad29484a3d2bb78ce83531
269 | 8056b3d1c17a3c041ff2c14b4cefd4490338d536
270 | 2a7f1868c618fb8c5f45b10c5d8ab12e4ddd561b
271 | 27fe4d4bd32dac154ac16a4977d96bb6b5419aa6
272 | 6a4d177567d7d05b16d34e92fb12436daf9b2033
273 | 54ca65af580934fcaedec57f93f2e58070cd0c81
274 | df6c6475a7b365a92c4da20fe94d7c08a02dfb5f
275 | b1234f5d87511d609ae291b5bba097463aaad8a0
276 | fd45c5cb2b5f4accd3380d3b3cc32d19ab0b621a
277 | 8c121af61b55c13b45bd52b15c27cb3701de0422
278 | 2513bbcd056ee311fac724fc9fd904d5061af839
279 | 8b0d30187e040931334f064072812572b8308feb
280 | 9f4e3078c621fc9e7f0d2fb926d2505cbd771551
281 | 74f2f5738369104c3facc74b96385fc0d2cd3841
282 | 721def5538b4d80947c76e6c9ecb56c92b0c2522
283 | c3c16be5ecf22e643e48d7fc7fa62696134e469b
284 | 648787f881c50cb1e027126885356385b7b4616c
285 | 5257187d6421c4b870e437f84ee852bb16dc2dca
286 | 6cec40bcaec952ad5f443c1fafc9e202e6d9b336
287 | 2e5691a7e83d016944265bff083c65e4e729779c
288 | 6735a4a3d25633cbc9de9b27275a42c79bafc057
289 | 72f0e701b75b51ad17a55c7545fafd99543954d0
290 | aa45b150e1c2be81725b061ead3ab014af720a9b
291 | e7d37261dab67bd3f9c64e14a18e73409a7c7ecc
292 | 7b26e1e185f194d4d13b2aaaf1fc87472d319708
293 | 63d5769a8b22fe908e1693abb7c01eb8e5824072
294 | 8e3777f48abf9642dc04824ad9b9b8e8f096d321
295 | a071bca269b3e2be9e700ca1f2b2a3609d1f3d50
296 | 47bc4571689de764a2f95f970c14e0e066a44164
297 | 7bfa2d000a65694c5cb7b2cacdfed75348bf7d1d
298 | d2ebec3d3ba1207b01764028a21f269a4b6bec8f
299 | 7dafd8934628ac1d1ae2ac3a9a2c12b527bbaa4a
300 | a9a2631569682431ed1e8341e489462a0832f1b6
301 | 2ea78022fed6297afd47a8375ce4cd06e29c9151
302 | 120e8e48a3ab4bd63eca5f5fafd1900ade869efa
303 | e368064fe4dc14fcee7b8f664b8f6a51f9fb0899
304 | b1e6009af726806bbe93918664d60bcd5fecddad
305 | d7e0f968f1749149a3dabce696672bc06acee32e
306 | 95f24b871550b823542cae4dcd8c6d490ec90721
307 | 2490c8557dfeccdb9fa7d79214a3d77e2fb5f4bb
308 | 1f70691bf049e62beab03072e36368f92f0b2a36
309 | 0b57050b3c864a25d1fc0f17aab76458a8ca3314
310 | 834d24d3be63b3c0bda560a62e3c00478f4ec3a5
311 | 1ea8626174d82d992b006929008e23edd248f3c4
312 | d69cdcb41d6d6d9afe443fea566d3f9f493931b1
313 | 332eb3a291913db3eeed42d2cfb8050cbbcc5f76
314 | 63a40e1f31cfede575514277100fe35d3f052b40
315 | 812a13047f1e54f4f08a1273d9ab6486e7b7c699
316 | 17d7e3cbabbfa59ba99922a15eed9d2062b43aa5
317 | d0ae9ba925cc6bfb8365986f7470def5a135a7a6
318 | 47ebcdb42eb03eb9680932dbd675c7d763d36fa5
319 | afa96bec4a7542ac94ebb9efb071c3d5b997b612
320 | c44c98f05b152fe9862df5a33eba4f6f7b248a27
321 | f7ffeed8cd7e3416b733b773fe56dd88d1bf8794
322 | 808e8a2de4572ad05844fabdb9684308ae5cad00
323 | a15ced6dcc4b1bf90f805a3bbeff88f8f6516a5f
324 | 2542c1ad5cc5bea42d4c4abb432967e1e08e5398
325 | 87ca3d6bdf40aa7cb294b0c8a2e20dd552a5305f
326 | d51008f050eb278400bb79ecded2ef51aa64b0cb
327 | 305643172f39641606676716a11d2b1fadfff04e
328 | bac501443e55f27ce90c7d5f66c235c79ffe00de
329 | 7f19d97b24a9b9c57de38634da00fab3abf5a733
330 | d9aa7a13dbe24aa765637fca1a6b1ce3a6a6882f
331 | 52012758bd20c5e436e69720d2004e5cac6cbe71
332 | 27e7419fbb591aceecab76d4a6a5f666ee9a759f
333 | b745b72abdc94d31c3005f9e88728fcc589b4d18
334 | 7745852852e95402c9a8ef4ace301b6f02040fae
335 | 0b62d2474ebdbfd3f48be50eea510f9bc0fa1d09
336 | 57c5c16a44965b28f134ad04cac233383fcc0f0b
337 | 86b1456a5ba256604719d8bd2ae959e19441cea8
338 | e7921ccbb1915d48ccfe60d8adcf50c00f082031
339 | d9978a9b225827c359888761d87b06857d99ca11
340 | 5c9f8d2bd2a617ccb2fe1b72bec33ff8123960ff
341 | 25f850a43fb6bab709ce809a20b8b5be1e01560b
342 | af8924387d68fb736a0dd84fa04a4882239c4bd3
343 | db684dcf4fb06aea2412fb1f2814e7bbe9deeb31
344 | aaf1e44c38191739f6e54bc014b443c78fc4bba7
345 | c9a6cf11bde7811ea3abbadb329ea1856eaa6e39
346 | dfe735fd8850880caff7099fde94fe1611745fa4
347 | 4108a36b6760732684530f415ac230a1a94f3c66
348 | cddc06c36f6f55b82946c1e7fb67467b2f603345
349 | 8a437a74ecd59cb4cfc35fe7e0305b49a2f1cb44
350 | 492c5eaebf9a5751a668809733434027e2044473
351 | 712fabe65be6b80433ac3fc75668b1ef8ed451de
352 | c88fe19bbb98ed773f993adcfff3c4531e0503f4
353 | cb53df10ec5128abd5289d81a4bd84b0fa0f2cdf
354 | 6d7171301edb4abbc775796a01dbd90b424f9672
355 | 26713436ec199423c19cc7ef34d8e27b2e27c515
356 | 4d0ffe031d34025f2cb99726a2ba873c6326d33f
357 | 7ae64f6226cd59af7cb6758769f4dfdb103cdac3
358 | 9acd24e2965a61945dfc5b16da270b7b6023af4f
359 | dadc47e61c329814d4f6c4dd837f0d0a7a09fc71
360 | df4ec590397e421933073d040b119a80a8b3e8cb
361 | c074634efe730f956d8123ab7f235a9bb9ae1fff
362 | 3cb919c36e832534a7a36712c9de0b81fc856d6e
363 | fbec0d300e88035e5a763c4a717cc286195c033d
364 | 677334e2c69a26ecc37b3bf1d0cc124171ffc7b4
365 | 7e1e2f77834bc652d670cc5f11de7a4ec42835db
366 | 8eb1dccf4e31f0e4a8ae57a0744934f0c6102e3c
367 | aca7f700ace587fcb4b085776215ab2046026893
368 | f9088fd591c68754b8056256330aa75f47e2bb6b
369 | ca35c99a2a2a94d73d2714de928289045a5dbbd9
370 | ad56aeb660423aa08efa1d029fb8bce174dd1141
371 | 28b57bad27fc049341a91d02f063070f70bc4fc7
372 | 724f802662ed5e06a6d252d65d4f1c7c4cda3009
373 | bf6355368e6147c9ef7b8d59d2ab8a6aa945f334
374 | 55e923386a679b9b89a0620c4862ed301c2fbafe
375 | 34db7cb5b56b9baa7e86598eab91166cacf5f086
376 | 4395d648616fbb70e076bc8e75f263ce2951aafe
377 | 458e53ed4c26ae4ab1256c3a16fe1580487d1151
378 | 746ce634bb6d74c9106ea47662118aa09af04bef
379 | 961b2d2578a89c39ac11f94668e8841bb85bcf56
380 | 46684ad433426aff59f871476956a3abcab16cfe
381 | 93f7430b418a1a6d6b0420981eac737a6421cd87
382 | ab9b31b15e209ee70e0c0a8197985e08a39e4bec
383 | 77559f8e1a177f4df55bc92f9142c5e957109f6d
384 | 79dbb926185ce645f1ea3cfd05ec02367ec123ca
385 | 89d6b3c1ae65c8c433731d98154b66f44f1c02d7
386 | 62a7d26236517ef169e74d76587454efd1c4c2b6
387 | a8d49816b362e3f94c920be54037cffe252acf2e
388 | d9be6b594b6361673454feba6c089c2ad8f6b6c1
389 | 62eef7346ee7b22c955442b8f3a0e25ae248d119
390 | ffd327a01feddef224036510d41647994f0f2654
391 | 95e62c791ce9ffc5986561516e9912309369db2e
392 | 3e24d001950e402bd03609534c4d6bd99ae00d49
393 | 02b3f55a033e6f56343e642b9a4d392998c6150a
394 | 47024106663462b2ef1991014dd8dcdfae2fe856
395 | 03a92584280e0719919e7d1e70c446194882ace3
396 | 6691e8828ef08bddf41de642f73d97ee47c1ae81
397 | ff63ada7fe7dd879802156b30400fb337700140a
398 | 4d3267cb1e55079c9ea7d8809b81e1186369c2af
399 | 0358345f606fb40077b5a34477dce4599e136578
400 | 62e6c5843d01138bd931309212e8befea04c034e
401 | 1083ebebbda09607568fc1f7e1bab303565f6f82
402 | 1756eef2d8a23b1046f8114742d07245cef7b3c3
403 | 07ab9f38293a55b0646dc0db82c54149afc0c4b4
404 | df28aa8fd02f6292d0a706933ccad2dcb92d872d
405 | 134c475c754c3dda740b38bf1549dee96213bbb4
406 | c6e01d8c69505bb4641cddd7cb3c6c9088fd773d
407 | 3c0fdc9aa0177a95e3f0eeda0ad278c6f9428e83
408 | 08f4d911e4af16119da5a260d1ffbf9ad2187fcb
409 | 9ed00ded58be7934a9046d9d9532d06b034f063d
410 | afb0b8b7a169ccf5bd0496a0f80bf06bea47c92c
411 | 62c0e9822322db728c86fce1db64cc7d5b6ac975
412 | 25fa4d7ebb729eb03415d72c01e03f4eb4c25f7d
413 | 0bae35f8e91ed8e08dab747c17ef437fd6015905
414 | fe34900b3149e7d539f7da19a2f26b7550b56528
415 | 182ad9bed4f71023da577f6b5968361eb9beb202
416 | 3bb3039f78b40a28dd838e5a0056e650f819705b
417 | 2a6be549fc69420f62e4f0dc43e3580b45b21838
418 | 5d1f64747a33bba32d1dcc638e8decf9fdb36307
419 | 071fdded9649f452a9c67d5996e525d8106f4747
420 | 009cb5d2500ab2f23ec73fa218861cfb54f293ee
421 | 1e2298037aba3802bff44380de4aaf1ad9a690ee
422 | 9b476fe7156e03d12469c73b614e4e24da8e4da1
423 | f7f490292c3110f1a60265de12c85533cb8be7d5
424 | c866ce4e266a392637f3f4c040f2303bb92f06ca
425 | 4ab41dbe3dcc3a037a1a1f76b335573a32485db7
426 | 0507913198ce5e8b979aeee6930aac74e55ab00a
427 | d8d8b9dabae93d1d52c109868b13895be480a398
428 | 7ca7ef22459a92fbad804d46ad7cc875b366bfa0
429 | 9976f4fe0793978af7672204606df447a5b3fc8c
430 | 7097ce1d52014caa2ae1c6fdd63389f78428d8ef
431 | 1f0365410cf81fd7f213792d826fa276d3696aa7
432 | 2b2d68c364609ef58256c01da7293af2b4f4f1bd
433 | e5626a23d9df8bfb74e91b8966fdf0a81b39cde6
434 | 06a1e4b3cb550f01b4eb04b6451021169e970637
435 | 94cbec065a52da99bf42867e234db33d4851bb77
436 | 03910223b4f55411893c941de07991ba9b9fd58b
437 | 43bb3dca130a18675114e712aa786038c4af3f50
438 | 68cc28ce35171eb68a9a9fefcb74d6209fa0c3f2
439 | cf524f9ee90eb7afd51da430111fa78a26b76e8f
440 | 1f607db8612c1da44063abea5e01e3a4f7398bb3
441 | 5f7cf3212fc815e9fe72fda3143076972f5fdf3c
442 | fd3820b95e92f23c412ed926134b57028ce7c545
443 | 3e760678ac4a58c79b14834b7f41bc151ea0a481
444 | adf988440fb1486238a2ca6cff65b1c7d739a154
445 | c45d5c3b15f7460731ba5198f7a95d3492b15c1e
446 | 8c7f90f4eadf7a9c30dc3c2ff211a94d6d0718d9
447 | 5fcb95a09c011ad16e0e4bb2ba2dc62d6d98d6ba
448 | 0ff1b042a5776f0582b78f283902014c58b7bcad
449 | 5388a98ade82c938de247560cce387dadae774e9
450 | f4eaaa00fc4d043bc733496198ac3019531f009b
451 | de2d7a195a64f9349b1639d9e82ef52c6feb9ca6
452 | 7adc9a869134494ef1edb89d23791304bde4ec78
453 | 68d9d9b00dee03def191a1554018021737c190c0
454 | e96265e7a17dc84662a9793c224667d3db8ee52a
455 | 81fad681155cc8c26adcfe44f40ba7f2fd938396
456 | 4879713e8055bb5bdbad2b6e91619e97eaa122cd
457 | 7e2fea277f1fcf367e5c84cdaa2abe438221f209
458 | e634811a599e016db5abf5ea8a479398c8a4b8e5
459 | 4c005aa660d081503bc76f97b4cabe65edbae9ea
460 | 606fa10cf71fe83bd6b89e106b83429db5c75c9c
461 | 1c07f7d59f0ab61dd951353adce8da496ea0f237
462 | d8832d771d9f070d118d9d1ebeb074bb08da104d
463 | 48f6aea73067f2e612db9ee5029fcf160149e0a3
464 | d378e4b76bc896ccabccb49fd0de75c7d3a6a02c
465 | 593254cb2aa775fcc39a83cdfef9e34383d87532
466 | 61b167472e814f7aaf6b1fa1f2137adb8545a495
467 | 542fe8fb06b0dfd93aace171dbeafa46d9ccba10
468 | 6df16cc8533f9279e56edc17c6433efb946ba952
469 | e50b2c9475ed0f3debb83d274eea68a8f8f658af
470 | 84b109279c8006c9182a214ac0e2580465ef7c74
471 | 59a08230b179f8ba72d1c0428c91880faef55505
472 | 1dff7b038fb84ba30d97334caab6ce31ce4766d0
473 | 707b0bffb521517f55880421eea7cfb46963d4d9
474 | 3ad47749d2e27c203eaf6b38e671254c391fd60e
475 | 4fd520413ef1a64ab0d0253e9c4d4bdd3dd3e8c0
476 | 06e73b6fe42e1d171e75eeeb60c0121638f7cd46
477 | ffdd37a00195493042f80de4c552abd40683237f
478 | 6e4f512d49224d7fc917454fc72ed90e1d48e124
479 | b1bce942f7cda98b99b76af9dd3e472e5b23df33
480 | 2c74571c5a56b2b04b654fa1f58b6caf0d64a4a0
481 | 65aba0d8fcfd12ab7fb198718436a6e2a27c01cd
482 | ad27133ba709519c70c08221596eb33f9edd1a26
483 | 9218ea9d4b50c1b6351e262913e8334e2c283f3d
484 | b0a81337c1813e5ce7f186fed59aba69afa27d99
485 | 6bb3e6f7ed935c18efe06c2a08f02f165c2eee0f
486 | 5f6447e1e78e51fe9b8db614c5bfdaa790065f4c
487 | b10af68624e47656a30170e9d3c3bbe2dc2b4168
488 | 926f29351134c31bce5cf12b603d76f1d7b893b3
489 | 949ce7daaa3f2e2c3bc25f17bba715152ff08aab
490 | 5c08388d378fd623b61c9ed01fdca59e99a88329
491 | a36ae93ec8662cfd0e55ee933ae7ecd8ea7d05a3
492 | 1d1612a0a10513017b26ebd26251bab320575737
493 | b81fcd144b4617c575f32edb4a539e9b3bfff604
494 | 35e8b7aec6599ba885924a01bb8c6cdb59b86cbc
495 | 5cd87f5c11980405ea91c2717484b384f088a03b
496 | d2b8131b2815d01c86ad2c040a24398bdcdc5295
497 | e4f659ff8e19b838105400409f87890e97234ad5
498 | 61fc182bcb33d30c4588587ca6cf2bf0534cb36d
499 | a66747d8ce105535056fc7894d91ca18218783d0
500 | eea2d281ca95719eb675efc63b08bc0db2a18d66
501 | 68ae7340f26226af9d2fb5a7bb320934f22843db
502 | 20c6f89a1896e80df6757fe4077bb5f4402b664e
503 | 0c066058803ce2ccd67488e1ef267a1ea45d92c2
504 | 54d751625a966d090c3f8e37d8996f321672aaf4
505 | c84cdcc3de777129fcb4ca47b7217cf4ed344a7b
506 | 009bcf532409685ebbcb1769224518ebdeac0d1b
507 | e73d4674ca977c5e1b1ae8a8948fd0e521fb1389
508 | a26957979a976a141d79fcdbc52f35bba8a72990
509 | a819845f8bd788bcb1feb9090aaa1f16e485a3ae
510 | 00ac952f247949089ad7f42c33c373be211b1c27
511 | ef615d80df3bbaad9348522bf8a64d6aefa31ee5
512 | a6a2b66163cd485994ed89a8e72bf1d992018474
513 | 1a59134869d691c0948ec5fe4ac3d1be1c79dee0
514 | 9b3fba327e0b70e0eeae30a88576ce81fcb57543
515 | 3607fde243c99a4c66d6462600437eb30efd099a
516 | 3fb5c16d6ee783c074bb00ca621b4033bbcff922
517 | bfe887c1cb52a31cd0f1edf0210c092e14f75ad0
518 | 1ba24dad41f6aa0a08bf1c7a6c1b85ba78e75f0f
519 | 95d043691317e70f972032237351333d1739486b
520 | 9d035df4c2887b02f5b6c6d16743d900e9a0edb8
521 | 754fdeb17b50bb42787c356d1cf9ab72eddb33b9
522 | 93b3adee018fa832fcb0d62872c30b5e5c2b00fe
523 | bd997f1baf86e37368140cd1efb6e860bcab9cbf
524 | 170b4095b38b03de03728cbe0ba07edc7a108dc2
525 | 20b04147d88fd97172444db764abc351ca69d039
526 | c21c43785010bb52bf545b49bf5fee344e422704
527 | a04f2f60d3763b2962a1d60f3e0ba59c747452c7
528 | b901b9424dd97a310ec7baad7323bf39b0e9b81c
529 | da7445eecdc2097292b86c70fd57b186cdca3cca
530 | 882c03688df02f4f6974bbc91a920748e468a132
531 | 8ad6dfa2c5d1934459ba4434ef2b2727078b71c5
532 | 8e6ad9362f30a20b8e2296e023bd4ac3f2f9f8ee
533 | 6d554cbd71b4d816fe20d65b870745cb6e3db5d1
534 | 5d7550151203d9bf08000055dc21e2d24283e1bc
535 | 4be2c0ca24d4acdf44810ed601ed4b7b6ae7e083
536 | 3609b564f691a11d12c6a91ce6bea88ee454ab20
537 | 0670b7fc2971c06eebcf4ad1425678cf21bea6e7
538 | 952589b9b44e6173838c39b6e5f00c87e8d0a10e
539 | 76f5084cd938a8e26f7cb0ba935c08ca3cca1775
540 | 471c7fbaaf743edd4e23a11083d6ac762b54ef39
541 | 5b31afd5c9e518d9ed30ea7a2519bba095207317
542 | f7b1fa4a66cffac03972bf90bdfaf5f6c88eda35
543 | e337290ff925d8ef969c6add4dcfa82b4547b2d5
544 | 730fe6ffc1e92874a556461a4d5fd9498b184c94
545 | d2c206737d55b6e735495e93b4dd301915a8c242
546 | 57579d255ffe7be33e31440456b08ec24e7ba398
547 | 02753c686e8eddb520f25fe4725ff3ec535d5f12
548 | dd3f77a240651a4e52e404607c109f78dd751f3a
549 | 91d1ca686e71d59d8914e1841cf2eeb3f881f2ee
550 | 0b480798263cd4cd624fee735ce60027777e3e94
551 | 12e5754ca858f88191fffb10c3c60c1a023d4823
552 | f1c95dad0ee46c137275af196f8aadcd7f88785f
553 | e33b85905847fa78e5985e53408a4c2efbcfacb8
554 | 7c455ab867bfc6012a14b140947b7a687caf9f30
555 | 6ae2932333a4e645f9bcc9e2e3abede14ea34ce4
556 | df05f764173568bff6c97bdf44dc87d3802bc272
557 | 817e3772691d831137a04f3b6b4893199b2c5481
558 | 5152d6d6a598e0b3ed3fb96051533da370f51e21
559 | 23a07efb8d9029599c5c9c412b6d97c31db7e12f
560 | bc2121a7c9e6f3f5cfd446974f2e3e600a8bb538
561 | 09faf1856235a450ed7b0f9af13ad4fa9054e61a
562 | fe2f47c0ed11cda5a23b28c35e860b3706e97f4e
563 | 22feb0d16fee6236b3f060f5195bd7568c0619ad
564 | 439a3bb2ed3f0f99a94ad2bf0f1e281a2eb60e47
565 | f6727a6f02ea5a3cb65add8357e0e918d565772e
566 | a5e5182347042e721cbd4e55d84b13209a2530c0
567 | aa04f725e655e92815663f866d8190eda07c0d36
568 | a7a524356a21b3b847715808a880125493d9ae61
569 | 7d0891b7b5bfd8dd0f2073278a44be578b7a3c60
570 | ad627566e39fe69585d2a5f06d586c66922089de
571 | 4de70d154c47559b224485a0fb8cb323e10c1115
572 | 8d81395915d37dc155a6a41310a83dd973344f14
573 | 7e83085bc3570589a4302e1a45f6b2bc0bf6da81
574 | 939770f45c3713d506e96f01b72dd3c1acc3baa5
575 | 35a66fa4dac47f6471a743b707a69e588a2d11de
576 | b2c8c0b205d744bb8d943fb3ce0bb45c084e0e6f
577 | 346e8983c3a37f0919292c98f257d4e8fe70c5fa
578 | a5b146b3f81e9fa608803ee06fd61f85e29722b7
579 | 63f468677f204966fff6c52c0ddfb7efacd0cf88
580 | 23140a2f7b37f2a4e31b77055532d5e621e519b1
581 | 7b102e19df6ba43c7893c374b385addf72e870aa
582 | 1533792bf0eebb452256dd88f38a0210f5864b7b
583 | 89494873634a41eec80f709b9f0da46d075cf0cf
584 | 9eec65f3ad69f02e501edd4d878548b1b73edf85
585 | 963e9c19c3b8ec5c16e57a7152f4bbea3461525a
586 | 3fcea5c2571337d7b735fe8d713632d8680931ee
587 | d4f9ca12ac1b86a42dc82acc2d7da02b36617c75
588 | f2971eb9d51422b13d3170cb966e56f35e34a8a9
589 | b796054f0b2aaf6d24256c820d39b17a7fe154f5
590 | a20c637c0b75dab21ff8582016842b5b35ff9dde
591 | c3240c0519e59868b5f15bd02db28bed828fb1ce
592 | 486ff5ad9809ab24c21631da27f86d785bef7115
593 | eaf2672e78534f291e414f0483aab0cd582065e3
594 | 080c2d2ade10f1b3463cd7579bc67a99f68c179d
595 | f0278c155f9ca54109bd6df559c7f9f7d3fe9a7e
596 | caf9014ac1a1092f9e5ba8c039cb49434a48e51a
597 | b7097c7db3e3f3ff0885d9b6c6e226ee6511c17f
598 | 6ce1797dd1c771512e6e784483c7e543bc565c03
599 | 3b5a76fa7cd5ff2fc05643422f48ef6b2c4fe399
600 | 7fe2beebbe3febd4618854ae5f5c2466fe409c74
601 | 14c9235d0adcfa85c24cbe5c1c3ba95575c249a5
602 | 35de10656646710e2b1976050d672ee610842d75
603 | e1c94b29f99a0ac04b2cef3fe633826776fa1f18
604 | 95c925d842389addac8756b3c8b40b880f050f19
605 | 5ac1364d42c341082edbe51392c7ba96f5eb95d0
606 | 04434b5cf39c0316cc4cec99fc74a9f099574c2a
607 | 56eca70d60c2c81805e98a938bec97337891d6a5
608 | b22769a7843f06f0052eb9c192e82ee868aafd3e
609 | ce531d4661de9584959b189470f509f26e429b40
610 | 615fd1e78c5c06f54340b91a65e49a6285fc4f3d
611 | e9d6975ba990ab258af26b14d7320c4557a17c84
612 | d1c981e602283c0673998eeb8105d96cd1d0d03d
613 | d54bf7e5f6dd38ee58736e27a6a67a7fdbd79232
614 | 28f99b5610a2b1f21e7beef9be3124e4524e0b45
615 | c9442f5be882e648ac4f46cde2c86c9bc98efd1b
616 | fa313e520b0b95506d0b34226a5b631929244f3e
617 | 724bb19f08b2408764379bbf8e89afca1b5f619f
618 | 06e91bbd7fe034e00a5f25c7d49ff21ab2764380
619 | be2e9a963f2208b45b63a599680c45eaeeecbd22
620 | 288ddbed85d58d3e707f43c4bf2273199d0da717
621 | 359ee017c4077f1c10bbfac93b92592a67662cdb
622 | 7e38a71ca7e16c122335e89b12fe4a9d39e7b39b
623 | b0ca2a7bebb5305f3e3760f94bb20bbfe42c6a9d
624 | 525c8b458c996cdbdddcea20e66af5276d8dcfc2
625 | c6f6cdda910fb16e1bf8b0591037340d338de8b3
626 | e67870efb3aed5c5fe6d70d951338b53a7d5ae0f
627 | a9cf9b6f40d045688e53823b7d5fe6925a077bab
628 | a7e3c89367266102e811bb5b0c8fa50301e4e3cd
629 | 6ef243e00b88d32c65bb0d138949202eb6a86aab
630 | 29a3e3a786f4e875c6865b44f68f73453e03fe87
631 | 58fb3058c5055525aabdd345bf576b6b50570e31
632 | 6a68893bc63f6a921359752f5fd0b02a3573ca15
633 | 0f57b34a1e8d498d8262c93c5e838c6e9ccb31b2
634 | cdf71c7a0a124d986e5649ac7035b1e4f6c4c0e2
635 | c0a50bff369a1d75adaa4ca26bb7f463de3ba6d6
636 | f471b8a43984faa7940bed0971e4b41abb8d5e23
637 | da445cd8cf6178e7f867266c02b658f47c681ec9
638 | 83b24fceb0b4e0070ea6c6f60ad6e8c3cbcb410d
639 | a2eb11f3faf888ae795134868b9e445f03882f0a
640 | c7df1dd8c3085ad60726cb5df637213c47dcbf44
641 | dd7ad9db544a228984e3631a3be2a7241549570f
642 | 0090a7299a87bbebfa02a3a49b39276750f93662
643 | a9e43c25cacf5199b26faff98b324ce4d6d31786
644 | 6ced9da9defa350361a0f56f696493898e9a484e
645 | c083876c2257e4fcfa7b19071a6b7a3e3aff5670
646 | d0593c7a79fc102a21aa30c221e228fc8a8f10d2
647 | accf251c719cf8f1d16a5687390caa8bd3214c6a
648 | 133a4e13f11ba4cd1c5a407ac62990013f5ab8d7
649 | 90a4ec668d371536acfa0522fda4fd1557bcef41
650 | 66554dc3a2ea9e5983ab3cfac197db7a98085e97
651 | 8d5a9ba2dfe14bafe364d1e89452a2f328fefd96
652 | 2126f21d3639c6b26edc6e0139e261839a901577
653 | 4322d0be11b74d1d98000589fe9f8753ba25f62d
654 | 566098d6aabbb120262df4537f804b009083a257
655 | a2f4b72146cb9e0115f05fb5c79621a77be9c384
656 | 7e4807fd379b54257d228d566f76132271d94e4a
657 | f45cc5cbf3a0b696886b68927afd29bcbb2eda8b
658 | d0e1f1bd95169aa9ac08aa81db92d88379b423c6
659 | a686fbbd0d3e1a986a3769ef6fe0eddf6086b597
660 | 398f9e0002964f9a765ecb49ad85b1d18c537b3f
661 | 4ae0bf3096b4870fa15aa3e94b886eda608bb7d8
662 | 065d2593379d21e16907543c3a02dfa1f1941ddb
663 | b3832f0c03776d306e81f0c9fe3b476b760b4d18
664 | 332901e8417872d76d3d6f3e6f41916308b9c7e3
665 | f30882e56569ce9ee9636b5b46a8dd0020a060ae
666 | 79a17dfeec2320df41542d8e158855903ded10bf
667 | 7ac93b637187ce75d9c425cbadd4dc94c0ac03d6
668 | 16b17b0613b89db33d4a0b87951a448292bac353
669 | 63223392124609a185f414e5f26fa2dea3e18bda
670 | 14df0fb83d091fa7aff2561b2be3c3541a5c51ce
671 | 3904e522da8780eb443371d11a82ccb6450b2ca1
672 | d83d9e482bddbdcc645186563f9237b8b40acd6a
673 | 17a741d2a71c0ad157c4c7d44f97ff47a1aa64de
674 | 6b357cede4926c7ad57d62c0e118d1c8643d3660
675 | 8a219c73acc245f2d55b29b27aeda557091bf99f
676 | daedffefbf01748208df87a3c40e369e613fda4b
677 | b7cdc4d13140ebcc18a682a27b16feb0eb1b6f34
678 | 5b7f59bf06af8ae2ef21a8458bddc34d52e33618
679 | da323c54d77b4ac5541609e0610353fe8b0b91c8
680 | 4a381e8fd42917d0a915ad6a4471b68d467f13eb
681 | 1e39a45dde7dfd2b128725918dde34a981dd3f61
682 | f56dc1cff1b4b4375666a2aa1a65ba4271ad6e86
683 | 043df34ec58c18333bb9b5ed09fa43817ecd5312
684 | f738d1824411e8970c2a5657595e90397db27e1f
685 | 2462adf49b50cf55a287061cc8f3cdfa5e7586c0
686 | bf8fbc28cefe92563a0a1c079d224a9c165c1e52
687 | b2ad26cf3bbc26adf8c61be017aa08c0427c33ed
688 | eb69cc5cb91b7932d272423262375877d19426cb
689 | 31e790096da1867e335feace43e7d0f80c67d673
690 | 5f88e50f8eea461ac37eeb37bd6f768facd84fe5
691 | 810ef3a2b43732db9c013aa5abf627ed9c810b63
692 | 626821d2692e7ffc97fa2981ddaef0c37e1e6026
693 | 34715d887106a8f72c9e1ec3474de61d37b67d54
694 | 79e876228053032de8ee70b794c3e62229029b8a
695 | 87f00959dddc29494a8b97a80b2b11abf228caea
696 | 7969ff8328b92036e2bfe50a4951e068205840f0
697 | 27df1c7f5f72c96f23bf4116d22a1a8534d04ad6
698 | f5e846a36ced8bfa1dfd3f1593c2f7cc1380aa16
699 | 65e55164449704b69770dfbf2b46775d00f4c6bc
700 | 600717190fb23dedbd617402c770ebcebcf7439b
701 | 46385fa57b9d0fbf2e6d74bd535bae4e27bfc0a7
702 | 19f7e78d683a74d8ac33d67804be3ca6c7e78276
703 | 02a8b6f6c7993115255a7d484ad5d29492bb2ddf
704 | 6dad22217e8e7ecdf23d3927a3d4dafd931216fb
705 | 6804e22218009f2920b7a31fc0b1e81cd35f1463
706 | 60bb0b08fa3839350cc93cdd84cdde9ba64734ea
707 | 017d2329e373a75ce622c4e51ae9643fd5c94010
708 | fb61b83f6d0c34601cc05796af88ce1e2437a6aa
709 | 812c9b7bf17eb705756398459fd351ee4cb5bc64
710 | 5ff1439610784f189424da05502ae80d46c4b07b
711 | f67c770654de996eae0ccb3edf2c1bd1e1511c7f
712 | 1748b37c96f90b81f5bc627e517455ecd7c98d55
713 | d5a3bd147e90276076d68894e7f793662e514300
714 | 4caeb38c20a732f18dec15ee6142e3b335ca6a4d
715 | 8ae0dd98a9b870815f9bc64bfbf842543f09081e
716 | 2c4be751b09c16ce1010e8abbc0df79f255474d3
717 | 4e24c5cd5fa70c985dc7c29f7bf4e5bc8d13af82
718 | 6202acef0355437c283a86b2ebdb7dfed9e7e146
719 | 4f47fd64a8b8564ea75f66fa5cd033259b5eb345
720 | 8c5f589c33841fc8e71ec22bab71cf41ed88dcc8
721 | 340ec41f8a9f4c97cc67ae40b5c7ba2d609c5e4a
722 | c6a57af0e5d45711665651a03a3b8b638656bfbe
723 | 809a6ea9060ef5a4026e61cf91c65b7fd1218a7a
724 | 250e947844e4f4538d42523850d44cd0e3279b4d
725 | d1b1ec1c878d29712c79287bfe992ba6791c504f
726 | 42c9b565f0fa1b3420bf60ef2f5eeefdf90ba224
727 | 3383b154f233a922e4d17d79944bcf18803ef162
728 | 2fef5408dcc5998a555cec6a2cdcf42079c539d8
729 | 6b9e9450ceb03c486cd14d5da0096e726ef42e31
730 | 7e3730586aa1be5733eb71425e65a153f215ab4f
731 | eb55dac93d2f096614feaa394271bb0764f84493
732 | d2cc7ab2033442f8ca0abfd343baebe02da67085
733 | 2e25728e824664f31734be5090b82830d636376e
734 | cc22825a32f7eb1fac3a9f56cfc04c21b04c84da
735 | c9e8fd2db303e0cd8798d90aa55a71b582dcc13f
736 | 151cd6d848dd9adf6c8f0dcc6213ca4773c7117d
737 | 87631c1b9a9b32a4b80333fb2133f063e0655919
738 | 450b2768257ab7c36985b59475408f9d085d6849
739 | 6ec40e717579b1e97f6310e89e9526266e2dc858
740 | 32b3d419d80463f676acef772bdd6e2e9c1cbb04
741 | 4751418bc7011331b29b1e127c76aa2dfdd4b157
742 | 4f8bf56007893e530058e86474abd10be687789a
743 | 75167c1f67d3b52cf380701f70a0ae19c543c2d1
744 | 3b4cc44dd64c2f01fb7325de10f553f52a15f21a
745 | 4bf8aad66e6a736a307e21aa885cf647f4d3d091
746 | 114ff0bddeeb3f70695abb3b8eb72ae50ab6ceb8
747 | 00d37c8269a4a1856119bd8cae177f79af9c3995
748 | ef18aa80d10c422353737f54674a62d560c3841b
749 | 5d4595b28a418a0d896ee990aecb70d174e83938
750 | 86496de0a9bc54ded557c9a612b957c7f4f03051
751 | 7f12b35f443711098092027e953ac26ee1dcee51
752 | 80e4074ddf6603463ce54b17adf3f74cb1a6c9ef
753 | 425d52deefec8b92e14295298d6ab4e6379d8d50
754 | 9822045b10d370cc11dc3af5c67087430458e9bb
755 | eb9985b519a7aa480f7bcd0cdc430bab14516ac0
756 | b0f319727e40f3cb8fd08d19a63d1d63ad6838c5
757 | 15188b1ecbe60dd50f3cb9634a88bb7066a15116
758 | 9d9ace61c04718ae2d2a44d7a744356ae13cb815
759 | 1746588f53d9be52e1fb11ec9a5ba9def13ab8f4
760 | d4559def7fe0975759f29379a0ed9ffb62685228
761 | 7c7ae0340c7dd19a5ea480786e63c4b7aac08aee
762 | 0de490e56eea811288015ec3321de30075842cfa
763 | 81a00542630b6a711ecc0822a5c87474bdcf5b3a
764 | 08efd4bb49f04a696e7b658c77f7d2d2cb658c75
765 | 4e2b1ac05ee94c9accb386b98563caf731454362
766 | 85140498e1f7d0c2d76c0f7f849dceff3cadffc2
767 | 69e7a83b25fdd71c0cb798f581a24bf25108b089
768 | dfd03d882d0be712b65e3e599836229305325ce6
769 | 76b72e1eca956490479b8984cf60175862f347e9
770 | 7037a7358a851c7ae94822af5f94499eb3e54618
771 | 5d1e2111769407c05cffdc0a46aca9620394369d
772 | 36de5c0355eff2d4926b1dabd5e361785d518fdc
773 | d101d207d193c78b247a5e9e3e634e6ab02e7c82
774 | 6fecac3e59683604db90661efbdcf38cc9d2f2b0
775 | b326900df3588e7b0260709cc86171a08077656c
776 | 06e9f8ce640845614936df59b742fbbdc6a208de
777 | 2d2bf35571156edb614aade1d45a5d43cf50e285
778 | 216ef395ffe0c88633298ebada3edfd468a7da7d
779 | f9bec7909386fbe2222b60c7733f9bb9f84fece6
780 | 8d2c39d4f63e99be39e9c691b453de724a6e7dc2
781 | 638f517a4b93f8bf00af1f3b1f8bfbb1e9727615
782 | aa8e5c7fad88d1bccbf5cc879150c28ede0a869d
783 | 9f34e1da97dc6feac62ea266b48650792da4751e
784 | 8a4693a438ad488c9da50f85d875fac423e43d44
785 | 37592089d864a7a8919b2c36ffd45df5d9f0939f
786 | d8a04e7b78a4bd47c851036989162084447c9588
787 | 1217b90c42d0eae3fe37a9a047f590b334df01ae
788 | ea2dc57fc4eb8e345170fea226a85a4c2a69f129
789 | 5ac73b51585f0d707bdbba026d4365fbaf583997
790 | 1f8ab1587428abc7df287f852017a5dcb509a2f8
791 | c328e30210481304a1ac1f65aa8c7235f4dea1bc
792 | e4cdff46d23fbb9417c298a70f0668797453cc5d
793 | ed16a72726a36ebfe3cb05b799bc10632a568137
794 | 0b96ef7ef010317777302f8e7f0d4dac02200335
795 | 02327461b336804e43891ea8a11f1cc5fa3a0cb0
796 | 4c50b0bf0bbe1f6ae62d5d743d861670e84b2876
797 | 1cd978b9d61ce9b8d0243a7ec9adc48675d1a952
798 | 7d515f81634d845d74d9f33b04470812765b432f
799 | d8f6be2bb1463262aaa8bd22ebfd1a32d902fccb
800 | ff2c420665c0f3df71b558d22dfd1869f50c9cfe
801 | 087615ba18d492bce8313c919a938efa81f93bd0
802 | 0787ab9e389747fba0b21bf2e97a14922babe4b3
803 | 5f8ce05aa4dc5a546986aacc5bddcb8e324bcbea
804 | 5eb72694336fd0cbacbe1a66d7151d21ff9b0911
805 | bd44872ddf9b7f32338a0a9cfd9f3bd680988ced
806 | defa3abadd2d2a07c862e668515360c43b7ef9c3
807 | 178ddad0e475d003179ee4c7a61410ff7325d96f
808 | ffbca6f1135ab49655f110f6519bc3ede9d10ce9
809 | 19008243edf1525e35abc01381fa96d5eaed6c25
810 | 799f1eeb7b617de86b74ab9ac8f9c53be64759d6
811 | ef1f742492fd35534877ad089511668a8b20de9b
812 | aad45a4e918fefae9d4affdf1de3c3b4c9822eb5
813 | 436a83e5eee6647e071daf364cc61081f7af330b
814 | 3f0e70ab65277768a7af65b2f7cece9b6754eb90
815 | 2ae4c8b4e6b32ed56f9e839173447ce86926acf6
816 | cd55b098b52da1c0fc77926f6644cb995fece25b
817 | 15570914c6891bee3aba54cd8d3357b221567965
818 | bff9337fbaba065e92208f9e68f4a8dc368b2b24
819 | 148caf895637d908013935a84b4c006bfef34ab0
820 | bcee827497ff2ea89ebb68f20d393d89b4b00394
821 | 0e777d12c9e18ac4a95c3ec98e03442ddcc3951f
822 | 40f6e6fb717651b6ddfe4a1f64080177549d3a60
823 | 33a9bda4e4253572d72ccb56fed01a0cce699237
824 | 158cccb971e9831d98219fb14dbf64f61de9afd6
825 | 84f1e39a820648942e0e6baef53594392a6784aa
826 | 3f323b7b6abaf0f202530d277a8b375892c44f01
827 | 7c49c826a424e32eb46bca49436a862c93f5c595
828 | 1ca09927993c37557622a21415b9bab2c541e466
829 | 2c7257349d7c0454f0badc32ab16b06d459c78e5
830 | db8542096e6a47bb9ebf14762770344364b4a252
831 | 061bd9c748c9f8095adef590d46f30949afe3adc
832 | b8d92ef64bebbd5407dc19cde206aeac45a6059b
833 | 2ced40b4f0ff8bc4c65e93ee695ae22cebaecffc
834 | 78e0444ba387dfe388e878b91df4cb85eb43d7c5
835 | 63d0948191632a095d89b4ab75c5f6092bfe0557
836 | ae117222d4f363a090f423c5f29984092b459053
837 | 42253ee162da24892fccc9dfc40d4c365951724f
838 | 40ea8c851a06cebd6f9db52eae8c4180352f4bf8
839 | 23b89230081765472b0b16ed4f827858ab5ef995
840 | 93fabfd14b2f8e37675f8240c0102161db1015b8
841 | a7f0254bc3e1519afa9e905777618492b148d7e1
842 | 5743b0de0633aae4f364fe5575e54ca2f97cbc88
843 | e57b318e23cde3b2e1461b2c2f6f1919baaaa583
844 | 30b135d28b2f1eba658e96b074405b4cdee5d5d0
845 | 53a0dbb2d5d91b2d5d2e7963ae22f42b07eb3cd8
846 | 775bc6cff0d60513c427ff40c4d8362f3c3cf4dd
847 | 6da3f7857678c1a71aa11648d04bd15808b32429
848 | 5f305f7f8ad41cfde433ea07ea934388784ca799
849 | 501f9b5d3dd5e7c235ebaaf74d1346cc927301ef
850 | 1d5c015003c849e8af02de29f7fb04d985c382c0
851 | d91c6837265b896a05674b24d7a11fd3a5f16e96
852 | 1c3354596b550b64194e40085aa8a6d9aa28cf4d
853 | 28be6a301b493486f38f9a494072d60c098d6c42
854 | dd7dff17371fea447eab2441b0834c968ca4e6e9
855 | 3320ad46ef6b1670124d7d042473836afdfab51c
856 | 36495cf618b05eb0e84d66feaf0b918607bc8969
857 | f07a24f078e929133c5d7d72bff3b4c7578fd543
858 | 6b6d5db4a86116683ac590cb506ecc0ec56ba1ac
859 | 7c6bdd60bc0a371caebfe5db8b4734577a7d4bd4
860 | cd856f01d55bb1b001f18d1c09612e4aff622849
861 | 42940c999bc17c1ce42a896056b29b48cc05ccbb
862 | c60535d0a0074e3739b78cae3424427523af9197
863 | ed1e73b2bfcba7fc8322e15bcbe29befde9cb3b4
864 | 3b3b5f017d891e4f4737cdb7089c226a03cd064f
865 | 8d99402e73d4a08e8a6372a3931e7876eff4cd7c
866 | 8ddb1eb0be4a61349ebae9d9f49a0a10ebfbf248
867 | 621d476c0b95b133cc1e21e02efb74fdf06c2410
868 | 4c8122fd557c72530285ddb4d7236a1e26e1a777
869 | 489f1df415312031233719573df0c96d35db4d91
870 | 01804f236fa7d0ef0051bce32998927b3d492137
871 | cf33de82b721ba0a79f23c7c4c243606b16e4fe8
872 | b89db7e04a32478b3b06a99e77c663ed385da67f
873 | a8b831e43fb71e72a1f18d5565c9178c9cdfd32d
874 | a1b0746e131d7264fa4caaab8f7b4d6b8b5e9994
875 | 5f7d39f41a4f18130bc9534844c40746afa63ee5
876 | a9307650eb19fe107b054a95e2e2106c8f5a1023
877 | 1f59d7ffb4541dc91bbb30690f0cc5d7c374769e
878 | 5754a0cd2d35238577049b53b83ccd425ae3d814
879 | 2fdce9aab59353f61545e2c56a6f42a054491f65
880 | f58da2e3121a4c9bbbbcd10a596ac56cbc308651
881 | 98e6cc44027eb6327b1cef62e7d0b5b915e73c8e
882 | f78d0aac0cbf99be38b9f016e64c9783c5463692
883 | a20f00975cc82a25e498490c0141c04c72dad522
884 | edfcabb79ab332956eb0f011255a41570b64c471
885 | a8240fcea930bd14db2c523962b69ac26a806775
886 | bb61ddedc9692447291a75a2ac6f94e58d8be023
887 | cc12091129de5fe6fb4d4b95f29887bfb48172b2
888 | 60d26594bf8c0c206b7fc72885164067210d44de
889 | 28cefe359c6c5364e543cf94a11658f8fd76ce12
890 | ef2ce41c003a052f202fa169015acf2c77993bb8
891 | 8cda2ea5b292e761aa715053badec5f434fbea8a
892 | a8f118b31711d2f379262a6bb68fe0f5dcc7879d
893 | d9452d0f04d972aeaf8544163666f382ac33af92
894 | 0e0fbb57fd5edb4bcbc1040e68d36c8e0a15d1fd
895 | 0fd461949d545e994abe44675d6b793240591775
896 | 109a435614b65c4c6fc074233001c898be4ee350
897 | 82a32f3a37832016b5e837d8aa945cc93d67c9f5
898 | 71315568d9a1b3466ed4a14d8cd163dbe86a6047
899 | 176d7d00b6ea8cc8bfd8359cd7622cda50e26eb0
900 | 643a1b5be1e8b2759856603ca96dce9f2440a9f2
901 | 34a37db54884f4eb607d3441a8422e21d91a0c79
902 | 7c6ebab0131293c2bd7adcd705612a94dbbfe252
903 | 7fb1742a1c1608da4dc75897c1bc99030175aa3f
904 | 9440cdae225fbcd98b8ef23ceba5c86a63201c70
905 | 8fba99aa12cf5c467506022328187e56cb43ed58
906 | a229711c246f89833f62234cac67bbed94f83008
907 | 6f150b8d8c3074edd7e8e32c766a2d4c37542452
908 | 05f5d665cf587376f15d28c34fa57e6cdf7fffa3
909 | 8ebab50fe4d3784125add7875ddeccf58953ac35
910 | 140319a69afb2166a674a7b86f2c4c82fe33f84f
911 | 48be2ee42311d0eae4e182e5b217b5247f795eb4
912 | 8d4f92a3e2b42ae6a278697b2789cf34b87b9ff4
913 | 68c20f82fa3e26a87fac0ebd629fd16a6371a9cb
914 | a9d4d8bf674288f91fec39540a75613f4da04b5e
915 | b03719e5016094f0e1452df3f8ec40019e5bd279
916 | 1b2cf0ddfd37df4d6baabd96db5a04d1957b59c9
917 | d49c40c31302979a38c47d9c15f44de743278e1d
918 | ccfa6b620ccfb03588a7ec31a3c98df2e085b9a4
919 | bd7e2fc16ef448663715da4916ea56fb6356b5d1
920 | 67600392dc3ad4efa5659c87ae55bc69d2c4411c
921 | 1eb59f0d785b5eed4f21a2ccc35d63003c7fbc89
922 | 2a7f212649bef4bbdb2d48b5799bfc3cf5876d92
923 | 136ff2b3421477d8aefdf4d10a052faaba90fca7
924 | 478419b4e78dbab0a7fe5031f059fe8a8c98cec2
925 | aad306ee6e12dd91bf15d720b67d7c6222ac5bac
926 | 1732fc7c815661513a74ac3dbb6755853996877e
927 | a684df1564ce8d41eba41e22f6b59e19b73ce4ff
928 | 8a31bfd366a6f679f185b87c162261ed6e835ccf
929 | bd5d6ab51575137b59d5f697c7950cacaa4182ca
930 | 7c8963a4a704ed1c56583ed4d2e7c46b3f7595c0
931 | 0eff7b45401faa55f22f22a0e6d8ae37ab4d8480
932 | ea10be60a172b61423628930b498e658ec184023
933 | 18253446f90c7b7a3689895a1a890f57d45cddcc
934 | 0797c99a1573e2a7b2138cfb8d0d748ebd255121
935 | 59dc0ae646965ad6561410ba62ee1f5906a5cc8f
936 | acb9ca236d0ba57340540eff4e7f07b2e5ffc7e5
937 | d39617b7947508c28f60eacbaace2bbc4d08baf5
938 | a8d77c7b9f779815df327a615964e7d0d56b183f
939 | 46b955153e18ba9f3acafdc0109b72ef754ba9bb
940 | 332f0604b39a4e599e3c099f594579da5c2daf1e
941 | 1cd9b2bda13d8f2849db65950042a0204e01b735
942 | 1bf4b779e7e778c8f700566d0efd82b7ade55dbf
943 | 79d2e14f84c411784891ed8151905747f339d0d2
944 | c720b996034ab363599ec196d7adae0b8511b050
945 | 3e1f911da0e974fcee74a3b41f1a2414bb633327
946 | f1b9c3d01343ac43388f5e5742c2366d7088c496
947 | d7407bf6c85eb1f7106a6b0179b7bbbc71b2ddc0
948 | df28f1dbb313e16455e18f20d3c1449d773f7b7a
949 | 2258cbf2539580fe88828d1414a1eb504951a0d6
950 | 9ea05ba9b371be8dfaeec30fa2ec3ce676901c5e
951 | fbfe78964fcb56e58a1e034e11145204356a9d1d
952 | 8ff05c2aafb4e4f2c1fc7926a574f6899cf450b7
953 | 1b653c0a4c9111896098e3755672b2b9a0da17e2
954 | 80987557014324e5ec3839b477d3feb74c2c038d
955 | f2df1b42d96fb39b0a10e648cea3eea1e01d7c2f
956 | 603cd0cd93f2d04ad50684db6b6bccb36e7457c0
957 | f41472494c9a2137142c3d24076b4e927a4f23cd
958 | 7acbf311e9f42fd29a9e63f2532eb702ad7a3edb
959 | 949caccfe8dd75ad92efb9dba23ec1b2f39af9a9
960 | 45d8c0eebb0e3d082fab34d17266419f09c170b4
961 | d6289779bafb606ca9fa42043610315406b9a2c1
962 | f16678cf4bdf551e13c29bcbd3df0252e641dce1
963 | c3a772e05388b9fc0ea58b1a0384140c16d3e903
964 | 15ad252481cdca2122f6e355623a101c2e2a33b3
965 | 8f57366360ff48ba30f45b6fa83cd76664e84435
966 | 4d035ac0655f4b8624002d8b9e402776a66d647f
967 | 8c2da6b02d84fcc35673f02a8e6e1a09e254e72e
968 | 13a2786e6bfc2fe5e86a69d4c3c5ca0ac6da5fa9
969 | d1c85cd90ce557086eebcc0670211ff77cc37c91
970 | 8f56a4f7074d679c3151cc1743cc2da6cd15844c
971 | 9a591de9a309b8df99ddaf342e84196d875a7dba
972 | b84e1b45e41accbfb940e12fc5387a207057aa2c
973 | 39b4557e558ae981364e66bfcd329afdc91d53ff
974 | 0b528c52df66baadfc18c1b99720109fd3966e89
975 | 1601d00ca61feb15ffc6b9854c9091d0e9e52ea2
976 | c942a895dc
--------------------------------------------------------------------------------
/wallstreetcnScrapy/wallstreetcnScrapy/items.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Define here the models for your scraped items
4 | #
5 | # See documentation in:
6 | # http://doc.scrapy.org/en/latest/topics/items.html
7 |
8 | from scrapy import Field
9 | import scrapy
10 |
11 |
class CommentaryItem(scrapy.Item):
    """One scraped news/commentary article.

    Spiders fill the article fields; the item pipelines add ``id``,
    ``images`` and ``firstimage`` afterwards.
    """

    # MD5 hex digest of uri + title + author, assigned by IdGeneratePipeline.
    id = scrapy.Field()
    # Address of the page the article was scraped from.
    uri = scrapy.Field()
    # Store-relative path of the first downloaded image, assigned by
    # ImageDownloadPipeline when at least one image is available.
    firstimage = scrapy.Field()

    # title, uri and author are checked by RequiredFieldsPipeline.
    title = scrapy.Field()
    author = scrapy.Field()

    # Remaining article data; persisted by MysqlPipeline.
    time = scrapy.Field()
    description = scrapy.Field()
    content = scrapy.Field()
    view = scrapy.Field()

    # Image URLs found in the article (input of ImageDownloadPipeline).
    image_urls = scrapy.Field()
    # List of (image_url, file_path) tuples produced by ImageDownloadPipeline.
    images = scrapy.Field()
24 |
25 |
--------------------------------------------------------------------------------
/wallstreetcnScrapy/wallstreetcnScrapy/main.py:
--------------------------------------------------------------------------------
1 | '''
2 | @author: Administrator
3 | '''
4 | import scrapy.cmdline
5 | from scrapy.crawler import CrawlerProcess
6 | from wallstreetcnScrapy.spiders import CommentarySpider, jqkaCommentarySpider,\
7 | sinaCommentarySpider
8 | #,'-s','JOBDIR=crawls/CommentarySpider-1'
def main(spider_name='jqkaCommentarySpider',
         job_dir='crawls/CommentarySpider-2'):
    """Run one spider through the Scrapy command line with a resumable job dir.

    Equivalent to ``scrapy crawl <spider_name> -s JOBDIR=<job_dir>``.  The
    defaults reproduce the invocation that used to be hard-coded here.  The
    other combinations that were previously selected by commenting lines in
    and out are:

    * ``'CommentarySpider'``     with ``'crawls/CommentarySpider-1'``
    * ``'sinaCommentarySpider'`` with ``'crawls/CommentarySpider-3'``

    :param spider_name: ``name`` attribute of the spider to crawl.
    :param job_dir: directory in which Scrapy persists the crawl state
        (request queue and seen-request fingerprints).

    NOTE(review): scrapy.cmdline.execute() is expected to end the process
    when the crawl finishes, so only one spider can be run per call --
    confirm against the installed Scrapy version.
    """
    scrapy.cmdline.execute(argv=[
        'scrapy', 'crawl', spider_name,
        '-s', 'JOBDIR=%s' % job_dir,
    ])


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/wallstreetcnScrapy/wallstreetcnScrapy/middlewares.py:
--------------------------------------------------------------------------------
1 | import random
2 | import base64
3 | from scrapy.utils.project import get_project_settings
# Project settings are loaded once, when this module is imported.
settings = get_project_settings()
# List of proxy dicts; ProxyMiddleware reads the 'ip_port' and 'user_pass'
# keys of a randomly chosen entry for every request.
PROXIES = settings['PROXIES']
6 |
class RandomUserAgent(object):
    """Randomly rotate user agents based on a list of predefined ones."""

    def __init__(self, agents):
        """Remember the candidate user-agent strings.

        :param agents: sequence of user-agent strings; may be empty.
        """
        self.agents = agents

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware from the crawler's ``USER_AGENTS`` setting."""
        return cls(crawler.settings.getlist('USER_AGENTS'))

    def process_request(self, request, spider):
        """Give *request* a random ``User-Agent`` unless it already has one.

        :param request: object whose ``headers`` mapping is updated in place.
        :param spider: unused.
        :returns: None in every case.
        """
        if not self.agents:
            # random.choice() raises IndexError on an empty sequence, which
            # would fail every single request when USER_AGENTS is missing or
            # empty.  Leave the headers untouched instead.
            return
        request.headers.setdefault('User-Agent', random.choice(self.agents))
18 |
class ProxyMiddleware(object):
    """Route each request through a proxy picked at random from ``PROXIES``."""

    def process_request(self, request, spider):
        """Set ``request.meta['proxy']`` and, if needed, proxy credentials.

        Each entry of ``PROXIES`` is a dict with an ``'ip_port'`` key
        (``"host:port"``) and an optional ``'user_pass'`` key
        (``"user:password"``).  An empty or missing ``'user_pass'`` means
        the proxy needs no authentication.

        :param request: object whose ``meta`` and ``headers`` mappings are
            updated in place.
        :param spider: unused.
        :returns: None in every case.
        """
        if not PROXIES:
            # Nothing to choose from: send the request directly instead of
            # letting random.choice() raise IndexError.
            return

        proxy = random.choice(PROXIES)
        request.meta['proxy'] = "http://%s" % proxy['ip_port']

        # Truthiness, not "is not None": the settings use '' for "no
        # credentials", which must not produce an authorization header.
        user_pass = proxy.get('user_pass')
        if not user_pass:
            return
        if not isinstance(user_pass, bytes):
            user_pass = user_pass.encode('utf-8')
        # b64encode() adds no trailing newline (encodestring() did, which
        # corrupted the header value).
        request.headers['Proxy-Authorization'] = (
            b'Basic ' + base64.b64encode(user_pass))
28 |
--------------------------------------------------------------------------------
/wallstreetcnScrapy/wallstreetcnScrapy/pipelines.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Define your item pipelines here
4 | #
5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
7 | import solr
8 | from scrapy.utils.project import get_project_settings
9 | from twisted.enterprise import adbapi
10 | from hashlib import md5
11 | from scrapy import log
12 | import os
13 | import requests
14 | import re
15 | from scrapy.exceptions import DropItem
16 |
# Project settings, loaded once when this module is imported.  Read by
# ImageDownloadPipeline (IMAGES_STORE) and SolrPipeline (SOLR_MAPPING,
# SOLR_URL).
settings = get_project_settings()
18 |
class RequiredFieldsPipeline(object):
    """Drop items that lack any of the mandatory fields."""

    # Fields that must be present and non-empty for an item to be kept.
    required_fields = ('title', 'uri', 'author')

    def process_item(self, item, spider):
        """Return *item* unchanged if every required field has a value.

        :param item: mapping-like scraped item.
        :param spider: unused.
        :returns: the same *item* object.
        :raises DropItem: if a required field is unset or empty.
        """
        for field in self.required_fields:
            # .get() makes a field that was never populated count as
            # missing; item[field] would raise KeyError for it instead of
            # dropping the item.
            if not item.get(field):
                # log.msg("Field '%s' missing" % (field))
                print("Field '%s' missing" % field)
                raise DropItem("Field '%s' missing: %r" % (field, item))
        return item
31 |
class IdGeneratePipeline(object):
    """Assign every item an id derived from its uri, title and author."""

    def process_item(self, item, spider):
        """Store the computed id under ``item['id']`` and return the item."""
        link_md5 = self._get_linkmd5id(item)
        item['id'] = link_md5
        return item

    def _get_linkmd5id(self, item):
        """Return the MD5 hex digest of uri + title + author (UTF-8)."""
        fingerprint = item['uri'] + item['title'] + item['author']
        digest = md5(fingerprint.encode("utf8"))
        return digest.hexdigest()
40 |
41 |
class ImageDownloadPipeline(object):
    """Download the images listed in ``item['image_urls']``.

    Files are stored below
    ``<IMAGES_STORE>/<spider.name>/<id[0]>/<id[1]>/<id>/``.  The pipeline
    sets ``item['images']`` to a list of ``(image_url, file_path)`` tuples
    for every image that is available on disk and ``item['firstimage']``
    to the store-relative path of the first of them.

    NOTE: requests.get() is a blocking call; downloads are bounded by
    ``_TIMEOUT`` seconds each so a stalled server cannot hang the crawl.
    """

    # Characters replaced by '_' in file names: \ / : * ? " < >
    _UNSAFE_CHARS = re.compile(r'[\\/:*?"<>]')
    # Seconds to wait for the image server before giving up on one image.
    _TIMEOUT = 30

    def process_item(self, item, spider):
        """Fetch the item's images and record where they were stored.

        :param item: mapping-like item; needs ``'id'`` when it carries
            ``'image_urls'``.
        :param spider: object whose ``name`` selects the sub-directory.
        :returns: the same *item* object.
        """
        if 'image_urls' not in item:
            return item

        item_id = item['id']
        abpath = '%s/%s/%s/%s' % (spider.name, item_id[0], item_id[1], item_id)
        dir_path = '%s/%s' % (settings['IMAGES_STORE'], abpath)
        if item['image_urls'] and not os.path.exists(dir_path):
            os.makedirs(dir_path)

        images = []
        for image_url in item['image_urls']:
            file_path = '%s/%s' % (dir_path, self._file_name(image_url))
            # Only images that really are on disk get recorded, so
            # 'images' and 'firstimage' never point at a missing file.
            if self._is_stored(file_path) or self._download(image_url, file_path):
                images.append((image_url, file_path))

        item['images'] = images
        if images:
            first_path = images[0][1]
            item['firstimage'] = '%s/%s' % (
                abpath, first_path[first_path.rfind('/') + 1:])
            print(item['firstimage'])
        return item

    def _file_name(self, image_url):
        """Derive a file-system safe file name from the last URL segment."""
        name = image_url.split('/')[-1]
        # A '!...' suffix is cut off, but only when the '!' sits after
        # index 4 (presumably an image-variant marker -- TODO confirm).
        bang = name.rfind('!')
        if bang > 4:
            name = name[:bang]
        # Keep at most the last 100 characters of the sanitized name.
        return self._UNSAFE_CHARS.sub('_', name)[-100:]

    def _is_stored(self, file_path):
        """Tell whether a non-empty file already exists at *file_path*."""
        # Empty files are leftovers of failed downloads, not images.
        return os.path.isfile(file_path) and os.path.getsize(file_path) > 0

    def _download(self, image_url, file_path):
        """Stream *image_url* into *file_path*; return True on success."""
        try:
            response = requests.get(image_url, stream=True,
                                    timeout=self._TIMEOUT)
            # Do not store an HTML error page as if it were the image.
            response.raise_for_status()
            # The file is created only after the server answered, so a
            # failed request no longer leaves an empty file behind.
            with open(file_path, 'wb') as handle:
                for block in response.iter_content(1024):
                    if not block:
                        break
                    handle.write(block)
        except (requests.RequestException, IOError, OSError):
            # Best effort: one broken image must not drop the whole item.
            self._discard(file_path)
            return False
        return True

    def _discard(self, file_path):
        """Remove a partially written file, ignoring failures."""
        if os.path.isfile(file_path):
            try:
                os.remove(file_path)
            except OSError:
                pass
79 |
class SolrPipeline(object):
    """Send every item to the Solr server as one document.

    ``SOLR_MAPPING`` maps each Solr field name to the item field (or to a
    list of item fields) its value is taken from.

    NOTE(review): no commit is issued here; the documents presumably
    become searchable through the server's auto-commit -- confirm.
    """

    def __init__(self):
        """Read the mapping and open the connection named by ``SOLR_URL``."""
        self.mapping = settings['SOLR_MAPPING'].items()
        self.solr = solr.Solr(settings['SOLR_URL'])

    def process_item(self, item, spider):
        """Build the Solr document for *item*, add it and return *item*.

        Item fields that are not set are sent as ``None``.

        :param item: mapping-like scraped item.
        :param spider: unused.
        :returns: the same *item* object.
        :raises TypeError: if a mapping value is neither a string nor a
            list/tuple of strings.
        """
        # Byte and unicode strings alike (str only covers one of them).
        text_types = (str, type(u''))
        solr_item = {}
        for dst, src in self.mapping:
            if isinstance(src, text_types):
                solr_item[dst] = item.get(src)
            elif isinstance(src, (list, tuple)):
                solr_item[dst] = [item.get(name) for name in src]
            else:
                # An explicit error instead of "assert False", which is
                # removed under -O and would then silently skip the field.
                raise TypeError(
                    "SOLR_MAPPING[%r] must be a field name or a list of "
                    "field names, got %r" % (dst, src))
        self.solr.add(solr_item)
        return item
98 |
class MysqlPipeline(object):
    """Persist items in the MySQL table ``wstable`` (insert or update)."""

    def __init__(self, dbpool):
        """Keep the connection pool that runs the database interactions."""
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        """Create the pipeline with a MySQLdb pool built from MYSQL_* settings."""
        connection_kwargs = {
            'host': settings['MYSQL_HOST'],
            'db': settings['MYSQL_DBNAME'],
            'user': settings['MYSQL_USER'],
            'passwd': settings['MYSQL_PASSWD'],
            'port': settings['MYSQL_PORT'],
            'charset': 'utf8',
            'use_unicode': True,
        }
        return cls(adbapi.ConnectionPool('MySQLdb', **connection_kwargs))

    def process_item(self, item, spider):
        """Schedule the upsert of *item* and return the resulting deferred.

        Errors are passed to ``_handle_error``; whatever the outcome, the
        deferred finally yields *item* itself.
        """
        deferred = self.dbpool.runInteraction(self._do_upinsert, item, spider)
        deferred.addErrback(self._handle_error, item, spider)
        deferred.addBoth(lambda _outcome: item)
        return deferred

    def _parseItem(self, item):
        """Convert the item fields to UTF-8 encoded column values.

        :returns: the tuple ``(uri, title, author, time, description,
            content, images, view, id)`` -- the id comes last.
        """
        def utf8(field):
            return str(item[field].encode('utf8'))

        id1 = utf8('id')
        uri = utf8('uri')
        title = utf8('title')
        author = utf8('author')
        # 'T' becomes a blank and the final character is cut off
        # (presumably a trailing 'Z' of an ISO timestamp -- TODO confirm
        # that every spider delivers this format).
        timestamp = str(item['time'].encode('utf8').replace('T', ' ')[:-1])
        description = utf8('description')
        content = utf8('content')
        images = str(item['images'])
        view = utf8('view')
        return (uri, title, author, timestamp, description, content,
                images, view, id1)

    def _do_upinsert(self, conn, item, spider):
        """Update the row carrying the item's id, or insert a new one.

        Runs inside the pool's interaction; *conn* is used like a cursor.
        """
        conn.execute("""SELECT EXISTS(
            SELECT 1 FROM wstable WHERE id = %s
        )""", (item['id'],))
        already_stored = conn.fetchone()[0]
        # Order: uri, title, author, time, description, content, images,
        # view, id -- exactly the placeholder order of the UPDATE below.
        values = self._parseItem(item)
        if already_stored:
            conn.execute("""
                update wstable set uri = %s, title = %s, author = %s, time1 = %s, description = %s, content = %s, images = %s, view1 = %s where id = %s
            """, values)
        else:
            # The INSERT lists the id first, followed by the other columns.
            conn.execute("""
                insert into wstable(id, uri, title, author, time1, description, content, images, view1)
                values(%s, %s, %s, %s, %s, %s, %s, %s, %s)
            """, values[-1:] + values[:-1])
        print('finished item %s' % item['id'])

    def _handle_error(self, failue, item, spider):
        """Log the failure of a database interaction."""
        log.err(failue)
162 |
--------------------------------------------------------------------------------
/wallstreetcnScrapy/wallstreetcnScrapy/settings.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Scrapy settings for wallstreetcnScrapy project
4 | #
5 | # For simplicity, this file contains only settings considered important or
6 | # commonly used. You can find more settings consulting the documentation:
7 | #
8 | # http://doc.scrapy.org/en/latest/topics/settings.html
9 | # http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
10 | # http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
11 |
# Name of the bot; it is also the name of the package holding the project.
BOT_NAME = 'wallstreetcnScrapy'

# Spiders are looked up in, and new ones generated into, the same package.
NEWSPIDER_MODULE = BOT_NAME + '.spiders'
SPIDER_MODULES = [NEWSPIDER_MODULE]

# Module searched for the project's custom scrapy commands.
COMMANDS_MODULE = BOT_NAME + '.commands'
18 |
19 | # Crawl responsibly by identifying yourself (and your website) on the user-agent
20 | #USER_AGENT = 'wallstreetcnScrapy (+http://www.yourdomain.com)'
21 |
22 | # Configure maximum concurrent requests performed by Scrapy (default: 16)
23 | #CONCURRENT_REQUESTS=32
24 |
25 | # Configure a delay for requests for the same website (default: 0)
26 | # See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
27 | # See also autothrottle settings and docs
28 | # DOWNLOAD_DELAY=0.1
29 | # The download delay setting will honor only one of:
30 | #CONCURRENT_REQUESTS_PER_DOMAIN=16
31 | #CONCURRENT_REQUESTS_PER_IP=16
32 |
# Cookies are switched off for this project (Scrapy enables them by default).
COOKIES_ENABLED = False
35 |
36 | # Disable Telnet Console (enabled by default)
37 | #TELNETCONSOLE_ENABLED=False
38 |
39 | # Override the default request headers:
40 | #DEFAULT_REQUEST_HEADERS = {
41 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
42 | # 'Accept-Language': 'en',
43 | #}
44 |
45 | # Enable or disable spider middlewares
46 | # See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
# The project defines no spider middlewares.  RandomUserAgent and
# ProxyMiddleware implement only process_request(), which is a downloader
# middleware hook; registered here they were never called, so they now live
# in DOWNLOADER_MIDDLEWARES below.
SPIDER_MIDDLEWARES = {
    # 'wallstreetcnScrapy.middlewares.MyCustomSpiderMiddleware': 543,
}

# Enable or disable downloader middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
# Both entries use low order numbers so that their process_request() runs
# before Scrapy's built-in user-agent and proxy middlewares.
# NOTE: this activates the proxies listed in PROXIES -- they must be
# reachable, otherwise requests will fail.
DOWNLOADER_MIDDLEWARES = {
    # 'wallstreetcnScrapy.middlewares.MyCustomDownloaderMiddleware': 1,
    'wallstreetcnScrapy.middlewares.RandomUserAgent': 1,
    'wallstreetcnScrapy.middlewares.ProxyMiddleware': 100,
}
59 |
60 | # Enable or disable extensions
61 | # See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
62 | #EXTENSIONS = {
63 | # 'scrapy.telnet.TelnetConsole': None,
64 | #}
65 |
66 | # Configure item pipelines
67 | # See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
# Pipelines run in ascending order of their number: validate the item,
# give it an id, fetch its images, index it in Solr, store it in MySQL.
_pipelines = 'wallstreetcnScrapy.pipelines.'
ITEM_PIPELINES = {
    _pipelines + 'RequiredFieldsPipeline': 250,
    _pipelines + 'IdGeneratePipeline': 300,
    _pipelines + 'ImageDownloadPipeline': 350,
    _pipelines + 'SolrPipeline': 400,
    _pipelines + 'MysqlPipeline': 450,
}

# NOTE(review): the '#/' part looks like the address of the Solr admin page
# rather than of the core itself ('.../solr/wscore') -- confirm that
# documents really reach the 'wscore' core.
SOLR_URL = 'http://localhost:8080/solr/#/wscore'

# Solr field name -> item field that provides its value.  Several Solr
# fields may be fed from the same item field.
SOLR_MAPPING = dict(
    id='id',
    uri='uri',
    firstimage='firstimage',
    title='title',
    title_py='title',
    author='author',
    authorString='author',
    author_py='author',
    time='time',
    description='description',
    content='content',
)
91 |
92 | # Enable and configure the AutoThrottle extension (disabled by default)
93 | # See http://doc.scrapy.org/en/latest/topics/autothrottle.html
94 | # NOTE: AutoThrottle will honour the standard settings for concurrency and delay
95 | #AUTOTHROTTLE_ENABLED=True
96 | # The initial download delay
97 | #AUTOTHROTTLE_START_DELAY=5
98 | # The maximum download delay to be set in case of high latencies
99 | #AUTOTHROTTLE_MAX_DELAY=60
100 | # Enable showing throttling stats for every response received:
101 | #AUTOTHROTTLE_DEBUG=False
102 |
103 | # Enable and configure HTTP caching (disabled by default)
104 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
105 | #HTTPCACHE_ENABLED=True
106 | #HTTPCACHE_EXPIRATION_SECS=0
107 | #HTTPCACHE_DIR='httpcache'
108 | #HTTPCACHE_IGNORE_HTTP_CODES=[]
109 | #HTTPCACHE_STORAGE='scrapy.extensions.httpcache.FilesystemCacheStorage'
110 |
# The values below describe one particular machine (drive letters, database
# account).  Each of them can be overridden through an environment variable
# named WSCN_<SETTING>; without it the previous hard-coded value is used.
import os

#img
# IMAGES_STORE = '\\tmp\\images'
IMAGES_STORE = os.environ.get('WSCN_IMAGES_STORE', 'H://Administrator/Documents')

# log
LOG_ENABLED = True
LOG_LEVEL = 'ERROR'
LOG_ENCODING = 'utf-8'
LOG_FILE = os.environ.get(
    'WSCN_LOG_FILE',
    'H://Administrator/Documents/Workspace/python/wallstreetcnScrapy/log/ws.log')

# start MySQL database configure setting
MYSQL_HOST = os.environ.get('WSCN_MYSQL_HOST', 'localhost')
MYSQL_DBNAME = os.environ.get('WSCN_MYSQL_DBNAME', 'gpcore')
MYSQL_USER = os.environ.get('WSCN_MYSQL_USER', 'root')
# Prefer setting WSCN_MYSQL_PASSWD over keeping a real password in this file.
MYSQL_PASSWD = os.environ.get('WSCN_MYSQL_PASSWD', '123456')
# Environment values are strings; the port has to be an integer.
MYSQL_PORT = int(os.environ.get('WSCN_MYSQL_PORT', 3306))
# end of MySQL database configure setting
128 |
# Pool of browser identification strings.  The RandomUserAgent middleware
# receives this list (via the USER_AGENTS setting) and picks one entry at
# random for each request that has no User-Agent header yet.
USER_AGENTS = [
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
    "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
    "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
    "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
]
147 |
# "host:port" of every proxy the crawler may use.
_proxy_addresses = (
    '110.73.38.112:8123',
    '49.68.236.204:8118',
    '182.88.255.120:8123',
    '117.42.71.88:8118',
    '182.90.21.10:80',
    '182.90.15.7:80',
    '113.87.241.4:9999',
    '112.255.23.180:9999',
)

# One dict per proxy, as read by ProxyMiddleware.  'user_pass' holds the
# "user:password" credentials; it is empty for all of these proxies.
PROXIES = list(
    {'ip_port': address, 'user_pass': ''} for address in _proxy_addresses
)
--------------------------------------------------------------------------------
/wallstreetcnScrapy/wallstreetcnScrapy/spiders/.gitignore:
--------------------------------------------------------------------------------
1 | /CommentarySpider.pyc
2 | /__init__.pyc
3 | /jqkaCommentarySpider.pyc
4 | /sinaCommentarySpider.pyc
5 |
--------------------------------------------------------------------------------
/wallstreetcnScrapy/wallstreetcnScrapy/spiders/CommentarySpider.py:
--------------------------------------------------------------------------------
1 | '''
2 | @author: Administrator
3 | '''
4 | #coding=utf-8
5 | from scrapy.spiders.crawl import CrawlSpider, Rule
6 | from scrapy.utils.response import get_base_url
7 | from wallstreetcnScrapy.items import CommentaryItem
8 | from scrapy.linkextractors.lxmlhtml import LxmlLinkExtractor
9 | from bs4 import BeautifulSoup
10 | import re
11 | # from scrapy import log
12 |
13 |
class CommentarySpider(CrawlSpider):
    """Crawl wallstreetcn.com news listings and scrape every article page.

    Links matching ``page=N`` are followed only to discover more links;
    links matching ``node/N`` are followed and handed to
    :meth:`parse_commentary`, which builds one ``CommentaryItem`` each.
    """

    name = 'CommentarySpider'
    allowed_domains = ['wallstreetcn.com']
    start_urls = [
        'http://wallstreetcn.com/news?status=published&type=news&order=-created_at&limit=30&page=1',
    ]
    rules = [
        Rule(LxmlLinkExtractor(allow=(r"page=\d+",))),
        Rule(LxmlLinkExtractor(allow=(r"node/\d+",)), follow=True, callback='parse_commentary'),
    ]

    @staticmethod
    def _first_or_empty(nodes):
        """Return the extracted first node of *nodes*, or '' when there is none."""
        return nodes[0].extract() if nodes else ''

    def parse_commentary(self, response):
        """Build a ``CommentaryItem`` from a single article response.

        Fills ``uri``, ``title``, ``time``, ``author``, ``description``,
        ``content``, ``image_urls`` and ``view``.

        Returns the populated item, or ``None`` (after logging a warning)
        when the page has no ``article-content`` div, instead of failing
        with an ``IndexError``.
        """
        sel = response.selector
        item = CommentaryItem()

        # uri
        item['uri'] = get_base_url(response)
        # Call form of print: same output under the Python 2 print statement.
        print('Download from uri: %s' % item['uri'])

        # title
        item['title'] = self._first_or_empty(
            sel.xpath('//h1[@class="article-title"]/text()'))

        # time
        raw_time = self._first_or_empty(
            sel.xpath('//span[@class="item time"]/text()'))
        if raw_time:
            # Everything that is not a digit, a colon or whitespace (e.g. CJK
            # date markers) becomes '-'. The class is spelled out explicitly
            # because the previous '\u4E00-\u9FA5' pattern only behaved this
            # way when '\u' was NOT interpreted as a unicode escape.
            raw_time = re.sub(r'[^0-9:\s]', '-', raw_time)
            # First 10 chars are kept as the date part, the text from index
            # 12 on as the time part.
            item['time'] = raw_time[:10] + 'T' + raw_time[12:] + 'Z'
        else:
            item['time'] = None

        # author
        item['author'] = self._first_or_empty(
            sel.xpath('//span[@class="item author"]/a/text()'))

        # description
        nodes = sel.xpath('//meta[@name="description"]/@content')
        # NOTE(review): the last 84 characters are dropped unconditionally,
        # presumably a fixed site-wide suffix -- confirm against live pages.
        item['description'] = nodes[0].extract()[:-84] if nodes else ''

        # content, images and view
        blocks = sel.xpath('//div[@class="article-content"]').extract()
        if not blocks:
            self.logger.warning('No article-content div found in %s', item['uri'])
            return None
        html = blocks[0]
        # NOTE(review): for markup longer than 200 chars the last 123 chars
        # are cut off; the literal appended afterwards is empty as written
        # here -- confirm whether a closing tag was meant to be re-appended.
        view = html[:-123] + '' if len(html) > 200 else html

        # 'lxml' is what bs4 selects by default when lxml is installed;
        # naming it keeps parsing deterministic and avoids the bs4 warning.
        soup = BeautifulSoup(view, 'lxml')
        item['content'] = soup.text

        image_urls = []
        for img in soup.find_all('img'):
            # Prefer src; fall back to alt when that holds the absolute URL.
            for attr in ('src', 'alt'):
                url = img.get(attr) or ''
                if url.startswith('http'):
                    image_urls.append(url)
                    break
        item['image_urls'] = image_urls

        item['view'] = view
        return item
67 |
68 |
69 |
70 |
71 |
72 |
--------------------------------------------------------------------------------
/wallstreetcnScrapy/wallstreetcnScrapy/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 |
--------------------------------------------------------------------------------
/wallstreetcnScrapy/wallstreetcnScrapy/spiders/jqkaCommentarySpider.py:
--------------------------------------------------------------------------------
1 | '''
2 | @author: Administrator
3 | '''
4 | #coding=utf-8
5 | from scrapy.spiders.crawl import CrawlSpider, Rule
6 | from scrapy.utils.response import get_base_url
7 | from wallstreetcnScrapy.items import CommentaryItem
8 | from scrapy.linkextractors.lxmlhtml import LxmlLinkExtractor
9 | from bs4 import BeautifulSoup
10 | # from scrapy import log
11 |
12 |
class jqkaCommentarySpider(CrawlSpider):
    """Crawl news.10jqka.com.cn and scrape every ``<digits>/<name>.shtml`` page.

    Matching links are followed and handed to :meth:`parse_commentary`,
    which builds one ``CommentaryItem`` per page.
    """

    name = 'jqkaCommentarySpider'
    allowed_domains = ['news.10jqka.com.cn']
    start_urls = [
        'http://news.10jqka.com.cn/',
    ]
    rules = [
        # The dot is escaped so only a literal ".shtml" suffix matches.
        Rule(LxmlLinkExtractor(allow=(r"\d+/\w+\.shtml",)), follow=True, callback='parse_commentary'),
    ]

    @staticmethod
    def _first_or_empty(nodes):
        """Return the extracted first node of *nodes*, or '' when there is none."""
        return nodes[0].extract() if nodes else ''

    def parse_commentary(self, response):
        """Build a ``CommentaryItem`` from a single article response.

        Fills ``uri``, ``title``, ``time``, ``author``, ``description``,
        ``content``, ``image_urls`` and ``view``.

        Returns the populated item, or ``None`` (after logging a warning)
        when the page has no ``atc-content`` div, instead of failing with
        an ``IndexError``.
        """
        sel = response.selector
        item = CommentaryItem()

        # uri
        item['uri'] = get_base_url(response)
        # Call form of print: same output under the Python 2 print statement.
        print('Download from uri: %s' % item['uri'])

        # title
        item['title'] = self._first_or_empty(
            sel.xpath('//div[@class="atc-head"]/h1/text()'))

        # time
        raw_time = self._first_or_empty(sel.xpath('//span[@class="time"]/text()'))
        if raw_time:
            # First 10 chars are kept as the date part, the text from index
            # 12 on as the time part.
            item['time'] = raw_time[:10] + 'T' + raw_time[12:] + 'Z'
        else:
            item['time'] = None

        # author
        nodes = sel.xpath('//span[@id="source_baidu"]/text()')
        author_text = BeautifulSoup(nodes[0].extract(), 'lxml').text if nodes else ''
        # NOTE(review): only the LAST CHARACTER of the source text is stored
        # as the author -- confirm this is intended (a split()[-1] may have
        # been meant). Empty text yields the placeholder 'NULL'.
        item['author'] = author_text[-1] if author_text else 'NULL'

        # description ('//' prefix, consistent with the other spiders; the
        # former '////' form is not a valid XPath location path)
        item['description'] = self._first_or_empty(
            sel.xpath('//meta[@name="description"]/@content'))

        # content, images and view
        blocks = sel.xpath('//div[@class="atc-content"]').extract()
        if not blocks:
            self.logger.warning('No atc-content div found in %s', item['uri'])
            return None
        html = blocks[0]
        # NOTE(review): the marker searched for is an empty string as written
        # here, so rfind() returns len(html) and the slice keeps the whole
        # markup -- confirm whether a real HTML marker was intended.
        i = html.rfind('')
        view = html[:i] + r'' if i > -1 else html

        # 'lxml' is what bs4 selects by default when lxml is installed;
        # naming it keeps parsing deterministic and avoids the bs4 warning.
        soup = BeautifulSoup(view, 'lxml')
        item['content'] = soup.text

        image_urls = []
        for img in soup.find_all('img'):
            # Prefer src; fall back to alt when that holds the absolute URL.
            for attr in ('src', 'alt'):
                url = img.get(attr) or ''
                if url.startswith('http'):
                    image_urls.append(url)
                    break
        item['image_urls'] = image_urls

        item['view'] = view
        return item
69 |
70 |
71 |
72 |
73 |
74 |
--------------------------------------------------------------------------------
/wallstreetcnScrapy/wallstreetcnScrapy/spiders/sinaCommentarySpider.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | '''
3 | @author: Administrator
4 | '''
5 | from scrapy.spiders.crawl import CrawlSpider, Rule
6 | from scrapy.utils.response import get_base_url
7 | from wallstreetcnScrapy.items import CommentaryItem
8 | from scrapy.linkextractors.lxmlhtml import LxmlLinkExtractor
9 | from bs4 import BeautifulSoup
10 | # from scrapy import log
11 |
12 |
class sinaCommentarySpider(CrawlSpider):
    """Crawl finance.sina.com.cn and scrape every ``doc-<name>.shtml`` page.

    Matching links are followed and handed to :meth:`parse_commentary`,
    which builds one ``CommentaryItem`` per page.
    """

    name = 'sinaCommentarySpider'
    allowed_domains = ['finance.sina.com.cn']
    start_urls = [
        'http://finance.sina.com.cn/',
    ]
    rules = [
        # The dot is escaped so only a literal ".shtml" suffix matches.
        Rule(LxmlLinkExtractor(allow=(r"doc-\w+\.shtml",)), follow=True, callback='parse_commentary'),
    ]

    @staticmethod
    def _first_or_empty(nodes):
        """Return the extracted first node of *nodes*, or '' when there is none."""
        return nodes[0].extract() if nodes else ''

    def parse_commentary(self, response):
        """Build a ``CommentaryItem`` from a single article response.

        Title, time, author and description come from ``<meta>`` tags; the
        body comes from the ``article article_16`` div. Both ``content``
        and ``view`` receive the plain text of that div.

        Returns the populated item, or ``None`` (after logging a warning)
        when the article div is missing, instead of failing with an
        ``IndexError``.
        """
        sel = response.selector
        item = CommentaryItem()

        # uri
        item['uri'] = get_base_url(response)
        # Call form of print: same output under the Python 2 print statement.
        print('Download from uri: %s' % item['uri'])

        # title
        item['title'] = self._first_or_empty(
            sel.xpath('//meta[@property="og:title"]/@content'))

        # time
        raw_time = self._first_or_empty(
            sel.xpath('//meta[@property="article:published_time"]/@content'))
        if raw_time:
            # Drop the last six characters and append 'Z'.
            # NOTE(review): presumably strips a "+08:00"-style offset, which
            # would label local time as UTC -- confirm.
            item['time'] = raw_time[:-6] + 'Z'
        else:
            item['time'] = None

        # author
        item['author'] = self._first_or_empty(
            sel.xpath('//meta[@property="article:author"]/@content'))

        # description
        item['description'] = self._first_or_empty(
            sel.xpath('//meta[@name="description"]/@content'))

        # content, images and view
        blocks = sel.xpath('//div[@class="article article_16"]')
        if not blocks:
            self.logger.warning('No article div found in %s', item['uri'])
            return None

        # 'lxml' is what bs4 selects by default when lxml is installed;
        # naming it keeps parsing deterministic and avoids the bs4 warning.
        soup = BeautifulSoup(blocks[0].extract(), 'lxml')
        item['content'] = item['view'] = soup.text

        image_urls = []
        for img in soup.find_all('img'):
            # Prefer src; fall back to alt when that holds the absolute URL.
            for attr in ('src', 'alt'):
                url = img.get(attr) or ''
                if url.startswith('http'):
                    image_urls.append(url)
                    break
        item['image_urls'] = image_urls
        return item
62 |
63 |
64 |
65 |
66 |
67 |
--------------------------------------------------------------------------------