├── README.md └── wallstreetcnScrapy ├── .project ├── .pydevproject ├── .settings └── org.eclipse.core.resources.prefs ├── db └── database.sql ├── log └── ws.log ├── scrapy.cfg └── wallstreetcnScrapy ├── .gitignore ├── __init__.py ├── commands.pyc ├── crawls ├── CommentarySpider-1 │ ├── requests.seen │ └── spider.state └── CommentarySpider-2 │ ├── requests.queue │ └── p0 │ └── requests.seen ├── items.py ├── main.py ├── middlewares.py ├── pipelines.py ├── settings.py └── spiders ├── .gitignore ├── CommentarySpider.py ├── __init__.py ├── jqkaCommentarySpider.py └── sinaCommentarySpider.py /README.md: -------------------------------------------------------------------------------- 1 | # wallstreetcnScrapy 2 | a crawler for wallstreetcn,finance.sina by Scrapy 3 | 4 | This is the crawler part of my graduation project which is a financial infomation search engine.
5 | It crawled three websites by using Scrapy as follows:
6 | http://wallstreetcn.com/news,
7 | http://finance.sina.com.cn/,
8 | http://news.10jqka.com.cn/
9 | then send the data to solr server to build index. And it stored the data in MySQL for backup. 10 | -------------------------------------------------------------------------------- /wallstreetcnScrapy/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | wallstreetcnScrapy 4 | 5 | 6 | 7 | 8 | 9 | org.python.pydev.PyDevBuilder 10 | 11 | 12 | 13 | 14 | 15 | org.python.pydev.pythonNature 16 | 17 | 18 | -------------------------------------------------------------------------------- /wallstreetcnScrapy/.pydevproject: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | /${PROJECT_DIR_NAME} 5 | 6 | python 2.7 7 | python27 8 | 9 | -------------------------------------------------------------------------------- /wallstreetcnScrapy/.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding//wallstreetcnScrapy/items.py=utf-8 3 | encoding//wallstreetcnScrapy/pipelines.py=utf-8 4 | encoding//wallstreetcnScrapy/settings.py=utf-8 5 | encoding//wallstreetcnScrapy/spiders/jqkaCommentarySpider.py=UTF-8 6 | encoding//wallstreetcnScrapy/spiders/sinaCommentarySpider.py=UTF-8 7 | encoding/=UTF-8 8 | -------------------------------------------------------------------------------- /wallstreetcnScrapy/db/database.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `wstable` ( 2 | `id` CHAR(32) NOT NULL COMMENT 'uri+title+author md5编码id', 3 | `uri` VARCHAR(255) COMMENT 'uri', 4 | `title` VARCHAR(255) COMMENT '标题', 5 | `author` VARCHAR(255) COMMENT '作者', 6 | `time1` DATETIME COMMENT '时间', 7 | `description` TEXT COMMENT '描述', 8 | `content` TEXT COMMENT '内容', 9 | `images` TEXT COMMENT 'json uris', 10 | `view1` TEXT COMMENT 'view', 11 | PRIMARY KEY (`id`) 12 | ) ENGINE=MyISAM DEFAULT CHARSET=utf8; 13 | select * from `wstable`; 14 | DROP TABLE `wstable`; 15 | TRUNCATE TABLE `wstable`; 16 | select count(*) from `wstable` -------------------------------------------------------------------------------- /wallstreetcnScrapy/log/ws.log: -------------------------------------------------------------------------------- 1 | 2016-04-29 17:27:04 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/) 2 | Traceback (most recent call last): 3 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 4 | yield next(it) 5 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 6 | for x in result: 7 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 8 | return (_set_referer(r) for r in result or ()) 9 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 10 | return (r for r in result or () if _filter(r)) 11 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 12 | return (r for r in result or () if _filter(r)) 13 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 14 | cb_res = callback(response, **cb_kwargs) or () 15 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 16 | _ = _.extract()[0] 17 | IndexError: list index out of range 18 | 2016-04-29 17:27:09 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160428/c589704962.shtml) 19 | Traceback (most recent call last): 20 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 21 | yield next(it) 22 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 23 | for x in result: 24 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 25 | return (_set_referer(r) for r in result or ()) 26 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 27 | return (r for r in result or () if _filter(r)) 28 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 29 | return (r for r in result or () if _filter(r)) 30 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 31 | cb_res = callback(response, **cb_kwargs) or () 32 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 33 | _ = _.extract()[0] 34 | IndexError: list index out of range 35 | 2016-04-29 17:27:13 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160428/c589706632.shtml) 36 | Traceback (most recent call last): 37 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 38 | yield next(it) 39 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 40 | for x in result: 41 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 42 | return (_set_referer(r) for r in result or ()) 43 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 44 | return (r for r in result or () if _filter(r)) 45 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 46 | return (r for r in result or () if _filter(r)) 47 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 48 | cb_res = callback(response, **cb_kwargs) or () 49 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 50 | _ = _.extract()[0] 51 | IndexError: list index out of range 52 | 2016-04-29 17:27:13 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160428/c589706632.shtml) 53 | Traceback (most recent call last): 54 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 55 | yield next(it) 56 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 57 | for x in result: 58 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 59 | return (_set_referer(r) for r in result or ()) 60 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 61 | return (r for r in result or () if _filter(r)) 62 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 63 | return (r for r in result or () if _filter(r)) 64 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 65 | cb_res = callback(response, **cb_kwargs) or () 66 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 67 | _ = _.extract()[0] 68 | IndexError: list index out of range 69 | 2016-04-29 17:27:13 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160428/c589706632.shtml) 70 | Traceback (most recent call last): 71 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 72 | yield next(it) 73 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 74 | for x in result: 75 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 76 | return (_set_referer(r) for r in result or ()) 77 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 78 | return (r for r in result or () if _filter(r)) 79 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 80 | return (r for r in result or () if _filter(r)) 81 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 82 | cb_res = callback(response, **cb_kwargs) or () 83 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 84 | _ = _.extract()[0] 85 | IndexError: list index out of range 86 | 2016-04-29 17:27:14 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160429/c589802622.shtml) 87 | Traceback (most recent call last): 88 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 89 | yield next(it) 90 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 91 | for x in result: 92 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 93 | return (_set_referer(r) for r in result or ()) 94 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 95 | return (r for r in result or () if _filter(r)) 96 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 97 | return (r for r in result or () if _filter(r)) 98 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 99 | cb_res = callback(response, **cb_kwargs) or () 100 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 101 | _ = _.extract()[0] 102 | IndexError: list index out of range 103 | 2016-04-29 17:27:21 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160427/c589681503.shtml) 104 | Traceback (most recent call last): 105 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 106 | yield next(it) 107 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 108 | for x in result: 109 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 110 | return (_set_referer(r) for r in result or ()) 111 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 112 | return (r for r in result or () if _filter(r)) 113 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 114 | return (r for r in result or () if _filter(r)) 115 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 116 | cb_res = callback(response, **cb_kwargs) or () 117 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 118 | _ = _.extract()[0] 119 | IndexError: list index out of range 120 | 2016-04-29 17:27:48 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160317/c588573874.shtml) 121 | Traceback (most recent call last): 122 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 123 | yield next(it) 124 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 125 | for x in result: 126 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 127 | return (_set_referer(r) for r in result or ()) 128 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 129 | return (r for r in result or () if _filter(r)) 130 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 131 | return (r for r in result or () if _filter(r)) 132 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 133 | cb_res = callback(response, **cb_kwargs) or () 134 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 135 | _ = _.extract()[0] 136 | IndexError: list index out of range 137 | 2016-04-29 17:27:58 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151229/c586749894.shtml) 138 | Traceback (most recent call last): 139 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 140 | yield next(it) 141 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 142 | for x in result: 143 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 144 | return (_set_referer(r) for r in result or ()) 145 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 146 | return (r for r in result or () if _filter(r)) 147 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 148 | return (r for r in result or () if _filter(r)) 149 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 150 | cb_res = callback(response, **cb_kwargs) or () 151 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 152 | _ = _.extract()[0] 153 | IndexError: list index out of range 154 | 2016-04-29 17:27:58 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160323/c588693373.shtml) 155 | Traceback (most recent call last): 156 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 157 | yield next(it) 158 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 159 | for x in result: 160 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 161 | return (_set_referer(r) for r in result or ()) 162 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 163 | return (r for r in result or () if _filter(r)) 164 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 165 | return (r for r in result or () if _filter(r)) 166 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 167 | cb_res = callback(response, **cb_kwargs) or () 168 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 169 | _ = _.extract()[0] 170 | IndexError: list index out of range 171 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml) 172 | Traceback (most recent call last): 173 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 174 | yield next(it) 175 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 176 | for x in result: 177 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 178 | return (_set_referer(r) for r in result or ()) 179 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 180 | return (r for r in result or () if _filter(r)) 181 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 182 | return (r for r in result or () if _filter(r)) 183 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 184 | cb_res = callback(response, **cb_kwargs) or () 185 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 186 | _ = _.extract()[0] 187 | IndexError: list index out of range 188 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml) 189 | Traceback (most recent call last): 190 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 191 | yield next(it) 192 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 193 | for x in result: 194 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 195 | return (_set_referer(r) for r in result or ()) 196 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 197 | return (r for r in result or () if _filter(r)) 198 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 199 | return (r for r in result or () if _filter(r)) 200 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 201 | cb_res = callback(response, **cb_kwargs) or () 202 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 203 | _ = _.extract()[0] 204 | IndexError: list index out of range 205 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml) 206 | Traceback (most recent call last): 207 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 208 | yield next(it) 209 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 210 | for x in result: 211 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 212 | return (_set_referer(r) for r in result or ()) 213 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 214 | return (r for r in result or () if _filter(r)) 215 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 216 | return (r for r in result or () if _filter(r)) 217 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 218 | cb_res = callback(response, **cb_kwargs) or () 219 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 220 | _ = _.extract()[0] 221 | IndexError: list index out of range 222 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml) 223 | Traceback (most recent call last): 224 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 225 | yield next(it) 226 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 227 | for x in result: 228 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 229 | return (_set_referer(r) for r in result or ()) 230 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 231 | return (r for r in result or () if _filter(r)) 232 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 233 | return (r for r in result or () if _filter(r)) 234 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 235 | cb_res = callback(response, **cb_kwargs) or () 236 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 237 | _ = _.extract()[0] 238 | IndexError: list index out of range 239 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml) 240 | Traceback (most recent call last): 241 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 242 | yield next(it) 243 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 244 | for x in result: 245 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 246 | return (_set_referer(r) for r in result or ()) 247 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 248 | return (r for r in result or () if _filter(r)) 249 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 250 | return (r for r in result or () if _filter(r)) 251 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 252 | cb_res = callback(response, **cb_kwargs) or () 253 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 254 | _ = _.extract()[0] 255 | IndexError: list index out of range 256 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml) 257 | Traceback (most recent call last): 258 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 259 | yield next(it) 260 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 261 | for x in result: 262 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 263 | return (_set_referer(r) for r in result or ()) 264 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 265 | return (r for r in result or () if _filter(r)) 266 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 267 | return (r for r in result or () if _filter(r)) 268 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 269 | cb_res = callback(response, **cb_kwargs) or () 270 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 271 | _ = _.extract()[0] 272 | IndexError: list index out of range 273 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml) 274 | Traceback (most recent call last): 275 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 276 | yield next(it) 277 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 278 | for x in result: 279 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 280 | return (_set_referer(r) for r in result or ()) 281 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 282 | return (r for r in result or () if _filter(r)) 283 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 284 | return (r for r in result or () if _filter(r)) 285 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 286 | cb_res = callback(response, **cb_kwargs) or () 287 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 288 | _ = _.extract()[0] 289 | IndexError: list index out of range 290 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml) 291 | Traceback (most recent call last): 292 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 293 | yield next(it) 294 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 295 | for x in result: 296 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 297 | return (_set_referer(r) for r in result or ()) 298 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 299 | return (r for r in result or () if _filter(r)) 300 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 301 | return (r for r in result or () if _filter(r)) 302 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 303 | cb_res = callback(response, **cb_kwargs) or () 304 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 305 | _ = _.extract()[0] 306 | IndexError: list index out of range 307 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml) 308 | Traceback (most recent call last): 309 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 310 | yield next(it) 311 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 312 | for x in result: 313 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 314 | return (_set_referer(r) for r in result or ()) 315 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 316 | return (r for r in result or () if _filter(r)) 317 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 318 | return (r for r in result or () if _filter(r)) 319 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 320 | cb_res = callback(response, **cb_kwargs) or () 321 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 322 | _ = _.extract()[0] 323 | IndexError: list index out of range 324 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml) 325 | Traceback (most recent call last): 326 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 327 | yield next(it) 328 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 329 | for x in result: 330 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 331 | return (_set_referer(r) for r in result or ()) 332 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 333 | return (r for r in result or () if _filter(r)) 334 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 335 | return (r for r in result or () if _filter(r)) 336 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 337 | cb_res = callback(response, **cb_kwargs) or () 338 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 339 | _ = _.extract()[0] 340 | IndexError: list index out of range 341 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml) 342 | Traceback (most recent call last): 343 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 344 | yield next(it) 345 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 346 | for x in result: 347 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 348 | return (_set_referer(r) for r in result or ()) 349 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 350 | return (r for r in result or () if _filter(r)) 351 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 352 | return (r for r in result or () if _filter(r)) 353 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 354 | cb_res = callback(response, **cb_kwargs) or () 355 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 356 | _ = _.extract()[0] 357 | IndexError: list index out of range 358 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml) 359 | Traceback (most recent call last): 360 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 361 | yield next(it) 362 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 363 | for x in result: 364 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 365 | return (_set_referer(r) for r in result or ()) 366 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 367 | return (r for r in result or () if _filter(r)) 368 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 369 | return (r for r in result or () if _filter(r)) 370 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 371 | cb_res = callback(response, **cb_kwargs) or () 372 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 373 | _ = _.extract()[0] 374 | IndexError: list index out of range 375 | 2016-04-29 17:28:02 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160413/c589202183.shtml) 376 | Traceback (most recent call last): 377 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 378 | yield next(it) 379 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 380 | for x in result: 381 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 382 | return (_set_referer(r) for r in result or ()) 383 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 384 | return (r for r in result or () if _filter(r)) 385 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 386 | return (r for r in result or () if _filter(r)) 387 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 388 | cb_res = callback(response, **cb_kwargs) or () 389 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 390 | _ = _.extract()[0] 391 | IndexError: list index out of range 392 | 2016-04-29 17:28:25 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160330/c588893392.shtml) 393 | Traceback (most recent call last): 394 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 395 | yield next(it) 396 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 397 | for x in result: 398 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 399 | return (_set_referer(r) for r in result or ()) 400 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 401 | return (r for r in result or () if _filter(r)) 402 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 403 | return (r for r in result or () if _filter(r)) 404 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 405 | cb_res = callback(response, **cb_kwargs) or () 406 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 407 | _ = _.extract()[0] 408 | IndexError: list index out of range 409 | 2016-04-29 17:28:33 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160308/c588398925.shtml) 410 | Traceback (most recent call last): 411 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 412 | yield next(it) 413 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 414 | for x in result: 415 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 416 | return (_set_referer(r) for r in result or ()) 417 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 418 | return (r for r in result or () if _filter(r)) 419 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 420 | return (r for r in result or () if _filter(r)) 421 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 422 | cb_res = callback(response, **cb_kwargs) or () 423 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 424 | _ = _.extract()[0] 425 | IndexError: list index out of range 426 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml) 427 | Traceback (most recent call last): 428 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 429 | yield next(it) 430 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 431 | for x in result: 432 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 433 | return (_set_referer(r) for r in result or ()) 434 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 435 | return (r for r in result or () if _filter(r)) 436 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 437 | return (r for r in result or () if _filter(r)) 438 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 439 | cb_res = callback(response, **cb_kwargs) or () 440 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 441 | _ = _.extract()[0] 442 | IndexError: list index out of range 443 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml) 444 | Traceback (most recent call last): 445 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 446 | yield next(it) 447 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 448 | for x in result: 449 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 450 | return (_set_referer(r) for r in result or ()) 451 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 452 | return (r for r in result or () if _filter(r)) 453 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 454 | return (r for r in result or () if _filter(r)) 455 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 456 | cb_res = callback(response, **cb_kwargs) or () 457 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 458 | _ = _.extract()[0] 459 | IndexError: list index out of range 460 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml) 461 | Traceback (most recent call last): 462 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 463 | yield next(it) 464 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 465 | for x in result: 466 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 467 | return (_set_referer(r) for r in result or ()) 468 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 469 | return (r for r in result or () if _filter(r)) 470 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 471 | return (r for r in result or () if _filter(r)) 472 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 473 | cb_res = callback(response, **cb_kwargs) or () 474 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 475 | _ = _.extract()[0] 476 | IndexError: list index out of range 477 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml) 478 | Traceback (most recent call last): 479 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 480 | yield next(it) 481 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 482 | for x in result: 483 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 484 | return (_set_referer(r) for r in result or ()) 485 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 486 | return (r for r in result or () if _filter(r)) 487 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 488 | return (r for r in result or () if _filter(r)) 489 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 490 | cb_res = callback(response, **cb_kwargs) or () 491 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 492 | _ = _.extract()[0] 493 | IndexError: list index out of range 494 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml) 495 | Traceback (most recent call last): 496 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 497 | yield next(it) 498 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 499 | for x in result: 500 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 501 | return (_set_referer(r) for r in result or ()) 502 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 503 | return (r for r in result or () if _filter(r)) 504 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 505 | return (r for r in result or () if _filter(r)) 506 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 507 | cb_res = callback(response, **cb_kwargs) or () 508 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 509 | _ = _.extract()[0] 510 | IndexError: list index out of range 511 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml) 512 | Traceback (most recent call last): 513 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 514 | yield next(it) 515 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 516 | for x in result: 517 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 518 | return (_set_referer(r) for r in result or ()) 519 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 520 | return (r for r in result or () if _filter(r)) 521 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 522 | return (r for r in result or () if _filter(r)) 523 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 524 | cb_res = callback(response, **cb_kwargs) or () 525 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 526 | _ = _.extract()[0] 527 | IndexError: list index out of range 528 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml) 529 | Traceback (most recent call last): 530 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 531 | yield next(it) 532 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 533 | for x in result: 534 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 535 | return (_set_referer(r) for r in result or ()) 536 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 537 | return (r for r in result or () if _filter(r)) 538 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 539 | return (r for r in result or () if _filter(r)) 540 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 541 | cb_res = callback(response, **cb_kwargs) or () 542 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 543 | _ = _.extract()[0] 544 | IndexError: list index out of range 545 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml) 546 | Traceback (most recent call last): 547 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 548 | yield next(it) 549 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 550 | for x in result: 551 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 552 | return (_set_referer(r) for r in result or ()) 553 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 554 | return (r for r in result or () if _filter(r)) 555 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 556 | return (r for r in result or () if _filter(r)) 557 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 558 | cb_res = callback(response, **cb_kwargs) or () 559 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 560 | _ = _.extract()[0] 561 | IndexError: list index out of range 562 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml) 563 | Traceback (most recent call last): 564 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 565 | yield next(it) 566 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 567 | for x in result: 568 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 569 | return (_set_referer(r) for r in result or ()) 570 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 571 | return (r for r in result or () if _filter(r)) 572 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 573 | return (r for r in result or () if _filter(r)) 574 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 575 | cb_res = callback(response, **cb_kwargs) or () 576 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 577 | _ = _.extract()[0] 578 | IndexError: list index out of range 579 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml) 580 | Traceback (most recent call last): 581 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 582 | yield next(it) 583 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 584 | for x in result: 585 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 586 | return (_set_referer(r) for r in result or ()) 587 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 588 | return (r for r in result or () if _filter(r)) 589 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 590 | return (r for r in result or () if _filter(r)) 591 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 592 | cb_res = callback(response, **cb_kwargs) or () 593 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 594 | _ = _.extract()[0] 595 | IndexError: list index out of range 596 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml) 597 | Traceback (most recent call last): 598 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 599 | yield next(it) 600 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 601 | for x in result: 602 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 603 | return (_set_referer(r) for r in result or ()) 604 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 605 | return (r for r in result or () if _filter(r)) 606 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 607 | return (r for r in result or () if _filter(r)) 608 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 609 | cb_res = callback(response, **cb_kwargs) or () 610 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 611 | _ = _.extract()[0] 612 | IndexError: list index out of range 613 | 2016-04-29 17:28:35 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20151218/c586554016.shtml) 614 | Traceback (most recent call last): 615 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 616 | yield next(it) 617 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 618 | for x in result: 619 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 620 | return (_set_referer(r) for r in result or ()) 621 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 622 | return (r for r in result or () if _filter(r)) 623 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 624 | return (r for r in result or () if _filter(r)) 625 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 626 | cb_res = callback(response, **cb_kwargs) or () 627 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 628 | _ = _.extract()[0] 629 | IndexError: list index out of range 630 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml) 631 | Traceback (most recent call last): 632 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 633 | yield next(it) 634 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 635 | for x in result: 636 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 637 | return (_set_referer(r) for r in result or ()) 638 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 639 | return (r for r in result or () if _filter(r)) 640 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 641 | return (r for r in result or () if _filter(r)) 642 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 643 | cb_res = callback(response, **cb_kwargs) or () 644 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 645 | _ = _.extract()[0] 646 | IndexError: list index out of range 647 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml) 648 | Traceback (most recent call last): 649 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 650 | yield next(it) 651 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 652 | for x in result: 653 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 654 | return (_set_referer(r) for r in result or ()) 655 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 656 | return (r for r in result or () if _filter(r)) 657 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 658 | return (r for r in result or () if _filter(r)) 659 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 660 | cb_res = callback(response, **cb_kwargs) or () 661 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 662 | _ = _.extract()[0] 663 | IndexError: list index out of range 664 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml) 665 | Traceback (most recent call last): 666 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 667 | yield next(it) 668 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 669 | for x in result: 670 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 671 | return (_set_referer(r) for r in result or ()) 672 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 673 | return (r for r in result or () if _filter(r)) 674 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 675 | return (r for r in result or () if _filter(r)) 676 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 677 | cb_res = callback(response, **cb_kwargs) or () 678 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 679 | _ = _.extract()[0] 680 | IndexError: list index out of range 681 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml) 682 | Traceback (most recent call last): 683 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 684 | yield next(it) 685 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 686 | for x in result: 687 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 688 | return (_set_referer(r) for r in result or ()) 689 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 690 | return (r for r in result or () if _filter(r)) 691 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 692 | return (r for r in result or () if _filter(r)) 693 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 694 | cb_res = callback(response, **cb_kwargs) or () 695 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 696 | _ = _.extract()[0] 697 | IndexError: list index out of range 698 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml) 699 | Traceback (most recent call last): 700 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 701 | yield next(it) 702 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 703 | for x in result: 704 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 705 | return (_set_referer(r) for r in result or ()) 706 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 707 | return (r for r in result or () if _filter(r)) 708 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 709 | return (r for r in result or () if _filter(r)) 710 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 711 | cb_res = callback(response, **cb_kwargs) or () 712 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 713 | _ = _.extract()[0] 714 | IndexError: list index out of range 715 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml) 716 | Traceback (most recent call last): 717 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 718 | yield next(it) 719 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 720 | for x in result: 721 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 722 | return (_set_referer(r) for r in result or ()) 723 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 724 | return (r for r in result or () if _filter(r)) 725 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 726 | return (r for r in result or () if _filter(r)) 727 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 728 | cb_res = callback(response, **cb_kwargs) or () 729 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 730 | _ = _.extract()[0] 731 | IndexError: list index out of range 732 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml) 733 | Traceback (most recent call last): 734 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 735 | yield next(it) 736 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 737 | for x in result: 738 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 739 | return (_set_referer(r) for r in result or ()) 740 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 741 | return (r for r in result or () if _filter(r)) 742 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 743 | return (r for r in result or () if _filter(r)) 744 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 745 | cb_res = callback(response, **cb_kwargs) or () 746 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 747 | _ = _.extract()[0] 748 | IndexError: list index out of range 749 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml) 750 | Traceback (most recent call last): 751 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 752 | yield next(it) 753 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 754 | for x in result: 755 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 756 | return (_set_referer(r) for r in result or ()) 757 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 758 | return (r for r in result or () if _filter(r)) 759 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 760 | return (r for r in result or () if _filter(r)) 761 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 762 | cb_res = callback(response, **cb_kwargs) or () 763 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 764 | _ = _.extract()[0] 765 | IndexError: list index out of range 766 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml) 767 | Traceback (most recent call last): 768 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 769 | yield next(it) 770 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 771 | for x in result: 772 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 773 | return (_set_referer(r) for r in result or ()) 774 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 775 | return (r for r in result or () if _filter(r)) 776 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 777 | return (r for r in result or () if _filter(r)) 778 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 779 | cb_res = callback(response, **cb_kwargs) or () 780 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 781 | _ = _.extract()[0] 782 | IndexError: list index out of range 783 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml) 784 | Traceback (most recent call last): 785 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 786 | yield next(it) 787 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 788 | for x in result: 789 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 790 | return (_set_referer(r) for r in result or ()) 791 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 792 | return (r for r in result or () if _filter(r)) 793 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 794 | return (r for r in result or () if _filter(r)) 795 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 796 | cb_res = callback(response, **cb_kwargs) or () 797 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 798 | _ = _.extract()[0] 799 | IndexError: list index out of range 800 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml) 801 | Traceback (most recent call last): 802 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 803 | yield next(it) 804 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 805 | for x in result: 806 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 807 | return (_set_referer(r) for r in result or ()) 808 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 809 | return (r for r in result or () if _filter(r)) 810 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 811 | return (r for r in result or () if _filter(r)) 812 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 813 | cb_res = callback(response, **cb_kwargs) or () 814 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 815 | _ = _.extract()[0] 816 | IndexError: list index out of range 817 | 2016-04-29 17:28:57 [scrapy] ERROR: Spider error processing (referer: http://news.10jqka.com.cn/20160219/c587934900.shtml) 818 | Traceback (most recent call last): 819 | File "H:\python27\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback 820 | yield next(it) 821 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 28, in process_spider_output 822 | for x in result: 823 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 22, in 824 | return (_set_referer(r) for r in result or ()) 825 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in 826 | return (r for r in result or () if _filter(r)) 827 | File "H:\python27\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 54, in 828 | return (r for r in result or () if _filter(r)) 829 | File "H:\python27\lib\site-packages\scrapy\spiders\crawl.py", line 67, in _parse_response 830 | cb_res = callback(response, **cb_kwargs) or () 831 | File "H:\Administrator\Documents\Workspace\python\wallstreetcnScrapy\wallstreetcnScrapy\spiders\jqkaCommentarySpider.py", line 52, in parse_commentary 832 | _ = _.extract()[0] 833 | IndexError: list index out of range 834 | -------------------------------------------------------------------------------- /wallstreetcnScrapy/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html 5 | 6 | [settings] 7 | default = wallstreetcnScrapy.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = wallstreetcnScrapy 12 | -------------------------------------------------------------------------------- /wallstreetcnScrapy/wallstreetcnScrapy/.gitignore: -------------------------------------------------------------------------------- 1 | /__init__.pyc 2 | /items.pyc 3 | /middlewares.pyc 4 | /pipelines.pyc 5 | /settings.pyc 6 | -------------------------------------------------------------------------------- /wallstreetcnScrapy/wallstreetcnScrapy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianzhichun/wallstreetcnScrapy/e6124d2dc4b26def96cd070a87eb5158d9435e4e/wallstreetcnScrapy/wallstreetcnScrapy/__init__.py -------------------------------------------------------------------------------- /wallstreetcnScrapy/wallstreetcnScrapy/commands.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianzhichun/wallstreetcnScrapy/e6124d2dc4b26def96cd070a87eb5158d9435e4e/wallstreetcnScrapy/wallstreetcnScrapy/commands.pyc -------------------------------------------------------------------------------- /wallstreetcnScrapy/wallstreetcnScrapy/crawls/CommentarySpider-1/spider.state: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianzhichun/wallstreetcnScrapy/e6124d2dc4b26def96cd070a87eb5158d9435e4e/wallstreetcnScrapy/wallstreetcnScrapy/crawls/CommentarySpider-1/spider.state -------------------------------------------------------------------------------- /wallstreetcnScrapy/wallstreetcnScrapy/crawls/CommentarySpider-2/requests.queue/p0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jianzhichun/wallstreetcnScrapy/e6124d2dc4b26def96cd070a87eb5158d9435e4e/wallstreetcnScrapy/wallstreetcnScrapy/crawls/CommentarySpider-2/requests.queue/p0 -------------------------------------------------------------------------------- /wallstreetcnScrapy/wallstreetcnScrapy/crawls/CommentarySpider-2/requests.seen: -------------------------------------------------------------------------------- 1 | 8aebb558bc4b0334eab8ad89792ccbe0a97725ab 2 | added85ef9dd4e673e68715d8904dda064b8f7b7 3 | 731aba78419f9810eae6f5f7546c6094699eb1ae 4 | 8d1c74de525c1c8d2ec96aee420cc6fc84b27d86 5 | cb176cc78e26600726d0d25a3f9fa6a511865e18 6 | 2e0c21d9a3764dacb9ba2a134fca2cb213af7b2a 7 | 90650bd5ae9386880c361f02145672aca82c0b93 8 | 091d395c743fddd07f0026d7bd35b4b1738bf4ee 9 | b400280df54ac4d8239264b584ffa04df7c29045 10 | 88062c9896fb793cc0ec4c7dc93085b41997ed64 11 | 5cbfd4b4d4b14b63b86e405e885b2aa43f0ec13a 12 | ac37cc4cdf9a9133a0e39353b8e9eb560238929c 13 | 6e682298aa5da7ef58e74d2f5c4beb8c95acbac0 14 | 8cddfc89ebec5c4beda10e4d714fb0aae4aaab26 15 | cb061d4fa4207105a45e11d1ca7b75da3ed7deee 16 | 1c9b2705d0a03ef8dbea9b4e869d00e23abdb9b5 17 | 40ae640fdd89ee351fc0ceea9ee7228635cbde52 18 | dbc3a382075adf2827d7cdb12844ac72125a5034 19 | af122d81afcd49e342dee53ea992136310034563 20 | be5416a2f521087a147a6dd6d731fa387d9a19ae 21 | 5cf5ee79c69dbec99122e169a6be84a5a7a1ed3a 22 | 94dcc41140da785b46b57cf531d22aab7eb921f3 23 | ca892a7f17fe2f0f207b09c1d11bf589218f6167 24 | 83dcb5c1b88b67b2bf7956502c2dddc50e4f41e3 25 | 851d288c904cf600e4db17f63cea4f38120b5d2e 26 | 57017c756c4e504b650f201d1a407b136118f6f1 27 | 1c909005ae12d591d9c6722e9ffa17296928a972 28 | 417c7ab1ad59575c52c3cb9721a0a0717e61eb68 29 | e06ec62b002bce90b61aba3545719f23c336c187 30 | e2e732d544fd5de3ec49752cebd8045ea765c996 31 | 273c4159a0536bcd14d1848cdce58baf96027f54 32 | 3b47ff9e7de313c6ddd33163f9f474ac143fd92b 33 | eada7e64ce22556eb9757aa6e9ec06223450c299 34 | a9516bee191db986256f1cc49b6291ca63c19792 35 | 2cd246937a1ed88daae413271a5465d939c4cbe0 36 | 80a84189cd872560645ae5af11851a04a14a1f3a 37 | 0c2b8edfd6df91cafc3eccc8599260d93b268d2a 38 | c3c995c0f501864b470d0da559613279c7462652 39 | bb656708350857a1862df1e795dde7d983d88a1f 40 | a34475ef90ee6f47cad81e9a6c2412a411986aa7 41 | b7e77e55573947c2aca27401995e48dfe1622d5b 42 | edb4fd917b2936f6f389bf3ab2601ed69659000a 43 | 51ec37f9681f1c4ed4be0ea685d85f07fad47e68 44 | d50522c133c4ff181dca39d1f48d710eb94107ed 45 | 7121b83f43d010208d29048ad9c0a0538858bb46 46 | 9de69255a864d2451f9e66ef698318c0d26a8f7e 47 | 1ccc5008b4a042e653f5a05f85b107f471d96387 48 | 9459bcc7f6dadb607c847cc4729903a05c74a0f5 49 | b52775ce8d7d324ada9ee043c614bba296a92de5 50 | 8d2b4dd9fdd0171085591fb72b287eb47b5f85b6 51 | 80c93e56e3a85f0d9eb6d5e3bcd26c9cdd5c07d6 52 | 6996696418a6040b5a1626d69fd6c1868b1c5385 53 | 16f43e6b0f2183f15e911018220980d761e85ba6 54 | 2c0ecabe88dd71c1960b084235a4650d89352387 55 | 5d57efd2aab1cebc63792a46ff393e5e800adb5f 56 | 6271b716c1928eceaa2f7b72bbbbcc83a118d3fb 57 | 981f2f94e204abf20308cea91498cca14697d639 58 | f75ab2c4855128ef22ac76e42e9f91e8f58efa63 59 | ace38f61d056ec33ddcad73a2b4db3b318ab84ef 60 | a7cc447c1137e49fe54240d52245015fd936e7b4 61 | 770f3c48995da6bd6361b17e982d6d6c8c534921 62 | 0b60fb4d2f17d27e8faf7cdc64837b21a4edf8ce 63 | 63f3a4cc1bf3fe5860cacb6e8ca842edafd66ae9 64 | ad77c12868c31affda88dcb35ec5a6580c39713a 65 | d0d9fce1e22d2479ecb370b37b3db74a7fbe7b1e 66 | b410467bb3852ff8b9df418425befe52beb13db7 67 | 364d1e2996ddd522a1bfe652e8f7f2e06db84e3b 68 | f3c176cbc6267428714c8d59b8b07b813d4e56e9 69 | 3bf30cff105f5ad48306f670892063ebd72f0371 70 | 5a4c9ca63a615d6067411038f8331ddf313c5152 71 | f33e6f988ba620b8559bed8187e444c50b99912a 72 | 2a0c94ab37275668667ed8c7d52577924a25a2d7 73 | 405e4b1e023c15ea7583dd7569885d2ca0b0bc1c 74 | 216987c13420b53556677d7dd2388bda7d348ae9 75 | 6e17d5ab874e80532c937d5ea4677d11fcc12701 76 | d548162422a6a37bc7b8db2a897b793f11862eb2 77 | 67a65d07e6969c5a2d261649d6a91356e5dee721 78 | c3e3ac0fd8579d0848e359a7b44a18529bcc8ad9 79 | 993a881bd51323d9c7c92b77ecf4fa8b4791b4be 80 | 6f2c7d1ed1cedc9978229acfd44ccb0093669dbb 81 | d62474dc5da163f03f0fca123181a5ce860d20a9 82 | 3f11334ad2c1fdb219e76aa409877a121211fc1f 83 | 47d94662880466a572df3b3fc12146b6ee8c7d2d 84 | 88f157c7b449f3a076b6f002b0d3b435b1efc298 85 | 289fdb8537bbb767a5bd33e57693f81e43ae981f 86 | 5601018aa4a291181c772997510771e85f16fb92 87 | eb903ce6543099874989a50f1dbce25c07ab3dda 88 | 9b430039f2afa7050f81fa2780f3b9a01c92cc82 89 | 7b5e343f4a11843454a3e7a15984027fbc32be78 90 | 7064527f1ac61e00e562d4c4126ec3e4f177c6a3 91 | befdab2b9f5f5751f2aa32bdffc174501b9012b3 92 | 741d57bb513a669e18d67bf7881f9e6c0bf12e31 93 | 9100cb7044a55d0c82d44f03c2acaf8c42d0afcd 94 | f3fe07fe6867880e3f43be568e7b059bb1a3bf75 95 | ffbf6761185ef4533f0a90f2176b5730329eaec6 96 | d4d332466a8ecc57dc1d9e673b260fe6d8e8611e 97 | f721f684621a089570f8648215a4921f648b65dc 98 | be96bd9ae8ef76e10adb2f8c5e82d001c5448535 99 | 761ae2519b6567cfd88989e573eff23780a2b5bc 100 | 5ce9315486033abe4e700e3c53b1613c4e875f79 101 | 6a0e9f87dbb0b5f6539883e183017e3bb848a510 102 | d6e5d51afa980caa7e7c5ec74c503216d874ee27 103 | 7e933b7e65902636486612b61728745dfdd4f5be 104 | 77d2fe9290a9de4a592a793125f3465a1d2c8d9d 105 | 7ccb3fbe40f908dd457cce248599011649d0744a 106 | b169d85b4e88199e602e919acccfaac3d8546c94 107 | edce20e76413054c42df17b96e4ec35160203c40 108 | 75c8d36cc13308eb9be90a97407369551e365469 109 | b7a3fbcd61bc47127986a43c50f637d903a524f0 110 | e6b4afdf8566b29f4f599b8cc3e6e5f7fa50c0fc 111 | af0c553ca7243192ef890f4590538871c5f87770 112 | 00bf089068a64d7865f07e4d628f89eaca7ca367 113 | cc39a7deba9f373f11a44605e47fb807f7169e51 114 | 2db10f629c3fabc9df104c24d62f3b7836b83cc0 115 | b431c3e1c243461c962d40d81315f382d7bc2034 116 | c3ec3b1ed02a5dc90072725acd77366ad5684d4e 117 | ef66b887d778206f3c3a0c5988dc186e2c9ef7d4 118 | e23a8df6d469fadb279e07c9710b35c09b805746 119 | 0eca410e2befddfb0b849a66843a1b282ec919b0 120 | e7982fd8a49a98f4a738649e9e2ed0b06706e08d 121 | 88b04cec0c56f8d7eee9bd5185e0011e5e253b5f 122 | fee0a1bbfdf33c5bb3fe54eecaa2fd7ae36740c2 123 | afa6048155b2f7a08853b7b74d081f5b32393380 124 | 25f24b081dc3f8c9ec6bad5ffec7f5657b7ee587 125 | db72dd5769c613c7753b9cb7a37452c1d8710e09 126 | 40ac28b030036e23bd2ea279db64c224c3bcc08e 127 | 037224950fb57390954bd09f83fda5616fb9fcbe 128 | 0f6b30ca0d326649463f9ce06187ecb67057ad01 129 | d4f85df8c6c2a741e605688f62825400e49c987b 130 | 691838ffbd7e606faad24ebcdb73c7d1c27f488b 131 | bb20f211f364412b8c7365656f4cbae8bff9347e 132 | f951a6a7362c6cb627e1ba702d076ae5f1088669 133 | 67f68446d452d38f1c5c07275c8832e2f694792d 134 | 2fbcb821ed3efe2e07458f91167973912d5ec16c 135 | f9a21da8d4035cc0c17a6818020d32c2380769da 136 | 5a2b664dfdb2492d20eee870857a8bbe07dd349f 137 | 901c1144f788189dd45e93be4145c73f9058f3a6 138 | 64828da864c79f4bbc9c4c30669076a44053cd37 139 | 17cf2fe8ee9587fa18e4f47abed13563d6e5b561 140 | 9e3af1dd861b9fe5cf047bf7a826a271184667ce 141 | 513c8446fcf4e15cca4c8a1f667423cb0931c780 142 | 0f23ce1dcbee08bb26d9e76c858e3169c12b8074 143 | 4f914f10d7d3060bc707533c41b3684cb135cc2d 144 | 0c8e4feb0e41255a200da404f9851f3c4d593a20 145 | 8948002471ce79e6d0278aba8d48e4ead5b483f6 146 | 6c2b0f8226a1ce712ba1ed1a02fbbac724d45c9b 147 | 7eff25ca7b71b75717bababd0bd85837cd62908a 148 | 1dcfcbdb5763aa0b27953df2cfc774c03e166765 149 | 09778ddf8927aadd2eb751cb21ef302d11d2817b 150 | df934e8febda106658d4ab6a381e02a14c3b090c 151 | 877c1f4acf13452dd84125d72e77518724da0c9b 152 | 6c1f29817725711506d980c9602b3bc73180006e 153 | ecf8b8f9a08aab683cbdafd3bbbe6ccab67d1c2e 154 | 2bce08d27c4fc4af759a2bf91b75f12d3b627588 155 | e2885d106445e29d2d7573d04453f1bd86d9fbe1 156 | d6e70a475505c4445f2ab2b0df0d90619c75dd5d 157 | ff48264df39079fb9814923dc656f9f5cfe47a35 158 | 5c9b9f50c80e79b6c3c08bea7e6bc3556d98d30c 159 | bc4051a5dd62d52039cae458548bdaaec70f0300 160 | fc0207271c9b1d559a3416fd8f35bbfb227e187f 161 | dae577c51a5ddcfc5e1836667247f9d0133195ba 162 | 4d0f07118c697ac7c7c2ff2a4f93471b3e843f31 163 | 2970c9e29a78f796ef0a875e31149f5d45ee90f7 164 | e82541f847c1b5283f55937c5f420a056a3a7377 165 | 2521c367eb5739446c6e1a69a96738771eb777eb 166 | bf1cf6079d7b2d879288aeecad40582e8e36fa4c 167 | 727c0a64d15039ddc820e7e8aafb4a5b5b99f5ce 168 | 3d2899372ce6092fa3b484bc71eeebb8b8dd9e2a 169 | bddbb614b866ff1d375b0bb2d167ec07ec5066db 170 | 9478150f9f515fe30bf4c7397e60c97a0d6d1743 171 | 7c8e71ffd5284991478c70ea62937442d5b6d8ea 172 | 80f3f1d08903ffce68cf7ecac9d6ccec9de3bfe4 173 | 3e900608bef9e299a2946c28e8006aa1a52527de 174 | e52a2d1b0fb39d125da970372a7883517d306c9a 175 | 77e1dfa9660a91be1b151479c6d0004307f9e518 176 | 106b55b52ee8b18b1f33d278a475876da61f6851 177 | dd7df5315ebcd2b3ca343ef2359ef7398a854d71 178 | 7bcdd92593b2500194fa164f5066d5ed6de07bf9 179 | 68ee60013085a7730cad0dc1755d8646cab6985d 180 | 7be8e6b3b477988bead4de545b41ae07f96ff482 181 | f03947c0bdbe68019f441fcb63a12aac8f3c8e24 182 | 5adfc7622a2d97248fdbc3f42d85da39d2b88052 183 | 5650660db430e8580346cfd43519f79c368ab597 184 | b6682c520898c7217d34374bd34634a89b79d6e7 185 | c5b4bc387bbc8ed0f247a41a7f8851347d8cd013 186 | 046b8febb81e076faf3c63a1778852d24d9a667a 187 | 256eb65918fcfb738f010293868fce22694d7968 188 | be699bdfefb9197b4fe083a69138ae9e67566610 189 | 97ad074b9114625620a671f65a10a1f2cb26cb56 190 | 458822cfa8a425cf6e27e20f6cd1a73d2a890aa3 191 | e76dbd2cf97834c338d1b7e7e4ec878b697233ad 192 | 8f4d0a37acb0f8838ee8aa5cdbd129607aff8a47 193 | e28665f3a45c63da8b63a226364ede5db2233cc3 194 | c962852c5f3f8df8e049fdbd30a2c53c13ce1e28 195 | 19b7191ca9b5b42977a62e127a1bc8d490b83552 196 | 869344fbeb64ac41aefd8be0628de987e1ee141d 197 | e56fbe1917d06161eba1e7e004565fe3de606f9e 198 | 8b5a30051f4157eb43145ded0dab5f33cab9e5ba 199 | 0d2c776b9bda4ae8d8e4af9590eb87739956c6a8 200 | 34da778896aca39b923d0405cd9372c88aec75d0 201 | cf23f2c9f74cb416f0af984de668e3669e9696ca 202 | f8b5d6154d7530f205486573918a3dae1c0c9be7 203 | a79aadd407f2cd9bdc89a4afb3fac7aa40e65716 204 | df960e082c94609670913431c3d4c2c9cb437009 205 | 7e97e0dc3760e51246f315b8e143ebe33a15ef35 206 | decc8ceba1a3737fd60467ec4ce9dab4ab2fb104 207 | ff3c80f5df0f1b8ddac728875ee5c45f7c62f076 208 | a69c61e827e3beb3febd7e2d742dde87f4e76184 209 | 700fbced740c6fcfa7e1ffeeb747ad1e2ad75b27 210 | aab13ce295b0be93d808139a12c1664be70c41da 211 | e71735db0732b84840e0f8bdf89d790d37cbd2db 212 | b38e934550e069fcc5a36c8f7c87d58b0538f6e4 213 | ec51a6a9bd0574f434dfb69f185be8cd1bcd3319 214 | 68077ed84cb0115fd4b0754239c3080a9867e628 215 | d63b378b4bc2fcaf4c2eb4966587ea8142196dd4 216 | 21853e35df8d600c4fecc0b4973dff11397dc7c2 217 | 27182af3ca5747460aa95f231ab33a67093124de 218 | 1dfc4e9f7e0712115d3867085e279824c4d276a0 219 | a079f5848f3a36b5ba27bbe38eb5cf338efb66bc 220 | 16605ebe9be47846f9cad54f91ca6dfe4908e853 221 | 3283651219d1afc91ba0769fc1db0ffd06742f8a 222 | 14814cc244e0b19256c10a804e12b58de60d108c 223 | 2b16b50d551d2f5c108a07fc71c7d253a4858b1b 224 | 1616b7246494a777ee32ab097db9f58938268c27 225 | 6ea65ceec712473a4f356e5312553f21e0e62219 226 | b507ed35dc4779d4f1f474103e1b15b01006b06c 227 | 27ae04d11f5179c0e1a3908590b63dbe05fcc0ae 228 | eae8c9d441d8af80b9b303875dc84c8c33ab7ea9 229 | c37d06aa7bafa30a3a2371c780adb828e7342a26 230 | 05a7b39bdf7fe58223d95335b3d94e3520e37268 231 | 44a7da99ab89512fe23dc96d4bf6233549f04900 232 | 5517435add61a9bd8098af5d192b5bb0dd8c43e6 233 | 7d484c0754a8389436cf63b18b0eb000a582b95c 234 | 19459cee34c7349242cd4308eb71678c56ec2d9e 235 | b4f7bc2e6249844a4b9220be02535c42f733b6c2 236 | 4543efb5f2b33c78811d2de51220843a0b150c6d 237 | 0d832adda6ef5b5e3aa49049a67485b0511a11f4 238 | 1759868d04546a83cee501a3613184b27940666b 239 | 0201a6176da93ce2469b356f488147fcf09a533f 240 | 800c2a0152345bb9a30b6a7616c0e22883b0dd8c 241 | a7f3b2d593b9f6d635d6b464f76e1b416d17d834 242 | db36320ad6cb76837016e1a2c1efaa31c47f8137 243 | 7ef4b60a20e4244d616f5a8b53544bbe9cbfef9b 244 | 98d4262fac9873fc4c1586488ad1a8c7b93c8df1 245 | 7381505235e920e549c09afe8239cecb56f343ec 246 | c98f8da85e23d7d5ecbbbe60f87f089c3baf20ec 247 | e247aa7ce186c93e8064901d55feea88c733c65b 248 | 76db8900ed3c1f7daaff79b4d889f942f621a7ca 249 | f2eaec825ddd9d622d9de32cf09adde52f3aaa36 250 | 67be55c711c4efa7584e46f682787cb28ae68a6c 251 | 04dc027c5f12ea87a07f2eae9e41b35a04ba2d04 252 | 03e075910193e4501da6f4bbdf2906ad3a515223 253 | a4577682a4b8157350c2520bc9501d9c7fa6ce2a 254 | 1e45f6e848ea16afdadde88116aff40207647ebc 255 | 4ba17602f6c135456298e6a1ee0690f6b67a6e96 256 | eefc648101f429dcdd910ee4c5d70933116fbefc 257 | 6b28aaa593239a9605c21d4d8ff9f0ff17a1c329 258 | 67640f9f6b782e094c12d5e004bda4e741b226c7 259 | c824f5c55cb83946e215d875ddf00ac2c9aae61f 260 | 5d3f732badff84379cd11453ffd5f40df8ef9ffb 261 | b41a3f3647661bc47995c640e7cfbf0cd892689d 262 | 9fa7edbac6ad9bc88e3b5af676209a7a5b7a2054 263 | 56b4ec2a0446d10b12da39f13c6d9911747b4347 264 | 5acc93253f78aea7b894419f246d735459062e9e 265 | bb3883bc92d8fc6693c18c422c622d6c32e0494c 266 | 4cda6a7e35444bb23fa03ae2765d53f69ee66c17 267 | 7bc95627fed6f1e8b3c2543d2a91199d62caea91 268 | 14ffbc0813eeb43bcbad29484a3d2bb78ce83531 269 | 8056b3d1c17a3c041ff2c14b4cefd4490338d536 270 | 2a7f1868c618fb8c5f45b10c5d8ab12e4ddd561b 271 | 27fe4d4bd32dac154ac16a4977d96bb6b5419aa6 272 | 6a4d177567d7d05b16d34e92fb12436daf9b2033 273 | 54ca65af580934fcaedec57f93f2e58070cd0c81 274 | df6c6475a7b365a92c4da20fe94d7c08a02dfb5f 275 | b1234f5d87511d609ae291b5bba097463aaad8a0 276 | fd45c5cb2b5f4accd3380d3b3cc32d19ab0b621a 277 | 8c121af61b55c13b45bd52b15c27cb3701de0422 278 | 2513bbcd056ee311fac724fc9fd904d5061af839 279 | 8b0d30187e040931334f064072812572b8308feb 280 | 9f4e3078c621fc9e7f0d2fb926d2505cbd771551 281 | 74f2f5738369104c3facc74b96385fc0d2cd3841 282 | 721def5538b4d80947c76e6c9ecb56c92b0c2522 283 | c3c16be5ecf22e643e48d7fc7fa62696134e469b 284 | 648787f881c50cb1e027126885356385b7b4616c 285 | 5257187d6421c4b870e437f84ee852bb16dc2dca 286 | 6cec40bcaec952ad5f443c1fafc9e202e6d9b336 287 | 2e5691a7e83d016944265bff083c65e4e729779c 288 | 6735a4a3d25633cbc9de9b27275a42c79bafc057 289 | 72f0e701b75b51ad17a55c7545fafd99543954d0 290 | aa45b150e1c2be81725b061ead3ab014af720a9b 291 | e7d37261dab67bd3f9c64e14a18e73409a7c7ecc 292 | 7b26e1e185f194d4d13b2aaaf1fc87472d319708 293 | 63d5769a8b22fe908e1693abb7c01eb8e5824072 294 | 8e3777f48abf9642dc04824ad9b9b8e8f096d321 295 | a071bca269b3e2be9e700ca1f2b2a3609d1f3d50 296 | 47bc4571689de764a2f95f970c14e0e066a44164 297 | 7bfa2d000a65694c5cb7b2cacdfed75348bf7d1d 298 | d2ebec3d3ba1207b01764028a21f269a4b6bec8f 299 | 7dafd8934628ac1d1ae2ac3a9a2c12b527bbaa4a 300 | a9a2631569682431ed1e8341e489462a0832f1b6 301 | 2ea78022fed6297afd47a8375ce4cd06e29c9151 302 | 120e8e48a3ab4bd63eca5f5fafd1900ade869efa 303 | e368064fe4dc14fcee7b8f664b8f6a51f9fb0899 304 | b1e6009af726806bbe93918664d60bcd5fecddad 305 | d7e0f968f1749149a3dabce696672bc06acee32e 306 | 95f24b871550b823542cae4dcd8c6d490ec90721 307 | 2490c8557dfeccdb9fa7d79214a3d77e2fb5f4bb 308 | 1f70691bf049e62beab03072e36368f92f0b2a36 309 | 0b57050b3c864a25d1fc0f17aab76458a8ca3314 310 | 834d24d3be63b3c0bda560a62e3c00478f4ec3a5 311 | 1ea8626174d82d992b006929008e23edd248f3c4 312 | d69cdcb41d6d6d9afe443fea566d3f9f493931b1 313 | 332eb3a291913db3eeed42d2cfb8050cbbcc5f76 314 | 63a40e1f31cfede575514277100fe35d3f052b40 315 | 812a13047f1e54f4f08a1273d9ab6486e7b7c699 316 | 17d7e3cbabbfa59ba99922a15eed9d2062b43aa5 317 | d0ae9ba925cc6bfb8365986f7470def5a135a7a6 318 | 47ebcdb42eb03eb9680932dbd675c7d763d36fa5 319 | afa96bec4a7542ac94ebb9efb071c3d5b997b612 320 | c44c98f05b152fe9862df5a33eba4f6f7b248a27 321 | f7ffeed8cd7e3416b733b773fe56dd88d1bf8794 322 | 808e8a2de4572ad05844fabdb9684308ae5cad00 323 | a15ced6dcc4b1bf90f805a3bbeff88f8f6516a5f 324 | 2542c1ad5cc5bea42d4c4abb432967e1e08e5398 325 | 87ca3d6bdf40aa7cb294b0c8a2e20dd552a5305f 326 | d51008f050eb278400bb79ecded2ef51aa64b0cb 327 | 305643172f39641606676716a11d2b1fadfff04e 328 | bac501443e55f27ce90c7d5f66c235c79ffe00de 329 | 7f19d97b24a9b9c57de38634da00fab3abf5a733 330 | d9aa7a13dbe24aa765637fca1a6b1ce3a6a6882f 331 | 52012758bd20c5e436e69720d2004e5cac6cbe71 332 | 27e7419fbb591aceecab76d4a6a5f666ee9a759f 333 | b745b72abdc94d31c3005f9e88728fcc589b4d18 334 | 7745852852e95402c9a8ef4ace301b6f02040fae 335 | 0b62d2474ebdbfd3f48be50eea510f9bc0fa1d09 336 | 57c5c16a44965b28f134ad04cac233383fcc0f0b 337 | 86b1456a5ba256604719d8bd2ae959e19441cea8 338 | e7921ccbb1915d48ccfe60d8adcf50c00f082031 339 | d9978a9b225827c359888761d87b06857d99ca11 340 | 5c9f8d2bd2a617ccb2fe1b72bec33ff8123960ff 341 | 25f850a43fb6bab709ce809a20b8b5be1e01560b 342 | af8924387d68fb736a0dd84fa04a4882239c4bd3 343 | db684dcf4fb06aea2412fb1f2814e7bbe9deeb31 344 | aaf1e44c38191739f6e54bc014b443c78fc4bba7 345 | c9a6cf11bde7811ea3abbadb329ea1856eaa6e39 346 | dfe735fd8850880caff7099fde94fe1611745fa4 347 | 4108a36b6760732684530f415ac230a1a94f3c66 348 | cddc06c36f6f55b82946c1e7fb67467b2f603345 349 | 8a437a74ecd59cb4cfc35fe7e0305b49a2f1cb44 350 | 492c5eaebf9a5751a668809733434027e2044473 351 | 712fabe65be6b80433ac3fc75668b1ef8ed451de 352 | c88fe19bbb98ed773f993adcfff3c4531e0503f4 353 | cb53df10ec5128abd5289d81a4bd84b0fa0f2cdf 354 | 6d7171301edb4abbc775796a01dbd90b424f9672 355 | 26713436ec199423c19cc7ef34d8e27b2e27c515 356 | 4d0ffe031d34025f2cb99726a2ba873c6326d33f 357 | 7ae64f6226cd59af7cb6758769f4dfdb103cdac3 358 | 9acd24e2965a61945dfc5b16da270b7b6023af4f 359 | dadc47e61c329814d4f6c4dd837f0d0a7a09fc71 360 | df4ec590397e421933073d040b119a80a8b3e8cb 361 | c074634efe730f956d8123ab7f235a9bb9ae1fff 362 | 3cb919c36e832534a7a36712c9de0b81fc856d6e 363 | fbec0d300e88035e5a763c4a717cc286195c033d 364 | 677334e2c69a26ecc37b3bf1d0cc124171ffc7b4 365 | 7e1e2f77834bc652d670cc5f11de7a4ec42835db 366 | 8eb1dccf4e31f0e4a8ae57a0744934f0c6102e3c 367 | aca7f700ace587fcb4b085776215ab2046026893 368 | f9088fd591c68754b8056256330aa75f47e2bb6b 369 | ca35c99a2a2a94d73d2714de928289045a5dbbd9 370 | ad56aeb660423aa08efa1d029fb8bce174dd1141 371 | 28b57bad27fc049341a91d02f063070f70bc4fc7 372 | 724f802662ed5e06a6d252d65d4f1c7c4cda3009 373 | bf6355368e6147c9ef7b8d59d2ab8a6aa945f334 374 | 55e923386a679b9b89a0620c4862ed301c2fbafe 375 | 34db7cb5b56b9baa7e86598eab91166cacf5f086 376 | 4395d648616fbb70e076bc8e75f263ce2951aafe 377 | 458e53ed4c26ae4ab1256c3a16fe1580487d1151 378 | 746ce634bb6d74c9106ea47662118aa09af04bef 379 | 961b2d2578a89c39ac11f94668e8841bb85bcf56 380 | 46684ad433426aff59f871476956a3abcab16cfe 381 | 93f7430b418a1a6d6b0420981eac737a6421cd87 382 | ab9b31b15e209ee70e0c0a8197985e08a39e4bec 383 | 77559f8e1a177f4df55bc92f9142c5e957109f6d 384 | 79dbb926185ce645f1ea3cfd05ec02367ec123ca 385 | 89d6b3c1ae65c8c433731d98154b66f44f1c02d7 386 | 62a7d26236517ef169e74d76587454efd1c4c2b6 387 | a8d49816b362e3f94c920be54037cffe252acf2e 388 | d9be6b594b6361673454feba6c089c2ad8f6b6c1 389 | 62eef7346ee7b22c955442b8f3a0e25ae248d119 390 | ffd327a01feddef224036510d41647994f0f2654 391 | 95e62c791ce9ffc5986561516e9912309369db2e 392 | 3e24d001950e402bd03609534c4d6bd99ae00d49 393 | 02b3f55a033e6f56343e642b9a4d392998c6150a 394 | 47024106663462b2ef1991014dd8dcdfae2fe856 395 | 03a92584280e0719919e7d1e70c446194882ace3 396 | 6691e8828ef08bddf41de642f73d97ee47c1ae81 397 | ff63ada7fe7dd879802156b30400fb337700140a 398 | 4d3267cb1e55079c9ea7d8809b81e1186369c2af 399 | 0358345f606fb40077b5a34477dce4599e136578 400 | 62e6c5843d01138bd931309212e8befea04c034e 401 | 1083ebebbda09607568fc1f7e1bab303565f6f82 402 | 1756eef2d8a23b1046f8114742d07245cef7b3c3 403 | 07ab9f38293a55b0646dc0db82c54149afc0c4b4 404 | df28aa8fd02f6292d0a706933ccad2dcb92d872d 405 | 134c475c754c3dda740b38bf1549dee96213bbb4 406 | c6e01d8c69505bb4641cddd7cb3c6c9088fd773d 407 | 3c0fdc9aa0177a95e3f0eeda0ad278c6f9428e83 408 | 08f4d911e4af16119da5a260d1ffbf9ad2187fcb 409 | 9ed00ded58be7934a9046d9d9532d06b034f063d 410 | afb0b8b7a169ccf5bd0496a0f80bf06bea47c92c 411 | 62c0e9822322db728c86fce1db64cc7d5b6ac975 412 | 25fa4d7ebb729eb03415d72c01e03f4eb4c25f7d 413 | 0bae35f8e91ed8e08dab747c17ef437fd6015905 414 | fe34900b3149e7d539f7da19a2f26b7550b56528 415 | 182ad9bed4f71023da577f6b5968361eb9beb202 416 | 3bb3039f78b40a28dd838e5a0056e650f819705b 417 | 2a6be549fc69420f62e4f0dc43e3580b45b21838 418 | 5d1f64747a33bba32d1dcc638e8decf9fdb36307 419 | 071fdded9649f452a9c67d5996e525d8106f4747 420 | 009cb5d2500ab2f23ec73fa218861cfb54f293ee 421 | 1e2298037aba3802bff44380de4aaf1ad9a690ee 422 | 9b476fe7156e03d12469c73b614e4e24da8e4da1 423 | f7f490292c3110f1a60265de12c85533cb8be7d5 424 | c866ce4e266a392637f3f4c040f2303bb92f06ca 425 | 4ab41dbe3dcc3a037a1a1f76b335573a32485db7 426 | 0507913198ce5e8b979aeee6930aac74e55ab00a 427 | d8d8b9dabae93d1d52c109868b13895be480a398 428 | 7ca7ef22459a92fbad804d46ad7cc875b366bfa0 429 | 9976f4fe0793978af7672204606df447a5b3fc8c 430 | 7097ce1d52014caa2ae1c6fdd63389f78428d8ef 431 | 1f0365410cf81fd7f213792d826fa276d3696aa7 432 | 2b2d68c364609ef58256c01da7293af2b4f4f1bd 433 | e5626a23d9df8bfb74e91b8966fdf0a81b39cde6 434 | 06a1e4b3cb550f01b4eb04b6451021169e970637 435 | 94cbec065a52da99bf42867e234db33d4851bb77 436 | 03910223b4f55411893c941de07991ba9b9fd58b 437 | 43bb3dca130a18675114e712aa786038c4af3f50 438 | 68cc28ce35171eb68a9a9fefcb74d6209fa0c3f2 439 | cf524f9ee90eb7afd51da430111fa78a26b76e8f 440 | 1f607db8612c1da44063abea5e01e3a4f7398bb3 441 | 5f7cf3212fc815e9fe72fda3143076972f5fdf3c 442 | fd3820b95e92f23c412ed926134b57028ce7c545 443 | 3e760678ac4a58c79b14834b7f41bc151ea0a481 444 | adf988440fb1486238a2ca6cff65b1c7d739a154 445 | c45d5c3b15f7460731ba5198f7a95d3492b15c1e 446 | 8c7f90f4eadf7a9c30dc3c2ff211a94d6d0718d9 447 | 5fcb95a09c011ad16e0e4bb2ba2dc62d6d98d6ba 448 | 0ff1b042a5776f0582b78f283902014c58b7bcad 449 | 5388a98ade82c938de247560cce387dadae774e9 450 | f4eaaa00fc4d043bc733496198ac3019531f009b 451 | de2d7a195a64f9349b1639d9e82ef52c6feb9ca6 452 | 7adc9a869134494ef1edb89d23791304bde4ec78 453 | 68d9d9b00dee03def191a1554018021737c190c0 454 | e96265e7a17dc84662a9793c224667d3db8ee52a 455 | 81fad681155cc8c26adcfe44f40ba7f2fd938396 456 | 4879713e8055bb5bdbad2b6e91619e97eaa122cd 457 | 7e2fea277f1fcf367e5c84cdaa2abe438221f209 458 | e634811a599e016db5abf5ea8a479398c8a4b8e5 459 | 4c005aa660d081503bc76f97b4cabe65edbae9ea 460 | 606fa10cf71fe83bd6b89e106b83429db5c75c9c 461 | 1c07f7d59f0ab61dd951353adce8da496ea0f237 462 | d8832d771d9f070d118d9d1ebeb074bb08da104d 463 | 48f6aea73067f2e612db9ee5029fcf160149e0a3 464 | d378e4b76bc896ccabccb49fd0de75c7d3a6a02c 465 | 593254cb2aa775fcc39a83cdfef9e34383d87532 466 | 61b167472e814f7aaf6b1fa1f2137adb8545a495 467 | 542fe8fb06b0dfd93aace171dbeafa46d9ccba10 468 | 6df16cc8533f9279e56edc17c6433efb946ba952 469 | e50b2c9475ed0f3debb83d274eea68a8f8f658af 470 | 84b109279c8006c9182a214ac0e2580465ef7c74 471 | 59a08230b179f8ba72d1c0428c91880faef55505 472 | 1dff7b038fb84ba30d97334caab6ce31ce4766d0 473 | 707b0bffb521517f55880421eea7cfb46963d4d9 474 | 3ad47749d2e27c203eaf6b38e671254c391fd60e 475 | 4fd520413ef1a64ab0d0253e9c4d4bdd3dd3e8c0 476 | 06e73b6fe42e1d171e75eeeb60c0121638f7cd46 477 | ffdd37a00195493042f80de4c552abd40683237f 478 | 6e4f512d49224d7fc917454fc72ed90e1d48e124 479 | b1bce942f7cda98b99b76af9dd3e472e5b23df33 480 | 2c74571c5a56b2b04b654fa1f58b6caf0d64a4a0 481 | 65aba0d8fcfd12ab7fb198718436a6e2a27c01cd 482 | ad27133ba709519c70c08221596eb33f9edd1a26 483 | 9218ea9d4b50c1b6351e262913e8334e2c283f3d 484 | b0a81337c1813e5ce7f186fed59aba69afa27d99 485 | 6bb3e6f7ed935c18efe06c2a08f02f165c2eee0f 486 | 5f6447e1e78e51fe9b8db614c5bfdaa790065f4c 487 | b10af68624e47656a30170e9d3c3bbe2dc2b4168 488 | 926f29351134c31bce5cf12b603d76f1d7b893b3 489 | 949ce7daaa3f2e2c3bc25f17bba715152ff08aab 490 | 5c08388d378fd623b61c9ed01fdca59e99a88329 491 | a36ae93ec8662cfd0e55ee933ae7ecd8ea7d05a3 492 | 1d1612a0a10513017b26ebd26251bab320575737 493 | b81fcd144b4617c575f32edb4a539e9b3bfff604 494 | 35e8b7aec6599ba885924a01bb8c6cdb59b86cbc 495 | 5cd87f5c11980405ea91c2717484b384f088a03b 496 | d2b8131b2815d01c86ad2c040a24398bdcdc5295 497 | e4f659ff8e19b838105400409f87890e97234ad5 498 | 61fc182bcb33d30c4588587ca6cf2bf0534cb36d 499 | a66747d8ce105535056fc7894d91ca18218783d0 500 | eea2d281ca95719eb675efc63b08bc0db2a18d66 501 | 68ae7340f26226af9d2fb5a7bb320934f22843db 502 | 20c6f89a1896e80df6757fe4077bb5f4402b664e 503 | 0c066058803ce2ccd67488e1ef267a1ea45d92c2 504 | 54d751625a966d090c3f8e37d8996f321672aaf4 505 | c84cdcc3de777129fcb4ca47b7217cf4ed344a7b 506 | 009bcf532409685ebbcb1769224518ebdeac0d1b 507 | e73d4674ca977c5e1b1ae8a8948fd0e521fb1389 508 | a26957979a976a141d79fcdbc52f35bba8a72990 509 | a819845f8bd788bcb1feb9090aaa1f16e485a3ae 510 | 00ac952f247949089ad7f42c33c373be211b1c27 511 | ef615d80df3bbaad9348522bf8a64d6aefa31ee5 512 | a6a2b66163cd485994ed89a8e72bf1d992018474 513 | 1a59134869d691c0948ec5fe4ac3d1be1c79dee0 514 | 9b3fba327e0b70e0eeae30a88576ce81fcb57543 515 | 3607fde243c99a4c66d6462600437eb30efd099a 516 | 3fb5c16d6ee783c074bb00ca621b4033bbcff922 517 | bfe887c1cb52a31cd0f1edf0210c092e14f75ad0 518 | 1ba24dad41f6aa0a08bf1c7a6c1b85ba78e75f0f 519 | 95d043691317e70f972032237351333d1739486b 520 | 9d035df4c2887b02f5b6c6d16743d900e9a0edb8 521 | 754fdeb17b50bb42787c356d1cf9ab72eddb33b9 522 | 93b3adee018fa832fcb0d62872c30b5e5c2b00fe 523 | bd997f1baf86e37368140cd1efb6e860bcab9cbf 524 | 170b4095b38b03de03728cbe0ba07edc7a108dc2 525 | 20b04147d88fd97172444db764abc351ca69d039 526 | c21c43785010bb52bf545b49bf5fee344e422704 527 | a04f2f60d3763b2962a1d60f3e0ba59c747452c7 528 | b901b9424dd97a310ec7baad7323bf39b0e9b81c 529 | da7445eecdc2097292b86c70fd57b186cdca3cca 530 | 882c03688df02f4f6974bbc91a920748e468a132 531 | 8ad6dfa2c5d1934459ba4434ef2b2727078b71c5 532 | 8e6ad9362f30a20b8e2296e023bd4ac3f2f9f8ee 533 | 6d554cbd71b4d816fe20d65b870745cb6e3db5d1 534 | 5d7550151203d9bf08000055dc21e2d24283e1bc 535 | 4be2c0ca24d4acdf44810ed601ed4b7b6ae7e083 536 | 3609b564f691a11d12c6a91ce6bea88ee454ab20 537 | 0670b7fc2971c06eebcf4ad1425678cf21bea6e7 538 | 952589b9b44e6173838c39b6e5f00c87e8d0a10e 539 | 76f5084cd938a8e26f7cb0ba935c08ca3cca1775 540 | 471c7fbaaf743edd4e23a11083d6ac762b54ef39 541 | 5b31afd5c9e518d9ed30ea7a2519bba095207317 542 | f7b1fa4a66cffac03972bf90bdfaf5f6c88eda35 543 | e337290ff925d8ef969c6add4dcfa82b4547b2d5 544 | 730fe6ffc1e92874a556461a4d5fd9498b184c94 545 | d2c206737d55b6e735495e93b4dd301915a8c242 546 | 57579d255ffe7be33e31440456b08ec24e7ba398 547 | 02753c686e8eddb520f25fe4725ff3ec535d5f12 548 | dd3f77a240651a4e52e404607c109f78dd751f3a 549 | 91d1ca686e71d59d8914e1841cf2eeb3f881f2ee 550 | 0b480798263cd4cd624fee735ce60027777e3e94 551 | 12e5754ca858f88191fffb10c3c60c1a023d4823 552 | f1c95dad0ee46c137275af196f8aadcd7f88785f 553 | e33b85905847fa78e5985e53408a4c2efbcfacb8 554 | 7c455ab867bfc6012a14b140947b7a687caf9f30 555 | 6ae2932333a4e645f9bcc9e2e3abede14ea34ce4 556 | df05f764173568bff6c97bdf44dc87d3802bc272 557 | 817e3772691d831137a04f3b6b4893199b2c5481 558 | 5152d6d6a598e0b3ed3fb96051533da370f51e21 559 | 23a07efb8d9029599c5c9c412b6d97c31db7e12f 560 | bc2121a7c9e6f3f5cfd446974f2e3e600a8bb538 561 | 09faf1856235a450ed7b0f9af13ad4fa9054e61a 562 | fe2f47c0ed11cda5a23b28c35e860b3706e97f4e 563 | 22feb0d16fee6236b3f060f5195bd7568c0619ad 564 | 439a3bb2ed3f0f99a94ad2bf0f1e281a2eb60e47 565 | f6727a6f02ea5a3cb65add8357e0e918d565772e 566 | a5e5182347042e721cbd4e55d84b13209a2530c0 567 | aa04f725e655e92815663f866d8190eda07c0d36 568 | a7a524356a21b3b847715808a880125493d9ae61 569 | 7d0891b7b5bfd8dd0f2073278a44be578b7a3c60 570 | ad627566e39fe69585d2a5f06d586c66922089de 571 | 4de70d154c47559b224485a0fb8cb323e10c1115 572 | 8d81395915d37dc155a6a41310a83dd973344f14 573 | 7e83085bc3570589a4302e1a45f6b2bc0bf6da81 574 | 939770f45c3713d506e96f01b72dd3c1acc3baa5 575 | 35a66fa4dac47f6471a743b707a69e588a2d11de 576 | b2c8c0b205d744bb8d943fb3ce0bb45c084e0e6f 577 | 346e8983c3a37f0919292c98f257d4e8fe70c5fa 578 | a5b146b3f81e9fa608803ee06fd61f85e29722b7 579 | 63f468677f204966fff6c52c0ddfb7efacd0cf88 580 | 23140a2f7b37f2a4e31b77055532d5e621e519b1 581 | 7b102e19df6ba43c7893c374b385addf72e870aa 582 | 1533792bf0eebb452256dd88f38a0210f5864b7b 583 | 89494873634a41eec80f709b9f0da46d075cf0cf 584 | 9eec65f3ad69f02e501edd4d878548b1b73edf85 585 | 963e9c19c3b8ec5c16e57a7152f4bbea3461525a 586 | 3fcea5c2571337d7b735fe8d713632d8680931ee 587 | d4f9ca12ac1b86a42dc82acc2d7da02b36617c75 588 | f2971eb9d51422b13d3170cb966e56f35e34a8a9 589 | b796054f0b2aaf6d24256c820d39b17a7fe154f5 590 | a20c637c0b75dab21ff8582016842b5b35ff9dde 591 | c3240c0519e59868b5f15bd02db28bed828fb1ce 592 | 486ff5ad9809ab24c21631da27f86d785bef7115 593 | eaf2672e78534f291e414f0483aab0cd582065e3 594 | 080c2d2ade10f1b3463cd7579bc67a99f68c179d 595 | f0278c155f9ca54109bd6df559c7f9f7d3fe9a7e 596 | caf9014ac1a1092f9e5ba8c039cb49434a48e51a 597 | b7097c7db3e3f3ff0885d9b6c6e226ee6511c17f 598 | 6ce1797dd1c771512e6e784483c7e543bc565c03 599 | 3b5a76fa7cd5ff2fc05643422f48ef6b2c4fe399 600 | 7fe2beebbe3febd4618854ae5f5c2466fe409c74 601 | 14c9235d0adcfa85c24cbe5c1c3ba95575c249a5 602 | 35de10656646710e2b1976050d672ee610842d75 603 | e1c94b29f99a0ac04b2cef3fe633826776fa1f18 604 | 95c925d842389addac8756b3c8b40b880f050f19 605 | 5ac1364d42c341082edbe51392c7ba96f5eb95d0 606 | 04434b5cf39c0316cc4cec99fc74a9f099574c2a 607 | 56eca70d60c2c81805e98a938bec97337891d6a5 608 | b22769a7843f06f0052eb9c192e82ee868aafd3e 609 | ce531d4661de9584959b189470f509f26e429b40 610 | 615fd1e78c5c06f54340b91a65e49a6285fc4f3d 611 | e9d6975ba990ab258af26b14d7320c4557a17c84 612 | d1c981e602283c0673998eeb8105d96cd1d0d03d 613 | d54bf7e5f6dd38ee58736e27a6a67a7fdbd79232 614 | 28f99b5610a2b1f21e7beef9be3124e4524e0b45 615 | c9442f5be882e648ac4f46cde2c86c9bc98efd1b 616 | fa313e520b0b95506d0b34226a5b631929244f3e 617 | 724bb19f08b2408764379bbf8e89afca1b5f619f 618 | 06e91bbd7fe034e00a5f25c7d49ff21ab2764380 619 | be2e9a963f2208b45b63a599680c45eaeeecbd22 620 | 288ddbed85d58d3e707f43c4bf2273199d0da717 621 | 359ee017c4077f1c10bbfac93b92592a67662cdb 622 | 7e38a71ca7e16c122335e89b12fe4a9d39e7b39b 623 | b0ca2a7bebb5305f3e3760f94bb20bbfe42c6a9d 624 | 525c8b458c996cdbdddcea20e66af5276d8dcfc2 625 | c6f6cdda910fb16e1bf8b0591037340d338de8b3 626 | e67870efb3aed5c5fe6d70d951338b53a7d5ae0f 627 | a9cf9b6f40d045688e53823b7d5fe6925a077bab 628 | a7e3c89367266102e811bb5b0c8fa50301e4e3cd 629 | 6ef243e00b88d32c65bb0d138949202eb6a86aab 630 | 29a3e3a786f4e875c6865b44f68f73453e03fe87 631 | 58fb3058c5055525aabdd345bf576b6b50570e31 632 | 6a68893bc63f6a921359752f5fd0b02a3573ca15 633 | 0f57b34a1e8d498d8262c93c5e838c6e9ccb31b2 634 | cdf71c7a0a124d986e5649ac7035b1e4f6c4c0e2 635 | c0a50bff369a1d75adaa4ca26bb7f463de3ba6d6 636 | f471b8a43984faa7940bed0971e4b41abb8d5e23 637 | da445cd8cf6178e7f867266c02b658f47c681ec9 638 | 83b24fceb0b4e0070ea6c6f60ad6e8c3cbcb410d 639 | a2eb11f3faf888ae795134868b9e445f03882f0a 640 | c7df1dd8c3085ad60726cb5df637213c47dcbf44 641 | dd7ad9db544a228984e3631a3be2a7241549570f 642 | 0090a7299a87bbebfa02a3a49b39276750f93662 643 | a9e43c25cacf5199b26faff98b324ce4d6d31786 644 | 6ced9da9defa350361a0f56f696493898e9a484e 645 | c083876c2257e4fcfa7b19071a6b7a3e3aff5670 646 | d0593c7a79fc102a21aa30c221e228fc8a8f10d2 647 | accf251c719cf8f1d16a5687390caa8bd3214c6a 648 | 133a4e13f11ba4cd1c5a407ac62990013f5ab8d7 649 | 90a4ec668d371536acfa0522fda4fd1557bcef41 650 | 66554dc3a2ea9e5983ab3cfac197db7a98085e97 651 | 8d5a9ba2dfe14bafe364d1e89452a2f328fefd96 652 | 2126f21d3639c6b26edc6e0139e261839a901577 653 | 4322d0be11b74d1d98000589fe9f8753ba25f62d 654 | 566098d6aabbb120262df4537f804b009083a257 655 | a2f4b72146cb9e0115f05fb5c79621a77be9c384 656 | 7e4807fd379b54257d228d566f76132271d94e4a 657 | f45cc5cbf3a0b696886b68927afd29bcbb2eda8b 658 | d0e1f1bd95169aa9ac08aa81db92d88379b423c6 659 | a686fbbd0d3e1a986a3769ef6fe0eddf6086b597 660 | 398f9e0002964f9a765ecb49ad85b1d18c537b3f 661 | 4ae0bf3096b4870fa15aa3e94b886eda608bb7d8 662 | 065d2593379d21e16907543c3a02dfa1f1941ddb 663 | b3832f0c03776d306e81f0c9fe3b476b760b4d18 664 | 332901e8417872d76d3d6f3e6f41916308b9c7e3 665 | f30882e56569ce9ee9636b5b46a8dd0020a060ae 666 | 79a17dfeec2320df41542d8e158855903ded10bf 667 | 7ac93b637187ce75d9c425cbadd4dc94c0ac03d6 668 | 16b17b0613b89db33d4a0b87951a448292bac353 669 | 63223392124609a185f414e5f26fa2dea3e18bda 670 | 14df0fb83d091fa7aff2561b2be3c3541a5c51ce 671 | 3904e522da8780eb443371d11a82ccb6450b2ca1 672 | d83d9e482bddbdcc645186563f9237b8b40acd6a 673 | 17a741d2a71c0ad157c4c7d44f97ff47a1aa64de 674 | 6b357cede4926c7ad57d62c0e118d1c8643d3660 675 | 8a219c73acc245f2d55b29b27aeda557091bf99f 676 | daedffefbf01748208df87a3c40e369e613fda4b 677 | b7cdc4d13140ebcc18a682a27b16feb0eb1b6f34 678 | 5b7f59bf06af8ae2ef21a8458bddc34d52e33618 679 | da323c54d77b4ac5541609e0610353fe8b0b91c8 680 | 4a381e8fd42917d0a915ad6a4471b68d467f13eb 681 | 1e39a45dde7dfd2b128725918dde34a981dd3f61 682 | f56dc1cff1b4b4375666a2aa1a65ba4271ad6e86 683 | 043df34ec58c18333bb9b5ed09fa43817ecd5312 684 | f738d1824411e8970c2a5657595e90397db27e1f 685 | 2462adf49b50cf55a287061cc8f3cdfa5e7586c0 686 | bf8fbc28cefe92563a0a1c079d224a9c165c1e52 687 | b2ad26cf3bbc26adf8c61be017aa08c0427c33ed 688 | eb69cc5cb91b7932d272423262375877d19426cb 689 | 31e790096da1867e335feace43e7d0f80c67d673 690 | 5f88e50f8eea461ac37eeb37bd6f768facd84fe5 691 | 810ef3a2b43732db9c013aa5abf627ed9c810b63 692 | 626821d2692e7ffc97fa2981ddaef0c37e1e6026 693 | 34715d887106a8f72c9e1ec3474de61d37b67d54 694 | 79e876228053032de8ee70b794c3e62229029b8a 695 | 87f00959dddc29494a8b97a80b2b11abf228caea 696 | 7969ff8328b92036e2bfe50a4951e068205840f0 697 | 27df1c7f5f72c96f23bf4116d22a1a8534d04ad6 698 | f5e846a36ced8bfa1dfd3f1593c2f7cc1380aa16 699 | 65e55164449704b69770dfbf2b46775d00f4c6bc 700 | 600717190fb23dedbd617402c770ebcebcf7439b 701 | 46385fa57b9d0fbf2e6d74bd535bae4e27bfc0a7 702 | 19f7e78d683a74d8ac33d67804be3ca6c7e78276 703 | 02a8b6f6c7993115255a7d484ad5d29492bb2ddf 704 | 6dad22217e8e7ecdf23d3927a3d4dafd931216fb 705 | 6804e22218009f2920b7a31fc0b1e81cd35f1463 706 | 60bb0b08fa3839350cc93cdd84cdde9ba64734ea 707 | 017d2329e373a75ce622c4e51ae9643fd5c94010 708 | fb61b83f6d0c34601cc05796af88ce1e2437a6aa 709 | 812c9b7bf17eb705756398459fd351ee4cb5bc64 710 | 5ff1439610784f189424da05502ae80d46c4b07b 711 | f67c770654de996eae0ccb3edf2c1bd1e1511c7f 712 | 1748b37c96f90b81f5bc627e517455ecd7c98d55 713 | d5a3bd147e90276076d68894e7f793662e514300 714 | 4caeb38c20a732f18dec15ee6142e3b335ca6a4d 715 | 8ae0dd98a9b870815f9bc64bfbf842543f09081e 716 | 2c4be751b09c16ce1010e8abbc0df79f255474d3 717 | 4e24c5cd5fa70c985dc7c29f7bf4e5bc8d13af82 718 | 6202acef0355437c283a86b2ebdb7dfed9e7e146 719 | 4f47fd64a8b8564ea75f66fa5cd033259b5eb345 720 | 8c5f589c33841fc8e71ec22bab71cf41ed88dcc8 721 | 340ec41f8a9f4c97cc67ae40b5c7ba2d609c5e4a 722 | c6a57af0e5d45711665651a03a3b8b638656bfbe 723 | 809a6ea9060ef5a4026e61cf91c65b7fd1218a7a 724 | 250e947844e4f4538d42523850d44cd0e3279b4d 725 | d1b1ec1c878d29712c79287bfe992ba6791c504f 726 | 42c9b565f0fa1b3420bf60ef2f5eeefdf90ba224 727 | 3383b154f233a922e4d17d79944bcf18803ef162 728 | 2fef5408dcc5998a555cec6a2cdcf42079c539d8 729 | 6b9e9450ceb03c486cd14d5da0096e726ef42e31 730 | 7e3730586aa1be5733eb71425e65a153f215ab4f 731 | eb55dac93d2f096614feaa394271bb0764f84493 732 | d2cc7ab2033442f8ca0abfd343baebe02da67085 733 | 2e25728e824664f31734be5090b82830d636376e 734 | cc22825a32f7eb1fac3a9f56cfc04c21b04c84da 735 | c9e8fd2db303e0cd8798d90aa55a71b582dcc13f 736 | 151cd6d848dd9adf6c8f0dcc6213ca4773c7117d 737 | 87631c1b9a9b32a4b80333fb2133f063e0655919 738 | 450b2768257ab7c36985b59475408f9d085d6849 739 | 6ec40e717579b1e97f6310e89e9526266e2dc858 740 | 32b3d419d80463f676acef772bdd6e2e9c1cbb04 741 | 4751418bc7011331b29b1e127c76aa2dfdd4b157 742 | 4f8bf56007893e530058e86474abd10be687789a 743 | 75167c1f67d3b52cf380701f70a0ae19c543c2d1 744 | 3b4cc44dd64c2f01fb7325de10f553f52a15f21a 745 | 4bf8aad66e6a736a307e21aa885cf647f4d3d091 746 | 114ff0bddeeb3f70695abb3b8eb72ae50ab6ceb8 747 | 00d37c8269a4a1856119bd8cae177f79af9c3995 748 | ef18aa80d10c422353737f54674a62d560c3841b 749 | 5d4595b28a418a0d896ee990aecb70d174e83938 750 | 86496de0a9bc54ded557c9a612b957c7f4f03051 751 | 7f12b35f443711098092027e953ac26ee1dcee51 752 | 80e4074ddf6603463ce54b17adf3f74cb1a6c9ef 753 | 425d52deefec8b92e14295298d6ab4e6379d8d50 754 | 9822045b10d370cc11dc3af5c67087430458e9bb 755 | eb9985b519a7aa480f7bcd0cdc430bab14516ac0 756 | b0f319727e40f3cb8fd08d19a63d1d63ad6838c5 757 | 15188b1ecbe60dd50f3cb9634a88bb7066a15116 758 | 9d9ace61c04718ae2d2a44d7a744356ae13cb815 759 | 1746588f53d9be52e1fb11ec9a5ba9def13ab8f4 760 | d4559def7fe0975759f29379a0ed9ffb62685228 761 | 7c7ae0340c7dd19a5ea480786e63c4b7aac08aee 762 | 0de490e56eea811288015ec3321de30075842cfa 763 | 81a00542630b6a711ecc0822a5c87474bdcf5b3a 764 | 08efd4bb49f04a696e7b658c77f7d2d2cb658c75 765 | 4e2b1ac05ee94c9accb386b98563caf731454362 766 | 85140498e1f7d0c2d76c0f7f849dceff3cadffc2 767 | 69e7a83b25fdd71c0cb798f581a24bf25108b089 768 | dfd03d882d0be712b65e3e599836229305325ce6 769 | 76b72e1eca956490479b8984cf60175862f347e9 770 | 7037a7358a851c7ae94822af5f94499eb3e54618 771 | 5d1e2111769407c05cffdc0a46aca9620394369d 772 | 36de5c0355eff2d4926b1dabd5e361785d518fdc 773 | d101d207d193c78b247a5e9e3e634e6ab02e7c82 774 | 6fecac3e59683604db90661efbdcf38cc9d2f2b0 775 | b326900df3588e7b0260709cc86171a08077656c 776 | 06e9f8ce640845614936df59b742fbbdc6a208de 777 | 2d2bf35571156edb614aade1d45a5d43cf50e285 778 | 216ef395ffe0c88633298ebada3edfd468a7da7d 779 | f9bec7909386fbe2222b60c7733f9bb9f84fece6 780 | 8d2c39d4f63e99be39e9c691b453de724a6e7dc2 781 | 638f517a4b93f8bf00af1f3b1f8bfbb1e9727615 782 | aa8e5c7fad88d1bccbf5cc879150c28ede0a869d 783 | 9f34e1da97dc6feac62ea266b48650792da4751e 784 | 8a4693a438ad488c9da50f85d875fac423e43d44 785 | 37592089d864a7a8919b2c36ffd45df5d9f0939f 786 | d8a04e7b78a4bd47c851036989162084447c9588 787 | 1217b90c42d0eae3fe37a9a047f590b334df01ae 788 | ea2dc57fc4eb8e345170fea226a85a4c2a69f129 789 | 5ac73b51585f0d707bdbba026d4365fbaf583997 790 | 1f8ab1587428abc7df287f852017a5dcb509a2f8 791 | c328e30210481304a1ac1f65aa8c7235f4dea1bc 792 | e4cdff46d23fbb9417c298a70f0668797453cc5d 793 | ed16a72726a36ebfe3cb05b799bc10632a568137 794 | 0b96ef7ef010317777302f8e7f0d4dac02200335 795 | 02327461b336804e43891ea8a11f1cc5fa3a0cb0 796 | 4c50b0bf0bbe1f6ae62d5d743d861670e84b2876 797 | 1cd978b9d61ce9b8d0243a7ec9adc48675d1a952 798 | 7d515f81634d845d74d9f33b04470812765b432f 799 | d8f6be2bb1463262aaa8bd22ebfd1a32d902fccb 800 | ff2c420665c0f3df71b558d22dfd1869f50c9cfe 801 | 087615ba18d492bce8313c919a938efa81f93bd0 802 | 0787ab9e389747fba0b21bf2e97a14922babe4b3 803 | 5f8ce05aa4dc5a546986aacc5bddcb8e324bcbea 804 | 5eb72694336fd0cbacbe1a66d7151d21ff9b0911 805 | bd44872ddf9b7f32338a0a9cfd9f3bd680988ced 806 | defa3abadd2d2a07c862e668515360c43b7ef9c3 807 | 178ddad0e475d003179ee4c7a61410ff7325d96f 808 | ffbca6f1135ab49655f110f6519bc3ede9d10ce9 809 | 19008243edf1525e35abc01381fa96d5eaed6c25 810 | 799f1eeb7b617de86b74ab9ac8f9c53be64759d6 811 | ef1f742492fd35534877ad089511668a8b20de9b 812 | aad45a4e918fefae9d4affdf1de3c3b4c9822eb5 813 | 436a83e5eee6647e071daf364cc61081f7af330b 814 | 3f0e70ab65277768a7af65b2f7cece9b6754eb90 815 | 2ae4c8b4e6b32ed56f9e839173447ce86926acf6 816 | cd55b098b52da1c0fc77926f6644cb995fece25b 817 | 15570914c6891bee3aba54cd8d3357b221567965 818 | bff9337fbaba065e92208f9e68f4a8dc368b2b24 819 | 148caf895637d908013935a84b4c006bfef34ab0 820 | bcee827497ff2ea89ebb68f20d393d89b4b00394 821 | 0e777d12c9e18ac4a95c3ec98e03442ddcc3951f 822 | 40f6e6fb717651b6ddfe4a1f64080177549d3a60 823 | 33a9bda4e4253572d72ccb56fed01a0cce699237 824 | 158cccb971e9831d98219fb14dbf64f61de9afd6 825 | 84f1e39a820648942e0e6baef53594392a6784aa 826 | 3f323b7b6abaf0f202530d277a8b375892c44f01 827 | 7c49c826a424e32eb46bca49436a862c93f5c595 828 | 1ca09927993c37557622a21415b9bab2c541e466 829 | 2c7257349d7c0454f0badc32ab16b06d459c78e5 830 | db8542096e6a47bb9ebf14762770344364b4a252 831 | 061bd9c748c9f8095adef590d46f30949afe3adc 832 | b8d92ef64bebbd5407dc19cde206aeac45a6059b 833 | 2ced40b4f0ff8bc4c65e93ee695ae22cebaecffc 834 | 78e0444ba387dfe388e878b91df4cb85eb43d7c5 835 | 63d0948191632a095d89b4ab75c5f6092bfe0557 836 | ae117222d4f363a090f423c5f29984092b459053 837 | 42253ee162da24892fccc9dfc40d4c365951724f 838 | 40ea8c851a06cebd6f9db52eae8c4180352f4bf8 839 | 23b89230081765472b0b16ed4f827858ab5ef995 840 | 93fabfd14b2f8e37675f8240c0102161db1015b8 841 | a7f0254bc3e1519afa9e905777618492b148d7e1 842 | 5743b0de0633aae4f364fe5575e54ca2f97cbc88 843 | e57b318e23cde3b2e1461b2c2f6f1919baaaa583 844 | 30b135d28b2f1eba658e96b074405b4cdee5d5d0 845 | 53a0dbb2d5d91b2d5d2e7963ae22f42b07eb3cd8 846 | 775bc6cff0d60513c427ff40c4d8362f3c3cf4dd 847 | 6da3f7857678c1a71aa11648d04bd15808b32429 848 | 5f305f7f8ad41cfde433ea07ea934388784ca799 849 | 501f9b5d3dd5e7c235ebaaf74d1346cc927301ef 850 | 1d5c015003c849e8af02de29f7fb04d985c382c0 851 | d91c6837265b896a05674b24d7a11fd3a5f16e96 852 | 1c3354596b550b64194e40085aa8a6d9aa28cf4d 853 | 28be6a301b493486f38f9a494072d60c098d6c42 854 | dd7dff17371fea447eab2441b0834c968ca4e6e9 855 | 3320ad46ef6b1670124d7d042473836afdfab51c 856 | 36495cf618b05eb0e84d66feaf0b918607bc8969 857 | f07a24f078e929133c5d7d72bff3b4c7578fd543 858 | 6b6d5db4a86116683ac590cb506ecc0ec56ba1ac 859 | 7c6bdd60bc0a371caebfe5db8b4734577a7d4bd4 860 | cd856f01d55bb1b001f18d1c09612e4aff622849 861 | 42940c999bc17c1ce42a896056b29b48cc05ccbb 862 | c60535d0a0074e3739b78cae3424427523af9197 863 | ed1e73b2bfcba7fc8322e15bcbe29befde9cb3b4 864 | 3b3b5f017d891e4f4737cdb7089c226a03cd064f 865 | 8d99402e73d4a08e8a6372a3931e7876eff4cd7c 866 | 8ddb1eb0be4a61349ebae9d9f49a0a10ebfbf248 867 | 621d476c0b95b133cc1e21e02efb74fdf06c2410 868 | 4c8122fd557c72530285ddb4d7236a1e26e1a777 869 | 489f1df415312031233719573df0c96d35db4d91 870 | 01804f236fa7d0ef0051bce32998927b3d492137 871 | cf33de82b721ba0a79f23c7c4c243606b16e4fe8 872 | b89db7e04a32478b3b06a99e77c663ed385da67f 873 | a8b831e43fb71e72a1f18d5565c9178c9cdfd32d 874 | a1b0746e131d7264fa4caaab8f7b4d6b8b5e9994 875 | 5f7d39f41a4f18130bc9534844c40746afa63ee5 876 | a9307650eb19fe107b054a95e2e2106c8f5a1023 877 | 1f59d7ffb4541dc91bbb30690f0cc5d7c374769e 878 | 5754a0cd2d35238577049b53b83ccd425ae3d814 879 | 2fdce9aab59353f61545e2c56a6f42a054491f65 880 | f58da2e3121a4c9bbbbcd10a596ac56cbc308651 881 | 98e6cc44027eb6327b1cef62e7d0b5b915e73c8e 882 | f78d0aac0cbf99be38b9f016e64c9783c5463692 883 | a20f00975cc82a25e498490c0141c04c72dad522 884 | edfcabb79ab332956eb0f011255a41570b64c471 885 | a8240fcea930bd14db2c523962b69ac26a806775 886 | bb61ddedc9692447291a75a2ac6f94e58d8be023 887 | cc12091129de5fe6fb4d4b95f29887bfb48172b2 888 | 60d26594bf8c0c206b7fc72885164067210d44de 889 | 28cefe359c6c5364e543cf94a11658f8fd76ce12 890 | ef2ce41c003a052f202fa169015acf2c77993bb8 891 | 8cda2ea5b292e761aa715053badec5f434fbea8a 892 | a8f118b31711d2f379262a6bb68fe0f5dcc7879d 893 | d9452d0f04d972aeaf8544163666f382ac33af92 894 | 0e0fbb57fd5edb4bcbc1040e68d36c8e0a15d1fd 895 | 0fd461949d545e994abe44675d6b793240591775 896 | 109a435614b65c4c6fc074233001c898be4ee350 897 | 82a32f3a37832016b5e837d8aa945cc93d67c9f5 898 | 71315568d9a1b3466ed4a14d8cd163dbe86a6047 899 | 176d7d00b6ea8cc8bfd8359cd7622cda50e26eb0 900 | 643a1b5be1e8b2759856603ca96dce9f2440a9f2 901 | 34a37db54884f4eb607d3441a8422e21d91a0c79 902 | 7c6ebab0131293c2bd7adcd705612a94dbbfe252 903 | 7fb1742a1c1608da4dc75897c1bc99030175aa3f 904 | 9440cdae225fbcd98b8ef23ceba5c86a63201c70 905 | 8fba99aa12cf5c467506022328187e56cb43ed58 906 | a229711c246f89833f62234cac67bbed94f83008 907 | 6f150b8d8c3074edd7e8e32c766a2d4c37542452 908 | 05f5d665cf587376f15d28c34fa57e6cdf7fffa3 909 | 8ebab50fe4d3784125add7875ddeccf58953ac35 910 | 140319a69afb2166a674a7b86f2c4c82fe33f84f 911 | 48be2ee42311d0eae4e182e5b217b5247f795eb4 912 | 8d4f92a3e2b42ae6a278697b2789cf34b87b9ff4 913 | 68c20f82fa3e26a87fac0ebd629fd16a6371a9cb 914 | a9d4d8bf674288f91fec39540a75613f4da04b5e 915 | b03719e5016094f0e1452df3f8ec40019e5bd279 916 | 1b2cf0ddfd37df4d6baabd96db5a04d1957b59c9 917 | d49c40c31302979a38c47d9c15f44de743278e1d 918 | ccfa6b620ccfb03588a7ec31a3c98df2e085b9a4 919 | bd7e2fc16ef448663715da4916ea56fb6356b5d1 920 | 67600392dc3ad4efa5659c87ae55bc69d2c4411c 921 | 1eb59f0d785b5eed4f21a2ccc35d63003c7fbc89 922 | 2a7f212649bef4bbdb2d48b5799bfc3cf5876d92 923 | 136ff2b3421477d8aefdf4d10a052faaba90fca7 924 | 478419b4e78dbab0a7fe5031f059fe8a8c98cec2 925 | aad306ee6e12dd91bf15d720b67d7c6222ac5bac 926 | 1732fc7c815661513a74ac3dbb6755853996877e 927 | a684df1564ce8d41eba41e22f6b59e19b73ce4ff 928 | 8a31bfd366a6f679f185b87c162261ed6e835ccf 929 | bd5d6ab51575137b59d5f697c7950cacaa4182ca 930 | 7c8963a4a704ed1c56583ed4d2e7c46b3f7595c0 931 | 0eff7b45401faa55f22f22a0e6d8ae37ab4d8480 932 | ea10be60a172b61423628930b498e658ec184023 933 | 18253446f90c7b7a3689895a1a890f57d45cddcc 934 | 0797c99a1573e2a7b2138cfb8d0d748ebd255121 935 | 59dc0ae646965ad6561410ba62ee1f5906a5cc8f 936 | acb9ca236d0ba57340540eff4e7f07b2e5ffc7e5 937 | d39617b7947508c28f60eacbaace2bbc4d08baf5 938 | a8d77c7b9f779815df327a615964e7d0d56b183f 939 | 46b955153e18ba9f3acafdc0109b72ef754ba9bb 940 | 332f0604b39a4e599e3c099f594579da5c2daf1e 941 | 1cd9b2bda13d8f2849db65950042a0204e01b735 942 | 1bf4b779e7e778c8f700566d0efd82b7ade55dbf 943 | 79d2e14f84c411784891ed8151905747f339d0d2 944 | c720b996034ab363599ec196d7adae0b8511b050 945 | 3e1f911da0e974fcee74a3b41f1a2414bb633327 946 | f1b9c3d01343ac43388f5e5742c2366d7088c496 947 | d7407bf6c85eb1f7106a6b0179b7bbbc71b2ddc0 948 | df28f1dbb313e16455e18f20d3c1449d773f7b7a 949 | 2258cbf2539580fe88828d1414a1eb504951a0d6 950 | 9ea05ba9b371be8dfaeec30fa2ec3ce676901c5e 951 | fbfe78964fcb56e58a1e034e11145204356a9d1d 952 | 8ff05c2aafb4e4f2c1fc7926a574f6899cf450b7 953 | 1b653c0a4c9111896098e3755672b2b9a0da17e2 954 | 80987557014324e5ec3839b477d3feb74c2c038d 955 | f2df1b42d96fb39b0a10e648cea3eea1e01d7c2f 956 | 603cd0cd93f2d04ad50684db6b6bccb36e7457c0 957 | f41472494c9a2137142c3d24076b4e927a4f23cd 958 | 7acbf311e9f42fd29a9e63f2532eb702ad7a3edb 959 | 949caccfe8dd75ad92efb9dba23ec1b2f39af9a9 960 | 45d8c0eebb0e3d082fab34d17266419f09c170b4 961 | d6289779bafb606ca9fa42043610315406b9a2c1 962 | f16678cf4bdf551e13c29bcbd3df0252e641dce1 963 | c3a772e05388b9fc0ea58b1a0384140c16d3e903 964 | 15ad252481cdca2122f6e355623a101c2e2a33b3 965 | 8f57366360ff48ba30f45b6fa83cd76664e84435 966 | 4d035ac0655f4b8624002d8b9e402776a66d647f 967 | 8c2da6b02d84fcc35673f02a8e6e1a09e254e72e 968 | 13a2786e6bfc2fe5e86a69d4c3c5ca0ac6da5fa9 969 | d1c85cd90ce557086eebcc0670211ff77cc37c91 970 | 8f56a4f7074d679c3151cc1743cc2da6cd15844c 971 | 9a591de9a309b8df99ddaf342e84196d875a7dba 972 | b84e1b45e41accbfb940e12fc5387a207057aa2c 973 | 39b4557e558ae981364e66bfcd329afdc91d53ff 974 | 0b528c52df66baadfc18c1b99720109fd3966e89 975 | 1601d00ca61feb15ffc6b9854c9091d0e9e52ea2 976 | c942a895dc -------------------------------------------------------------------------------- /wallstreetcnScrapy/wallstreetcnScrapy/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | from scrapy import Field 9 | import scrapy 10 | 11 | 12 | class CommentaryItem(scrapy.Item): 13 | id = Field() 14 | uri = Field() 15 | firstimage = Field() 16 | title = Field() 17 | author = Field() 18 | time = Field() 19 | description = Field() 20 | content = Field() 21 | view = Field() 22 | image_urls = Field() 23 | images = Field() 24 | 25 | -------------------------------------------------------------------------------- /wallstreetcnScrapy/wallstreetcnScrapy/main.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @author: Administrator 3 | ''' 4 | import scrapy.cmdline 5 | from scrapy.crawler import CrawlerProcess 6 | from wallstreetcnScrapy.spiders import CommentarySpider, jqkaCommentarySpider,\ 7 | sinaCommentarySpider 8 | #,'-s','JOBDIR=crawls/CommentarySpider-1' 9 | def main(): 10 | # process = CrawlerProcess() 11 | # process.crawl(CommentarySpider.CommentarySpider,args=['-s','JOBDIR=crawls/CommentarySpider-1']) 12 | # process.crawl(jqkaCommentarySpider.jqkaCommentarySpider,args=['-s','JOBDIR=crawls/CommentarySpider-2']) 13 | # process.crawl(sinaCommentarySpider.sinaCommentarySpider,args=['-s','JOBDIR=crawls/CommentarySpider-3']) 14 | # process.start() 15 | # scrapy.cmdline.execute(argv=['scrapy', 'crawl', 'CommentarySpider','-s','JOBDIR=crawls/CommentarySpider-1']) 16 | scrapy.cmdline.execute(argv=['scrapy', 'crawl', 'jqkaCommentarySpider','-s','JOBDIR=crawls/CommentarySpider-2']) 17 | # scrapy.cmdline.execute(argv=['scrapy', 'crawl', 'sinaCommentarySpider','-s','JOBDIR=crawls/CommentarySpider-3']) 18 | if __name__ =='__main__': 19 | main() -------------------------------------------------------------------------------- /wallstreetcnScrapy/wallstreetcnScrapy/middlewares.py: -------------------------------------------------------------------------------- 1 | import random 2 | import base64 3 | from scrapy.utils.project import get_project_settings 4 | settings = get_project_settings() 5 | PROXIES = settings['PROXIES'] 6 | 7 | class RandomUserAgent(object): 8 | """Randomly rotate user agents based on a list of predefined ones""" 9 | def __init__(self, agents): 10 | self.agents = agents 11 | 12 | @classmethod 13 | def from_crawler(cls, crawler): 14 | return cls(crawler.settings.getlist('USER_AGENTS')) 15 | 16 | def process_request(self, request, spider): 17 | request.headers.setdefault('User-Agent', random.choice(self.agents)) 18 | 19 | class ProxyMiddleware(object): 20 | def process_request(self, request, spider): 21 | proxy = random.choice(PROXIES) 22 | if proxy['user_pass'] is not None: 23 | request.meta['proxy'] = "http://%s" % proxy['ip_port'] 24 | encoded_user_pass = base64.encodestring(proxy['user_pass']) 25 | request.headers['Proxy-Authorization'] = 'Basic ' + encoded_user_pass 26 | else: 27 | request.meta['proxy'] = "http://%s" % proxy['ip_port'] 28 | -------------------------------------------------------------------------------- /wallstreetcnScrapy/wallstreetcnScrapy/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | import solr 8 | from scrapy.utils.project import get_project_settings 9 | from twisted.enterprise import adbapi 10 | from hashlib import md5 11 | from scrapy import log 12 | import os 13 | import requests 14 | import re 15 | from scrapy.exceptions import DropItem 16 | 17 | settings = get_project_settings() 18 | 19 | class RequiredFieldsPipeline(object): 20 | 21 | required_fields = ('title', 'uri', 'author') 22 | 23 | def process_item(self, item, spider): 24 | for field in self.required_fields: 25 | 26 | if not item[field]: 27 | # log.msg("Field '%s' missing" % (field)) 28 | print "Field '%s' missing" % (field) 29 | raise DropItem("Field '%s' missing: %r" % (field, item)) 30 | return item 31 | 32 | class IdGeneratePipeline(object): 33 | 34 | def process_item(self, item, spider): 35 | item['id'] = self._get_linkmd5id(item) 36 | return item 37 | 38 | def _get_linkmd5id(self, item): 39 | return md5((item['uri'] + item['title'] + item['author']).encode("utf8")).hexdigest() 40 | 41 | 42 | class ImageDownloadPipeline(object): 43 | def process_item(self, item, spider): 44 | if 'image_urls' in item: 45 | images = [] 46 | abpath = '%s/%s/%s/%s' % (spider.name, item['id'][0],item['id'][1],item['id']) 47 | dir_path = '%s/%s' % (settings['IMAGES_STORE'], abpath) 48 | if not os.path.exists(dir_path) and len(item['image_urls'])>0: 49 | os.makedirs(dir_path) 50 | for image_url in item['image_urls']: 51 | name = image_url.split('/')[-1] 52 | _i = name.rfind('!') 53 | if _i > 4: 54 | name = name[:_i] 55 | name = re.sub('\\\|/|:|\*|\?|"|<|>','_',name) 56 | image_file_name = name[-100:] 57 | file_path = '%s/%s' % (dir_path, image_file_name) 58 | images.append((image_url, file_path)) 59 | if os.path.exists(file_path): 60 | continue 61 | with open(file_path, 'wb') as handle: 62 | try: 63 | response = requests.get(image_url, stream=True) 64 | for block in response.iter_content(1024): 65 | if not block: 66 | break 67 | handle.write(block) 68 | # log.msg("download img to %s" % file_path) 69 | except: 70 | continue 71 | item['images'] = images 72 | if not images: 73 | pass 74 | else: 75 | _ = images[0][1] 76 | item['firstimage'] = '%s/%s' % (abpath, _[_.rfind('/')+1:]) 77 | print item['firstimage'] 78 | return item 79 | 80 | class SolrPipeline(object): 81 | 82 | def __init__(self): 83 | self.mapping = settings['SOLR_MAPPING'].items() 84 | self.solr = solr.Solr(settings['SOLR_URL']) 85 | 86 | def process_item(self, item, spider): 87 | solr_item = {} 88 | for dst, src in self.mapping: 89 | if type(src) is str: 90 | solr_item[dst] = item[src] if src in item else None 91 | elif type(src) is list: 92 | ######### 93 | solr_item[dst] = [item[i] if i in item else None for i in src] 94 | else: 95 | assert False 96 | self.solr.add(solr_item) 97 | return item 98 | 99 | class MysqlPipeline(object): 100 | def __init__(self, dbpool): 101 | self.dbpool = dbpool 102 | @classmethod 103 | def from_settings(cls, settings): 104 | dbargs = dict( 105 | host=settings['MYSQL_HOST'], 106 | db=settings['MYSQL_DBNAME'], 107 | user=settings['MYSQL_USER'], 108 | passwd=settings['MYSQL_PASSWD'], 109 | port=settings['MYSQL_PORT'], 110 | charset='utf8', 111 | use_unicode=True, 112 | ) 113 | dbpool = adbapi.ConnectionPool('MySQLdb', **dbargs) 114 | return cls(dbpool) 115 | 116 | def process_item(self, item, spider): 117 | d = self.dbpool.runInteraction(self._do_upinsert, item, spider) 118 | d.addErrback(self._handle_error, item, spider) 119 | d.addBoth(lambda _: item) 120 | return d 121 | 122 | 123 | def _parseItem(self, item): 124 | id1 = str(item['id'].encode('utf8')) 125 | uri = str(item['uri'].encode('utf8')) 126 | title = str(item['title'].encode('utf8')) 127 | author = str(item['author'].encode('utf8')) 128 | time = str(item['time'].encode('utf8').replace('T', ' ')[:-1]) 129 | description = str(item['description'].encode('utf8')) 130 | content = str(item['content'].encode('utf8')) 131 | images = str(item['images']) 132 | view = str(item['view'].encode('utf8')) 133 | return uri, title, author, time, description, content, images, view, id1 134 | 135 | def _do_upinsert(self, conn, item, spider): 136 | conn.execute("""SELECT EXISTS( 137 | SELECT 1 FROM wstable WHERE id = %s 138 | )""", (item['id'],)) 139 | ret = conn.fetchone()[0] 140 | uri, title, author, time, description, content, images, view, id1 = self._parseItem(item) 141 | if ret: 142 | conn.execute(""" 143 | update wstable set uri = %s, title = %s, author = %s, time1 = %s, description = %s, content = %s, images = %s, view1 = %s where id = %s 144 | """, (uri,title,author,time,description,content,images,view,id1)) 145 | # log.msg(""" 146 | # update wstable set uri = %s, title = %s, author = %s, time1 = %s, description = %s, content = %s, images = %s, view1 = %s where id = %s 147 | # """ % (uri,title,author,time,description,content,images,view,id1)) 148 | else: 149 | # log.msg(""" 150 | # insert into wstable(id, uri, title, author, time1, description, content, images, view1) 151 | # values(%s, %s, %s, %s, %s, %s, %s, %s, %s) 152 | # """ % (id1,uri,title,author,time,description,content,images,view)) 153 | conn.execute(""" 154 | insert into wstable(id, uri, title, author, time1, description, content, images, view1) 155 | values(%s, %s, %s, %s, %s, %s, %s, %s, %s) 156 | """, (id1,uri,title,author,time,description,content,images,view)) 157 | # log.msg('finished item %s' % item['id']) 158 | print 'finished item %s' % item['id'] 159 | 160 | def _handle_error(self, failue, item, spider): 161 | log.err(failue) 162 | -------------------------------------------------------------------------------- /wallstreetcnScrapy/wallstreetcnScrapy/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Scrapy settings for wallstreetcnScrapy project 4 | # 5 | # For simplicity, this file contains only settings considered important or 6 | # commonly used. You can find more settings consulting the documentation: 7 | # 8 | # http://doc.scrapy.org/en/latest/topics/settings.html 9 | # http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 10 | # http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 11 | 12 | BOT_NAME = 'wallstreetcnScrapy' 13 | 14 | SPIDER_MODULES = ['wallstreetcnScrapy.spiders'] 15 | NEWSPIDER_MODULE = 'wallstreetcnScrapy.spiders' 16 | 17 | COMMANDS_MODULE = 'wallstreetcnScrapy.commands' 18 | 19 | # Crawl responsibly by identifying yourself (and your website) on the user-agent 20 | #USER_AGENT = 'wallstreetcnScrapy (+http://www.yourdomain.com)' 21 | 22 | # Configure maximum concurrent requests performed by Scrapy (default: 16) 23 | #CONCURRENT_REQUESTS=32 24 | 25 | # Configure a delay for requests for the same website (default: 0) 26 | # See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay 27 | # See also autothrottle settings and docs 28 | # DOWNLOAD_DELAY=0.1 29 | # The download delay setting will honor only one of: 30 | #CONCURRENT_REQUESTS_PER_DOMAIN=16 31 | #CONCURRENT_REQUESTS_PER_IP=16 32 | 33 | # Disable cookies (enabled by default) 34 | COOKIES_ENABLED=False 35 | 36 | # Disable Telnet Console (enabled by default) 37 | #TELNETCONSOLE_ENABLED=False 38 | 39 | # Override the default request headers: 40 | #DEFAULT_REQUEST_HEADERS = { 41 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 42 | # 'Accept-Language': 'en', 43 | #} 44 | 45 | # Enable or disable spider middlewares 46 | # See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 47 | SPIDER_MIDDLEWARES = { 48 | # 'wallstreetcnScrapy.middlewares.MyCustomSpiderMiddleware': 543, 49 | 'wallstreetcnScrapy.middlewares.RandomUserAgent': 1, 50 | 'wallstreetcnScrapy.middlewares.ProxyMiddleware': 100, 51 | 52 | } 53 | 54 | # Enable or disable downloader middlewares 55 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 56 | # DOWNLOADER_MIDDLEWARES = { 57 | # 'wallstreetcnScrapy.middlewares.MyCustomDownloaderMiddleware': 1, 58 | # } 59 | 60 | # Enable or disable extensions 61 | # See http://scrapy.readthedocs.org/en/latest/topics/extensions.html 62 | #EXTENSIONS = { 63 | # 'scrapy.telnet.TelnetConsole': None, 64 | #} 65 | 66 | # Configure item pipelines 67 | # See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html 68 | ITEM_PIPELINES = { 69 | 'wallstreetcnScrapy.pipelines.RequiredFieldsPipeline': 250, 70 | 'wallstreetcnScrapy.pipelines.IdGeneratePipeline': 300, 71 | 'wallstreetcnScrapy.pipelines.ImageDownloadPipeline':350, 72 | 'wallstreetcnScrapy.pipelines.SolrPipeline': 400, 73 | 'wallstreetcnScrapy.pipelines.MysqlPipeline': 450, 74 | 75 | } 76 | SOLR_URL = 'http://localhost:8080/solr/#/wscore' 77 | SOLR_MAPPING = { 78 | 79 | 'id': 'id', 80 | 'uri': 'uri', 81 | 'firstimage': 'firstimage', 82 | 'title': 'title', 83 | 'title_py': 'title', 84 | 'author': 'author', 85 | 'authorString': 'author', 86 | 'author_py': 'author', 87 | 'time': 'time', 88 | 'description': 'description', 89 | 'content': 'content', 90 | } 91 | 92 | # Enable and configure the AutoThrottle extension (disabled by default) 93 | # See http://doc.scrapy.org/en/latest/topics/autothrottle.html 94 | # NOTE: AutoThrottle will honour the standard settings for concurrency and delay 95 | #AUTOTHROTTLE_ENABLED=True 96 | # The initial download delay 97 | #AUTOTHROTTLE_START_DELAY=5 98 | # The maximum download delay to be set in case of high latencies 99 | #AUTOTHROTTLE_MAX_DELAY=60 100 | # Enable showing throttling stats for every response received: 101 | #AUTOTHROTTLE_DEBUG=False 102 | 103 | # Enable and configure HTTP caching (disabled by default) 104 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings 105 | #HTTPCACHE_ENABLED=True 106 | #HTTPCACHE_EXPIRATION_SECS=0 107 | #HTTPCACHE_DIR='httpcache' 108 | #HTTPCACHE_IGNORE_HTTP_CODES=[] 109 | #HTTPCACHE_STORAGE='scrapy.extensions.httpcache.FilesystemCacheStorage' 110 | 111 | #img 112 | # IMAGES_STORE = '\\tmp\\images' 113 | IMAGES_STORE = 'H://Administrator/Documents' 114 | 115 | # log 116 | LOG_ENABLED = True 117 | LOG_LEVEL = 'ERROR' 118 | LOG_ENCODING = 'utf-8' 119 | LOG_FILE = 'H://Administrator/Documents/Workspace/python/wallstreetcnScrapy/log/ws.log' 120 | 121 | # start MySQL database configure setting 122 | MYSQL_HOST = 'localhost' 123 | MYSQL_DBNAME = 'gpcore' 124 | MYSQL_USER = 'root' 125 | MYSQL_PASSWD = '123456' 126 | MYSQL_PORT = 3306 127 | # end of MySQL database configure setting 128 | 129 | USER_AGENTS = [ 130 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", 131 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)", 132 | "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", 133 | "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)", 134 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)", 135 | "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)", 136 | "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)", 137 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)", 138 | "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6", 139 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1", 140 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0", 141 | "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5", 142 | "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6", 143 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", 144 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20", 145 | "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52", 146 | ] 147 | 148 | PROXIES = [ 149 | {'ip_port': '110.73.38.112:8123', 'user_pass': ''}, 150 | {'ip_port': '49.68.236.204:8118', 'user_pass': ''}, 151 | {'ip_port': '182.88.255.120:8123', 'user_pass': ''}, 152 | {'ip_port': '117.42.71.88:8118', 'user_pass': ''}, 153 | {'ip_port': '182.90.21.10:80', 'user_pass': ''}, 154 | {'ip_port': '182.90.15.7:80', 'user_pass': ''}, 155 | {'ip_port': '113.87.241.4:9999', 'user_pass': ''}, 156 | {'ip_port': '112.255.23.180:9999', 'user_pass': ''}, 157 | ] -------------------------------------------------------------------------------- /wallstreetcnScrapy/wallstreetcnScrapy/spiders/.gitignore: -------------------------------------------------------------------------------- 1 | /CommentarySpider.pyc 2 | /__init__.pyc 3 | /jqkaCommentarySpider.pyc 4 | /sinaCommentarySpider.pyc 5 | -------------------------------------------------------------------------------- /wallstreetcnScrapy/wallstreetcnScrapy/spiders/CommentarySpider.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @author: Administrator 3 | ''' 4 | #coding=utf-8 5 | from scrapy.spiders.crawl import CrawlSpider, Rule 6 | from scrapy.utils.response import get_base_url 7 | from wallstreetcnScrapy.items import CommentaryItem 8 | from scrapy.linkextractors.lxmlhtml import LxmlLinkExtractor 9 | from bs4 import BeautifulSoup 10 | import re 11 | # from scrapy import log 12 | 13 | 14 | class CommentarySpider(CrawlSpider): 15 | name = 'CommentarySpider' 16 | allowed_domains = ['wallstreetcn.com'] 17 | start_urls = [ 18 | 'http://wallstreetcn.com/news?status=published&type=news&order=-created_at&limit=30&page=1', 19 | ] 20 | rules = [ 21 | Rule(LxmlLinkExtractor(allow=("page=\d+",))), 22 | Rule(LxmlLinkExtractor(allow=("node/\d+")),follow=True,callback='parse_commentary'), 23 | ] 24 | def parse_commentary(self, response): 25 | sel = response.selector 26 | item = CommentaryItem() 27 | #get uri 28 | item['uri'] = get_base_url(response) 29 | print 'Download from uri: %s' % item['uri'] 30 | # log.msg('Download from uri: %s' % item['uri']) 31 | #get title 32 | _ = sel.xpath('//h1[@class="article-title"]/text()') 33 | item['title'] = '' if not _ else _[0].extract() 34 | #get time 35 | _ = sel.xpath('//span[@class="item time"]/text()') 36 | _time = '' if not _ else _[0].extract() 37 | if not _time: 38 | item['time'] = None 39 | else: 40 | _time = re.sub('[^\u4E00-\u9FA5\s]', '-',_time) 41 | _time = _time[:10]+'T'+_time[12:]+'Z' 42 | item['time'] = _time 43 | #get author 44 | _ = sel.xpath('//span[@class="item author"]/a/text()') 45 | item['author'] = '' if not _ else _[0].extract() 46 | #get description 47 | _ = sel.xpath('//meta[@name="description"]/@content') 48 | item['description'] = '' if not _ else _[0].extract()[:-84] 49 | #get content & imgs & view 50 | _ = sel.xpath('//div[@class="article-content"]') 51 | _ = _.extract()[0] 52 | _view = _[:-123]+'' if len(_) > 200 else _ 53 | _content = BeautifulSoup(_view) 54 | item['content'] = _content.text 55 | _image_urls = [] 56 | for img in _content.find_all('img'): 57 | if img.has_key('src') and img['src'].startswith('http'): 58 | _image_urls.append(img['src']) 59 | elif img.has_key('alt') and img['alt'].startswith('http'): 60 | _image_urls.append(img['alt']) 61 | else: 62 | continue 63 | item['image_urls'] = _image_urls 64 | #item['image_urls'] = [img.src if img.src is not None and img.src.startswith('http') else img.alt if img.alt is not None else None for img in _content.find_all('img')] 65 | item['view'] = _view 66 | return item 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /wallstreetcnScrapy/wallstreetcnScrapy/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 5 | -------------------------------------------------------------------------------- /wallstreetcnScrapy/wallstreetcnScrapy/spiders/jqkaCommentarySpider.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @author: Administrator 3 | ''' 4 | #coding=utf-8 5 | from scrapy.spiders.crawl import CrawlSpider, Rule 6 | from scrapy.utils.response import get_base_url 7 | from wallstreetcnScrapy.items import CommentaryItem 8 | from scrapy.linkextractors.lxmlhtml import LxmlLinkExtractor 9 | from bs4 import BeautifulSoup 10 | # from scrapy import log 11 | 12 | 13 | class jqkaCommentarySpider(CrawlSpider): 14 | name = 'jqkaCommentarySpider' 15 | allowed_domains = ['news.10jqka.com.cn'] 16 | start_urls = [ 17 | 'http://news.10jqka.com.cn/', 18 | ] 19 | rules = [ 20 | Rule(LxmlLinkExtractor(allow=("\d+/\w+.shtml")),follow=True,callback='parse_commentary'), 21 | ] 22 | def parse_commentary(self, response): 23 | sel = response.selector 24 | item = CommentaryItem() 25 | #get uri 26 | item['uri'] = get_base_url(response) 27 | print 'Download from uri: %s' % item['uri'] 28 | # log.msg('Download from uri: %s' % item['uri']) 29 | #get title 30 | _ = sel.xpath('//div[@class="atc-head"]/h1/text()') 31 | item['title'] = '' if not _ else _[0].extract() 32 | #get time 33 | _ = sel.xpath('//span[@class="time"]/text()') 34 | _time = '' if not _ else _[0].extract() 35 | if not _time: 36 | item['time'] = None 37 | else: 38 | _time = _time[:10]+'T'+_time[12:]+'Z' 39 | item['time'] = _time 40 | #get author 41 | _ = sel.xpath('//span[@id="source_baidu"]/text()') 42 | _author = '' if not _ else BeautifulSoup(_[0].extract()).text 43 | try: 44 | item['author'] = _author[-1] 45 | except: 46 | item['author'] = 'NULL' 47 | #get description 48 | _ = sel.xpath('////meta[@name="description"]/@content') 49 | item['description'] = '' if not _ else _[0].extract() 50 | #get content & imgs & view 51 | _ = sel.xpath('//div[@class="atc-content"]') 52 | _ = _.extract()[0] 53 | i = _.rfind('') 54 | _view = _[:i]+r'' if i > -1 else _ 55 | _content = BeautifulSoup(_view) 56 | item['content'] = _content.text 57 | _image_urls = [] 58 | for img in _content.find_all('img'): 59 | if img.has_key('src') and img['src'].startswith('http'): 60 | _image_urls.append(img['src']) 61 | elif img.has_key('alt') and img['alt'].startswith('http'): 62 | _image_urls.append(img['alt']) 63 | else: 64 | continue 65 | item['image_urls'] = _image_urls 66 | #item['image_urls'] = [img.src if img.src is not None and img.src.startswith('http') else img.alt if img.alt is not None else None for img in _content.find_all('img')] 67 | item['view'] = _view 68 | return item 69 | 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /wallstreetcnScrapy/wallstreetcnScrapy/spiders/sinaCommentarySpider.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | ''' 3 | @author: Administrator 4 | ''' 5 | from scrapy.spiders.crawl import CrawlSpider, Rule 6 | from scrapy.utils.response import get_base_url 7 | from wallstreetcnScrapy.items import CommentaryItem 8 | from scrapy.linkextractors.lxmlhtml import LxmlLinkExtractor 9 | from bs4 import BeautifulSoup 10 | # from scrapy import log 11 | 12 | 13 | class sinaCommentarySpider(CrawlSpider): 14 | name = 'sinaCommentarySpider' 15 | allowed_domains = ['finance.sina.com.cn'] 16 | start_urls = [ 17 | 'http://finance.sina.com.cn/', 18 | ] 19 | rules = [ 20 | Rule(LxmlLinkExtractor(allow=("doc-\w+.shtml")),follow=True,callback='parse_commentary'), 21 | ] 22 | def parse_commentary(self, response): 23 | sel = response.selector 24 | item = CommentaryItem() 25 | #get uri 26 | item['uri'] = get_base_url(response) 27 | print 'Download from uri: %s' % item['uri'] 28 | # log.msg('Download from uri: %s' % item['uri']) 29 | #get title 30 | _ = sel.xpath('//meta[@property="og:title"]/@content') 31 | item['title'] = '' if not _ else _[0].extract() 32 | #get time 33 | _ = sel.xpath('//meta[@property="article:published_time"]/@content') 34 | _time = '' if not _ else _[0].extract() 35 | if not _time: 36 | item['time'] = None 37 | else: 38 | _time = _time[:-6]+'Z' 39 | item['time'] = _time 40 | #get author 41 | _ = sel.xpath('//meta[@property="article:author"]/@content') 42 | item['author'] = '' if not _ else _[0].extract() 43 | #get description 44 | _ = sel.xpath('//meta[@name="description"]/@content') 45 | item['description'] = '' if not _ else _[0].extract() 46 | #get content & imgs & view 47 | _ = sel.xpath('//div[@class="article article_16"]') 48 | _ = _[0].extract() 49 | _content = BeautifulSoup(_) 50 | item['content'] = item['view'] = _content.text; 51 | _image_urls = [] 52 | for img in _content.find_all('img'): 53 | if img.has_key('src') and img['src'].startswith('http'): 54 | _image_urls.append(img['src']) 55 | elif img.has_key('alt') and img['alt'].startswith('http'): 56 | _image_urls.append(img['alt']) 57 | else: 58 | continue 59 | item['image_urls'] = _image_urls 60 | #item['image_urls'] = [img.src if img.src is not None and img.src.startswith('http') else img.alt if img.alt is not None else None for img in _content.find_all('img')] 61 | return item 62 | 63 | 64 | 65 | 66 | 67 | --------------------------------------------------------------------------------