├── README.md ├── community ├── community │ ├── __init__.py │ ├── __init__.pyc │ ├── items.py │ ├── items.pyc │ ├── logfile.log │ ├── middlewares.py │ ├── pipelines.py │ ├── pipelines.pyc │ ├── return_01.log │ ├── settings.py │ ├── settings.pyc │ └── spiders │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── communitySpider.py │ │ └── communitySpider.pyc └── scrapy.cfg └── tutorial ├── 4 ├── 5 ├── cron.sh ├── items.json ├── scrapy.cfg └── tutorial ├── __init__.py ├── __init__.pyc ├── add ├── input ├── items.py ├── items.pyc ├── middlewares.py ├── pipelines.py ├── settings.py ├── settings.pyc └── spiders ├── __init__.py ├── __init__.pyc ├── kldp_spider.py └── kldp_spider.pyc /README.md: -------------------------------------------------------------------------------- 1 | # Python-WebCrawling 2 | "파이썬을 이용한 웹 크롤링(Web Crawling) 어플리케이션 만들기" 강좌기반 파이썬 웹크롤러 프로젝트 3 | - - - 4 | ### 백엔드란 5 | 사용자의 요청을 받아서, __저장되어 있는 정보를 바탕으로 각 사용자에게 적합한 페이지__ 를 전송. 6 | 1. 웹 서버(Apache, IIS, nginx, GWS, etc.) 7 | * 사용자의 요청에 맞게 데이터(HTML, image, etc.)를 전송해주는 프로그램 8 | 2. 데이터 베이스(MySql, Oracle, MsSql, PostgreSQL, LightSql, MongoDB, etc.) 9 | * 사용자의 정보를 저장하는 저장소 10 | 3. 스크립트 엔진(php, jsp, asp) 11 | * 웹서버에서 사용자의 요청을 분석해주는 프로그램 12 | 4. 
웹프레임워크(Django, Ruby onRails, etc) 13 | * 웹개발을 보다 편리하게 만들어 주는 도구 14 | * 웹서버, 데이터베이스 등 역할 일부 지원 15 |   * 쿼리작업까지도 웹프레임워크가 진행 16 | 17 | __웹서버, 스크립트엔진에 대한 이해 필요__ 18 | 19 | - - - 20 | 21 | ### Web-Crawler Overview 22 | WebServer(Apache2) | Database(MySql) 23 | Crawler(Beautiful Soup 4) | Django 1.8 | Python 2.7 24 | OS (Ubuntu 14.04) 25 | Cloud Service (Google Compute Engine) 26 | 27 | ### Python을 이용한 웹크롤링 28 | * 개발환경구축 29 | * BeautifulSoup 4.x(데이터를 가져오는데 특화된 라이브러리), Scrapy(크롤링에서 강력 / 크롤링전용 라이브러리) 30 | * 사이트 구조 분석 방법 31 | * 특정 사이트 데이터 취득 32 | 33 | - - - 34 | 35 | ## 개발환경 36 | * 운영체제 : Ubuntu 16.04.2 37 | * 언어 : Python 2.7 38 | * 사용 라이브러리 : BeautifulSoup, Scrapy 39 | * BeautifulSoup보다 Scrapy가 더 강력, 그래서 Scrapy로 진행될 것 40 | * 기타 : virtualenv, virtualenvwrapper, docker 41 | * 버전 맞춰주는 것도 일이여서 가상환경에 알맞은 패키지 버전 설치하면 깔끔하게 파이썬 쓸수 있음 42 | 43 | __크롤링은 주로 링크나 텍스트를 복사__
44 | __우분투를 쓰는 이유는, 사용자가 많아서 여러가지 반영이 쉽게 됨__ 45 | 46 | * Beautiful Soup 설치 전 필요한 패키지 있음 47 | * apt-get install libxml2-dev libxslt-dev python-dev zlib1g-dev 48 | * apt-get install python-lxml 49 | * pip install lxml (파이썬 환경에서의 lxml 설치) 50 | * 의존패키지 설치 완료 후, beautifulsoup 설치 51 | * pip install beautifulsoup4 52 | 53 | * Scrapy 설치 전 필요한 패키지 있음 54 | * apt-get install libffi-dev libssl-dev 55 | * 의존패키지 설치 완료 후, Scrapy 설치 56 | 57 | __가상환경은 프로젝트의 충돌을 방지__ 58 | 59 | - - - 60 | 61 | ## Beautiful Soup VS Scrapy 62 | * Beautiful Soup 63 | * html문서에서 원하는 정보를 손쉽게 가져올 수 있는 방법을 제공. 64 | * 자동으로 인코딩을 유니코드로 변환해서 UTF-8로 출력. 65 | * lxml, html5lib 파서를 이용함 66 | * https://www.crummy.com/software/BeautifulSoup/bs4/doc/ 67 | 68 | * Scrapy 69 | * web scraper framework 70 | * 다양한 selector 지원 71 | * 파이프 라인(데이터 필터링) 72 | * 로깅(데이터 잘 들어오는지) 73 | * 이메일(데이터 들어왔을때 이메일 전송) 74 | * http://doc.scrapy.org/en/0.24/intro/tutorial.html 75 | 76 | 77 | * Beautiful Soup은 HTML의 문서를 가져와서 파싱을 해주는 파서의 역할이 강하며,
78 | 문서를 가져와서 네비게이션하는 기능이나 자동으로 인코딩을 유니코드로 변환해서 UTF-8로 출력. 79 | 80 | * Scrapy는 Web에서 데이터를 들고와서 하는 전체적 내용을 프레임워크 형태로 제작한 라이브러리.
81 | 프레임워크라 다양한 기능지원(파이프라인, 로깅, 이메일) 82 | 83 | * Beautiful Soup은 파이프라인, 로깅, 이메일 기능을 제공하지 않으므로, 필요하면 직접 구현해야 함. 84 | 85 | 86 | * Beautiful Soup 레퍼런스 일부 발췌 87 |
 88 | from bs4 import BeautifulSoup
 89 | soup = BeautifulSoup(html_doc, 'html.parser')
 90 | 
 91 | soup.title
 92 | The Dormouse's story
 93 | 
 94 | soup.title.name
 95 | u'title'
 96 | 
 97 | soup.title.string
 98 | The Dormouse's story
 99 | 
100 | soup.title.parent.name
101 | u'head'
102 | 
103 | soup.a
104 | Elsie
105 | 
106 | soup.find_all('a') : 가장 많이 쓰이는 명령어.
107 | [Elsie,
108 |   Lacie,
109 |   Tillie]
110 | 
111 | soup.find(id="link3")
112 | Tillie
113 | 
114 | soup.find_all(href=re.compile("elsie"), id='link1')
115 | [three]
116 | 
117 | data_soup.find_all(attrs={"data-foo":"value"})
118 | [
foo!
] 119 |
120 | 121 | __태그 이름이나 css 속성, 정규식을 통해서도 데이터를 수집하는 것이 가능함.__ 122 | 123 | __Scrapy의 경우, 데이터를 들고올때 클래스형태로 만들 수 있습니다.__ 124 | * items.py 웹환경에서 title, 링크, 글을 쓴 저자를 가져오고자 할때, Item에서의 지정이 가능 125 | * pipelines.py 어떤 Scrapy를 통해 데이터를 들고와서 그 데이터에 대해 후처리를 하고 싶을때, 데이터 필터링이나 데이터베이스에 입력하고 싶을때 126 | * settings.py spider라고 부르는데, spider에 대한 설정들이 들어있음. 127 | * spiders 폴더 안에는 실제 불러오고싶은 내용에 대한 코드가 위치한다. 128 | 129 | - - - 130 | 131 | __scrapy crawl "스파이더명"__ 132 | __스파이더는 여러개 만들어서 동시 실행이 가능.__ 133 | 134 | __크론탭을 이용해서 크롤링 관련 배치작업 설정할 수 있음.__ 135 | 136 | 137 | - - - 138 | 139 | # Scrapy를 이용해서 웹데이터를 크롤링하는 방법 140 | Scrapy에 대한 전반적인 이야기, 사이트에서 실제 데이터를 가져오는 것까지 141 | 142 | 143 | ## 목차 144 | 1. 웹 크롤링 이슈 145 | * 웹데이터 저작권 146 | * 사이트의 크롤링 정책 147 | 2. scrapy 구조 148 | 3. scrapy 149 | * spiders 150 | * selector 151 | * pipline 152 | * logging 153 | 4. 웹사이트 크롤링 실정 154 | * clien.net 155 | * bobaedraem.co.kr 156 | 157 | 158 | ## 저작권 159 | * 저작권법 허용 160 | * 단순 링크 - 사이트 대표 주소를 링크 161 | * 직접 링크 - 특정 게시물을 링크 162 | * 저작권법 위반 163 | * 프레임 링크 - 저작물의 일부를 홈페이지에 표시 164 | * 임베드 링크 - 저작물 전체를 홈페이지에 표시 165 | 166 | 167 | ## 로봇 배제 표준(robots.txt) 168 | * 웹사이트에 로봇이 접근하는 것을 방지하기 위한 규약 169 | * 예제 170 | * 모두 허용 171 | > User-agent: * 172 | > Allow: / 173 | * 모두 차단 174 | > User-agent: * 175 | > Disallow: / 176 | * 다양한 조합 177 | > User-agent: googlebot (googlebot 로봇만 적용) 178 | > Disallow: /private/ (이 디렉토리를 접근 차단) 179 | > User-agent: googlebot-news (googlebot-news 로봇만 적용) 180 | > Disallow: / (모든 디렉토리를 접근 차단한다) 181 | > User-agent: * (모든 로봇 적용) 182 | > Disallow: /something/ (이 디렉토리를 접근 차단) 183 | * 실제 사이트의 robots.txt: 뽐뿌, 클리앙, SLR 클럽 184 | * 중고시장 관련된 대부분의 사이트는 로그인한 사용자만 접근허용 가능하니, 185 | * 왠만해서 그런 크롤링안하는 것이 좋음 186 |   * 크롤링할지 말지에 대한 선택은 robots.txt 파일 내용 확인 187 | 188 | 크롤링은 적절히 delay를 주면 해당 사이트에 큰 무리를 주지 않을 수 있음 189 | 190 | #### 뽐뿌 robots.txt 예제(크롤링할때 딜레이를 1초 주는것을 권고) 191 |
192 | User-agent: *
193 | Crawl-delay: 1
194 | Disallow: /include/
195 | Disallow: /zboard/view.php?id=market
196 | Disallow: /zboard/view.php?id=market_phone
197 | Disallow: /zboard/view.php?id=market_social
198 | Disallow: /zboard/view.php?id=cmarket
199 | Disallow: /zboard/view.php?id=onmarket
200 | Disallow: /zboard/view.php?id=market_story
201 | Disallow: /zboard/view.php?id=gonggu
202 | Disallow: /zboard/view.php?id=my
203 | Disallow: /search_bbs.php
204 | Disallow: /zboard/view_info2.php
205 | Disallow: /bookmark/
206 | Disallow: /chat
207 | 
208 | 209 | #### slrclub robots.txt 예제(User-Agent 전체 사이트 접근을 막고 있음) 210 |
211 | User-agent: Googlebot
212 | Disallow:
213 | User-agent: Googlebot*
214 | Disallow:
215 | User-agent: Mediapartners-Google*
216 | Disallow: 
217 | User-agent: ZumBot
218 | Disallow: 
219 | User-agent: Yeti
220 | Disallow: 
221 | User-agent: daumoa
222 | Disallow: 
223 | User-agent: KaBot
224 | Disallow: 
225 | User-Agent:*
226 | Disallow:/ 
227 | 
228 | 229 | - - - 230 | 231 | ## Scrapy 구조 232 | * Scrapy 실행 명령 233 | * scrapy startproject "project name" 234 | 235 | * Scrapy 동작 236 | * items 정의 237 | * 스타트 url 지정(start_requests, start_urls), callback 함수 지정(parse()) 238 | * start_urls라는 url 리스트를 생성하는 방법 (스트링 리스트) 239 | * start_requests를 정의하는 방법 240 | * callback 함수 정의 241 | * selector(xpath, css)를 이용하여 데이터 선택 242 | * Pipeline을 이용하여 데이터를 필터링하거나 데이터베이스에 저장 243 | 244 | spiders 폴더 내부에 실제 크롤링하는 로직이 위치하게 됨. 245 | 크롤링 대상 게시물들에 대한 게시물, 저작자, 제목, url 등을 items에 저장. 246 | items에 저장된 데이터 기반으로 pipe라인에서 DB에 넣을지, 247 | 특별한 규칙에 의해 게시물을 필터링할것인지를 결정함 248 | 249 | settings.py 파일의 경우, pipelines의 순서를 결정하거나, 250 | 로그 파일을 지정하고 로그파일 레벨도 지정 가능. 251 | 252 | scrapy.cfg는 전체 프로젝트 배포시 관련 설정들에 대한 나열. 253 | 254 | 255 | ### Spiders 256 | * 크롤러의 이름 지정 257 | * name 258 | * 스타트 url 지정 259 | * start_urls 260 | * 시작 주소를 리스트 형태로 추가 가능 261 | * start_requests 262 | * 콜백함수를 지정할 수 있음 263 | * 사이트에 로그인할때 사용 264 | * 파서 정의 265 | * def parse(self, response): 266 | 267 | ### Selector 268 | * HTML 문서에 특정 노드를 선택하도록 지원하는 함수(쉽게) 269 | * css vs xpath selector 270 | 271 | __특정 문자열 가져오기__ 272 |
273 | $ response.xpath('//title/text()')
274 | [<Selector (text) xpath=//title/text()>]
275 | $ response.css('title::text')
276 | [<Selector (text) xpath=//title/text()>]
277 | 
278 | $ response.xpath('//base/@href').extract()
279 | [u'http://example.com/']
280 | 
281 | $ response.css('base::attr(href)').extract()
282 | [u'http://example.com/']
283 | 
284 | $ response.xpath('//a[contains(@href, "image")]/@href').extract()
285 | [u'image1.html',
286 |  u'image2.html',
287 |  u'image3.html',
288 |  u'image4.html',
289 |  u'image5.html']
290 | 
291 | $ response.css('a[href*=image]::attr(href)').extract()
292 | [u'image1.html',
293 |  u'image2.html',
294 |  u'image3.html',
295 |  u'image4.html',
296 |  u'image5.html']
297 | 
298 | $ response.xpath('//a[contains(@href, "image")]/img/@src').extract()
299 | [u'image1_thumb.jpg',
300 |  u'image2_thumb.jpg',
301 |  u'image3_thumb.jpg',
302 |  u'image4_thumb.jpg',
303 |  u'image5_thumb.jpg']
304 | 
305 | $ response.css('a[href*=image] img::attr(src)').extract()
306 | [u'image1_thumb.jpg',
307 |  u'image2_thumb.jpg',
308 |  u'image3_thumb.jpg',
309 |  u'image4_thumb.jpg',
310 |  u'image5_thumb.jpg']
311 | 
312 | 313 | 314 | ### Pipeline 315 | * 데이터를 크롤링한 이후에 특정 행동을 수행(크게 4가지 특성) 316 | * 데이터의 유효성 검사 317 | * 중복 체크 318 | * 데이터베이스에 아이템 저장 319 | * 필터링 320 | * settings.py 321 | * 파이프 클래스 및 순서를 지정 322 |
323 | ITEM_PIPELINES = {
324 | 	# '클래스명':우선순위(낮은게 먼저 실행됨)
325 | 	'oneq.pipelines.CommunityPipeline':300,
326 | }
327 | 
328 | 329 | 330 | ### Logging 331 | * Settings.py 332 | * LOG_FILE='logfile.log' 333 | * LOG_LEVEL=logging.DEBUG 334 | * Log Level 335 | 1. logging.CRITICAL - for critical errors(highest severity) 336 | 2. logging.ERROR - for regular errors 337 | 3. logging.WARNING - for warning messages 338 | 4. logging.INFO - for informational messages 339 | 5. logging.DEBUG - for debugging messages(lowest severity) 340 | 341 | 342 | -------------------------------------------------------------------------------- /community/community/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/12bme/Python-WebCrawling/bc0de861e46d8939eaf0e47504e1bad0d692d5ac/community/community/__init__.py -------------------------------------------------------------------------------- /community/community/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/12bme/Python-WebCrawling/bc0de861e46d8939eaf0e47504e1bad0d692d5ac/community/community/__init__.pyc -------------------------------------------------------------------------------- /community/community/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class CommunityItem(scrapy.Item): # 최초 클래스 생성시 아이템을 입력하여 만들면 됨 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | source = scrapy.Field() 15 | category = scrapy.Field() # 어디 게시판에서 가져오는지 표시 16 | title = scrapy.Field() 17 | url = scrapy.Field() 18 | hits = scrapy.Field() 19 | date = scrapy.Field() 20 | pass 21 | -------------------------------------------------------------------------------- /community/community/items.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/12bme/Python-WebCrawling/bc0de861e46d8939eaf0e47504e1bad0d692d5ac/community/community/items.pyc -------------------------------------------------------------------------------- /community/community/logfile.log: -------------------------------------------------------------------------------- 1 | 2017-05-10 20:55:27 [scrapy.utils.log] INFO: Scrapy 1.3.3 started (bot: community) 2 | 2017-05-10 20:55:27 [scrapy.utils.log] INFO: Overridden settings: {'NEWSPIDER_MODULE': 'community.spiders', 'ROBOTSTXT_OBEY': True, 'SPIDER_MODULES': ['community.spiders'], 'LOG_FILE': 'logfile.log', 'BOT_NAME': 'community'} 3 | 2017-05-10 20:55:27 [scrapy.middleware] INFO: Enabled extensions: 4 | ['scrapy.extensions.logstats.LogStats', 5 | 'scrapy.extensions.telnet.TelnetConsole', 6 | 'scrapy.extensions.corestats.CoreStats'] 7 | 2017-05-10 20:55:27 [scrapy.middleware] INFO: Enabled downloader middlewares: 8 | ['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware', 9 | 'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware', 10 | 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware', 11 | 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware', 12 | 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware', 13 | 'scrapy.downloadermiddlewares.retry.RetryMiddleware', 14 | 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware', 15 | 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware', 16 | 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware', 17 | 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware', 18 | 'scrapy.downloadermiddlewares.stats.DownloaderStats'] 19 | 2017-05-10 20:55:27 [scrapy.middleware] INFO: Enabled spider middlewares: 20 | ['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware', 21 | 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware', 22 | 
'scrapy.spidermiddlewares.referer.RefererMiddleware', 23 | 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware', 24 | 'scrapy.spidermiddlewares.depth.DepthMiddleware'] 25 | 2017-05-10 20:55:27 [scrapy.middleware] INFO: Enabled item pipelines: 26 | ['community.pipelines.CommunityPipeline'] 27 | 2017-05-10 20:55:27 [scrapy.core.engine] INFO: Spider opened 28 | 2017-05-10 20:55:27 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min) 29 | 2017-05-10 20:55:27 [scrapy.extensions.telnet] DEBUG: Telnet console listening on 127.0.0.1:6023 30 | 2017-05-10 20:55:27 [scrapy.downloadermiddlewares.redirect] DEBUG: Redirecting (302) to from 31 | 2017-05-10 20:55:27 [scrapy.core.engine] DEBUG: Crawled (200) (referer: None) 32 | 2017-05-10 20:55:27 [scrapy.core.engine] DEBUG: Crawled (404) (referer: None) 33 | 2017-05-10 20:55:27 [scrapy.downloadermiddlewares.redirect] DEBUG: Redirecting (302) to from 34 | 2017-05-10 20:55:27 [scrapy.downloadermiddlewares.redirect] DEBUG: Redirecting (301) to from 35 | 2017-05-10 20:55:27 [scrapy.downloadermiddlewares.redirect] DEBUG: Redirecting (302) to from 36 | 2017-05-10 20:55:27 [scrapy.core.engine] DEBUG: Crawled (200) (referer: None) 37 | 2017-05-10 20:55:27 [scrapy.core.engine] DEBUG: Crawled (200) (referer: None) 38 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 39 | {'category': 'free', 40 | 'date': '2017-05-11 06:41:01', 41 | 'hits': u'0', 42 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 43 | 'title': u'\u4e2d\ub9e4\uccb4 "\ubb38\uc7ac\uc778 \uc9c4\uc9c0\ud558\uac8c \uc0c1\ub300\ud560 \uc778\ubb3c\u2026\u97d3\ud2b9\uc0ac\ub2e8 \ubc29\uc911 \uc608\uc0c1"', 44 | 'url': u'http://www.clien.net/service/board/park/10730553'} 45 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 46 | {'category': 'free', 47 | 'date': '2017-05-11 06:40:46', 48 | 
'hits': u'0', 49 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 50 | 'title': u'\ubb38\uc81c\ub9ce\uc740 \ubb38\uc7ac\uc778~', 51 | 'url': u'http://www.clien.net/service/board/park/10730552'} 52 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 53 | {'category': 'free', 54 | 'date': '2017-05-11 06:38:38', 55 | 'hits': u'0', 56 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 57 | 'title': u'\ud074\ub7c9 \uc548\ub4dc\uc571 \ucd9c\uc2dc \ub418\uc5c8\ub124\uc694.', 58 | 'url': u'http://www.clien.net/service/board/park/10730551'} 59 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 60 | {'category': 'free', 61 | 'date': '2017-05-11 06:37:44', 62 | 'hits': u'0', 63 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 64 | 'title': u'\ud45c\ud604\uc758 \uc790\uc720\ub97c \uc911\uc694\uc2dc \uc5ec\uae30\ub294 \ub354\ubbfc\uc8fc\uac00 \uacfc\uc5f0 \uc77c\ubca0\ub97c \uc5b4\ub5bb\uac8c \ub300\ucc98\ud560\uae4c..\uac71\uc815\ub418\ub124\uc694', 65 | 'url': u'http://www.clien.net/service/board/park/10730549'} 66 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 67 | {'category': 'free', 68 | 'date': '2017-05-11 06:33:44', 69 | 'hits': u'0', 70 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 71 | 'title': u'\uc774\uc81c \uc5f0\ud569\ub274\uc2a4 \uc18d\ubcf4\uac00 \uae30\ub2e4\ub824 \uc9c0\ub294\uad70\uc694', 72 | 'url': u'http://www.clien.net/service/board/park/10730544'} 73 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 74 | {'category': 'free', 75 | 'date': '2017-05-11 06:32:21', 76 | 'hits': u'0', 77 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 78 | 'title': u'18\ub300 \ub300\ud1b5\ub839 \ucde8\uc784\uc2dd \ub54c \ub2f9\uc2dc \ubb38\uc7ac\uc778 \uc758\uc6d0 
\ubd88\ucc38\ud588\ub124\uc694', 79 | 'url': u'http://www.clien.net/service/board/park/10730543'} 80 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 81 | {'category': 'free', 82 | 'date': '2017-05-11 06:28:45', 83 | 'hits': u'0', 84 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 85 | 'title': u'MBC \uc815\uc0c1\ud654 \uc218\uc21c\uc740 \uc5b4\ub5a4 \uc808\ucc28\ub85c \uc9c4\ud589\ub420\uae4c\uc694', 86 | 'url': u'http://www.clien.net/service/board/park/10730542'} 87 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 88 | {'category': 'free', 89 | 'date': '2017-05-11 06:23:13', 90 | 'hits': u'0', 91 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 92 | 'title': u'avira \uc774\uc0c1\ud55c \ud31d\uc5c5\uc774 \ub5a0\uc11c \uc9c0\uc6b0\ub824\uad6c\uc694 -_-', 93 | 'url': u'http://www.clien.net/service/board/park/10730540'} 94 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 95 | {'category': 'free', 96 | 'date': '2017-05-11 06:17:36', 97 | 'hits': u'0', 98 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 99 | 'title': u'\uc81c\uac00 \ub9d0\uc774\uc8e0 \ubb38\uc7ac\uc778\ub300\ud1b5\ub839\ubcf4\ub2e4 \ub354 \uc88b\uc740\uac8c \uc788\ub354\uad70\uc694', 100 | 'url': u'http://www.clien.net/service/board/park/10730538'} 101 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 102 | {'category': 'free', 103 | 'date': '2017-05-11 06:16:21', 104 | 'hits': u'0', 105 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 106 | 'title': u'\uadf8\ub098\uc800\ub098 \uc740\uc218\ubbf8 \uc758\uc6d0\ub2d8 \ubbf8\ucfe0\ub0e5 \ucf54\uc2a4\ud504\ub808\ub294 \uc5b4\ub5bb\uac8c \ub418\ub294\uac74\uac00\uc694 ?', 107 | 'url': u'http://www.clien.net/service/board/park/10730537'} 108 | 2017-05-10 20:55:27 
[scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 109 | {'category': 'free', 110 | 'date': '2017-05-11 06:13:05', 111 | 'hits': u'0', 112 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 113 | 'title': u'\uc870\uc120\uc77c\ubcf4 \uae30\uc0ac\uc758 \ub313\uae00\uc744 \ubcf4\uba74...', 114 | 'url': u'http://www.clien.net/service/board/park/10730536'} 115 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 116 | {'category': 'free', 117 | 'date': '2017-05-11 06:12:50', 118 | 'hits': u'0', 119 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 120 | 'title': u'\uc5b4\uca4c\ub2e4\ubcf4\ub2c8 \ub124\uc774\ubc84 \uae30\uc0ac \ubca0\ub313\uc774 \ub42c\ub124\uc694;;', 121 | 'url': u'http://www.clien.net/service/board/park/10730535'} 122 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 123 | {'category': 'free', 124 | 'date': '2017-05-11 06:09:28', 125 | 'hits': u'0', 126 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 127 | 'title': u'\uad50\uc721\ubd80 \uc678\uad50\ubd80 \uad6d\ubc29\ubd80\ub294 \ud0c8\ud0c8 \ud138\uc5b4\uc57c\uc8e0', 128 | 'url': u'http://www.clien.net/service/board/park/10730534'} 129 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 130 | {'category': 'free', 131 | 'date': '2017-05-11 05:57:17', 132 | 'hits': u'0', 133 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 134 | 'title': u'\ucde8\uc784\uc2dd\uc5d0 \ud6c4\ubcf4\uac00 \ucc38\uc11d \uc548\ud588\ub2e4\uace0 \uc778\uc131\uc774 \uc5b4\uca4c\uace0 \ud558\ub294\uac74', 135 | 'url': u'http://www.clien.net/service/board/park/10730533'} 136 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 137 | {'category': 'free', 138 | 'date': '2017-05-10 06:36:00', 139 | 'hits': 6, 140 | 
'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 141 | 'title': u'\ub3c4\ucc29', 142 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261472&bm=1'} 143 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 144 | {'category': 'free', 145 | 'date': '2017-05-10 06:06:00', 146 | 'hits': 49, 147 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 148 | 'title': u'\uacbd\ud488\uc744 \uc911\uace0\uc81c\ud488\uc73c\ub85c \uc8fc\ub294\ub370 \uc5b4\ub5bb\uac8c \uc0dd\uac01\ud558\uc2dc\ub098\uc694', 149 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261471&bm=1'} 150 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 151 | {'category': 'free', 152 | 'date': '2017-05-11 05:54:09', 153 | 'hits': u'0', 154 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 155 | 'title': u'\uad6d\ubbfc\uc758\ub2f9 "\u5b89\ud6c4\ubcf4 \uc0ac\ud1f4\ud558\ub77c\ub294 \uc1a1\uc601\uae38, \uc878\ubd80 \uac11\uc9c8"', 156 | 'url': u'http://www.clien.net/service/board/park/10730532'} 157 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 158 | {'category': 'free', 159 | 'date': '2017-05-11 05:47:44', 160 | 'hits': u'2', 161 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 162 | 'title': u'\uc2ec\uc0c1\uc815\uc740 \ub2f9\uc0ac\uc5d0\uc11c \ud2f0\ube44\ub85c \ucde8\uc784\uc2dd \ubcf8\uac74\uac00\uc694?', 163 | 'url': u'http://www.clien.net/service/board/park/10730530'} 164 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 165 | {'category': 'free', 166 | 'date': '2017-05-11 05:42:25', 167 | 'hits': u'0', 168 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 169 | 'title': u'\ub178\ubb34\ud604 \ub300\ud1b5\ub839\uc774 \ud1b5\uc77c\ubd80\uc7a5\uad00\uc73c\ub85c 
\uc601\uc785\ud558\ub824\uace0 \ud588\ub358 \uc778\ubb3c\uc774 \ub204\uad70\uc9c0 \uc544\uc138\uc694?', 170 | 'url': u'http://www.clien.net/service/board/park/10730529'} 171 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 172 | {'category': 'free', 173 | 'date': '2017-05-11 05:40:11', 174 | 'hits': u'1', 175 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 176 | 'title': u'\uc774\uba85\ubc15\uadfc\ud61c \ucd5c\uace0\ub85c \ubd80\ub044\ub7fd\ub358 \uc9e4 \ub458...', 177 | 'url': u'http://www.clien.net/service/board/park/10730528'} 178 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 179 | {'category': 'free', 180 | 'date': '2017-05-11 05:33:04', 181 | 'hits': u'1', 182 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 183 | 'title': u'\uc758\ub839\uad70 \u314a\u3148.gif', 184 | 'url': u'http://www.clien.net/service/board/park/10730527'} 185 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 186 | {'category': 'free', 187 | 'date': '2017-05-11 05:16:34', 188 | 'hits': u'0', 189 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 190 | 'title': u'\uc640\uc6b0~\uc720\uc2b9\ubbfc \ub2e4\uc2dc\ubcf4\uac8c \ub418\ub124\uc694...', 191 | 'url': u'http://www.clien.net/service/board/park/10730524'} 192 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 193 | {'category': 'free', 194 | 'date': '2017-05-11 05:13:27', 195 | 'hits': u'1', 196 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 197 | 'title': u'\uc544\uc9c1\ub3c4 "\ubb38\uc7ac\uc778 \ub300\ud1b5\ub839"\uc774\ub77c\uace0 \ub4e4\uc73c\uba74 \uac00\uc2b4\uc774 \ucc21\ud569\ub2c8\ub2e4.', 198 | 'url': u'http://www.clien.net/service/board/park/10730523'} 199 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 
https://www.clien.net/service/board/park&page=1> 200 | {'category': 'free', 201 | 'date': '2017-05-11 05:09:14', 202 | 'hits': u'0', 203 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 204 | 'title': u'\ud074\ub9ac\uc559 \ubaa9\ub85d\uc73c\ub85c \uac00\ub294 \ubc84\ud2bc\uc740 \ud398\uc774\uc9c0 \uc5b4\ub514 \uc788\uc5c8\ub4e0 \uccab\ud398\uc774\uc9c0 \ubaa9\ub85d\uc73c\ub85c \uc810\ud504\ud558\ub124\uc694.\u3161,,\u3161', 205 | 'url': u'http://www.clien.net/service/board/park/10730522'} 206 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 207 | {'category': 'free', 208 | 'date': '2017-05-11 04:58:47', 209 | 'hits': u'3', 210 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 211 | 'title': u'\ub300\ud55c\ubbfc\uad6d\uc758 \ud5a5\ud6c4 \uc815\uce58 \uc804\ub9dd\uc740 \uc218\uaf34\uc138\ub825\uc774 \ubb34\uae30\ub825\ud568\uc744 \ub290\ub07c\uac8c \ub420\uac70\ub77c \ubd05\ub2c8\ub2e4.', 212 | 'url': u'http://www.clien.net/service/board/park/10730521'} 213 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 214 | {'category': 'free', 215 | 'date': '2017-05-11 04:53:50', 216 | 'hits': u'1', 217 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 218 | 'title': u'\uc778\ud130\ub137\ubc29\uc1a1\uc740 \ubd10\ub3c4 \ubd10\ub3c4 \uc2e0\uae30\ud558\ub124\uc694', 219 | 'url': u'http://www.clien.net/service/board/park/10730519'} 220 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 221 | {'category': 'free', 222 | 'date': '2017-05-11 04:53:41', 223 | 'hits': u'0', 224 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 225 | 'title': u'\ud2b8\ub7fc\ud504 \ud2b9\uac80 \uc8fc\uc7a5\ud558\ub294 \uacf5\ud654\ub2f9 \uc758\uc6d0\ub4e4\uc774 \ub298\uc5b4\ub098\uae30 \uc2dc\uc791\ud588\ub2e4.', 226 | 'url': u'http://www.clien.net/service/board/park/10730518'} 227 | 2017-05-10 
20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 228 | {'category': 'free', 229 | 'date': '2017-05-10 04:18:00', 230 | 'hits': 204, 231 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 232 | 'title': u'\uc7ac\ubc0b\ub294\uac70\ubc1c\uacac\ud588\ub124\uc694', 233 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261470&bm=1'} 234 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 235 | {'category': 'free', 236 | 'date': '2017-05-10 03:49:00', 237 | 'hits': 126, 238 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 239 | 'title': u'\uc77c\ubca0\ub4e4 \uc6ec\ub9cc\ud558\uba74 \uc774\ud574\ud558\ub824\uace0\ud558\ub294 \ud55c\uc0ac\ub78c\uc73c\ub85c\uc11c.', 240 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261469&bm=1'} 241 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 242 | {'category': 'free', 243 | 'date': '2017-05-10 03:25:00', 244 | 'hits': 177, 245 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 246 | 'title': u'\ubb38\uc7ac\uc778\ub300\ud1b5\ub839 \uc695\ud55c\uac70 \uace0\uc18c\uc548\ub418\ub098\uc694?', 247 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261468&bm=1'} 248 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 249 | {'category': 'free', 250 | 'date': '2017-05-10 02:57:00', 251 | 'hits': 65, 252 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 253 | 'title': u'\uc218\ucd9c \uc911\uace0\ucc28 \ubb38\uc758\uad00\ub828', 254 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261467&bm=1'} 255 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 256 | {'category': 'free', 257 | 'date': '2017-05-10 02:38:00', 258 | 
'hits': 110, 259 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 260 | 'title': u'\ucda9\uccad\ub3c4 \uac1c\uadf8', 261 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261466&bm=1'} 262 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 263 | {'category': 'free', 264 | 'date': '2017-05-10 02:18:00', 265 | 'hits': 147, 266 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 267 | 'title': u'\ub274 \ub808\uc778\uc9c0\ub85c\ubc84 \ubcf4\uad6c \uad6c\ud569\ub2c8\ub2e4(\uac1c\uc778 \uc9c1\uac70\ub798 \uc6d0\ud569\ub2c8\ub2e4^^~)', 268 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261465&bm=1'} 269 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 270 | {'category': 'free', 271 | 'date': '2017-05-10 02:15:00', 272 | 'hits': 133, 273 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 274 | 'title': u'\uae30\ub150\uc73c\ub85c \uadf8\ub824\ubcf4\uc558\uc2b5\ub2c8\ub2e4', 275 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261464&bm=1'} 276 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 277 | {'category': 'free', 278 | 'date': '2017-05-10 01:53:00', 279 | 'hits': 274, 280 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 281 | 'title': u'19\uc0b4 \ucd9c\ud1f4\uadfc\uc911\uace0\ucc28\ucd94\ucc9c\uc880\uc694', 282 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261463&bm=1'} 283 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 284 | {'category': 'free', 285 | 'date': '2017-05-10 01:48:00', 286 | 'hits': 209, 287 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 288 | 'title': u'\ucef4\ub9f9 \uc870\ub9bd\uc2dd\ucef4\ud130 \uacac\uc801 \uc0ac\uae30\uc778\uac00\uc694\u3160\u3160', 289 
| 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261462&bm=1'} 290 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 291 | {'category': 'free', 292 | 'date': '2017-05-10 01:47:00', 293 | 'hits': 360, 294 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 295 | 'title': u'\ubd80\uc0b0 \ubc94\ucc9c\ub3d9\uc5d0\uc11c C63AMG \ucc28\ub7c9\uc744 \ub3c4\ub09c\ub2f9\ud588\uc2b5\ub2c8\ub2e4.', 296 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261461&bm=1'} 297 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 298 | {'category': 'free', 299 | 'date': '2017-05-11 04:52:20', 300 | 'hits': u'1', 301 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 302 | 'title': u'\ubb34\uac70\uc6b4 \uba38\ub9ac\uce74\ub77d.gif', 303 | 'url': u'http://www.clien.net/service/board/park/10730516'} 304 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 305 | {'category': 'free', 306 | 'date': '2017-05-11 04:50:27', 307 | 'hits': u'1', 308 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 309 | 'title': u'\ud1a0\uc6b0 \ubbf8\uc0ac\uc77c \ud53c\ud558\ub294 \ubc29\ubc95.gif', 310 | 'url': u'http://www.clien.net/service/board/park/10730515'} 311 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 312 | {'category': 'free', 313 | 'date': '2017-05-11 04:47:50', 314 | 'hits': u'0', 315 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 316 | 'title': u'\uc624\ubc14\ub9c8 \uc804 \ub300\ud1b5\ub839 \uc704\uc548\ubd80 \uad00\ub828 \uc798\ubabb \uc54c\uace0 \uacc4\uc2dc\ub294\ub370', 317 | 'url': u'http://www.clien.net/service/board/park/10730514'} 318 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 319 | {'category': 'free', 320 | 
'date': '2017-05-11 04:44:17', 321 | 'hits': u'0', 322 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 323 | 'title': u'\ub300\ub959 \uc0ac\uadf9 \uadfc\ud669.gif', 324 | 'url': u'http://www.clien.net/service/board/park/10730513'} 325 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 326 | {'category': 'free', 327 | 'date': '2017-05-11 04:42:10', 328 | 'hits': u'0', 329 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 330 | 'title': u'\uc5b4\uc81c \ub4e4\uc5c8\ub358 \ub9d0\uc911 \uc81c\uc77c \ud669\ub2f9\ud55c \ub9d0.txt', 331 | 'url': u'http://www.clien.net/service/board/park/10730512'} 332 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 333 | {'category': 'free', 334 | 'date': '2017-05-10 01:33:00', 335 | 'hits': 239, 336 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 337 | 'title': u'\uc5ec\uae30\uac00 \ubcf4\ubc30\ub4dc\ub9bc\uc778\uac00\uc694', 338 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261460&bm=1'} 339 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 340 | {'category': 'free', 341 | 'date': '2017-05-10 01:01:00', 342 | 'hits': 69, 343 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 344 | 'title': u'GPS', 345 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261459&bm=1'} 346 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 347 | {'category': 'free', 348 | 'date': '2017-05-10 00:59:00', 349 | 'hits': 198, 350 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 351 | 'title': u'\ubc95\ucabd\uc5d0 \uc788\uc73c\uc2ec \ud615\ub2d8\ub4e4..\ub54c\uc778\ub3c8\uc5d0 \uad00\ud55c \uc870\uc5b8\uc880 \ubd80\ud0c1\ub4dc\ub9bd\ub2c8\ub2e4.', 352 | 'url': 
u'http://www.bobaedream.co.kr/view?code=freeb&No=1261458&bm=1'} 353 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 354 | {'category': 'free', 355 | 'date': '2017-05-10 00:51:00', 356 | 'hits': 390, 357 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 358 | 'title': u'\uc7ac\uc778\uc774\ud615\uc774 \uacbd\uc720\uac12 \uc62c\ub9b0\ub2e4\ub294\ub370', 359 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261457&bm=1'} 360 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 361 | {'category': 'free', 362 | 'date': '2017-05-10 00:29:00', 363 | 'hits': 281, 364 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 365 | 'title': u'\uc0ac\uace0\ub09c \uc904 \ubaa8\ub974\uace0 \uadf8\ub0e5 \uc654\ub294\ub370 \ube14\ubc15 \ubcf4\ub2c8... \ub4a4\ub85c \ube90\ub354\ub77c\uad6c\uc694;', 366 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261456&bm=1'} 367 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 368 | {'category': 'free', 369 | 'date': '2017-05-10 00:29:00', 370 | 'hits': 288, 371 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 372 | 'title': u'\uc774\uac8c\ubb50\ub0d0\u314b', 373 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261455&bm=1'} 374 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 375 | {'category': 'free', 376 | 'date': '2017-05-10 00:19:00', 377 | 'hits': 267, 378 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 379 | 'title': u'\ubcf4\ubc30\ud615\ub2d8\ub4e4 \uc791\uc740 \uc1fc\ud551\ubab0 \ucc3d\uc5c5\ud574\ubcf4\ub824\ud569\ub2c8\ub2e4.. 
\uc870\uc5b8\uc880 \ubd80\ud0c1\ub4dc\ub824\uc694', 380 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261454&bm=1'} 381 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 382 | {'category': 'free', 383 | 'date': '2017-05-10 00:15:00', 384 | 'hits': 234, 385 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 386 | 'title': u'\uc6d4\uae09\ub0a0\uc774\ub124\uc694', 387 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261453&bm=1'} 388 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 389 | {'category': 'free', 390 | 'date': '2017-05-10 00:13:00', 391 | 'hits': 65, 392 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 393 | 'title': u'\uc0ac\ub098\uc774\ub294', 394 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261452&bm=1'} 395 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 396 | {'category': 'free', 397 | 'date': '2017-05-10 00:08:00', 398 | 'hits': 454, 399 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 400 | 'title': u'\uba38\ud50c\ub7ec \ubd88\ubc95\uac1c\uc870 \uc2e0\uace0\ud588\uc2b5\ub2c8\ub2e4 \uadf8\ub7f0\ub370', 401 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261451&bm=1'} 402 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 403 | {'category': 'free', 404 | 'date': '2017-05-10 00:04:00', 405 | 'hits': 142, 406 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 407 | 'title': u'\uad7f\ub098\uc787', 408 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261450&bm=1'} 409 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 410 | {'category': 'free', 411 | 'date': '2017-05-10 00:03:00', 412 | 'hits': 
241, 413 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 414 | 'title': u'\u3146\u3142 \uc815\uc740\uc544. \ud558\ub098\ub9cc \ubb3b\uc790.', 415 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261449&bm=1'} 416 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 417 | {'category': 'free', 418 | 'date': '2017-05-10 00:00:00', 419 | 'hits': 158, 420 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 421 | 'title': u'\uc8c4\uc1a1\ud569\ub2c8\ub2e4...', 422 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261448&bm=1'} 423 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 424 | {'category': 'free', 425 | 'date': '2017-05-10 00:00:00', 426 | 'hits': 838, 427 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 428 | 'title': u'\ud754\ud55c \uc4f0\ub9ac \uc194\ub85c\uc758 \ub2e8\ud1a1\ubc29,,,', 429 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261447&bm=1'} 430 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 431 | {'category': 'free', 432 | 'date': '2017-05-10 00:00:00', 433 | 'hits': 82, 434 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 435 | 'title': u'\uad7f\ub098\uc787!', 436 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261446&bm=1'} 437 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 438 | {'category': 'free', 439 | 'date': '2017-05-10 00:00:00', 440 | 'hits': 468, 441 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 442 | 'title': u'\uc120\ubb3c \ubc1b\uc740\uac78 \uc720\uac8c\uc5d0 \uc62c\ub838\ub124\uc694.', 443 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261445&bm=1'} 444 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 
http://www.bobaedream.co.kr/list?code=freeb&page=1> 445 | {'category': 'free', 446 | 'date': '2017-05-10 00:00:00', 447 | 'hits': 171, 448 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 449 | 'title': u'\uc544..', 450 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261444&bm=1'} 451 | 2017-05-10 20:55:27 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 452 | {'category': 'free', 453 | 'date': '2017-05-10 00:00:00', 454 | 'hits': 172, 455 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 456 | 'title': u'\uc2e0\ucc28 \uc8fc\ud589\uac70\ub9ac \ubb38\uc758', 457 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261443&bm=1'} 458 | 2017-05-10 20:55:27 [scrapy.core.engine] INFO: Closing spider (finished) 459 | 2017-05-10 20:55:27 [scrapy.statscollectors] INFO: Dumping Scrapy stats: 460 | {'downloader/request_bytes': 1880, 461 | 'downloader/request_count': 8, 462 | 'downloader/request_method_count/GET': 8, 463 | 'downloader/response_bytes': 33165, 464 | 'downloader/response_count': 8, 465 | 'downloader/response_status_count/200': 3, 466 | 'downloader/response_status_count/301': 1, 467 | 'downloader/response_status_count/302': 3, 468 | 'downloader/response_status_count/404': 1, 469 | 'finish_reason': 'finished', 470 | 'finish_time': datetime.datetime(2017, 5, 10, 20, 55, 27, 645798), 471 | 'item_scraped_count': 60, 472 | 'log_count/DEBUG': 69, 473 | 'log_count/INFO': 7, 474 | 'response_received_count': 4, 475 | 'scheduler/dequeued': 5, 476 | 'scheduler/dequeued/memory': 5, 477 | 'scheduler/enqueued': 5, 478 | 'scheduler/enqueued/memory': 5, 479 | 'start_time': datetime.datetime(2017, 5, 10, 20, 55, 27, 309965)} 480 | 2017-05-10 20:55:27 [scrapy.core.engine] INFO: Spider closed (finished) 481 | 2017-05-10 20:56:03 [scrapy.utils.log] INFO: Scrapy 1.3.3 started (bot: community) 482 | 2017-05-10 20:56:03 [scrapy.utils.log] INFO: Overridden settings: {'NEWSPIDER_MODULE': 
'community.spiders', 'ROBOTSTXT_OBEY': True, 'SPIDER_MODULES': ['community.spiders'], 'LOG_FILE': 'logfile.log', 'BOT_NAME': 'community'} 483 | 2017-05-10 20:56:03 [scrapy.middleware] INFO: Enabled extensions: 484 | ['scrapy.extensions.logstats.LogStats', 485 | 'scrapy.extensions.telnet.TelnetConsole', 486 | 'scrapy.extensions.corestats.CoreStats'] 487 | 2017-05-10 20:56:03 [scrapy.middleware] INFO: Enabled downloader middlewares: 488 | ['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware', 489 | 'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware', 490 | 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware', 491 | 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware', 492 | 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware', 493 | 'scrapy.downloadermiddlewares.retry.RetryMiddleware', 494 | 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware', 495 | 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware', 496 | 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware', 497 | 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware', 498 | 'scrapy.downloadermiddlewares.stats.DownloaderStats'] 499 | 2017-05-10 20:56:03 [scrapy.middleware] INFO: Enabled spider middlewares: 500 | ['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware', 501 | 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware', 502 | 'scrapy.spidermiddlewares.referer.RefererMiddleware', 503 | 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware', 504 | 'scrapy.spidermiddlewares.depth.DepthMiddleware'] 505 | 2017-05-10 20:56:03 [scrapy.middleware] INFO: Enabled item pipelines: 506 | ['community.pipelines.CommunityPipeline'] 507 | 2017-05-10 20:56:03 [scrapy.core.engine] INFO: Spider opened 508 | 2017-05-10 20:56:03 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min) 509 | 2017-05-10 20:56:03 [scrapy.extensions.telnet] DEBUG: Telnet console listening on 
127.0.0.1:6023 510 | 2017-05-10 20:56:03 [scrapy.downloadermiddlewares.redirect] DEBUG: Redirecting (302) to from 511 | 2017-05-10 20:56:03 [scrapy.core.engine] DEBUG: Crawled (200) (referer: None) 512 | 2017-05-10 20:56:03 [scrapy.core.engine] DEBUG: Crawled (404) (referer: None) 513 | 2017-05-10 20:56:03 [scrapy.downloadermiddlewares.redirect] DEBUG: Redirecting (302) to from 514 | 2017-05-10 20:56:03 [scrapy.downloadermiddlewares.redirect] DEBUG: Redirecting (301) to from 515 | 2017-05-10 20:56:03 [scrapy.downloadermiddlewares.redirect] DEBUG: Redirecting (302) to from 516 | 2017-05-10 20:56:03 [scrapy.core.engine] DEBUG: Crawled (200) (referer: None) 517 | 2017-05-10 20:56:03 [scrapy.core.engine] DEBUG: Crawled (200) (referer: None) 518 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 519 | {'category': 'free', 520 | 'date': '2017-05-11 06:43:55', 521 | 'hits': u'0', 522 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 523 | 'title': u'\uac04\uc18c\ud55c \ucde8\uc784\uc2dd \uc88b\ub124\uc694', 524 | 'url': u'http://www.clien.net/service/board/park/10730554'} 525 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 526 | {'category': 'free', 527 | 'date': '2017-05-11 06:41:01', 528 | 'hits': u'0', 529 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 530 | 'title': u'\u4e2d\ub9e4\uccb4 "\ubb38\uc7ac\uc778 \uc9c4\uc9c0\ud558\uac8c \uc0c1\ub300\ud560 \uc778\ubb3c\u2026\u97d3\ud2b9\uc0ac\ub2e8 \ubc29\uc911 \uc608\uc0c1"', 531 | 'url': u'http://www.clien.net/service/board/park/10730553'} 532 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 533 | {'category': 'free', 534 | 'date': '2017-05-11 06:40:46', 535 | 'hits': u'0', 536 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 537 | 'title': u'\ubb38\uc81c\ub9ce\uc740 \ubb38\uc7ac\uc778~', 538 | 'url': 
u'http://www.clien.net/service/board/park/10730552'} 539 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 540 | {'category': 'free', 541 | 'date': '2017-05-11 06:38:38', 542 | 'hits': u'0', 543 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 544 | 'title': u'\ud074\ub7c9 \uc548\ub4dc\uc571 \ucd9c\uc2dc \ub418\uc5c8\ub124\uc694.', 545 | 'url': u'http://www.clien.net/service/board/park/10730551'} 546 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 547 | {'category': 'free', 548 | 'date': '2017-05-11 06:37:44', 549 | 'hits': u'0', 550 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 551 | 'title': u'\ud45c\ud604\uc758 \uc790\uc720\ub97c \uc911\uc694\uc2dc \uc5ec\uae30\ub294 \ub354\ubbfc\uc8fc\uac00 \uacfc\uc5f0 \uc77c\ubca0\ub97c \uc5b4\ub5bb\uac8c \ub300\ucc98\ud560\uae4c..\uac71\uc815\ub418\ub124\uc694', 552 | 'url': u'http://www.clien.net/service/board/park/10730549'} 553 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 554 | {'category': 'free', 555 | 'date': '2017-05-11 06:33:44', 556 | 'hits': u'0', 557 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 558 | 'title': u'\uc774\uc81c \uc5f0\ud569\ub274\uc2a4 \uc18d\ubcf4\uac00 \uae30\ub2e4\ub824 \uc9c0\ub294\uad70\uc694', 559 | 'url': u'http://www.clien.net/service/board/park/10730544'} 560 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 561 | {'category': 'free', 562 | 'date': '2017-05-11 06:32:21', 563 | 'hits': u'0', 564 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 565 | 'title': u'18\ub300 \ub300\ud1b5\ub839 \ucde8\uc784\uc2dd \ub54c \ub2f9\uc2dc \ubb38\uc7ac\uc778 \uc758\uc6d0 \ubd88\ucc38\ud588\ub124\uc694', 566 | 'url': u'http://www.clien.net/service/board/park/10730543'} 567 | 2017-05-10 20:56:03 [scrapy.core.scraper] 
DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 568 | {'category': 'free', 569 | 'date': '2017-05-11 06:28:45', 570 | 'hits': u'0', 571 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 572 | 'title': u'MBC \uc815\uc0c1\ud654 \uc218\uc21c\uc740 \uc5b4\ub5a4 \uc808\ucc28\ub85c \uc9c4\ud589\ub420\uae4c\uc694', 573 | 'url': u'http://www.clien.net/service/board/park/10730542'} 574 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 575 | {'category': 'free', 576 | 'date': '2017-05-11 06:23:13', 577 | 'hits': u'0', 578 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 579 | 'title': u'avira \uc774\uc0c1\ud55c \ud31d\uc5c5\uc774 \ub5a0\uc11c \uc9c0\uc6b0\ub824\uad6c\uc694 -_-', 580 | 'url': u'http://www.clien.net/service/board/park/10730540'} 581 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 582 | {'category': 'free', 583 | 'date': '2017-05-11 06:17:36', 584 | 'hits': u'0', 585 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 586 | 'title': u'\uc81c\uac00 \ub9d0\uc774\uc8e0 \ubb38\uc7ac\uc778\ub300\ud1b5\ub839\ubcf4\ub2e4 \ub354 \uc88b\uc740\uac8c \uc788\ub354\uad70\uc694', 587 | 'url': u'http://www.clien.net/service/board/park/10730538'} 588 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 589 | {'category': 'free', 590 | 'date': '2017-05-11 06:16:21', 591 | 'hits': u'0', 592 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 593 | 'title': u'\uadf8\ub098\uc800\ub098 \uc740\uc218\ubbf8 \uc758\uc6d0\ub2d8 \ubbf8\ucfe0\ub0e5 \ucf54\uc2a4\ud504\ub808\ub294 \uc5b4\ub5bb\uac8c \ub418\ub294\uac74\uac00\uc694 ?', 594 | 'url': u'http://www.clien.net/service/board/park/10730537'} 595 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 596 | {'category': 'free', 597 | 'date': 
'2017-05-11 06:13:05', 598 | 'hits': u'0', 599 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 600 | 'title': u'\uc870\uc120\uc77c\ubcf4 \uae30\uc0ac\uc758 \ub313\uae00\uc744 \ubcf4\uba74...', 601 | 'url': u'http://www.clien.net/service/board/park/10730536'} 602 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 603 | {'category': 'free', 604 | 'date': '2017-05-11 06:12:50', 605 | 'hits': u'0', 606 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 607 | 'title': u'\uc5b4\uca4c\ub2e4\ubcf4\ub2c8 \ub124\uc774\ubc84 \uae30\uc0ac \ubca0\ub313\uc774 \ub42c\ub124\uc694;;', 608 | 'url': u'http://www.clien.net/service/board/park/10730535'} 609 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 610 | {'category': 'free', 611 | 'date': '2017-05-10 06:36:00', 612 | 'hits': 8, 613 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 614 | 'title': u'\ub3c4\ucc29', 615 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261472&bm=1'} 616 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 617 | {'category': 'free', 618 | 'date': '2017-05-10 06:06:00', 619 | 'hits': 49, 620 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 621 | 'title': u'\uacbd\ud488\uc744 \uc911\uace0\uc81c\ud488\uc73c\ub85c \uc8fc\ub294\ub370 \uc5b4\ub5bb\uac8c \uc0dd\uac01\ud558\uc2dc\ub098\uc694', 622 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261471&bm=1'} 623 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 624 | {'category': 'free', 625 | 'date': '2017-05-11 06:09:28', 626 | 'hits': u'0', 627 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 628 | 'title': u'\uad50\uc721\ubd80 \uc678\uad50\ubd80 \uad6d\ubc29\ubd80\ub294 \ud0c8\ud0c8 \ud138\uc5b4\uc57c\uc8e0', 629 | 'url': 
u'http://www.clien.net/service/board/park/10730534'} 630 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 631 | {'category': 'free', 632 | 'date': '2017-05-11 05:57:17', 633 | 'hits': u'0', 634 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 635 | 'title': u'\ucde8\uc784\uc2dd\uc5d0 \ud6c4\ubcf4\uac00 \ucc38\uc11d \uc548\ud588\ub2e4\uace0 \uc778\uc131\uc774 \uc5b4\uca4c\uace0 \ud558\ub294\uac74', 636 | 'url': u'http://www.clien.net/service/board/park/10730533'} 637 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 638 | {'category': 'free', 639 | 'date': '2017-05-11 05:54:09', 640 | 'hits': u'0', 641 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 642 | 'title': u'\uad6d\ubbfc\uc758\ub2f9 "\u5b89\ud6c4\ubcf4 \uc0ac\ud1f4\ud558\ub77c\ub294 \uc1a1\uc601\uae38, \uc878\ubd80 \uac11\uc9c8"', 643 | 'url': u'http://www.clien.net/service/board/park/10730532'} 644 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 645 | {'category': 'free', 646 | 'date': '2017-05-11 05:47:44', 647 | 'hits': u'2', 648 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 649 | 'title': u'\uc2ec\uc0c1\uc815\uc740 \ub2f9\uc0ac\uc5d0\uc11c \ud2f0\ube44\ub85c \ucde8\uc784\uc2dd \ubcf8\uac74\uac00\uc694?', 650 | 'url': u'http://www.clien.net/service/board/park/10730530'} 651 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 652 | {'category': 'free', 653 | 'date': '2017-05-11 05:42:25', 654 | 'hits': u'0', 655 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 656 | 'title': u'\ub178\ubb34\ud604 \ub300\ud1b5\ub839\uc774 \ud1b5\uc77c\ubd80\uc7a5\uad00\uc73c\ub85c \uc601\uc785\ud558\ub824\uace0 \ud588\ub358 \uc778\ubb3c\uc774 \ub204\uad70\uc9c0 \uc544\uc138\uc694?', 657 | 'url': 
u'http://www.clien.net/service/board/park/10730529'} 658 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 659 | {'category': 'free', 660 | 'date': '2017-05-11 05:40:11', 661 | 'hits': u'1', 662 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 663 | 'title': u'\uc774\uba85\ubc15\uadfc\ud61c \ucd5c\uace0\ub85c \ubd80\ub044\ub7fd\ub358 \uc9e4 \ub458...', 664 | 'url': u'http://www.clien.net/service/board/park/10730528'} 665 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 666 | {'category': 'free', 667 | 'date': '2017-05-11 05:33:04', 668 | 'hits': u'1', 669 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 670 | 'title': u'\uc758\ub839\uad70 \u314a\u3148.gif', 671 | 'url': u'http://www.clien.net/service/board/park/10730527'} 672 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 673 | {'category': 'free', 674 | 'date': '2017-05-11 05:16:34', 675 | 'hits': u'0', 676 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 677 | 'title': u'\uc640\uc6b0~\uc720\uc2b9\ubbfc \ub2e4\uc2dc\ubcf4\uac8c \ub418\ub124\uc694...', 678 | 'url': u'http://www.clien.net/service/board/park/10730524'} 679 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 680 | {'category': 'free', 681 | 'date': '2017-05-11 05:13:27', 682 | 'hits': u'1', 683 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 684 | 'title': u'\uc544\uc9c1\ub3c4 "\ubb38\uc7ac\uc778 \ub300\ud1b5\ub839"\uc774\ub77c\uace0 \ub4e4\uc73c\uba74 \uac00\uc2b4\uc774 \ucc21\ud569\ub2c8\ub2e4.', 685 | 'url': u'http://www.clien.net/service/board/park/10730523'} 686 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 687 | {'category': 'free', 688 | 'date': '2017-05-11 05:09:14', 689 | 'hits': u'0', 690 
| 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 691 | 'title': u'\ud074\ub9ac\uc559 \ubaa9\ub85d\uc73c\ub85c \uac00\ub294 \ubc84\ud2bc\uc740 \ud398\uc774\uc9c0 \uc5b4\ub514 \uc788\uc5c8\ub4e0 \uccab\ud398\uc774\uc9c0 \ubaa9\ub85d\uc73c\ub85c \uc810\ud504\ud558\ub124\uc694.\u3161,,\u3161', 692 | 'url': u'http://www.clien.net/service/board/park/10730522'} 693 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 694 | {'category': 'free', 695 | 'date': '2017-05-11 04:58:47', 696 | 'hits': u'3', 697 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 698 | 'title': u'\ub300\ud55c\ubbfc\uad6d\uc758 \ud5a5\ud6c4 \uc815\uce58 \uc804\ub9dd\uc740 \uc218\uaf34\uc138\ub825\uc774 \ubb34\uae30\ub825\ud568\uc744 \ub290\ub07c\uac8c \ub420\uac70\ub77c \ubd05\ub2c8\ub2e4.', 699 | 'url': u'http://www.clien.net/service/board/park/10730521'} 700 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 701 | {'category': 'free', 702 | 'date': '2017-05-10 04:18:00', 703 | 'hits': 209, 704 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 705 | 'title': u'\uc7ac\ubc0b\ub294\uac70\ubc1c\uacac\ud588\ub124\uc694', 706 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261470&bm=1'} 707 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 708 | {'category': 'free', 709 | 'date': '2017-05-10 03:49:00', 710 | 'hits': 128, 711 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 712 | 'title': u'\uc77c\ubca0\ub4e4 \uc6ec\ub9cc\ud558\uba74 \uc774\ud574\ud558\ub824\uace0\ud558\ub294 \ud55c\uc0ac\ub78c\uc73c\ub85c\uc11c.', 713 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261469&bm=1'} 714 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 715 | {'category': 'free', 716 | 'date': 
'2017-05-10 03:25:00', 717 | 'hits': 178, 718 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 719 | 'title': u'\ubb38\uc7ac\uc778\ub300\ud1b5\ub839 \uc695\ud55c\uac70 \uace0\uc18c\uc548\ub418\ub098\uc694?', 720 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261468&bm=1'} 721 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 722 | {'category': 'free', 723 | 'date': '2017-05-10 02:57:00', 724 | 'hits': 65, 725 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 726 | 'title': u'\uc218\ucd9c \uc911\uace0\ucc28 \ubb38\uc758\uad00\ub828', 727 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261467&bm=1'} 728 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 729 | {'category': 'free', 730 | 'date': '2017-05-10 02:38:00', 731 | 'hits': 110, 732 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 733 | 'title': u'\ucda9\uccad\ub3c4 \uac1c\uadf8', 734 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261466&bm=1'} 735 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 736 | {'category': 'free', 737 | 'date': '2017-05-10 02:18:00', 738 | 'hits': 148, 739 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 740 | 'title': u'\ub274 \ub808\uc778\uc9c0\ub85c\ubc84 \ubcf4\uad6c \uad6c\ud569\ub2c8\ub2e4(\uac1c\uc778 \uc9c1\uac70\ub798 \uc6d0\ud569\ub2c8\ub2e4^^~)', 741 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261465&bm=1'} 742 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 743 | {'category': 'free', 744 | 'date': '2017-05-11 04:53:50', 745 | 'hits': u'1', 746 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 747 | 'title': u'\uc778\ud130\ub137\ubc29\uc1a1\uc740 \ubd10\ub3c4 \ubd10\ub3c4 
\uc2e0\uae30\ud558\ub124\uc694', 748 | 'url': u'http://www.clien.net/service/board/park/10730519'} 749 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 750 | {'category': 'free', 751 | 'date': '2017-05-11 04:53:41', 752 | 'hits': u'0', 753 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 754 | 'title': u'\ud2b8\ub7fc\ud504 \ud2b9\uac80 \uc8fc\uc7a5\ud558\ub294 \uacf5\ud654\ub2f9 \uc758\uc6d0\ub4e4\uc774 \ub298\uc5b4\ub098\uae30 \uc2dc\uc791\ud588\ub2e4.', 755 | 'url': u'http://www.clien.net/service/board/park/10730518'} 756 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 757 | {'category': 'free', 758 | 'date': '2017-05-11 04:52:20', 759 | 'hits': u'1', 760 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 761 | 'title': u'\ubb34\uac70\uc6b4 \uba38\ub9ac\uce74\ub77d.gif', 762 | 'url': u'http://www.clien.net/service/board/park/10730516'} 763 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 764 | {'category': 'free', 765 | 'date': '2017-05-11 04:50:27', 766 | 'hits': u'1', 767 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 768 | 'title': u'\ud1a0\uc6b0 \ubbf8\uc0ac\uc77c \ud53c\ud558\ub294 \ubc29\ubc95.gif', 769 | 'url': u'http://www.clien.net/service/board/park/10730515'} 770 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.clien.net/service/board/park&page=1> 771 | {'category': 'free', 772 | 'date': '2017-05-11 04:47:50', 773 | 'hits': u'0', 774 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 775 | 'title': u'\uc624\ubc14\ub9c8 \uc804 \ub300\ud1b5\ub839 \uc704\uc548\ubd80 \uad00\ub828 \uc798\ubabb \uc54c\uace0 \uacc4\uc2dc\ub294\ub370', 776 | 'url': u'http://www.clien.net/service/board/park/10730514'} 777 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 
https://www.clien.net/service/board/park&page=1> 778 | {'category': 'free', 779 | 'date': '2017-05-11 04:44:17', 780 | 'hits': u'0', 781 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 782 | 'title': u'\ub300\ub959 \uc0ac\uadf9 \uadfc\ud669.gif', 783 | 'url': u'http://www.clien.net/service/board/park/10730513'} 784 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 785 | {'category': 'free', 786 | 'date': '2017-05-10 02:15:00', 787 | 'hits': 135, 788 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 789 | 'title': u'\uae30\ub150\uc73c\ub85c \uadf8\ub824\ubcf4\uc558\uc2b5\ub2c8\ub2e4', 790 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261464&bm=1'} 791 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 792 | {'category': 'free', 793 | 'date': '2017-05-10 01:53:00', 794 | 'hits': 275, 795 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 796 | 'title': u'19\uc0b4 \ucd9c\ud1f4\uadfc\uc911\uace0\ucc28\ucd94\ucc9c\uc880\uc694', 797 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261463&bm=1'} 798 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 799 | {'category': 'free', 800 | 'date': '2017-05-10 01:48:00', 801 | 'hits': 209, 802 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 803 | 'title': u'\ucef4\ub9f9 \uc870\ub9bd\uc2dd\ucef4\ud130 \uacac\uc801 \uc0ac\uae30\uc778\uac00\uc694\u3160\u3160', 804 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261462&bm=1'} 805 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 806 | {'category': 'free', 807 | 'date': '2017-05-10 01:47:00', 808 | 'hits': 360, 809 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 810 | 'title': u'\ubd80\uc0b0 
\ubc94\ucc9c\ub3d9\uc5d0\uc11c C63AMG \ucc28\ub7c9\uc744 \ub3c4\ub09c\ub2f9\ud588\uc2b5\ub2c8\ub2e4.', 811 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261461&bm=1'} 812 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 813 | {'category': 'free', 814 | 'date': '2017-05-10 01:33:00', 815 | 'hits': 239, 816 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 817 | 'title': u'\uc5ec\uae30\uac00 \ubcf4\ubc30\ub4dc\ub9bc\uc778\uac00\uc694', 818 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261460&bm=1'} 819 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 820 | {'category': 'free', 821 | 'date': '2017-05-10 01:01:00', 822 | 'hits': 69, 823 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 824 | 'title': u'GPS', 825 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261459&bm=1'} 826 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 827 | {'category': 'free', 828 | 'date': '2017-05-10 00:59:00', 829 | 'hits': 199, 830 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 831 | 'title': u'\ubc95\ucabd\uc5d0 \uc788\uc73c\uc2ec \ud615\ub2d8\ub4e4..\ub54c\uc778\ub3c8\uc5d0 \uad00\ud55c \uc870\uc5b8\uc880 \ubd80\ud0c1\ub4dc\ub9bd\ub2c8\ub2e4.', 832 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261458&bm=1'} 833 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 834 | {'category': 'free', 835 | 'date': '2017-05-10 00:51:00', 836 | 'hits': 390, 837 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 838 | 'title': u'\uc7ac\uc778\uc774\ud615\uc774 \uacbd\uc720\uac12 \uc62c\ub9b0\ub2e4\ub294\ub370', 839 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261457&bm=1'} 840 | 2017-05-10 20:56:03 
[scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 841 | {'category': 'free', 842 | 'date': '2017-05-10 00:29:00', 843 | 'hits': 281, 844 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 845 | 'title': u'\uc0ac\uace0\ub09c \uc904 \ubaa8\ub974\uace0 \uadf8\ub0e5 \uc654\ub294\ub370 \ube14\ubc15 \ubcf4\ub2c8... \ub4a4\ub85c \ube90\ub354\ub77c\uad6c\uc694;', 846 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261456&bm=1'} 847 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 848 | {'category': 'free', 849 | 'date': '2017-05-10 00:29:00', 850 | 'hits': 288, 851 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 852 | 'title': u'\uc774\uac8c\ubb50\ub0d0\u314b', 853 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261455&bm=1'} 854 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 855 | {'category': 'free', 856 | 'date': '2017-05-10 00:19:00', 857 | 'hits': 267, 858 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 859 | 'title': u'\ubcf4\ubc30\ud615\ub2d8\ub4e4 \uc791\uc740 \uc1fc\ud551\ubab0 \ucc3d\uc5c5\ud574\ubcf4\ub824\ud569\ub2c8\ub2e4.. 
\uc870\uc5b8\uc880 \ubd80\ud0c1\ub4dc\ub824\uc694', 860 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261454&bm=1'} 861 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 862 | {'category': 'free', 863 | 'date': '2017-05-10 00:15:00', 864 | 'hits': 234, 865 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 866 | 'title': u'\uc6d4\uae09\ub0a0\uc774\ub124\uc694', 867 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261453&bm=1'} 868 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 869 | {'category': 'free', 870 | 'date': '2017-05-10 00:13:00', 871 | 'hits': 65, 872 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 873 | 'title': u'\uc0ac\ub098\uc774\ub294', 874 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261452&bm=1'} 875 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 876 | {'category': 'free', 877 | 'date': '2017-05-10 00:08:00', 878 | 'hits': 455, 879 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 880 | 'title': u'\uba38\ud50c\ub7ec \ubd88\ubc95\uac1c\uc870 \uc2e0\uace0\ud588\uc2b5\ub2c8\ub2e4 \uadf8\ub7f0\ub370', 881 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261451&bm=1'} 882 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 883 | {'category': 'free', 884 | 'date': '2017-05-10 00:04:00', 885 | 'hits': 142, 886 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 887 | 'title': u'\uad7f\ub098\uc787', 888 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261450&bm=1'} 889 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 890 | {'category': 'free', 891 | 'date': '2017-05-10 00:03:00', 892 | 'hits': 
241, 893 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 894 | 'title': u'\u3146\u3142 \uc815\uc740\uc544. \ud558\ub098\ub9cc \ubb3b\uc790.', 895 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261449&bm=1'} 896 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 897 | {'category': 'free', 898 | 'date': '2017-05-10 00:00:00', 899 | 'hits': 158, 900 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 901 | 'title': u'\uc8c4\uc1a1\ud569\ub2c8\ub2e4...', 902 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261448&bm=1'} 903 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 904 | {'category': 'free', 905 | 'date': '2017-05-10 00:00:00', 906 | 'hits': 839, 907 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 908 | 'title': u'\ud754\ud55c \uc4f0\ub9ac \uc194\ub85c\uc758 \ub2e8\ud1a1\ubc29,,,', 909 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261447&bm=1'} 910 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 911 | {'category': 'free', 912 | 'date': '2017-05-10 00:00:00', 913 | 'hits': 82, 914 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 915 | 'title': u'\uad7f\ub098\uc787!', 916 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261446&bm=1'} 917 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 918 | {'category': 'free', 919 | 'date': '2017-05-10 00:00:00', 920 | 'hits': 469, 921 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 922 | 'title': u'\uc120\ubb3c \ubc1b\uc740\uac78 \uc720\uac8c\uc5d0 \uc62c\ub838\ub124\uc694.', 923 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261445&bm=1'} 924 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 
http://www.bobaedream.co.kr/list?code=freeb&page=1> 925 | {'category': 'free', 926 | 'date': '2017-05-10 00:00:00', 927 | 'hits': 171, 928 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 929 | 'title': u'\uc544..', 930 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261444&bm=1'} 931 | 2017-05-10 20:56:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.bobaedream.co.kr/list?code=freeb&page=1> 932 | {'category': 'free', 933 | 'date': '2017-05-10 00:00:00', 934 | 'hits': 172, 935 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 936 | 'title': u'\uc2e0\ucc28 \uc8fc\ud589\uac70\ub9ac \ubb38\uc758', 937 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261443&bm=1'} 938 | 2017-05-10 20:56:03 [scrapy.core.engine] INFO: Closing spider (finished) 939 | 2017-05-10 20:56:03 [scrapy.statscollectors] INFO: Dumping Scrapy stats: 940 | {'downloader/request_bytes': 1880, 941 | 'downloader/request_count': 8, 942 | 'downloader/request_method_count/GET': 8, 943 | 'downloader/response_bytes': 33088, 944 | 'downloader/response_count': 8, 945 | 'downloader/response_status_count/200': 3, 946 | 'downloader/response_status_count/301': 1, 947 | 'downloader/response_status_count/302': 3, 948 | 'downloader/response_status_count/404': 1, 949 | 'finish_reason': 'finished', 950 | 'finish_time': datetime.datetime(2017, 5, 10, 20, 56, 3, 791676), 951 | 'item_scraped_count': 60, 952 | 'log_count/DEBUG': 69, 953 | 'log_count/INFO': 7, 954 | 'response_received_count': 4, 955 | 'scheduler/dequeued': 5, 956 | 'scheduler/dequeued/memory': 5, 957 | 'scheduler/enqueued': 5, 958 | 'scheduler/enqueued/memory': 5, 959 | 'start_time': datetime.datetime(2017, 5, 10, 20, 56, 3, 454048)} 960 | 2017-05-10 20:56:03 [scrapy.core.engine] INFO: Spider closed (finished) 961 | 2017-05-10 20:58:03 [scrapy.utils.log] INFO: Scrapy 1.3.3 started (bot: community) 962 | 2017-05-10 20:58:03 [scrapy.utils.log] INFO: Overridden settings: {'NEWSPIDER_MODULE': 
'community.spiders', 'ROBOTSTXT_OBEY': True, 'LOG_LEVEL': 20, 'SPIDER_MODULES': ['community.spiders'], 'BOT_NAME': 'community', 'LOG_FILE': 'logfile.log'} 963 | 2017-05-10 20:58:03 [scrapy.middleware] INFO: Enabled extensions: 964 | ['scrapy.extensions.logstats.LogStats', 965 | 'scrapy.extensions.telnet.TelnetConsole', 966 | 'scrapy.extensions.corestats.CoreStats'] 967 | 2017-05-10 20:58:03 [scrapy.middleware] INFO: Enabled downloader middlewares: 968 | ['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware', 969 | 'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware', 970 | 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware', 971 | 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware', 972 | 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware', 973 | 'scrapy.downloadermiddlewares.retry.RetryMiddleware', 974 | 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware', 975 | 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware', 976 | 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware', 977 | 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware', 978 | 'scrapy.downloadermiddlewares.stats.DownloaderStats'] 979 | 2017-05-10 20:58:03 [scrapy.middleware] INFO: Enabled spider middlewares: 980 | ['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware', 981 | 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware', 982 | 'scrapy.spidermiddlewares.referer.RefererMiddleware', 983 | 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware', 984 | 'scrapy.spidermiddlewares.depth.DepthMiddleware'] 985 | 2017-05-10 20:58:03 [scrapy.middleware] INFO: Enabled item pipelines: 986 | ['community.pipelines.CommunityPipeline'] 987 | 2017-05-10 20:58:03 [scrapy.core.engine] INFO: Spider opened 988 | 2017-05-10 20:58:03 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min) 989 | 2017-05-10 20:58:03 [scrapy.core.engine] INFO: Closing spider (finished) 990 
| 2017-05-10 20:58:03 [scrapy.statscollectors] INFO: Dumping Scrapy stats: 991 | {'downloader/request_bytes': 1880, 992 | 'downloader/request_count': 8, 993 | 'downloader/request_method_count/GET': 8, 994 | 'downloader/response_bytes': 33145, 995 | 'downloader/response_count': 8, 996 | 'downloader/response_status_count/200': 3, 997 | 'downloader/response_status_count/301': 1, 998 | 'downloader/response_status_count/302': 3, 999 | 'downloader/response_status_count/404': 1, 1000 | 'finish_reason': 'finished', 1001 | 'finish_time': datetime.datetime(2017, 5, 10, 20, 58, 3, 844150), 1002 | 'item_scraped_count': 60, 1003 | 'log_count/INFO': 7, 1004 | 'response_received_count': 4, 1005 | 'scheduler/dequeued': 5, 1006 | 'scheduler/dequeued/memory': 5, 1007 | 'scheduler/enqueued': 5, 1008 | 'scheduler/enqueued/memory': 5, 1009 | 'start_time': datetime.datetime(2017, 5, 10, 20, 58, 3, 537422)} 1010 | 2017-05-10 20:58:03 [scrapy.core.engine] INFO: Spider closed (finished) 1011 | 2017-05-10 21:00:24 [scrapy.utils.log] INFO: Scrapy 1.3.3 started (bot: community) 1012 | 2017-05-10 21:00:24 [scrapy.utils.log] INFO: Overridden settings: {'NEWSPIDER_MODULE': 'community.spiders', 'ROBOTSTXT_OBEY': True, 'LOG_LEVEL': 20, 'SPIDER_MODULES': ['community.spiders'], 'BOT_NAME': 'community', 'LOG_FILE': 'logfile.log'} 1013 | 2017-05-10 21:00:24 [scrapy.middleware] INFO: Enabled extensions: 1014 | ['scrapy.extensions.logstats.LogStats', 1015 | 'scrapy.extensions.telnet.TelnetConsole', 1016 | 'scrapy.extensions.corestats.CoreStats'] 1017 | 2017-05-10 21:00:24 [scrapy.middleware] INFO: Enabled downloader middlewares: 1018 | ['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware', 1019 | 'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware', 1020 | 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware', 1021 | 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware', 1022 | 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware', 1023 | 
'scrapy.downloadermiddlewares.retry.RetryMiddleware', 1024 | 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware', 1025 | 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware', 1026 | 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware', 1027 | 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware', 1028 | 'scrapy.downloadermiddlewares.stats.DownloaderStats'] 1029 | 2017-05-10 21:00:24 [scrapy.middleware] INFO: Enabled spider middlewares: 1030 | ['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware', 1031 | 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware', 1032 | 'scrapy.spidermiddlewares.referer.RefererMiddleware', 1033 | 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware', 1034 | 'scrapy.spidermiddlewares.depth.DepthMiddleware'] 1035 | 2017-05-10 21:00:24 [scrapy.middleware] INFO: Enabled item pipelines: 1036 | ['community.pipelines.CommunityPipeline'] 1037 | 2017-05-10 21:00:24 [scrapy.core.engine] INFO: Spider opened 1038 | 2017-05-10 21:00:24 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min) 1039 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1040 | 'date': '2017-05-11 06:46:19', 1041 | 'hits': u'0', 1042 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1043 | 'title': '\xec\x9d\xb4\xeb\x82\x99\xec\x97\xb0 \xec\xa0\x84\xeb\x82\xa8\xec\xa7\x80\xec\x82\xac, \xea\xb5\xad\xeb\xac\xb4\xec\xb4\x9d\xeb\xa6\xac \xec\xa7\x80\xeb\xaa\x85', 1044 | 'url': u'http://www.clien.net/service/board/park/10730555'} 1045 | Traceback (most recent call last): 1046 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1047 | current.result = callback(current.result, *args, **kw) 1048 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1049 | if word in unicode(item['title']): 1050 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not 
in range(128) 1051 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1052 | 'date': '2017-05-11 06:43:55', 1053 | 'hits': u'0', 1054 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1055 | 'title': '\xea\xb0\x84\xec\x86\x8c\xed\x95\x9c \xec\xb7\xa8\xec\x9e\x84\xec\x8b\x9d \xec\xa2\x8b\xeb\x84\xa4\xec\x9a\x94', 1056 | 'url': u'http://www.clien.net/service/board/park/10730554'} 1057 | Traceback (most recent call last): 1058 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1059 | current.result = callback(current.result, *args, **kw) 1060 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1061 | if word in unicode(item['title']): 1062 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xea in position 0: ordinal not in range(128) 1063 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1064 | 'date': '2017-05-11 06:41:01', 1065 | 'hits': u'0', 1066 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1067 | 'title': '\xe4\xb8\xad\xeb\xa7\xa4\xec\xb2\xb4 "\xeb\xac\xb8\xec\x9e\xac\xec\x9d\xb8 \xec\xa7\x84\xec\xa7\x80\xed\x95\x98\xea\xb2\x8c \xec\x83\x81\xeb\x8c\x80\xed\x95\xa0 \xec\x9d\xb8\xeb\xac\xbc\xe2\x80\xa6\xe9\x9f\x93\xed\x8a\xb9\xec\x82\xac\xeb\x8b\xa8 \xeb\xb0\xa9\xec\xa4\x91 \xec\x98\x88\xec\x83\x81"', 1068 | 'url': u'http://www.clien.net/service/board/park/10730553'} 1069 | Traceback (most recent call last): 1070 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1071 | current.result = callback(current.result, *args, **kw) 1072 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1073 | if word in unicode(item['title']): 1074 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xe4 in position 0: ordinal not in range(128) 1075 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1076 
| 'date': '2017-05-11 06:38:38', 1077 | 'hits': u'0', 1078 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1079 | 'title': '\xed\x81\xb4\xeb\x9f\x89 \xec\x95\x88\xeb\x93\x9c\xec\x95\xb1 \xec\xb6\x9c\xec\x8b\x9c \xeb\x90\x98\xec\x97\x88\xeb\x84\xa4\xec\x9a\x94.', 1080 | 'url': u'http://www.clien.net/service/board/park/10730551'} 1081 | Traceback (most recent call last): 1082 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1083 | current.result = callback(current.result, *args, **kw) 1084 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1085 | if word in unicode(item['title']): 1086 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xed in position 0: ordinal not in range(128) 1087 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1088 | 'date': '2017-05-11 06:37:44', 1089 | 'hits': u'0', 1090 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1091 | 'title': '\xed\x91\x9c\xed\x98\x84\xec\x9d\x98 \xec\x9e\x90\xec\x9c\xa0\xeb\xa5\xbc \xec\xa4\x91\xec\x9a\x94\xec\x8b\x9c \xec\x97\xac\xea\xb8\xb0\xeb\x8a\x94 \xeb\x8d\x94\xeb\xaf\xbc\xec\xa3\xbc\xea\xb0\x80 \xea\xb3\xbc\xec\x97\xb0 \xec\x9d\xbc\xeb\xb2\xa0\xeb\xa5\xbc \xec\x96\xb4\xeb\x96\xbb\xea\xb2\x8c \xeb\x8c\x80\xec\xb2\x98\xed\x95\xa0\xea\xb9\x8c..\xea\xb1\xb1\xec\xa0\x95\xeb\x90\x98\xeb\x84\xa4\xec\x9a\x94', 1092 | 'url': u'http://www.clien.net/service/board/park/10730549'} 1093 | Traceback (most recent call last): 1094 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1095 | current.result = callback(current.result, *args, **kw) 1096 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1097 | if word in unicode(item['title']): 1098 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xed in position 0: ordinal not in range(128) 1099 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing 
{'category': 'free', 1100 | 'date': '2017-05-11 06:33:44', 1101 | 'hits': u'0', 1102 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1103 | 'title': '\xec\x9d\xb4\xec\xa0\x9c \xec\x97\xb0\xed\x95\xa9\xeb\x89\xb4\xec\x8a\xa4 \xec\x86\x8d\xeb\xb3\xb4\xea\xb0\x80 \xea\xb8\xb0\xeb\x8b\xa4\xeb\xa0\xa4 \xec\xa7\x80\xeb\x8a\x94\xea\xb5\xb0\xec\x9a\x94', 1104 | 'url': u'http://www.clien.net/service/board/park/10730544'} 1105 | Traceback (most recent call last): 1106 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1107 | current.result = callback(current.result, *args, **kw) 1108 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1109 | if word in unicode(item['title']): 1110 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1111 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1112 | 'date': '2017-05-11 06:32:21', 1113 | 'hits': u'0', 1114 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1115 | 'title': '18\xeb\x8c\x80 \xeb\x8c\x80\xed\x86\xb5\xeb\xa0\xb9 \xec\xb7\xa8\xec\x9e\x84\xec\x8b\x9d \xeb\x95\x8c \xeb\x8b\xb9\xec\x8b\x9c \xeb\xac\xb8\xec\x9e\xac\xec\x9d\xb8 \xec\x9d\x98\xec\x9b\x90 \xeb\xb6\x88\xec\xb0\xb8\xed\x96\x88\xeb\x84\xa4\xec\x9a\x94', 1116 | 'url': u'http://www.clien.net/service/board/park/10730543'} 1117 | Traceback (most recent call last): 1118 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1119 | current.result = callback(current.result, *args, **kw) 1120 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1121 | if word in unicode(item['title']): 1122 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xeb in position 2: ordinal not in range(128) 1123 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1124 | 'date': '2017-05-11 06:28:45', 1125 | 'hits': 
u'0', 1126 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1127 | 'title': 'MBC \xec\xa0\x95\xec\x83\x81\xed\x99\x94 \xec\x88\x98\xec\x88\x9c\xec\x9d\x80 \xec\x96\xb4\xeb\x96\xa4 \xec\xa0\x88\xec\xb0\xa8\xeb\xa1\x9c \xec\xa7\x84\xed\x96\x89\xeb\x90\xa0\xea\xb9\x8c\xec\x9a\x94', 1128 | 'url': u'http://www.clien.net/service/board/park/10730542'} 1129 | Traceback (most recent call last): 1130 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1131 | current.result = callback(current.result, *args, **kw) 1132 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1133 | if word in unicode(item['title']): 1134 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 4: ordinal not in range(128) 1135 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1136 | 'date': '2017-05-11 06:23:13', 1137 | 'hits': u'0', 1138 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1139 | 'title': 'avira \xec\x9d\xb4\xec\x83\x81\xed\x95\x9c \xed\x8c\x9d\xec\x97\x85\xec\x9d\xb4 \xeb\x96\xa0\xec\x84\x9c \xec\xa7\x80\xec\x9a\xb0\xeb\xa0\xa4\xea\xb5\xac\xec\x9a\x94 -_-', 1140 | 'url': u'http://www.clien.net/service/board/park/10730540'} 1141 | Traceback (most recent call last): 1142 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1143 | current.result = callback(current.result, *args, **kw) 1144 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1145 | if word in unicode(item['title']): 1146 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 6: ordinal not in range(128) 1147 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1148 | 'date': '2017-05-11 06:17:36', 1149 | 'hits': u'0', 1150 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1151 | 'title': '\xec\xa0\x9c\xea\xb0\x80 \xeb\xa7\x90\xec\x9d\xb4\xec\xa3\xa0 
\xeb\xac\xb8\xec\x9e\xac\xec\x9d\xb8\xeb\x8c\x80\xed\x86\xb5\xeb\xa0\xb9\xeb\xb3\xb4\xeb\x8b\xa4 \xeb\x8d\x94 \xec\xa2\x8b\xec\x9d\x80\xea\xb2\x8c \xec\x9e\x88\xeb\x8d\x94\xea\xb5\xb0\xec\x9a\x94', 1152 | 'url': u'http://www.clien.net/service/board/park/10730538'} 1153 | Traceback (most recent call last): 1154 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1155 | current.result = callback(current.result, *args, **kw) 1156 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1157 | if word in unicode(item['title']): 1158 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1159 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1160 | 'date': '2017-05-11 06:16:21', 1161 | 'hits': u'0', 1162 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1163 | 'title': '\xea\xb7\xb8\xeb\x82\x98\xec\xa0\x80\xeb\x82\x98 \xec\x9d\x80\xec\x88\x98\xeb\xaf\xb8 \xec\x9d\x98\xec\x9b\x90\xeb\x8b\x98 \xeb\xaf\xb8\xec\xbf\xa0\xeb\x83\xa5 \xec\xbd\x94\xec\x8a\xa4\xed\x94\x84\xeb\xa0\x88\xeb\x8a\x94 \xec\x96\xb4\xeb\x96\xbb\xea\xb2\x8c \xeb\x90\x98\xeb\x8a\x94\xea\xb1\xb4\xea\xb0\x80\xec\x9a\x94 ?', 1164 | 'url': u'http://www.clien.net/service/board/park/10730537'} 1165 | Traceback (most recent call last): 1166 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1167 | current.result = callback(current.result, *args, **kw) 1168 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1169 | if word in unicode(item['title']): 1170 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xea in position 0: ordinal not in range(128) 1171 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1172 | 'date': '2017-05-10 06:36:00', 1173 | 'hits': 14, 1174 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1175 | 
'title': '\xeb\x8f\x84\xec\xb0\xa9', 1176 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261472&bm=1'} 1177 | Traceback (most recent call last): 1178 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1179 | current.result = callback(current.result, *args, **kw) 1180 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1181 | if word in unicode(item['title']): 1182 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xeb in position 0: ordinal not in range(128) 1183 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1184 | 'date': '2017-05-10 06:06:00', 1185 | 'hits': 51, 1186 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1187 | 'title': '\xea\xb2\xbd\xed\x92\x88\xec\x9d\x84 \xec\xa4\x91\xea\xb3\xa0\xec\xa0\x9c\xed\x92\x88\xec\x9c\xbc\xeb\xa1\x9c \xec\xa3\xbc\xeb\x8a\x94\xeb\x8d\xb0 \xec\x96\xb4\xeb\x96\xbb\xea\xb2\x8c \xec\x83\x9d\xea\xb0\x81\xed\x95\x98\xec\x8b\x9c\xeb\x82\x98\xec\x9a\x94', 1188 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261471&bm=1'} 1189 | Traceback (most recent call last): 1190 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1191 | current.result = callback(current.result, *args, **kw) 1192 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1193 | if word in unicode(item['title']): 1194 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xea in position 0: ordinal not in range(128) 1195 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1196 | 'date': '2017-05-11 06:13:05', 1197 | 'hits': u'0', 1198 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1199 | 'title': '\xec\xa1\xb0\xec\x84\xa0\xec\x9d\xbc\xeb\xb3\xb4 \xea\xb8\xb0\xec\x82\xac\xec\x9d\x98 \xeb\x8c\x93\xea\xb8\x80\xec\x9d\x84 \xeb\xb3\xb4\xeb\xa9\xb4...', 1200 | 'url': 
u'http://www.clien.net/service/board/park/10730536'} 1201 | Traceback (most recent call last): 1202 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1203 | current.result = callback(current.result, *args, **kw) 1204 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1205 | if word in unicode(item['title']): 1206 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1207 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1208 | 'date': '2017-05-11 06:12:50', 1209 | 'hits': u'0', 1210 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1211 | 'title': '\xec\x96\xb4\xec\xa9\x8c\xeb\x8b\xa4\xeb\xb3\xb4\xeb\x8b\x88 \xeb\x84\xa4\xec\x9d\xb4\xeb\xb2\x84 \xea\xb8\xb0\xec\x82\xac \xeb\xb2\xa0\xeb\x8c\x93\xec\x9d\xb4 \xeb\x90\xac\xeb\x84\xa4\xec\x9a\x94;;', 1212 | 'url': u'http://www.clien.net/service/board/park/10730535'} 1213 | Traceback (most recent call last): 1214 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1215 | current.result = callback(current.result, *args, **kw) 1216 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1217 | if word in unicode(item['title']): 1218 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1219 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1220 | 'date': '2017-05-11 06:09:28', 1221 | 'hits': u'0', 1222 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1223 | 'title': '\xea\xb5\x90\xec\x9c\xa1\xeb\xb6\x80 \xec\x99\xb8\xea\xb5\x90\xeb\xb6\x80 \xea\xb5\xad\xeb\xb0\xa9\xeb\xb6\x80\xeb\x8a\x94 \xed\x83\x88\xed\x83\x88 \xed\x84\xb8\xec\x96\xb4\xec\x95\xbc\xec\xa3\xa0', 1224 | 'url': u'http://www.clien.net/service/board/park/10730534'} 1225 | Traceback (most recent call last): 1226 | File 
"/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1227 | current.result = callback(current.result, *args, **kw) 1228 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1229 | if word in unicode(item['title']): 1230 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xea in position 0: ordinal not in range(128) 1231 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1232 | 'date': '2017-05-11 05:57:17', 1233 | 'hits': u'0', 1234 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1235 | 'title': '\xec\xb7\xa8\xec\x9e\x84\xec\x8b\x9d\xec\x97\x90 \xed\x9b\x84\xeb\xb3\xb4\xea\xb0\x80 \xec\xb0\xb8\xec\x84\x9d \xec\x95\x88\xed\x96\x88\xeb\x8b\xa4\xea\xb3\xa0 \xec\x9d\xb8\xec\x84\xb1\xec\x9d\xb4 \xec\x96\xb4\xec\xa9\x8c\xea\xb3\xa0 \xed\x95\x98\xeb\x8a\x94\xea\xb1\xb4', 1236 | 'url': u'http://www.clien.net/service/board/park/10730533'} 1237 | Traceback (most recent call last): 1238 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1239 | current.result = callback(current.result, *args, **kw) 1240 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1241 | if word in unicode(item['title']): 1242 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1243 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1244 | 'date': '2017-05-11 05:54:09', 1245 | 'hits': u'0', 1246 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1247 | 'title': '\xea\xb5\xad\xeb\xaf\xbc\xec\x9d\x98\xeb\x8b\xb9 "\xe5\xae\x89\xed\x9b\x84\xeb\xb3\xb4 \xec\x82\xac\xed\x87\xb4\xed\x95\x98\xeb\x9d\xbc\xeb\x8a\x94 \xec\x86\xa1\xec\x98\x81\xea\xb8\xb8, \xec\xa1\xb8\xeb\xb6\x80 \xea\xb0\x91\xec\xa7\x88"', 1248 | 'url': u'http://www.clien.net/service/board/park/10730532'} 1249 | Traceback (most recent call last): 1250 | File 
"/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1251 | current.result = callback(current.result, *args, **kw) 1252 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1253 | if word in unicode(item['title']): 1254 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xea in position 0: ordinal not in range(128) 1255 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1256 | 'date': '2017-05-11 05:47:44', 1257 | 'hits': u'2', 1258 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1259 | 'title': '\xec\x8b\xac\xec\x83\x81\xec\xa0\x95\xec\x9d\x80 \xeb\x8b\xb9\xec\x82\xac\xec\x97\x90\xec\x84\x9c \xed\x8b\xb0\xeb\xb9\x84\xeb\xa1\x9c \xec\xb7\xa8\xec\x9e\x84\xec\x8b\x9d \xeb\xb3\xb8\xea\xb1\xb4\xea\xb0\x80\xec\x9a\x94?', 1260 | 'url': u'http://www.clien.net/service/board/park/10730530'} 1261 | Traceback (most recent call last): 1262 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1263 | current.result = callback(current.result, *args, **kw) 1264 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1265 | if word in unicode(item['title']): 1266 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1267 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1268 | 'date': '2017-05-11 05:42:25', 1269 | 'hits': u'0', 1270 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1271 | 'title': '\xeb\x85\xb8\xeb\xac\xb4\xed\x98\x84 \xeb\x8c\x80\xed\x86\xb5\xeb\xa0\xb9\xec\x9d\xb4 \xed\x86\xb5\xec\x9d\xbc\xeb\xb6\x80\xec\x9e\xa5\xea\xb4\x80\xec\x9c\xbc\xeb\xa1\x9c \xec\x98\x81\xec\x9e\x85\xed\x95\x98\xeb\xa0\xa4\xea\xb3\xa0 \xed\x96\x88\xeb\x8d\x98 \xec\x9d\xb8\xeb\xac\xbc\xec\x9d\xb4 \xeb\x88\x84\xea\xb5\xb0\xec\xa7\x80 \xec\x95\x84\xec\x84\xb8\xec\x9a\x94?', 1272 | 'url': 
u'http://www.clien.net/service/board/park/10730529'} 1273 | Traceback (most recent call last): 1274 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1275 | current.result = callback(current.result, *args, **kw) 1276 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1277 | if word in unicode(item['title']): 1278 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xeb in position 0: ordinal not in range(128) 1279 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1280 | 'date': '2017-05-10 04:18:00', 1281 | 'hits': 217, 1282 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1283 | 'title': '\xec\x9e\xac\xeb\xb0\x8b\xeb\x8a\x94\xea\xb1\xb0\xeb\xb0\x9c\xea\xb2\xac\xed\x96\x88\xeb\x84\xa4\xec\x9a\x94', 1284 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261470&bm=1'} 1285 | Traceback (most recent call last): 1286 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1287 | current.result = callback(current.result, *args, **kw) 1288 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1289 | if word in unicode(item['title']): 1290 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1291 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1292 | 'date': '2017-05-10 03:49:00', 1293 | 'hits': 138, 1294 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1295 | 'title': '\xec\x9d\xbc\xeb\xb2\xa0\xeb\x93\xa4 \xec\x9b\xac\xeb\xa7\x8c\xed\x95\x98\xeb\xa9\xb4 \xec\x9d\xb4\xed\x95\xb4\xed\x95\x98\xeb\xa0\xa4\xea\xb3\xa0\xed\x95\x98\xeb\x8a\x94 \xed\x95\x9c\xec\x82\xac\xeb\x9e\x8c\xec\x9c\xbc\xeb\xa1\x9c\xec\x84\x9c.', 1296 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261469&bm=1'} 1297 | Traceback (most recent call last): 1298 | File 
"/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1299 | current.result = callback(current.result, *args, **kw) 1300 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1301 | if word in unicode(item['title']): 1302 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1303 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1304 | 'date': '2017-05-10 03:25:00', 1305 | 'hits': 186, 1306 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1307 | 'title': '\xeb\xac\xb8\xec\x9e\xac\xec\x9d\xb8\xeb\x8c\x80\xed\x86\xb5\xeb\xa0\xb9 \xec\x9a\x95\xed\x95\x9c\xea\xb1\xb0 \xea\xb3\xa0\xec\x86\x8c\xec\x95\x88\xeb\x90\x98\xeb\x82\x98\xec\x9a\x94?', 1308 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261468&bm=1'} 1309 | Traceback (most recent call last): 1310 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1311 | current.result = callback(current.result, *args, **kw) 1312 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1313 | if word in unicode(item['title']): 1314 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xeb in position 0: ordinal not in range(128) 1315 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1316 | 'date': '2017-05-10 02:57:00', 1317 | 'hits': 66, 1318 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1319 | 'title': '\xec\x88\x98\xec\xb6\x9c \xec\xa4\x91\xea\xb3\xa0\xec\xb0\xa8 \xeb\xac\xb8\xec\x9d\x98\xea\xb4\x80\xeb\xa0\xa8', 1320 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261467&bm=1'} 1321 | Traceback (most recent call last): 1322 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1323 | current.result = callback(current.result, *args, **kw) 1324 | File 
"/svc/webapp/community/community/pipelines.py", line 18, in process_item 1325 | if word in unicode(item['title']): 1326 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1327 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1328 | 'date': '2017-05-10 02:38:00', 1329 | 'hits': 111, 1330 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1331 | 'title': '\xec\xb6\xa9\xec\xb2\xad\xeb\x8f\x84 \xea\xb0\x9c\xea\xb7\xb8', 1332 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261466&bm=1'} 1333 | Traceback (most recent call last): 1334 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1335 | current.result = callback(current.result, *args, **kw) 1336 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1337 | if word in unicode(item['title']): 1338 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1339 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1340 | 'date': '2017-05-10 02:18:00', 1341 | 'hits': 152, 1342 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1343 | 'title': '\xeb\x89\xb4 \xeb\xa0\x88\xec\x9d\xb8\xec\xa7\x80\xeb\xa1\x9c\xeb\xb2\x84 \xeb\xb3\xb4\xea\xb5\xac \xea\xb5\xac\xed\x95\xa9\xeb\x8b\x88\xeb\x8b\xa4(\xea\xb0\x9c\xec\x9d\xb8 \xec\xa7\x81\xea\xb1\xb0\xeb\x9e\x98 \xec\x9b\x90\xed\x95\xa9\xeb\x8b\x88\xeb\x8b\xa4^^~)', 1344 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261465&bm=1'} 1345 | Traceback (most recent call last): 1346 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1347 | current.result = callback(current.result, *args, **kw) 1348 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1349 | if word in unicode(item['title']): 1350 | UnicodeDecodeError: 'ascii' 
codec can't decode byte 0xeb in position 0: ordinal not in range(128) 1351 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1352 | 'date': '2017-05-10 02:15:00', 1353 | 'hits': 141, 1354 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1355 | 'title': '\xea\xb8\xb0\xeb\x85\x90\xec\x9c\xbc\xeb\xa1\x9c \xea\xb7\xb8\xeb\xa0\xa4\xeb\xb3\xb4\xec\x95\x98\xec\x8a\xb5\xeb\x8b\x88\xeb\x8b\xa4', 1356 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261464&bm=1'} 1357 | Traceback (most recent call last): 1358 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1359 | current.result = callback(current.result, *args, **kw) 1360 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1361 | if word in unicode(item['title']): 1362 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xea in position 0: ordinal not in range(128) 1363 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1364 | 'date': '2017-05-10 01:53:00', 1365 | 'hits': 282, 1366 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1367 | 'title': '19\xec\x82\xb4 \xec\xb6\x9c\xed\x87\xb4\xea\xb7\xbc\xec\xa4\x91\xea\xb3\xa0\xec\xb0\xa8\xec\xb6\x94\xec\xb2\x9c\xec\xa2\x80\xec\x9a\x94', 1368 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261463&bm=1'} 1369 | Traceback (most recent call last): 1370 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1371 | current.result = callback(current.result, *args, **kw) 1372 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1373 | if word in unicode(item['title']): 1374 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 2: ordinal not in range(128) 1375 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1376 | 'date': '2017-05-10 01:48:00', 1377 
| 'hits': 211, 1378 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1379 | 'title': '\xec\xbb\xb4\xeb\xa7\xb9 \xec\xa1\xb0\xeb\xa6\xbd\xec\x8b\x9d\xec\xbb\xb4\xed\x84\xb0 \xea\xb2\xac\xec\xa0\x81 \xec\x82\xac\xea\xb8\xb0\xec\x9d\xb8\xea\xb0\x80\xec\x9a\x94\xe3\x85\xa0\xe3\x85\xa0', 1380 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261462&bm=1'} 1381 | Traceback (most recent call last): 1382 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1383 | current.result = callback(current.result, *args, **kw) 1384 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1385 | if word in unicode(item['title']): 1386 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1387 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1388 | 'date': '2017-05-11 05:40:11', 1389 | 'hits': u'1', 1390 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1391 | 'title': '\xec\x9d\xb4\xeb\xaa\x85\xeb\xb0\x95\xea\xb7\xbc\xed\x98\x9c \xec\xb5\x9c\xea\xb3\xa0\xeb\xa1\x9c \xeb\xb6\x80\xeb\x81\x84\xeb\x9f\xbd\xeb\x8d\x98 \xec\xa7\xa4 \xeb\x91\x98...', 1392 | 'url': u'http://www.clien.net/service/board/park/10730528'} 1393 | Traceback (most recent call last): 1394 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1395 | current.result = callback(current.result, *args, **kw) 1396 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1397 | if word in unicode(item['title']): 1398 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1399 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1400 | 'date': '2017-05-11 05:33:04', 1401 | 'hits': u'1', 1402 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1403 | 'title': '\xec\x9d\x98\xeb\xa0\xb9\xea\xb5\xb0 
\xe3\x85\x8a\xe3\x85\x88.gif', 1404 | 'url': u'http://www.clien.net/service/board/park/10730527'} 1405 | Traceback (most recent call last): 1406 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1407 | current.result = callback(current.result, *args, **kw) 1408 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1409 | if word in unicode(item['title']): 1410 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1411 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1412 | 'date': '2017-05-11 05:16:34', 1413 | 'hits': u'0', 1414 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1415 | 'title': '\xec\x99\x80\xec\x9a\xb0~\xec\x9c\xa0\xec\x8a\xb9\xeb\xaf\xbc \xeb\x8b\xa4\xec\x8b\x9c\xeb\xb3\xb4\xea\xb2\x8c \xeb\x90\x98\xeb\x84\xa4\xec\x9a\x94...', 1416 | 'url': u'http://www.clien.net/service/board/park/10730524'} 1417 | Traceback (most recent call last): 1418 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1419 | current.result = callback(current.result, *args, **kw) 1420 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1421 | if word in unicode(item['title']): 1422 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1423 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1424 | 'date': '2017-05-11 05:13:27', 1425 | 'hits': u'1', 1426 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1427 | 'title': '\xec\x95\x84\xec\xa7\x81\xeb\x8f\x84 "\xeb\xac\xb8\xec\x9e\xac\xec\x9d\xb8 \xeb\x8c\x80\xed\x86\xb5\xeb\xa0\xb9"\xec\x9d\xb4\xeb\x9d\xbc\xea\xb3\xa0 \xeb\x93\xa4\xec\x9c\xbc\xeb\xa9\xb4 \xea\xb0\x80\xec\x8a\xb4\xec\x9d\xb4 \xec\xb0\xa1\xed\x95\xa9\xeb\x8b\x88\xeb\x8b\xa4.', 1428 | 'url': u'http://www.clien.net/service/board/park/10730523'} 
1429 | Traceback (most recent call last): 1430 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1431 | current.result = callback(current.result, *args, **kw) 1432 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1433 | if word in unicode(item['title']): 1434 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1435 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1436 | 'date': '2017-05-11 05:09:14', 1437 | 'hits': u'0', 1438 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1439 | 'title': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99 \xeb\xaa\xa9\xeb\xa1\x9d\xec\x9c\xbc\xeb\xa1\x9c \xea\xb0\x80\xeb\x8a\x94 \xeb\xb2\x84\xed\x8a\xbc\xec\x9d\x80 \xed\x8e\x98\xec\x9d\xb4\xec\xa7\x80 \xec\x96\xb4\xeb\x94\x94 \xec\x9e\x88\xec\x97\x88\xeb\x93\xa0 \xec\xb2\xab\xed\x8e\x98\xec\x9d\xb4\xec\xa7\x80 \xeb\xaa\xa9\xeb\xa1\x9d\xec\x9c\xbc\xeb\xa1\x9c \xec\xa0\x90\xed\x94\x84\xed\x95\x98\xeb\x84\xa4\xec\x9a\x94.\xe3\x85\xa1,,\xe3\x85\xa1', 1440 | 'url': u'http://www.clien.net/service/board/park/10730522'} 1441 | Traceback (most recent call last): 1442 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1443 | current.result = callback(current.result, *args, **kw) 1444 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1445 | if word in unicode(item['title']): 1446 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xed in position 0: ordinal not in range(128) 1447 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1448 | 'date': '2017-05-11 04:58:47', 1449 | 'hits': u'3', 1450 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1451 | 'title': '\xeb\x8c\x80\xed\x95\x9c\xeb\xaf\xbc\xea\xb5\xad\xec\x9d\x98 \xed\x96\xa5\xed\x9b\x84 \xec\xa0\x95\xec\xb9\x98 \xec\xa0\x84\xeb\xa7\x9d\xec\x9d\x80 
\xec\x88\x98\xea\xbc\xb4\xec\x84\xb8\xeb\xa0\xa5\xec\x9d\xb4 \xeb\xac\xb4\xea\xb8\xb0\xeb\xa0\xa5\xed\x95\xa8\xec\x9d\x84 \xeb\x8a\x90\xeb\x81\xbc\xea\xb2\x8c \xeb\x90\xa0\xea\xb1\xb0\xeb\x9d\xbc \xeb\xb4\x85\xeb\x8b\x88\xeb\x8b\xa4.', 1452 | 'url': u'http://www.clien.net/service/board/park/10730521'} 1453 | Traceback (most recent call last): 1454 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1455 | current.result = callback(current.result, *args, **kw) 1456 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1457 | if word in unicode(item['title']): 1458 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xeb in position 0: ordinal not in range(128) 1459 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1460 | 'date': '2017-05-11 04:53:50', 1461 | 'hits': u'1', 1462 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1463 | 'title': '\xec\x9d\xb8\xed\x84\xb0\xeb\x84\xb7\xeb\xb0\xa9\xec\x86\xa1\xec\x9d\x80 \xeb\xb4\x90\xeb\x8f\x84 \xeb\xb4\x90\xeb\x8f\x84 \xec\x8b\xa0\xea\xb8\xb0\xed\x95\x98\xeb\x84\xa4\xec\x9a\x94', 1464 | 'url': u'http://www.clien.net/service/board/park/10730519'} 1465 | Traceback (most recent call last): 1466 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1467 | current.result = callback(current.result, *args, **kw) 1468 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1469 | if word in unicode(item['title']): 1470 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1471 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1472 | 'date': '2017-05-11 04:53:41', 1473 | 'hits': u'0', 1474 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1475 | 'title': '\xed\x8a\xb8\xeb\x9f\xbc\xed\x94\x84 \xed\x8a\xb9\xea\xb2\x80 
\xec\xa3\xbc\xec\x9e\xa5\xed\x95\x98\xeb\x8a\x94 \xea\xb3\xb5\xed\x99\x94\xeb\x8b\xb9 \xec\x9d\x98\xec\x9b\x90\xeb\x93\xa4\xec\x9d\xb4 \xeb\x8a\x98\xec\x96\xb4\xeb\x82\x98\xea\xb8\xb0 \xec\x8b\x9c\xec\x9e\x91\xed\x96\x88\xeb\x8b\xa4.', 1476 | 'url': u'http://www.clien.net/service/board/park/10730518'} 1477 | Traceback (most recent call last): 1478 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1479 | current.result = callback(current.result, *args, **kw) 1480 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1481 | if word in unicode(item['title']): 1482 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xed in position 0: ordinal not in range(128) 1483 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1484 | 'date': '2017-05-11 04:52:20', 1485 | 'hits': u'1', 1486 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1487 | 'title': '\xeb\xac\xb4\xea\xb1\xb0\xec\x9a\xb4 \xeb\xa8\xb8\xeb\xa6\xac\xec\xb9\xb4\xeb\x9d\xbd.gif', 1488 | 'url': u'http://www.clien.net/service/board/park/10730516'} 1489 | Traceback (most recent call last): 1490 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1491 | current.result = callback(current.result, *args, **kw) 1492 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1493 | if word in unicode(item['title']): 1494 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xeb in position 0: ordinal not in range(128) 1495 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1496 | 'date': '2017-05-11 04:50:27', 1497 | 'hits': u'1', 1498 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1499 | 'title': '\xed\x86\xa0\xec\x9a\xb0 \xeb\xaf\xb8\xec\x82\xac\xec\x9d\xbc \xed\x94\xbc\xed\x95\x98\xeb\x8a\x94 \xeb\xb0\xa9\xeb\xb2\x95.gif', 1500 | 'url': u'http://www.clien.net/service/board/park/10730515'} 
1501 | Traceback (most recent call last): 1502 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1503 | current.result = callback(current.result, *args, **kw) 1504 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1505 | if word in unicode(item['title']): 1506 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xed in position 0: ordinal not in range(128) 1507 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1508 | 'date': '2017-05-10 01:47:00', 1509 | 'hits': 369, 1510 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1511 | 'title': '\xeb\xb6\x80\xec\x82\xb0 \xeb\xb2\x94\xec\xb2\x9c\xeb\x8f\x99\xec\x97\x90\xec\x84\x9c C63AMG \xec\xb0\xa8\xeb\x9f\x89\xec\x9d\x84 \xeb\x8f\x84\xeb\x82\x9c\xeb\x8b\xb9\xed\x96\x88\xec\x8a\xb5\xeb\x8b\x88\xeb\x8b\xa4.', 1512 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261461&bm=1'} 1513 | Traceback (most recent call last): 1514 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1515 | current.result = callback(current.result, *args, **kw) 1516 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1517 | if word in unicode(item['title']): 1518 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xeb in position 0: ordinal not in range(128) 1519 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1520 | 'date': '2017-05-10 01:33:00', 1521 | 'hits': 241, 1522 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1523 | 'title': '\xec\x97\xac\xea\xb8\xb0\xea\xb0\x80 \xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc\xec\x9d\xb8\xea\xb0\x80\xec\x9a\x94', 1524 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261460&bm=1'} 1525 | Traceback (most recent call last): 1526 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in 
_runCallbacks 1527 | current.result = callback(current.result, *args, **kw) 1528 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1529 | if word in unicode(item['title']): 1530 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1531 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1532 | 'date': '2017-05-10 00:59:00', 1533 | 'hits': 200, 1534 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1535 | 'title': '\xeb\xb2\x95\xec\xaa\xbd\xec\x97\x90 \xec\x9e\x88\xec\x9c\xbc\xec\x8b\xac \xed\x98\x95\xeb\x8b\x98\xeb\x93\xa4..\xeb\x95\x8c\xec\x9d\xb8\xeb\x8f\x88\xec\x97\x90 \xea\xb4\x80\xed\x95\x9c \xec\xa1\xb0\xec\x96\xb8\xec\xa2\x80 \xeb\xb6\x80\xed\x83\x81\xeb\x93\x9c\xeb\xa6\xbd\xeb\x8b\x88\xeb\x8b\xa4.', 1536 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261458&bm=1'} 1537 | Traceback (most recent call last): 1538 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1539 | current.result = callback(current.result, *args, **kw) 1540 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1541 | if word in unicode(item['title']): 1542 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xeb in position 0: ordinal not in range(128) 1543 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1544 | 'date': '2017-05-10 00:51:00', 1545 | 'hits': 395, 1546 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1547 | 'title': '\xec\x9e\xac\xec\x9d\xb8\xec\x9d\xb4\xed\x98\x95\xec\x9d\xb4 \xea\xb2\xbd\xec\x9c\xa0\xea\xb0\x92 \xec\x98\xac\xeb\xa6\xb0\xeb\x8b\xa4\xeb\x8a\x94\xeb\x8d\xb0', 1548 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261457&bm=1'} 1549 | Traceback (most recent call last): 1550 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1551 | 
current.result = callback(current.result, *args, **kw) 1552 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1553 | if word in unicode(item['title']): 1554 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1555 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1556 | 'date': '2017-05-10 00:29:00', 1557 | 'hits': 283, 1558 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1559 | 'title': '\xec\x82\xac\xea\xb3\xa0\xeb\x82\x9c \xec\xa4\x84 \xeb\xaa\xa8\xeb\xa5\xb4\xea\xb3\xa0 \xea\xb7\xb8\xeb\x83\xa5 \xec\x99\x94\xeb\x8a\x94\xeb\x8d\xb0 \xeb\xb8\x94\xeb\xb0\x95 \xeb\xb3\xb4\xeb\x8b\x88... \xeb\x92\xa4\xeb\xa1\x9c \xeb\xba\x90\xeb\x8d\x94\xeb\x9d\xbc\xea\xb5\xac\xec\x9a\x94;', 1560 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261456&bm=1'} 1561 | Traceback (most recent call last): 1562 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1563 | current.result = callback(current.result, *args, **kw) 1564 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1565 | if word in unicode(item['title']): 1566 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1567 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1568 | 'date': '2017-05-10 00:29:00', 1569 | 'hits': 290, 1570 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1571 | 'title': '\xec\x9d\xb4\xea\xb2\x8c\xeb\xad\x90\xeb\x83\x90\xe3\x85\x8b', 1572 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261455&bm=1'} 1573 | Traceback (most recent call last): 1574 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1575 | current.result = callback(current.result, *args, **kw) 1576 | File "/svc/webapp/community/community/pipelines.py", line 18, in 
process_item 1577 | if word in unicode(item['title']): 1578 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1579 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1580 | 'date': '2017-05-10 00:19:00', 1581 | 'hits': 268, 1582 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1583 | 'title': '\xeb\xb3\xb4\xeb\xb0\xb0\xed\x98\x95\xeb\x8b\x98\xeb\x93\xa4 \xec\x9e\x91\xec\x9d\x80 \xec\x87\xbc\xed\x95\x91\xeb\xaa\xb0 \xec\xb0\xbd\xec\x97\x85\xed\x95\xb4\xeb\xb3\xb4\xeb\xa0\xa4\xed\x95\xa9\xeb\x8b\x88\xeb\x8b\xa4.. \xec\xa1\xb0\xec\x96\xb8\xec\xa2\x80 \xeb\xb6\x80\xed\x83\x81\xeb\x93\x9c\xeb\xa0\xa4\xec\x9a\x94', 1584 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261454&bm=1'} 1585 | Traceback (most recent call last): 1586 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1587 | current.result = callback(current.result, *args, **kw) 1588 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1589 | if word in unicode(item['title']): 1590 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xeb in position 0: ordinal not in range(128) 1591 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1592 | 'date': '2017-05-10 00:15:00', 1593 | 'hits': 236, 1594 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1595 | 'title': '\xec\x9b\x94\xea\xb8\x89\xeb\x82\xa0\xec\x9d\xb4\xeb\x84\xa4\xec\x9a\x94', 1596 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261453&bm=1'} 1597 | Traceback (most recent call last): 1598 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1599 | current.result = callback(current.result, *args, **kw) 1600 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1601 | if word in unicode(item['title']): 1602 | UnicodeDecodeError: 
'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1603 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1604 | 'date': '2017-05-10 00:13:00', 1605 | 'hits': 65, 1606 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1607 | 'title': '\xec\x82\xac\xeb\x82\x98\xec\x9d\xb4\xeb\x8a\x94', 1608 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261452&bm=1'} 1609 | Traceback (most recent call last): 1610 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1611 | current.result = callback(current.result, *args, **kw) 1612 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1613 | if word in unicode(item['title']): 1614 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1615 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1616 | 'date': '2017-05-11 04:47:50', 1617 | 'hits': u'0', 1618 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1619 | 'title': '\xec\x98\xa4\xeb\xb0\x94\xeb\xa7\x88 \xec\xa0\x84 \xeb\x8c\x80\xed\x86\xb5\xeb\xa0\xb9 \xec\x9c\x84\xec\x95\x88\xeb\xb6\x80 \xea\xb4\x80\xeb\xa0\xa8 \xec\x9e\x98\xeb\xaa\xbb \xec\x95\x8c\xea\xb3\xa0 \xea\xb3\x84\xec\x8b\x9c\xeb\x8a\x94\xeb\x8d\xb0', 1620 | 'url': u'http://www.clien.net/service/board/park/10730514'} 1621 | Traceback (most recent call last): 1622 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1623 | current.result = callback(current.result, *args, **kw) 1624 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1625 | if word in unicode(item['title']): 1626 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1627 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1628 | 'date': '2017-05-11 
04:44:17', 1629 | 'hits': u'0', 1630 | 'source': '\xed\x81\xb4\xeb\xa6\xac\xec\x95\x99', 1631 | 'title': '\xeb\x8c\x80\xeb\xa5\x99 \xec\x82\xac\xea\xb7\xb9 \xea\xb7\xbc\xed\x99\xa9.gif', 1632 | 'url': u'http://www.clien.net/service/board/park/10730513'} 1633 | Traceback (most recent call last): 1634 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1635 | current.result = callback(current.result, *args, **kw) 1636 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1637 | if word in unicode(item['title']): 1638 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xeb in position 0: ordinal not in range(128) 1639 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1640 | 'date': '2017-05-10 00:08:00', 1641 | 'hits': 458, 1642 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1643 | 'title': '\xeb\xa8\xb8\xed\x94\x8c\xeb\x9f\xac \xeb\xb6\x88\xeb\xb2\x95\xea\xb0\x9c\xec\xa1\xb0 \xec\x8b\xa0\xea\xb3\xa0\xed\x96\x88\xec\x8a\xb5\xeb\x8b\x88\xeb\x8b\xa4 \xea\xb7\xb8\xeb\x9f\xb0\xeb\x8d\xb0', 1644 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261451&bm=1'} 1645 | Traceback (most recent call last): 1646 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1647 | current.result = callback(current.result, *args, **kw) 1648 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1649 | if word in unicode(item['title']): 1650 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xeb in position 0: ordinal not in range(128) 1651 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1652 | 'date': '2017-05-10 00:04:00', 1653 | 'hits': 142, 1654 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1655 | 'title': '\xea\xb5\xbf\xeb\x82\x98\xec\x9e\x87', 1656 | 'url': 
u'http://www.bobaedream.co.kr/view?code=freeb&No=1261450&bm=1'} 1657 | Traceback (most recent call last): 1658 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1659 | current.result = callback(current.result, *args, **kw) 1660 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1661 | if word in unicode(item['title']): 1662 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xea in position 0: ordinal not in range(128) 1663 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1664 | 'date': '2017-05-10 00:03:00', 1665 | 'hits': 243, 1666 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1667 | 'title': '\xe3\x85\x86\xe3\x85\x82 \xec\xa0\x95\xec\x9d\x80\xec\x95\x84. \xed\x95\x98\xeb\x82\x98\xeb\xa7\x8c \xeb\xac\xbb\xec\x9e\x90.', 1668 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261449&bm=1'} 1669 | Traceback (most recent call last): 1670 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1671 | current.result = callback(current.result, *args, **kw) 1672 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1673 | if word in unicode(item['title']): 1674 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xe3 in position 0: ordinal not in range(128) 1675 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1676 | 'date': '2017-05-10 00:00:00', 1677 | 'hits': 159, 1678 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1679 | 'title': '\xec\xa3\x84\xec\x86\xa1\xed\x95\xa9\xeb\x8b\x88\xeb\x8b\xa4...', 1680 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261448&bm=1'} 1681 | Traceback (most recent call last): 1682 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1683 | current.result = callback(current.result, *args, **kw) 1684 | File 
"/svc/webapp/community/community/pipelines.py", line 18, in process_item 1685 | if word in unicode(item['title']): 1686 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1687 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1688 | 'date': '2017-05-10 00:00:00', 1689 | 'hits': 845, 1690 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1691 | 'title': '\xed\x9d\x94\xed\x95\x9c \xec\x93\xb0\xeb\xa6\xac \xec\x86\x94\xeb\xa1\x9c\xec\x9d\x98 \xeb\x8b\xa8\xed\x86\xa1\xeb\xb0\xa9,,,', 1692 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261447&bm=1'} 1693 | Traceback (most recent call last): 1694 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1695 | current.result = callback(current.result, *args, **kw) 1696 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1697 | if word in unicode(item['title']): 1698 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xed in position 0: ordinal not in range(128) 1699 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1700 | 'date': '2017-05-10 00:00:00', 1701 | 'hits': 83, 1702 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1703 | 'title': '\xea\xb5\xbf\xeb\x82\x98\xec\x9e\x87!', 1704 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261446&bm=1'} 1705 | Traceback (most recent call last): 1706 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1707 | current.result = callback(current.result, *args, **kw) 1708 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1709 | if word in unicode(item['title']): 1710 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xea in position 0: ordinal not in range(128) 1711 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 
1712 | 'date': '2017-05-10 00:00:00', 1713 | 'hits': 474, 1714 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1715 | 'title': '\xec\x84\xa0\xeb\xac\xbc \xeb\xb0\x9b\xec\x9d\x80\xea\xb1\xb8 \xec\x9c\xa0\xea\xb2\x8c\xec\x97\x90 \xec\x98\xac\xeb\xa0\xb8\xeb\x84\xa4\xec\x9a\x94.', 1716 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261445&bm=1'} 1717 | Traceback (most recent call last): 1718 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1719 | current.result = callback(current.result, *args, **kw) 1720 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1721 | if word in unicode(item['title']): 1722 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1723 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1724 | 'date': '2017-05-10 00:00:00', 1725 | 'hits': 171, 1726 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1727 | 'title': '\xec\x95\x84..', 1728 | 'url': u'http://www.bobaedream.co.kr/view?code=freeb&No=1261444&bm=1'} 1729 | Traceback (most recent call last): 1730 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1731 | current.result = callback(current.result, *args, **kw) 1732 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1733 | if word in unicode(item['title']): 1734 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1735 | 2017-05-10 21:00:24 [scrapy.core.scraper] ERROR: Error processing {'category': 'free', 1736 | 'date': '2017-05-10 00:00:00', 1737 | 'hits': 175, 1738 | 'source': '\xeb\xb3\xb4\xeb\xb0\xb0\xeb\x93\x9c\xeb\xa6\xbc', 1739 | 'title': '\xec\x8b\xa0\xec\xb0\xa8 \xec\xa3\xbc\xed\x96\x89\xea\xb1\xb0\xeb\xa6\xac \xeb\xac\xb8\xec\x9d\x98', 1740 | 'url': 
u'http://www.bobaedream.co.kr/view?code=freeb&No=1261443&bm=1'} 1741 | Traceback (most recent call last): 1742 | File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 653, in _runCallbacks 1743 | current.result = callback(current.result, *args, **kw) 1744 | File "/svc/webapp/community/community/pipelines.py", line 18, in process_item 1745 | if word in unicode(item['title']): 1746 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xec in position 0: ordinal not in range(128) 1747 | 2017-05-10 21:00:24 [scrapy.core.engine] INFO: Closing spider (finished) 1748 | 2017-05-10 21:00:24 [scrapy.statscollectors] INFO: Dumping Scrapy stats: 1749 | {'downloader/request_bytes': 1880, 1750 | 'downloader/request_count': 8, 1751 | 'downloader/request_method_count/GET': 8, 1752 | 'downloader/response_bytes': 33161, 1753 | 'downloader/response_count': 8, 1754 | 'downloader/response_status_count/200': 3, 1755 | 'downloader/response_status_count/301': 1, 1756 | 'downloader/response_status_count/302': 3, 1757 | 'downloader/response_status_count/404': 1, 1758 | 'finish_reason': 'finished', 1759 | 'finish_time': datetime.datetime(2017, 5, 10, 21, 0, 24, 746457), 1760 | 'item_scraped_count': 1, 1761 | 'log_count/ERROR': 59, 1762 | 'log_count/INFO': 7, 1763 | 'response_received_count': 4, 1764 | 'scheduler/dequeued': 5, 1765 | 'scheduler/dequeued/memory': 5, 1766 | 'scheduler/enqueued': 5, 1767 | 'scheduler/enqueued/memory': 5, 1768 | 'start_time': datetime.datetime(2017, 5, 10, 21, 0, 24, 406675)} 1769 | 2017-05-10 21:00:24 [scrapy.core.engine] INFO: Spider closed (finished) 1770 | -------------------------------------------------------------------------------- /community/community/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # 
# http://doc.scrapy.org/en/latest/topics/spider-middleware.html

from scrapy import signals


class CommunitySpiderMiddleware(object):
    """Pass-through spider middleware (project-template skeleton).

    Every hook forwards what it receives unchanged.  The hooks are ordinary
    instance methods of the object built by ``from_crawler``, so each one
    takes ``self`` as its first parameter.
    """
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the spider middleware does not modify the
    # passed objects.

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to ``spider_opened``."""
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_spider_input(self, response, spider):
        """Accept every response; always returns ``None``."""
        # Called for each response that goes through the spider
        # middleware and into the spider.

        # Should return None or raise an exception.
        return None

    def process_spider_output(self, response, result, spider):
        """Yield every element of ``result`` unchanged."""
        # Called with the results returned from the Spider, after
        # it has processed the response.

        # Must return an iterable of Request, dict or Item objects.
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        """Do nothing with the exception; implicitly returns ``None``."""
        # Called when a spider or process_spider_input() method
        # (from other spider middleware) raises an exception.

        # Should return either None or an iterable of Response, dict
        # or Item objects.
        pass

    def process_start_requests(self, start_requests, spider):
        """Yield every start request unchanged."""
        # Called with the start requests of the spider, and works
        # similarly to the process_spider_output() method, except
        # that it doesn't have a response associated.

        # Must return only requests (not items).
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        """Log the name of the spider that was opened."""
        spider.logger.info('Spider opened: %s' % spider.name)


# --------------------------------------------------------------------------------
# /community/community/pipelines.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html

# Filtering is done by raising this exception; the pipeline only takes
# effect once it is registered (ITEM_PIPELINES).
from scrapy.exceptions import DropItem


class CommunityPipeline(object):
    """Drop every item whose title contains one of ``words_to_filter``."""

    # An item is dropped when its title contains any of these words.
    words_to_filter = [u'19금', u'문재인']

    @staticmethod
    def _as_text(value):
        """Return ``value`` as a text (unicode) string.

        Byte strings are decoded as UTF-8 instead of going through the
        interpreter's implicit ASCII decoding, which fails on the first
        non-ASCII byte of a Korean title.  Undecodable bytes are replaced
        rather than raised so that filtering never aborts item processing.
        Any other object is converted with the text type's constructor.
        """
        text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
        if isinstance(value, text_type):
            return value
        if isinstance(value, bytes):  # ``bytes`` is ``str`` on Python 2
            return value.decode('utf-8', 'replace')
        return text_type(value)

    # Filtering applied to each item.
    def process_item(self, item, spider):
        """Return ``item`` unless its title contains a filtered word.

        :param item: mapping with a ``'title'`` entry (text or UTF-8 bytes).
        :param spider: the spider that produced the item (unused).
        :returns: ``item`` itself when no filtered word occurs in the title.
        :raises DropItem: when the title contains a filtered word; the
            message names the word that matched.
        """
        title = self._as_text(item['title'])
        for word in self.words_to_filter:
            if word in title:
                raise DropItem("Contains forbidden word: %s" % word)
        # Reached only after every word has been checked.
        return item


# NOTE: the rest of this span belongs to other files of the repository dump;
# it is kept verbatim below, as comments.
# --------------------------------------------------------------------------------
# /community/community/pipelines.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/12bme/Python-WebCrawling/bc0de861e46d8939eaf0e47504e1bad0d692d5ac/community/community/pipelines.pyc
# --------------------------------------------------------------------------------
# /community/community/return_01.log:
# --------------------------------------------------------------------------------
# 1 | ==================================================
# 2 | 中매체 "문재인 진지하게 상대할 인물…韓특사단 방중 예상"
# 3 | 클리앙 1번째 글 크롤링 완료
# 4 | ==================================================
# 5 | 문제많은 문재인~
# 6 | 클리앙 2번째 글 크롤링 완료
# 7 | ==================================================
# 8 | 클량 안드앱 출시 되었네요.
9 | 클리앙 3번째 글 크롤링 완료 10 | ================================================== 11 | 표현의 자유를 중요시 여기는 더민주가 과연 일베를 어떻게 대처할까..걱정되네요 12 | 클리앙 4번째 글 크롤링 완료 13 | ================================================== 14 | 이제 연합뉴스 속보가 기다려 지는군요 15 | 클리앙 5번째 글 크롤링 완료 16 | ================================================== 17 | 18대 대통령 취임식 때 당시 문재인 의원 불참했네요 18 | 클리앙 6번째 글 크롤링 완료 19 | ================================================== 20 | MBC 정상화 수순은 어떤 절차로 진행될까요 21 | 클리앙 7번째 글 크롤링 완료 22 | ================================================== 23 | avira 이상한 팝업이 떠서 지우려구요 -_- 24 | 클리앙 8번째 글 크롤링 완료 25 | ================================================== 26 | 제가 말이죠 문재인대통령보다 더 좋은게 있더군요 27 | 클리앙 9번째 글 크롤링 완료 28 | ================================================== 29 | 그나저나 은수미 의원님 미쿠냥 코스프레는 어떻게 되는건가요 ? 30 | 클리앙 10번째 글 크롤링 완료 31 | ================================================== 32 | 조선일보 기사의 댓글을 보면... 33 | 클리앙 11번째 글 크롤링 완료 34 | ================================================== 35 | 어쩌다보니 네이버 기사 베댓이 됬네요;; 36 | 클리앙 12번째 글 크롤링 완료 37 | ================================================== 38 | 교육부 외교부 국방부는 탈탈 털어야죠 39 | 클리앙 13번째 글 크롤링 완료 40 | ================================================== 41 | 취임식에 후보가 참석 안했다고 인성이 어쩌고 하는건 42 | 클리앙 14번째 글 크롤링 완료 43 | -------------------------------------------------- 44 | 도착 45 | 보배드림1번째 글 크롤링 완료 46 | ================================================== 47 | 국민의당 "安후보 사퇴하라는 송영길, 졸부 갑질" 48 | 클리앙 15번째 글 크롤링 완료 49 | ================================================== 50 | 심상정은 당사에서 티비로 취임식 본건가요? 51 | 클리앙 16번째 글 크롤링 완료 52 | ================================================== 53 | 노무현 대통령이 통일부장관으로 영입하려고 했던 인물이 누군지 아세요? 54 | 클리앙 17번째 글 크롤링 완료 55 | ================================================== 56 | 이명박근혜 최고로 부끄럽던 짤 둘... 57 | 클리앙 18번째 글 크롤링 완료 58 | ================================================== 59 | 의령군 ㅊㅈ.gif 60 | 클리앙 19번째 글 크롤링 완료 61 | ================================================== 62 | 와우~유승민 다시보게 되네요... 
63 | 클리앙 20번째 글 크롤링 완료 64 | ================================================== 65 | 아직도 "문재인 대통령"이라고 들으면 가슴이 찡합니다. 66 | 클리앙 21번째 글 크롤링 완료 67 | ================================================== 68 | 클리앙 목록으로 가는 버튼은 페이지 어디 있었든 첫페이지 목록으로 점프하네요.ㅡ,,ㅡ 69 | 클리앙 22번째 글 크롤링 완료 70 | ================================================== 71 | 대한민국의 향후 정치 전망은 수꼴세력이 무기력함을 느끼게 될거라 봅니다. 72 | 클리앙 23번째 글 크롤링 완료 73 | ================================================== 74 | 인터넷방송은 봐도 봐도 신기하네요 75 | 클리앙 24번째 글 크롤링 완료 76 | ================================================== 77 | 트럼프 특검 주장하는 공화당 의원들이 늘어나기 시작했다. 78 | 클리앙 25번째 글 크롤링 완료 79 | -------------------------------------------------- 80 | 경품을 중고제품으로 주는데 어떻게 생각하시나요 81 | 보배드림2번째 글 크롤링 완료 82 | -------------------------------------------------- 83 | 재밋는거발견했네요 84 | 보배드림3번째 글 크롤링 완료 85 | -------------------------------------------------- 86 | 일베들 웬만하면 이해하려고하는 한사람으로서. 87 | 보배드림4번째 글 크롤링 완료 88 | -------------------------------------------------- 89 | 문재인대통령 욕한거 고소안되나요? 
90 | 보배드림5번째 글 크롤링 완료 91 | -------------------------------------------------- 92 | 수출 중고차 문의관련 93 | 보배드림6번째 글 크롤링 완료 94 | -------------------------------------------------- 95 | 충청도 개그 96 | 보배드림7번째 글 크롤링 완료 97 | -------------------------------------------------- 98 | 뉴 레인지로버 보구 구합니다(개인 직거래 원합니다^^~) 99 | 보배드림8번째 글 크롤링 완료 100 | -------------------------------------------------- 101 | 기념으로 그려보았습니다 102 | 보배드림9번째 글 크롤링 완료 103 | -------------------------------------------------- 104 | 19살 출퇴근중고차추천좀요 105 | 보배드림10번째 글 크롤링 완료 106 | -------------------------------------------------- 107 | 컴맹 조립식컴터 견적 사기인가요ㅠㅠ 108 | 보배드림11번째 글 크롤링 완료 109 | ================================================== 110 | 무거운 머리카락.gif 111 | 클리앙 26번째 글 크롤링 완료 112 | ================================================== 113 | 토우 미사일 피하는 방법.gif 114 | 클리앙 27번째 글 크롤링 완료 115 | ================================================== 116 | 오바마 전 대통령 위안부 관련 잘못 알고 계시는데 117 | 클리앙 28번째 글 크롤링 완료 118 | ================================================== 119 | 대륙 사극 근황.gif 120 | 클리앙 29번째 글 크롤링 완료 121 | ================================================== 122 | 어제 들었던 말중 제일 황당한 말.txt 123 | 클리앙 30번째 글 크롤링 완료 124 | -------------------------------------------------- 125 | 부산 범천동에서 C63AMG 차량을 도난당했습니다. 126 | 보배드림12번째 글 크롤링 완료 127 | -------------------------------------------------- 128 | 여기가 보배드림인가요 129 | 보배드림13번째 글 크롤링 완료 130 | -------------------------------------------------- 131 | GPS 132 | 보배드림14번째 글 크롤링 완료 133 | -------------------------------------------------- 134 | 법쪽에 있으심 형님들..때인돈에 관한 조언좀 부탁드립니다. 135 | 보배드림15번째 글 크롤링 완료 136 | -------------------------------------------------- 137 | 재인이형이 경유값 올린다는데 138 | 보배드림16번째 글 크롤링 완료 139 | -------------------------------------------------- 140 | 사고난 줄 모르고 그냥 왔는데 블박 보니... 뒤로 뺐더라구요; 141 | 보배드림17번째 글 크롤링 완료 142 | -------------------------------------------------- 143 | 이게뭐냐ㅋ 144 | 보배드림18번째 글 크롤링 완료 145 | -------------------------------------------------- 146 | 보배형님들 작은 쇼핑몰 창업해보려합니다.. 
조언좀 부탁드려요 147 | 보배드림19번째 글 크롤링 완료 148 | -------------------------------------------------- 149 | 월급날이네요 150 | 보배드림20번째 글 크롤링 완료 151 | -------------------------------------------------- 152 | 사나이는 153 | 보배드림21번째 글 크롤링 완료 154 | -------------------------------------------------- 155 | 머플러 불법개조 신고했습니다 그런데 156 | 보배드림22번째 글 크롤링 완료 157 | -------------------------------------------------- 158 | 굿나잇 159 | 보배드림23번째 글 크롤링 완료 160 | -------------------------------------------------- 161 | ㅆㅂ 정은아. 하나만 묻자. 162 | 보배드림24번째 글 크롤링 완료 163 | -------------------------------------------------- 164 | 죄송합니다... 165 | 보배드림25번째 글 크롤링 완료 166 | -------------------------------------------------- 167 | 흔한 쓰리 솔로의 단톡방,,, 168 | 보배드림26번째 글 크롤링 완료 169 | -------------------------------------------------- 170 | 굿나잇! 171 | 보배드림27번째 글 크롤링 완료 172 | -------------------------------------------------- 173 | 선물 받은걸 유게에 올렸네요. 174 | 보배드림28번째 글 크롤링 완료 175 | -------------------------------------------------- 176 | 아.. 177 | 보배드림29번째 글 크롤링 완료 178 | -------------------------------------------------- 179 | 신차 주행거리 문의 180 | 보배드림30번째 글 크롤링 완료 181 | -------------------------------------------------------------------------------- /community/community/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | # Scrapy settings for community project 4 | # 5 | # For simplicity, this file contains only settings considered important or 6 | # commonly used. 
You can find more settings consulting the documentation: 7 | # 8 | # http://doc.scrapy.org/en/latest/topics/settings.html 9 | # http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 10 | # http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 11 | 12 | BOT_NAME = 'community' 13 | 14 | SPIDER_MODULES = ['community.spiders'] 15 | NEWSPIDER_MODULE = 'community.spiders' 16 | 17 | 18 | # Crawl responsibly by identifying yourself (and your website) on the user-agent 19 | #USER_AGENT = 'community (+http://www.yourdomain.com)' 20 | 21 | # Obey robots.txt rules 22 | ROBOTSTXT_OBEY = True 23 | 24 | # Configure maximum concurrent requests performed by Scrapy (default: 16) 25 | #CONCURRENT_REQUESTS = 32 26 | 27 | # Configure a delay for requests for the same website (default: 0) 28 | # See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay 29 | # See also autothrottle settings and docs 30 | #DOWNLOAD_DELAY = 3 31 | # The download delay setting will honor only one of: 32 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16 33 | #CONCURRENT_REQUESTS_PER_IP = 16 34 | 35 | # Disable cookies (enabled by default) 36 | #COOKIES_ENABLED = False 37 | 38 | # Disable Telnet Console (enabled by default) 39 | #TELNETCONSOLE_ENABLED = False 40 | 41 | # Override the default request headers: 42 | #DEFAULT_REQUEST_HEADERS = { 43 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 44 | # 'Accept-Language': 'en', 45 | #} 46 | 47 | # Enable or disable spider middlewares 48 | # See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 49 | #SPIDER_MIDDLEWARES = { 50 | # 'community.middlewares.CommunitySpiderMiddleware': 543, 51 | #} 52 | 53 | # Enable or disable downloader middlewares 54 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 55 | #DOWNLOADER_MIDDLEWARES = { 56 | # 'community.middlewares.MyCustomDownloaderMiddleware': 543, 57 | #} 58 | 59 | # Enable or disable 
extensions 60 | # See http://scrapy.readthedocs.org/en/latest/topics/extensions.html 61 | #EXTENSIONS = { 62 | # 'scrapy.extensions.telnet.TelnetConsole': None, 63 | #} 64 | 65 | # Configure item pipelines 66 | # See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html 67 | ITEM_PIPELINES = { 68 | 'community.pipelines.CommunityPipeline': 300, 69 | } 70 | 71 | # Enable and configure the AutoThrottle extension (disabled by default) 72 | # See http://doc.scrapy.org/en/latest/topics/autothrottle.html 73 | #AUTOTHROTTLE_ENABLED = True 74 | # The initial download delay 75 | #AUTOTHROTTLE_START_DELAY = 5 76 | # The maximum download delay to be set in case of high latencies 77 | #AUTOTHROTTLE_MAX_DELAY = 60 78 | # The average number of requests Scrapy should be sending in parallel to 79 | # each remote server 80 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 81 | # Enable showing throttling stats for every response received: 82 | #AUTOTHROTTLE_DEBUG = False 83 | 84 | # Enable and configure HTTP caching (disabled by default) 85 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings 86 | #HTTPCACHE_ENABLED = True 87 | #HTTPCACHE_EXPIRATION_SECS = 0 88 | #HTTPCACHE_DIR = 'httpcache' 89 | #HTTPCACHE_IGNORE_HTTP_CODES = [] 90 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' 91 | 92 | LOG_FILE = 'logfile.log' 93 | LOG_LEVEL = logging.INFO 94 | -------------------------------------------------------------------------------- /community/community/settings.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/12bme/Python-WebCrawling/bc0de861e46d8939eaf0e47504e1bad0d692d5ac/community/community/settings.pyc -------------------------------------------------------------------------------- /community/community/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package 
# will contain the spiders of your Scrapy project
# 2 | #
# 3 | # Please refer to the documentation for information on how to create and manage
# 4 | # your spiders.
# 5 |
# --------------------------------------------------------------------------------
# /community/community/spiders/__init__.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/12bme/Python-WebCrawling/bc0de861e46d8939eaf0e47504e1bad0d692d5ac/community/community/spiders/__init__.pyc
# --------------------------------------------------------------------------------
# /community/community/spiders/communitySpider.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
__author__='12bme'

import re
import scrapy

from community.items import CommunityItem
from datetime import datetime


class CommunitySpider(scrapy.Spider):
    """Crawl one list page each of the Clien "park" board and the
    Bobaedream "freeb" board, yielding one ``CommunityItem`` per row.

    Each item carries ``source``, ``category``, ``title`` (UTF-8 encoded),
    ``url``, ``date`` (``YYYY-mm-dd HH:MM:SS``) and ``hits``.
    """
    name = "communityCrawler"
    # Address of the site to crawl
    # start_urls = []

    # "HH:MM" at the start of a Bobaedream date cell.
    _time_of_day = re.compile('[0-9]{2}:[0-9]{2}')

    # Crawling runs periodically anyway, so the period and how far back the
    # newest posts should be fetched still need to be decided.
    def start_requests(self):
        """Yield the list-page requests for both sites (page 1 only)."""
        for page in range(1, 2, 1):
            # The second argument is the parser (callback) for the response.
            yield scrapy.Request("http://www.clien.net/cs2/bbs/board.php?bo_table=park&page=%d" % page, self.parse_clien)
            yield scrapy.Request("http://www.bobaedream.co.kr/list?code=freeb&page=%d" % page, self.parse_bobae)

    def parse_clien(self, response):
        """Yield one item per row of the Clien list page.

        A row missing any of the selected nodes raises ``IndexError`` (from
        ``extract()[0]``), which ends the iteration.
        """
        rows = response.xpath('//html/body/div[2]/div/div[1]/div[5]/div/div')
        for index, sel in enumerate(rows, 1):
            item = CommunityItem()

            item['source'] = '클리앙'
            item['category'] = 'free'
            subject = sel.xpath('div/div[@class="list-title"]/a[@class="list-subject"]/text()').extract()[0].strip()
            item['title'] = subject.encode('utf-8')
            href = sel.xpath('div/div[@class="list-title"]/a[@class="list-subject"]/@href').extract()[0]
            item['url'] = 'http://www.clien.net' + href

            # Fetch the date; round-tripping through datetime validates the format.
            stamp = sel.xpath('div/div[@class="list-time"]/span/span[@class="timestamp"]/text()').extract()[0]
            posted = datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S")
            item['date'] = posted.strftime("%Y-%m-%d %H:%M:%S")

            # Fetch the hit count (stored as the stripped text, not converted).
            item['hits'] = sel.xpath('div/div[2]/span/text()').extract()[0].strip()

            # Progress output; a single-argument print(...) call prints the
            # same text as the print statement on Python 2.
            print('='*50)
            print(item['title'])
            print("클리앙 " + str(index) + "번째 글 크롤링 완료")

            yield item

    def parse_bobae(self, response):
        """Yield one item per article row of the Bobaedream list page.

        A date cell starting with ``HH:MM`` is combined with today's date;
        any other cell is prefixed with the current two-digit year
        (presumably it holds ``mm/dd`` -- confirm against the site) and
        given the time ``00:00:00``.
        """
        rows = response.xpath('//tbody/tr[@itemtype="http://schema.org/Article"]')
        for index, sel in enumerate(rows, 1):
            item = CommunityItem()

            now = datetime.now()

            cell = sel.xpath('td[@class="date"]/text()').extract()[0]
            if self._time_of_day.match(cell):
                date_str = now.strftime('%y/%m/%d') + ' ' + cell + ':00'
            else:
                date_str = now.strftime('%y/') + cell + ' ' + '00:00:00'

            posted = datetime.strptime(date_str, "%y/%m/%d %H:%M:%S")

            item['source'] = '보배드림'
            item['category'] = 'free'
            subject = sel.xpath('td[@class="pl14"]/a/text()').extract()[0]
            item['title'] = subject.encode('utf-8')
            item['url'] = "http://www.bobaedream.co.kr" + sel.xpath('td[@class="pl14"]/a/@href').extract()[0]
            item['date'] = posted.strftime("%Y-%m-%d %H:%M:%S")
            item['hits'] = int(sel.xpath('td[@class="count"]/text()').extract()[0])

            print('-'*50)
            print(item['title'])
            print('보배드림' + str(index) + "번째 글 크롤링 완료")

            yield item


# NOTE: the rest of this span belongs to other files of the repository dump;
# it is kept verbatim below, as comments.
# --------------------------------------------------------------------------------
# /community/community/spiders/communitySpider.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/12bme/Python-WebCrawling/bc0de861e46d8939eaf0e47504e1bad0d692d5ac/community/community/spiders/communitySpider.pyc
# --------------------------------------------------------------------------------
# /community/scrapy.cfg:
# --------------------------------------------------------------------------------
# 1 | # Automatically created by: scrapy startproject
# 2 | #
# 3 | # For more information about the [deploy] section see:
# 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html
# 5 |
# 6 | [settings]
# 7 | default = community.settings
# 8 |
# 9 | [deploy]
# 10 | #url = http://localhost:6800/
# 11 | project = community
# 12 |
# --------------------------------------------------------------------------------
# /tutorial/4:
# --------------------------------------------------------------------------------
# 1 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 공부 | KLDP 10 | 16 | 20 | 23 | 35 | 42 | 49 | 52 | 53 | 58 | 59 | 64 | 65 | 66 | 67 | 68 | 73 | 74 | 75 | 78 | 79 |
80 | 81 | 99 | 100 |
101 | 102 |
103 | 104 |

공부

105 |
106 | 108 |
109 |
110 | 111 | 112 |
113 |
114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 132 | 134 | 135 | 138 | 139 | 140 | 147 | 149 | 150 | 153 | 154 | 155 | 162 | 164 | 165 | 168 | 169 | 170 |
포럼주제글 개수마지막 포스트
126 |
127 | 새 글 없음 128 |
129 | 130 |
개발 작업 도중 일어난 문제점을 서로 상의하세요.
131 |
133 | 32323 138611 137 |
141 |
142 | 새 글 없음 143 |
144 | 145 |
OS 및 애플리케이션 설치, 활용에 관한 질문/답변
146 |
148 | 34158 134839 152 |
156 |
157 | 새 글 없음 158 |
159 | 160 |
내가 아는 것을 다른 사람들과 나누고 싶을때...
161 |
163 | 935 6365 167 |
171 |
172 |
173 | 174 |
175 |
176 |
177 | 178 | 205 | 206 | 250 | 251 | 252 |
253 | 254 | 255 | 256 | 272 | 273 |
274 | 275 | 276 | 277 | -------------------------------------------------------------------------------- /tutorial/5: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 프로그래밍 QnA | KLDP 11 | 17 | 21 | 24 | 36 | 43 | 50 | 53 | 54 | 59 | 60 | 65 | 66 | 67 | 68 | 69 | 74 | 75 | 76 | 79 | 80 |
81 | 82 | 100 | 101 |
102 | 103 |
104 | 105 |

프로그래밍 QnA

106 |
107 | 109 |
110 |
111 | 112 | 113 |
114 |
115 | 116 | 117 | 118 | 119 | 120 | 121 | 127 | 135 | 137 | 140 | 141 | 142 | 148 | 156 | 158 | 161 | 162 | 163 | 169 | 177 | 179 | 182 | 183 | 184 | 190 | 198 | 200 | 203 | 204 | 205 | 211 | 219 | 221 | 222 | 223 | 224 | 230 | 238 | 240 | 243 | 244 | 245 | 251 | 259 | 261 | 264 | 265 | 266 | 272 | 280 | 282 | 285 | 286 | 287 | 293 | 301 | 303 | 306 | 307 | 308 | 314 | 322 | 324 | 327 | 328 | 329 | 335 | 343 | 345 | 348 | 349 | 350 | 356 | 364 | 366 | 369 | 370 | 371 | 377 | 385 | 387 | 390 | 391 | 392 | 398 | 406 | 408 | 411 | 412 | 413 | 419 | 427 | 429 | 432 | 433 | 434 | 440 | 448 | 450 | 453 | 454 | 455 | 461 | 469 | 471 | 474 | 475 | 476 | 482 | 490 | 492 | 495 | 496 | 497 | 503 | 511 | 513 | 516 | 517 | 518 | 524 | 532 | 534 | 537 | 538 | 539 | 545 | 553 | 555 | 558 | 559 | 560 | 566 | 574 | 576 | 579 | 580 | 581 | 587 | 595 | 597 | 600 | 601 | 602 | 608 | 616 | 618 | 621 | 622 | 623 | 629 | 637 | 639 | 640 | 641 | 642 |
제목댓글마지막 댓글정렬 - 오름차순
122 | 123 | 일반 토픽 124 | 125 |
126 |
128 | 130 |
131 | 133 |
134 |
136 | 6 139 |
143 | 144 | 일반 토픽 145 | 146 |
147 |
149 | 151 |
152 | 154 |
155 |
157 | 4 160 |
164 | 165 | 일반 토픽 166 | 167 |
168 |
170 | 172 |
173 | 175 |
176 |
178 | 6 181 |
185 | 186 | 일반 토픽 187 | 188 |
189 |
191 | 193 |
194 | 196 |
197 |
199 | 11 202 |
206 | 207 | 일반 토픽 208 | 209 |
210 |
212 | 214 |
215 | 217 |
218 |
220 | 0 없음
225 | 226 | 일반 토픽 227 | 228 |
229 |
231 | 233 |
234 | 236 |
237 |
239 | 2 242 |
246 | 247 | 일반 토픽 248 | 249 |
250 |
252 | 254 |
255 | 257 |
258 |
260 | 2 263 |
267 | 268 | 일반 토픽 269 | 270 |
271 |
273 | 275 |
276 | 278 |
279 |
281 | 1 284 |
288 | 289 | 일반 토픽 290 | 291 |
292 |
294 | 296 |
297 | 299 |
300 |
302 | 1 305 |
309 | 310 | 일반 토픽 311 | 312 |
313 |
315 | 317 |
318 | 320 |
321 |
323 | 3 326 |
330 | 331 | 일반 토픽 332 | 333 |
334 |
336 | 338 |
339 | 341 |
342 |
344 | 2 347 |
351 | 352 | 일반 토픽 353 | 354 |
355 |
357 | 359 |
360 | 362 |
363 |
365 | 5 368 |
372 | 373 | 일반 토픽 374 | 375 |
376 |
378 | 380 |
381 | 383 |
384 |
386 | 6 389 |
393 | 394 | 일반 토픽 395 | 396 |
397 |
399 | 401 |
402 | 404 |
405 |
407 | 5 410 |
414 | 415 | 일반 토픽 416 | 417 |
418 |
420 | 422 |
423 | 425 |
426 |
428 | 3 431 |
435 | 436 | 일반 토픽 437 | 438 |
439 |
441 | 443 |
444 | 446 |
447 |
449 | 3 452 |
456 | 457 | 일반 토픽 458 | 459 |
460 |
462 | 464 |
465 | 467 |
468 |
470 | 1 473 |
477 | 478 | 일반 토픽 479 | 480 |
481 |
483 | 485 |
486 | 488 |
489 |
491 | 3 494 |
498 | 499 | 일반 토픽 500 | 501 |
502 |
504 | 506 |
507 | 509 |
510 |
512 | 3 515 |
519 | 520 | 일반 토픽 521 | 522 |
523 |
525 | 527 |
528 | 530 |
531 |
533 | 3 536 |
540 | 541 | 일반 토픽 542 | 543 |
544 |
546 | 548 |
549 | 551 |
552 |
554 | 1 557 |
561 | 562 | 일반 토픽 563 | 564 |
565 |
567 | 569 |
570 | 572 |
573 |
575 | 2 578 |
582 | 583 | 일반 토픽 584 | 585 |
586 |
588 | 590 |
591 | 593 |
594 |
596 | 4 599 |
603 | 604 | 일반 토픽 605 | 606 |
607 |
609 | 611 |
612 | 614 |
615 |
617 | 1 620 |
624 | 625 | 일반 토픽 626 | 627 |
628 |
630 | 632 |
633 | 635 |
636 |
638 | 0 없음
643 |

페이지

656 |
657 | 658 |
659 |
660 | RSS - 프로그래밍 QnA 구독하기
661 | 662 | 689 | 690 | 736 | 737 | 738 |
739 | 740 | 741 | 742 | 758 | 759 |
760 | 761 | 762 | 763 | -------------------------------------------------------------------------------- /tutorial/cron.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /home/ 4 | 5 | PATH=$PATH:/usr/local/bin 6 | 7 | 8 | export PATH 9 | #scrapy crawl kldp -o items.json 10 | -------------------------------------------------------------------------------- /tutorial/items.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"index": 1, "title": ["c\ub85c \uad6c\ud604\ud55c \uc54c\uace0\ub9ac\uc998 \ubcf4\ub2e4\uac00 \uc774\ud574\uac00 \ub418\uc9c0 \uc54a\ub294 \uc810\uc774 \uc788\uc2b5\ub2c8\ub2e4."], "link": ["/node/157525"], "desc": ["\uc775\uba85 \uc0ac\uc6a9\uc790"]}, 3 | {"index": 2, "title": ["\uc548\ub155\ud558\uc138\uc694. \uc11c\ubc84 \uc18c\ucf13 \ud504\ub85c\uadf8\ub798\ubc0d\uc5d0 \ub300\ud574 \uad81\uae08\ud55c \uc810\uc774 \uc788\uc2b5\ub2c8\ub2e4."], "link": ["/node/157506"], "desc": []}, 4 | {"index": 3, "title": ["aes \uc554\ud638\ud654\ud558\uba74\uc11c \uc18d\ub3c4\ub97c \uc99d\uac00\uc2dc\ud0a4\ub294 \ubc29\ubc95 \uc5c6\uc744\uae4c\uc694?"], "link": ["/node/157448"], "desc": []}, 5 | {"index": 4, "title": ["aes \uc54c\uace0\ub9ac\uc998 : 128 \uc774\uc911 \uc554\ud638\ud654 vs 256 \ud55c\ubc88 \uc554\ud638\ud654 -> \ubb50\uac00 \ub354 \uac15\ub825\ud560\uae4c\uc694?"], "link": ["/node/157451"], "desc": []}, 6 | {"index": 5, "title": ["\ub9ac\ub205\uc2a4 \ub514\ubc14\uc774\uc2a4\ub4dc\ub77c\uc774\ubc84\uc640 \ubaa8\ub4c8 \uc9c8\ubb38\ub4dc\ub9bd\ub2c8\ub2e4. 
"], "link": ["/node/157540"], "desc": []}, 7 | {"index": 6, "title": ["\uc258 \uc2a4\ud06c\ub9bd\ud2b8(sh) \uc774\ud574\ud558\uae30..\uc9c8\ubb38\uc785\ub2c8\ub2e4."], "link": ["/node/157534"], "desc": []}, 8 | {"index": 7, "title": ["\uc11c\ubc84 \ud074\ub77c\uc774\uc5b8\ud2b8 \uad6c\uc870\uc5d0\uc11c \ub2e4\uc911\uc811\uc18d\uc2dc \ubd80\ubaa8 \uc790\uc2dd \ud504\ub85c\uc138\uc2a4\uc758 \uc6b0\uc120\uc21c\uc704\uc5d0 \ub300\ud574"], "link": ["/node/157533"], "desc": ["\uc775\uba85 \uc0ac\uc6a9\uc790"]}, 9 | {"index": 8, "title": ["c\uc5b8\uc5b4 \uc2a4\ud0dd \ubb38\uc790\uc5f4 \uac70\uafb8\ub85c \ucd9c\ub825 \uc9c8\ubb38\uc785\ub2c8\ub2e4"], "link": ["/node/157538"], "desc": ["\uc775\uba85 \uc0ac\uc6a9\uc790"]}, 10 | {"index": 9, "title": ["c++ \ucef4\ud30c\uc77c\ub7ec \uba54\ubaa8\ub9ac \ubc30\uce58\uc5d0 \ub300\ud558\uc5ec.."], "link": ["/node/157537"], "desc": ["\uc775\uba85 \uc0ac\uc6a9\uc790"]}, 11 | {"index": 10, "title": ["\ud504\ub85c\uadf8\ub798\ubc0d \uc5b8\uc5b4 \uc9c8\ubb38"], "link": ["/node/157532"], "desc": []}, 12 | {"index": 11, "title": ["\ub808\ub4dc\ube14\ub799\ud2b8\ub9ac \uad6c\ud604\uc774 \uc798 \ub410\ub294\uc9c0 \ud14c\uc2a4\ud2b8\ud558\ub294 \ubc29\ubc95\uc774 \ubb50\uac00 \uc788\uc744\uae4c\uc694?"], "link": ["/node/157504"], "desc": ["\uc775\uba85 \uc0ac\uc6a9\uc790"]}, 13 | {"index": 12, "title": ["\ub9ac\ub205\uc2a4 \ud658\uacbd\uc5d0\uc11c\uc758 \ub9c1\ud06c\ub4dc\ub9ac\uc2a4\ud2b8 \ud574\uc81c\ud560 \ub54c\uc5d0 \uad00\ud574\uc11c"], "link": ["/node/157529"], "desc": ["\uc775\uba85 \uc0ac\uc6a9\uc790"]}, 14 | {"index": 13, "title": ["\ud14d\uc2a4\ud2b8 \uc5d0\ub514\ud130\ub97c \ub9cc\ub4e4\ub824\uace0 \ud569\ub2c8\ub2e4."], "link": ["/node/70608"], "desc": []}, 15 | {"index": 14, "title": ["\ud30c\uc77c \uc785\ucd9c\ub825\uc744 \uc785\ucd9c\ub825\ud568\uc218 \uc4f0\uc9c0 \uc54a\uace0 \uad6c\ud604\uc774 \uac00\ub2a5\ud55c\uac00\uc694?"], "link": ["/node/157530"], "desc": []}, 16 | {"index": 15, "title": ["\ucef4\ud4e8\ud130 
\ub124\ud2b8\uc6cc\ud0b9 \uc218\uc5c5 \uc65c \ubc30\uc6cc\uc57c \ud558\ub098? \ub370\uc774\ud130 \uc1a1\uc218\uc2e0 \ubc29\ubc95?"], "link": ["/node/157528"], "desc": []}, 17 | {"index": 16, "title": ["sigusr1 \uc0ac\uc6a9\ud558\ub294\ub370 \uc9c8\ubb38\ub4dc\ub9bd\ub2c8\ub2e4."], "link": ["/node/157516"], "desc": []}, 18 | {"index": 17, "title": ["\ud504\ub85c\uadf8\ub7a8 \uc2e4\ud589 \uc2dc \uba54\ubaa8\ub9ac \uc801\uc7ac \uacfc\uc815\uad00\ub828\ud558\uc5ec \uc9c8\ubb38 \ub4dc\ub9bd\ub2c8\ub2e4."], "link": ["/node/157524"], "desc": []}, 19 | {"index": 18, "title": ["\ub9e4\ucd08\ub9c8\ub2e4 \uc0dd\uc131\ub418\ub294 \ub370\uc774\ud130 \uc800\uc7a5 \ubc29\ubc95?"], "link": ["/node/157512"], "desc": []}, 20 | {"index": 19, "title": ["malloc(), free() \ub3d9\uc801\ud560\ub2f9 \uc9c8\ubb38\uc785\ub2c8\ub2e4."], "link": ["/node/157522"], "desc": []}, 21 | {"index": 20, "title": ["[\uc644\ub8cc]c\uc758 \ubbf8\ub9ac \uc815\uc758\ub41c \ub9e4\ud06c\ub85c\uc640 \ucef4\ud30c\uc77c\ub7ec,\ub77c\uc774\ube0c\ub7ec\ub9ac \uad00\ub828\ud574\uc11c"], "link": ["/node/157485"], "desc": []}, 22 | {"index": 21, "title": ["[\uc54c\uace0\ub9ac\uc998] \ubc31\ud2b8\ub798\ud0b9 \uc798\ubaa8\ub974\uaca0\uc2b5\ub2c8\ub2e4."], "link": ["/node/157499"], "desc": []}, 23 | {"index": 22, "title": ["sysgcc \ud06c\ub85c\uc2a4 \ucef4\ud30c\uc77c \ud560 \ub54c, \ub77c\uc774\ube0c\ub7ec\ub9ac \ub9c1\ud06c \uc5d0\ub7ec\uc5d0 \ub300\ud574\uc11c \uc9c8\ubb38\ub4dc\ub9bd\ub2c8\ub2e4."], "link": ["/node/157455"], "desc": []}, 24 | {"index": 23, "title": ["C++\uc758 string \ub0b4\ubd80 \ubc30\uc5f4 \uc9c1\uc811\uc811\uadfc\uc5d0 \ub300\ud55c \uc758\uacac"], "link": ["/node/157514"], "desc": ["\uc775\uba85 \uc0ac\uc6a9\uc790"]}, 25 | {"index": 24, "title": ["[Linux] sed \uba85\ub839\uc5b4\uc5d0\ub300\ud558\uc5ec"], "link": ["/node/157491"], "desc": ["\uc775\uba85 \uc0ac\uc6a9\uc790"]}, 26 | {"index": 25, "title": ["ibatis \uc9c8\ubb38"], "link": ["/node/157510"], "desc": []} 27 | ][ 28 | {"index": 1, 
"title": "c\ub85c \uad6c\ud604\ud55c \uc54c\uace0\ub9ac\uc998 \ubcf4\ub2e4\uac00 \uc774\ud574\uac00 \ub418\uc9c0 \uc54a\ub294 \uc810\uc774 \uc788\uc2b5\ub2c8\ub2e4.", "link": ["/node/157525"], "desc": ["\uc775\uba85 \uc0ac\uc6a9\uc790"]}, 29 | {"index": 2, "title": "\uc548\ub155\ud558\uc138\uc694. \uc11c\ubc84 \uc18c\ucf13 \ud504\ub85c\uadf8\ub798\ubc0d\uc5d0 \ub300\ud574 \uad81\uae08\ud55c \uc810\uc774 \uc788\uc2b5\ub2c8\ub2e4.", "link": ["/node/157506"], "desc": []}, 30 | {"index": 3, "title": "aes \uc554\ud638\ud654\ud558\uba74\uc11c \uc18d\ub3c4\ub97c \uc99d\uac00\uc2dc\ud0a4\ub294 \ubc29\ubc95 \uc5c6\uc744\uae4c\uc694?", "link": ["/node/157448"], "desc": []}, 31 | {"index": 4, "title": "aes \uc54c\uace0\ub9ac\uc998 : 128 \uc774\uc911 \uc554\ud638\ud654 vs 256 \ud55c\ubc88 \uc554\ud638\ud654 -> \ubb50\uac00 \ub354 \uac15\ub825\ud560\uae4c\uc694?", "link": ["/node/157451"], "desc": []}, 32 | {"index": 5, "title": "\ub9ac\ub205\uc2a4 \ub514\ubc14\uc774\uc2a4\ub4dc\ub77c\uc774\ubc84\uc640 \ubaa8\ub4c8 \uc9c8\ubb38\ub4dc\ub9bd\ub2c8\ub2e4. 
", "link": ["/node/157540"], "desc": []}, 33 | {"index": 6, "title": "\uc258 \uc2a4\ud06c\ub9bd\ud2b8(sh) \uc774\ud574\ud558\uae30..\uc9c8\ubb38\uc785\ub2c8\ub2e4.", "link": ["/node/157534"], "desc": []}, 34 | {"index": 7, "title": "\uc11c\ubc84 \ud074\ub77c\uc774\uc5b8\ud2b8 \uad6c\uc870\uc5d0\uc11c \ub2e4\uc911\uc811\uc18d\uc2dc \ubd80\ubaa8 \uc790\uc2dd \ud504\ub85c\uc138\uc2a4\uc758 \uc6b0\uc120\uc21c\uc704\uc5d0 \ub300\ud574", "link": ["/node/157533"], "desc": ["\uc775\uba85 \uc0ac\uc6a9\uc790"]}, 35 | {"index": 8, "title": "c\uc5b8\uc5b4 \uc2a4\ud0dd \ubb38\uc790\uc5f4 \uac70\uafb8\ub85c \ucd9c\ub825 \uc9c8\ubb38\uc785\ub2c8\ub2e4", "link": ["/node/157538"], "desc": ["\uc775\uba85 \uc0ac\uc6a9\uc790"]}, 36 | {"index": 9, "title": "c++ \ucef4\ud30c\uc77c\ub7ec \uba54\ubaa8\ub9ac \ubc30\uce58\uc5d0 \ub300\ud558\uc5ec..", "link": ["/node/157537"], "desc": ["\uc775\uba85 \uc0ac\uc6a9\uc790"]}, 37 | {"index": 10, "title": "\ud504\ub85c\uadf8\ub798\ubc0d \uc5b8\uc5b4 \uc9c8\ubb38", "link": ["/node/157532"], "desc": []}, 38 | {"index": 11, "title": "\ub808\ub4dc\ube14\ub799\ud2b8\ub9ac \uad6c\ud604\uc774 \uc798 \ub410\ub294\uc9c0 \ud14c\uc2a4\ud2b8\ud558\ub294 \ubc29\ubc95\uc774 \ubb50\uac00 \uc788\uc744\uae4c\uc694?", "link": ["/node/157504"], "desc": ["\uc775\uba85 \uc0ac\uc6a9\uc790"]}, 39 | {"index": 12, "title": "\ub9ac\ub205\uc2a4 \ud658\uacbd\uc5d0\uc11c\uc758 \ub9c1\ud06c\ub4dc\ub9ac\uc2a4\ud2b8 \ud574\uc81c\ud560 \ub54c\uc5d0 \uad00\ud574\uc11c", "link": ["/node/157529"], "desc": ["\uc775\uba85 \uc0ac\uc6a9\uc790"]}, 40 | {"index": 13, "title": "\ud14d\uc2a4\ud2b8 \uc5d0\ub514\ud130\ub97c \ub9cc\ub4e4\ub824\uace0 \ud569\ub2c8\ub2e4.", "link": ["/node/70608"], "desc": []}, 41 | {"index": 14, "title": "\ud30c\uc77c \uc785\ucd9c\ub825\uc744 \uc785\ucd9c\ub825\ud568\uc218 \uc4f0\uc9c0 \uc54a\uace0 \uad6c\ud604\uc774 \uac00\ub2a5\ud55c\uac00\uc694?", "link": ["/node/157530"], "desc": []}, 42 | {"index": 15, "title": "\ucef4\ud4e8\ud130 \ub124\ud2b8\uc6cc\ud0b9 
\uc218\uc5c5 \uc65c \ubc30\uc6cc\uc57c \ud558\ub098? \ub370\uc774\ud130 \uc1a1\uc218\uc2e0 \ubc29\ubc95?", "link": ["/node/157528"], "desc": []}, 43 | {"index": 16, "title": "sigusr1 \uc0ac\uc6a9\ud558\ub294\ub370 \uc9c8\ubb38\ub4dc\ub9bd\ub2c8\ub2e4.", "link": ["/node/157516"], "desc": []}, 44 | {"index": 17, "title": "\ud504\ub85c\uadf8\ub7a8 \uc2e4\ud589 \uc2dc \uba54\ubaa8\ub9ac \uc801\uc7ac \uacfc\uc815\uad00\ub828\ud558\uc5ec \uc9c8\ubb38 \ub4dc\ub9bd\ub2c8\ub2e4.", "link": ["/node/157524"], "desc": []}, 45 | {"index": 18, "title": "\ub9e4\ucd08\ub9c8\ub2e4 \uc0dd\uc131\ub418\ub294 \ub370\uc774\ud130 \uc800\uc7a5 \ubc29\ubc95?", "link": ["/node/157512"], "desc": []}, 46 | {"index": 19, "title": "malloc(), free() \ub3d9\uc801\ud560\ub2f9 \uc9c8\ubb38\uc785\ub2c8\ub2e4.", "link": ["/node/157522"], "desc": []}, 47 | {"index": 20, "title": "[\uc644\ub8cc]c\uc758 \ubbf8\ub9ac \uc815\uc758\ub41c \ub9e4\ud06c\ub85c\uc640 \ucef4\ud30c\uc77c\ub7ec,\ub77c\uc774\ube0c\ub7ec\ub9ac \uad00\ub828\ud574\uc11c", "link": ["/node/157485"], "desc": []}, 48 | {"index": 21, "title": "[\uc54c\uace0\ub9ac\uc998] \ubc31\ud2b8\ub798\ud0b9 \uc798\ubaa8\ub974\uaca0\uc2b5\ub2c8\ub2e4.", "link": ["/node/157499"], "desc": []}, 49 | {"index": 22, "title": "sysgcc \ud06c\ub85c\uc2a4 \ucef4\ud30c\uc77c \ud560 \ub54c, \ub77c\uc774\ube0c\ub7ec\ub9ac \ub9c1\ud06c \uc5d0\ub7ec\uc5d0 \ub300\ud574\uc11c \uc9c8\ubb38\ub4dc\ub9bd\ub2c8\ub2e4.", "link": ["/node/157455"], "desc": []}, 50 | {"index": 23, "title": "C++\uc758 string \ub0b4\ubd80 \ubc30\uc5f4 \uc9c1\uc811\uc811\uadfc\uc5d0 \ub300\ud55c \uc758\uacac", "link": ["/node/157514"], "desc": ["\uc775\uba85 \uc0ac\uc6a9\uc790"]}, 51 | {"index": 24, "title": "[Linux] sed \uba85\ub839\uc5b4\uc5d0\ub300\ud558\uc5ec", "link": ["/node/157491"], "desc": ["\uc775\uba85 \uc0ac\uc6a9\uc790"]}, 52 | {"index": 25, "title": "ibatis \uc9c8\ubb38", "link": ["/node/157510"], "desc": []} 53 | ] 
-------------------------------------------------------------------------------- /tutorial/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html 5 | 6 | [settings] 7 | default = tutorial.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = tutorial 12 | -------------------------------------------------------------------------------- /tutorial/tutorial/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/12bme/Python-WebCrawling/bc0de861e46d8939eaf0e47504e1bad0d692d5ac/tutorial/tutorial/__init__.py -------------------------------------------------------------------------------- /tutorial/tutorial/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/12bme/Python-WebCrawling/bc0de861e46d8939eaf0e47504e1bad0d692d5ac/tutorial/tutorial/__init__.pyc -------------------------------------------------------------------------------- /tutorial/tutorial/add: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/12bme/Python-WebCrawling/bc0de861e46d8939eaf0e47504e1bad0d692d5ac/tutorial/tutorial/add -------------------------------------------------------------------------------- /tutorial/tutorial/input: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/12bme/Python-WebCrawling/bc0de861e46d8939eaf0e47504e1bad0d692d5ac/tutorial/tutorial/input -------------------------------------------------------------------------------- /tutorial/tutorial/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define 
class TutorialSpiderMiddleware(object):
    """Pass-through spider middleware for the ``tutorial`` project.

    Not all hook methods need to be defined. If a method is not defined,
    Scrapy acts as if the spider middleware does not modify the passed
    objects.

    Every hook is an instance method and therefore takes ``self`` first;
    without it, calling a hook on the instance built by ``from_crawler``
    fails with ``TypeError`` because of the extra positional argument.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to ``spider_opened``.

        :param crawler: crawler whose signal manager the new instance is
            connected to.
        :returns: the new middleware instance.
        """
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_spider_input(self, response, spider):
        """Handle a response on its way into the spider.

        :param response: response going through the spider middleware.
        :param spider: spider the response is intended for.
        :returns: always ``None`` (the response is left untouched); a hook
            of this kind should return ``None`` or raise an exception.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Forward the results the spider produced for ``response``.

        :param response: response the spider has processed.
        :param result: iterable returned by the spider.
        :param spider: spider that produced ``result``.
        :returns: generator yielding every element of ``result`` unchanged
            (Request, dict or Item objects).
        """
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        """React to an exception raised by a spider or by another
        middleware's ``process_spider_input()``.

        :param response: response being processed when the error occurred.
        :param exception: the raised exception.
        :param spider: spider that was handling ``response``.
        :returns: ``None`` -- this implementation takes no action. A hook
            of this kind may also return an iterable of Response, dict or
            Item objects.
        """
        pass

    def process_start_requests(self, start_requests, spider):
        """Forward the spider's start requests.

        Works like ``process_spider_output()`` except that no response is
        associated with the call.

        :param start_requests: iterable of the spider's start requests.
        :param spider: spider the start requests belong to.
        :returns: generator yielding every request unchanged (requests
            only, never items).
        """
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        """Log the name of the spider that has just been opened.

        :param spider: the opened spider; its ``logger`` and ``name`` are
            used for the log line.
        """
        spider.logger.info('Spider opened: %s' % spider.name)
You can find more settings consulting the documentation: 7 | # 8 | # http://doc.scrapy.org/en/latest/topics/settings.html 9 | # http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 10 | # http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 11 | 12 | BOT_NAME = 'tutorial' 13 | 14 | SPIDER_MODULES = ['tutorial.spiders'] 15 | NEWSPIDER_MODULE = 'tutorial.spiders' 16 | 17 | 18 | # Crawl responsibly by identifying yourself (and your website) on the user-agent 19 | #USER_AGENT = 'tutorial (+http://www.yourdomain.com)' 20 | 21 | # Obey robots.txt rules 22 | ROBOTSTXT_OBEY = True 23 | 24 | # Configure maximum concurrent requests performed by Scrapy (default: 16) 25 | #CONCURRENT_REQUESTS = 32 26 | 27 | # Configure a delay for requests for the same website (default: 0) 28 | # See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay 29 | # See also autothrottle settings and docs 30 | #DOWNLOAD_DELAY = 3 31 | # The download delay setting will honor only one of: 32 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16 33 | #CONCURRENT_REQUESTS_PER_IP = 16 34 | 35 | # Disable cookies (enabled by default) 36 | #COOKIES_ENABLED = False 37 | 38 | # Disable Telnet Console (enabled by default) 39 | #TELNETCONSOLE_ENABLED = False 40 | 41 | # Override the default request headers: 42 | #DEFAULT_REQUEST_HEADERS = { 43 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 44 | # 'Accept-Language': 'en', 45 | #} 46 | 47 | # Enable or disable spider middlewares 48 | # See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 49 | #SPIDER_MIDDLEWARES = { 50 | # 'tutorial.middlewares.TutorialSpiderMiddleware': 543, 51 | #} 52 | 53 | # Enable or disable downloader middlewares 54 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 55 | #DOWNLOADER_MIDDLEWARES = { 56 | # 'tutorial.middlewares.MyCustomDownloaderMiddleware': 543, 57 | #} 58 | 59 | # Enable or disable extensions 
60 | # See http://scrapy.readthedocs.org/en/latest/topics/extensions.html 61 | #EXTENSIONS = { 62 | # 'scrapy.extensions.telnet.TelnetConsole': None, 63 | #} 64 | 65 | # Configure item pipelines 66 | # See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html 67 | #ITEM_PIPELINES = { 68 | # 'tutorial.pipelines.TutorialPipeline': 300, 69 | #} 70 | 71 | # Enable and configure the AutoThrottle extension (disabled by default) 72 | # See http://doc.scrapy.org/en/latest/topics/autothrottle.html 73 | #AUTOTHROTTLE_ENABLED = True 74 | # The initial download delay 75 | #AUTOTHROTTLE_START_DELAY = 5 76 | # The maximum download delay to be set in case of high latencies 77 | #AUTOTHROTTLE_MAX_DELAY = 60 78 | # The average number of requests Scrapy should be sending in parallel to 79 | # each remote server 80 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 81 | # Enable showing throttling stats for every response received: 82 | #AUTOTHROTTLE_DEBUG = False 83 | 84 | # Enable and configure HTTP caching (disabled by default) 85 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings 86 | #HTTPCACHE_ENABLED = True 87 | #HTTPCACHE_EXPIRATION_SECS = 0 88 | #HTTPCACHE_DIR = 'httpcache' 89 | #HTTPCACHE_IGNORE_HTTP_CODES = [] 90 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' 91 | -------------------------------------------------------------------------------- /tutorial/tutorial/settings.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/12bme/Python-WebCrawling/bc0de861e46d8939eaf0e47504e1bad0d692d5ac/tutorial/tutorial/settings.pyc -------------------------------------------------------------------------------- /tutorial/tutorial/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the 
class KldpSpider(scrapy.Spider):
    """Spider that lists the topics of the KLDP forum boards.

    For every board page in ``start_urls`` it yields one ``KldpItem`` per
    row of that board's topic table.
    """

    name = "kldp"  # unique name used to select this spider
    allowed_domains = ["kldp.org"]
    # Board pages to scrape.
    start_urls = [
        "https://kldp.org/forum/4",
        "https://kldp.org/forum/5",
    ]

    def parse(self, response):
        """Yield a ``KldpItem`` for each topic row of a board page.

        :param response: downloaded board page. The last path segment of
            its URL is the board number, which is also the suffix of the
            topic table's ``id`` attribute (``forum-topic-<number>``).
        :returns: generator of ``KldpItem`` objects. ``index`` counts the
            rows from 1; ``title``, ``link`` and ``desc`` hold the lists
            returned by ``extract()``.
        """
        # Strip a query string and a trailing slash before taking the last
        # path segment, so ".../forum/4/" and ".../forum/4?page=1" both
        # resolve to "4" instead of "" or "4?page=1".
        board_path = response.url.split("?", 1)[0].rstrip("/")
        board_id = board_path.rsplit("/", 1)[-1]

        rows = response.xpath(
            '//*[@id="forum-topic-' + board_id + '"]/tbody/tr'
        )
        for index, sel in enumerate(rows, 1):
            item = KldpItem()
            item['index'] = index
            item['title'] = sel.xpath('td[2]/div[1]/a/text()').extract()
            # "@href" selects the attribute value rather than the node text.
            item['link'] = sel.xpath('td[2]/div[1]/a/@href').extract()
            item['desc'] = sel.xpath('td[2]/div[2]/span/span/text()').extract()
            # ``yield`` makes parse() a generator: each item is handed over
            # as soon as it is built instead of being collected in a list
            # here. Further handling of the items belongs in an item
            # pipeline or another component.
            yield item