├── .gitignore ├── README.md ├── example.py ├── scrapy.ini ├── weibo_login.py └── weibo_scrapy.py /.gitignore: -------------------------------------------------------------------------------- 1 | ################# 2 | ## Eclipse 3 | ################# 4 | 5 | *.pydevproject 6 | .project 7 | .metadata 8 | bin/ 9 | tmp/ 10 | *.tmp 11 | *.bak 12 | *.swp 13 | *~.nib 14 | local.properties 15 | .classpath 16 | .settings/ 17 | .loadpath 18 | *~ 19 | *txt$ 20 | *TXT$ 21 | following* 22 | *csv 23 | *txt 24 | # External tool builders 25 | .externalToolBuilders/ 26 | 27 | # Locally stored "Eclipse launch configurations" 28 | *.launch 29 | 30 | # CDT-specific 31 | .cproject 32 | 33 | # PDT-specific 34 | .buildpath 35 | 36 | 37 | ################# 38 | ## Visual Studio 39 | ################# 40 | 41 | ## Ignore Visual Studio temporary files, build results, and 42 | ## files generated by popular Visual Studio add-ons. 43 | 44 | # User-specific files 45 | *.suo 46 | *.user 47 | *.sln.docstates 48 | 49 | # Build results 50 | [Dd]ebug/ 51 | [Rr]elease/ 52 | *_i.c 53 | *_p.c 54 | *.ilk 55 | *.meta 56 | *.obj 57 | *.pch 58 | *.pdb 59 | *.pgc 60 | *.pgd 61 | *.rsp 62 | *.sbr 63 | *.tlb 64 | *.tli 65 | *.tlh 66 | *.tmp 67 | *.vspscc 68 | .builds 69 | *.dotCover 70 | 71 | 72 | # Visual C++ cache files 73 | ipch/ 74 | *.aps 75 | *.ncb 76 | *.opensdf 77 | *.sdf 78 | 79 | # Visual Studio profiler 80 | *.psess 81 | *.vsp 82 | 83 | # ReSharper is a .NET coding add-in 84 | _ReSharper* 85 | 86 | # Installshield output folder 87 | [Ee]xpress 88 | 89 | # DocProject is a documentation generator add-in 90 | DocProject/buildhelp/ 91 | DocProject/Help/*.HxT 92 | DocProject/Help/*.HxC 93 | DocProject/Help/*.hhc 94 | DocProject/Help/*.hhk 95 | DocProject/Help/*.hhp 96 | DocProject/Help/Html2 97 | DocProject/Help/html 98 | 99 | # Click-Once directory 100 | publish 101 | 102 | # Others 103 | [Bb]in 104 | [Oo]bj 105 | sql 106 | TestResults 107 | *.Cache 108 | ClientBin 109 | stylecop.* 110 | ~$* 111 | *.dbmdl 112 | Generated_Code #added for RIA/Silverlight projects 113 | 114 | # Backup & report files from converting an old project file to a newer 115 | # Visual Studio version. Backup files are not needed, because we have git ;-) 116 | _UpgradeReport_Files/ 117 | Backup*/ 118 | UpgradeLog*.XML 119 | 120 | 121 | 122 | ############ 123 | ## Windows 124 | ############ 125 | 126 | # Windows image file caches 127 | Thumbs.db 128 | 129 | # Folder config file 130 | Desktop.ini 131 | 132 | 133 | ############# 134 | ## Python 135 | ############# 136 | 137 | *.py[co] 138 | 139 | # Packages 140 | *.egg 141 | *.egg-info 142 | dist 143 | build 144 | eggs 145 | parts 146 | bin 147 | var 148 | sdist 149 | develop-eggs 150 | .installed.cfg 151 | 152 | # Installer logs 153 | pip-log.txt 154 | 155 | # Unit test / coverage reports 156 | .coverage 157 | .tox 158 | 159 | #Translations 160 | *.mo 161 | 162 | #Mr Developer 163 | .mr.developer.cfg 164 | 165 | # Mac crap 166 | .DS_Store 167 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | WEIBO_SCRAPY 2 | ============ 3 | 4 | WEIBO\_SCRAPY是一个PYTHON实现的,使用多线程抓取WEIBO信息的框架。WEIBO\_SCRAPY框架给用户提供WEIBO的模拟登录和多线程抓取微博信息的接口,让用户只需关心抓取的业务逻辑,而不用处理棘手的WEIBO模拟登录和多线程编程。 5 | 6 | WEIBO\_SCRAPY is a **Multi-Threading** SINA WEIBO data extraction Framework in Python. 
WEIBO\_SCRAPY provides a WEIBO login simulator and a multi-threading interface for WEIBO data extraction. It saves users a lot of time by freeing them from writing a WEIBO login simulator from scratch and from multi-threading programming, so they can focus on their own **extraction** logic.
  7 | 
  8 | 
  9 | =======
 10 | 
 11 | ### WEIBO\_SCRAPY的功能
 12 | 1\. 微博模拟登录
 13 | 
 14 | 2\. 多线程抓取框架
 15 | 
 16 | 3\. **抓取任务**接口
 17 | 
 18 | 4\. 抓取参数配置
 19 | 
 20 | ### WEIBO\_SCRAPY Provides
 21 | 1\. WEIBO Login Simulator
 22 | 
 23 | 2\. Multi-Threading Extraction Framework
 24 | 
 25 | 3\. **Extraction Task** Interface
 26 | 
 27 | 4\. Easy Way of Parameters Configuration
 28 | 
 29 | ### How to Use WEIBO\_SCRAPY
 30 |     #!/usr/bin/env python
 31 |     #coding=utf8
 32 | 
 33 |     from weibo_scrapy import scrapy
 34 | 
 35 |     class my_scrapy(scrapy):
 36 | 
 37 |         def scrapy_do_task(self, uid=None):
 38 |             '''
 39 |             User needs to override this method to perform a uid-based scrapy task.
 40 |             @param uid: weibo uid
 41 |             @return: a list of uids gained from this task, optional
 42 |             '''
 43 |             #do what you want with uid here; this scrapy is uid based, so make sure there are
 44 |             #uids in the task queue, or return new uids from this method
 45 | 
 46 |             print 'scraping uid %s ...' % uid
 47 | 
 48 |             return []    #replace with the list of uids gained from this task
 49 | 
 50 |     if __name__ == '__main__':
 51 | 
 52 |         s = my_scrapy(uids_file = 'uids_all.txt', config = 'my.ini')
 53 |         s.scrapy()
 54 | 
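For a more concrete picture, the sketch below fills in `scrapy_do_task` so that it really returns a uid list. It is only a hedged illustration: the follow-page URL and the regular expression are placeholder assumptions, not part of this project; the only contract taken from the framework is that the method receives a uid and returns a list of uids (login cookies are already installed into `urllib2` by the login step).

    #!/usr/bin/env python
    #coding=utf8

    import re
    import urllib2

    from weibo_scrapy import scrapy

    class follow_scrapy(scrapy):

        def scrapy_do_task(self, uid=None):
            '''
            Collect uids related to `uid` and return them as a list,
            so the framework can queue them for further crawling.
            '''
            #NOTE: the URL and the regex below are placeholders; adjust them
            #to whatever page you actually want to crawl
            page = urllib2.urlopen('http://weibo.com/%s/follow' % uid).read()
            return re.findall(r'uid=(\d+)', page)

    if __name__ == '__main__':

        s = follow_scrapy(start_uid = '1197161814')
        s.scrapy()
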
 55 | ### 相关阅读(Readings)
 56 | [基于UID的WEIBO信息抓取框架WEIBO_SCRAPY](http://yoyzhou.github.io/blog/2013/04/08/weibo-scrapy-framework-with-multi-threading/)
 57 | 
--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #coding=utf8
  3 | 
  4 | from weibo_scrapy import scrapy
  5 | 
  6 | 
  7 | class my_scrapy(scrapy):
  8 | 
  9 |     def scrapy_do_task(self, uid=None):
 10 |         '''
 11 |         User needs to override this method to perform a uid-based scrapy task.
 12 |         @param uid: weibo uid
 13 |         @return: a list of uids gained from this task, optional
 14 |         '''
 15 |         #do what you want with uid here; this scrapy is uid based, so make sure there are
 16 |         #uids in the task queue, or return new uids from this method
 17 | 
 18 |         print 'scraping uid %s ...' % uid
 19 | 
 20 |         return []    #replace with the list of uids gained from this task
 21 | 
 22 | if __name__ == '__main__':
 23 | 
 24 |     s = my_scrapy(start_uid = '1197161814')
 25 |     s.scrapy()
 26 | 
--------------------------------------------------------------------------------
/scrapy.ini:
--------------------------------------------------------------------------------
  1 | [login_account_info]
  2 | #account info for login
  3 | login_username = ur_weibo_account_id_here
  4 | login_uid = 1248521225
  5 | login_password = ur_weibo_account_password_here
  6 | cookies_file = weibo_cookies.dat
  7 | 
  8 | [scrapy_settings]
  9 | thread_number = 50
 10 | wanted = 100000
 11 | #only one of the two properties below is required; start_uid takes precedence over uids_file
 12 | #also note that arguments passed to the constructor override these two properties
 13 | start_uid = 1248521225
 14 | uids_file = 
 15 | 
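As a small illustration of the precedence rules noted in the ini file above (the file names here are placeholders), constructor arguments win over the ini values, and `start_uid`/`uids_file` may not both be passed as arguments:

    from example import my_scrapy

    #thread_number and uids_file below override whatever scrapy.ini specifies;
    #passing both start_uid and uids_file as arguments raises an exception
    s = my_scrapy(config = 'scrapy.ini', thread_number = 10, uids_file = 'uids_all.txt')
    s.scrapy()
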
--------------------------------------------------------------------------------
/weibo_login.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #coding=utf8
  3 | 
  4 | '''
  5 | Created on Mar 18, 2013
  6 | 
  7 | @author: yoyzhou
  8 | '''
  9 | 
 10 | try:
 11 |     import os
 12 |     import sys
 13 |     import urllib
 14 |     import urllib2
 15 |     import cookielib
 16 |     import base64
 17 |     import re
 18 |     import hashlib
 19 |     import json
 20 |     import rsa
 21 |     import binascii
 22 | 
 23 | except ImportError:
 24 |     print >> sys.stderr, """\
 25 | 
 26 | There was a problem importing one of the Python modules required.
 27 | The error leading to this problem was:
 28 | 
 29 | %s
 30 | 
 31 | Please install a package which provides this module, or
 32 | verify that the module is installed correctly.
 33 | 
 34 | It's possible that the above module doesn't match the current version of Python,
 35 | which is:
 36 | 
 37 | %s
 38 | 
 39 | """ % (sys.exc_info(), sys.version)
 40 |     sys.exit(1)
 41 | 
 42 | 
 43 | __prog__= "weibo_login"
 44 | __site__= "http://yoyzhou.github.com"
 45 | __weibo__= "@pigdata"
 46 | __version__="0.1 beta"
 47 | 
 48 | 
 49 | def get_prelogin_status(username):
 50 |     """
 51 |     Perform prelogin action, get prelogin status, including servertime, nonce, rsakv, etc.
 52 |     """
 53 |     #prelogin_url = 'http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&client=ssologin.js(v1.4.5)'
 54 |     prelogin_url = 'http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&su=' + get_user(username) + \
 55 |                    '&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.5)'
 56 |     data = urllib2.urlopen(prelogin_url).read()
 57 |     p = re.compile('\((.*)\)')
 58 | 
 59 |     try:
 60 |         json_data = p.search(data).group(1)
 61 |         data = json.loads(json_data)
 62 |         servertime = str(data['servertime'])
 63 |         nonce = data['nonce']
 64 |         rsakv = data['rsakv']
 65 |         return servertime, nonce, rsakv
 66 |     except:
 67 |         print 'Getting prelogin status failed!'
 68 |         return None
 69 | 
 70 | 
 71 | def login(username, pwd, cookie_file):
 72 |     """
 73 |     Login with user name, password and cookies.
 74 |     (1) If cookie file exists then try to load cookies;
 75 |     (2) If no cookies found then do login
 76 |     """
 77 |     #If cookie file exists then try to load cookies
 78 |     if os.path.exists(cookie_file):
 79 |         try:
 80 |             cookie_jar = cookielib.LWPCookieJar(cookie_file)
 81 |             cookie_jar.load(ignore_discard=True, ignore_expires=True)
 82 |             loaded = 1
 83 |         except cookielib.LoadError:
 84 |             loaded = 0
 85 |             print 'Loading cookies error'
 86 | 
 87 |         #install loaded cookies for urllib2
 88 |         if loaded:
 89 |             cookie_support = urllib2.HTTPCookieProcessor(cookie_jar)
 90 |             opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)
 91 |             urllib2.install_opener(opener)
 92 |             print 'Loading cookies success'
 93 |             return 1
 94 |         else:
 95 |             return do_login(username, pwd, cookie_file)
 96 | 
 97 |     else: #If no cookies found
 98 |         return do_login(username, pwd, cookie_file)
 99 | 
100 | 
101 | def do_login(username, pwd, cookie_file):
102 |     """
103 |     Perform login action with user name, password and saving cookies.
104 |     @param username: login user name
105 |     @param pwd: login password
106 |     @param cookie_file: file name where to save cookies when login succeeded
107 |     """
108 |     #POST data for WEIBO login; these fields can be captured with the HttpFox extension in Firefox
109 |     login_data = {
110 |         'entry': 'weibo',
111 |         'gateway': '1',
112 |         'from': '',
113 |         'savestate': '7',
114 |         'userticket': '1',
115 |         'pagerefer': '',
116 |         'vsnf': '1',
117 |         'su': '',
118 |         'service': 'miniblog',
119 |         'servertime': '',
120 |         'nonce': '',
121 |         'pwencode': 'rsa2',
122 |         'rsakv': '',
123 |         'sp': '',
124 |         'encoding': 'UTF-8',
125 |         'prelt': '45',
126 |         'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
127 |         'returntype': 'META'
128 |         }
129 | 
130 |     cookie_jar2 = cookielib.LWPCookieJar()
131 |     cookie_support2 = urllib2.HTTPCookieProcessor(cookie_jar2)
132 |     opener2 = urllib2.build_opener(cookie_support2, urllib2.HTTPHandler)
133 |     urllib2.install_opener(opener2)
134 |     login_url = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.5)'
135 |     try:
136 |         servertime, nonce, rsakv = get_prelogin_status(username)
137 |     except:
138 |         return
139 | 
140 |     #Fill POST data
141 |     login_data['servertime'] = servertime
142 |     login_data['nonce'] = nonce
143 |     login_data['su'] = get_user(username)
144 |     login_data['sp'] = get_pwd_rsa(pwd, servertime, nonce)
145 |     login_data['rsakv'] = rsakv
146 |     login_data = urllib.urlencode(login_data)
147 |     http_headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0'}
148 |     req_login = urllib2.Request(
149 |         url = login_url,
150 |         data = login_data,
151 |         headers = http_headers
152 |         )
153 |     result = urllib2.urlopen(req_login)
154 |     text = result.read()
155 |     p = re.compile('location\.replace\(\"(.*?)\"\)')   #double quote regex
156 |     ps = re.compile('location\.replace\(\'(.*?)\'\)')  #single quote regex
157 | 
158 |     try:
159 |         #Search login redirection URL
160 |         try:
161 |             #first try with the double quote regex
162 |             login_url = p.search(text).group(1)
163 |         except:
164 |             #then try with the single quote regex
165 |             login_url = ps.search(text).group(1)
166 | 
167 |         data = urllib2.urlopen(login_url).read()
168 | 
169 |         #Verify login feedback, check whether result is TRUE
170 |         patt_feedback = 'feedBackUrlCallBack\((.*)\)'
171 |         p = re.compile(patt_feedback, re.MULTILINE)
172 | 
173 |         feedback = p.search(data).group(1)
174 | 
175 |         feedback_json = json.loads(feedback)
176 |         if feedback_json['result']:
177 |             cookie_jar2.save(cookie_file, ignore_discard=True, ignore_expires=True)
178 |             return 1
179 |         else:
180 |             return 0
181 |     except:
182 |         return 0
183 | 
184 | 
185 | def get_pwd_wsse(pwd, servertime, nonce):
186 |     """
187 |     Get wsse encrypted password
188 |     """
189 |     pwd1 = hashlib.sha1(pwd).hexdigest()
190 |     pwd2 = hashlib.sha1(pwd1).hexdigest()
191 |     pwd3_ = pwd2 + servertime + nonce
192 |     pwd3 = hashlib.sha1(pwd3_).hexdigest()
193 |     return pwd3
194 | 
195 | def get_pwd_rsa(pwd, servertime, nonce):
196 |     """
197 |     Get rsa2 encrypted password, using the RSA module from https://pypi.python.org/pypi/rsa/3.1.1; documents can be accessed at
198 |     http://stuvel.eu/files/python-rsa-doc/index.html
199 |     """
200 |     #n, the n parameter of the RSA public key, which is published by WEIBO.COM
201 |     #hardcoded here, but it can also be found in the values returned by the prelogin status above
202 |     weibo_rsa_n = 'EB2A38568661887FA180BDDB5CABD5F21C7BFD59C090CB2D245A87AC253062882729293E5506350508E7F9AA3BB77F4333231490F915F6D63C55FE2F08A49B353F444AD3993CACC02DB784ABBB8E42A9B1BBFFFB38BE18D78E87A0E41B9B8F73A928EE0CCEE1F6739884B9777E4FE9E88A1BBE495927AC4A799B3181D6442443'
203 | 
204 |     #e, the exponent parameter of the RSA public key; WEIBO uses 0x10001, which is 65537 in decimal
205 |     weibo_rsa_e = 65537
206 | 
207 |     message = str(servertime) + '\t' + str(nonce) + '\n' + str(pwd)
208 | 
209 |     #construct the WEIBO RSA public key using n and e above; note that n is a hex string
210 |     key = rsa.PublicKey(int(weibo_rsa_n, 16), weibo_rsa_e)
211 | 
212 |     #get encrypted password
213 |     encrypted_pwd = rsa.encrypt(message, key)
214 | 
215 |     #turn the encrypted password binary back into a hex string
216 |     return binascii.b2a_hex(encrypted_pwd)
217 | 
218 | 
219 | def get_user(username):
220 |     username_ = urllib.quote(username)
221 |     username = base64.encodestring(username_)[:-1]
222 |     return username
223 | 
224 | 
225 | if __name__ == '__main__':
226 | 
227 |     username = 'ur_user_name_here'
228 |     pwd = 'ur_password_here'
229 |     cookie_file = 'weibo_login_cookies.dat'
230 | 
231 |     if login(username, pwd, cookie_file):
232 |         print 'Login WEIBO succeeded'
233 |     else:
234 |         print 'Login WEIBO failed'
235 | 
--------------------------------------------------------------------------------
/weibo_scrapy.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #coding=utf8
  3 | 
  4 | try:
  5 |     import sys
  6 |     import time
  7 |     import threading
  8 |     import Queue
  9 |     import ConfigParser
 10 |     from weibo_login import login
 11 | 
 12 | except ImportError:
 13 |     print >> sys.stderr, """\
 14 | 
 15 | There was a problem importing one of the Python modules required to run weibo_scrapy.
 16 | The error leading to this problem was:
 17 | 
 18 | %s
 19 | 
 20 | Please install a package which provides this module, or
 21 | verify that the module is installed correctly.
 22 | 
 23 | It's possible that the above module doesn't match the current version of Python,
 24 | which is:
 25 | 
 26 | %s
 27 | 
 28 | """ % (sys.exc_info(), sys.version)
 29 |     sys.exit(1)
 30 | 
 31 | 
 32 | __prog__= "weibo_scrapy"
 33 | __site__= "http://yoyzhou.github.com"
 34 | __weibo__= "@pigdata"
 35 | __version__="0.1 beta"
 36 | 
 37 | 
 38 | #####global variables#####
 39 | 
 40 | visited_uids = set()
 41 | task_queue = Queue.Queue()
 42 | lock = threading.Lock()
 43 | 
 44 | scraped = 0
 45 | config_file = 'scrapy.ini'
 46 | 
 47 | class scrapy(object):
 48 | 
 49 |     global visited_uids
 50 |     global task_queue
 51 |     global lock
 52 | 
 53 |     global scraped
 54 |     global config_file
 55 | 
 56 |     #settings are read from the config file; keyword arguments override them
 57 |     def __init__(self, config=None, thread_number=None, start_uid=None, uids_file=None):
 58 | 
 59 |         _config = {}
 60 |         if config:
 61 |             _config = self.__load_configuration__(config)
 62 |         else:
 63 |             _config = self.__load_configuration__(config_file)
 64 | 
 65 |         self.login_username = _config['login_username']
 66 |         self.login_uid = _config['login_uid']
 67 |         self.login_password = _config['login_password']
 68 |         self.cookies_file = _config['cookies_file']
 69 | 
 70 |         #get scrapy settings
 71 |         self.thread_number = _config['thread_number']
 72 | 
 73 |         self.start_uid = _config['start_uid']
 74 |         self.uids_file = _config['uids_file']
 75 |         self.wanted = _config['wanted']
 76 | 
 77 |         #accepts arguments as well, and arguments have higher priority
 78 |         if thread_number:
 79 |             self.thread_number = thread_number
 80 |         if start_uid and uids_file:
 81 |             raise Exception('You can only specify `start_uid` or `uids_file` in constructor')
 82 | 
 83 |         if start_uid:
 84 |             self.start_uid = start_uid
 85 |             self.uids_file = None
 86 |         if uids_file:
 87 |             self.uids_file = uids_file
 88 |             self.start_uid = None
 89 | 
 90 |     def scrapy(self):
 91 | 
 92 |         login_status = login(self.login_username, self.login_password, self.cookies_file)
 93 | 
 94 |         if login_status:
 95 | 
 96 |             if self.start_uid:
 97 |                 task_queue.put(self.start_uid)
 98 | 
 99 |             elif self.uids_file:
100 |                 uids_list = self.__load_uids__()
101 |                 for uid in uids_list:
102 |                     task_queue.put(uid)
103 | 
104 |             else: #start uid or uids file is needed
105 |                 raise Exception('ERROR: Start uid or uids file is needed.')
106 | 
107 |             #spawn a pool of threads, and pass them the queue instance
108 |             for _ in range(self.thread_number):
109 |                 st = scrapy_threading(self.scrapy_do_task, self.wanted)
110 |                 st.setDaemon(True)
111 |                 st.start()
112 | 
113 | 
114 |             task_queue.join()
115 | 
116 | 
117 |     def scrapy_do_task(self, uid=None):
118 |         '''
119 |         User needs to override this method to perform a uid-based scrapy task.
120 |         @param uid: weibo uid
121 |         @return: a list of uids gained from this task, optional
122 |         '''
123 |         #override in a subclass; return the (possibly empty) list of uids gained from this task
124 |         return []
125 | 
126 |     def __load_configuration__(self, config_file):
127 |         config = ConfigParser.RawConfigParser(allow_no_value=True)
128 |         config.read(config_file)
129 |         settings = {}
130 |         #get login account user info
131 |         settings['login_username'] = config.get('login_account_info', 'login_username')
132 |         settings['login_uid'] = config.get('login_account_info', 'login_uid')
133 |         settings['login_password'] = config.get('login_account_info', 'login_password')
134 |         settings['cookies_file'] = config.get('login_account_info', 'cookies_file')
135 | 
136 |         #get scrapy settings
137 |         settings['thread_number'] = config.getint('scrapy_settings', 'thread_number')
138 |         settings['start_uid'] = config.get('scrapy_settings', 'start_uid')
139 |         settings['uids_file'] = config.get('scrapy_settings', 'uids_file')
140 |         settings['wanted'] = config.getint('scrapy_settings', 'wanted')
141 | 
142 |         return settings
143 | 
144 |     def __load_uids__(self):
145 |         '''
146 |         Loads uids from file. The file should be formatted as one uid per line.
147 |         '''
148 |         uids_list = []
149 |         with open(self.uids_file, 'r') as uids:
150 |             for uid in uids:
151 |                 if uid.strip():
152 |                     uids_list.append(uid.strip())
153 | 
154 |         return uids_list
155 | 
156 | class scrapy_threading(threading.Thread):
157 |     """Thread class to handle a scrapy task"""
158 | 
159 |     def __init__(self, task, wanted):
160 |         threading.Thread.__init__(self)
161 |         self.do_task = task
162 |         self.wanted = wanted
163 | 
164 |     def run(self):
165 |         global visited_uids
166 |         global task_queue
167 |         global scraped
168 |         global lock
169 | 
170 |         while scraped < self.wanted:
171 | 
172 |             #crawl info based on each uid
173 |             if not task_queue.empty():
174 | 
175 |                 uid = task_queue.get()
176 | 
177 |                 if uid in visited_uids: #already crawled
178 |                     task_queue.task_done()
179 | 
180 |                 else:
181 |                     try:
182 |                         gains = self.do_task(uid)
183 | 
184 |                         #for debugging
185 |                         wow = '{0: <25}'.format('[' + time.asctime() + '] ') + ' uid_' + '{0: <12}'.format(uid)
186 |                         print wow
187 |                         for new_uid in (gains or []):
188 |                             task_queue.put(new_uid)
189 |                         visited_uids.add(uid)
190 |                         #signals that the queue job is done
191 |                         task_queue.task_done()
192 | 
193 |                         #count scraped uids
194 |                         with lock:
195 |                             scraped += 1
196 |                             #for debugging
197 |                             print 'scraped: ' + str(scraped)
198 | 
199 |                     except Exception, e:
200 |                         print e
201 |                         pass
202 | 
203 |             else:
204 |                 time.sleep(30)
205 | 
--------------------------------------------------------------------------------