├── .gitignore ├── LICENSE ├── Outdated ├── db_movie.py ├── weixin - 3.py └── weixin.py ├── README.md ├── Zotero ├── CustomZoteroWordPlugin.md └── Zotero.dotm ├── berndeutsch.py ├── douban_exporter.js ├── email_parser.py ├── 全国高校学科评估结果.txt └── 全国高校学科评估结果处理程式.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Installer logs 26 | pip-log.txt 27 | pip-delete-this-directory.txt 28 | 29 | # Unit test / coverage reports 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | # some outputs: 56 | *.xlsx 57 | *.csv -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. 
This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 
49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. 
You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 
113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
def basepage(wa):
    """Build the Douban movie list URL for one shelf and crawl it.

    The URL is assembled from the module-level ``user`` name and the
    shelf selector, then handed to ``link_list``, which walks the pages.

    Args:
        wa: Which shelf to export: ``'do'``, ``'wish'`` or ``'collect'``.

    Raises:
        ValueError: If ``wa`` is not one of the three supported shelves.
            (The original fell through the if/elif chain and crashed with
            an UnboundLocalError on ``baseurl``.)
    """
    shelves = ('do', 'wish', 'collect')
    # Validate before touching anything else so a typo at the prompt gives
    # a clear message instead of an obscure unbound-variable error.
    if wa not in shelves:
        raise ValueError(
            'unknown list %r, expected one of: %s' % (wa, ', '.join(shelves))
        )

    # The three shelf URLs differ only in the path segment.
    baseurl = 'http://movie.douban.com/people/' + user + '/' + wa + '?start='
    link_list(baseurl)
content(html): 60 | info = ur.urlopen(html) 61 | soup = bs(info) 62 | for tag in soup.body(attrs={'class': 'item'}): 63 | datum = open('datum.csv', 'a+') 64 | title = tag.em.string.strip() 65 | url = tag.li.a.get('href') 66 | date = tag.find('span', class_='date').get_text() 67 | comment = tag.find('span', class_='comment') 68 | if comment == None: 69 | comment = '' 70 | else: 71 | comment = comment.get_text() 72 | comment = comment.encode('utf-8') 73 | title = title.encode('utf-8') 74 | url = url.encode('utf-8') 75 | date = date.encode('utf-8') 76 | print >> datum, url, ',', date, ',', title, ',', comment 77 | datum.close() 78 | 79 | 80 | # 运行 81 | print u'这是一个用以获取用户豆瓣数据的爬虫,使得用户可以进行数据的本地备份。' 82 | user = raw_input('Please input your DB user name:') 83 | wanted = raw_input('Please input what you want to sync:(do,wish,collect)') 84 | 85 | 86 | basepage(wanted) 87 | -------------------------------------------------------------------------------- /Outdated/weixin - 3.py: -------------------------------------------------------------------------------- 1 | 2 | #!/usr/bin/env python 3 | # coding=utf-8 4 | 5 | import os 6 | import urllib 7 | import re 8 | import cookielib 9 | import time 10 | import xml.dom.minidom 11 | import json 12 | import sys 13 | import math 14 | 15 | DEBUG = False 16 | 17 | MAX_GROUP_NUM = 35 # 每组人数 18 | 19 | QRImagePath = os.getcwd() + '/qrcode.jpg' 20 | 21 | tip = 0 22 | uuid = '' 23 | 24 | base_uri = '' 25 | redirect_uri = '' 26 | 27 | skey = '' 28 | wxsid = '' 29 | wxuin = '' 30 | pass_ticket = '' 31 | deviceId = 'e000000000000000' 32 | 33 | BaseRequest = {} 34 | 35 | ContactList = [] 36 | My = [] 37 | 38 | def getUUID(): 39 | global uuid 40 | 41 | url = 'https://login.weixin.qq.com/jslogin' 42 | params = { 43 | 'appid': 'wx782c26e4c19acffb', 44 | 'fun': 'new', 45 | 'lang': 'zh_CN', 46 | '_': int(time.time()), 47 | } 48 | 49 | request = urllib.Request(url = url, data = urllib.urlencode(params)) 50 | response = urllib.urlopen(request) 51 | data = 
def waitForLogin():
    """Poll the WeChat web login endpoint once and return its status code.

    Reads the module-level ``tip`` and ``uuid`` to build the polling URL.
    Depending on the status it updates module state:

    * ``'201'`` (QR code scanned): prints a prompt and resets ``tip`` to 0.
    * ``'200'`` (logged in): stores ``redirect_uri`` (with ``&fun=new``
      appended) and derives ``base_uri`` from it.
    * ``'408'`` (timeout): nothing to do; the caller simply polls again.

    Returns:
        The status code as a string, e.g. ``'200'``.

    Raises:
        ValueError: If the response does not contain the expected
            ``window.code`` / ``window.redirect_uri`` assignments.
    """
    global tip, base_uri, redirect_uri

    # ``urllib.Request`` / ``urllib.urlopen(request)`` do not exist under that
    # name on either major Python version; import the real ones locally.
    try:
        from urllib.request import Request, urlopen  # Python 3
    except ImportError:
        from urllib2 import Request, urlopen  # Python 2

    url = 'https://login.weixin.qq.com/cgi-bin/mmwebwx-bin/login?tip=%s&uuid=%s&_=%s' % (tip, uuid, int(time.time()))

    response = urlopen(Request(url=url))
    data = response.read()
    # The body arrives as bytes; the regexes below are text patterns.
    if isinstance(data, bytes):
        data = data.decode('utf-8')

    # Expected payload looks like: window.code=500;
    pm = re.search(r'window.code=(\d+);', data)
    if pm is None:
        raise ValueError('unexpected login response: %r' % data)

    code = pm.group(1)

    if code == '201':  # scanned, waiting for confirmation on the phone
        print('成功扫描,请在手机上点击确认以登录')
        tip = 0
    elif code == '200':  # logged in
        print('正在登录...')
        pm = re.search(r'window.redirect_uri="(\S+?)";', data)
        if pm is None:
            raise ValueError('login response has no redirect_uri: %r' % data)
        redirect_uri = pm.group(1) + '&fun=new'
        base_uri = redirect_uri[:redirect_uri.rfind('/')]
    elif code == '408':  # timeout
        pass

    return code
def webwxinit():
    """Initialise the web WeChat session and cache the account data.

    POSTs the module-level ``BaseRequest`` as JSON to ``/webwxinit`` under
    ``base_uri`` and stores ``ContactList`` and ``User`` from the reply in
    the module-level ``ContactList`` and ``My``. When ``DEBUG`` is true the
    raw response is also dumped to ``webwxinit.json`` in the working
    directory.

    Returns:
        True if the server reported ``Ret == 0``, otherwise False. Any
        non-empty ``ErrMsg`` is printed.
    """
    global ContactList, My

    # ``urllib.Request`` / ``urllib.urlopen(request)`` do not exist under that
    # name on either major Python version; import the real ones locally.
    try:
        from urllib.request import Request, urlopen  # Python 3
    except ImportError:
        from urllib2 import Request, urlopen  # Python 2

    url = base_uri + '/webwxinit?pass_ticket=%s&skey=%s&r=%s' % (pass_ticket, skey, int(time.time()))
    params = {
        'BaseRequest': BaseRequest,
    }

    # A request body must be bytes on Python 3.
    request = Request(url=url, data=json.dumps(params).encode('utf-8'))
    # The header name is "Content-Type"; the original sent "ContentType",
    # which is not a header any server interprets.
    request.add_header('Content-Type', 'application/json; charset=UTF-8')
    response = urlopen(request)
    data = response.read()

    if DEBUG:
        # ``with`` closes the dump file even if the write fails.
        with open(os.getcwd() + '/webwxinit.json', 'wb') as f:
            f.write(data)

    dic = json.loads(data.decode('utf-8'))
    ContactList = dic['ContactList']
    My = dic['User']

    ErrMsg = dic['BaseResponse']['ErrMsg']
    if len(ErrMsg) > 0:
        print(ErrMsg)

    return dic['BaseResponse']['Ret'] == 0
'/webwxgetcontact?pass_ticket=%s&skey=%s&r=%s' % (pass_ticket, skey, int(time.time())) 211 | 212 | request = urllib.Request(url = url) 213 | request.add_header('ContentType', 'application/json; charset=UTF-8') 214 | response = urllib.urlopen(request) 215 | data = response.read() 216 | 217 | if DEBUG == True: 218 | f = open(os.getcwd() + '/webwxgetcontact.json', 'wb') 219 | f.write(data) 220 | f.close() 221 | 222 | # print data 223 | 224 | dic = json.loads(data) 225 | MemberList = dic['MemberList'] 226 | 227 | # 倒序遍历,不然删除的时候出问题.. 228 | SpecialUsers = ['newsapp', 'fmessage', 'filehelper', 'weibo', 'qqmail', 'fmessage', 'tmessage', 'qmessage', 'qqsync', 'floatbottle', 'lbsapp', 'shakeapp', 'medianote', 'qqfriend', 'readerapp', 'blogapp', 'facebookapp', 'masssendapp', 'meishiapp', 'feedsapp', 'voip', 'blogappweixin', 'weixin', 'brandsessionholder', 'weixinreminder', 'wxid_novlwrv3lqwv11', 'gh_22b87fa7cb3c', 'officialaccounts', 'notification_messages', 'wxid_novlwrv3lqwv11', 'gh_22b87fa7cb3c', 'wxitil', 'userexperience_alarm', 'notification_messages'] 229 | for i in range(len(MemberList) - 1, -1, -1): 230 | Member = MemberList[i] 231 | if Member['VerifyFlag'] & 8 != 0: # 公众号/服务号 232 | MemberList.remove(Member) 233 | elif Member['UserName'] in SpecialUsers: # 特殊账号 234 | MemberList.remove(Member) 235 | elif Member['UserName'].find('@@') != -1: # 群聊 236 | MemberList.remove(Member) 237 | elif Member['UserName'] == My['UserName']: # 自己 238 | MemberList.remove(Member) 239 | 240 | return MemberList 241 | 242 | def createChatroom(UserNames): 243 | MemberList = [] 244 | for UserName in UserNames: 245 | MemberList.append({'UserName': UserName}) 246 | 247 | 248 | url = base_uri + '/webwxcreatechatroom?pass_ticket=%s&r=%s' % (pass_ticket, int(time.time())) 249 | params = { 250 | 'BaseRequest': BaseRequest, 251 | 'MemberCount': len(MemberList), 252 | 'MemberList': MemberList, 253 | 'Topic': '', 254 | } 255 | 256 | request = urllib.Request(url = url, data = json.dumps(params)) 257 | 
def deleteMember(ChatRoomName, UserNames):
    """Remove members from a chat room via ``webwxupdatechatroom``.

    Uses the module-level ``base_uri``, ``pass_ticket`` and ``BaseRequest``
    to build and authenticate the request.

    Args:
        ChatRoomName: Identifier of the chat room to modify.
        UserNames: Iterable of user-name strings to remove; they are sent
            as one comma-separated ``DelMemberList`` value.

    Returns:
        True if the server reported ``Ret == 0``, otherwise False. Any
        non-empty ``ErrMsg`` is printed.
    """
    # ``urllib.Request`` / ``urllib.urlopen(request)`` do not exist under that
    # name on either major Python version; import the real ones locally.
    try:
        from urllib.request import Request, urlopen  # Python 3
    except ImportError:
        from urllib2 import Request, urlopen  # Python 2

    url = base_uri + '/webwxupdatechatroom?fun=delmember&pass_ticket=%s' % (pass_ticket)
    params = {
        'BaseRequest': BaseRequest,
        'ChatRoomName': ChatRoomName,
        'DelMemberList': ','.join(UserNames),
    }

    # A request body must be bytes on Python 3.
    request = Request(url=url, data=json.dumps(params).encode('utf-8'))
    # The header name is "Content-Type"; the original sent "ContentType",
    # which is not a header any server interprets.
    request.add_header('Content-Type', 'application/json; charset=UTF-8')
    response = urlopen(request)
    data = response.read()

    dic = json.loads(data.decode('utf-8'))
    ErrMsg = dic['BaseResponse']['ErrMsg']
    if len(ErrMsg) > 0:
        print(ErrMsg)

    return dic['BaseResponse']['Ret'] == 0
Member['MemberStatus'] == 4: #被对方删除了 323 | DeletedList.append(Member['UserName']) 324 | 325 | ErrMsg = dic['BaseResponse']['ErrMsg'] 326 | if len(ErrMsg) > 0: 327 | print(ErrMsg) 328 | 329 | return DeletedList 330 | 331 | def main(): 332 | 333 | opener = urllib.build_opener(urllib.HTTPCookieProcessor(cookielib.CookieJar())) 334 | urllib.install_opener(opener) 335 | 336 | if getUUID() == False: 337 | print('获取uuid失败') 338 | return 339 | 340 | showQRImage() 341 | time.sleep(1) 342 | 343 | while waitForLogin() != '200': 344 | pass 345 | 346 | os.remove(QRImagePath) 347 | 348 | if login() == False: 349 | print('登录失败') 350 | return 351 | 352 | if webwxinit() == False: 353 | print('初始化失败') 354 | return 355 | 356 | MemberList = webwxgetcontact() 357 | 358 | MemberCount = len(MemberList) 359 | print('通讯录共%s位好友') % MemberCount 360 | 361 | ChatRoomName = '' 362 | result = [] 363 | for i in range(0, int(math.ceil(MemberCount / float(MAX_GROUP_NUM)))): 364 | UserNames = [] 365 | NickNames = [] 366 | DeletedList = '' 367 | for j in range(0, MAX_GROUP_NUM): 368 | if i * MAX_GROUP_NUM + j >= MemberCount: 369 | break 370 | 371 | Member = MemberList[i * MAX_GROUP_NUM + j] 372 | UserNames.append(Member['UserName']) 373 | NickNames.append(Member['NickName'].encode('utf-8')) 374 | 375 | print('第%s组...') % (i + 1) 376 | print(', '.join(NickNames)) 377 | print('回车键继续...') 378 | input('say soemthing:') 379 | 380 | # 新建群组/添加成员 381 | if ChatRoomName == '': 382 | (ChatRoomName, DeletedList) = createChatroom(UserNames) 383 | else: 384 | DeletedList = addMember(ChatRoomName, UserNames) 385 | 386 | DeletedCount = len(DeletedList) 387 | if DeletedCount > 0: 388 | result += DeletedList 389 | 390 | print('找到%s个被删好友') % DeletedCount 391 | # raw_input() 392 | 393 | # 删除成员 394 | deleteMember(ChatRoomName, UserNames) 395 | 396 | # todo 删除群组 397 | 398 | 399 | resultNames = [] 400 | for Member in MemberList: 401 | if Member['UserName'] in result: 402 | NickName = Member['NickName'] 403 | if 
# Work around console encoding problems on Windows.
# http://blog.csdn.net/heyuxuanzee/article/details/8442718
class UnicodeStreamFilter:
    """Text-stream wrapper that replaces characters the target cannot encode.

    Everything written is round-tripped through the target stream's encoding
    with ``errors='replace'``, so unencodable characters become ``?`` instead
    of raising ``UnicodeEncodeError``.
    """

    def __init__(self, target):
        """Wrap *target*, a stream exposing ``write`` and ``encoding``."""
        self.target = target
        self.encoding = 'utf-8'
        self.errors = 'replace'
        # Some streams report no encoding (None); fall back to UTF-8.
        self.encode_to = self.target.encoding or self.encoding

    def write(self, s):
        """Write *s* (text, or UTF-8 encoded bytes) to the wrapped stream."""
        # Only byte strings need decoding; on Python 3 ``str`` has no
        # ``decode`` method, which made the old ``type(s) == str`` branch
        # raise AttributeError on every call.
        if isinstance(s, bytes):
            s = s.decode(self.encoding, self.errors)
        s = s.encode(self.encode_to, self.errors).decode(self.encode_to)
        self.target.write(s)

    def flush(self):
        """Flush the wrapped stream (needed once this object replaces sys.stdout)."""
        self.target.flush()
def showQRImage():
    """Download the login QR code, save it to QRImagePath and open it.

    Sets the module-level ``tip`` to 1 once the image has been requested,
    writes the image bytes to ``QRImagePath`` and asks the operating system
    to open the file, then prompts the user to scan it.
    """
    global tip

    url = 'https://login.weixin.qq.com/qrcode/' + uuid
    params = {
        't': 'webwx',
        '_': int(time.time()),
    }

    request = urllib2.Request(url = url, data = urllib.urlencode(params))
    response = urllib2.urlopen(request)

    tip = 1

    # ``with`` closes the file even if the download or the write fails.
    with open(QRImagePath, 'wb') as f:
        f.write(response.read())

    # QRImagePath is built from os.getcwd() and may contain spaces, so the
    # path is quoted before it is handed to the shell.
    if sys.platform.find('darwin') >= 0:
        os.system('open "%s"' % QRImagePath)
    elif sys.platform.find('linux') >= 0:
        os.system('xdg-open "%s"' % QRImagePath)
    else:
        os.system('call "%s"' % QRImagePath)

    print('请使用微信扫描二维码以登录')
def login():
    """Fetch ``redirect_uri`` and extract the session credentials from its XML.

    Fills the module-level ``skey``, ``wxsid``, ``wxuin`` and ``pass_ticket``
    from the child elements of the response's root element, then builds
    ``BaseRequest`` from them.

    Returns:
        True when all four values are non-empty, otherwise False (in which
        case ``BaseRequest`` is left untouched).
    """
    global skey, wxsid, wxuin, pass_ticket, BaseRequest

    request = urllib2.Request(url = redirect_uri)
    response = urllib2.urlopen(request)
    data = response.read()

    # The root element is expected to carry, among others, these children:
    #   <skey>xxx</skey> <wxsid>xxx</wxsid> <wxuin>xxx</wxuin>
    #   <pass_ticket>xxx</pass_ticket>
    doc = xml.dom.minidom.parseString(data)
    root = doc.documentElement

    wanted = ('skey', 'wxsid', 'wxuin', 'pass_ticket')
    found = {}
    for node in root.childNodes:
        if node.nodeName in wanted:
            # An empty element (<skey></skey>) has no text child; treat it as
            # an empty value instead of failing with IndexError.
            first = node.firstChild
            found[node.nodeName] = first.data if first is not None else ''

    # Elements missing from the response keep their previous global value.
    skey = found.get('skey', skey)
    wxsid = found.get('wxsid', wxsid)
    wxuin = found.get('wxuin', wxuin)
    pass_ticket = found.get('pass_ticket', pass_ticket)

    if skey == '' or wxsid == '' or wxuin == '' or pass_ticket == '':
        return False

    BaseRequest = {
        'Uin': int(wxuin),
        'Sid': wxsid,
        'Skey': skey,
        'DeviceID': deviceId,
    }

    return True
def webwxgetcontact():
    """Download the contact list and return only the ordinary friends.

    Entries are dropped when they are official/service accounts
    (``VerifyFlag`` bit 8 set), built-in special accounts, group chats
    (``UserName`` contains ``@@``) or the logged-in user (``My``).

    Returns:
        A list of the remaining member dicts, in the order received.
    """
    url = base_uri + '/webwxgetcontact?pass_ticket=%s&skey=%s&r=%s' % (pass_ticket, skey, int(time.time()))

    request = urllib2.Request(url = url)
    # "Content-Type" is the real header name; "ContentType" was a typo.
    request.add_header('Content-Type', 'application/json; charset=UTF-8')
    response = urllib2.urlopen(request)
    data = response.read()

    if DEBUG:
        with open(os.getcwd() + '/webwxgetcontact.json', 'wb') as f:
            f.write(data)

    dic = json.loads(data)

    # A set gives O(1) membership tests; the duplicate names that were in the
    # original list have been removed.
    SpecialUsers = frozenset([
        'newsapp', 'fmessage', 'filehelper', 'weibo', 'qqmail', 'tmessage',
        'qmessage', 'qqsync', 'floatbottle', 'lbsapp', 'shakeapp',
        'medianote', 'qqfriend', 'readerapp', 'blogapp', 'facebookapp',
        'masssendapp', 'meishiapp', 'feedsapp', 'voip', 'blogappweixin',
        'weixin', 'brandsessionholder', 'weixinreminder',
        'wxid_novlwrv3lqwv11', 'gh_22b87fa7cb3c', 'officialaccounts',
        'notification_messages', 'wxitil', 'userexperience_alarm',
    ])
    own_name = My['UserName']

    def is_friend(member):
        """Return True for entries that should stay in the list."""
        name = member['UserName']
        if member['VerifyFlag'] & 8 != 0:   # official / service account
            return False
        if name in SpecialUsers:            # built-in special account
            return False
        if name.find('@@') != -1:           # group chat
            return False
        if name == own_name:                # the logged-in user
            return False
        return True

    # Building a new list replaces the reverse-index loop that called
    # list.remove() (a linear scan) for every rejected entry.
    return [member for member in dic['MemberList'] if is_friend(member)]
def deleteMember(ChatRoomName, UserNames):
    """Remove members from a chat room via ``webwxupdatechatroom?fun=delmember``.

    Args:
        ChatRoomName: identifier of the chat room (the value returned by
            ``createChatroom``).
        UserNames: iterable of ``UserName`` strings to remove; they are sent
            as one comma-separated string.

    Returns:
        True when ``BaseResponse.Ret`` in the reply is 0, False otherwise.
        A non-empty ``BaseResponse.ErrMsg`` is printed either way.
    """
    url = base_uri + '/webwxupdatechatroom?fun=delmember&pass_ticket=%s' % (pass_ticket)
    params = {
        'BaseRequest': BaseRequest,
        'ChatRoomName': ChatRoomName,
        'DelMemberList': ','.join(UserNames),
    }

    request = urllib2.Request(url = url, data = json.dumps(params))
    # The header name is "Content-Type"; with the previous "ContentType"
    # spelling urllib2 labelled the JSON body as form-urlencoded.
    request.add_header('Content-Type', 'application/json; charset=UTF-8')
    response = urllib2.urlopen(request)
    try:
        data = response.read()
    finally:
        response.close()

    dic = json.loads(data)
    ErrMsg = dic['BaseResponse']['ErrMsg']
    if ErrMsg:
        print(ErrMsg)

    return dic['BaseResponse']['Ret'] == 0
314 | data = response.read() 315 | 316 | # print data 317 | 318 | dic = json.loads(data) 319 | MemberList = dic['MemberList'] 320 | DeletedList = [] 321 | for Member in MemberList: 322 | if Member['MemberStatus'] == 4: #被对方删除了 323 | DeletedList.append(Member['UserName']) 324 | 325 | ErrMsg = dic['BaseResponse']['ErrMsg'] 326 | if len(ErrMsg) > 0: 327 | print(ErrMsg) 328 | 329 | return DeletedList 330 | 331 | def main(): 332 | 333 | opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.CookieJar())) 334 | urllib2.install_opener(opener) 335 | 336 | if getUUID() == False: 337 | print('获取uuid失败') 338 | return 339 | 340 | showQRImage() 341 | time.sleep(1) 342 | 343 | while waitForLogin() != '200': 344 | pass 345 | 346 | os.remove(QRImagePath) 347 | 348 | if login() == False: 349 | print('登录失败') 350 | return 351 | 352 | if webwxinit() == False: 353 | print('初始化失败') 354 | return 355 | 356 | MemberList = webwxgetcontact() 357 | 358 | MemberCount = len(MemberList) 359 | print('通讯录共%s位好友') % MemberCount 360 | 361 | ChatRoomName = '' 362 | result = [] 363 | for i in range(0, int(math.ceil(MemberCount / float(MAX_GROUP_NUM)))): 364 | UserNames = [] 365 | NickNames = [] 366 | DeletedList = '' 367 | for j in range(0, MAX_GROUP_NUM): 368 | if i * MAX_GROUP_NUM + j >= MemberCount: 369 | break 370 | 371 | Member = MemberList[i * MAX_GROUP_NUM + j] 372 | UserNames.append(Member['UserName']) 373 | NickNames.append(Member['NickName'].encode('utf-8')) 374 | 375 | print('第%s组...') % (i + 1) 376 | print(', '.join(NickNames)) 377 | print('回车键继续...') 378 | raw_input('say soemthing:') 379 | 380 | # 新建群组/添加成员 381 | if ChatRoomName == '': 382 | (ChatRoomName, DeletedList) = createChatroom(UserNames) 383 | else: 384 | DeletedList = addMember(ChatRoomName, UserNames) 385 | 386 | DeletedCount = len(DeletedList) 387 | if DeletedCount > 0: 388 | result += DeletedList 389 | 390 | print('找到%s个被删好友') % DeletedCount 391 | # raw_input() 392 | 393 | # 删除成员 394 | deleteMember(ChatRoomName, 
# Work around console encoding problems on Windows.
# http://blog.csdn.net/heyuxuanzee/article/details/8442718
class UnicodeStreamFilter:
    """Text-stream wrapper that replaces characters the target cannot encode.

    Everything written is round-tripped through the target stream's encoding
    with ``errors='replace'``, so unencodable characters become ``?`` instead
    of raising ``UnicodeEncodeError``.
    """

    def __init__(self, target):
        """Wrap *target*, a stream exposing ``write`` and ``encoding``."""
        self.target = target
        self.encoding = 'utf-8'
        self.errors = 'replace'
        # Redirected streams may report no encoding (None); fall back to UTF-8.
        self.encode_to = self.target.encoding or self.encoding

    def write(self, s):
        """Write *s* (text, or UTF-8 encoded bytes) to the wrapped stream."""
        # ``bytes`` is an alias of ``str`` on Python 2, so this keeps the old
        # behaviour there while also being correct on Python 3.
        if isinstance(s, bytes):
            s = s.decode(self.encoding, self.errors)
        s = s.encode(self.encode_to, self.errors).decode(self.encode_to)
        self.target.write(s)

    def flush(self):
        """Flush the wrapped stream (needed once this object replaces sys.stdout)."""
        self.target.flush()
23 | 24 | 爬取伯尔尼德语词汇列表 25 | 26 | ## 存档(可能失效) 27 | 28 | ### db_movie.py 29 | 30 | 用爬虫来备份DB数据。使用beautifulsoup库。 31 | 32 | ### weixin.py 33 | 34 | 找出删除你的微信好友 35 | -------------------------------------------------------------------------------- /Zotero/CustomZoteroWordPlugin.md: -------------------------------------------------------------------------------- 1 | You can use this file to replace the original file of the same name in this folder (C:\Users\Ernest\AppData\Roaming\Microsoft\Word\STARTUP). 2 | 3 | 将此文件替换原路径下的同名文件(路径为C:\Users\Ernest\AppData\Roaming\Microsoft\Word\STARTUP) 4 | 5 | This revised file turns the standalone *Zotero* tab into a *Zotero* group under the *References* tab. 6 | 7 | 该修改文件将使独立的“Zotero”选项卡变成“引用”选项卡下的“Zotero”功能组 8 | 9 | As shown below. 10 | 11 | 如下图所示。 12 | 13 | ![image](https://user-images.githubusercontent.com/7478103/138471852-f1915ae5-cb6b-45f5-8484-190b02caa632.png) 14 | -------------------------------------------------------------------------------- /Zotero/Zotero.dotm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/risshun/ToolsCollection/ce7698f9cea15dcbdc64ee73efc1b7e4412589c3/Zotero/Zotero.dotm -------------------------------------------------------------------------------- /berndeutsch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 爬取伯尔尼德语词汇表 4 | 5 | @author: DannyVim 6 | """ 7 | import requests 8 | from bs4 import BeautifulSoup as bs 9 | import pandas as pd 10 | 11 | baseurl = 'https://www.berndeutsch.ch' 12 | url = baseurl + '/web/words/index/page:' 13 | 14 | #单词与解释的存储列表 15 | w_list= [] 16 | m_list= [] 17 | 18 | #抓取词汇表页面 19 | def page(url,num): 20 | demo = requests.get(url).content 21 | soup = bs(demo,'lxml') 22 | list=soup.find('tbody') 23 | for wd in list.find_all('tr'): 24 | num += 0.1 25 | print(num) 26 | wurl = wd.td.a.get('href') 27 | word = wd.td.a.text 28 | mean = 
# Fetch the full explanation when the list page shows a truncated one.
def word_mean(u, timeout=30):
    """Return the complete explanation text from a word's detail page.

    Args:
        u: site-relative URL of the word page; it is appended to ``baseurl``.
        timeout: seconds to wait for the server before giving up. Without a
            timeout a stalled connection would hang the crawl indefinitely.

    Returns:
        The stripped text of the ``<dd>`` inside the node found two siblings
        after the page's first ``<dl>``.

    Raises:
        requests.RequestException: on a network error, timeout or HTTP error
            status.
    """
    wurl = baseurl + u
    response = requests.get(wurl, timeout=timeout)
    # Fail with a clear HTTP error rather than an AttributeError while
    # parsing an error page.
    response.raise_for_status()
    soup = bs(response.content, 'lxml')
    true_mean = soup.dl.next_sibling.next_sibling.dd.text.strip()
    return true_mean
location.href.slice(location.href.indexOf('/people') + 8, -1); 27 | var export_book_link = 'https://book.douban.com/people/' + people + '/collect?start=0&sort=time&rating=all&filter=all&mode=list&export=1'; 28 | $('#book h2 .pl a:last').after(' · 导出读过的图书'); 29 | var export_book_wish_link = 'https://book.douban.com/people/' + people + '/wish?start=0&sort=time&rating=all&filter=all&mode=list&export=1'; 30 | $('#book h2 .pl a:last').after(' · 导出想读的图书'); 31 | var export_movie_link = 'https://movie.douban.com/people/' + people + '/collect?start=0&sort=time&rating=all&filter=all&mode=list&export=1'; 32 | $('#movie .pl a:last').after(' · 导出看过的电影'); 33 | var export_movie_wish_link = 'https://movie.douban.com/people/' + people + '/wish?start=0&sort=time&rating=all&filter=all&mode=list&export=1'; 34 | $('#movie .pl a:last').after(' · 导出想看的电影'); 35 | } 36 | 37 | if (location.href.indexOf('//book.douban.com/') > -1 && location.href.indexOf('export=1') > -1) { 38 | // 开始导出 39 | if (location.href.indexOf('/wish') > -1) { 40 | getPage('book_wish_export'); 41 | } else { 42 | getPage('book_export'); 43 | } 44 | } 45 | 46 | if (location.href.indexOf('//movie.douban.com/') > -1 && location.href.indexOf('export=1') > -1) { 47 | // 开始导出 48 | if (location.href.indexOf('/wish') > -1) { 49 | getPage('movie_wish_export'); 50 | } else { 51 | getPage('movie_export'); 52 | } 53 | } 54 | 55 | 56 | // 获取当前页数据 57 | function getCurrentPageList() { 58 | var items = []; 59 | 60 | $('li.item').each(function(index) { 61 | items[index] = { 62 | title: $(this).find('a').text().replace(/修改删除/, '').replace(/加入购书单/,'').replace(/>/,'').trim(), 63 | rating: ($(this).find('.date span').attr('class')) ? 
// Open the scratch database that both the collector and the exporter use.
// Collect the rows of the current page into IndexedDB, then either follow
// the "next page" link or, on the last page, export everything.
function getPage(descriptor) {
    const db = new Dexie('db_export');
    db.version(1).stores({
        items: `++id, title, rating, date, link,comment`
    });

    const pageItems = getCurrentPageList();
    db.items.bulkAdd(pageItems).then(function() {
        console.log('保存成功');
        // Link to the next page, if there is one.
        const nextHref = $('span.next a').attr('href');
        if (nextHref) {
            window.location.href = nextHref + '&export=1';
            return;
        }
        // Last page reached: fall back to a generic file name when no
        // descriptor was supplied.
        exportAll(descriptor == '' ? 'book_movie' : descriptor);
    }).catch(function(error) {
        console.log("Ooops: " + error);
    });
}

// Export every stored row to a CSV file named after fileName, then drop
// the scratch database.
function exportAll(fileName) {
    const db = new Dexie('db_export');
    db.version(1).stores({
        items: `++id, title, rating, date, link,comment`
    });

    db.items.orderBy('date').toArray().then(function(rows) {
        // The auto-increment key is internal and must not appear in the CSV.
        rows.forEach(function(row) {
            delete row.id;
        });

        JSonToCSV.setDataConver({
            data: rows,
            fileName: fileName,
            columns: {
                title: ['标题', '个人评分', '打分日期', '条目链接','评论'],
                key: ['title', 'rating', 'date', 'link','comment']
            }
        });
        db.delete();
    });
}
传入(key, value) 137 | */ 138 | setDataConver: function(obj) { 139 | var bw = this.browser(); 140 | if(bw.ie < 9) return; // IE9以下的 141 | var data = obj.data, 142 | ShowLabel = typeof obj.showLabel === 'undefined' ? true : obj.showLabel, 143 | fileName = (obj.fileName || 'UserExport') + '.csv', 144 | columns = obj.columns || { 145 | title: [], 146 | key: [], 147 | formatter: undefined 148 | }; 149 | ShowLabel = typeof ShowLabel === 'undefined' ? true : ShowLabel; 150 | var row = "", CSV = '', key; 151 | // 如果要现实表头文字 152 | if (ShowLabel) { 153 | // 如果有传入自定义的表头文字 154 | if (columns.title.length) { 155 | columns.title.map(function(n) { 156 | row += n + ','; 157 | }); 158 | } else { 159 | // 如果没有,就直接取数据第一条的对象的属性 160 | for (key in data[0]) row += key + ','; 161 | } 162 | row = row.slice(0, -1); // 删除最后一个,号,即a,b, => a,b 163 | CSV += row + '\r\n'; // 添加换行符号 164 | } 165 | // 具体的数据处理 166 | data.map(function(n) { 167 | row = ''; 168 | // 如果存在自定义key值 169 | if (columns.key.length) { 170 | columns.key.map(function(m) { 171 | row += '"' + (typeof columns.formatter === 'function' ? columns.formatter(m, n[m]) || n[m] : n[m]) + '",'; 172 | }); 173 | } else { 174 | for (key in n) { 175 | row += '"' + (typeof columns.formatter === 'function' ? 
columns.formatter(key, n[key]) || n[key] : n[key]) + '",'; 176 | } 177 | } 178 | row.slice(0, row.length - 1); // 删除最后一个, 179 | CSV += row + '\r\n'; // 添加换行符号 180 | }); 181 | if(!CSV) return; 182 | this.SaveAs(fileName, CSV); 183 | }, 184 | SaveAs: function(fileName, csvData) { 185 | var bw = this.browser(); 186 | if(!bw.edge || !bw.ie) { 187 | var alink = document.createElement("a"); 188 | alink.id = "linkDwnldLink"; 189 | alink.href = this.getDownloadUrl(csvData); 190 | document.body.appendChild(alink); 191 | var linkDom = document.getElementById('linkDwnldLink'); 192 | linkDom.setAttribute('download', fileName); 193 | linkDom.click(); 194 | document.body.removeChild(linkDom); 195 | } 196 | else if(bw.ie >= 10 || bw.edge == 'edge') { 197 | var _utf = "\uFEFF"; 198 | var _csvData = new Blob([_utf + csvData], { 199 | type: 'text/csv' 200 | }); 201 | navigator.msSaveBlob(_csvData, fileName); 202 | } 203 | else { 204 | var oWin = window.top.open("about:blank", "_blank"); 205 | oWin.document.write('sep=,\r\n' + csvData); 206 | oWin.document.close(); 207 | oWin.document.execCommand('SaveAs', true, fileName); 208 | oWin.close(); 209 | } 210 | }, 211 | getDownloadUrl: function(csvData) { 212 | var _utf = "\uFEFF"; // 为了使Excel以utf-8的编码模式,同时也是解决中文乱码的问题 213 | if (window.Blob && window.URL && window.URL.createObjectURL) { 214 | csvData = new Blob([_utf + csvData], { 215 | type: 'text/csv' 216 | }); 217 | return URL.createObjectURL(csvData); 218 | } 219 | // return 'data:attachment/csv;charset=utf-8,' + _utf + encodeURIComponent(csvData); 220 | }, 221 | browser: function() { 222 | var Sys = {}; 223 | var ua = navigator.userAgent.toLowerCase(); 224 | var s; 225 | var dummy = (s = ua.indexOf('edge') !== - 1 ? Sys.edge = 'edge' : ua.match(/rv:([\d.]+)\) like gecko/)) ? Sys.ie = s[1]: 226 | (s = ua.match(/msie ([\d.]+)/)) ? Sys.ie = s[1] : 227 | (s = ua.match(/firefox\/([\d.]+)/)) ? Sys.firefox = s[1] : 228 | (s = ua.match(/chrome\/([\d.]+)/)) ? 
def text_parser(path):
    """Return the cleaned text of the first plain-text or HTML part of an .eml file.

    The message is walked in document order and the first part whose subtype
    is ``plain`` or ``html`` decides the result:

    * plain: the mail-client signature line is removed, everything from the
      first ``----------------`` separator on is cut, and the text is stripped.
    * html: the markup is converted with the module-level ``h2t`` converter,
      stripped, and the characters matched by the final ``replace`` removed.

    Args:
        path: path of the .eml file.

    Returns:
        The extracted text, or None when the message has no such part.

    Raises:
        UnicodeDecodeError: when the payload is neither valid UTF-8 nor GBK.
    """
    # Parse from bytes: reading in text mode decoded the raw message with the
    # platform's default encoding, which corrupts 8-bit bodies.
    with open(path, 'rb') as eml:
        message = email.message_from_binary_file(eml)

    def decode(payload):
        """Decode payload bytes as UTF-8, falling back to GBK."""
        try:
            return str(payload, encoding='utf-8')
        except UnicodeDecodeError:
            return str(payload, encoding='gbk')

    # The old guard ``m.get_content_type!='mixed'`` compared a bound method
    # with a string and was therefore always true; every message is walked.
    for part in message.walk():
        subtype = part.get_content_subtype()
        if subtype == 'plain':
            text = decode(part.get_payload(decode=True))
            text = text.replace('--\n发自我的网易邮箱平板适配版','')
            text = text.split('----------------')[0]
            return text.strip()
        if subtype == 'html':
            text = decode(part.get_payload(decode=True))
            text = h2t.handle(text)
            text = text.strip()
            # NOTE(review): the replaced character is reproduced as it appears
            # in the source; confirm whether a non-breaking space was intended.
            text = text.replace(' ','')
            return text
    return None
# -*- coding: utf-8 -*-

import re
from collections import OrderedDict
from pprint import pprint


# Discipline header: the digits are the discipline code, the captured word
# characters are its name.
KEY0 = re.compile(r'学科代码及名称:\d+(\w+)')
# Grade line: A-C with an optional + or -.
KEY1 = re.compile(r'\n([A-C][\+-]?)')
# Five-digit code followed by a Chinese name. The lookahead replaces a
# consuming ``[^0-9]``, which needed one extra character after the name and
# so cut the last character off any name at the very end of a section.
DATA = re.compile(r'([0-9]{5}) *([\u4e00-\u9fa5]+)(?![0-9])')


def groups(content, regex):
    """Split *content* on *regex* and map each captured key to the text after it.

    *regex* must have exactly one capturing group. Text before the first
    match is discarded.
    """
    parts = regex.split(content)
    return OrderedDict(zip(parts[1::2], parts[2::2]))


def read_data(file_name, encoding=None):
    """Parse the evaluation text file into flat records.

    Args:
        file_name: path of the text file.
        encoding: text encoding passed to ``open``; None keeps the platform
            default (the previous behaviour).

    Returns:
        A list of ``(discipline, grade, code, name)`` tuples in file order.
    """
    with open(file_name, 'r', encoding=encoding) as handle:
        content = handle.read()
    return [
        (key0, key1, code, name)
        for key0, part0 in groups(content, KEY0).items()
        for key1, part1 in groups(part0, KEY1).items()
        for code, name in DATA.findall(part1)
    ]


def main():
    """Pretty-print the parsed records of the bundled data file."""
    pprint(read_data(r"D:/Github/ToolsCollection/全国高校学科评估结果.txt"))


if __name__ == '__main__':
    main()