├── .gitignore ├── README.md ├── img ├── 1564572090441.png ├── 1564572149417.png └── 1564572879319.png ├── python之战.xlsx ├── script.py └── xpath_example.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | .idea/ 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 快速获取微信文章的标题及链接导出到xlsx 2 | 3 | > 起因:关注了几个优质的公众号进行学习,发现PC端的微信在翻阅历史文章的操作上严重缺乏用户体验。 4 | > 5 | > 目的:将作者所有的历史文章的标题及链接保存到xlsx方便查阅 6 | 7 | ## 项目地址 8 | 9 | https://github.com/iicey/mitm 10 | 11 | ## 使用说明 12 | 13 | ### 安装第三方库(Python3) 14 | 15 | ``` 16 | pip install openpyxl 17 | pip install mitmproxy 18 | ``` 19 | 20 | ### 设置代理(127.0.0.1:8080) 21 | 22 | ![1564572090441](https://github.com/iicey/mitm/blob/master/img/1564572090441.png) 23 | 24 | ![1564572149417](https://github.com/iicey/mitm/blob/master/img/1564572149417.png) 25 | 26 | ### 安装证书 27 | 28 | 访问http://mitm.it/ 安装Windows证书 29 | 30 | ### 启动脚本 31 | 32 | 开启cmd切换到mitm目录下,执行下面这段代码 33 | 34 | ``` 35 | mitmdump -s script.py 36 | ``` 37 | 38 | ### 最后一步 39 | 40 | 点开PC微信里微信公众号的列表页,向下滑动即可 41 | 42 | ![1564572879319](https://github.com/iicey/mitm/blob/master/img/1564572879319.png) -------------------------------------------------------------------------------- /img/1564572090441.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iicey/mitm/81320169f2c6295d954d4705d8a604e31083c6cc/img/1564572090441.png -------------------------------------------------------------------------------- /img/1564572149417.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iicey/mitm/81320169f2c6295d954d4705d8a604e31083c6cc/img/1564572149417.png -------------------------------------------------------------------------------- /img/1564572879319.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iicey/mitm/81320169f2c6295d954d4705d8a604e31083c6cc/img/1564572879319.png -------------------------------------------------------------------------------- /python之战.xlsx: 
"""mitmproxy addon script: export WeChat article titles and links to xlsx.

Run with ``mitmdump -s script.py``. Every intercepted response whose URL
contains ``profile_ext`` is parsed, and one row per article
(publish time, title, link) is appended to ``<author>.xlsx`` in the
current working directory.
"""
import json
import time

import mitmproxy.http
from openpyxl import Workbook, load_workbook

# Path separators and characters rejected in Windows file names. The author
# name comes from the intercepted response, so it must not be trusted as a
# file name as-is.
_UNSAFE_FILENAME_CHARS = str.maketrans({char: '_' for char in '\\/:*?"<>|'})

_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"


def request(flow: mitmproxy.http.HTTPFlow):
    """mitmproxy request hook; requests pass through unmodified."""


def _workbook_path(author):
    """Return a safe ``<author>.xlsx`` file name for *author*.

    Unsafe characters are replaced by ``_`` and leading/trailing dots and
    spaces are dropped; an empty or missing author maps to ``unknown.xlsx``.
    """
    name = author if isinstance(author, str) else ''
    name = name.translate(_UNSAFE_FILENAME_CHARS).strip(' .')
    return ''.join([name or 'unknown', '.xlsx'])


def _format_timestamp(timestamp):
    """Format a Unix timestamp as local time; return '' if it is unusable."""
    # time.localtime(None) silently means "now", which would record a wrong
    # publish time, so a missing timestamp is handled explicitly.
    if timestamp is None:
        return ''
    try:
        return time.strftime(_TIME_FORMAT, time.localtime(timestamp))
    except (TypeError, ValueError, OverflowError, OSError):
        return ''


def _parse_articles(text):
    """Parse a ``profile_ext`` response body into rows grouped by author.

    Returns a dict mapping the raw author value to a list of
    ``[publish_time, title, content_url]`` rows, in response order.
    Bodies that are not the expected JSON yield an empty dict.
    """
    try:
        result = json.loads(text)
        # 'general_msg_list' is itself a JSON document encoded as a string.
        msg_list = json.loads(result.get('general_msg_list'))
    except (AttributeError, TypeError, ValueError):
        return {}
    if not isinstance(msg_list, dict):
        return {}

    rows_by_author = {}
    for item in msg_list.get('list') or []:
        if not isinstance(item, dict):
            continue
        info = item.get('app_msg_ext_info')
        # Entries without article info (presumably non-article messages --
        # verify against real traffic) have nothing to export; skip them
        # instead of failing the whole batch.
        if not isinstance(info, dict):
            continue
        comm_info = item.get('comm_msg_info')
        timestamp = comm_info.get('datetime') if isinstance(comm_info, dict) else None
        row = [_format_timestamp(timestamp), info.get('title'), info.get('content_url')]
        rows_by_author.setdefault(info.get('author'), []).append(row)
    return rows_by_author


def _append_rows(path, rows):
    """Append *rows* to the active sheet of the workbook at *path*.

    The workbook is created when the file does not exist yet.
    """
    try:
        workbook = load_workbook(path)
    except FileNotFoundError:
        workbook = Workbook()
    sheet = workbook.active
    for row in rows:
        sheet.append(row)
    workbook.save(filename=path)


def response(flow: mitmproxy.http.HTTPFlow):
    """mitmproxy response hook: export article rows from ``profile_ext`` responses.

    Responses for other URLs are ignored. Each workbook is loaded and saved
    once per author per response rather than once per article.
    """
    if 'profile_ext' not in flow.request.url:
        return
    rows_by_author = _parse_articles(flow.response.get_text())
    for author, rows in rows_by_author.items():
        _append_rows(_workbook_path(author), rows)
'[contains(text(),"微信号")]/following-sibling::span/text()').extract_first() 9 | aId = re.findall("var biz = \"(.*?)\"", response.text)[0] 10 | article_time = re.findall("var ct = \"(.*?)\";", response.text)[0] 11 | date = str(datetime.datetime.fromtimestamp(int(article_time)).strftime('%Y-%m-%d %H:%M:%S')) 12 | --------------------------------------------------------------------------------