├── .gitignore
├── README.md
├── img
    ├── 1564572090441.png
    ├── 1564572149417.png
    └── 1564572879319.png
├── python之战.xlsx
├── script.py
└── xpath_example.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | .idea/


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 快速获取微信文章的标题及链接导出到xlsx
 2 | 
 3 | > 起因：关注了几个优质的公众号进行学习，发现PC端的微信在翻阅历史文章的操作上严重缺乏用户体验。
 4 | >
 5 | > 目的：将作者所有的历史文章的标题及链接保存到xlsx方便查阅
 6 | 
 7 | ## 项目地址
 8 | 
 9 | https://github.com/iicey/mitm
10 | 
11 | ## 使用说明
12 | 
13 | ### 安装第三方库(Python3)
14 | 
15 | ```
16 | pip install openpyxl
17 | pip install mitmproxy
18 | ```
19 | 
20 | ### 设置代理(127.0.0.1:8080)
21 | 
22 | ![1564572090441](https://github.com/iicey/mitm/blob/master/img/1564572090441.png)
23 | 
24 | ![1564572149417](https://github.com/iicey/mitm/blob/master/img/1564572149417.png)
25 | 
26 | ### 安装证书
27 | 
28 | 访问 http://mitm.it/ 并安装 Windows 证书
29 | 
30 | ### 启动脚本
31 | 
32 | 开启cmd切换到mitm目录下，执行下面这段代码
33 | 
34 | ```
35 | mitmdump -s script.py
36 | ```
37 | 
38 | ### 最后一步
39 | 
40 | 点开PC微信里微信公众号的列表页，向下滑动即可
41 | 
42 | ![1564572879319](https://github.com/iicey/mitm/blob/master/img/1564572879319.png)


--------------------------------------------------------------------------------
/img/1564572090441.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iicey/mitm/81320169f2c6295d954d4705d8a604e31083c6cc/img/1564572090441.png


--------------------------------------------------------------------------------
/img/1564572149417.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iicey/mitm/81320169f2c6295d954d4705d8a604e31083c6cc/img/1564572149417.png


--------------------------------------------------------------------------------
/img/1564572879319.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iicey/mitm/81320169f2c6295d954d4705d8a604e31083c6cc/img/1564572879319.png


--------------------------------------------------------------------------------
/python之战.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iicey/mitm/81320169f2c6295d954d4705d8a604e31083c6cc/python之战.xlsx


--------------------------------------------------------------------------------
/script.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | import time
 4 | import mitmproxy.http
 5 | from openpyxl import Workbook, load_workbook
 6 | 
 7 | 
 8 | def request(flow: mitmproxy.http.HTTPFlow):
 9 |     pass
10 | 
11 | 
def _collect_rows(payload):
    """Group ``[publish_time, title, content_url]`` rows by article author.

    Args:
        payload: The decoded JSON body of a ``profile_ext`` response.

    Returns:
        A dict mapping author name to the list of rows found for that
        author, in the order they appear in the payload. Empty when the
        payload carries no usable message list.
    """
    raw_list = payload.get('general_msg_list') if isinstance(payload, dict) else None
    if not raw_list:
        return {}
    try:
        # The message list is itself a JSON document embedded as a string.
        inner = json.loads(raw_list)
    except (TypeError, ValueError):
        return {}
    messages = inner.get('list') if isinstance(inner, dict) else None

    rows_by_author = {}
    for message in messages or []:
        if not isinstance(message, dict):
            continue
        article = message.get('app_msg_ext_info')
        meta = message.get('comm_msg_info')
        # Entries lacking either section cannot produce a row; skip them
        # instead of failing on ``None.get``.
        if not article or not meta:
            continue
        publish_time = time.strftime(
            "%Y-%m-%d %H:%M:%S", time.localtime(meta.get('datetime'))
        )
        # A missing author is treated like an empty one so the file name
        # can still be built.
        author = article.get('author') or ''
        rows_by_author.setdefault(author, []).append(
            [publish_time, article.get('title'), article.get('content_url')]
        )
    return rows_by_author


def _append_rows(author, rows):
    """Append *rows* to ``<author>.xlsx``, creating the workbook if needed."""
    filename = author + '.xlsx'
    try:
        wb = load_workbook(filename)
    except FileNotFoundError:
        wb = Workbook()
    ws = wb.active
    for row in rows:
        ws.append(row)
    # Save once per file rather than once per row.
    wb.save(filename=filename)


def response(flow: mitmproxy.http.HTTPFlow):
    """Response hook: record article metadata from ``profile_ext`` responses.

    For every response whose request URL contains ``profile_ext``, the body
    is parsed as JSON and each listed article is appended as a
    ``[publish_time, title, content_url]`` row to ``<author>.xlsx`` in the
    current working directory. Responses that are not JSON, or that carry
    no message list, are ignored.

    Args:
        flow: The HTTP flow whose response was just received.
    """
    if 'profile_ext' not in flow.request.url:
        return
    try:
        result = json.loads(flow.response.get_text())
    except (TypeError, ValueError):
        # Body is missing or not JSON (json.JSONDecodeError is a ValueError).
        return
    for author, rows in _collect_rows(result).items():
        _append_rows(author, rows)
31 | 


--------------------------------------------------------------------------------
/xpath_example.py:
--------------------------------------------------------------------------------
 1 | detail = Selector(response)
 2 | 
 3 | content = "\n\n".join(detail.xpath('//*[@id="js_content"]//text()').extract())
 4 | content = re.sub(r"\n+\s+", "\n\n", content)
 5 | pics = detail.xpath('//*[@id="js_content"]//img/@data-src').extract()
 6 | author = re.findall("var nickname = \"(.*?)\"", response.text)[0]
 7 | weixinId = detail.xpath('//*[@class="profile_meta_label"]'
 8 |                         '[contains(text(),"微信号")]/following-sibling::span/text()').extract_first()
 9 | aId = re.findall("var biz = \"(.*?)\"", response.text)[0]
10 | article_time = re.findall("var ct = \"(.*?)\";", response.text)[0]
11 | date = str(datetime.datetime.fromtimestamp(int(article_time)).strftime('%Y-%m-%d %H:%M:%S'))
12 | 


--------------------------------------------------------------------------------