"""Crawl one Weibo user's posts from the m.weibo.cn API into MongoDB.

This script was published as the WeiboList repository, whose layout is:
``.gitignore`` (ignores ``/.idea``), ``README.md`` and ``weibo.py``.

README note (updated 2020/2/10): see
https://github.com/Python3WebSpider/WeiboList/issues/9
"""
import requests
from urllib.parse import urlencode
from pyquery import PyQuery as pq
from pymongo import MongoClient

base_url = 'https://m.weibo.cn/api/container/getIndex?'
headers = {
    'Host': 'm.weibo.cn',
    'Referer': 'https://m.weibo.cn/u/2830678474',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}
client = MongoClient()
db = client['weibo']
collection = db['weibo']
max_page = 10
# Seconds to wait for the API before giving up on a page, so that one
# stalled connection cannot hang the whole crawl.
request_timeout = 10


def get_page(page):
    """Fetch one page of the hard-coded user's post list.

    Args:
        page: Page number sent as the ``page`` query parameter.

    Returns:
        A ``(decoded_json, page)`` tuple when the server answers with
        HTTP 200 and a JSON body; ``None`` on any other status code, on a
        network error/timeout, or when the body is not valid JSON.
    """
    params = {
        'type': 'uid',
        'value': '2830678474',
        'containerid': '1076032830678474',
        'page': page
    }
    url = base_url + urlencode(params)
    try:
        response = requests.get(url, headers=headers, timeout=request_timeout)
        if response.status_code == 200:
            return response.json(), page
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection errors and timeouts;
        # ValueError covers a body that cannot be decoded as JSON.
        print('Error', e.args)
    return None


def parse_page(json, page: int):
    """Yield one summary dict per post card found in an API response.

    Args:
        json: Decoded response as returned by ``get_page``; a falsy value
            yields nothing.
        page: Page number the response belongs to.

    Yields:
        Dicts with the keys ``id``, ``text`` (markup stripped via
        PyQuery), ``attitudes``, ``comments`` and ``reposts``.
    """
    if not json:
        return
    # 'data' or 'cards' may be absent or null; treat both as "no cards".
    cards = (json.get('data') or {}).get('cards') or []
    for index, card in enumerate(cards):
        # NOTE(review): the second card of the first page is skipped on
        # purpose; the reason is not visible in this file (possibly
        # related to issue #9 mentioned in the README) -- confirm.
        if page == 1 and index == 1:
            continue
        mblog = card.get('mblog')
        if not mblog:
            # Cards without an 'mblog' payload carry no post to store.
            continue
        text = mblog.get('text')
        yield {
            'id': mblog.get('id'),
            # PyQuery cannot parse None or an empty document.
            'text': pq(text).text() if text else '',
            'attitudes': mblog.get('attitudes_count'),
            'comments': mblog.get('comments_count'),
            'reposts': mblog.get('reposts_count'),
        }


def save_to_mongo(result):
    """Insert one post dict into the module-level MongoDB collection.

    Args:
        result: Document to store, as yielded by ``parse_page``.
    """
    # insert_one replaces Collection.insert, which PyMongo 4 removed.
    if collection.insert_one(result).inserted_id is not None:
        print('Saved to Mongo')


def main():
    """Crawl pages 1..max_page, printing and storing every parsed post."""
    for page in range(1, max_page + 1):
        fetched = get_page(page)
        if fetched is None:
            # Request failed or returned a non-200 status; skip this page.
            continue
        for result in parse_page(*fetched):
            print(result)
            save_to_mongo(result)


if __name__ == '__main__':
    main()