├── README.md
├── pufayinhang.csv
├── 中信表白活动518.xlsx
├── 中信表白活动620.xlsx
├── guangfayinhang.csv
├── pinganyinhang.csv
├── xingyeyinhang.csv
├── minshengyinhang.csv
├── zhaoshangyinhang.csv
├── zhongxinyinhang.csv
├── bank_analysis.py
└── zhongxin_repost.py

/README.md:
--------------------------------------------------------------------------------
# ZhongxinWeibo-propagation-analysis

--------------------------------------------------------------------------------
/pufayinhang.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CheneyWoo/ZhongxinWeibo-propagation-analysis/master/pufayinhang.csv

--------------------------------------------------------------------------------
/中信表白活动518.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CheneyWoo/ZhongxinWeibo-propagation-analysis/master/中信表白活动518.xlsx

--------------------------------------------------------------------------------
/中信表白活动620.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CheneyWoo/ZhongxinWeibo-propagation-analysis/master/中信表白活动620.xlsx

--------------------------------------------------------------------------------
/guangfayinhang.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CheneyWoo/ZhongxinWeibo-propagation-analysis/master/guangfayinhang.csv

--------------------------------------------------------------------------------
/pinganyinhang.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CheneyWoo/ZhongxinWeibo-propagation-analysis/master/pinganyinhang.csv

--------------------------------------------------------------------------------
/xingyeyinhang.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CheneyWoo/ZhongxinWeibo-propagation-analysis/master/xingyeyinhang.csv

--------------------------------------------------------------------------------
/minshengyinhang.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CheneyWoo/ZhongxinWeibo-propagation-analysis/master/minshengyinhang.csv

--------------------------------------------------------------------------------
/zhaoshangyinhang.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CheneyWoo/ZhongxinWeibo-propagation-analysis/master/zhaoshangyinhang.csv

--------------------------------------------------------------------------------
/zhongxinyinhang.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CheneyWoo/ZhongxinWeibo-propagation-analysis/master/zhongxinyinhang.csv

--------------------------------------------------------------------------------
/bank_analysis.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
#coding=utf-8
# Python 2 script: fetches like/comment/repost counts for a bank's Weibo posts
# from the m.weibo.cn container API and appends one row per post to a CSV file.

import urllib2
import csv
import json
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

# Logged-in mobile-Weibo cookie plus a desktop user-agent for the API requests.
myheader1 = {
    'cookie': '_T_WM=ab2659dc3d6dc49b941dfd4603194652; ALF=1537681408; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WW2JQe9LinWeoVqTw8dpugC5JpX5K-hUgL.Fozcehe0So27SoM2dJLoI7DhIsHV9PLPdcva; MLOGIN=1; SUHB=0M2EEWKWuBg39i; SSOLoginState=1535089434',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}

# Profile-weibo container endpoint; {numbers} is the page index.
base_url1 = 'https://m.weibo.cn/api/container/getIndex?containerid=2304131907753765_-_WEIBO_SECOND_PROFILE_WEIBO&page_type=03&page={numbers}'

file = open('minshengyinhang.csv', 'a+')
writer = csv.writer(file)

def get_page(url):
    # Fetch one page of the JSON API and return the raw response body.
    request = urllib2.Request(url, headers=myheader1)
    html = urllib2.urlopen(request).read()
    return html

def get_weibo_info(l, r):
    # Walk pages l..r-1 and write one CSV row per post:
    # attitudes (likes), comments, reposts, creation time.
    for number in range(l, r):
        url = base_url1.format(numbers=number)
        page = json.loads(get_page(url))
        for card in page['data']['cards']:
            mblog = card['mblog']
            data = [
                mblog['attitudes_count'],
                mblog['comments_count'],
                mblog['reposts_count'],
                mblog['created_at'],
            ]
            writer.writerow(data)
            print mblog['attitudes_count'], mblog['comments_count'], mblog['reposts_count'], mblog['created_at']

get_weibo_info(61, 65)
file.close()
--------------------------------------------------------------------------------
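Note: bank_analysis.py targets Python 2 (urllib2, reload(sys)). A minimal Python 3 sketch of the same fetch-and-parse loop is given below for reference only; the endpoint, container id, and JSON keys are taken from the script above, while the cookie value and the output filename bank_stats.csv are placeholders, not part of the repository.

import csv
import requests

# Endpoint and container id copied from bank_analysis.py; {page} is the page index.
BASE_URL = ('https://m.weibo.cn/api/container/getIndex'
            '?containerid=2304131907753765_-_WEIBO_SECOND_PROFILE_WEIBO'
            '&page_type=03&page={page}')
HEADERS = {
    'cookie': '<your m.weibo.cn cookie>',  # placeholder: paste a logged-in session cookie
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}

def fetch_post_stats(first_page, last_page, out_path='bank_stats.csv'):
    """Append one CSV row per post: likes, comments, reposts, creation time."""
    with open(out_path, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        for page in range(first_page, last_page):
            resp = requests.get(BASE_URL.format(page=page), headers=HEADERS)
            resp.raise_for_status()
            for card in resp.json().get('data', {}).get('cards', []):
                mblog = card.get('mblog')
                if not mblog:
                    continue  # skip cards that are not posts
                writer.writerow([
                    mblog['attitudes_count'],
                    mblog['comments_count'],
                    mblog['reposts_count'],
                    mblog['created_at'],
                ])

if __name__ == '__main__':
    fetch_post_stats(61, 65)
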
/zhongxin_repost.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
#coding=utf-8
# Python 2 script: recursively crawls the repost timeline of a Weibo post and
# writes one CSV row per repost (user, followers, date, reposts, depth, @-mentions).

import urllib2
import csv
import time
import json
import re
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

# Two logged-in cookies; only myheader2 is used below.
myheader1 = {
    'cookie': '_T_WM=ab2659dc3d6dc49b941dfd4603194652; ALF=1537681408; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WW2JQe9LinWeoVqTw8dpugC5JpX5K-hUgL.Fozcehe0So27SoM2dJLoI7DhIsHV9PLPdcva; MLOGIN=1; SUHB=0M2EEWKWuBg39i; SSOLoginState=1535089434',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}

myheader2 = {
    'cookie': '_T_WM=ab2659dc3d6dc49b941dfd4603194652; ALF=1538048715; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WW2JQe9LinWeoVqTw8dpugC5JpX5K-hUgL.Fozcehe0So27SoM2dJLoI7DhIsHV9PLPdcva; SUHB=0U1IKs3b8Cvw7E; SSOLoginState=1535509073; MLOGIN=1',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}

# Repost-timeline endpoint; {ids} is the post id, {numbers} is the page index.
base_url1 = 'https://m.weibo.cn/api/statuses/repostTimeline?id={ids}&page={numbers}'
base_url2 = 'https://m.weibo.cn/api/statuses/repostTimeline?id={ids}&page={numbers}'

user_url = 'https://m.weibo.cn/status/{mids}?display=0&retcode=6102'

file = open('xiaoyemeizi.csv', 'a+')
writer = csv.writer(file)

usercount = 0

def get_page(url):
    # Fetch one page of the JSON API and return the raw response body.
    request = urllib2.Request(url, headers=myheader2)
    html = urllib2.urlopen(request).read()
    return html

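# get_repostuser_info walks pages l..r-1 of the repost timeline for post `id`.
# For each repost it records the user, follower count, timestamp, repost count,
# the current depth in the repost tree (`level`), and any users @-mentioned in
# the repost text (extracted from the HTML with a regex). Whenever a repost has
# reposts of its own, the function calls itself on that repost's mid with
# level + 1, so the CSV captures the propagation tree rather than only the
# direct reposts of the original post.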
def get_repostuser_info(id, l, r, level=1):
    for number in range(l, r):
        #time.sleep(0.5)
        url = base_url2.format(ids=id, numbers=number)
        page = json.loads(get_page(url))
        for item in page['data']['data']:
            user_name = item['user']['screen_name']
            followers_count = item['user']['followers_count']
            reposts_count = item['reposts_count']
            date = item['created_at']
            text = item['text']
            mid = item['mid']

            data = [user_name, followers_count, date, reposts_count, level]

            # The repost text is HTML, so @-mentioned names sit between '>@' and the next '<'.
            friends = re.findall("(?<=>@).*?(?=<)", text)
            data.append(len(friends))
            for friend in friends:
                data.append('@' + str(friend))
            writer.writerow(data)

            # Recurse one page deep into this repost's own reposts.
            if reposts_count > 0 and mid != id:
                get_repostuser_info(mid, 1, 2, level=level + 1)

            print user_name, followers_count, date, len(friends), reposts_count, level
        #print "page" + str(number) + " over"

#get_repostuser_info(4253051688686091,1,1004,1)
get_repostuser_info(4273989696658059, 1, 61, 1)
file.close()
--------------------------------------------------------------------------------
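Like bank_analysis.py, zhongxin_repost.py is Python 2. A minimal Python 3 sketch of the same recursive repost-timeline walk is below, again for reference only; the endpoint, JSON fields, mention regex, and the starting post id 4273989696658059 come from the script above, while the cookie value and the output filename reposts.csv are placeholders.

import csv
import re
import requests

# Endpoint copied from zhongxin_repost.py; {id} is the post id, {page} the page index.
REPOST_URL = 'https://m.weibo.cn/api/statuses/repostTimeline?id={id}&page={page}'
HEADERS = {
    'cookie': '<your m.weibo.cn cookie>',  # placeholder: paste a logged-in session cookie
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}

def crawl_reposts(post_id, first_page, last_page, writer, level=1):
    """Write one row per repost: user, followers, date, reposts, depth, @-mentions."""
    for page in range(first_page, last_page):
        resp = requests.get(REPOST_URL.format(id=post_id, page=page), headers=HEADERS)
        resp.raise_for_status()
        for item in resp.json().get('data', {}).get('data', []):
            # The repost text is HTML; mentioned names sit between '>@' and the next '<'.
            mentions = re.findall(r'(?<=>@).*?(?=<)', item['text'])
            writer.writerow([
                item['user']['screen_name'],
                item['user']['followers_count'],
                item['created_at'],
                item['reposts_count'],
                level,
                len(mentions),
            ] + ['@' + m for m in mentions])
            # Follow this repost's own reposts one page deep, as the original script does.
            if item['reposts_count'] > 0 and str(item['mid']) != str(post_id):
                crawl_reposts(item['mid'], 1, 2, writer, level=level + 1)

if __name__ == '__main__':
    with open('reposts.csv', 'a', newline='', encoding='utf-8') as f:  # placeholder filename
        crawl_reposts(4273989696658059, 1, 61, csv.writer(f))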