├── comment.txt ├── readme_image └── demo.jpg ├── README.md ├── test.py ├── analysis.py ├── index.py └── comment.py /comment.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /readme_image/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NateHuangHao/sina_comment/HEAD/readme_image/demo.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sina_comment 2 | 一个基于SnowNLP的新浪微博评论情感分析工具 3 | 4 | 5 | 1.爬取评论 6 | 7 | 2.存入数据库 8 | 9 | 3.情感分析 10 | 11 | 4.输出评论 12 | 13 | 14 | # 效果如下: 15 | ![](https://github.com/NateHuangHao/sina_comment/raw/master/readme_image/demo.jpg) 16 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from snownlp import SnowNLP 3 | text = '你好啊!!' 4 | s = SnowNLP(text) 5 | print u'内容:%s' % text.decode('utf-8') 6 | print u'情感值:%s' % s.sentiments 7 | 8 | if s.sentiments > 0.5: 9 | print u'情感分析:积极' 10 | elif s.sentiments <= 0.5: 11 | print u'情感分析:消极' -------------------------------------------------------------------------------- /analysis.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from snownlp import SnowNLP 3 | import sys,pymysql 4 | 5 | conn = pymysql.connect(host='127.0.0.1',user='root',password='',charset="utf8",use_unicode = False) #连接服务器 6 | cur = conn.cursor() 7 | select_sql = "SELECT comment.text FROM sina_comment.comment" 8 | 9 | cur.execute(select_sql) 10 | rows = cur.fetchall() 11 | 12 | def snowanalysis(textlist) : 13 | fileObj = open('comment.txt','w+') 14 | for item in textlist : 15 | text = item[0].decode('utf-8') 16 | if text != '': 17 | # s = SnowNLP(text) 18 | # print text 19 | fileObj.write(item[0].decode("utf-8").encode("utf-8")) 20 | # print s.sentiments 21 | print '\r\n' 22 | 23 | snowanalysis(rows) 24 | 25 | -------------------------------------------------------------------------------- /index.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from weibo import APIClient 3 | import webbrowser 4 | 5 | import sys 6 | reload(sys) 7 | sys.setdefaultencoding('utf-8') 8 | 9 | APP_KEY = '1165200782' 10 | APP_SECRET = '5c57f343fb4c540e2a86ae1d28fbed2b' 11 | CALLBACK_URL = 'https://api.weibo.com/oauth2/default.html' 12 | 13 | client = APIClient(app_key=APP_KEY, app_secret=APP_SECRET, redirect_uri=CALLBACK_URL) 14 | url = client.get_authorize_url() 15 | webbrowser.open_new(url) 16 | 17 | print 'input your code , and enter:' 18 | 19 | code = raw_input() 20 | r = client.request_access_token(code) 21 | access_token = r.access_token 22 | expires_in = r.expires_in 23 | client.set_access_token(access_token, expires_in) 24 | 25 | result = client.comments.show.get(id = 4160547165300149,count = 200,page = 1) 26 | 27 | for st in result.comments: 28 | text = st.text -------------------------------------------------------------------------------- /comment.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from snownlp import SnowNLP 3 | import sys,re,time,requests,pymysql 4 | 5 | # weibo_id = input('输入单条微博ID:') 6 | weibo_id = '4160547165300149' 7 | # url='https://m.weibo.cn/single/rcList?format=cards&id=' + weibo_id + '&type=comment&hot=1&page={}' #爬热门评论 8 | url = 'https://m.weibo.cn/api/comments/show?id=' + weibo_id + '&page={}' #爬时间排序评论 9 | headers = { 10 | 'User-agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0', 11 | 'Host' : 'm.weibo.cn', 12 | 'Accept' : 'application/json, text/plain, */*', 13 | 'Accept-Language' : 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3', 14 | 'Accept-Encoding' : 'gzip, deflate, br', 15 | 'Referer' : 'https://m.weibo.cn/status/' + weibo_id, 16 | 'DNT' : '1', 17 | 'Connection' : 'keep-alive', 18 | } 19 | i = 1 20 | comment_num = 1 21 | 22 | conn = pymysql.connect(host='127.0.0.1',user='root',password='',charset="utf8",use_unicode = False) #连接服务器 23 | cur = conn.cursor() 24 | insert_sql = "insert into sina_comment.comment(comment_id,user_name,text,source) values(%s,%s,%s,%s)" 25 | select_sql = "SELECT * FROM sina_comment.comment WHERE comment_id = %s" 26 | 27 | num = 1 28 | 29 | # print u'正在插入数据。。。。。' 30 | while True: 31 | # if i==1: #爬热门评论 32 | # r = requests.get(url = url.format(i),headers = headers) 33 | # comment_page = r.json()[1]['card_group'] 34 | # else: 35 | # r = requests.get(url = url.format(i),headers = headers) 36 | # comment_page = r.json()[0]['card_group'] 37 | r = requests.get(url = url.format(i),headers = headers) #爬时间排序评论 38 | comment_page = r.json()['data'] 39 | if r.status_code == 200: 40 | try: 41 | # print u'正在读取第 %s 页的评论' % i 42 | for j in range(0,len(comment_page)): 43 | 44 | # print u'第 %s 条评论' % comment_num 45 | user = comment_page[j] 46 | 47 | comment_id = user['user']['id'] 48 | print u'评论ID:%s' % comment_id 49 | 50 | user_name = user['user']['screen_name'] 51 | print u'用户名:%s' % user_name 52 | 53 | created_at = user['created_at'] 54 | print u'创建时间:%s' % created_at 55 | 56 | text = re.sub('<.*?>|回复<.*?>:|[\U00010000-\U0010ffff]|[\uD800-\uDBFF][\uDC00-\uDFFF]','',user['text']) 57 | print u'评论内容:%s' % text 58 | 59 | source = user['source'] 60 | print u'用户机型:%s' % source 61 | 62 | if text != '': 63 | s = SnowNLP(text) 64 | print u'情感值:%s' % s.sentiments 65 | if s.sentiments >= 0.5: 66 | print u'情感分析:积极' 67 | elif s.sentiments < 0.5: 68 | print u'情感分析:消极' 69 | 70 | print '\r\n' 71 | 72 | 73 | # insert_param = (comment_id,user_name,text,source) 74 | # select_param = (comment_id) 75 | # cur.execute(select_sql,select_param) 76 | # rows = cur.fetchall() 77 | 78 | # if (len(rows) == 0): 79 | # try: 80 | # A = cur.execute(insert_sql,insert_param) 81 | # conn.commit() 82 | # print u'已插入 %s 条数据' % num 83 | # num += 1 84 | # except Exception as e: 85 | # print e 86 | # conn.rollback() 87 | 88 | comment_num+=1 89 | i+=1 90 | time.sleep(5) 91 | except: 92 | i+1 93 | pass 94 | else: 95 | break 96 | 97 | 98 | 99 | 100 | --------------------------------------------------------------------------------