├── 情感分析结果.xlsx
├── myDict
    ├── punc_dict.txt
    ├── degree_dict.txt
    ├── not_dict.txt
    └── stop_dict.txt
└── WRD_DataMining.ipynb


/情感分析结果.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CheneyWoo/Weibo-Sentiment-Calculating/master/情感分析结果.xlsx


--------------------------------------------------------------------------------
/myDict/punc_dict.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CheneyWoo/Weibo-Sentiment-Calculating/master/myDict/punc_dict.txt


--------------------------------------------------------------------------------
/myDict/degree_dict.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CheneyWoo/Weibo-Sentiment-Calculating/master/myDict/degree_dict.txt


--------------------------------------------------------------------------------
/myDict/not_dict.txt:
--------------------------------------------------------------------------------
 1 | 不
 2 | 不是
 3 | 并不
 4 | 没
 5 | 没有
 6 | 无
 7 | 非
 8 | 并非
 9 | 莫
10 | 弗
11 | 勿
12 | 毋
13 | 未
14 | 尚未
15 | 否
16 | 别
17 | 無
18 | 休
19 | 难道
20 | 差


--------------------------------------------------------------------------------
/myDict/stop_dict.txt:
--------------------------------------------------------------------------------
  1 | !
  2 | ！
  3 | ，
  4 | "
  5 | #
  6 | $
  7 | %
  8 | &
  9 | '
 10 | (
 11 | )
 12 | *
 13 | +
 14 | ,
 15 | -
 16 | --
 17 | .
 18 | ..
 19 | ...
 20 | ......
 21 | @
 22 | ./
 23 | .一
 24 | .数
 25 | .日
 26 | \
 27 | \\
 28 | /
 29 | //
 30 | 0
 31 | 1
 32 | 2
 33 | 3
 34 | 4
 35 | 5
 36 | 6
 37 | 7
 38 | 8
 39 | 9
 40 | :
 41 | ://
 42 | ::
 43 | ;
 44 | <
 45 | =
 46 | >
 47 | >>
 48 | ？
 49 | ?
 50 | @
 51 | A
 52 | Lex
 53 | [
 54 | \
 55 | ]
 56 | ^
 57 | _
 58 | `
 59 | exp
 60 | sub
 61 | sup
 62 | |
 63 | }
 64 | ~
 65 | ~~~~
 66 | ·
 67 | ×
 68 | ×××
 69 | Δ
 70 | Ψ
 71 | γ
 72 | μ
 73 | φ
 74 | φ．
 75 | В
 76 | —
 77 | ——
 78 | ———
 79 | ‘
 80 | ’
 81 | ’‘
 82 | “
 83 | ”
 84 | ”，
 85 | …
 86 | ……
 87 | …………………………………………………③
 88 | ′∈
 89 | ′｜
 90 | ℃
 91 | Ⅲ
 92 | ↑
 93 | →
 94 | ∈［
 95 | ∪φ∈
 96 | ≈
 97 | ①
 98 | ②
 99 | ②ｃ
100 | ③
101 | ③］
102 | ④
103 | ⑤
104 | ⑥
105 | ⑦
106 | ⑧
107 | ⑨
108 | ⑩
109 | ──
110 | ■
111 | ▲
112 | 　
113 | 、
114 | 。
115 | 〈
116 | 〉
117 | 《
118 | 》
119 | 》），
120 | 」
121 | 『
122 | 』
123 | 【
124 | 】
125 | 〔
126 | 〕
127 | 


--------------------------------------------------------------------------------
/WRD_DataMining.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 5,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from snownlp import SnowNLP\n",
 10 |     "s = SnowNLP(content_list[8])\n",
 11 |     "for sentence in s.sentences:\n",
 12 |     "    print sentence"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 6,
 18 |    "metadata": {},
 19 |    "outputs": [
 20 |     {
 21 |      "data": {
 22 |       "text/plain": [
 23 |        "0.5"
 24 |       ]
 25 |      },
 26 |      "execution_count": 6,
 27 |      "metadata": {},
 28 |      "output_type": "execute_result"
 29 |     }
 30 |    ],
 31 |    "source": [
 32 |     "s1 = SnowNLP(s.sentences[0])\n",
 33 |     "s1.sentiments"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": 1,
 39 |    "metadata": {},
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "from collections import defaultdict\n",
 43 |     "import os\n",
 44 |     "import re\n",
 45 |     "import codecs\n",
 46 |     "import sys\n",
 47 |     "reload(sys)\n",
 48 |     "sys.setdefaultencoding(\"utf-8\")"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 57,
 54 |    "metadata": {},
 55 |    "outputs": [],
 56 |    "source": [
 57 |     "#coding=utf-8\n",
 58 |     "import pandas as pd\n",
 59 |     "import numpy as np\n",
 60 |     "import csv\n",
 61 |     "\n",
 62 |     "df1 = pd.read_csv('Kunshan_Case/凤凰网视频.csv')\n",
 63 |     "content_list = list(df1['转发微博内容'])[8000:]"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 58,
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "import math\n",
 73 |     "for i in range(len(content_list)):\n",
 74 |     "    if isinstance(content_list[i],float) and math.isnan(float(content_list[i])):\n",
 75 |     "        print i,content_list[i]\n",
 76 |     "        content_list[i] = 0"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": 59,
 82 |    "metadata": {},
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "for i in range(140, len(content_list)):\n",
 86 |     "    content_list[i] = re.sub(r'//.*:','。',content_list[i])\n",
 87 |     "    content_list[i] = re.sub(r'//.*：','。',content_list[i])\n",
 88 |     "    #print i,content_list[i]"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": 60,
 94 |    "metadata": {},
 95 |    "outputs": [],
 96 |    "source": [
 97 |     "import jieba\n",
 98 |     "seg_list = jieba.cut(\"你今天有点美\", cut_all = False)\n",
 99 |     "content_str = \"/ \".join(seg_list)\n",
100 |     "print content_str"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": 61,
106 |    "metadata": {},
107 |    "outputs": [],
108 |    "source": [
109 |     "result_list = []\n",
110 |     "for i in range(len(content_list)):\n",
111 |     "    if isinstance(content_list[i], str):\n",
112 |     "        seg_list = jieba.cut(content_list[i].decode('utf-8'), cut_all = False)\n",
113 |     "    content_str = \"/ \".join(seg_list)\n",
114 |     "    print content_str\n",
115 |     "    temp_list = content_str.split('/')\n",
116 |     "    result_list.append(temp_list)"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": 62,
122 |    "metadata": {},
123 |    "outputs": [],
124 |    "source": [
125 |     "#情感词\n",
126 |     "emotionDict = defaultdict()\n",
127 |     "emotionList = open('myDict/emotion_dict.txt', 'rw+')\n",
128 |     "lines = emotionList.readlines()\n",
129 |     "for item in lines:\n",
130 |     "    emotionDict[item.split('\\t')[0]] = item.split('\\t')[1][:-2]\n",
131 |     "del emotionDict['']\n",
132 |     "#否定词\n",
133 |     "notDict = defaultdict()\n",
134 |     "notList = open('myDict/not_dict.txt', 'rw+')\n",
135 |     "lines = notList.readlines()\n",
136 |     "for item in lines:\n",
137 |     "    notDict[item[:-1]] = -1\n",
138 |     "#程度副词\n",
139 |     "degreeDict = defaultdict()\n",
140 |     "degreeList = open('myDict/degree_dict.txt', 'rw+')\n",
141 |     "lines = degreeList.readlines()\n",
142 |     "for item in lines:\n",
143 |     "    degreeDict[item.split('\\t')[0]] = item.split('\\t')[1][:-2]\n",
144 |     "#结尾语气\n",
145 |     "puncDict = defaultdict()\n",
146 |     "puncDict['？'] = -1.5\n",
147 |     "puncDict['！'] = 2\n",
148 |     "puncDict['~'] = 1.2\n",
149 |     "puncDict['?'] = -1.5\n",
150 |     "puncDict['!'] = 2\n",
151 |     "#停顿\n",
152 |     "f = open('myDict/stop_dict.txt', 'rw+')\n",
153 |     "stopList = f.readlines()\n",
154 |     "for i in range(len(stopList)):\n",
155 |     "    stopList[i] = stopList[i][:-1]"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "code",
160 |    "execution_count": 63,
161 |    "metadata": {},
162 |    "outputs": [],
163 |    "source": [
164 |     "#计算句子得分\n",
165 |     "def SentScore(sentence):\n",
166 |     "    Score = 0\n",
167 |     "    emotion = 0\n",
168 |     "    degree = 1\n",
169 |     "    notword = 0\n",
170 |     "    punc = 0\n",
171 |     "    for word in sentence:\n",
172 |     "        word = word[1:]\n",
173 |     "        if word not in stopList:\n",
174 |     "            if word in emotionDict.keys() and word not in notDict.keys() and word not in degreeDict.keys():\n",
175 |     "                emotion += float(emotionDict[word.encode('utf-8')])\n",
176 |     "                #print \"emotion:\",word,float(emotionDict[word.encode('utf-8')])\n",
177 |     "            elif word in notDict.keys() and word not in degreeDict.keys():\n",
178 |     "                notword = -1\n",
179 |     "                #print \"not:\", word\n",
180 |     "            elif word in degreeDict.keys():\n",
181 |     "                degree += float(degreeDict[word.encode('utf-8')])\n",
182 |     "                #print \"degree:\", float(degreeDict[word.encode('utf-8')])\n",
183 |     "            else:\n",
184 |     "                continue\n",
185 |     "        elif word in stopList:\n",
186 |     "            #print \"stop\"\n",
187 |     "            if word in puncDict.keys():\n",
188 |     "                punc += float(puncDict[word.encode('utf-8')])\n",
189 |     "                #print \"punc:\", word, float(puncDict[word.encode('utf-8')])\n",
190 |     "            Score += ((-1)**notword)*degree*emotion+punc\n",
191 |     "    return Score"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": 64,
197 |    "metadata": {},
198 |    "outputs": [],
199 |    "source": [
200 |     "SentResult = []\n",
201 |     "for i in range(len(result_list)):\n",
202 |     "    result = SentScore(result_list[i])\n",
203 |     "    SentResult.append(result)\n",
204 |     "    print i,result\n",
205 |     "print \"Finished\""
206 |    ]
207 |   },
208 |   {
209 |    "cell_type": "code",
210 |    "execution_count": 65,
211 |    "metadata": {},
212 |    "outputs": [],
213 |    "source": [
214 |     "import xlrd \n",
215 |     "import xlwt\n",
216 |     "\n",
217 |     "f = xlwt.Workbook()\n",
218 |     "sheet1 = f.add_sheet(u'sheet1',cell_overwrite_ok=True) \n",
219 |     "for i in range(len(SentResult)):\n",
220 |     "    #SentResult[i] = MaxMinNormalization(SentResult[i], max(SentResult), min(SentResult))\n",
221 |     "    sheet1.write(i,0,SentResult[i])\n",
222 |     "print \"Finished\"\n",
223 |     "f.save('result.xls')"
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "code",
228 |    "execution_count": 18,
229 |    "metadata": {},
230 |    "outputs": [],
231 |    "source": [
232 |     "def MaxMinNormalization(x, Max, Min):\n",
233 |     "    if x>0:\n",
234 |     "        x = ((x) / Max)*10\n",
235 |     "    elif x<0:\n",
236 |     "        x = -(x) / (Min)*10\n",
237 |     "    elif x==0:\n",
238 |     "        x = 0\n",
239 |     "    return x\n",
240 |     "print SentResult[9]\n",
241 |     "print MaxMinNormalization(SentResult[9], max(SentResult), min(SentResult))"
242 |    ]
243 |   },
244 |   {
245 |    "cell_type": "code",
246 |    "execution_count": null,
247 |    "metadata": {},
248 |    "outputs": [],
249 |    "source": []
250 |   }
251 |  ],
252 |  "metadata": {
253 |   "kernelspec": {
254 |    "display_name": "Python 2",
255 |    "language": "python",
256 |    "name": "python2"
257 |   },
258 |   "language_info": {
259 |    "codemirror_mode": {
260 |     "name": "ipython",
261 |     "version": 2
262 |    },
263 |    "file_extension": ".py",
264 |    "mimetype": "text/x-python",
265 |    "name": "python",
266 |    "nbconvert_exporter": "python",
267 |    "pygments_lexer": "ipython2",
268 |    "version": "2.7.15"
269 |   }
270 |  },
271 |  "nbformat": 4,
272 |  "nbformat_minor": 2
273 | }
274 | 


--------------------------------------------------------------------------------