├── .gitignore ├── LICENSE ├── README.md ├── manage.py ├── network ├── __init__.py ├── models │ ├── __init__.py │ └── retweet.py ├── urls.py └── views.py ├── sentiment ├── __init__.py ├── models │ ├── SentimentJudge.py │ ├── SentimentManager.py │ ├── __init__.py │ ├── data │ │ ├── __init__.py │ │ ├── dictionary │ │ │ ├── acronym.txt │ │ │ ├── emoticonsWithPolarity.txt │ │ │ ├── intensifier.txt │ │ │ └── stopWords.txt │ │ ├── lexicon │ │ │ ├── AFINN │ │ │ │ └── AFINN-111.txt │ │ │ ├── LiuBingLexicon │ │ │ │ ├── negative-words.txt │ │ │ │ └── positive-words.txt │ │ │ ├── MPQALexicon │ │ │ │ ├── subjclueslen1-HLTEMNLP05.README │ │ │ │ └── subjclueslen1-HLTEMNLP05.tff │ │ │ ├── NRC-Canada │ │ │ │ ├── HashtagSentimentAffLexNegLex │ │ │ │ │ ├── HS-AFFLEX-NEGLEX-bigrams.txt │ │ │ │ │ ├── HS-AFFLEX-NEGLEX-unigrams.txt │ │ │ │ │ └── readme.txt │ │ │ │ ├── NRC-Emotion-Lexicon-v0.92 │ │ │ │ │ ├── NRC-Hashtag-Emotion-Lexicon-v0.2.txt │ │ │ │ │ ├── NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt │ │ │ │ │ └── README.txt │ │ │ │ ├── NRC-Hashtag-Sentiment-Lexicon-v0.1 │ │ │ │ │ ├── README │ │ │ │ │ ├── bigrams-pmilexicon.txt │ │ │ │ │ ├── pairs-pmilexicon.txt │ │ │ │ │ ├── sentimenthashtags.txt │ │ │ │ │ └── unigrams-pmilexicon.txt │ │ │ │ ├── Sentiment140-Lexicon-v0.1 │ │ │ │ │ ├── README │ │ │ │ │ ├── bigrams-pmilexicon.txt │ │ │ │ │ ├── pairs-pmilexicon.txt │ │ │ │ │ └── unigrams-pmilexicon.txt │ │ │ │ └── Sentiment140AffLexNegLex │ │ │ │ │ ├── S140-AFFLEX-NEGLEX-bigrams.txt │ │ │ │ │ ├── S140-AFFLEX-NEGLEX-unigrams.txt │ │ │ │ │ └── readme.txt │ │ │ └── PosNegWords │ │ │ │ ├── neg_mod.txt │ │ │ │ └── pos_mod.txt │ │ ├── test │ │ │ ├── 2013-test-sms.tsv │ │ │ ├── 2013-test-sms.tsv_pos │ │ │ ├── 2013-test-tweet.tsv │ │ │ ├── 2013-test-tweet.tsv_pos │ │ │ ├── 2014-test-data-all.tsv │ │ │ ├── 2014-test-journal.tsv │ │ │ ├── 2014-test-journal.tsv_pos │ │ │ ├── 2014-test-sarcasm.tsv │ │ │ ├── 2014-test-sarcasm.tsv_pos │ │ │ ├── 2014-test-tweet.tsv │ │ │ └── 
2014-test-tweet.tsv_pos │ │ └── train │ │ │ ├── 2013-dev-data.tsv │ │ │ ├── 2013-dev-data.tsv_pos │ │ │ ├── 2013-train-data.tsv │ │ │ └── 2013-train-data.tsv_pos │ ├── models_save │ │ ├── classifier │ │ ├── classifier_01.npy │ │ ├── classifier_02.npy │ │ ├── classifier_03.npy │ │ ├── classifier_04.npy │ │ ├── lexicon │ │ └── ngrams │ ├── test_save_model.py │ └── tools │ │ ├── Lexicon.py │ │ ├── Lexicon2.py │ │ ├── __init__.py │ │ ├── ark-tweet-nlp-0.3.2.jar │ │ ├── pre_process.py │ │ ├── pre_process2.py │ │ └── read_data.py ├── urls.py └── views.py ├── static ├── __init__.py ├── css │ ├── bootstrap-switch.min.css │ ├── bootstrap-theme.css │ ├── bootstrap-theme.css.map │ ├── bootstrap-theme.min.css │ ├── bootstrap-theme.min.css.map │ ├── bootstrap.css │ ├── bootstrap.css.map │ ├── bootstrap.min.css │ ├── bootstrap.min.css.map │ ├── font-awesome.min.css │ └── style.css ├── fonts │ ├── FontAwesome.otf │ ├── fontawesome-webfont.eot │ ├── fontawesome-webfont.svg │ ├── fontawesome-webfont.ttf │ ├── fontawesome-webfont.woff │ ├── fontawesome-webfont.woff2 │ ├── glyphicons-halflings-regular.eot │ ├── glyphicons-halflings-regular.svg │ ├── glyphicons-halflings-regular.ttf │ ├── glyphicons-halflings-regular.woff │ └── glyphicons-halflings-regular.woff2 ├── image │ ├── others.jpg │ ├── sentiment_1.gif │ ├── sentiment_2.jpg │ ├── sentiment_3.jpg │ ├── topic_1.jpg │ ├── topic_3.jpg │ ├── topic_4.jpg │ └── topic_8.jpg ├── js │ ├── append_new_iframe.js │ ├── bootstrap-switch.min.js │ ├── bootstrap.js │ ├── bootstrap.min.js │ ├── d3.v3.min.js │ ├── echarts.min.js │ ├── googleMap.js │ ├── index.js │ ├── jquery.min.js │ ├── loading-control.js │ ├── npm.js │ ├── sentiment.js │ ├── spin.min.js │ └── topic.js └── json │ └── world.json ├── statistic ├── __init__.py ├── models │ ├── __init__.py │ ├── pie.py │ └── timeline.py ├── urls.py └── views.py ├── templates ├── basic.html ├── footer.html ├── header.html ├── index.html ├── index2.html ├── nav.html ├── network │ ├── network.js │ └── 
retweet.html ├── sentiment │ ├── header.html │ └── index.html ├── statistic │ ├── hashtag_compare.html │ ├── hashtag_timeline.html │ └── pie.html └── topic │ ├── header.html │ ├── index.html │ ├── parameters_panel.html │ ├── toolbar.html │ └── visualization │ ├── result_basic.html │ ├── result_bubble.html │ ├── result_funnel.html │ ├── result_hashtags_histogram.html │ ├── result_hashtags_pie.html │ ├── result_hashtags_timeline.html │ ├── result_heatmap.html │ ├── result_sunburst.html │ ├── result_text.html │ └── result_treemap.html ├── topic ├── __init__.py ├── models │ ├── Corpus.py │ ├── Lda_text_format.py │ ├── OnlineLDA.py │ ├── TopicParameterManager.py │ ├── TopicTrendsManager.py │ ├── __init__.py │ ├── demo.py │ └── stopwords.txt ├── urls.py └── views.py └── twitterDataMining ├── __init__.py ├── model_p ├── Singleton.py ├── __init__.py ├── analyse │ ├── __init__.py │ ├── hashtag_co_occur.py │ ├── hashtag_trend.py │ └── mongodb_test.py └── twitterApi │ ├── Basic.py │ ├── LocalStream.py │ ├── Rest.py │ ├── Stream.py │ ├── __init__.py │ └── error_log.txt ├── models.py ├── settings.py ├── templatetags ├── __init__.py └── mytag.py ├── urls.py ├── views.py └── wsgi.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.pyc 3 | *~ 4 | *.swp -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # twitterDataMining 2 | - 实时热点话题检测 3 | - 情感分析 4 | - 结果可视化 5 | 6 | ## 一些说明 7 | - 使用Python 2.7 8 | - Topic-analysis : WOLDA 9 | - Sentiment-analysis : Maximum Entropy 10 | - Visualization : D3 | Echarts 11 | - WEB : Django | MongoDB | Bootstrap 12 | - 更多详情见:https://www.hrwhisper.me/twitter-data-mining-and-visualization/ 13 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | if __name__ == "__main__": 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "twitterDataMining.settings") 7 | 8 | from django.core.management import execute_from_command_line 9 | 10 | execute_from_command_line(sys.argv) 11 | -------------------------------------------------------------------------------- /network/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/network/__init__.py -------------------------------------------------------------------------------- /network/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/2/5. 4 | 5 | -------------------------------------------------------------------------------- /network/models/retweet.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/2/5. 
4 | 5 | from twitterDataMining.models import MongoDb, TimeCost 6 | 7 | 8 | def get_retweet_data_by_time(time): 9 | db = MongoDb().getDB() 10 | field = { 11 | 'id': 1, 12 | 'text': 1, 13 | 'user_id': 1, 14 | 'retweet_id': 1, 15 | 'retweet_count': 1, 16 | 'user_mentions': 1, 17 | } 18 | cursor = db.stream.aggregate([ 19 | {'$match': {'hashtags': 'Christmas'}}, 20 | {'$sort': {'retweet_count': -1}}, 21 | {'$limit': 10}, 22 | # {'$project': field} 23 | ], ) 24 | tweets = [tweet for tweet in cursor] 25 | cursor = db.stream.aggregate([ 26 | {'$match': {'retweet_id': {'$in': [tweet['id'] for tweet in tweets]}}}, 27 | # {'$project': field} 28 | ], ) 29 | tweets += [tweet for tweet in cursor] 30 | return tweets 31 | 32 | 33 | def get_retweet_network_nodes_and_links(date='2015-11-22'): 34 | tweets = get_retweet_data_by_time(date) 35 | # user = set([tweet['user_id'] for tweet in tweets] + [tweet['user_memtions'][0]['id_str'] for tweet in tweets if 36 | # 'user_memtions' in tweet]) 37 | # user_num = {_id: i for i, _id in enumerate(user)} 38 | # nodes = [{"id": node} for node in user] 39 | # links = [{ 40 | # 'source': user_num[tweet['user_id']], 41 | # 'target': 42 | # user_num[tweet['user_memtions']['id_str']] 43 | # if 'user_memtions' in tweet and tweet['user_memtions']['id_str'] in user_tweet_id_num else 44 | # id_num[tweet.get('retweet_id', 0)] 45 | # } 46 | # for tweet in tweets] 47 | # print tweets 48 | tweets_id = set( 49 | [tweet['id'] for tweet in tweets] + [tweet.get('retweet_id', 0) for tweet in tweets]) 50 | id_num = {_id: i for i, _id in enumerate(tweets_id)} 51 | user_tweet_id_num = {tweet['user_id']: id_num[tweet['id']] for tweet in tweets} 52 | nodes = [{"id": node} for node in tweets_id] 53 | links = [{ 54 | 'source': id_num[tweet['id']], 55 | 'target': 56 | user_tweet_id_num[tweet['user_mentions'][0]['id_str']] 57 | if 'user_mentions' in tweet and tweet['user_mentions'][0]['id_str'] in user_tweet_id_num else 58 | id_num[tweet.get('retweet_id', 0)] 59 | } 60 | 
for tweet in tweets] 61 | 62 | return {'nodes': nodes, 'links': links} 63 | 64 | 65 | if __name__ == '__main__': 66 | get_retweet_network_nodes_and_links("") 67 | -------------------------------------------------------------------------------- /network/urls.py: -------------------------------------------------------------------------------- 1 | """twitterDataMining URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/1.9/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. Add a URL to urlpatterns: url(r'^$', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.conf.urls import url, include 14 | 2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls')) 15 | """ 16 | from django.conf.urls import url 17 | import network.views 18 | 19 | urlpatterns = [ 20 | url(r'retweet$', network.views.retweet), 21 | url(r'retweet_data$', network.views.retweet_data), 22 | ] 23 | -------------------------------------------------------------------------------- /network/views.py: -------------------------------------------------------------------------------- 1 | # Create your views here. 
2 | import json 3 | from django.http import HttpResponse 4 | from django.shortcuts import render 5 | from network.models.retweet import get_retweet_network_nodes_and_links 6 | 7 | 8 | def retweet(request): 9 | res = {'date': request.GET.get('date')} 10 | return render(request, 'network/retweet.html', res) 11 | 12 | 13 | def retweet_data(request): 14 | res = get_retweet_network_nodes_and_links("") 15 | return HttpResponse(json.dumps(res), content_type="application/json") 16 | -------------------------------------------------------------------------------- /sentiment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/__init__.py -------------------------------------------------------------------------------- /sentiment/models/SentimentJudge.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/5/23. 
4 | import codecs 5 | 6 | from scipy.sparse import csr_matrix 7 | from sklearn import metrics 8 | from sklearn.externals import joblib 9 | from sentiment.models.tools.pre_process import pre_process, pos_process 10 | from sentiment.models.tools.read_data import read_all_test_data 11 | from twitterDataMining.model_p.Singleton import Singleton 12 | 13 | 14 | class SentimentJudge(object): 15 | """ 16 | Simple example: 17 | s = SentimentJudge() 18 | test_data = s.transform(_test_data) 19 | predicted = s.predict(test_data) 20 | print np.sum(predicted == _test_target), len(_test_target), np.mean(predicted == _test_target) 21 | """ 22 | __metaclass__ = Singleton 23 | 24 | def __init__(self): 25 | self.classifier = joblib.load('sentiment/models/models_save/classifier') 26 | self.ngram = joblib.load('sentiment/models/models_save/ngrams') 27 | self.lexicon = joblib.load('sentiment/models/models_save/lexicon') 28 | 29 | def predict(self, X): 30 | """ 31 | Predict X is positive or negative 32 | :param X: 33 | :return: a numpy.ndarray. 
each row with "positive" or "negative" 34 | """ 35 | return self.classifier.predict(X) 36 | 37 | def transform(self, data, pos_tags=None): 38 | if pos_tags is None: 39 | data, pos_tags = pos_process(data) 40 | print len(data) 41 | return pre_process(data, pos_tags, self.lexicon, self.ngram) 42 | 43 | 44 | def main(): 45 | clf = SentimentJudge() 46 | tweets, target = [], [] 47 | with codecs.open('./data/test/2014-test-journal.tsv', "r", "utf-8") as f: 48 | for line in f.readlines(): 49 | line = line.strip().split("\t") 50 | target.append(line[1]) 51 | tweets.append(line[2]) 52 | 53 | test_feature = clf.transform(tweets) 54 | predicted = clf.predict(test_feature) 55 | print "Classification report for %s:\n%s\n" % (clf, 56 | metrics.classification_report(target, predicted, digits=3)) 57 | print("Confusion matrix:\n%s" % metrics.confusion_matrix(target, predicted)) 58 | 59 | 60 | # for name, test_data, test_target, test_pos in read_all_test_data(): 61 | # print '\n\n\n\n\n--------Now is {} --------\n\n'.format(name) 62 | # test_feature = clf.transform(test_data, test_pos) 63 | # predicted = clf.predict(test_feature) 64 | # print "Classification report for %s:\n%s\n" % (clf, 65 | # metrics.classification_report(test_target, predicted, digits=3)) 66 | # print("Confusion matrix:\n%s" % metrics.confusion_matrix(test_target, predicted)) 67 | 68 | 69 | if __name__ == '__main__': 70 | main() 71 | -------------------------------------------------------------------------------- /sentiment/models/SentimentManager.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/4/24. 
4 | 5 | import numpy as np 6 | from sentiment.models.SentimentJudge import SentimentJudge 7 | from twitterDataMining.model_p.twitterApi.Rest import TwitterRest 8 | 9 | 10 | def get_result_info(predicted, target, tweets, total_tweet, return_sample_tweets_nums): 11 | """ 12 | :param predicted: 13 | :param target: 14 | :param tweets: np.array [str,str] 15 | :param total_tweet: 16 | :param return_sample_tweets_nums: 17 | :return: 18 | """ 19 | c = predicted == target 20 | count = np.count_nonzero(c) 21 | percent = count * 1.0 / total_tweet 22 | c = tweets[c].tolist() 23 | text = sorted(c, cmp=lambda x, y: len(y) - len(x))[:return_sample_tweets_nums] 24 | return percent, text 25 | 26 | 27 | def query_sentiment_for_online_data(query_str, max_tweets=200, return_sample_tweets_nums=10): 28 | twitter_rest = TwitterRest() 29 | tweets = twitter_rest.search_tweets(q=query_str.encode('utf-8'), max_results=max_tweets) 30 | tweets = list(set(map(lambda x: x['text'], list(filter(lambda x: 'text' in x, tweets))))) 31 | print 'test_data len: {} by query string: {}'.format(len(tweets), query_str) 32 | 33 | s = SentimentJudge() 34 | test_data = s.transform(tweets) 35 | predicted = s.predict(test_data) 36 | 37 | _class = ['positive', 'negative', 'neutral'] 38 | total_tweets = test_data.shape[0] 39 | tweets = np.array(tweets)[:total_tweets] 40 | res = {} 41 | for target in _class: 42 | percent, text = get_result_info(predicted, target, tweets, total_tweets, return_sample_tweets_nums) 43 | res[target] = { 44 | 'percent': percent, 45 | 'text': text 46 | } 47 | return res 48 | # return total_positive, total_tweets, positive_percentage, positive_text, negative_text 49 | 50 | 51 | if __name__ == '__main__': 52 | while True: 53 | query_str = raw_input('please input the content your want to query:\n') 54 | print 'wait...' 
55 | query_sentiment_for_online_data(query_str) 56 | -------------------------------------------------------------------------------- /sentiment/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/5/23. 4 | -------------------------------------------------------------------------------- /sentiment/models/data/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/5/2. 4 | -------------------------------------------------------------------------------- /sentiment/models/data/dictionary/emoticonsWithPolarity.txt: -------------------------------------------------------------------------------- 1 | :-) :) :o) :] :3 :c) :> =] 8) =) :} :^) Positive 2 | :D C: (cl Extremely-Positive 3 | :-D :D 8D xD XD =D =3 <=3 <=8 Extremely-Positive 4 | 8===D 8===B Negative 5 | --!-- Negative 6 | :-( :( :c :< :[ :{ Negative 7 | D: D8 D; D= DX v.v Dx Extremely-Negative 8 | :-9 Negative 9 | ;-) ;) *) ;] ;D Positive 10 | :-P :P XP :-p :p =p xP :-b :b Positive 11 | :-O :O O_O o_o OwO O-O 0_o O_o O3O o0o ;o_o; o...o 0w0 O.o Positive 12 | c.c C.C :c Negative 13 | :-/ :/ :\ =/ =\ :S Negative 14 | :| Neutral 15 | d:-) qB-) Positive 16 | :)~ :-)>.... Neutral 17 | :-X :X :-# :# :-x :x Positive 18 | O:-) 0:3 O:) Negative 19 | :'( ;*( T_T TT_TT T.T Q.Q Q_Q ;_; Negative 20 | :-* :* Positive 21 | ^o) Negative 22 | >:) >;) >:-) >:] Neutral 23 | B) B-) 8) 8-) Neutral 24 | ^>.>^ ^<.<^ ^>_>^ ^<_<^ Negative 25 | D:< >:( >:- D-:> >:-( :-@[1] ;( `_' D< Negative 26 | ;3 <3 <33 <333 Positive 27 | .>= =<_<= =>.<= >_< Positive 29 | \,,/ \m/ Extremely-Positive 30 | \m/\>..<) >.< (>.<) >.< Negative 44 | (>_>) >_> (<_<) <_< (>.>) >.> Negative 45 | (-_-) -_- -__- -___- (-.-) -.- -.- .___. 
Negative 46 | (^o^) ^o^ Positive 47 | (^3^) ^3^ Positive 48 | (^_^') ^_^_^') ^^" ^^^_.^') ^^_^^; ^&^^.^;& ^^^; ^^^7 Negative 49 | d(>w<)b Extremely-Positive 50 | q(;^;)p Extremely-Negative 51 | 9(x.x)9 (;.;)9 Negative 52 | (._.) (,.,) Negative 53 | [(-_-)] ZZzzz... Neutral 54 | (X_X) x_x Negative 55 | ^///^ >///< >///> o///o -///- =///= Positive 56 | _|_ (-.-) _|_ t(>. 53 | 54 | can be a unigram or a bigram; 55 | is a real-valued sentiment score: score = PMI(w, pos) - PMI(w, neg), where PMI stands for Point-wise Mutual Information between a term w and the positive/negative class; 56 | is the number of times the term appears in the positive class, ie. in tweets with positive hashtag or emoticon; 57 | is the number of times the term appears in the negative class, ie. in tweets with negative hashtag or emoticon. 58 | 59 | 60 | ********************************************** 61 | AffLex and NegLex 62 | ********************************************** 63 | 64 | Both parts, AffLex and NegLex, of each lexicon are contained in the same file. The NegLex entries have suffixes '_NEG' or '_NEGFIRST'. 65 | 66 | In the unigram lexicon: 67 | '_NEGFIRST' is attached to terms that directly follow a negator; 68 | '_NEG' is attached to all other terms in negated contexts (not directly following a negator). 69 | 70 | In the bigram lexicon: 71 | '_NEG' is attached to all terms in negated contexts. 72 | 73 | Both suffixes are attached only to nouns, verbs, adjectives, and adverbs. All other parts of speech do not get these suffixes attached. 74 | 75 | 76 | ********************************************** 77 | More Information 78 | ********************************************** 79 | Details on the process of creating the lexicons can be found in: 80 | Kiritchenko, S., Zhu, X., Mohammad, S. (2014). Sentiment Analysis of Short Informal Texts. Journal of Artificial Intelligence Research, 50:723-762, 2014. 
81 | 82 | 83 | -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/NRC-Emotion-Lexicon-v0.92/NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/data/lexicon/NRC-Canada/NRC-Emotion-Lexicon-v0.92/NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/NRC-Emotion-Lexicon-v0.92/README.txt: -------------------------------------------------------------------------------- 1 | 2 | NRC Word-Emotion Association Lexicon 3 | (NRC Emotion Lexicon) 4 | Version 0.92 5 | 10 July 2011 6 | Copyright (C) 2011 National Research Council Canada (NRC) 7 | Contact: Saif Mohammad (saif.mohammad@nrc-cnrc.gc.ca) 8 | 9 | 1. This copy of the NRC Emotion Lexicon is to be used for research 10 | purposes only. Please contact NRC if interested in a commercial 11 | license. 12 | 13 | 2. If you use this lexicon in your research, then please cite 14 | at least one of the papers listed below in the PUBLICATIONS section 15 | (preferably the journal paper in Computational Intelligence). 16 | 17 | ....................................................................... 18 | 19 | NRC EMOTION LEXICON 20 | ------------------- 21 | The NRC emotion lexicon is a list of words and their associations with 22 | eight emotions (anger, fear, anticipation, trust, surprise, sadness, 23 | joy, and disgust) and two sentiments (negative and positive). The 24 | annotations were manually done through Amazon's Mechanical Turk. Refer 25 | to publications below for more details. 26 | 27 | ....................................................................... 
28 | 29 | PUBLICATIONS 30 | ------------ 31 | Details of the lexicon can be found in the following peer-reviewed 32 | publications: 33 | 34 | -- Crowdsourcing a Word-Emotion Association Lexicon, Saif Mohammad and 35 | Peter Turney, Computational Intelligence, 39(3), 555-590, 2013. 36 | 37 | -- Tracking Sentiment in Mail: How Genders Differ on Emotional Axes, 38 | Saif Mohammad and Tony Yang, In Proceedings of the ACL 2011 Workshop 39 | on ACL 2011 Workshop on Computational Approaches to Subjectivity and 40 | Sentiment Analysis (WASSA), June 2011, Portland, OR. Paper (pdf) 41 | 42 | -- From Once Upon a Time to Happily Ever After: Tracking Emotions in 43 | Novels and Fairy Tales, Saif Mohammad, In Proceedings of the ACL 2011 44 | Workshop on Language Technology for Cultural Heritage, Social 45 | Sciences, and Humanities (LaTeCH), June 2011, Portland, OR. Paper 46 | 47 | -- Emotions Evoked by Common Words and Phrases: Using Mechanical Turk 48 | to Create an Emotion Lexicon", Saif Mohammad and Peter Turney, In 49 | Proceedings of the NAACL-HLT 2010 Workshop on Computational Approaches 50 | to Analysis and Generation of Emotion in Text, June 2010, LA, 51 | California. 52 | 53 | Links to the papers are available here: 54 | http://www.purl.org/net/NRCemotionlexicon 55 | ....................................................................... 56 | 57 | VERSION INFORMATION 58 | ------------------- 59 | Version 0.92 is the latest version as of 10 July 2011. This version 60 | has annotations for more than twice as many terms as in Version 0.5 61 | which was released earlier. 62 | 63 | ....................................................................... 64 | 65 | FORMAT 66 | ------ 67 | Each line has the following format: 68 | TargetWordAffectCategoryAssociationFlag 69 | 70 | TargetWord is a word for which emotion associations are provided. 
71 | 72 | AffectCategory is one of eight emotions (anger, fear, anticipation, 73 | trust, surprise, sadness, joy, or disgust) or one of two polarities 74 | (negative or positive). 75 | 76 | AssociationFlag has one of two possible values: 0 or 1. 0 indicates 77 | that the target word has no association with affect category, 78 | whereas 1 indicates an association. 79 | 80 | ....................................................................... 81 | 82 | OTHER FORMS OF THE LEXICON 83 | -------------------------- 84 | 85 | The original lexicon has annotations at word-sense level. Each 86 | word-sense pair is annotated by at least three annotators (most are 87 | annotated by at least five). The word-level lexicon was created by 88 | taking the union of emotions associated with all the senses of a word. 89 | Please contact NRC if interested in the sense-level lexicon or if 90 | interested in more detailed information such as the individual 91 | annotations by each of the annotators. 92 | 93 | ....................................................................... 94 | 95 | CONTACT INFORMATION 96 | ------------------- 97 | Saif Mohammad 98 | Research Officer, National Research Council Canada 99 | email: saif.mohammad@nrc-cnrc.gc.ca 100 | phone: +1-613-993-0620 101 | 102 | ....................................................................... 103 | -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/NRC-Hashtag-Sentiment-Lexicon-v0.1/README: -------------------------------------------------------------------------------- 1 | NRC Hashtag Sentiment Lexicon 2 | Version 0.1 3 | 9 April 2013 4 | Copyright (C) 2011 National Research Council Canada (NRC) 5 | Contact: Saif Mohammad (uvgotsaif@gmail.com) 6 | 7 | 1. This copy of the NRC Hashtag Sentiment Lexicon is to be used for research 8 | purposes only. Please contact NRC if interested in a commercial license. 9 | 10 | 2. 
If you use this lexicon in your research, then please cite 11 | the paper listed below in the PUBLICATIONS section. 12 | 13 | ....................................................................... 14 | 15 | NRC HASHTAG SENTIMENT LEXICON 16 | ----------------------------- 17 | The NRC Hashtag Sentiment Lexicon is a list of words and their associations with 18 | positive and negative sentiment. The lexicon is distributed in three files: 19 | unigrams-pmilexicon.txt, bigrams-pmilexicon.txt, and pairs-pmilexicon.txt. 20 | 21 | Each line in the three files has the format: 22 | 23 | termsentimentScorenumPositivenumNegative 24 | where: 25 | term 26 | In unigrams-pmilexicon.txt, term is a unigram (single word). 27 | In bigrams-pmilexicon.txt, term is a bigram (two-word sequence). 28 | A bigram has the form: "string string". The bigram was seen at least once in 29 | the source tweets from which the lexicon was created. 30 | In pairs-pmilexicon.txt, term is a unigram--unigram pair, 31 | unigram--bigram pair, bigram--unigram pair, or a bigram--bigram pair. 32 | The pairs were generated from a large set of source tweets. Tweets were examined 33 | one at a time, and all possible unigram and bigram combinations within the tweet 34 | were chosen. Pairs with certain punctuations, @ symbols, and some function words 35 | were removed. 36 | 37 | sentimentScore is a real number. A positive score indicates positive 38 | sentiment. A negative score indicates negative sentiment. The absolute 39 | value is the degree of association with the sentiment. 40 | The sentiment score was calculated by subtracting the pointwise mutual 41 | information (PMI) score of the term with positive hashtags and the 42 | PMI of the term with negative hashtags. 43 | 44 | Terms with a non-zero PMI score with positive hashtags and PMI score of 0 45 | with negative hashtags were assigned a sentimentScore of 5. 
46 | Terms with a non-zero PMI score with negative hashtags and PMI score of 0 47 | with positive hashtags were assigned a sentimentScore of -5. 48 | 49 | numPositive is the number of times the term co-occurred with a positive 50 | marker such as a positive emoticon or a positive hashtag. 51 | 52 | numNegative is the number of times the term co-occurred with a negative 53 | marker such as a negative emoticon or a negative hashtag. 54 | 55 | The hashtag lexicon was created from a collection of tweets that had a 56 | positive or a negative word hashtag such as #good, #excellent, #bad, 57 | and #terrible. Version 0.1 was created from 775,310 tweets posted 58 | between April and December 2012 using a list of 78 positive and 59 | negative word hashtags. A list of these hashtags is shown in sentimenthashtags.txt. 60 | 61 | The number of entries in: 62 | unigrams-pmilexicon.txt: 54,129 terms 63 | bigrams-pmilexicon.txt: 316,531 terms 64 | pairs-pmilexicon.txt: 308,808 terms 65 | 66 | Refer to publication below for more details. 67 | 68 | ....................................................................... 69 | 70 | PUBLICATION 71 | ----------- 72 | Details of the lexicon can be found in the following peer-reviewed 73 | publication: 74 | 75 | -- In Proceedings of the seventh international workshop on Semantic 76 | Evaluation Exercises (SemEval-2013), June 2013, Atlanta, Georgia, USA. 77 | 78 | BibTeX entry: 79 | @InProceedings{MohammadKZ2013, 80 | author = {Mohammad, Saif and Kiritchenko, Svetlana and Zhu, Xiaodan}, 81 | title = {NRC-Canada: Building the State-of-the-Art in Sentiment Analysis of Tweets}, 82 | booktitle = {Proceedings of the seventh international workshop on Semantic Evaluation Exercises (SemEval-2013)}, 83 | month = {June}, 84 | year = {2013}, 85 | address = {Atlanta, Georgia, USA} 86 | } 87 | ....................................................................... 
88 | 89 | -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/NRC-Hashtag-Sentiment-Lexicon-v0.1/bigrams-pmilexicon.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/data/lexicon/NRC-Canada/NRC-Hashtag-Sentiment-Lexicon-v0.1/bigrams-pmilexicon.txt -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/NRC-Hashtag-Sentiment-Lexicon-v0.1/pairs-pmilexicon.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/data/lexicon/NRC-Canada/NRC-Hashtag-Sentiment-Lexicon-v0.1/pairs-pmilexicon.txt -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/NRC-Hashtag-Sentiment-Lexicon-v0.1/sentimenthashtags.txt: -------------------------------------------------------------------------------- 1 | positive positive 2 | good positive 3 | great positive 4 | excellent positive 5 | excellence positive 6 | fine positive 7 | nice positive 8 | desirable positive 9 | exquisite positive 10 | fabulous positive 11 | ideal positive 12 | marvelous positive 13 | perfect positive 14 | perfection positive 15 | splendid positive 16 | wonderful positive 17 | classy positive 18 | elegance positive 19 | elegant positive 20 | beauty positive 21 | beautiful positive 22 | dazzling positive 23 | amazing positive 24 | magnificent positive 25 | sensational positive 26 | super positive 27 | superb positive 28 | terrific negative 29 | exquisite negative 30 | exceptional negative 31 | heavenly negative 32 | negative negative 33 | bad negative 34 | egregious negative 35 | lousy negative 36 | shameful negative 
37 | sinful negative 38 | woeful negative 39 | wretched negative 40 | abominable negative 41 | deplorable negative 42 | despicable negative 43 | detest negative 44 | detestable negative 45 | dreadful negative 46 | infernal negative 47 | terrible negative 48 | vile negative 49 | dire negative 50 | sinister negative 51 | undesirable negative 52 | squalid negative 53 | seamy negative 54 | shoddy negative 55 | sleazy negative 56 | worthless negative 57 | paltry negative 58 | blemish negative 59 | botch negative 60 | bungle negative 61 | grievous negative 62 | hopeless negative 63 | ill negative 64 | pathetic negative 65 | poor negative 66 | sad negative 67 | sorry negative 68 | crummy negative 69 | inferior negative 70 | tacky negative 71 | unacceptable negative 72 | unsatisfactory negative 73 | unworthy negative 74 | awful negative 75 | abysmal negative 76 | rotten negative 77 | filthy negative 78 | foul negative 79 | -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/Sentiment140-Lexicon-v0.1/README: -------------------------------------------------------------------------------- 1 | Sentiment140 Lexicon 2 | Version 0.1 3 | 9 April 2013 4 | Copyright (C) 2011 National Research Council Canada (NRC) 5 | Contact: Saif Mohammad (uvgotsaif@gmail.com) 6 | 7 | 1. This copy of the Sentiment140 Lexicon is to be used for research 8 | purposes only. Please contact NRC if interested in a commercial license. 9 | 10 | 2. If you use this lexicon in your research, then please cite 11 | the paper listed below in the PUBLICATIONS section. 12 | 13 | ....................................................................... 14 | 15 | SENTIMENT140 LEXICON 16 | -------------------- 17 | The Sentiment140 Lexicon is a list of words and their associations with 18 | positive and negative sentiment. The lexicon is distributed in three files: 19 | unigrams-pmilexicon.txt, bigrams-pmilexicon.txt, and pairs-pmilexicon.txt. 
20 | 21 | Each line in the three files has the format: 22 | 23 | termsentimentScorenumPositivenumNegative 24 | where: 25 | term 26 | In unigrams-pmilexicon.txt, term is a unigram (single word). 27 | In bigrams-pmilexicon.txt, term is a bigram (two-word sequence). 28 | A bigram has the form: "string string". The bigram was seen at least once in 29 | the source tweets from which the lexicon was created. 30 | In pairs-pmilexicon.txt, term is a unigram--unigram pair, 31 | unigram--bigram pair, bigram--unigram pair, or a bigram--bigram pair. 32 | The pairs were generated from a large set of source tweets. Tweets were 33 | examined one at a time, and all possible unigram and bigram combinations 34 | within the tweet were chosen. Pairs with certain punctuations, @ symbols, 35 | and some function words were removed. 36 | 37 | 38 | sentimentScore is a real number. A positive score indicates positive 39 | sentiment. A negative score indicates negative sentiment. The absolute 40 | value is the degree of association with the sentiment. 41 | The sentiment score was calculated by subtracting the pointwise mutual 42 | information (PMI) score of the term with positive emoticons and the 43 | PMI of the term with negative emoticons. 44 | 45 | Terms with a non-zero PMI score with positive emoticons and PMI score of 0 46 | with negative emoticons were assigned a sentimentScore of 5. 47 | Terms with a non-zero PMI score with negative emoticons and PMI score of 0 48 | with positive emoticons were assigned a sentimentScore of -5. 49 | 50 | numPositive is the number of times the term co-occurred with a positive 51 | marker such as a positive emoticon or a positive emoticons. 52 | 53 | numNegative is the number of times the term co-occurred with a negative 54 | marker such as a negative emoticon or a negative emoticons. 55 | 56 | The Sentiment140 Lexicon was created from the Sentiment140 emoticon corpus of 1.6 million tweets. 
57 | http://help.sentiment140.com/for-students 58 | 59 | The number of entries in: 60 | unigrams-pmilexicon.txt: 62,468 terms 61 | bigrams-pmilexicon.txt: 677,698 terms 62 | pairs-pmilexicon.txt: 480,010 terms 63 | 64 | Refer to publication below for more details. 65 | 66 | ....................................................................... 67 | 68 | PUBLICATION 69 | ----------- 70 | Details of the lexicon can be found in the following peer-reviewed 71 | publication: 72 | 73 | -- In Proceedings of the seventh international workshop on Semantic 74 | Evaluation Exercises (SemEval-2013), June 2013, Atlanta, Georgia, USA. 75 | 76 | BibTeX entry: 77 | @InProceedings{MohammadKZ2013, 78 | author = {Mohammad, Saif and Kiritchenko, Svetlana and Zhu, Xiaodan}, 79 | title = {NRC-Canada: Building the State-of-the-Art in Sentiment Analysis of Tweets}, 80 | booktitle = {Proceedings of the seventh international workshop on Semantic Evaluation Exercises (SemEval-2013)}, 81 | month = {June}, 82 | year = {2013}, 83 | address = {Atlanta, Georgia, USA} 84 | } 85 | ....................................................................... 
86 | 87 | -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/Sentiment140-Lexicon-v0.1/bigrams-pmilexicon.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/data/lexicon/NRC-Canada/Sentiment140-Lexicon-v0.1/bigrams-pmilexicon.txt -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/Sentiment140-Lexicon-v0.1/pairs-pmilexicon.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/data/lexicon/NRC-Canada/Sentiment140-Lexicon-v0.1/pairs-pmilexicon.txt -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/Sentiment140AffLexNegLex/S140-AFFLEX-NEGLEX-bigrams.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/data/lexicon/NRC-Canada/Sentiment140AffLexNegLex/S140-AFFLEX-NEGLEX-bigrams.txt -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/Sentiment140AffLexNegLex/readme.txt: -------------------------------------------------------------------------------- 1 | Sentiment140 Affirmative Context Lexicon and Sentiment140 Negated Context Lexicon 2 | Version 1.0 3 | 26 September 2014 4 | Copyright (C) 2014 National Research Council Canada (NRC) 5 | Contact: Saif Mohammad (saif.mohammad@nrc-cnrc.gc.ca) 6 | 7 | ********************************************** 8 | TERMS OF USE 9 | ********************************************** 10 | 11 | 1. 
This lexicon can be used freely for research purposes. 12 | 2. The papers listed below provide details of the creation and use of 13 | the lexicon. If you use a lexicon, then please cite the associated 14 | papers: 15 | Kiritchenko, S., Zhu, X., Mohammad, S. (2014). Sentiment Analysis of 16 | Short Informal Texts. Journal of Artificial Intelligence Research, 17 | 50:723-762, 2014. 18 | 3. If interested in commercial use of the lexicon, send email to the 19 | contact. 20 | 4. If you use the lexicon in a product or application, then please 21 | credit the authors and NRC appropriately. Also, if you send us an 22 | email, we will be thrilled to know about how you have used the 23 | lexicon. 24 | 5. National Research Council Canada (NRC) disclaims any responsibility 25 | for the use of the lexicon and does not provide technical support. 26 | However, the contact listed above will be happy to respond to 27 | queries and clarifications. 28 | 6. Rather than redistributing the data, please direct interested 29 | parties to this page: 30 | http://www.purl.com/net/lexicons 31 | 32 | Please feel free to send us an email: 33 | - with feedback regarding the lexicon. 34 | - with information on how you have used the lexicon. 35 | - if interested in having us analyze your data for sentiment, emotion, 36 | and other affectual information. 37 | - if interested in a collaborative research project. 38 | 39 | ********************************************** 40 | DATA SOURCE 41 | ********************************************** 42 | 43 | The NRC Sentiment140 Lexicons are automatically generated from the following data source: 44 | 1.6 million tweets with emoticons collected by Go and colleagues (see Go, A., Bhayani, R., & Huang, L. Twitter sentiment classication using distant supervision. Tech. rep., Stanford University, 2009.) 
45 | 46 | 47 | ********************************************** 48 | FILE FORMAT 49 | ********************************************** 50 | 51 | Each line in the lexicons has the following format: 52 | 53 | 54 | can be a unigram or a bigram; 55 | is a real-valued sentiment score: score = PMI(w, pos) - PMI(w, neg), where PMI stands for Point-wise Mutual Information between a term w and the positive/negative class; 56 | is the number of times the term appears in the positive class, ie. in tweets with positive hashtag or emoticon; 57 | is the number of times the term appears in the negative class, ie. in tweets with negative hashtag or emoticon. 58 | 59 | 60 | ********************************************** 61 | AffLex and NegLex 62 | ********************************************** 63 | 64 | Both parts, AffLex and NegLex, of each lexicon are contained in the same file. The NegLex entries have suffixes '_NEG' or '_NEGFIRST'. 65 | 66 | In the unigram lexicon: 67 | '_NEGFIRST' is attached to terms that directly follow a negator; 68 | '_NEG' is attached to all other terms in negated contexts (not directly following a negator). 69 | 70 | In the bigram lexicon: 71 | '_NEG' is attached to all terms in negated contexts. 72 | 73 | Both suffixes are attached only to nouns, verbs, adjectives, and adverbs. All other parts of speech do not get these suffixes attached. 74 | 75 | 76 | ********************************************** 77 | More Information 78 | ********************************************** 79 | Details on the process of creating the lexicons can be found in: 80 | Kiritchenko, S., Zhu, X., Mohammad, S. (2014). Sentiment Analysis of Short Informal Texts. Journal of Artificial Intelligence Research, 50:723-762, 2014. 
81 | 82 | 83 | -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/PosNegWords/pos_mod.txt: -------------------------------------------------------------------------------- 1 | :) 2 | :-) 3 | :] 4 | :d 5 | :p 6 | ;) 7 | ;-) 8 | ;] 9 | ;p 10 | absolutely love 11 | abundant 12 | accomplish 13 | accomplished 14 | achieve 15 | achieving 16 | active 17 | admirable 18 | admire 19 | admired 20 | adorable 21 | adore 22 | adventure 23 | adventurous 24 | affluent 25 | agile 26 | agree 27 | agreeable 28 | alert 29 | align 30 | aligned 31 | alive 32 | amaze 33 | amazing 34 | amuse 35 | amused 36 | appeal 37 | appealing 38 | appreciate 39 | appreciated 40 | artistic 41 | astound 42 | astounding 43 | astute 44 | attentive 45 | attract 46 | attractive 47 | auspicious 48 | authentic 49 | awake 50 | aware 51 | awesome 52 | beaming 53 | beautiful 54 | best 55 | best looking 56 | bless 57 | blessed 58 | bliss 59 | bold 60 | brave 61 | bright 62 | brilliant 63 | brisk 64 | buoyant 65 | calm 66 | can help 67 | capable 68 | centered 69 | certain 70 | charm 71 | charming 72 | cheerful 73 | clear 74 | clearly superior 75 | clever 76 | cleverful 77 | come on 78 | comeback 79 | comfort 80 | comfortable 81 | competent 82 | complete 83 | confident 84 | congrats 85 | congratulation 86 | connected 87 | conscious 88 | considerate 89 | convenient 90 | cool 91 | cooperate 92 | cooperative 93 | courage 94 | courageous 95 | creative 96 | cute 97 | damn good 98 | daring 99 | dazzle 100 | dazzling 101 | delicious 102 | delight 103 | delightful 104 | desirable 105 | determined 106 | diligent 107 | discerning 108 | discover 109 | dynamic 110 | eager 111 | easy 112 | efficiency 113 | efficient 114 | effortless 115 | elated 116 | elegant 117 | eloquent 118 | enchant 119 | enchanting 120 | encourage 121 | encouraging 122 | endless 123 | endorse 124 | energetic 125 | engage 126 | engaging 127 | enhance 128 | enhancing 129 | enormous 130 | 
enterprising 131 | enthusiasm 132 | enthusiastic 133 | entice 134 | enticing 135 | epic flail 136 | excellent 137 | exceptional 138 | excite 139 | excited 140 | exciting 141 | experienced 142 | exquisite 143 | exuberant 144 | fabulous 145 | fail 146 | fair 147 | faithful 148 | fantastic 149 | far-sighted 150 | fascinate 151 | fascinating 152 | fine 153 | flail 154 | flatter 155 | flattering 156 | flourish 157 | flourishing 158 | fortunate 159 | free 160 | frickin ruled 161 | frickin rules 162 | friendly 163 | ftw 164 | fulfill 165 | fulfilled 166 | fun 167 | funny 168 | generous 169 | gentle 170 | genuine 171 | gifted 172 | glad 173 | glorious 174 | glow 175 | glowing 176 | good 177 | good looking 178 | good luck 179 | good thing 180 | gorgeous 181 | grace 182 | graceful 183 | gracious 184 | grand 185 | great 186 | gtd 187 | handsome 188 | happy 189 | hardy 190 | harmonious 191 | hawt 192 | hawtness 193 | heal 194 | healed 195 | healthy 196 | help 197 | helpful 198 | high 199 | highly positive 200 | hilarious 201 | honest 202 | humorous 203 | i like 204 | i want 205 | ideal 206 | imaginative 207 | imagine 208 | impress 209 | impressive 210 | incredible 211 | industrious 212 | ingenious 213 | innovate 214 | innovative 215 | inspire 216 | inspired 217 | integrate 218 | intellectual vigor 219 | intelligent 220 | interest 221 | interested 222 | interesting 223 | intuitive 224 | inventive 225 | invincible 226 | invite 227 | inviting 228 | irresistible 229 | is the best 230 | jolly 231 | joyful 232 | joyous 233 | judicious 234 | keen 235 | kind 236 | kinda impressed 237 | knowing 238 | leaning towards 239 | life saver 240 | liked it 241 | limitless 242 | lively 243 | lol 244 | looking forward to 245 | love 246 | love it 247 | lovely 248 | loves it 249 | loving 250 | lucky 251 | luminous 252 | magic 253 | magical 254 | magnificent 255 | make a difference 256 | marvellous 257 | master 258 | masterful 259 | mighty 260 | miracle 261 | miraculous 262 | motivate 263 | 
motivated 264 | moves me 265 | my hero 266 | natural 267 | neat 268 | nice 269 | nicely 270 | noble 271 | not suck 272 | nurturing 273 | obedient 274 | ok 275 | optimistic 276 | outstanding 277 | passion 278 | passionate 279 | peace 280 | peaceful 281 | perfect 282 | persevere 283 | persevering 284 | persist 285 | persistent 286 | piece of cake 287 | play 288 | playful 289 | pleasant 290 | pleasantly 291 | pleasantly surprised 292 | please 293 | pleasing 294 | plentiful 295 | plenty 296 | plus 297 | positive 298 | power 299 | powerful 300 | precious 301 | prepare 302 | prepared 303 | pretty good 304 | productive 305 | profound 306 | prompt 307 | prop 308 | props 309 | prosperous 310 | proud 311 | pumped 312 | qualified 313 | quick 314 | quite amazing 315 | radiant 316 | rally 317 | really like 318 | really love 319 | really neat 320 | reasonable 321 | recommend 322 | refine 323 | refined 324 | refresh 325 | refreshing 326 | relax 327 | relaxing 328 | reliable 329 | relieve 330 | relieved 331 | remarkable 332 | resolute 333 | resourceful 334 | respect 335 | respected 336 | responsive 337 | reward 338 | rewarding 339 | robust 340 | rocks 341 | safe 342 | satisfied 343 | satisfy 344 | secure 345 | seduce 346 | seductive 347 | self-reliant 348 | sensation 349 | sensational 350 | sensible 351 | sensitive 352 | serene 353 | share 354 | sharing 355 | silly 356 | skilful 357 | skill 358 | smart 359 | smashing 360 | smile 361 | smiling 362 | smooth 363 | so cool 364 | so great 365 | so proud 366 | sparkle 367 | sparkling 368 | spiritual 369 | splendid 370 | strong 371 | stunning 372 | success 373 | successful 374 | super 375 | superb 376 | superior 377 | surprise 378 | surprised 379 | swift 380 | talent 381 | talented 382 | tenacious 383 | terrific 384 | thank you 385 | thankful 386 | thanks 387 | thanks! 
388 | the best 389 | thks 390 | thoughtful 391 | thrill 392 | thrilling 393 | thrive 394 | thriving 395 | thx 396 | timely 397 | trust 398 | trusting 399 | truth 400 | truthful 401 | ultimate 402 | unique 403 | valiant 404 | valuable 405 | versatile 406 | very cool 407 | very exceptional 408 | very quickly 409 | vibrant 410 | victorious 411 | victory 412 | vigorous 413 | vivacious 414 | vivid 415 | voting for 416 | w00t 417 | warm 418 | way to go 419 | wealthy 420 | well 421 | whole 422 | win 423 | wise 424 | witty 425 | won 426 | wonderful 427 | woo 428 | woot 429 | worth 430 | worthy 431 | would love 432 | would recommend 433 | wowsers 434 | yay 435 | young 436 | youth 437 | youthful 438 | yum 439 | yummy 440 | zany 441 | zeal 442 | zealous 443 | zest 444 | -------------------------------------------------------------------------------- /sentiment/models/models_save/classifier: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/models_save/classifier -------------------------------------------------------------------------------- /sentiment/models/models_save/classifier_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/models_save/classifier_01.npy -------------------------------------------------------------------------------- /sentiment/models/models_save/classifier_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/models_save/classifier_02.npy -------------------------------------------------------------------------------- /sentiment/models/models_save/classifier_03.npy: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/models_save/classifier_03.npy -------------------------------------------------------------------------------- /sentiment/models/models_save/classifier_04.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/models_save/classifier_04.npy -------------------------------------------------------------------------------- /sentiment/models/models_save/lexicon: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/models_save/lexicon -------------------------------------------------------------------------------- /sentiment/models/models_save/ngrams: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/models_save/ngrams -------------------------------------------------------------------------------- /sentiment/models/test_save_model.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/5/23. 4 | from sklearn import metrics 5 | from sklearn.externals import joblib 6 | from sklearn.linear_model import LogisticRegression 7 | 8 | from sentiment.models.tools.Lexicon import Lexicon, Ngrams 9 | from sentiment.models.tools.pre_process2 import pre_process 10 | from sentiment.models.tools.read_data import * 11 | 12 | 13 | def get_features(data, postags): 14 | """ 15 | 16 | :param data: [str,str..] 
17 | :param id2word: dict word_id:word 18 | :param vocabulary: dict word:word_id 19 | :return: 20 | """ 21 | print 'create features...' 22 | data_feature = pre_process(data, postags) # 这里data 每一行已经分词了 23 | print data_feature.shape 24 | return data_feature 25 | 26 | 27 | def main(): 28 | train_data, train_target, train_pos = read_train_data('2013') 29 | train_feature = get_features(train_data, train_pos) 30 | 31 | clf = LogisticRegression(C=0.01105) 32 | clf.fit(train_feature, train_target) 33 | 34 | ngram = Ngrams() 35 | lexicon = Lexicon() 36 | joblib.dump(clf, 'models_save/classifier') 37 | joblib.dump(ngram, 'models_save/ngrams') 38 | joblib.dump(lexicon, 'models_save/lexicon') 39 | 40 | for name, test_data, test_target, test_pos in read_all_test_data(): 41 | print '\n\n\n\n\n--------Now is {} --------\n\n'.format(name) 42 | test_feature = get_features(test_data, test_pos) 43 | predicted = clf.predict(test_feature) 44 | print "Classification report for %s:\n%s\n" % (clf, 45 | metrics.classification_report(test_target, predicted, digits=3)) 46 | print("Confusion matrix:\n%s" % metrics.confusion_matrix(test_target, predicted)) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /sentiment/models/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/5/3. 
4 | -------------------------------------------------------------------------------- /sentiment/models/tools/ark-tweet-nlp-0.3.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/tools/ark-tweet-nlp-0.3.2.jar -------------------------------------------------------------------------------- /sentiment/models/tools/read_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/5/3. 4 | import codecs 5 | import subprocess 6 | import sys 7 | 8 | from sentiment.models.tools.Lexicon import Ngrams 9 | 10 | 11 | def create_pos_file(path): 12 | reload(sys) 13 | sys.setdefaultencoding('utf8') 14 | print 'read data from', path 15 | temp_file_path = '../t.txt' 16 | with codecs.open(path, "r", "utf-8") as f: 17 | with codecs.open(temp_file_path, 'w+', "utf-8") as fw: 18 | data = f.readlines() 19 | for line in data: 20 | line = line.strip().split('\t') 21 | fw.write(line[-1] + '\n') 22 | 23 | cmd = ['java', '-jar', './ark-tweet-nlp-0.3.2.jar', '--no-confidence', temp_file_path] 24 | stdin = subprocess.PIPE 25 | stdout = subprocess.PIPE 26 | stderr = subprocess.PIPE 27 | p = subprocess.Popen(cmd, stdin=stdin, stdout=stdout, stderr=stderr) 28 | (stdout, stderr) = p.communicate() 29 | 30 | result = stdout.split('\r\n') 31 | with codecs.open(path + "_pos", "w+", "utf-8") as f: 32 | for i, line in enumerate(data): 33 | line = line.strip().split('\t') 34 | t = result[i].split('\t') 35 | tweet = t[0] 36 | pos = t[1] 37 | tags = line[2] 38 | f.write(tags + '\t' + tweet + '\t' + pos + '\n') 39 | 40 | 41 | def read_data(path): 42 | """ 43 | return tweets_list and tags_list for given path 44 | :param path: the file path eg: c:\\a.txt 45 | :return: tweets_list,tags_list 46 | """ 47 | print 'read data from', path 48 | 49 | tweets, tags, pos = 
[], [], [] 50 | with codecs.open(path + '_pos', "r", "utf-8") as f: 51 | for line in f.readlines(): 52 | line = line.strip().split("\t") 53 | tags.append(line[0]) 54 | tweets.append(line[1]) 55 | pos.append(line[2]) 56 | return tweets, tags, pos 57 | 58 | 59 | def read_train_data_by_year(year): 60 | train_data, train_target, pos = read_data("./data/train/" + str(year) + "-train-data.tsv") 61 | # train_data2, train_target2, pos2 = read_data("./data/train/" + str(year) + "-dev-data.tsv") 62 | # train_data = train_data + train_data2 63 | # train_target = train_target + train_target2 64 | # pos = pos + pos2 65 | return train_data, train_target, pos 66 | 67 | 68 | def read_train_data(year=None): 69 | train_data, train_target, train_pos = [], [], [] 70 | if year == '2013' or year is None: 71 | data, target, pos = read_train_data_by_year('2013') 72 | train_data += data 73 | train_target += target 74 | train_pos += pos 75 | 76 | len_2013 = len(train_pos) 77 | 78 | if year == '2016' or year is None: 79 | data, target, pos = read_train_data_by_year('2016') 80 | train_data += data 81 | train_target += target 82 | train_pos += pos 83 | 84 | Ngrams().create_ngram_vector(train_data, train_target) 85 | print int(len_2013 / 2.5), len(train_data[len_2013:]) 86 | if year is None: 87 | len_2013_remain = int(len_2013 / 2.5) 88 | train_data = train_data[:len_2013_remain] + train_data[len_2013:] 89 | train_target = train_target[:len_2013_remain] + train_target[len_2013:] 90 | train_pos = train_pos[:len_2013_remain] + train_pos[len_2013:] 91 | 92 | return train_data, train_target, train_pos 93 | 94 | 95 | def read_2013_test_data(): 96 | return read_data("./data/test/2013-test-tweet.tsv") 97 | 98 | 99 | def read_2014_test_data(): 100 | return read_data("./data/test/2014-test-tweet.tsv") 101 | 102 | 103 | def read_2016_test_data(): 104 | return read_data("./data/test/2016-test-tweet.tsv") 105 | 106 | 107 | def read_2014_sarcasm_test_data(): 108 | return 
read_data("./data/test/2014-test-sarcasm.tsv") 109 | 110 | 111 | def read_all_test_data(): 112 | test_data_name = [ 113 | '2013-test-tweet.tsv', 114 | '2013-test-sms.tsv', 115 | '2014-test-tweet.tsv', 116 | '2014-test-sarcasm.tsv', 117 | '2014-test-journal.tsv', 118 | # '2016-test-tweet.tsv' 119 | ] 120 | base_path = './data/test/' 121 | for name in test_data_name: 122 | data, target, pos = read_data(base_path + name) 123 | yield name, data, target, pos 124 | 125 | 126 | def read_sentiment140_test_data(): 127 | # test_data, test_target = [], [] 128 | # with open(r'e:\textCorpus\testdata.csv') as f: 129 | # for i, line in enumerate(f): 130 | # line = line.split('","') 131 | # score, text = line[0][1:], line[-1] 132 | # 133 | # try: 134 | # text = text[:text.rfind('"')] 135 | # test_data.append(text) 136 | # if score == '4': 137 | # test_target.append('positive') 138 | # elif score == '0': 139 | # test_target.append('negative') 140 | # else: 141 | # test_target.append('neutral') 142 | # except Exception, e: # print i, line, e 143 | # pass 144 | return read_data(r'./data/test/sentiment140.testdata.tsv') 145 | 146 | 147 | if __name__ == '__main__': 148 | test_data_name = ['Twitter-2013_gold.csv'] 149 | # test_data_name = [ 150 | # '2013-test-tweet.tsv', '2013-test-sms.tsv', 151 | # '2014-test-tweet.tsv', '2014-test-sarcasm.tsv', '2014-test-journal.tsv', 152 | # '2016-test-tweet.tsv' 153 | # ] 154 | base_path = '../data/' 155 | for test in test_data_name: 156 | create_pos_file(base_path + test) 157 | 158 | # years = [2013, 2016] 159 | # base_path = '../data/train/' 160 | # for year in years: 161 | # create_pos_file(base_path + str(year) + "-train-data.tsv") 162 | # create_pos_file(base_path + str(year) + "-dev-data.tsv") 163 | 164 | # data, target = read_sentiment140_test_data() 165 | # reload(sys) 166 | # sys.setdefaultencoding('utf8') 167 | # temp_file_path = '../t.txt' 168 | # 169 | # with codecs.open(temp_file_path, 'w+', "utf-8") as fw: 170 | # for line in data: 
171 | # fw.write(line + '\n') 172 | # 173 | # cmd = ['java', '-jar', './ark-tweet-nlp-0.3.2.jar', '--no-confidence', temp_file_path] 174 | # stdin = subprocess.PIPE 175 | # stdout = subprocess.PIPE 176 | # stderr = subprocess.PIPE 177 | # p = subprocess.Popen(cmd, stdin=stdin, stdout=stdout, stderr=stderr) 178 | # (stdout, stderr) = p.communicate() 179 | # 180 | # result = stdout.split('\r\n') 181 | # with codecs.open(r"e:\textCorpus\testdata.csv_pos", "w+", "utf-8") as f: 182 | # for i, line in enumerate(data): 183 | # line = line.strip().split('\t') 184 | # t = result[i].split('\t') 185 | # tweet = t[0] 186 | # pos = t[1] 187 | # tags = target[i] 188 | # f.write(tags + '\t' + tweet + '\t' + pos + '\n') 189 | -------------------------------------------------------------------------------- /sentiment/urls.py: -------------------------------------------------------------------------------- 1 | """twitterDataMining URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/1.9/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. Add a URL to urlpatterns: url(r'^$', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.conf.urls import url, include 14 | 2. 
Add a URL to urlpatterns: url(r'^blog/', include('blog.urls')) 15 | """ 16 | from django.conf.urls import url 17 | import sentiment.views 18 | 19 | urlpatterns = [ 20 | url(r'^$', sentiment.views.index), 21 | url(r'^sentiment_query$', sentiment.views.query) 22 | ] 23 | -------------------------------------------------------------------------------- /sentiment/views.py: -------------------------------------------------------------------------------- 1 | from django.shortcuts import render 2 | 3 | # Create your views here. 4 | from sentiment.models.SentimentManager import query_sentiment_for_online_data 5 | import json 6 | from django.http import HttpResponse 7 | from django.shortcuts import render 8 | 9 | 10 | def index(request): 11 | return render(request, 'sentiment/index.html') 12 | 13 | 14 | def query(request): 15 | query_str = request.GET.get('query_str') 16 | # TODO if none raise error 17 | res = query_sentiment_for_online_data(query_str) 18 | return HttpResponse(json.dumps(res), content_type="application/json") 19 | -------------------------------------------------------------------------------- /static/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/4/14. 4 | -------------------------------------------------------------------------------- /static/css/bootstrap-switch.min.css: -------------------------------------------------------------------------------- 1 | /* ======================================================================== 2 | * bootstrap-switch - v3.3.2 3 | * http://www.bootstrap-switch.org 4 | * ======================================================================== 5 | * Copyright 2012-2013 Mattia Larentis 6 | * 7 | * ======================================================================== 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | * ======================================================================== 20 | */ 21 | 22 | .bootstrap-switch{display:inline-block;direction:ltr;cursor:pointer;border-radius:4px;border:1px solid;border-color:#ccc;position:relative;text-align:left;overflow:hidden;line-height:8px;z-index:0;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;vertical-align:middle;-webkit-transition:border-color ease-in-out .15s,box-shadow ease-in-out .15s;transition:border-color ease-in-out .15s,box-shadow ease-in-out .15s}.bootstrap-switch .bootstrap-switch-container{display:inline-block;top:0;border-radius:4px;-webkit-transform:translate3d(0, 0, 0);transform:translate3d(0, 0, 0)}.bootstrap-switch .bootstrap-switch-handle-on,.bootstrap-switch .bootstrap-switch-handle-off,.bootstrap-switch .bootstrap-switch-label{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;cursor:pointer;display:inline-block !important;height:100%;padding:6px 12px;font-size:14px;line-height:20px}.bootstrap-switch .bootstrap-switch-handle-on,.bootstrap-switch .bootstrap-switch-handle-off{text-align:center;z-index:1}.bootstrap-switch .bootstrap-switch-handle-on.bootstrap-switch-primary,.bootstrap-switch .bootstrap-switch-handle-off.bootstrap-switch-primary{color:#fff;background:#428bca}.bootstrap-switch .bootstrap-switch-handle-on.bootstrap-switch-info,.bootstrap-switch .bootstrap-switch-handle-off.bootstrap-switch-info{color:#fff;background:#5bc0de}.bootstrap-switch 
.bootstrap-switch-handle-on.bootstrap-switch-success,.bootstrap-switch .bootstrap-switch-handle-off.bootstrap-switch-success{color:#fff;background:#5cb85c}.bootstrap-switch .bootstrap-switch-handle-on.bootstrap-switch-warning,.bootstrap-switch .bootstrap-switch-handle-off.bootstrap-switch-warning{background:#f0ad4e;color:#fff}.bootstrap-switch .bootstrap-switch-handle-on.bootstrap-switch-danger,.bootstrap-switch .bootstrap-switch-handle-off.bootstrap-switch-danger{color:#fff;background:#d9534f}.bootstrap-switch .bootstrap-switch-handle-on.bootstrap-switch-default,.bootstrap-switch .bootstrap-switch-handle-off.bootstrap-switch-default{color:#000;background:#eee}.bootstrap-switch .bootstrap-switch-label{text-align:center;margin-top:-1px;margin-bottom:-1px;z-index:100;color:#333;background:#fff}.bootstrap-switch .bootstrap-switch-handle-on{border-bottom-left-radius:3px;border-top-left-radius:3px}.bootstrap-switch .bootstrap-switch-handle-off{border-bottom-right-radius:3px;border-top-right-radius:3px}.bootstrap-switch input[type='radio'],.bootstrap-switch input[type='checkbox']{position:absolute !important;top:0;left:0;opacity:0;filter:alpha(opacity=0);z-index:-1}.bootstrap-switch input[type='radio'].form-control,.bootstrap-switch input[type='checkbox'].form-control{height:auto}.bootstrap-switch.bootstrap-switch-mini .bootstrap-switch-handle-on,.bootstrap-switch.bootstrap-switch-mini .bootstrap-switch-handle-off,.bootstrap-switch.bootstrap-switch-mini .bootstrap-switch-label{padding:1px 5px;font-size:12px;line-height:1.5}.bootstrap-switch.bootstrap-switch-small .bootstrap-switch-handle-on,.bootstrap-switch.bootstrap-switch-small .bootstrap-switch-handle-off,.bootstrap-switch.bootstrap-switch-small .bootstrap-switch-label{padding:5px 10px;font-size:12px;line-height:1.5}.bootstrap-switch.bootstrap-switch-large .bootstrap-switch-handle-on,.bootstrap-switch.bootstrap-switch-large .bootstrap-switch-handle-off,.bootstrap-switch.bootstrap-switch-large 
.bootstrap-switch-label{padding:6px 16px;font-size:18px;line-height:1.33}.bootstrap-switch.bootstrap-switch-disabled,.bootstrap-switch.bootstrap-switch-readonly,.bootstrap-switch.bootstrap-switch-indeterminate{cursor:default !important}.bootstrap-switch.bootstrap-switch-disabled .bootstrap-switch-handle-on,.bootstrap-switch.bootstrap-switch-readonly .bootstrap-switch-handle-on,.bootstrap-switch.bootstrap-switch-indeterminate .bootstrap-switch-handle-on,.bootstrap-switch.bootstrap-switch-disabled .bootstrap-switch-handle-off,.bootstrap-switch.bootstrap-switch-readonly .bootstrap-switch-handle-off,.bootstrap-switch.bootstrap-switch-indeterminate .bootstrap-switch-handle-off,.bootstrap-switch.bootstrap-switch-disabled .bootstrap-switch-label,.bootstrap-switch.bootstrap-switch-readonly .bootstrap-switch-label,.bootstrap-switch.bootstrap-switch-indeterminate .bootstrap-switch-label{opacity:.5;filter:alpha(opacity=50);cursor:default !important}.bootstrap-switch.bootstrap-switch-animate .bootstrap-switch-container{-webkit-transition:margin-left .5s;transition:margin-left .5s}.bootstrap-switch.bootstrap-switch-inverse .bootstrap-switch-handle-on{border-bottom-left-radius:0;border-top-left-radius:0;border-bottom-right-radius:3px;border-top-right-radius:3px}.bootstrap-switch.bootstrap-switch-inverse .bootstrap-switch-handle-off{border-bottom-right-radius:0;border-top-right-radius:0;border-bottom-left-radius:3px;border-top-left-radius:3px}.bootstrap-switch.bootstrap-switch-focused{border-color:#66afe9;outline:0;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(102,175,233,0.6);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(102,175,233,0.6)}.bootstrap-switch.bootstrap-switch-on .bootstrap-switch-label,.bootstrap-switch.bootstrap-switch-inverse.bootstrap-switch-off .bootstrap-switch-label{border-bottom-right-radius:3px;border-top-right-radius:3px}.bootstrap-switch.bootstrap-switch-off 
.bootstrap-switch-label,.bootstrap-switch.bootstrap-switch-inverse.bootstrap-switch-on .bootstrap-switch-label{border-bottom-left-radius:3px;border-top-left-radius:3px} -------------------------------------------------------------------------------- /static/css/bootstrap-theme.min.css.map: -------------------------------------------------------------------------------- 1 | {"version":3,"sources":["less/theme.less","less/mixins/vendor-prefixes.less","less/mixins/gradients.less","less/mixins/reset-filter.less"],"names":[],"mappings":";;;;AAmBA,YAAA,aAAA,UAAA,aAAA,aAAA,aAME,YAAA,EAAA,KAAA,EAAA,eC2CA,mBAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,iBDvCR,mBAAA,mBAAA,oBAAA,oBAAA,iBAAA,iBAAA,oBAAA,oBAAA,oBAAA,oBAAA,oBAAA,oBCsCA,mBAAA,MAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,MAAA,EAAA,IAAA,IAAA,iBDlCR,qBAAA,sBAAA,sBAAA,uBAAA,mBAAA,oBAAA,sBAAA,uBAAA,sBAAA,uBAAA,sBAAA,uBAAA,+BAAA,gCAAA,6BAAA,gCAAA,gCAAA,gCCiCA,mBAAA,KACQ,WAAA,KDlDV,mBAAA,oBAAA,iBAAA,oBAAA,oBAAA,oBAuBI,YAAA,KAyCF,YAAA,YAEE,iBAAA,KAKJ,aErEI,YAAA,EAAA,IAAA,EAAA,KACA,iBAAA,iDACA,iBAAA,4CAAA,iBAAA,qEAEA,iBAAA,+CCnBF,OAAA,+GH4CA,OAAA,0DACA,kBAAA,SAuC2C,aAAA,QAA2B,aAAA,KArCtE,mBAAA,mBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,oBAAA,oBAEE,iBAAA,QACA,aAAA,QAMA,sBAAA,6BAAA,4BAAA,6BAAA,4BAAA,4BAAA,uBAAA,8BAAA,6BAAA,8BAAA,6BAAA,6BAAA,gCAAA,uCAAA,sCAAA,uCAAA,sCAAA,sCAME,iBAAA,QACA,iBAAA,KAgBN,aEtEI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDAEA,OAAA,+GCnBF,OAAA,0DH4CA,kBAAA,SACA,aAAA,QAEA,mBAAA,mBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,oBAAA,oBAEE,iBAAA,QACA,aAAA,QAMA,sBAAA,6BAAA,4BAAA,6BAAA,4BAAA,4BAAA,uBAAA,8BAAA,6BAAA,8BAAA,6BAAA,6BAAA,gCAAA,uCAAA,sCAAA,uCAAA,sCAAA,sCAME,iBAAA,QACA,iBAAA,KAiBN,aEvEI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDAEA,OAAA,+GCnBF,OAAA,0DH4CA,kBAAA,SACA,aAAA,QAEA,mBAAA,mBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,oBAAA,oBAEE,iBAAA,QACA,aAAA,QAMA,sBAAA,6BAAA,4BAAA,6BAAA,4BAAA,4BAAA,uBAAA,8BAAA,6BAAA,8BAAA,6BAAA,6BAAA,gCAAA,uCAAA,sCAAA,uCAAA,sCAAA,sCAME,iBAAA,QACA,iBAAA,KAkBN,UExEI,i
BAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDAEA,OAAA,+GCnBF,OAAA,0DH4CA,kBAAA,SACA,aAAA,QAEA,gBAAA,gBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,iBAAA,iBAEE,iBAAA,QACA,aAAA,QAMA,mBAAA,0BAAA,yBAAA,0BAAA,yBAAA,yBAAA,oBAAA,2BAAA,0BAAA,2BAAA,0BAAA,0BAAA,6BAAA,oCAAA,mCAAA,oCAAA,mCAAA,mCAME,iBAAA,QACA,iBAAA,KAmBN,aEzEI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDAEA,OAAA,+GCnBF,OAAA,0DH4CA,kBAAA,SACA,aAAA,QAEA,mBAAA,mBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,oBAAA,oBAEE,iBAAA,QACA,aAAA,QAMA,sBAAA,6BAAA,4BAAA,6BAAA,4BAAA,4BAAA,uBAAA,8BAAA,6BAAA,8BAAA,6BAAA,6BAAA,gCAAA,uCAAA,sCAAA,uCAAA,sCAAA,sCAME,iBAAA,QACA,iBAAA,KAoBN,YE1EI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDAEA,OAAA,+GCnBF,OAAA,0DH4CA,kBAAA,SACA,aAAA,QAEA,kBAAA,kBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,mBAAA,mBAEE,iBAAA,QACA,aAAA,QAMA,qBAAA,4BAAA,2BAAA,4BAAA,2BAAA,2BAAA,sBAAA,6BAAA,4BAAA,6BAAA,4BAAA,4BAAA,+BAAA,sCAAA,qCAAA,sCAAA,qCAAA,qCAME,iBAAA,QACA,iBAAA,KA2BN,eAAA,WClCE,mBAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,EAAA,IAAA,IAAA,iBD2CV,0BAAA,0BE3FI,iBAAA,QACA,iBAAA,oDACA,iBAAA,+CAAA,iBAAA,wEACA,iBAAA,kDACA,OAAA,+GF0FF,kBAAA,SAEF,yBAAA,+BAAA,+BEhGI,iBAAA,QACA,iBAAA,oDACA,iBAAA,+CAAA,iBAAA,wEACA,iBAAA,kDACA,OAAA,+GFgGF,kBAAA,SASF,gBE7GI,iBAAA,iDACA,iBAAA,4CACA,iBAAA,qEAAA,iBAAA,+CACA,OAAA,+GACA,OAAA,0DCnBF,kBAAA,SH+HA,cAAA,ICjEA,mBAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,iBD6DV,sCAAA,oCE7GI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SD2CF,mBAAA,MAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,MAAA,EAAA,IAAA,IAAA,iBD0EV,cAAA,iBAEE,YAAA,EAAA,IAAA,EAAA,sBAIF,gBEhII,iBAAA,iDACA,iBAAA,4CACA,iBAAA,qEAAA,iBAAA,+CACA,OAAA,+GACA,OAAA,0DCnBF,kBAAA,SHkJA,cAAA,IAHF,sCAAA,oCEhII,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SD2CF,mBAAA,MAAA,EAAA,IAAA,IAAA,gBACQ,WAAA,MAAA,EAAA,IAAA,IAAA,gBDgFV,8BAAA,iCAYI,YAAA,EAAA,KAAA,EAAA,gBAKJ,qBAAA,kBAAA,mBAGE,cAAA,EAqBF,yBAfI,mDAAA,yDAAA,yDAGE,MAAA,KE7JF,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,UFqKJ,OACE,YAAA,EAAA,IAAA,EAAA,qB
C3HA,mBAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,gBACQ,WAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,gBDsIV,eEtLI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF8KF,aAAA,QAKF,YEvLI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF8KF,aAAA,QAMF,eExLI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF8KF,aAAA,QAOF,cEzLI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF8KF,aAAA,QAeF,UEjMI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFuMJ,cE3MI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFwMJ,sBE5MI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFyMJ,mBE7MI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF0MJ,sBE9MI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF2MJ,qBE/MI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF+MJ,sBElLI,iBAAA,yKACA,iBAAA,oKACA,iBAAA,iKFyLJ,YACE,cAAA,IC9KA,mBAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,EAAA,IAAA,IAAA,iBDgLV,wBAAA,8BAAA,8BAGE,YAAA,EAAA,KAAA,EAAA,QEnOE,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFiOF,aAAA,QALF,+BAAA,qCAAA,qCAQI,YAAA,KAUJ,OCnME,mBAAA,EAAA,IAAA,IAAA,gBACQ,WAAA,EAAA,IAAA,IAAA,gBD4MV,8BE5PI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFyPJ,8BE7PI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF0PJ,8BE9PI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF2PJ,2BE/PI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF4PJ,8BEhQI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF6PJ,6BEjQI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFoQJ,MExQI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFsQF,aAAA,QC3NA,mBAAA,MAAA,EAAA,IAAA,IAAA,gBAAA,EAAA,IAAA,EAAA,qBACQ,WAAA,MAAA,EAAA,IAAA,IAAA,gBAAA,EAAA,IAAA,EAAA"} -------------------------------------------------------------------------------- 
/static/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/FontAwesome.otf -------------------------------------------------------------------------------- /static/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /static/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /static/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /static/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /static/fonts/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/glyphicons-halflings-regular.eot 
-------------------------------------------------------------------------------- /static/fonts/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /static/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /static/fonts/glyphicons-halflings-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/glyphicons-halflings-regular.woff2 -------------------------------------------------------------------------------- /static/image/others.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/image/others.jpg -------------------------------------------------------------------------------- /static/image/sentiment_1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/image/sentiment_1.gif -------------------------------------------------------------------------------- /static/image/sentiment_2.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/image/sentiment_2.jpg -------------------------------------------------------------------------------- /static/image/sentiment_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/image/sentiment_3.jpg -------------------------------------------------------------------------------- /static/image/topic_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/image/topic_1.jpg -------------------------------------------------------------------------------- /static/image/topic_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/image/topic_3.jpg -------------------------------------------------------------------------------- /static/image/topic_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/image/topic_4.jpg -------------------------------------------------------------------------------- /static/image/topic_8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/image/topic_8.jpg -------------------------------------------------------------------------------- /static/js/append_new_iframe.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by hrwhisper on 2016/2/9. 
3 | */ 4 | 5 | function network_retweet_iframe() { 6 | $('body').append(''); 8 | } 9 | 10 | function statistic_hashtag_timeline() { 11 | $('body').append(''); 13 | } 14 | 15 | function statistic_hashtag_compare() { 16 | $('body').append(''); 18 | } 19 | 20 | function statistic_hashtag_pie(){ 21 | $('body').append(''); 23 | 24 | } -------------------------------------------------------------------------------- /static/js/googleMap.js: -------------------------------------------------------------------------------- 1 | var googleMap = { 2 | map: null, 3 | markers: {}, 4 | currentId: 0, 5 | 6 | uniqueId: function () { 7 | return ++this.currentId; 8 | }, 9 | 10 | infowindow: new google.maps.InfoWindow({ 11 | size: new google.maps.Size(150, 50) 12 | }), 13 | 14 | 15 | initialize: function () { 16 | if (this.map) return null; 17 | 18 | var myOptions = { 19 | zoom: 2, 20 | center: new google.maps.LatLng(43.907787, 0), 21 | mapTypeControl: true, 22 | mapTypeControlOptions: {style: google.maps.MapTypeControlStyle.DROPDOWN_MENU}, 23 | navigationControl: true, 24 | mapTypeId: google.maps.MapTypeId.ROADMAP 25 | }; 26 | this.map = new google.maps.Map(document.getElementById("map_canvas"), 27 | myOptions); 28 | 29 | google.maps.event.addListener(this.map, 'click', function () { 30 | googleMap.infowindow.close(); 31 | }); 32 | 33 | google.maps.event.addListener(this.map, 'click', function (event) { 34 | var Latitude = event.latLng.lng().toFixed(2); 35 | var longitude = event.latLng.lat().toFixed(2); 36 | googleMap.addMarker(event.latLng, "name", "Location
" +Latitude +","+ longitude, 37 | Latitude +","+ longitude); 38 | }); 39 | 40 | //google.maps.event.addListener(this.map, 'click', function (event) { 41 | // console.log("Latitude: " + event.latLng.lat() + " " + ", longitude: " + event.latLng.lng()); 42 | //}); 43 | }, 44 | 45 | 46 | addMarker: function (Gpoint, name, contentString, geo) { 47 | var id = this.uniqueId(); // get new id 48 | marker = new google.maps.Marker({ 49 | id: id, 50 | position: Gpoint, 51 | geo: geo, 52 | map: googleMap.map, 53 | draggable: true, 54 | animation: google.maps.Animation.DROP 55 | }); 56 | 57 | google.maps.event.addListener(marker, 'click', function () { 58 | googleMap.infowindow.setPosition(this.position); 59 | googleMap.infowindow.setContent(contentString); 60 | googleMap.infowindow.open(googleMap.map, marker); 61 | }); 62 | google.maps.event.trigger(marker, 'click'); 63 | 64 | googleMap.map.panTo(Gpoint); 65 | 66 | this.markers[id] = marker; 67 | 68 | google.maps.event.addListener(marker, "rightclick", function (point) { 69 | googleMap.delMarker(this.id) 70 | }); 71 | //var res = ''; 72 | //for (i in googleMap.markers){ 73 | // res += googleMap.markers[i].geo + ','; 74 | //} 75 | //res = res.substring(0,res.length-1) 76 | //console.log(res); 77 | }, 78 | 79 | delMarker: function (id) { 80 | this.markers[id].setMap(null); 81 | delete this.markers[id]; 82 | } 83 | }; -------------------------------------------------------------------------------- /static/js/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by hrwhisper on 2016/4/13. 
3 | */ 4 | 5 | (function () { 6 | 7 | $(function () { 8 | $('#myCarousel').on('slide.bs.carousel', function (e) { 9 | // var slideFrom = $(this).find('.active').index(); 10 | var slideTo = $(e.relatedTarget).index(); 11 | // console.log(slideFrom + ' => ' + slideTo); 12 | var change_obj = $("#header_info_button"); 13 | if (slideTo == 1) { 14 | change_obj.attr("href", "/topic"); 15 | change_obj.text("Start topic Now"); 16 | } else { 17 | change_obj.attr("href", "/sentiment"); 18 | change_obj.text("Start sentiment Now"); 19 | } 20 | }); 21 | }); 22 | "use strict"; 23 | 24 | var π = Math.PI; 25 | var τ = 2 * Math.PI; 26 | 27 | var types = { 28 | square: function (n) { 29 | return (((n + 1) % 2) ? 0 : 1) / n; 30 | }, 31 | triangle: function (n) { 32 | if (!(n % 2)) return 0; 33 | return ((n % 4 === 1) ? 1 : -1) / (n * n); 34 | }, 35 | sawtooth: function (n) { 36 | return ((n % 2) ? -1 : 1) / (n + 1); 37 | }, 38 | pulse: function (n) { 39 | return 0.1; 40 | } 41 | }; 42 | 43 | function FT(A, N, φ) { 44 | φ = φ || 0; 45 | return function (x) { 46 | var n = -1, y = 0; 47 | while (++n < N) { 48 | y += A[n] * Math.sin(τ * (n + 1) * x + φ); 49 | } 50 | return y; 51 | } 52 | } 53 | 54 | var 55 | margin = {top: 0, right: 0, bottom: 0, left: 0}, 56 | W = 450, 57 | H = 450, 58 | h = H - margin.top - margin.bottom, 59 | 60 | radius = 140, 61 | theta = 0, 62 | xmax = 1.5, 63 | rate = 1 / 60, 64 | 65 | tDomain = d3.range(0, 1.1, 1 / 1000), // trace domain 66 | gDomain = d3.range(0, xmax, xmax / 1000), // graph domain 67 | 68 | C = types.square, // coeffiecients 69 | L = 6, // size 70 | F = 0.3, // frequence 71 | 72 | yCirc = d3.scale.linear().domain([-1, 1]).range([h / 2 + radius, h / 2 - radius]), 73 | xCirc = d3.scale.linear().domain([-1, 1]).range([0, 2 * radius]), 74 | rAxis = d3.scale.linear().domain([0, 1]).range([0, radius]), 75 | xAxis = d3.scale.linear().range([radius, W - margin.left]), 76 | 77 | Fxy, fx, fy, 78 | 79 | timer, data = []; 80 | 81 | var graph = d3.svg.line() 
82 | .x(function (d) { 83 | return xAxis(d); 84 | }) 85 | .y(function (d) { 86 | return yCirc(fy(theta - d)); 87 | }); 88 | 89 | var proj = d3.svg.line() 90 | .x(function (d) { 91 | return xCirc(d.x); 92 | }) 93 | .y(function (d) { 94 | return yCirc(d.y); 95 | }); 96 | 97 | var trace = d3.svg.line() 98 | .x(function (d) { 99 | return xCirc(fx(d)); 100 | }) 101 | .y(function (d) { 102 | return yCirc(fy(d)); 103 | }); 104 | 105 | function gTransform(d) { 106 | return "translate(" + xCirc(d.x) + "," + yCirc(d.y) + ")"; 107 | } 108 | 109 | var svg = d3.select(".visualization") 110 | .append("svg") 111 | .attr("width", W) 112 | .attr("height", H); 113 | 114 | svg.append("line") 115 | .attr("class", "axis") 116 | .attr("y1", margin.top + yCirc(0)).attr("x1", 0) 117 | .attr("y2", margin.top + yCirc(0)).attr("x2", W); 118 | 119 | svg.append("line") 120 | .attr("class", "axis") 121 | .attr("x1", margin.left + xCirc(0)).attr("y1", 0) 122 | .attr("x2", margin.left + xCirc(0)).attr("y2", H); 123 | 124 | var vis = svg.append("g") 125 | .attr("transform", "translate(" + margin.left + "," + margin.top + ")"); 126 | 127 | var gPath = vis.append("path").attr("class", "graph"); 128 | var tPath = vis.append("path").attr("class", "trace"); 129 | var pPath = vis.append("path").attr("class", "proj"); 130 | 131 | function cache() { 132 | var A; 133 | if (typeof C === "function") { 134 | A = d3.range(1, L + 1).map(C); 135 | } else { 136 | A = C.slice(0, L); 137 | } 138 | 139 | fx = FT(A, L - 1, π / 2); 140 | fy = FT(A, L - 1, 0); 141 | 142 | Fxy = A.map(function (a, i) { 143 | return {X: FT(A, i, π / 2), Y: FT(A, i, 0), r: Math.abs(a)}; 144 | }); 145 | } 146 | 147 | function calc() { 148 | if (!Fxy) cache(); 149 | Fxy.forEach(function (f, i) { 150 | var d = data[i] || (data[i] = {x: 0, y: 0, r: 0}); 151 | d.x = f.X(theta); 152 | d.y = f.Y(theta); 153 | d.r = f.r; 154 | d.f = i + 1; 155 | }); 156 | data.length = Fxy.length; 157 | return data; 158 | } 159 | 160 | function coeff() { 161 | 
var co = vis.selectAll(".coeff").data(calc()); 162 | 163 | // exit 164 | co.exit().remove(); 165 | 166 | // enter 167 | var en = co.enter().append("g").attr("class", "coeff"); 168 | 169 | en.append("circle").attr("class", "circle"); 170 | en.append("circle").attr("class", "dot").attr("r", 3); 171 | 172 | // update 173 | co.classed("last", function (d, i) { 174 | return i === L - 1; 175 | }); 176 | co.classed("first", function (d, i) { 177 | return i === 0; 178 | }); 179 | 180 | co.select(".circle").attr("r", function (d) { 181 | return rAxis(d.r); 182 | }); 183 | 184 | return co; 185 | } 186 | 187 | function drawGraph() { 188 | xAxis.domain([0, xmax]); 189 | coeff().attr("transform", gTransform); 190 | var last = data[data.length - 1]; 191 | pPath.attr("d", proj([last, {x: 0, y: last.y}])); 192 | gPath.attr("d", graph(gDomain)); 193 | tPath.attr("d", trace(tDomain)); 194 | } 195 | 196 | function play() { 197 | if (timer) return; 198 | (function loop() { 199 | drawGraph(); 200 | theta += F * rate; 201 | timer = setTimeout(loop, rate * 1000); 202 | })(); 203 | } 204 | 205 | C = types['sawtooth']; 206 | 207 | play(); 208 | 209 | })(); 210 | -------------------------------------------------------------------------------- /static/js/loading-control.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by hrwhisper on 2016/4/25. 
3 | */ 4 | var loading_control = { 5 | opts: { 6 | // more options: http://fgnass.github.io/spin.js/ 7 | length: 28, 8 | width: 14, 9 | radius: 42, 10 | color: "#fff", 11 | scale: 0.5, 12 | opacity: 0.2, 13 | position: "fixed" 14 | }, 15 | spinner: null, 16 | div_wait: null, 17 | div_wait_bg: null, 18 | 19 | start: function () { 20 | if (!this.div_wait) { 21 | var div = document.createElement("div"); 22 | div.id = "foo"; 23 | document.body.appendChild(div); 24 | this.div_wait = div; 25 | } 26 | 27 | if (!this.div_wait_bg) { 28 | var div = document.createElement("div"); 29 | div.id = "waiting-bg"; 30 | div.style.cssText = "width:100%; height:100%; background-color:#000; filter:alpha(opacity=60);-moz-opacity:0.6; opacity:0.6; position:fixed; left:0px; top:0px; display:none; z-index:1000;"; 31 | 32 | document.body.appendChild(div); 33 | this.div_wait_bg = div; 34 | } 35 | 36 | if (!this.spinner) { 37 | this.spinner = new Spinner(this.opts); 38 | } 39 | 40 | this.div_wait_bg.style.display = "block"; 41 | this.spinner.spin(this.div_wait) 42 | }, 43 | 44 | stop: function () { 45 | if(this.spinner) 46 | this.spinner.stop(); 47 | this.div_wait_bg.style.display = "none"; 48 | } 49 | }; -------------------------------------------------------------------------------- /static/js/npm.js: -------------------------------------------------------------------------------- 1 | // This file is autogenerated via the `commonjs` Grunt task. You can require() this file in a CommonJS environment. 
2 | require('../../js/transition.js') 3 | require('../../js/alert.js') 4 | require('../../js/button.js') 5 | require('../../js/carousel.js') 6 | require('../../js/collapse.js') 7 | require('../../js/dropdown.js') 8 | require('../../js/modal.js') 9 | require('../../js/tooltip.js') 10 | require('../../js/popover.js') 11 | require('../../js/scrollspy.js') 12 | require('../../js/tab.js') 13 | require('../../js/affix.js') -------------------------------------------------------------------------------- /static/js/sentiment.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by hrwhisper on 2016/4/24. 3 | */ 4 | 5 | window.onload = function () { 6 | gauge = $("#gauge"); 7 | gauge.height(gauge.width() / 1.5); 8 | 9 | myChart = echarts.init(document.getElementById('gauge')); 10 | myChart.setOption(option, true); 11 | 12 | $(window).resize(function () { 13 | gauge.height(gauge.width() / 1.5); 14 | myChart.resize(); 15 | }); 16 | }; 17 | 18 | 19 | option = { 20 | tooltip: { 21 | formatter: "{c} {b}" 22 | }, 23 | series: [ 24 | { 25 | name: 'positive', 26 | type: 'gauge', 27 | z: 3, 28 | min: 0, 29 | max: 100, 30 | splitNumber: 10, 31 | radius: '70%', 32 | center: ['50%', '50%'], // 默认全局居中 33 | axisLine: { // 坐标轴线 34 | lineStyle: { // 属性lineStyle控制线条样式 35 | width: 10 36 | } 37 | }, 38 | axisTick: { // 坐标轴小标记 39 | length: 15, // 属性length控制线长 40 | lineStyle: { // 属性lineStyle控制线条样式 41 | color: 'auto' 42 | } 43 | }, 44 | splitLine: { // 分隔线 45 | length: 10, // 属性length控制线长 46 | lineStyle: { // 属性lineStyle(详见lineStyle)控制线条样式 47 | color: 'auto' 48 | } 49 | }, 50 | title: { 51 | textStyle: { // 其余属性默认使用全局文本样式,详见TEXTSTYLE 52 | fontWeight: 'bolder', 53 | fontSize: 20, 54 | fontStyle: 'italic' 55 | } 56 | }, 57 | detail: { 58 | textStyle: { // 其余属性默认使用全局文本样式,详见TEXTSTYLE 59 | fontWeight: 'bolder' 60 | } 61 | }, 62 | data: [{value: 0, name: 'positive'}] 63 | }, 64 | { 65 | name: 'negative', 66 | type: 'gauge', 67 | center: ['18%', 
'55%'], // 默认全局居中 68 | radius: '40%', 69 | min: 0, 70 | max: 100, 71 | endAngle: 45, 72 | splitNumber: 10, 73 | axisLine: { // 坐标轴线 74 | lineStyle: { // 属性lineStyle控制线条样式 75 | width: 8 76 | } 77 | }, 78 | axisTick: { // 坐标轴小标记 79 | length: 12, // 属性length控制线长 80 | lineStyle: { // 属性lineStyle控制线条样式 81 | color: 'auto' 82 | } 83 | }, 84 | splitLine: { // 分隔线 85 | length: 20, // 属性length控制线长 86 | lineStyle: { // 属性lineStyle(详见lineStyle)控制线条样式 87 | color: 'auto' 88 | } 89 | }, 90 | pointer: { 91 | width: 5 92 | }, 93 | title: { 94 | offsetCenter: [0, '-30%'] // x, y,单位px 95 | }, 96 | detail: { 97 | textStyle: { // 其余属性默认使用全局文本样式,详见TEXTSTYLE 98 | fontWeight: 'bolder' 99 | } 100 | }, 101 | data: [{value: 0, name: 'negative'}] 102 | }, 103 | { 104 | name: 'neutral', 105 | type: 'gauge', 106 | center: ['78%', '50%'], // 默认全局居中 107 | radius: '25%', 108 | min: 0, 109 | max: 100, 110 | startAngle: 135, 111 | endAngle: -50, 112 | splitNumber: 2, 113 | axisLine: { // 坐标轴线 114 | lineStyle: { // 属性lineStyle控制线条样式 115 | width: 8 116 | } 117 | }, 118 | axisTick: { // 坐标轴小标记 119 | splitNumber: 5, 120 | length: 10, // 属性length控制线长 121 | lineStyle: { // 属性lineStyle控制线条样式 122 | color: 'auto' 123 | } 124 | }, 125 | splitLine: { // 分隔线 126 | length: 15, // 属性length控制线长 127 | lineStyle: { // 属性lineStyle(详见lineStyle)控制线条样式 128 | color: 'auto' 129 | } 130 | }, 131 | pointer: { 132 | width: 2 133 | }, 134 | detail: { 135 | textStyle: { // 其余属性默认使用全局文本样式,详见TEXTSTYLE 136 | fontSize: 20 137 | } 138 | }, 139 | data: [{value: 0, name: 'neutral'}] 140 | } 141 | ] 142 | }; 143 | 144 | function update_sentiment_result(res) { 145 | // update gauge charts 146 | var positive = res['positive'], 147 | negative = res['negative'], 148 | neutral = res['neutral']; 149 | 150 | option.series[0].data[0].value = (positive['percent'] * 100).toFixed(2) - 0; 151 | option.series[1].data[0].value = (negative['percent'] * 100).toFixed(2) - 0; 152 | option.series[2].data[0].value = (neutral['percent'] * 100).toFixed(2) 
- 0; 153 | myChart.setOption(option, true); 154 | 155 | //add text 156 | $("#positive_sample_result").empty(); 157 | $("#negative_sample_result").empty(); 158 | $("#neutral_sample_result").empty(); 159 | 160 | var positive_text = positive['text'], 161 | negative_text = negative['text'], 162 | neutral_text = neutral['text']; 163 | 164 | for (var i = 0; i < positive_text.length; i++) 165 | update_sentiment_text_sample(positive_text[i], 'positive'); 166 | 167 | for (i = 0; i < negative_text.length; i++) 168 | update_sentiment_text_sample(negative_text[i], 'negative'); 169 | 170 | for (i = 0; i < neutral_text.length; i++) 171 | update_sentiment_text_sample(neutral_text[i], 'neutral'); 172 | 173 | $("#sample_result").show(); //.css("display", "block"); 174 | } 175 | 176 | // update_sentiment_text_sample('text' , false); //just text 177 | function update_sentiment_text_sample(text, mode) { 178 | var tag_head = '
  • ', tag_end = '
  • '; 179 | if (mode == 'positive') 180 | $("#positive_sample_result").append(tag_head + text + tag_end); 181 | else if (mode == 'negative') 182 | $("#negative_sample_result").append(tag_head + text + tag_end); 183 | else 184 | $("#neutral_sample_result").append(tag_head + text + tag_end); 185 | } 186 | 187 | 188 | function get_sentiment_result() { 189 | //TODO check data is empty 190 | var data = { 191 | 'query_str': $('#name').val() 192 | }; 193 | 194 | console.log(data); 195 | loading_control.start(); 196 | 197 | $.ajax({ 198 | url: 'sentiment_query', 199 | data: data, 200 | success: function (v) { 201 | console.log(v); 202 | update_sentiment_result(v); 203 | loading_control.stop(); 204 | }, 205 | error: function (v) { 206 | console.log('------error------' + v); 207 | loading_control.stop(); 208 | }, 209 | dataType: 'json' 210 | }); 211 | } -------------------------------------------------------------------------------- /static/js/spin.min.js: -------------------------------------------------------------------------------- 1 | // http://spin.js.org/#v2.3.2 2 | !function(a,b){"object"==typeof module&&module.exports?module.exports=b():"function"==typeof define&&define.amd?define(b):a.Spinner=b()}(this,function(){"use strict";function a(a,b){var c,d=document.createElement(a||"div");for(c in b)d[c]=b[c];return d}function b(a){for(var b=1,c=arguments.length;c>b;b++)a.appendChild(arguments[b]);return a}function c(a,b,c,d){var e=["opacity",b,~~(100*a),c,d].join("-"),f=.01+c/d*100,g=Math.max(1-(1-a)/b*(100-f),a),h=j.substring(0,j.indexOf("Animation")).toLowerCase(),i=h&&"-"+h+"-"||"";return m[e]||(k.insertRule("@"+i+"keyframes "+e+"{0%{opacity:"+g+"}"+f+"%{opacity:"+a+"}"+(f+.01)+"%{opacity:1}"+(f+b)%100+"%{opacity:"+a+"}100%{opacity:"+g+"}}",k.cssRules.length),m[e]=1),e}function d(a,b){var c,d,e=a.style;if(b=b.charAt(0).toUpperCase()+b.slice(1),void 0!==e[b])return 
b;for(d=0;d',c)}k.addRule(".spin-vml","behavior:url(#default#VML)"),h.prototype.lines=function(a,d){function f(){return e(c("group",{coordsize:k+" "+k,coordorigin:-j+" "+-j}),{width:k,height:k})}function h(a,h,i){b(m,b(e(f(),{rotation:360/d.lines*a+"deg",left:~~h}),b(e(c("roundrect",{arcsize:d.corners}),{width:j,height:d.scale*d.width,left:d.scale*d.radius,top:-d.scale*d.width>>1,filter:i}),c("fill",{color:g(d.color,a),opacity:d.opacity}),c("stroke",{opacity:0}))))}var i,j=d.scale*(d.length+d.width),k=2*d.scale*j,l=-(d.width+d.length)*d.scale*2+"px",m=e(f(),{position:"absolute",top:l,left:l});if(d.shadow)for(i=1;i<=d.lines;i++)h(i,-2,"progid:DXImageTransform.Microsoft.Blur(pixelradius=2,makeshadow=1,shadowopacity=.3)");for(i=1;i<=d.lines;i++)h(i);return b(a,m)},h.prototype.opacity=function(a,b,c,d){var e=a.firstChild;d=d.shadow&&d.lines||0,e&&b+d>1)+"px"})}for(var i,k=0,l=(f.lines-1)*(1-f.direction)/2;k 4 | 5 | 6 | 7 | Twitter数据挖掘及其可视化 8 | 9 | {# use #} 10 | {# or use #} 11 | 12 | 13 | 14 | 15 | 16 | 17 | {% block include_script %} 18 | 19 | {% endblock %} 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | {% include "nav.html" %} 29 | 30 | 31 | {% block mainbody %} 32 |

    original

    33 | {% endblock %} 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /templates/footer.html: -------------------------------------------------------------------------------- 1 | 36 | -------------------------------------------------------------------------------- /templates/header.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "basic.html" %} 2 | 3 | {% block include_script %} 4 | 5 | {% endblock %} 6 | 7 | {% block mainbody %} 8 | 9 |
    10 | 42 | 43 |
    44 |

    Twitter数据挖掘及其可视化

    45 | Start sentiment Now 46 |
    47 | 48 |
    49 | 66 |
    67 |
    68 |
    69 |
    70 |

    话题检测

    71 |
    Topic detection
    72 |

    73 | 使用twitter Stream API 提供的实时数据。 74 | 可以追踪特定关键词的推文、某些用户的推文、在某些特定的地理位置(GEO)的推文。
    75 | 使用变分推断的动态词库的WOLDA算法,挖掘实时主题中的关键词,并跟踪话题趋势。 76 |

    77 |
    78 |
    79 | sentiment 80 |
    81 |
    82 |
    83 |
    84 | 85 | 86 |
    87 |
    88 |
    89 |
    90 | sentiment 91 |
    92 |
    93 |

    情感分析

    94 |
    Sentiment analysis
    95 |

    96 | 使用twitter search API提供的数据进行查询。
    97 | 使用最大熵分类器,训练数据来自 SemEval比赛
    98 | 可以帮助: 99 |

  • 用户查询人们谈论某些特定词时候,带有的感情色彩
  • 100 |
  • 商家查看消费者对于新推出的产品的评价
  • 101 |
    102 |

    103 |
    104 |
    105 |
    106 |
    107 | 108 | 109 |
    110 |
    111 |
    112 |
    113 |

    数据可视化

    114 |
    Data visualization
    115 |

    116 | 利用D3、ECharts 进行结果可视化
    117 | 除了基本的柱状图、饼状图外,还有多种可视化图表:
    如treemap、bubble、heatmap、sunburst等
    118 | 直观地反映出结果
    119 |

    120 |
    121 |
    122 |
    123 | 124 |
    125 |
    126 |
    127 |
    128 | 129 | 130 |
    131 |
    132 |
    133 |
    134 | others 135 |
    136 |
    137 |

    其他

    138 |
    Others
    139 |

    140 | 使用 Django 进行WEB端开发
    141 | 使用 Bootstrap 帮助界面设计
    142 | 使用 MongoDB 存储数据,必要时可以进行数据分片
    143 | 使用 Git 进行版本控制,并托管于 Github
    145 |

    146 |
    147 |
    148 |
    149 |
    150 | 151 | 152 | {% include "footer.html" %} 153 | 154 | {% endblock %} -------------------------------------------------------------------------------- /templates/index2.html: -------------------------------------------------------------------------------- 1 | {% extends "basic.html" %} 2 | 3 | {% block mainbody %} 4 |
    5 | retweet 6 | 7 |
    8 | 9 |
    10 | hashtag: 11 | date: 12 | 13 |
    14 | 15 | 16 |
    17 | hashtag1: 18 | hashtag2: 19 | date: 20 | 21 |
    22 | 23 | 24 |
    25 | date: 26 | 27 |
    28 | 29 | 30 |
    31 | date: 32 | 33 |
    34 | 35 | {% endblock %} -------------------------------------------------------------------------------- /templates/nav.html: -------------------------------------------------------------------------------- 1 | {% load mytag %} 2 | 3 | {#旧版的导航条#} 4 | {##} 17 | 18 | -------------------------------------------------------------------------------- /templates/network/retweet.html: -------------------------------------------------------------------------------- 1 | {% extends "basic.html" %} 2 | 3 | {% block mainbody %} 4 | 5 | 112 | {% endblock %} -------------------------------------------------------------------------------- /templates/sentiment/header.html: -------------------------------------------------------------------------------- 1 | {% extends "header.html" %} 2 | 3 | {% block header_title %} 4 | Sentiment analysis 5 | {% endblock %} 6 | 7 | {% block header_instruction %} 8 | 使用 twitter Search API 数据
    9 | 输入您感兴趣的词查询, 如 Messi 10 | {% endblock %} 11 | 12 | {% block header_image_src %} 13 | /static/image/sentiment_2.jpg 14 | {% endblock %} 15 | 16 | 17 | -------------------------------------------------------------------------------- /templates/sentiment/index.html: -------------------------------------------------------------------------------- 1 | {% extends "basic.html" %} 2 | 3 | {% block include_script %} 4 | 5 | 6 | 7 | 8 | {% endblock %} 9 | 10 | 11 | {% block mainbody %} 12 | {% include "sentiment/header.html" %} 13 | 14 |
    15 |
    16 |
    17 |
    18 | 24 |
    25 |
    26 | 27 |
    28 |
    29 |
    30 | 31 | 54 |
    55 |
    56 | 57 | 58 | 59 | {% include "footer.html" %} 60 | {% endblock %} -------------------------------------------------------------------------------- /templates/statistic/hashtag_compare.html: -------------------------------------------------------------------------------- 1 | {% extends "basic.html" %} 2 | 3 | {% block mainbody %} 4 | 5 | 6 | 7 |
    8 | 9 | 102 | {% endblock %} -------------------------------------------------------------------------------- /templates/statistic/hashtag_timeline.html: -------------------------------------------------------------------------------- 1 | {% extends "basic.html" %} 2 | 3 | {% block mainbody %} 4 | 5 | 6 |
    7 | 8 | 104 | {% endblock %} -------------------------------------------------------------------------------- /templates/statistic/pie.html: -------------------------------------------------------------------------------- 1 | {% extends "basic.html" %} 2 | 3 | {% block mainbody %} 4 | 5 |
    6 | 7 | 65 | {% endblock %} -------------------------------------------------------------------------------- /templates/topic/header.html: -------------------------------------------------------------------------------- 1 | {% extends "header.html" %} 2 | 3 | {% block header_title %} 4 | Topic detection 5 | {% endblock %} 6 | 7 | {% block header_instruction %} 8 | 使用 twitter Stream API 的实时数据
    9 | 进行实时主题挖掘,以及结果可视化 10 | {% endblock %} 11 | 12 | {% block header_image_src %} 13 | /static/image/topic_1.jpg 14 | {% endblock %} 15 | 16 | 17 | -------------------------------------------------------------------------------- /templates/topic/index.html: -------------------------------------------------------------------------------- 1 | {% extends "basic.html" %} 2 | 3 | {% block include_script %} 4 | 5 | 6 | 7 | 8 | 9 | {% endblock %} 10 | 11 | 12 | {% block mainbody %} 13 | 14 | {% include "topic/header.html" %} 15 | {% include "topic/parameters_panel.html" %} 16 | {% include "topic/toolbar.html" %} 17 | 18 |
    19 |
    20 |
    21 |
    22 |
    23 |

    Track

    24 |

    使用逗号分隔的短语
    25 | 空格代表 : ‘the twitter’ is the AND twitter
    26 | 逗号则是 : ‘the,twitter’ is the OR twitter 27 |

    28 |
    29 |
    30 |

    Follow

    31 |

    使用逗号分隔的用户id
    32 | 将会包括: 33 | 该用户 发表转发 的推文
    34 | 该用户 回复 的推文
    35 | 但不会包括 @该用户的推文 36 |

    37 |
    38 |
    39 |

    Locations

    40 |

    使用逗号分隔的 经度纬度 坐标对
    41 | 如-122.75,36.8,-73,41 表示来自San Francisco 或 New York City 的城市
    42 | 注意:先写经度再写纬度
    43 | 注意:转发的推文无 locations 信息 44 |

    45 |
    46 |
    47 |
    48 |
    49 |
    50 |
    51 |

    控制面板

    52 |

    位于最左下角,不影响可视化结果
    53 | 随页面滚动,方便控制
    54 | 点击进行速率控制或者选择可视化图表

    55 |
    56 | 57 |
    58 |

    速率控制

    59 |

    60 | 暂停时,服务器仍继续计算,但页面不更新
    61 | 停止时,服务器也停止计算
    62 | 点击开始恢复计算

    63 |
    64 | 65 |
    66 |

    多种图形

    67 |

    文字结果- 代表性推文
    68 | 话题比例
    69 | 词的比例

    70 |
    71 |
    72 |
    73 |
    74 |
    75 | 76 | 77 |
    78 |
    79 |
    80 | 81 |

    你想知道 现在 人们在讨论什么话题么?

    82 |

    你想跟踪 某些特定用户 谈论的话题么?

    83 |

    你想查看 某些地区 人们谈论的话题么?

    84 | 87 |
    88 |
    89 |
    90 | 91 | 92 | 93 |
    94 |
    95 |
    96 | 97 |
    98 |
    99 | 100 |
    101 | {% endblock %} -------------------------------------------------------------------------------- /templates/topic/toolbar.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
    4 |
    5 | 7 | 8 | 15 |
    16 | 17 |
    18 | 19 | 20 | 39 |
    40 |
    -------------------------------------------------------------------------------- /templates/topic/visualization/result_basic.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Title 6 | 7 | 8 | 9 | 10 | 11 | 14 | 15 | {% block style %} 16 | {# ---style---#} 17 | {% endblock %} 18 | 19 | 20 | 21 | 22 | {% block mainbody %} 23 | {# ------#} 24 | {% endblock %} 25 | 26 | 27 | -------------------------------------------------------------------------------- /templates/topic/visualization/result_bubble.html: -------------------------------------------------------------------------------- 1 | {% extends "topic/visualization/result_basic.html" %} 2 | 3 | {% block style %} 4 | 37 | {% endblock %} 38 | 39 | {% block mainbody %} 40 | 159 | {% endblock %} -------------------------------------------------------------------------------- /templates/topic/visualization/result_funnel.html: -------------------------------------------------------------------------------- 1 | {% extends "topic/visualization/result_basic.html" %} 2 | 3 | 4 | {% block mainbody %} 5 | 6 | 7 |
    8 | 9 | 94 | 95 | 96 | {% endblock %} -------------------------------------------------------------------------------- /templates/topic/visualization/result_hashtags_histogram.html: -------------------------------------------------------------------------------- 1 | {% extends "topic/visualization/result_basic.html" %} 2 | 3 | 4 | {% block mainbody %} 5 | 6 | 7 |
    8 | 9 | 74 | 75 | 76 | {% endblock %} -------------------------------------------------------------------------------- /templates/topic/visualization/result_hashtags_pie.html: -------------------------------------------------------------------------------- 1 | {% extends "topic/visualization/result_basic.html" %} 2 | 3 | 4 | {% block mainbody %} 5 | 6 | 7 |
    8 | 9 | 74 | 75 | 76 | {% endblock %} -------------------------------------------------------------------------------- /templates/topic/visualization/result_hashtags_timeline.html: -------------------------------------------------------------------------------- 1 | {% extends "topic/visualization/result_basic.html" %} 2 | 3 | 4 | {% block mainbody %} 5 | 6 | 7 |
    8 | 9 | 93 | 94 | 95 | {% endblock %} -------------------------------------------------------------------------------- /templates/topic/visualization/result_heatmap.html: -------------------------------------------------------------------------------- 1 | {% extends "topic/visualization/result_basic.html" %} 2 | 3 | 4 | {% block mainbody %} 5 | 6 |
    7 | 8 | 97 | {% endblock %} -------------------------------------------------------------------------------- /templates/topic/visualization/result_text.html: -------------------------------------------------------------------------------- 1 | {% extends "topic/visualization/result_basic.html" %} 2 | 3 | {% block mainbody %} 4 |
    5 |
    当前话题
    6 |
    7 |

    8 | 12 |

    13 |
    14 |

    a fax d wt wra f fx afw

    15 |

    a fax d wt wra f fx afw

    16 |
    17 |
    18 |
    19 |
    20 | 21 | 22 | 43 | {% endblock %} 44 | 45 | -------------------------------------------------------------------------------- /templates/topic/visualization/result_treemap.html: -------------------------------------------------------------------------------- 1 | {% extends "topic/visualization/result_basic.html" %} 2 | 3 | {% block style %} 4 | 19 | {% endblock %} 20 | 21 | 22 | {% block mainbody %} 23 | 164 | {% endblock %} -------------------------------------------------------------------------------- /topic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/topic/__init__.py -------------------------------------------------------------------------------- /topic/models/Lda_text_format.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/2/22. 
4 | # twitter text 预处理 5 | import nltk 6 | import re 7 | 8 | # with open('stopwords.txt', 'r') as f: 9 | with open('topic/models/stopwords.txt', 'r') as f: 10 | stopwords = [word.strip() for word in f.readlines()] 11 | 12 | # english_stopwords = nltk.corpus.stopwords.words('english') # + ['re', "n't"] 13 | english_stopwords = stopwords 14 | english_punctuations = [',', '.', ':', ';', '?', '(', ')', '[', ']', '&', '!', '*', '#', '$', '%', '...'] 15 | expressions = [':-)', ':)', '=)', ':D', ':-(', ':(', '=(', ';('] 16 | remove_words = set(english_stopwords + english_punctuations + expressions) 17 | wnl = nltk.WordNetLemmatizer() 18 | 19 | 20 | # 到时候再看看 twitter_text 21 | def filter_tweet(tweet): 22 | # 替换twitter特殊字符 23 | tweet = tweet.lower() 24 | # 替换tweet Url and user mentions 25 | tweet = re.sub(r"(http[s:…]*(//\S*)?)|(@\w+)", "", tweet) 26 | tweet = [wnl.lemmatize(word) for word in nltk.word_tokenize(tweet)] 27 | tweet = [word for word in tweet if word not in remove_words and len(word) >= 3] 28 | return tweet 29 | 30 | 31 | def filter_tweets(original_tweets): 32 | _filter_tweets = list(map(filter_tweet, original_tweets)) 33 | res_tweets = [] 34 | res_tweets_filter = [] 35 | for i, f_tweet in enumerate(_filter_tweets): 36 | if f_tweet: 37 | res_tweets.append(original_tweets[i]) 38 | res_tweets_filter.append(f_tweet) 39 | return res_tweets, res_tweets_filter 40 | 41 | 42 | 43 | def main(): 44 | txt = "RT @SocialWebMining rta Mining women https://hrwhisper.me 1M+ Tweets @hrwhisper About #Syria http://wp.me/p3QiJd-1I https:…" 45 | print filter_tweet(txt) 46 | txt = "RT @StewySongs: People are people, families are families & lives are lives the world over. 
The UK is shoulder to shoulder with Paris https:…" 47 | print filter_tweet(txt) 48 | 49 | for i, word in enumerate(english_stopwords): 50 | if word not in stopwords: 51 | print word 52 | 53 | print wnl.lemmatize('followed'), wnl.lemmatize('following') 54 | 55 | 56 | if __name__ == '__main__': 57 | main() 58 | -------------------------------------------------------------------------------- /topic/models/TopicParameterManager.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/4/18. 4 | 5 | 6 | class TopicParameterManager(object): 7 | def __init__(self, param): 8 | param = dict(param) 9 | for x, t in param.items(): 10 | if param[x] == '': 11 | del param[x] 12 | 13 | self.mode = int(param.get('mode', 1)) 14 | 15 | # ---------- stream --------- 16 | self.track = param.get('track', None) 17 | self.follow = param.get('follow', None) 18 | self.location = param.get('location', None) 19 | self.storeIntoDB = param.get('storeIntoDB', False) == 'true' 20 | self.storeIntoDBName = param.get('storeIntoDBName', 'stream') 21 | 22 | # ---------- LDA ------------ 23 | self.LDA_k = int(param.get('LDA_k', 15)) 24 | self.LDA_timeWindow = int(param.get('LDA_timeWindow', 30)) 25 | 26 | # ----------- Local ----------- 27 | self.startDate = param.get('startDate', None) 28 | self.endDate = param.get('endDate', None) 29 | self.localCollectionsName = param.get('localCollectionsName', 'stream') 30 | 31 | def __eq__(self, other): 32 | return self.track == other.track and self.follow == other.follow and self.location == other.location \ 33 | and self.storeIntoDB == other.storeIntoDB and self.storeIntoDBName == other.storeIntoDBName \ 34 | and self.LDA_k == other.LDA_k and self.LDA_timeWindow == other.LDA_timeWindow \ 35 | and self.startDate == other.startDate and self.endDate == other.endDate \ 36 | and self.localCollectionsName == other.localCollectionsName and self.mode == other.mode 37 | 38 | def 
__ne__(self, other): 39 | return not self.__eq__(other) 40 | 41 | def __str__(self): 42 | return self.track, self.follow, self.location, self.storeIntoDB, self.storeIntoDBName, \ 43 | self.LDA_k, self.LDA_timeWindow, self.startDate, self.endDate, self.localCollectionsName 44 | -------------------------------------------------------------------------------- /topic/models/TopicTrendsManager.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/4/8. 4 | 5 | import threading 6 | import multiprocessing 7 | import time 8 | from twitterDataMining.model_p.Singleton import Singleton 9 | from twitterDataMining.model_p.twitterApi.LocalStream import LocalStream 10 | from twitterDataMining.model_p.twitterApi.Stream import TwitterStream 11 | from topic.models.Corpus import Corpus 12 | from topic.models.OnlineLDA import OnlineLDA 13 | 14 | 15 | class TopicTrendsManager(object): 16 | __metaclass__ = Singleton 17 | 18 | def __init__(self, param): 19 | self.param = param 20 | self.topics = [] 21 | self.lock = threading.Lock() 22 | self.parent_conn, self.child_conn = multiprocessing.Pipe() 23 | 24 | # self.topic_trends = TopicTrends(param, self.child_conn) 25 | # self.topic_trends.start() 26 | 27 | self.topic_trends = None 28 | 29 | topic_trends_get = threading.Thread(target=self.receive_lda_result) 30 | topic_trends_get.start() 31 | 32 | def get_result(self, param): 33 | """ 34 | get LDA result 35 | :param param: TopicParameterManager 36 | :return: topic_list or None 37 | """ 38 | res = None 39 | 40 | if not self.topic_trends: 41 | self.topic_trends = TopicTrends(param, self.child_conn) 42 | self.topic_trends.start() 43 | self.param = param 44 | return res 45 | 46 | if self.param == param: 47 | if self.lock.acquire(): 48 | if self.topics: 49 | res = self.topics.pop(0) 50 | self.lock.release() 51 | 52 | else: # if self.param != param: 53 | self.param = param 54 | 
self.topic_trends.terminate() 55 | self.topic_trends = TopicTrends(self.param, self.child_conn) 56 | self.topic_trends.start() 57 | 58 | if self.lock.acquire(): 59 | self.topics = [] 60 | self.lock.release() 61 | 62 | return res 63 | 64 | def receive_lda_result(self): 65 | while True: 66 | res = self.parent_conn.recv() 67 | self.lock.acquire() 68 | self.topics.append(res) 69 | self.lock.release() 70 | 71 | def stop(self): 72 | if self.topic_trends: 73 | self.topic_trends.terminate() 74 | self.topic_trends = None 75 | self.topics = [] 76 | # TODO stop receive_lda_result Threads 77 | 78 | 79 | class TopicTrends(multiprocessing.Process): 80 | def __init__(self, param, lda_send_conn, period=60): 81 | super(TopicTrends, self).__init__() 82 | self.param = param 83 | self.period = period 84 | self.lda_send_conn = lda_send_conn 85 | self.parent_conn, self.child_conn = multiprocessing.Pipe() 86 | 87 | self.corpus = None 88 | self.olda = None 89 | 90 | def run(self): 91 | if self.param.mode != 2: # online stream data(use twitter API) 92 | twitter_stream = TwitterStream(self.child_conn) 93 | twitter_stream_thread = threading.Thread(target=twitter_stream.stream_data, 94 | args=(self.param.track, self.param.follow, self.param.location, 95 | self.param.storeIntoDB, self.param.storeIntoDBName,)) 96 | twitter_stream_thread.setDaemon(True) 97 | twitter_stream_thread.start() 98 | 99 | print ' threading.active_count()', threading.active_count() 100 | # TODO error count > 3 kill 101 | while True: 102 | time.sleep(self.period) 103 | twitter_stream.ready_receive() 104 | tweets = self.parent_conn.recv() 105 | t = threading.Thread(target=self.do_some_from_data, args=(tweets,)) 106 | t.setDaemon(True) 107 | t.start() 108 | 109 | else: # local database data 110 | condition = threading.Condition() 111 | local_stream = LocalStream() 112 | local_stream_thread = threading.Thread(target=local_stream.stream_data, 113 | args=(condition, self.param.startDate, self.param.endDate, 114 | 
self.param.localCollectionsName,)) 115 | local_stream_thread.setDaemon(True) 116 | local_stream_thread.start() 117 | print ' threading.active_count()', threading.active_count() 118 | 119 | if condition.acquire(): 120 | while True: 121 | print 'wait to receive' 122 | if local_stream.tweets: 123 | self.do_some_from_data(local_stream.tweets) 124 | local_stream.tweets = [] 125 | condition.notify() 126 | 127 | condition.wait() 128 | 129 | def do_some_from_data(self, tweets): 130 | print 'total_tweets', len(tweets) 131 | # DO something from tweets 132 | 133 | # doc_chunk = [tweet['text'] for tweet in tweets] 134 | print len(tweets) 135 | if not self.olda: 136 | self.corpus = Corpus(tweets, chunk_limit=self.param.LDA_timeWindow) 137 | self.olda = OnlineLDA(self.corpus, K=self.param.LDA_k) 138 | else: 139 | self.olda.fit(tweets) 140 | 141 | res = { 142 | "lda": self.olda.get_lda_info(), 143 | "geo": self.olda.corpus.locations_count, 144 | "hashtags": self.olda.corpus.hashtags_most_common(), 145 | "hashtags_timeline": self.olda.corpus.hashtags_timeline(), 146 | } 147 | print '-------lda complete' 148 | # for topic_id, topic_likelihood, topic_words, topic_tweets in res["lda"]: 149 | # print '{}%\t{}'.format(round(topic_likelihood * 100, 2), topic_words) 150 | # print '\t', topic_tweets 151 | 152 | self.lda_send_conn.send(res) 153 | 154 | def terminate(self): 155 | super(TopicTrends, self).terminate() 156 | self.parent_conn.close() 157 | self.child_conn.close() 158 | 159 | 160 | if __name__ == '__main__': 161 | def main(): 162 | topic_trends = TopicTrendsManager() 163 | while True: 164 | res = topic_trends.get_result() 165 | if res: 166 | print res 167 | else: 168 | print 'None, wait' 169 | time.sleep(10) 170 | 171 | 172 | main() 173 | -------------------------------------------------------------------------------- /topic/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by 
hrwhisper on 2016/4/5. 4 | -------------------------------------------------------------------------------- /topic/models/demo.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/4/7. 4 | # just a multiprocessing and threading demo 5 | 6 | import threading 7 | import multiprocessing 8 | import time 9 | 10 | 11 | class Singleton(type): 12 | _instances = {} 13 | 14 | def __call__(cls, *args, **kwargs): 15 | if cls not in cls._instances: 16 | cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) 17 | return cls._instances[cls] 18 | 19 | 20 | class TopicTrendsManager(object): 21 | __metaclass__ = Singleton 22 | 23 | def __init__(self): 24 | self.topics = [] 25 | self.lock = threading.Lock() 26 | self.parent_conn, self.child_conn = multiprocessing.Pipe() 27 | self.topic_trends = TopicTrends(self.child_conn) 28 | self.topic_trends.start() 29 | topic_trends_get = threading.Thread(target=self.receive_lda_result) 30 | topic_trends_get.start() 31 | 32 | def get_result(self): 33 | res = None 34 | 35 | print 'process count', multiprocessing.active_children() 36 | if self.lock.acquire(): 37 | if self.topics: 38 | res = self.topics.pop(0) 39 | self.lock.release() 40 | # self.topic_trends.terminate() 41 | return res 42 | 43 | def receive_lda_result(self): 44 | while True: 45 | res = self.parent_conn.recv() 46 | self.lock.acquire() 47 | self.topics.append(res) 48 | # print 'receive_lda_result', res 49 | self.lock.release() 50 | 51 | 52 | class TopicTrends(multiprocessing.Process): 53 | def __init__(self, lda_send_conn, period=2): 54 | super(TopicTrends, self).__init__() 55 | self.period = period 56 | self.lda_send_conn = lda_send_conn 57 | self.parent_conn, self.child_conn = multiprocessing.Pipe() 58 | 59 | def run(self): 60 | 61 | twitter_stream = TwitterStream(self.child_conn) 62 | twitter_stream_thread = threading.Thread(target=twitter_stream.run) 63 | 
twitter_stream_thread.setDaemon(True) 64 | twitter_stream_thread.start() 65 | 66 | print ' threading.active_count()', threading.active_count() 67 | # TODO error count > 3 kill 68 | while True: 69 | time.sleep(self.period) 70 | twitter_stream.get() 71 | tweets = self.parent_conn.recv() 72 | t = threading.Thread(target=self.do_some_from_data, args=(tweets,)) 73 | t.setDaemon(True) 74 | t.start() 75 | print 'TopicTrends threading.live : ', list(threading.enumerate()) 76 | 77 | # print sum(tweets) 78 | print 'end' 79 | 80 | def do_some_from_data(self, data): 81 | # DO something from tweets 82 | res = sum(data) 83 | self.lda_send_conn.send(res) 84 | 85 | def terminate(self): 86 | super(TopicTrends, self).terminate() 87 | self.parent_conn.close() 88 | self.child_conn.close() 89 | 90 | 91 | class TwitterStream(object): 92 | def __init__(self, conn): 93 | super(TwitterStream, self).__init__() 94 | self.conn = conn 95 | self.tweets = [] 96 | self.get_data = False 97 | 98 | def run(self): 99 | i = 0 100 | while True: 101 | time.sleep(0.1) 102 | self.tweets.append(i) 103 | i += 1 104 | if self.get_data: 105 | self.get_data = False 106 | self.conn.send(self.tweets) 107 | self.tweets = [] 108 | 109 | def get(self): 110 | self.get_data = True 111 | 112 | 113 | def main(): 114 | topic_trends = TopicTrendsManager() 115 | while True: 116 | res = topic_trends.get_result() 117 | if res: 118 | print res 119 | else: 120 | print 'None, wait' 121 | time.sleep(1) 122 | 123 | 124 | if __name__ == '__main__': 125 | main() 126 | 127 | # time.sleep(5) 128 | # topic_trends.terminate() 129 | -------------------------------------------------------------------------------- /topic/models/stopwords.txt: -------------------------------------------------------------------------------- 1 | ... 2 | .... 
3 | 0 4 | 1 5 | 2 6 | 3 7 | 4 8 | 5 9 | 6 10 | 7 11 | 8 12 | 9 13 | a 14 | about 15 | above 16 | accordingly 17 | across 18 | after 19 | afterwards 20 | again 21 | against 22 | al 23 | all 24 | allows 25 | almost 26 | alone 27 | along 28 | already 29 | also 30 | although 31 | always 32 | am 33 | among 34 | amongst 35 | an 36 | and 37 | another 38 | any 39 | anybody 40 | anyhow 41 | anyone 42 | anything 43 | anywhere 44 | apart 45 | appear 46 | appropriate 47 | are 48 | around 49 | as 50 | aside 51 | associated 52 | at 53 | available 54 | away 55 | awfully 56 | b 57 | back 58 | be 59 | became 60 | because 61 | become 62 | becomes 63 | becoming 64 | been 65 | before 66 | beforehand 67 | behind 68 | being 69 | below 70 | beside 71 | besides 72 | best 73 | better 74 | between 75 | beyond 76 | both 77 | brief 78 | but 79 | by 80 | c 81 | came 82 | can 83 | cannot 84 | cant 85 | cause 86 | causes 87 | certain 88 | changes 89 | co 90 | come 91 | consequently 92 | contain 93 | containing 94 | contains 95 | corresponding 96 | could 97 | currently 98 | d 99 | day 100 | described 101 | did 102 | different 103 | do 104 | does 105 | doing 106 | don 107 | done 108 | down 109 | downwards 110 | during 111 | e 112 | each 113 | eg 114 | eight 115 | either 116 | else 117 | elsewhere 118 | enough 119 | eq 120 | et 121 | etc 122 | even 123 | ever 124 | every 125 | everybody 126 | everyone 127 | everything 128 | everywhere 129 | ex 130 | example 131 | except 132 | f 133 | far 134 | few 135 | fifth 136 | first 137 | five 138 | followed 139 | following 140 | for 141 | former 142 | formerly 143 | forth 144 | four 145 | from 146 | further 147 | furthermore 148 | g 149 | get 150 | gets 151 | given 152 | gives 153 | go 154 | gone 155 | good 156 | got 157 | great 158 | h 159 | had 160 | hardly 161 | has 162 | have 163 | having 164 | he 165 | hence 166 | her 167 | here 168 | hereafter 169 | hereby 170 | herein 171 | hereupon 172 | hers 173 | herself 174 | him 175 | himself 176 | his 177 | 
hither 178 | how 179 | howbeit 180 | however 181 | http 182 | i 183 | ie 184 | if 185 | ignored 186 | immediate 187 | in 188 | inasmuch 189 | inc 190 | indeed 191 | indicate 192 | indicated 193 | indicates 194 | inner 195 | insofar 196 | instead 197 | into 198 | inward 199 | is 200 | it 201 | its 202 | itself 203 | j 204 | just 205 | k 206 | keep 207 | kept 208 | know 209 | l 210 | last 211 | latter 212 | latterly 213 | least 214 | less 215 | lest 216 | let 217 | life 218 | like 219 | little 220 | long 221 | ltd 222 | m 223 | made 224 | make 225 | man 226 | many 227 | may 228 | me 229 | meanwhile 230 | men 231 | might 232 | more 233 | moreover 234 | most 235 | mostly 236 | mr 237 | much 238 | must 239 | my 240 | myself 241 | n 242 | name 243 | namely 244 | near 245 | necessary 246 | neither 247 | never 248 | nevertheless 249 | new 250 | next 251 | nine 252 | no 253 | nobody 254 | none 255 | noone 256 | nor 257 | normally 258 | not 259 | nothing 260 | novel 261 | now 262 | nowhere 263 | o 264 | of 265 | off 266 | often 267 | oh 268 | old 269 | on 270 | once 271 | one 272 | ones 273 | only 274 | onto 275 | or 276 | other 277 | others 278 | otherwise 279 | ought 280 | our 281 | ours 282 | ourselves 283 | out 284 | outside 285 | over 286 | overall 287 | own 288 | p 289 | particular 290 | particularly 291 | people 292 | per 293 | perhaps 294 | placed 295 | please 296 | plus 297 | possible 298 | probably 299 | provides 300 | q 301 | que 302 | quite 303 | r 304 | rather 305 | really 306 | relatively 307 | respectively 308 | right 309 | s 310 | said 311 | same 312 | say 313 | says 314 | second 315 | secondly 316 | see 317 | seem 318 | seemed 319 | seeming 320 | seems 321 | self 322 | selves 323 | sensible 324 | sent 325 | serious 326 | seven 327 | several 328 | shall 329 | she 330 | should 331 | since 332 | six 333 | so 334 | some 335 | somebody 336 | somehow 337 | someone 338 | something 339 | sometime 340 | sometimes 341 | somewhat 342 | somewhere 343 | specified 344 | 
specify 345 | specifying 346 | state 347 | still 348 | sub 349 | such 350 | sup 351 | t 352 | take 353 | taken 354 | than 355 | that 356 | the 357 | their 358 | theirs 359 | them 360 | themselves 361 | then 362 | thence 363 | there 364 | thereafter 365 | thereby 366 | therefore 367 | therein 368 | thereupon 369 | these 370 | they 371 | third 372 | this 373 | thorough 374 | thoroughly 375 | those 376 | though 377 | three 378 | through 379 | throughout 380 | thru 381 | thus 382 | time 383 | to 384 | together 385 | too 386 | toward 387 | towards 388 | twice 389 | two 390 | u 391 | under 392 | unless 393 | until 394 | unto 395 | up 396 | upon 397 | us 398 | use 399 | used 400 | useful 401 | uses 402 | using 403 | usually 404 | v 405 | value 406 | various 407 | very 408 | via 409 | viz 410 | vs 411 | w 412 | was 413 | way 414 | we 415 | well 416 | went 417 | were 418 | what 419 | whatever 420 | when 421 | whence 422 | whenever 423 | where 424 | whereafter 425 | whereas 426 | whereby 427 | wherein 428 | whereupon 429 | wherever 430 | whether 431 | which 432 | while 433 | whither 434 | who 435 | whoever 436 | whole 437 | whom 438 | whose 439 | why 440 | will 441 | with 442 | within 443 | without 444 | work 445 | world 446 | would 447 | x 448 | y 449 | year 450 | years 451 | yet 452 | you 453 | your 454 | yours 455 | yourself 456 | yourselves 457 | z 458 | zero 459 | re 460 | 're 461 | 'rt 462 | rt 463 | via 464 | retweet 465 | twitter 466 | follow 467 | tweet 468 | amp 469 | n't -------------------------------------------------------------------------------- /topic/urls.py: -------------------------------------------------------------------------------- 1 | """twitterDataMining URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/1.9/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. 
Add a URL to urlpatterns: url(r'^$', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.conf.urls import url, include 14 | 2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls')) 15 | """ 16 | from django.conf.urls import url 17 | import topic.views 18 | 19 | urlpatterns = [ 20 | url(r'^$', topic.views.index), 21 | url(r'stream_trends$', topic.views.stream_trends), 22 | url(r'stop_trends$', topic.views.stop_trends), 23 | url(r'text$', topic.views.text), 24 | url(r'bubble$', topic.views.bubble), 25 | url(r'treemap$', topic.views.treemap), 26 | url(r'sunburst$', topic.views.sunburst), 27 | url(r'funnel$', topic.views.funnel), 28 | url(r'heatmap$', topic.views.heatmap), 29 | url(r'hashtags_pie$', topic.views.hashtags_pie), 30 | url(r'hashtags_histogram$', topic.views.hashtags_histogram), 31 | url(r'hashtags_timeline$', topic.views.hashtags_timeline) 32 | ] 33 | -------------------------------------------------------------------------------- /topic/views.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Create your views here. 
3 | import json 4 | from django.http import HttpResponse 5 | from django.shortcuts import render 6 | from topic.models.TopicTrendsManager import TopicTrendsManager 7 | from topic.models.TopicParameterManager import TopicParameterManager 8 | 9 | 10 | def index(request): 11 | return render(request, 'topic/index.html') 12 | 13 | 14 | # TODO 检查参数的合法性, and change to post method 15 | def stream_trends(request): 16 | param_manager = TopicParameterManager(request.GET.items()) 17 | topic_trends = TopicTrendsManager(param_manager) 18 | res = topic_trends.get_result(param_manager) 19 | return HttpResponse(json.dumps(res), content_type="application/json") 20 | 21 | 22 | def stop_trends(request): 23 | topic_trends = TopicTrendsManager(None) 24 | topic_trends.stop() 25 | res = {"stop": "stop success"} 26 | return HttpResponse(json.dumps(res), content_type="application/json") 27 | 28 | 29 | def text(request): 30 | return render(request, 'topic/visualization/result_text.html') 31 | 32 | 33 | def bubble(request): 34 | return render(request, 'topic/visualization/result_bubble.html') 35 | 36 | 37 | def treemap(request): 38 | return render(request, 'topic/visualization/result_treemap.html') 39 | 40 | 41 | def sunburst(request): 42 | return render(request, 'topic/visualization/result_sunburst.html') 43 | 44 | 45 | def funnel(request): 46 | return render(request, 'topic/visualization/result_funnel.html') 47 | 48 | 49 | def heatmap(request): 50 | return render(request, 'topic/visualization/result_heatmap.html') 51 | 52 | 53 | def hashtags_pie(request): 54 | return render(request, 'topic/visualization/result_hashtags_pie.html') 55 | 56 | 57 | def hashtags_histogram(request): 58 | return render(request, 'topic/visualization/result_hashtags_histogram.html') 59 | 60 | 61 | def hashtags_timeline(request): 62 | return render(request, 'topic/visualization/result_hashtags_timeline.html') -------------------------------------------------------------------------------- 
/twitterDataMining/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/twitterDataMining/__init__.py -------------------------------------------------------------------------------- /twitterDataMining/model_p/Singleton.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/5/24. 4 | 5 | 6 | class Singleton(type): 7 | _instances = {} 8 | 9 | def __call__(cls, *args, **kwargs): 10 | if cls not in cls._instances: 11 | cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) 12 | return cls._instances[cls] 13 | -------------------------------------------------------------------------------- /twitterDataMining/model_p/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/twitterDataMining/model_p/__init__.py -------------------------------------------------------------------------------- /twitterDataMining/model_p/analyse/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/twitterDataMining/model_p/analyse/__init__.py -------------------------------------------------------------------------------- /twitterDataMining/model_p/analyse/hashtag_co_occur.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2015/12/8. 
4 | 5 | import pymongo 6 | import time 7 | import datetime 8 | import collections 9 | 10 | if __name__ == '__main__': 11 | 12 | starttime = datetime.datetime.now() 13 | # long running 14 | 15 | lower_bound = str(int(time.mktime(datetime.datetime(2015, 11, 15).timetuple())) * 1000) 16 | upper_bound = str(int(time.mktime(datetime.datetime(2015, 11, 17).timetuple())) * 1000) 17 | client = pymongo.MongoClient() 18 | db = client.twitter 19 | cursor = db.stream.aggregate([ 20 | { 21 | '$match': { 22 | 'timestamp_ms': { 23 | '$gt': lower_bound, 24 | '$lt': upper_bound, 25 | }, 26 | 'entities.hashtags.0': { 27 | '$exists': 'true' 28 | } 29 | } 30 | }, 31 | { 32 | '$project': { 33 | 'entities.hashtags': 1 34 | } 35 | } 36 | ]) 37 | cnt = 0 38 | hashtag_dic = collections.defaultdict(lambda: collections.defaultdict(int), {}) 39 | for tweet in cursor: 40 | cnt += 1 41 | hashtags = tweet['entities']['hashtags'] 42 | hashtags_len = len(hashtags) 43 | for i, name1 in enumerate(hashtags): 44 | for j, name2 in enumerate(hashtags): 45 | if name1 < name2: 46 | hashtag_dic[name1][name2] += 1 47 | elif name1 > name2: 48 | hashtag_dic[name2][name1] += 1 49 | print cnt 50 | res = [] 51 | for name, dics in hashtag_dic.items(): 52 | for name2, cnt in dics.items(): 53 | res.append((name, name2, cnt)) 54 | 55 | print len(res) 56 | hashtag_dic = sorted(res, key=lambda x: x[2], reverse=True) 57 | for i in hashtag_dic[:100]: 58 | print i 59 | 60 | # cursor = db.stream.find({ 61 | # 'timestamp_ms': { 62 | # '$lt': upper_bound, 63 | # '$gt': lower_bound 64 | # } 65 | # }) 66 | # hashtag_cnt = collections.defaultdict(int) 67 | # for tweet in cursor: 68 | # for hashtag in tweet['entities']['hashtags']: 69 | # hashtag_cnt[hashtag] += 1 70 | # hashtag_cnt = sorted(hashtag_cnt.items(), key=lambda x: x[1], reverse=True) 71 | # print hashtag_cnt[:100] 72 | endtime = datetime.datetime.now() 73 | print (endtime - starttime).seconds 74 | 
-------------------------------------------------------------------------------- /twitterDataMining/model_p/analyse/hashtag_trend.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2015/12/10. 4 | import pymongo 5 | import datetime 6 | 7 | if __name__ == '__main__': 8 | 9 | starttime = datetime.datetime.now() 10 | # long running 11 | 12 | client = pymongo.MongoClient() 13 | db = client.twitter 14 | 15 | cursor = db.stream.aggregate([ 16 | { 17 | '$match': { 18 | 'entities.hashtags': 'MTVStars' 19 | } 20 | }, 21 | { 22 | '$group': { 23 | '_id': { 24 | 'day': {'$dayOfMonth': '$created_at'}, 25 | 'month': {'$month': '$created_at'}, 26 | 'year': {'$year': '$created_at'} 27 | }, 28 | 'cnt': {'$sum': 1}, 29 | } 30 | }, 31 | ]) 32 | 33 | for i , group in enumerate(cursor): 34 | print i,group 35 | 36 | endtime = datetime.datetime.now() 37 | print (endtime - starttime).seconds 38 | -------------------------------------------------------------------------------- /twitterDataMining/model_p/analyse/mongodb_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/1/24. 4 | import pymongo 5 | 6 | 7 | class mongodbTest(object): 8 | def test(self): 9 | client = pymongo.MongoClient() 10 | db = client.twitter 11 | return db.stream.find().limit(10) 12 | -------------------------------------------------------------------------------- /twitterDataMining/model_p/twitterApi/LocalStream.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/4/18. 
4 | import datetime 5 | from Basic import MongoDb 6 | from topic.models.OnlineLDA import chunkize_serial 7 | 8 | 9 | class LocalStream(object): 10 | def __init__(self): 11 | self.db = MongoDb().get_db() 12 | self.tweets = [] 13 | 14 | def stream_data(self, condition, start_date, end_date, collection_name='stream'): 15 | 16 | start = end = None 17 | try: 18 | start = datetime.datetime.strptime(start_date, '%Y-%m-%d') 19 | end = datetime.datetime.strptime(end_date, '%Y-%m-%d') 20 | except Exception, e: 21 | pass 22 | 23 | match = { 24 | '$match': { 25 | 'date': { 26 | } 27 | }} 28 | if start: 29 | match['$match']['date']['$gt'] = start 30 | if end: 31 | match['$match']['date']['$lt'] = end 32 | 33 | pipeline = [] 34 | if start and end: 35 | pipeline.append(match) 36 | pipeline.append({'$sort': {'date': 1}}) 37 | 38 | cursor = self.db[collection_name].aggregate(pipeline) 39 | 40 | if condition.acquire(): 41 | print 'loading local data' 42 | for doc_chunk in chunkize_serial(cursor, 3000, as_numpy=False): 43 | print doc_chunk[0] 44 | self.tweets = doc_chunk 45 | condition.notify() 46 | condition.wait() 47 | 48 | 49 | if __name__ == '__main__': 50 | def main(): 51 | str_date = '2015-11-13' 52 | t = datetime.datetime.strptime(str_date, '%Y-%m-%d') 53 | print t, type(t) 54 | print datetime.datetime(2015, 11, 13), type(datetime.datetime(2015, 11, 13)) 55 | 56 | 57 | main() 58 | -------------------------------------------------------------------------------- /twitterDataMining/model_p/twitterApi/Stream.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/1/25. 
4 | import time 5 | import datetime 6 | from Basic import TwitterBasic 7 | import twitter 8 | 9 | 10 | class TwitterStream(TwitterBasic): 11 | def __init__(self, conn=None): 12 | TwitterBasic.__init__(self) 13 | 14 | self.conn = conn 15 | self.tweets = [] 16 | self.get_data = False 17 | 18 | def ready_receive(self): 19 | self.get_data = True 20 | 21 | def stream_data(self, track=None, follow=None, locations=None, save_to_db=False, 22 | collection_name='stream'): 23 | """ 24 | https://dev.twitter.com/streaming/reference/post/statuses/filter 25 | The default access level allows up to 400 track keywords, 5,000 follow userids and 25 0.1-360 degree location boxes. 26 | 27 | :param track: str ; 28 | :param follow:list str ; 29 | :param locations: str ; 30 | :param save_to_db: 31 | :param collection_name: 32 | :return: None 33 | """ 34 | 35 | def location_bounding_box(_locations): 36 | t = _locations.split(',') 37 | res = '' 38 | for i in xrange(0, len(t), 2): 39 | x, y = str(float(t[i]) + 1), str(float(t[i + 1]) + 1) 40 | res += t[i] + ',' + t[i + 1] + ',' + x + ',' + y + ',' 41 | return res 42 | 43 | kwg = {'language': 'en'} 44 | 45 | if not track and not follow and not locations: 46 | kwg['track'] = 'twitter' 47 | 48 | if track: 49 | kwg['track'] = track 50 | 51 | if follow: 52 | kwg['follow'] = follow 53 | 54 | if locations: 55 | kwg['locations'] = location_bounding_box(locations) 56 | 57 | print kwg 58 | 59 | twitter_stream = twitter.TwitterStream(auth=self.twitter_api.auth) 60 | stream = twitter_stream.statuses.filter(**kwg) 61 | 62 | for i, tweet in enumerate(stream): 63 | if not i % 200 and 'text' in tweet: print i, datetime.datetime.now(), ' ', tweet["text"] 64 | tweet = dict(tweet) 65 | if 'id' in tweet: 66 | self.tweets.append(tweet) 67 | 68 | if self.get_data: 69 | self.get_data = False 70 | self.conn.send(self.tweets) 71 | self.tweets = [] 72 | 73 | if save_to_db: 74 | self.save_tweets_to_mongodb(tweet, colname=collection_name) 75 | 76 | 77 | if __name__ == 
'__main__': 78 | def get_current_time(): 79 | error_time = int(time.time()) # ->这是时间戳 80 | error_time = time.localtime(error_time) 81 | other_style_time = time.strftime("%Y-%m-%d %H:%M:%S", error_time) 82 | return other_style_time 83 | 84 | 85 | t = TwitterStream() 86 | track = None 87 | # locations = u'-122.75,36.8,-73,41,' 88 | # locations = u'-74.05,40.81,-73.05,41.81,-76.99,38.79,-75.99,39.79' 89 | locations = '-74.05,40.81,-76.99,38.79,-118.30,34.23,-122.39,37.96,-122.03,37.37,2.31,48.98,-0.14,51.52,-2.97,53.46,-1.24,51.76,-3.72,40.43,2.17,41.41,-0.38,39.48' 90 | while True: 91 | try: 92 | t.stream_data(track=track, locations=locations, save_to_db=False) 93 | except Exception, e: 94 | with open('error_log.txt', 'a+') as f: 95 | error_info = get_current_time() + ' ' + str(e) + ' \n' 96 | print error_info 97 | f.write(error_info) 98 | -------------------------------------------------------------------------------- /twitterDataMining/model_p/twitterApi/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/1/25. 4 | -------------------------------------------------------------------------------- /twitterDataMining/models.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/2/5. 
4 | import datetime 5 | import pymongo 6 | 7 | 8 | class MongoDb(object): 9 | def __init__(self): 10 | self._client = pymongo.MongoClient() 11 | self.db = self._client.twitter2 12 | 13 | def getDB(self): 14 | return self.db 15 | 16 | 17 | class TimeCost(object): 18 | def __init__(self): 19 | self._start_time = datetime.datetime.now() 20 | 21 | def timecost(self): 22 | print datetime.datetime.now() - self._start_time 23 | -------------------------------------------------------------------------------- /twitterDataMining/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Django settings for twitterDataMining project. 3 | 4 | Generated by 'django-admin startproject' using Django 1.9.1. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.9/topics/settings/ 8 | 9 | For the full list of settings and their values, see 10 | https://docs.djangoproject.com/en/1.9/ref/settings/ 11 | """ 12 | 13 | import os 14 | 15 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...) 16 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | 18 | 19 | # Quick-start development settings - unsuitable for production 20 | # See https://docs.djangoproject.com/en/1.9/howto/deployment/checklist/ 21 | 22 | # SECURITY WARNING: keep the secret key used in production secret! 23 | SECRET_KEY = '(k@6fi5c!6ij6p13u8wyf&wy&38wan1dm$^po5n!b!o*nuk)*c' 24 | 25 | # SECURITY WARNING: don't run with debug turned on in production! 
26 | DEBUG = True 27 | 28 | ALLOWED_HOSTS = [] 29 | 30 | 31 | # Application definition 32 | 33 | INSTALLED_APPS = [ 34 | 'django.contrib.auth', 35 | 'django.contrib.contenttypes', 36 | 'django.contrib.sessions', 37 | 'django.contrib.messages', 38 | 'django.contrib.staticfiles', 39 | 'network', 40 | 'sentiment', 41 | 'topic', 42 | 'statistic', 43 | 'twitterDataMining' 44 | ] 45 | 46 | MIDDLEWARE_CLASSES = [ 47 | 'django.middleware.security.SecurityMiddleware', 48 | 'django.contrib.sessions.middleware.SessionMiddleware', 49 | 'django.middleware.common.CommonMiddleware', 50 | 'django.middleware.csrf.CsrfViewMiddleware', 51 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 52 | 'django.contrib.auth.middleware.SessionAuthenticationMiddleware', 53 | 'django.contrib.messages.middleware.MessageMiddleware', 54 | 'django.middleware.clickjacking.XFrameOptionsMiddleware', 55 | ] 56 | 57 | ROOT_URLCONF = 'twitterDataMining.urls' 58 | 59 | TEMPLATES = [ 60 | { 61 | 'BACKEND': 'django.template.backends.django.DjangoTemplates', 62 | 'DIRS': [os.path.join(BASE_DIR, 'templates')] 63 | , 64 | 'APP_DIRS': True, 65 | 'OPTIONS': { 66 | 'context_processors': [ 67 | 'django.template.context_processors.debug', 68 | 'django.template.context_processors.request', 69 | 'django.contrib.auth.context_processors.auth', 70 | 'django.contrib.messages.context_processors.messages', 71 | ], 72 | }, 73 | }, 74 | ] 75 | 76 | WSGI_APPLICATION = 'twitterDataMining.wsgi.application' 77 | 78 | 79 | # Database 80 | # https://docs.djangoproject.com/en/1.9/ref/settings/#databases 81 | 82 | DATABASES = { 83 | # 'default': { 84 | # 'ENGINE': 'django.db.backends.sqlite3', 85 | # 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), 86 | # } 87 | } 88 | 89 | 90 | # Password validation 91 | # https://docs.djangoproject.com/en/1.9/ref/settings/#auth-password-validators 92 | 93 | AUTH_PASSWORD_VALIDATORS = [ 94 | { 95 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', 96 | }, 97 | 
{ 98 | 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', 99 | }, 100 | { 101 | 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', 102 | }, 103 | { 104 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', 105 | }, 106 | ] 107 | 108 | 109 | # Internationalization 110 | # https://docs.djangoproject.com/en/1.9/topics/i18n/ 111 | 112 | LANGUAGE_CODE = 'en-us' 113 | 114 | TIME_ZONE = 'UTC' 115 | 116 | USE_I18N = True 117 | 118 | USE_L10N = True 119 | 120 | USE_TZ = True 121 | 122 | 123 | # Static files (CSS, JavaScript, Images) 124 | # https://docs.djangoproject.com/en/1.9/howto/static-files/ 125 | 126 | STATIC_URL = '/static/' 127 | 128 | STATICFILES_DIRS = ( 129 | os.path.join(BASE_DIR, "static"), 130 | ) -------------------------------------------------------------------------------- /twitterDataMining/templatetags/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/4/14. 4 | -------------------------------------------------------------------------------- /twitterDataMining/templatetags/mytag.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/4/14. 
4 | import re 5 | from django.core.urlresolvers import reverse, NoReverseMatch 6 | from django import template 7 | 8 | register = template.Library() 9 | 10 | 11 | @register.simple_tag(takes_context=True) 12 | def active(context, pattern_or_urlname): 13 | try: 14 | pattern = '^' + reverse(pattern_or_urlname) 15 | except NoReverseMatch: 16 | pattern = pattern_or_urlname 17 | path = context['request'].path 18 | if re.search(pattern, path): 19 | return 'active' 20 | return '' 21 | -------------------------------------------------------------------------------- /twitterDataMining/urls.py: -------------------------------------------------------------------------------- 1 | """twitterDataMining URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/1.9/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. Add a URL to urlpatterns: url(r'^$', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.conf.urls import url, include 14 | 2. 
Add a URL to urlpatterns: url(r'^blog/', include('blog.urls')) 15 | """ 16 | from django.conf.urls import url, include 17 | import twitterDataMining.views 18 | import network.views 19 | 20 | urlpatterns = [ 21 | url(r'^$', twitterDataMining.views.index_page), 22 | url(r'^network/', include('network.urls')), 23 | url(r'^statistic/', include('statistic.urls')), 24 | url(r'^topic/', include('topic.urls')), 25 | url(r'^sentiment/', include('sentiment.urls')), 26 | ] 27 | -------------------------------------------------------------------------------- /twitterDataMining/views.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/2/3. 4 | from django.http import HttpResponse 5 | 6 | from django.shortcuts import render 7 | 8 | 9 | def index_page(request): 10 | return render(request, 'index.html') 11 | -------------------------------------------------------------------------------- /twitterDataMining/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for twitterDataMining project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.9/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "twitterDataMining.settings") 15 | 16 | application = get_wsgi_application() 17 | --------------------------------------------------------------------------------