├── .gitignore ├── LICENSE ├── README.md ├── manage.py ├── network ├── __init__.py ├── models │ ├── __init__.py │ └── retweet.py ├── urls.py └── views.py ├── sentiment ├── __init__.py ├── models │ ├── SentimentJudge.py │ ├── SentimentManager.py │ ├── __init__.py │ ├── data │ │ ├── __init__.py │ │ ├── dictionary │ │ │ ├── acronym.txt │ │ │ ├── emoticonsWithPolarity.txt │ │ │ ├── intensifier.txt │ │ │ └── stopWords.txt │ │ ├── lexicon │ │ │ ├── AFINN │ │ │ │ └── AFINN-111.txt │ │ │ ├── LiuBingLexicon │ │ │ │ ├── negative-words.txt │ │ │ │ └── positive-words.txt │ │ │ ├── MPQALexicon │ │ │ │ ├── subjclueslen1-HLTEMNLP05.README │ │ │ │ └── subjclueslen1-HLTEMNLP05.tff │ │ │ ├── NRC-Canada │ │ │ │ ├── HashtagSentimentAffLexNegLex │ │ │ │ │ ├── HS-AFFLEX-NEGLEX-bigrams.txt │ │ │ │ │ ├── HS-AFFLEX-NEGLEX-unigrams.txt │ │ │ │ │ └── readme.txt │ │ │ │ ├── NRC-Emotion-Lexicon-v0.92 │ │ │ │ │ ├── NRC-Hashtag-Emotion-Lexicon-v0.2.txt │ │ │ │ │ ├── NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt │ │ │ │ │ └── README.txt │ │ │ │ ├── NRC-Hashtag-Sentiment-Lexicon-v0.1 │ │ │ │ │ ├── README │ │ │ │ │ ├── bigrams-pmilexicon.txt │ │ │ │ │ ├── pairs-pmilexicon.txt │ │ │ │ │ ├── sentimenthashtags.txt │ │ │ │ │ └── unigrams-pmilexicon.txt │ │ │ │ ├── Sentiment140-Lexicon-v0.1 │ │ │ │ │ ├── README │ │ │ │ │ ├── bigrams-pmilexicon.txt │ │ │ │ │ ├── pairs-pmilexicon.txt │ │ │ │ │ └── unigrams-pmilexicon.txt │ │ │ │ └── Sentiment140AffLexNegLex │ │ │ │ │ ├── S140-AFFLEX-NEGLEX-bigrams.txt │ │ │ │ │ ├── S140-AFFLEX-NEGLEX-unigrams.txt │ │ │ │ │ └── readme.txt │ │ │ └── PosNegWords │ │ │ │ ├── neg_mod.txt │ │ │ │ └── pos_mod.txt │ │ ├── test │ │ │ ├── 2013-test-sms.tsv │ │ │ ├── 2013-test-sms.tsv_pos │ │ │ ├── 2013-test-tweet.tsv │ │ │ ├── 2013-test-tweet.tsv_pos │ │ │ ├── 2014-test-data-all.tsv │ │ │ ├── 2014-test-journal.tsv │ │ │ ├── 2014-test-journal.tsv_pos │ │ │ ├── 2014-test-sarcasm.tsv │ │ │ ├── 2014-test-sarcasm.tsv_pos │ │ │ ├── 2014-test-tweet.tsv │ │ │ └── 
2014-test-tweet.tsv_pos │ │ └── train │ │ │ ├── 2013-dev-data.tsv │ │ │ ├── 2013-dev-data.tsv_pos │ │ │ ├── 2013-train-data.tsv │ │ │ └── 2013-train-data.tsv_pos │ ├── models_save │ │ ├── classifier │ │ ├── classifier_01.npy │ │ ├── classifier_02.npy │ │ ├── classifier_03.npy │ │ ├── classifier_04.npy │ │ ├── lexicon │ │ └── ngrams │ ├── test_save_model.py │ └── tools │ │ ├── Lexicon.py │ │ ├── Lexicon2.py │ │ ├── __init__.py │ │ ├── ark-tweet-nlp-0.3.2.jar │ │ ├── pre_process.py │ │ ├── pre_process2.py │ │ └── read_data.py ├── urls.py └── views.py ├── static ├── __init__.py ├── css │ ├── bootstrap-switch.min.css │ ├── bootstrap-theme.css │ ├── bootstrap-theme.css.map │ ├── bootstrap-theme.min.css │ ├── bootstrap-theme.min.css.map │ ├── bootstrap.css │ ├── bootstrap.css.map │ ├── bootstrap.min.css │ ├── bootstrap.min.css.map │ ├── font-awesome.min.css │ └── style.css ├── fonts │ ├── FontAwesome.otf │ ├── fontawesome-webfont.eot │ ├── fontawesome-webfont.svg │ ├── fontawesome-webfont.ttf │ ├── fontawesome-webfont.woff │ ├── fontawesome-webfont.woff2 │ ├── glyphicons-halflings-regular.eot │ ├── glyphicons-halflings-regular.svg │ ├── glyphicons-halflings-regular.ttf │ ├── glyphicons-halflings-regular.woff │ └── glyphicons-halflings-regular.woff2 ├── image │ ├── others.jpg │ ├── sentiment_1.gif │ ├── sentiment_2.jpg │ ├── sentiment_3.jpg │ ├── topic_1.jpg │ ├── topic_3.jpg │ ├── topic_4.jpg │ └── topic_8.jpg ├── js │ ├── append_new_iframe.js │ ├── bootstrap-switch.min.js │ ├── bootstrap.js │ ├── bootstrap.min.js │ ├── d3.v3.min.js │ ├── echarts.min.js │ ├── googleMap.js │ ├── index.js │ ├── jquery.min.js │ ├── loading-control.js │ ├── npm.js │ ├── sentiment.js │ ├── spin.min.js │ └── topic.js └── json │ └── world.json ├── statistic ├── __init__.py ├── models │ ├── __init__.py │ ├── pie.py │ └── timeline.py ├── urls.py └── views.py ├── templates ├── basic.html ├── footer.html ├── header.html ├── index.html ├── index2.html ├── nav.html ├── network │ ├── network.js │ └── 
retweet.html ├── sentiment │ ├── header.html │ └── index.html ├── statistic │ ├── hashtag_compare.html │ ├── hashtag_timeline.html │ └── pie.html └── topic │ ├── header.html │ ├── index.html │ ├── parameters_panel.html │ ├── toolbar.html │ └── visualization │ ├── result_basic.html │ ├── result_bubble.html │ ├── result_funnel.html │ ├── result_hashtags_histogram.html │ ├── result_hashtags_pie.html │ ├── result_hashtags_timeline.html │ ├── result_heatmap.html │ ├── result_sunburst.html │ ├── result_text.html │ └── result_treemap.html ├── topic ├── __init__.py ├── models │ ├── Corpus.py │ ├── Lda_text_format.py │ ├── OnlineLDA.py │ ├── TopicParameterManager.py │ ├── TopicTrendsManager.py │ ├── __init__.py │ ├── demo.py │ └── stopwords.txt ├── urls.py └── views.py └── twitterDataMining ├── __init__.py ├── model_p ├── Singleton.py ├── __init__.py ├── analyse │ ├── __init__.py │ ├── hashtag_co_occur.py │ ├── hashtag_trend.py │ └── mongodb_test.py └── twitterApi │ ├── Basic.py │ ├── LocalStream.py │ ├── Rest.py │ ├── Stream.py │ ├── __init__.py │ └── error_log.txt ├── models.py ├── settings.py ├── templatetags ├── __init__.py └── mytag.py ├── urls.py ├── views.py └── wsgi.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.pyc 3 | *~ 4 | *.swp -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # twitterDataMining 2 | - 实时热点话题检测 3 | - 情感分析 4 | - 结果可视化 5 | 6 | ## 一些说明 7 | - 使用Python 2.7 8 | - Topic-analysis : WOLDA 9 | - Sentiment-analysis : Maximum Entropy 10 | - Visualization : D3 | Echarts 11 | - WEB : Django | MongoDB | Bootstrap 12 | - 更多详情见:https://www.hrwhisper.me/twitter-data-mining-and-visualization/ 13 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | if __name__ == "__main__": 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "twitterDataMining.settings") 7 | 8 | from django.core.management import execute_from_command_line 9 | 10 | execute_from_command_line(sys.argv) 11 | -------------------------------------------------------------------------------- /network/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/network/__init__.py -------------------------------------------------------------------------------- /network/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/2/5. 4 | 5 | -------------------------------------------------------------------------------- /network/models/retweet.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/2/5. 
4 | 5 | from twitterDataMining.models import MongoDb, TimeCost 6 | 7 | 8 | def get_retweet_data_by_time(time): 9 | db = MongoDb().getDB() 10 | field = { 11 | 'id': 1, 12 | 'text': 1, 13 | 'user_id': 1, 14 | 'retweet_id': 1, 15 | 'retweet_count': 1, 16 | 'user_mentions': 1, 17 | } 18 | cursor = db.stream.aggregate([ 19 | {'$match': {'hashtags': 'Christmas'}}, 20 | {'$sort': {'retweet_count': -1}}, 21 | {'$limit': 10}, 22 | # {'$project': field} 23 | ], ) 24 | tweets = [tweet for tweet in cursor] 25 | cursor = db.stream.aggregate([ 26 | {'$match': {'retweet_id': {'$in': [tweet['id'] for tweet in tweets]}}}, 27 | # {'$project': field} 28 | ], ) 29 | tweets += [tweet for tweet in cursor] 30 | return tweets 31 | 32 | 33 | def get_retweet_network_nodes_and_links(date='2015-11-22'): 34 | tweets = get_retweet_data_by_time(date) 35 | # user = set([tweet['user_id'] for tweet in tweets] + [tweet['user_memtions'][0]['id_str'] for tweet in tweets if 36 | # 'user_memtions' in tweet]) 37 | # user_num = {_id: i for i, _id in enumerate(user)} 38 | # nodes = [{"id": node} for node in user] 39 | # links = [{ 40 | # 'source': user_num[tweet['user_id']], 41 | # 'target': 42 | # user_num[tweet['user_memtions']['id_str']] 43 | # if 'user_memtions' in tweet and tweet['user_memtions']['id_str'] in user_tweet_id_num else 44 | # id_num[tweet.get('retweet_id', 0)] 45 | # } 46 | # for tweet in tweets] 47 | # print tweets 48 | tweets_id = set( 49 | [tweet['id'] for tweet in tweets] + [tweet.get('retweet_id', 0) for tweet in tweets]) 50 | id_num = {_id: i for i, _id in enumerate(tweets_id)} 51 | user_tweet_id_num = {tweet['user_id']: id_num[tweet['id']] for tweet in tweets} 52 | nodes = [{"id": node} for node in tweets_id] 53 | links = [{ 54 | 'source': id_num[tweet['id']], 55 | 'target': 56 | user_tweet_id_num[tweet['user_mentions'][0]['id_str']] 57 | if 'user_mentions' in tweet and tweet['user_mentions'][0]['id_str'] in user_tweet_id_num else 58 | id_num[tweet.get('retweet_id', 0)] 59 | } 60 | 
for tweet in tweets] 61 | 62 | return {'nodes': nodes, 'links': links} 63 | 64 | 65 | if __name__ == '__main__': 66 | get_retweet_network_nodes_and_links("") 67 | -------------------------------------------------------------------------------- /network/urls.py: -------------------------------------------------------------------------------- 1 | """twitterDataMining URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/1.9/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. Add a URL to urlpatterns: url(r'^$', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.conf.urls import url, include 14 | 2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls')) 15 | """ 16 | from django.conf.urls import url 17 | import network.views 18 | 19 | urlpatterns = [ 20 | url(r'retweet$', network.views.retweet), 21 | url(r'retweet_data$', network.views.retweet_data), 22 | ] 23 | -------------------------------------------------------------------------------- /network/views.py: -------------------------------------------------------------------------------- 1 | # Create your views here. 
2 | import json 3 | from django.http import HttpResponse 4 | from django.shortcuts import render 5 | from network.models.retweet import get_retweet_network_nodes_and_links 6 | 7 | 8 | def retweet(request): 9 | res = {'date': request.GET.get('date')} 10 | return render(request, 'network/retweet.html', res) 11 | 12 | 13 | def retweet_data(request): 14 | res = get_retweet_network_nodes_and_links("") 15 | return HttpResponse(json.dumps(res), content_type="application/json") 16 | -------------------------------------------------------------------------------- /sentiment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/__init__.py -------------------------------------------------------------------------------- /sentiment/models/SentimentJudge.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/5/23. 
4 | import codecs 5 | 6 | from scipy.sparse import csr_matrix 7 | from sklearn import metrics 8 | from sklearn.externals import joblib 9 | from sentiment.models.tools.pre_process import pre_process, pos_process 10 | from sentiment.models.tools.read_data import read_all_test_data 11 | from twitterDataMining.model_p.Singleton import Singleton 12 | 13 | 14 | class SentimentJudge(object): 15 | """ 16 | Simple example: 17 | s = SentimentJudge() 18 | test_data = s.transform(_test_data) 19 | predicted = s.predict(test_data) 20 | print np.sum(predicted == _test_target), len(_test_target), np.mean(predicted == _test_target) 21 | """ 22 | __metaclass__ = Singleton 23 | 24 | def __init__(self): 25 | self.classifier = joblib.load('sentiment/models/models_save/classifier') 26 | self.ngram = joblib.load('sentiment/models/models_save/ngrams') 27 | self.lexicon = joblib.load('sentiment/models/models_save/lexicon') 28 | 29 | def predict(self, X): 30 | """ 31 | Predict X is positive or negative 32 | :param X: 33 | :return: a numpy.ndarray. 
each row with "positive" or "negative" 34 | """ 35 | return self.classifier.predict(X) 36 | 37 | def transform(self, data, pos_tags=None): 38 | if pos_tags is None: 39 | data, pos_tags = pos_process(data) 40 | print len(data) 41 | return pre_process(data, pos_tags, self.lexicon, self.ngram) 42 | 43 | 44 | def main(): 45 | clf = SentimentJudge() 46 | tweets, target = [], [] 47 | with codecs.open('./data/test/2014-test-journal.tsv', "r", "utf-8") as f: 48 | for line in f.readlines(): 49 | line = line.strip().split("\t") 50 | target.append(line[1]) 51 | tweets.append(line[2]) 52 | 53 | test_feature = clf.transform(tweets) 54 | predicted = clf.predict(test_feature) 55 | print "Classification report for %s:\n%s\n" % (clf, 56 | metrics.classification_report(target, predicted, digits=3)) 57 | print("Confusion matrix:\n%s" % metrics.confusion_matrix(target, predicted)) 58 | 59 | 60 | # for name, test_data, test_target, test_pos in read_all_test_data(): 61 | # print '\n\n\n\n\n--------Now is {} --------\n\n'.format(name) 62 | # test_feature = clf.transform(test_data, test_pos) 63 | # predicted = clf.predict(test_feature) 64 | # print "Classification report for %s:\n%s\n" % (clf, 65 | # metrics.classification_report(test_target, predicted, digits=3)) 66 | # print("Confusion matrix:\n%s" % metrics.confusion_matrix(test_target, predicted)) 67 | 68 | 69 | if __name__ == '__main__': 70 | main() 71 | -------------------------------------------------------------------------------- /sentiment/models/SentimentManager.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/4/24. 
4 | 5 | import numpy as np 6 | from sentiment.models.SentimentJudge import SentimentJudge 7 | from twitterDataMining.model_p.twitterApi.Rest import TwitterRest 8 | 9 | 10 | def get_result_info(predicted, target, tweets, total_tweet, return_sample_tweets_nums): 11 | """ 12 | :param predicted: 13 | :param target: 14 | :param tweets: np.array [str,str] 15 | :param total_tweet: 16 | :param return_sample_tweets_nums: 17 | :return: 18 | """ 19 | c = predicted == target 20 | count = np.count_nonzero(c) 21 | percent = count * 1.0 / total_tweet 22 | c = tweets[c].tolist() 23 | text = sorted(c, cmp=lambda x, y: len(y) - len(x))[:return_sample_tweets_nums] 24 | return percent, text 25 | 26 | 27 | def query_sentiment_for_online_data(query_str, max_tweets=200, return_sample_tweets_nums=10): 28 | twitter_rest = TwitterRest() 29 | tweets = twitter_rest.search_tweets(q=query_str.encode('utf-8'), max_results=max_tweets) 30 | tweets = list(set(map(lambda x: x['text'], list(filter(lambda x: 'text' in x, tweets))))) 31 | print 'test_data len: {} by query string: {}'.format(len(tweets), query_str) 32 | 33 | s = SentimentJudge() 34 | test_data = s.transform(tweets) 35 | predicted = s.predict(test_data) 36 | 37 | _class = ['positive', 'negative', 'neutral'] 38 | total_tweets = test_data.shape[0] 39 | tweets = np.array(tweets)[:total_tweets] 40 | res = {} 41 | for target in _class: 42 | percent, text = get_result_info(predicted, target, tweets, total_tweets, return_sample_tweets_nums) 43 | res[target] = { 44 | 'percent': percent, 45 | 'text': text 46 | } 47 | return res 48 | # return total_positive, total_tweets, positive_percentage, positive_text, negative_text 49 | 50 | 51 | if __name__ == '__main__': 52 | while True: 53 | query_str = raw_input('please input the content your want to query:\n') 54 | print 'wait...' 
55 | query_sentiment_for_online_data(query_str) 56 | -------------------------------------------------------------------------------- /sentiment/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/5/23. 4 | -------------------------------------------------------------------------------- /sentiment/models/data/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/5/2. 4 | -------------------------------------------------------------------------------- /sentiment/models/data/dictionary/emoticonsWithPolarity.txt: -------------------------------------------------------------------------------- 1 | :-) :) :o) :] :3 :c) :> =] 8) =) :} :^) Positive 2 | :D C: (cl Extremely-Positive 3 | :-D :D 8D xD XD =D =3 <=3 <=8 Extremely-Positive 4 | 8===D 8===B Negative 5 | --!-- Negative 6 | :-( :( :c :< :[ :{ Negative 7 | D: D8 D; D= DX v.v Dx Extremely-Negative 8 | :-9 Negative 9 | ;-) ;) *) ;] ;D Positive 10 | :-P :P XP :-p :p =p xP :-b :b Positive 11 | :-O :O O_O o_o OwO O-O 0_o O_o O3O o0o ;o_o; o...o 0w0 O.o Positive 12 | c.c C.C :c Negative 13 | :-/ :/ :\ =/ =\ :S Negative 14 | :| Neutral 15 | d:-) qB-) Positive 16 | :)~ :-)>.... Neutral 17 | :-X :X :-# :# :-x :x Positive 18 | O:-) 0:3 O:) Negative 19 | :'( ;*( T_T TT_TT T.T Q.Q Q_Q ;_; Negative 20 | :-* :* Positive 21 | ^o) Negative 22 | >:) >;) >:-) >:] Neutral 23 | B) B-) 8) 8-) Neutral 24 | ^>.>^ ^<.<^ ^>_>^ ^<_<^ Negative 25 | D:< >:( >:- D-:> >:-( :-@[1] ;( `_' D< Negative 26 | ;3 <3 <33 <333 Positive 27 | .>= =<_<= =>.<= >_< Positive 29 | \,,/ \m/ Extremely-Positive 30 | \m/\>..<) >.< (>.<) >.< Negative 44 | (>_>) >_> (<_<) <_< (>.>) >.> Negative 45 | (-_-) -_- -__- -___- (-.-) -.- -.- .___. 
Negative 46 | (^o^) ^o^ Positive 47 | (^3^) ^3^ Positive 48 | (^_^') ^_^_^') ^^" ^^^_.^') ^^_^^; ^&^^.^;& ^^^; ^^^7 Negative 49 | d(>w<)b Extremely-Positive 50 | q(;^;)p Extremely-Negative 51 | 9(x.x)9 (;.;)9 Negative 52 | (._.) (,.,) Negative 53 | [(-_-)] ZZzzz... Neutral 54 | (X_X) x_x Negative 55 | ^///^ >///< >///> o///o -///- =///= Positive 56 | _|_ (-.-) _|_ t(>. 53 | 54 | can be a unigram or a bigram; 55 | is a real-valued sentiment score: score = PMI(w, pos) - PMI(w, neg), where PMI stands for Point-wise Mutual Information between a term w and the positive/negative class; 56 | is the number of times the term appears in the positive class, ie. in tweets with positive hashtag or emoticon; 57 | is the number of times the term appears in the negative class, ie. in tweets with negative hashtag or emoticon. 58 | 59 | 60 | ********************************************** 61 | AffLex and NegLex 62 | ********************************************** 63 | 64 | Both parts, AffLex and NegLex, of each lexicon are contained in the same file. The NegLex entries have suffixes '_NEG' or '_NEGFIRST'. 65 | 66 | In the unigram lexicon: 67 | '_NEGFIRST' is attached to terms that directly follow a negator; 68 | '_NEG' is attached to all other terms in negated contexts (not directly following a negator). 69 | 70 | In the bigram lexicon: 71 | '_NEG' is attached to all terms in negated contexts. 72 | 73 | Both suffixes are attached only to nouns, verbs, adjectives, and adverbs. All other parts of speech do not get these suffixes attached. 74 | 75 | 76 | ********************************************** 77 | More Information 78 | ********************************************** 79 | Details on the process of creating the lexicons can be found in: 80 | Kiritchenko, S., Zhu, X., Mohammad, S. (2014). Sentiment Analysis of Short Informal Texts. Journal of Artificial Intelligence Research, 50:723-762, 2014. 
81 | 82 | 83 | -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/NRC-Emotion-Lexicon-v0.92/NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/data/lexicon/NRC-Canada/NRC-Emotion-Lexicon-v0.92/NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/NRC-Emotion-Lexicon-v0.92/README.txt: -------------------------------------------------------------------------------- 1 | 2 | NRC Word-Emotion Association Lexicon 3 | (NRC Emotion Lexicon) 4 | Version 0.92 5 | 10 July 2011 6 | Copyright (C) 2011 National Research Council Canada (NRC) 7 | Contact: Saif Mohammad (saif.mohammad@nrc-cnrc.gc.ca) 8 | 9 | 1. This copy of the NRC Emotion Lexicon is to be used for research 10 | purposes only. Please contact NRC if interested in a commercial 11 | license. 12 | 13 | 2. If you use this lexicon in your research, then please cite 14 | at least one of the papers listed below in the PUBLICATIONS section 15 | (preferably the journal paper in Computational Intelligence). 16 | 17 | ....................................................................... 18 | 19 | NRC EMOTION LEXICON 20 | ------------------- 21 | The NRC emotion lexicon is a list of words and their associations with 22 | eight emotions (anger, fear, anticipation, trust, surprise, sadness, 23 | joy, and disgust) and two sentiments (negative and positive). The 24 | annotations were manually done through Amazon's Mechanical Turk. Refer 25 | to publications below for more details. 26 | 27 | ....................................................................... 
28 | 29 | PUBLICATIONS 30 | ------------ 31 | Details of the lexicon can be found in the following peer-reviewed 32 | publications: 33 | 34 | -- Crowdsourcing a Word-Emotion Association Lexicon, Saif Mohammad and 35 | Peter Turney, Computational Intelligence, 39(3), 555-590, 2013. 36 | 37 | -- Tracking Sentiment in Mail: How Genders Differ on Emotional Axes, 38 | Saif Mohammad and Tony Yang, In Proceedings of the ACL 2011 Workshop 39 | on ACL 2011 Workshop on Computational Approaches to Subjectivity and 40 | Sentiment Analysis (WASSA), June 2011, Portland, OR. Paper (pdf) 41 | 42 | -- From Once Upon a Time to Happily Ever After: Tracking Emotions in 43 | Novels and Fairy Tales, Saif Mohammad, In Proceedings of the ACL 2011 44 | Workshop on Language Technology for Cultural Heritage, Social 45 | Sciences, and Humanities (LaTeCH), June 2011, Portland, OR. Paper 46 | 47 | -- Emotions Evoked by Common Words and Phrases: Using Mechanical Turk 48 | to Create an Emotion Lexicon", Saif Mohammad and Peter Turney, In 49 | Proceedings of the NAACL-HLT 2010 Workshop on Computational Approaches 50 | to Analysis and Generation of Emotion in Text, June 2010, LA, 51 | California. 52 | 53 | Links to the papers are available here: 54 | http://www.purl.org/net/NRCemotionlexicon 55 | ....................................................................... 56 | 57 | VERSION INFORMATION 58 | ------------------- 59 | Version 0.92 is the latest version as of 10 July 2011. This version 60 | has annotations for more than twice as many terms as in Version 0.5 61 | which was released earlier. 62 | 63 | ....................................................................... 64 | 65 | FORMAT 66 | ------ 67 | Each line has the following format: 68 | TargetWordAffectCategoryAssociationFlag 69 | 70 | TargetWord is a word for which emotion associations are provided. 
71 | 72 | AffectCategory is one of eight emotions (anger, fear, anticipation, 73 | trust, surprise, sadness, joy, or disgust) or one of two polarities 74 | (negative or positive). 75 | 76 | AssociationFlag has one of two possible values: 0 or 1. 0 indicates 77 | that the target word has no association with affect category, 78 | whereas 1 indicates an association. 79 | 80 | ....................................................................... 81 | 82 | OTHER FORMS OF THE LEXICON 83 | -------------------------- 84 | 85 | The original lexicon has annotations at word-sense level. Each 86 | word-sense pair is annotated by at least three annotators (most are 87 | annotated by at least five). The word-level lexicon was created by 88 | taking the union of emotions associated with all the senses of a word. 89 | Please contact NRC if interested in the sense-level lexicon or if 90 | interested in more detailed information such as the individual 91 | annotations by each of the annotators. 92 | 93 | ....................................................................... 94 | 95 | CONTACT INFORMATION 96 | ------------------- 97 | Saif Mohammad 98 | Research Officer, National Research Council Canada 99 | email: saif.mohammad@nrc-cnrc.gc.ca 100 | phone: +1-613-993-0620 101 | 102 | ....................................................................... 103 | -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/NRC-Hashtag-Sentiment-Lexicon-v0.1/README: -------------------------------------------------------------------------------- 1 | NRC Hashtag Sentiment Lexicon 2 | Version 0.1 3 | 9 April 2013 4 | Copyright (C) 2011 National Research Council Canada (NRC) 5 | Contact: Saif Mohammad (uvgotsaif@gmail.com) 6 | 7 | 1. This copy of the NRC Hashtag Sentiment Lexicon is to be used for research 8 | purposes only. Please contact NRC if interested in a commercial license. 9 | 10 | 2. 
If you use this lexicon in your research, then please cite 11 | the paper listed below in the PUBLICATIONS section. 12 | 13 | ....................................................................... 14 | 15 | NRC HASHTAG SENTIMENT LEXICON 16 | ----------------------------- 17 | The NRC Hashtag Sentiment Lexicon is a list of words and their associations with 18 | positive and negative sentiment. The lexicon is distributed in three files: 19 | unigrams-pmilexicon.txt, bigrams-pmilexicon.txt, and pairs-pmilexicon.txt. 20 | 21 | Each line in the three files has the format: 22 | 23 | termsentimentScorenumPositivenumNegative 24 | where: 25 | term 26 | In unigrams-pmilexicon.txt, term is a unigram (single word). 27 | In bigrams-pmilexicon.txt, term is a bigram (two-word sequence). 28 | A bigram has the form: "string string". The bigram was seen at least once in 29 | the source tweets from which the lexicon was created. 30 | In pairs-pmilexicon.txt, term is a unigram--unigram pair, 31 | unigram--bigram pair, bigram--unigram pair, or a bigram--bigram pair. 32 | The pairs were generated from a large set of source tweets. Tweets were examined 33 | one at a time, and all possible unigram and bigram combinations within the tweet 34 | were chosen. Pairs with certain punctuations, @ symbols, and some function words 35 | were removed. 36 | 37 | sentimentScore is a real number. A positive score indicates positive 38 | sentiment. A negative score indicates negative sentiment. The absolute 39 | value is the degree of association with the sentiment. 40 | The sentiment score was calculated by subtracting the pointwise mutual 41 | information (PMI) score of the term with positive hashtags and the 42 | PMI of the term with negative hashtags. 43 | 44 | Terms with a non-zero PMI score with positive hashtags and PMI score of 0 45 | with negative hashtags were assigned a sentimentScore of 5. 
46 | Terms with a non-zero PMI score with negative hashtags and PMI score of 0 47 | with positive hashtags were assigned a sentimentScore of -5. 48 | 49 | numPositive is the number of times the term co-occurred with a positive 50 | marker such as a positive emoticon or a positive hashtag. 51 | 52 | numNegative is the number of times the term co-occurred with a negative 53 | marker such as a negative emoticon or a negative hashtag. 54 | 55 | The hashtag lexicon was created from a collection of tweets that had a 56 | positive or a negative word hashtag such as #good, #excellent, #bad, 57 | and #terrible. Version 0.1 was created from 775,310 tweets posted 58 | between April and December 2012 using a list of 78 positive and 59 | negative word hashtags. A list of these hashtags is shown in sentimenthashtags.txt. 60 | 61 | The number of entries in: 62 | unigrams-pmilexicon.txt: 54,129 terms 63 | bigrams-pmilexicon.txt: 316,531 terms 64 | pairs-pmilexicon.txt: 308,808 terms 65 | 66 | Refer to publication below for more details. 67 | 68 | ....................................................................... 69 | 70 | PUBLICATION 71 | ----------- 72 | Details of the lexicon can be found in the following peer-reviewed 73 | publication: 74 | 75 | -- In Proceedings of the seventh international workshop on Semantic 76 | Evaluation Exercises (SemEval-2013), June 2013, Atlanta, Georgia, USA. 77 | 78 | BibTeX entry: 79 | @InProceedings{MohammadKZ2013, 80 | author = {Mohammad, Saif and Kiritchenko, Svetlana and Zhu, Xiaodan}, 81 | title = {NRC-Canada: Building the State-of-the-Art in Sentiment Analysis of Tweets}, 82 | booktitle = {Proceedings of the seventh international workshop on Semantic Evaluation Exercises (SemEval-2013)}, 83 | month = {June}, 84 | year = {2013}, 85 | address = {Atlanta, Georgia, USA} 86 | } 87 | ....................................................................... 
88 | 89 | -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/NRC-Hashtag-Sentiment-Lexicon-v0.1/bigrams-pmilexicon.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/data/lexicon/NRC-Canada/NRC-Hashtag-Sentiment-Lexicon-v0.1/bigrams-pmilexicon.txt -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/NRC-Hashtag-Sentiment-Lexicon-v0.1/pairs-pmilexicon.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/data/lexicon/NRC-Canada/NRC-Hashtag-Sentiment-Lexicon-v0.1/pairs-pmilexicon.txt -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/NRC-Hashtag-Sentiment-Lexicon-v0.1/sentimenthashtags.txt: -------------------------------------------------------------------------------- 1 | positive positive 2 | good positive 3 | great positive 4 | excellent positive 5 | excellence positive 6 | fine positive 7 | nice positive 8 | desirable positive 9 | exquisite positive 10 | fabulous positive 11 | ideal positive 12 | marvelous positive 13 | perfect positive 14 | perfection positive 15 | splendid positive 16 | wonderful positive 17 | classy positive 18 | elegance positive 19 | elegant positive 20 | beauty positive 21 | beautiful positive 22 | dazzling positive 23 | amazing positive 24 | magnificent positive 25 | sensational positive 26 | super positive 27 | superb positive 28 | terrific negative 29 | exquisite negative 30 | exceptional negative 31 | heavenly negative 32 | negative negative 33 | bad negative 34 | egregious negative 35 | lousy negative 36 | shameful negative 
37 | sinful negative 38 | woeful negative 39 | wretched negative 40 | abominable negative 41 | deplorable negative 42 | despicable negative 43 | detest negative 44 | detestable negative 45 | dreadful negative 46 | infernal negative 47 | terrible negative 48 | vile negative 49 | dire negative 50 | sinister negative 51 | undesirable negative 52 | squalid negative 53 | seamy negative 54 | shoddy negative 55 | sleazy negative 56 | worthless negative 57 | paltry negative 58 | blemish negative 59 | botch negative 60 | bungle negative 61 | grievous negative 62 | hopeless negative 63 | ill negative 64 | pathetic negative 65 | poor negative 66 | sad negative 67 | sorry negative 68 | crummy negative 69 | inferior negative 70 | tacky negative 71 | unacceptable negative 72 | unsatisfactory negative 73 | unworthy negative 74 | awful negative 75 | abysmal negative 76 | rotten negative 77 | filthy negative 78 | foul negative 79 | -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/Sentiment140-Lexicon-v0.1/README: -------------------------------------------------------------------------------- 1 | Sentiment140 Lexicon 2 | Version 0.1 3 | 9 April 2013 4 | Copyright (C) 2011 National Research Council Canada (NRC) 5 | Contact: Saif Mohammad (uvgotsaif@gmail.com) 6 | 7 | 1. This copy of the Sentiment140 Lexicon is to be used for research 8 | purposes only. Please contact NRC if interested in a commercial license. 9 | 10 | 2. If you use this lexicon in your research, then please cite 11 | the paper listed below in the PUBLICATIONS section. 12 | 13 | ....................................................................... 14 | 15 | SENTIMENT140 LEXICON 16 | -------------------- 17 | The Sentiment140 Lexicon is a list of words and their associations with 18 | positive and negative sentiment. The lexicon is distributed in three files: 19 | unigrams-pmilexicon.txt, bigrams-pmilexicon.txt, and pairs-pmilexicon.txt. 
20 | 21 | Each line in the three files has the format: 22 | 23 | termsentimentScorenumPositivenumNegative 24 | where: 25 | term 26 | In unigrams-pmilexicon.txt, term is a unigram (single word). 27 | In bigrams-pmilexicon.txt, term is a bigram (two-word sequence). 28 | A bigram has the form: "string string". The bigram was seen at least once in 29 | the source tweets from which the lexicon was created. 30 | In pairs-pmilexicon.txt, term is a unigram--unigram pair, 31 | unigram--bigram pair, bigram--unigram pair, or a bigram--bigram pair. 32 | The pairs were generated from a large set of source tweets. Tweets were 33 | examined one at a time, and all possible unigram and bigram combinations 34 | within the tweet were chosen. Pairs with certain punctuations, @ symbols, 35 | and some function words were removed. 36 | 37 | 38 | sentimentScore is a real number. A positive score indicates positive 39 | sentiment. A negative score indicates negative sentiment. The absolute 40 | value is the degree of association with the sentiment. 41 | The sentiment score was calculated by subtracting the pointwise mutual 42 | information (PMI) score of the term with positive emoticons and the 43 | PMI of the term with negative emoticons. 44 | 45 | Terms with a non-zero PMI score with positive emoticons and PMI score of 0 46 | with negative emoticons were assigned a sentimentScore of 5. 47 | Terms with a non-zero PMI score with negative emoticons and PMI score of 0 48 | with positive emoticons were assigned a sentimentScore of -5. 49 | 50 | numPositive is the number of times the term co-occurred with a positive 51 | marker such as a positive emoticon or a positive emoticons. 52 | 53 | numNegative is the number of times the term co-occurred with a negative 54 | marker such as a negative emoticon or a negative emoticons. 55 | 56 | The Sentiment140 Lexicon was created from the Sentiment140 emoticon corpus of 1.6 million tweets. 
57 | http://help.sentiment140.com/for-students 58 | 59 | The number of entries in: 60 | unigrams-pmilexicon.txt: 62,468 terms 61 | bigrams-pmilexicon.txt: 677,698 terms 62 | pairs-pmilexicon.txt: 480,010 terms 63 | 64 | Refer to publication below for more details. 65 | 66 | ....................................................................... 67 | 68 | PUBLICATION 69 | ----------- 70 | Details of the lexicon can be found in the following peer-reviewed 71 | publication: 72 | 73 | -- In Proceedings of the seventh international workshop on Semantic 74 | Evaluation Exercises (SemEval-2013), June 2013, Atlanta, Georgia, USA. 75 | 76 | BibTeX entry: 77 | @InProceedings{MohammadKZ2013, 78 | author = {Mohammad, Saif and Kiritchenko, Svetlana and Zhu, Xiaodan}, 79 | title = {NRC-Canada: Building the State-of-the-Art in Sentiment Analysis of Tweets}, 80 | booktitle = {Proceedings of the seventh international workshop on Semantic Evaluation Exercises (SemEval-2013)}, 81 | month = {June}, 82 | year = {2013}, 83 | address = {Atlanta, Georgia, USA} 84 | } 85 | ....................................................................... 
86 | 87 | -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/Sentiment140-Lexicon-v0.1/bigrams-pmilexicon.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/data/lexicon/NRC-Canada/Sentiment140-Lexicon-v0.1/bigrams-pmilexicon.txt -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/Sentiment140-Lexicon-v0.1/pairs-pmilexicon.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/data/lexicon/NRC-Canada/Sentiment140-Lexicon-v0.1/pairs-pmilexicon.txt -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/Sentiment140AffLexNegLex/S140-AFFLEX-NEGLEX-bigrams.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/data/lexicon/NRC-Canada/Sentiment140AffLexNegLex/S140-AFFLEX-NEGLEX-bigrams.txt -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/NRC-Canada/Sentiment140AffLexNegLex/readme.txt: -------------------------------------------------------------------------------- 1 | Sentiment140 Affirmative Context Lexicon and Sentiment140 Negated Context Lexicon 2 | Version 1.0 3 | 26 September 2014 4 | Copyright (C) 2014 National Research Council Canada (NRC) 5 | Contact: Saif Mohammad (saif.mohammad@nrc-cnrc.gc.ca) 6 | 7 | ********************************************** 8 | TERMS OF USE 9 | ********************************************** 10 | 11 | 1. 
This lexicon can be used freely for research purposes. 12 | 2. The papers listed below provide details of the creation and use of 13 | the lexicon. If you use a lexicon, then please cite the associated 14 | papers: 15 | Kiritchenko, S., Zhu, X., Mohammad, S. (2014). Sentiment Analysis of 16 | Short Informal Texts. Journal of Artificial Intelligence Research, 17 | 50:723-762, 2014. 18 | 3. If interested in commercial use of the lexicon, send email to the 19 | contact. 20 | 4. If you use the lexicon in a product or application, then please 21 | credit the authors and NRC appropriately. Also, if you send us an 22 | email, we will be thrilled to know about how you have used the 23 | lexicon. 24 | 5. National Research Council Canada (NRC) disclaims any responsibility 25 | for the use of the lexicon and does not provide technical support. 26 | However, the contact listed above will be happy to respond to 27 | queries and clarifications. 28 | 6. Rather than redistributing the data, please direct interested 29 | parties to this page: 30 | http://www.purl.com/net/lexicons 31 | 32 | Please feel free to send us an email: 33 | - with feedback regarding the lexicon. 34 | - with information on how you have used the lexicon. 35 | - if interested in having us analyze your data for sentiment, emotion, 36 | and other affectual information. 37 | - if interested in a collaborative research project. 38 | 39 | ********************************************** 40 | DATA SOURCE 41 | ********************************************** 42 | 43 | The NRC Sentiment140 Lexicons are automatically generated from the following data source: 44 | 1.6 million tweets with emoticons collected by Go and colleagues (see Go, A., Bhayani, R., & Huang, L. Twitter sentiment classication using distant supervision. Tech. rep., Stanford University, 2009.) 
45 | 46 | 47 | ********************************************** 48 | FILE FORMAT 49 | ********************************************** 50 | 51 | Each line in the lexicons has the following format: 52 | 53 | 54 | can be a unigram or a bigram; 55 | is a real-valued sentiment score: score = PMI(w, pos) - PMI(w, neg), where PMI stands for Point-wise Mutual Information between a term w and the positive/negative class; 56 | is the number of times the term appears in the positive class, ie. in tweets with positive hashtag or emoticon; 57 | is the number of times the term appears in the negative class, ie. in tweets with negative hashtag or emoticon. 58 | 59 | 60 | ********************************************** 61 | AffLex and NegLex 62 | ********************************************** 63 | 64 | Both parts, AffLex and NegLex, of each lexicon are contained in the same file. The NegLex entries have suffixes '_NEG' or '_NEGFIRST'. 65 | 66 | In the unigram lexicon: 67 | '_NEGFIRST' is attached to terms that directly follow a negator; 68 | '_NEG' is attached to all other terms in negated contexts (not directly following a negator). 69 | 70 | In the bigram lexicon: 71 | '_NEG' is attached to all terms in negated contexts. 72 | 73 | Both suffixes are attached only to nouns, verbs, adjectives, and adverbs. All other parts of speech do not get these suffixes attached. 74 | 75 | 76 | ********************************************** 77 | More Information 78 | ********************************************** 79 | Details on the process of creating the lexicons can be found in: 80 | Kiritchenko, S., Zhu, X., Mohammad, S. (2014). Sentiment Analysis of Short Informal Texts. Journal of Artificial Intelligence Research, 50:723-762, 2014. 
81 | 82 | 83 | -------------------------------------------------------------------------------- /sentiment/models/data/lexicon/PosNegWords/pos_mod.txt: -------------------------------------------------------------------------------- 1 | :) 2 | :-) 3 | :] 4 | :d 5 | :p 6 | ;) 7 | ;-) 8 | ;] 9 | ;p 10 | absolutely love 11 | abundant 12 | accomplish 13 | accomplished 14 | achieve 15 | achieving 16 | active 17 | admirable 18 | admire 19 | admired 20 | adorable 21 | adore 22 | adventure 23 | adventurous 24 | affluent 25 | agile 26 | agree 27 | agreeable 28 | alert 29 | align 30 | aligned 31 | alive 32 | amaze 33 | amazing 34 | amuse 35 | amused 36 | appeal 37 | appealing 38 | appreciate 39 | appreciated 40 | artistic 41 | astound 42 | astounding 43 | astute 44 | attentive 45 | attract 46 | attractive 47 | auspicious 48 | authentic 49 | awake 50 | aware 51 | awesome 52 | beaming 53 | beautiful 54 | best 55 | best looking 56 | bless 57 | blessed 58 | bliss 59 | bold 60 | brave 61 | bright 62 | brilliant 63 | brisk 64 | buoyant 65 | calm 66 | can help 67 | capable 68 | centered 69 | certain 70 | charm 71 | charming 72 | cheerful 73 | clear 74 | clearly superior 75 | clever 76 | cleverful 77 | come on 78 | comeback 79 | comfort 80 | comfortable 81 | competent 82 | complete 83 | confident 84 | congrats 85 | congratulation 86 | connected 87 | conscious 88 | considerate 89 | convenient 90 | cool 91 | cooperate 92 | cooperative 93 | courage 94 | courageous 95 | creative 96 | cute 97 | damn good 98 | daring 99 | dazzle 100 | dazzling 101 | delicious 102 | delight 103 | delightful 104 | desirable 105 | determined 106 | diligent 107 | discerning 108 | discover 109 | dynamic 110 | eager 111 | easy 112 | efficiency 113 | efficient 114 | effortless 115 | elated 116 | elegant 117 | eloquent 118 | enchant 119 | enchanting 120 | encourage 121 | encouraging 122 | endless 123 | endorse 124 | energetic 125 | engage 126 | engaging 127 | enhance 128 | enhancing 129 | enormous 130 | 
enterprising 131 | enthusiasm 132 | enthusiastic 133 | entice 134 | enticing 135 | epic flail 136 | excellent 137 | exceptional 138 | excite 139 | excited 140 | exciting 141 | experienced 142 | exquisite 143 | exuberant 144 | fabulous 145 | fail 146 | fair 147 | faithful 148 | fantastic 149 | far-sighted 150 | fascinate 151 | fascinating 152 | fine 153 | flail 154 | flatter 155 | flattering 156 | flourish 157 | flourishing 158 | fortunate 159 | free 160 | frickin ruled 161 | frickin rules 162 | friendly 163 | ftw 164 | fulfill 165 | fulfilled 166 | fun 167 | funny 168 | generous 169 | gentle 170 | genuine 171 | gifted 172 | glad 173 | glorious 174 | glow 175 | glowing 176 | good 177 | good looking 178 | good luck 179 | good thing 180 | gorgeous 181 | grace 182 | graceful 183 | gracious 184 | grand 185 | great 186 | gtd 187 | handsome 188 | happy 189 | hardy 190 | harmonious 191 | hawt 192 | hawtness 193 | heal 194 | healed 195 | healthy 196 | help 197 | helpful 198 | high 199 | highly positive 200 | hilarious 201 | honest 202 | humorous 203 | i like 204 | i want 205 | ideal 206 | imaginative 207 | imagine 208 | impress 209 | impressive 210 | incredible 211 | industrious 212 | ingenious 213 | innovate 214 | innovative 215 | inspire 216 | inspired 217 | integrate 218 | intellectual vigor 219 | intelligent 220 | interest 221 | interested 222 | interesting 223 | intuitive 224 | inventive 225 | invincible 226 | invite 227 | inviting 228 | irresistible 229 | is the best 230 | jolly 231 | joyful 232 | joyous 233 | judicious 234 | keen 235 | kind 236 | kinda impressed 237 | knowing 238 | leaning towards 239 | life saver 240 | liked it 241 | limitless 242 | lively 243 | lol 244 | looking forward to 245 | love 246 | love it 247 | lovely 248 | loves it 249 | loving 250 | lucky 251 | luminous 252 | magic 253 | magical 254 | magnificent 255 | make a difference 256 | marvellous 257 | master 258 | masterful 259 | mighty 260 | miracle 261 | miraculous 262 | motivate 263 | 
motivated 264 | moves me 265 | my hero 266 | natural 267 | neat 268 | nice 269 | nicely 270 | noble 271 | not suck 272 | nurturing 273 | obedient 274 | ok 275 | optimistic 276 | outstanding 277 | passion 278 | passionate 279 | peace 280 | peaceful 281 | perfect 282 | persevere 283 | persevering 284 | persist 285 | persistent 286 | piece of cake 287 | play 288 | playful 289 | pleasant 290 | pleasantly 291 | pleasantly surprised 292 | please 293 | pleasing 294 | plentiful 295 | plenty 296 | plus 297 | positive 298 | power 299 | powerful 300 | precious 301 | prepare 302 | prepared 303 | pretty good 304 | productive 305 | profound 306 | prompt 307 | prop 308 | props 309 | prosperous 310 | proud 311 | pumped 312 | qualified 313 | quick 314 | quite amazing 315 | radiant 316 | rally 317 | really like 318 | really love 319 | really neat 320 | reasonable 321 | recommend 322 | refine 323 | refined 324 | refresh 325 | refreshing 326 | relax 327 | relaxing 328 | reliable 329 | relieve 330 | relieved 331 | remarkable 332 | resolute 333 | resourceful 334 | respect 335 | respected 336 | responsive 337 | reward 338 | rewarding 339 | robust 340 | rocks 341 | safe 342 | satisfied 343 | satisfy 344 | secure 345 | seduce 346 | seductive 347 | self-reliant 348 | sensation 349 | sensational 350 | sensible 351 | sensitive 352 | serene 353 | share 354 | sharing 355 | silly 356 | skilful 357 | skill 358 | smart 359 | smashing 360 | smile 361 | smiling 362 | smooth 363 | so cool 364 | so great 365 | so proud 366 | sparkle 367 | sparkling 368 | spiritual 369 | splendid 370 | strong 371 | stunning 372 | success 373 | successful 374 | super 375 | superb 376 | superior 377 | surprise 378 | surprised 379 | swift 380 | talent 381 | talented 382 | tenacious 383 | terrific 384 | thank you 385 | thankful 386 | thanks 387 | thanks! 
388 | the best 389 | thks 390 | thoughtful 391 | thrill 392 | thrilling 393 | thrive 394 | thriving 395 | thx 396 | timely 397 | trust 398 | trusting 399 | truth 400 | truthful 401 | ultimate 402 | unique 403 | valiant 404 | valuable 405 | versatile 406 | very cool 407 | very exceptional 408 | very quickly 409 | vibrant 410 | victorious 411 | victory 412 | vigorous 413 | vivacious 414 | vivid 415 | voting for 416 | w00t 417 | warm 418 | way to go 419 | wealthy 420 | well 421 | whole 422 | win 423 | wise 424 | witty 425 | won 426 | wonderful 427 | woo 428 | woot 429 | worth 430 | worthy 431 | would love 432 | would recommend 433 | wowsers 434 | yay 435 | young 436 | youth 437 | youthful 438 | yum 439 | yummy 440 | zany 441 | zeal 442 | zealous 443 | zest 444 | -------------------------------------------------------------------------------- /sentiment/models/models_save/classifier: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/models_save/classifier -------------------------------------------------------------------------------- /sentiment/models/models_save/classifier_01.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/models_save/classifier_01.npy -------------------------------------------------------------------------------- /sentiment/models/models_save/classifier_02.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/models_save/classifier_02.npy -------------------------------------------------------------------------------- /sentiment/models/models_save/classifier_03.npy: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/models_save/classifier_03.npy -------------------------------------------------------------------------------- /sentiment/models/models_save/classifier_04.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/models_save/classifier_04.npy -------------------------------------------------------------------------------- /sentiment/models/models_save/lexicon: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/models_save/lexicon -------------------------------------------------------------------------------- /sentiment/models/models_save/ngrams: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/models_save/ngrams -------------------------------------------------------------------------------- /sentiment/models/test_save_model.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/5/23. 4 | from sklearn import metrics 5 | from sklearn.externals import joblib 6 | from sklearn.linear_model import LogisticRegression 7 | 8 | from sentiment.models.tools.Lexicon import Lexicon, Ngrams 9 | from sentiment.models.tools.pre_process2 import pre_process 10 | from sentiment.models.tools.read_data import * 11 | 12 | 13 | def get_features(data, postags): 14 | """ 15 | 16 | :param data: [str,str..] 
17 | :param id2word: dict word_id:word 18 | :param vocabulary: dict word:word_id 19 | :return: 20 | """ 21 | print 'create features...' 22 | data_feature = pre_process(data, postags) # 这里data 每一行已经分词了 23 | print data_feature.shape 24 | return data_feature 25 | 26 | 27 | def main(): 28 | train_data, train_target, train_pos = read_train_data('2013') 29 | train_feature = get_features(train_data, train_pos) 30 | 31 | clf = LogisticRegression(C=0.01105) 32 | clf.fit(train_feature, train_target) 33 | 34 | ngram = Ngrams() 35 | lexicon = Lexicon() 36 | joblib.dump(clf, 'models_save/classifier') 37 | joblib.dump(ngram, 'models_save/ngrams') 38 | joblib.dump(lexicon, 'models_save/lexicon') 39 | 40 | for name, test_data, test_target, test_pos in read_all_test_data(): 41 | print '\n\n\n\n\n--------Now is {} --------\n\n'.format(name) 42 | test_feature = get_features(test_data, test_pos) 43 | predicted = clf.predict(test_feature) 44 | print "Classification report for %s:\n%s\n" % (clf, 45 | metrics.classification_report(test_target, predicted, digits=3)) 46 | print("Confusion matrix:\n%s" % metrics.confusion_matrix(test_target, predicted)) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /sentiment/models/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/5/3. 
4 | -------------------------------------------------------------------------------- /sentiment/models/tools/ark-tweet-nlp-0.3.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/sentiment/models/tools/ark-tweet-nlp-0.3.2.jar -------------------------------------------------------------------------------- /sentiment/models/tools/read_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/5/3. 4 | import codecs 5 | import subprocess 6 | import sys 7 | 8 | from sentiment.models.tools.Lexicon import Ngrams 9 | 10 | 11 | def create_pos_file(path): 12 | reload(sys) 13 | sys.setdefaultencoding('utf8') 14 | print 'read data from', path 15 | temp_file_path = '../t.txt' 16 | with codecs.open(path, "r", "utf-8") as f: 17 | with codecs.open(temp_file_path, 'w+', "utf-8") as fw: 18 | data = f.readlines() 19 | for line in data: 20 | line = line.strip().split('\t') 21 | fw.write(line[-1] + '\n') 22 | 23 | cmd = ['java', '-jar', './ark-tweet-nlp-0.3.2.jar', '--no-confidence', temp_file_path] 24 | stdin = subprocess.PIPE 25 | stdout = subprocess.PIPE 26 | stderr = subprocess.PIPE 27 | p = subprocess.Popen(cmd, stdin=stdin, stdout=stdout, stderr=stderr) 28 | (stdout, stderr) = p.communicate() 29 | 30 | result = stdout.split('\r\n') 31 | with codecs.open(path + "_pos", "w+", "utf-8") as f: 32 | for i, line in enumerate(data): 33 | line = line.strip().split('\t') 34 | t = result[i].split('\t') 35 | tweet = t[0] 36 | pos = t[1] 37 | tags = line[2] 38 | f.write(tags + '\t' + tweet + '\t' + pos + '\n') 39 | 40 | 41 | def read_data(path): 42 | """ 43 | return tweets_list and tags_list for given path 44 | :param path: the file path eg: c:\\a.txt 45 | :return: tweets_list,tags_list 46 | """ 47 | print 'read data from', path 48 | 49 | tweets, tags, pos = 
[], [], [] 50 | with codecs.open(path + '_pos', "r", "utf-8") as f: 51 | for line in f.readlines(): 52 | line = line.strip().split("\t") 53 | tags.append(line[0]) 54 | tweets.append(line[1]) 55 | pos.append(line[2]) 56 | return tweets, tags, pos 57 | 58 | 59 | def read_train_data_by_year(year): 60 | train_data, train_target, pos = read_data("./data/train/" + str(year) + "-train-data.tsv") 61 | # train_data2, train_target2, pos2 = read_data("./data/train/" + str(year) + "-dev-data.tsv") 62 | # train_data = train_data + train_data2 63 | # train_target = train_target + train_target2 64 | # pos = pos + pos2 65 | return train_data, train_target, pos 66 | 67 | 68 | def read_train_data(year=None): 69 | train_data, train_target, train_pos = [], [], [] 70 | if year == '2013' or year is None: 71 | data, target, pos = read_train_data_by_year('2013') 72 | train_data += data 73 | train_target += target 74 | train_pos += pos 75 | 76 | len_2013 = len(train_pos) 77 | 78 | if year == '2016' or year is None: 79 | data, target, pos = read_train_data_by_year('2016') 80 | train_data += data 81 | train_target += target 82 | train_pos += pos 83 | 84 | Ngrams().create_ngram_vector(train_data, train_target) 85 | print int(len_2013 / 2.5), len(train_data[len_2013:]) 86 | if year is None: 87 | len_2013_remain = int(len_2013 / 2.5) 88 | train_data = train_data[:len_2013_remain] + train_data[len_2013:] 89 | train_target = train_target[:len_2013_remain] + train_target[len_2013:] 90 | train_pos = train_pos[:len_2013_remain] + train_pos[len_2013:] 91 | 92 | return train_data, train_target, train_pos 93 | 94 | 95 | def read_2013_test_data(): 96 | return read_data("./data/test/2013-test-tweet.tsv") 97 | 98 | 99 | def read_2014_test_data(): 100 | return read_data("./data/test/2014-test-tweet.tsv") 101 | 102 | 103 | def read_2016_test_data(): 104 | return read_data("./data/test/2016-test-tweet.tsv") 105 | 106 | 107 | def read_2014_sarcasm_test_data(): 108 | return 
read_data("./data/test/2014-test-sarcasm.tsv") 109 | 110 | 111 | def read_all_test_data(): 112 | test_data_name = [ 113 | '2013-test-tweet.tsv', 114 | '2013-test-sms.tsv', 115 | '2014-test-tweet.tsv', 116 | '2014-test-sarcasm.tsv', 117 | '2014-test-journal.tsv', 118 | # '2016-test-tweet.tsv' 119 | ] 120 | base_path = './data/test/' 121 | for name in test_data_name: 122 | data, target, pos = read_data(base_path + name) 123 | yield name, data, target, pos 124 | 125 | 126 | def read_sentiment140_test_data(): 127 | # test_data, test_target = [], [] 128 | # with open(r'e:\textCorpus\testdata.csv') as f: 129 | # for i, line in enumerate(f): 130 | # line = line.split('","') 131 | # score, text = line[0][1:], line[-1] 132 | # 133 | # try: 134 | # text = text[:text.rfind('"')] 135 | # test_data.append(text) 136 | # if score == '4': 137 | # test_target.append('positive') 138 | # elif score == '0': 139 | # test_target.append('negative') 140 | # else: 141 | # test_target.append('neutral') 142 | # except Exception, e: # print i, line, e 143 | # pass 144 | return read_data(r'./data/test/sentiment140.testdata.tsv') 145 | 146 | 147 | if __name__ == '__main__': 148 | test_data_name = ['Twitter-2013_gold.csv'] 149 | # test_data_name = [ 150 | # '2013-test-tweet.tsv', '2013-test-sms.tsv', 151 | # '2014-test-tweet.tsv', '2014-test-sarcasm.tsv', '2014-test-journal.tsv', 152 | # '2016-test-tweet.tsv' 153 | # ] 154 | base_path = '../data/' 155 | for test in test_data_name: 156 | create_pos_file(base_path + test) 157 | 158 | # years = [2013, 2016] 159 | # base_path = '../data/train/' 160 | # for year in years: 161 | # create_pos_file(base_path + str(year) + "-train-data.tsv") 162 | # create_pos_file(base_path + str(year) + "-dev-data.tsv") 163 | 164 | # data, target = read_sentiment140_test_data() 165 | # reload(sys) 166 | # sys.setdefaultencoding('utf8') 167 | # temp_file_path = '../t.txt' 168 | # 169 | # with codecs.open(temp_file_path, 'w+', "utf-8") as fw: 170 | # for line in data: 
171 | # fw.write(line + '\n') 172 | # 173 | # cmd = ['java', '-jar', './ark-tweet-nlp-0.3.2.jar', '--no-confidence', temp_file_path] 174 | # stdin = subprocess.PIPE 175 | # stdout = subprocess.PIPE 176 | # stderr = subprocess.PIPE 177 | # p = subprocess.Popen(cmd, stdin=stdin, stdout=stdout, stderr=stderr) 178 | # (stdout, stderr) = p.communicate() 179 | # 180 | # result = stdout.split('\r\n') 181 | # with codecs.open(r"e:\textCorpus\testdata.csv_pos", "w+", "utf-8") as f: 182 | # for i, line in enumerate(data): 183 | # line = line.strip().split('\t') 184 | # t = result[i].split('\t') 185 | # tweet = t[0] 186 | # pos = t[1] 187 | # tags = target[i] 188 | # f.write(tags + '\t' + tweet + '\t' + pos + '\n') 189 | -------------------------------------------------------------------------------- /sentiment/urls.py: -------------------------------------------------------------------------------- 1 | """twitterDataMining URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/1.9/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. Add a URL to urlpatterns: url(r'^$', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.conf.urls import url, include 14 | 2. 
Add a URL to urlpatterns: url(r'^blog/', include('blog.urls')) 15 | """ 16 | from django.conf.urls import url 17 | import sentiment.views 18 | 19 | urlpatterns = [ 20 | url(r'^$', sentiment.views.index), 21 | url(r'^sentiment_query$', sentiment.views.query) 22 | ] 23 | -------------------------------------------------------------------------------- /sentiment/views.py: -------------------------------------------------------------------------------- 1 | from django.shortcuts import render 2 | 3 | # Create your views here. 4 | from sentiment.models.SentimentManager import query_sentiment_for_online_data 5 | import json 6 | from django.http import HttpResponse 7 | from django.shortcuts import render 8 | 9 | 10 | def index(request): 11 | return render(request, 'sentiment/index.html') 12 | 13 | 14 | def query(request): 15 | query_str = request.GET.get('query_str') 16 | # TODO if none raise error 17 | res = query_sentiment_for_online_data(query_str) 18 | return HttpResponse(json.dumps(res), content_type="application/json") 19 | -------------------------------------------------------------------------------- /static/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/4/14. 4 | -------------------------------------------------------------------------------- /static/css/bootstrap-switch.min.css: -------------------------------------------------------------------------------- 1 | /* ======================================================================== 2 | * bootstrap-switch - v3.3.2 3 | * http://www.bootstrap-switch.org 4 | * ======================================================================== 5 | * Copyright 2012-2013 Mattia Larentis 6 | * 7 | * ======================================================================== 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | * ======================================================================== 20 | */ 21 | 22 | .bootstrap-switch{display:inline-block;direction:ltr;cursor:pointer;border-radius:4px;border:1px solid;border-color:#ccc;position:relative;text-align:left;overflow:hidden;line-height:8px;z-index:0;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;vertical-align:middle;-webkit-transition:border-color ease-in-out .15s,box-shadow ease-in-out .15s;transition:border-color ease-in-out .15s,box-shadow ease-in-out .15s}.bootstrap-switch .bootstrap-switch-container{display:inline-block;top:0;border-radius:4px;-webkit-transform:translate3d(0, 0, 0);transform:translate3d(0, 0, 0)}.bootstrap-switch .bootstrap-switch-handle-on,.bootstrap-switch .bootstrap-switch-handle-off,.bootstrap-switch .bootstrap-switch-label{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;cursor:pointer;display:inline-block !important;height:100%;padding:6px 12px;font-size:14px;line-height:20px}.bootstrap-switch .bootstrap-switch-handle-on,.bootstrap-switch .bootstrap-switch-handle-off{text-align:center;z-index:1}.bootstrap-switch .bootstrap-switch-handle-on.bootstrap-switch-primary,.bootstrap-switch .bootstrap-switch-handle-off.bootstrap-switch-primary{color:#fff;background:#428bca}.bootstrap-switch .bootstrap-switch-handle-on.bootstrap-switch-info,.bootstrap-switch .bootstrap-switch-handle-off.bootstrap-switch-info{color:#fff;background:#5bc0de}.bootstrap-switch 
.bootstrap-switch-handle-on.bootstrap-switch-success,.bootstrap-switch .bootstrap-switch-handle-off.bootstrap-switch-success{color:#fff;background:#5cb85c}.bootstrap-switch .bootstrap-switch-handle-on.bootstrap-switch-warning,.bootstrap-switch .bootstrap-switch-handle-off.bootstrap-switch-warning{background:#f0ad4e;color:#fff}.bootstrap-switch .bootstrap-switch-handle-on.bootstrap-switch-danger,.bootstrap-switch .bootstrap-switch-handle-off.bootstrap-switch-danger{color:#fff;background:#d9534f}.bootstrap-switch .bootstrap-switch-handle-on.bootstrap-switch-default,.bootstrap-switch .bootstrap-switch-handle-off.bootstrap-switch-default{color:#000;background:#eee}.bootstrap-switch .bootstrap-switch-label{text-align:center;margin-top:-1px;margin-bottom:-1px;z-index:100;color:#333;background:#fff}.bootstrap-switch .bootstrap-switch-handle-on{border-bottom-left-radius:3px;border-top-left-radius:3px}.bootstrap-switch .bootstrap-switch-handle-off{border-bottom-right-radius:3px;border-top-right-radius:3px}.bootstrap-switch input[type='radio'],.bootstrap-switch input[type='checkbox']{position:absolute !important;top:0;left:0;opacity:0;filter:alpha(opacity=0);z-index:-1}.bootstrap-switch input[type='radio'].form-control,.bootstrap-switch input[type='checkbox'].form-control{height:auto}.bootstrap-switch.bootstrap-switch-mini .bootstrap-switch-handle-on,.bootstrap-switch.bootstrap-switch-mini .bootstrap-switch-handle-off,.bootstrap-switch.bootstrap-switch-mini .bootstrap-switch-label{padding:1px 5px;font-size:12px;line-height:1.5}.bootstrap-switch.bootstrap-switch-small .bootstrap-switch-handle-on,.bootstrap-switch.bootstrap-switch-small .bootstrap-switch-handle-off,.bootstrap-switch.bootstrap-switch-small .bootstrap-switch-label{padding:5px 10px;font-size:12px;line-height:1.5}.bootstrap-switch.bootstrap-switch-large .bootstrap-switch-handle-on,.bootstrap-switch.bootstrap-switch-large .bootstrap-switch-handle-off,.bootstrap-switch.bootstrap-switch-large 
.bootstrap-switch-label{padding:6px 16px;font-size:18px;line-height:1.33}.bootstrap-switch.bootstrap-switch-disabled,.bootstrap-switch.bootstrap-switch-readonly,.bootstrap-switch.bootstrap-switch-indeterminate{cursor:default !important}.bootstrap-switch.bootstrap-switch-disabled .bootstrap-switch-handle-on,.bootstrap-switch.bootstrap-switch-readonly .bootstrap-switch-handle-on,.bootstrap-switch.bootstrap-switch-indeterminate .bootstrap-switch-handle-on,.bootstrap-switch.bootstrap-switch-disabled .bootstrap-switch-handle-off,.bootstrap-switch.bootstrap-switch-readonly .bootstrap-switch-handle-off,.bootstrap-switch.bootstrap-switch-indeterminate .bootstrap-switch-handle-off,.bootstrap-switch.bootstrap-switch-disabled .bootstrap-switch-label,.bootstrap-switch.bootstrap-switch-readonly .bootstrap-switch-label,.bootstrap-switch.bootstrap-switch-indeterminate .bootstrap-switch-label{opacity:.5;filter:alpha(opacity=50);cursor:default !important}.bootstrap-switch.bootstrap-switch-animate .bootstrap-switch-container{-webkit-transition:margin-left .5s;transition:margin-left .5s}.bootstrap-switch.bootstrap-switch-inverse .bootstrap-switch-handle-on{border-bottom-left-radius:0;border-top-left-radius:0;border-bottom-right-radius:3px;border-top-right-radius:3px}.bootstrap-switch.bootstrap-switch-inverse .bootstrap-switch-handle-off{border-bottom-right-radius:0;border-top-right-radius:0;border-bottom-left-radius:3px;border-top-left-radius:3px}.bootstrap-switch.bootstrap-switch-focused{border-color:#66afe9;outline:0;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(102,175,233,0.6);box-shadow:inset 0 1px 1px rgba(0,0,0,0.075),0 0 8px rgba(102,175,233,0.6)}.bootstrap-switch.bootstrap-switch-on .bootstrap-switch-label,.bootstrap-switch.bootstrap-switch-inverse.bootstrap-switch-off .bootstrap-switch-label{border-bottom-right-radius:3px;border-top-right-radius:3px}.bootstrap-switch.bootstrap-switch-off 
.bootstrap-switch-label,.bootstrap-switch.bootstrap-switch-inverse.bootstrap-switch-on .bootstrap-switch-label{border-bottom-left-radius:3px;border-top-left-radius:3px} -------------------------------------------------------------------------------- /static/css/bootstrap-theme.min.css.map: -------------------------------------------------------------------------------- 1 | {"version":3,"sources":["less/theme.less","less/mixins/vendor-prefixes.less","less/mixins/gradients.less","less/mixins/reset-filter.less"],"names":[],"mappings":";;;;AAmBA,YAAA,aAAA,UAAA,aAAA,aAAA,aAME,YAAA,EAAA,KAAA,EAAA,eC2CA,mBAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,iBDvCR,mBAAA,mBAAA,oBAAA,oBAAA,iBAAA,iBAAA,oBAAA,oBAAA,oBAAA,oBAAA,oBAAA,oBCsCA,mBAAA,MAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,MAAA,EAAA,IAAA,IAAA,iBDlCR,qBAAA,sBAAA,sBAAA,uBAAA,mBAAA,oBAAA,sBAAA,uBAAA,sBAAA,uBAAA,sBAAA,uBAAA,+BAAA,gCAAA,6BAAA,gCAAA,gCAAA,gCCiCA,mBAAA,KACQ,WAAA,KDlDV,mBAAA,oBAAA,iBAAA,oBAAA,oBAAA,oBAuBI,YAAA,KAyCF,YAAA,YAEE,iBAAA,KAKJ,aErEI,YAAA,EAAA,IAAA,EAAA,KACA,iBAAA,iDACA,iBAAA,4CAAA,iBAAA,qEAEA,iBAAA,+CCnBF,OAAA,+GH4CA,OAAA,0DACA,kBAAA,SAuC2C,aAAA,QAA2B,aAAA,KArCtE,mBAAA,mBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,oBAAA,oBAEE,iBAAA,QACA,aAAA,QAMA,sBAAA,6BAAA,4BAAA,6BAAA,4BAAA,4BAAA,uBAAA,8BAAA,6BAAA,8BAAA,6BAAA,6BAAA,gCAAA,uCAAA,sCAAA,uCAAA,sCAAA,sCAME,iBAAA,QACA,iBAAA,KAgBN,aEtEI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDAEA,OAAA,+GCnBF,OAAA,0DH4CA,kBAAA,SACA,aAAA,QAEA,mBAAA,mBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,oBAAA,oBAEE,iBAAA,QACA,aAAA,QAMA,sBAAA,6BAAA,4BAAA,6BAAA,4BAAA,4BAAA,uBAAA,8BAAA,6BAAA,8BAAA,6BAAA,6BAAA,gCAAA,uCAAA,sCAAA,uCAAA,sCAAA,sCAME,iBAAA,QACA,iBAAA,KAiBN,aEvEI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDAEA,OAAA,+GCnBF,OAAA,0DH4CA,kBAAA,SACA,aAAA,QAEA,mBAAA,mBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,oBAAA,oBAEE,iBAAA,QACA,aAAA,QAMA,sBAAA,6BAAA,4BAAA,6BAAA,4BAAA,4BAAA,uBAAA,8BAAA,6BAAA,8BAAA,6BAAA,6BAAA,gCAAA,uCAAA,sCAAA,uCAAA,sCAAA,sCAME,iBAAA,QACA,iBAAA,KAkBN,UExEI,i
BAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDAEA,OAAA,+GCnBF,OAAA,0DH4CA,kBAAA,SACA,aAAA,QAEA,gBAAA,gBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,iBAAA,iBAEE,iBAAA,QACA,aAAA,QAMA,mBAAA,0BAAA,yBAAA,0BAAA,yBAAA,yBAAA,oBAAA,2BAAA,0BAAA,2BAAA,0BAAA,0BAAA,6BAAA,oCAAA,mCAAA,oCAAA,mCAAA,mCAME,iBAAA,QACA,iBAAA,KAmBN,aEzEI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDAEA,OAAA,+GCnBF,OAAA,0DH4CA,kBAAA,SACA,aAAA,QAEA,mBAAA,mBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,oBAAA,oBAEE,iBAAA,QACA,aAAA,QAMA,sBAAA,6BAAA,4BAAA,6BAAA,4BAAA,4BAAA,uBAAA,8BAAA,6BAAA,8BAAA,6BAAA,6BAAA,gCAAA,uCAAA,sCAAA,uCAAA,sCAAA,sCAME,iBAAA,QACA,iBAAA,KAoBN,YE1EI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDAEA,OAAA,+GCnBF,OAAA,0DH4CA,kBAAA,SACA,aAAA,QAEA,kBAAA,kBAEE,iBAAA,QACA,oBAAA,EAAA,MAGF,mBAAA,mBAEE,iBAAA,QACA,aAAA,QAMA,qBAAA,4BAAA,2BAAA,4BAAA,2BAAA,2BAAA,sBAAA,6BAAA,4BAAA,6BAAA,4BAAA,4BAAA,+BAAA,sCAAA,qCAAA,sCAAA,qCAAA,qCAME,iBAAA,QACA,iBAAA,KA2BN,eAAA,WClCE,mBAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,EAAA,IAAA,IAAA,iBD2CV,0BAAA,0BE3FI,iBAAA,QACA,iBAAA,oDACA,iBAAA,+CAAA,iBAAA,wEACA,iBAAA,kDACA,OAAA,+GF0FF,kBAAA,SAEF,yBAAA,+BAAA,+BEhGI,iBAAA,QACA,iBAAA,oDACA,iBAAA,+CAAA,iBAAA,wEACA,iBAAA,kDACA,OAAA,+GFgGF,kBAAA,SASF,gBE7GI,iBAAA,iDACA,iBAAA,4CACA,iBAAA,qEAAA,iBAAA,+CACA,OAAA,+GACA,OAAA,0DCnBF,kBAAA,SH+HA,cAAA,ICjEA,mBAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,iBD6DV,sCAAA,oCE7GI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SD2CF,mBAAA,MAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,MAAA,EAAA,IAAA,IAAA,iBD0EV,cAAA,iBAEE,YAAA,EAAA,IAAA,EAAA,sBAIF,gBEhII,iBAAA,iDACA,iBAAA,4CACA,iBAAA,qEAAA,iBAAA,+CACA,OAAA,+GACA,OAAA,0DCnBF,kBAAA,SHkJA,cAAA,IAHF,sCAAA,oCEhII,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SD2CF,mBAAA,MAAA,EAAA,IAAA,IAAA,gBACQ,WAAA,MAAA,EAAA,IAAA,IAAA,gBDgFV,8BAAA,iCAYI,YAAA,EAAA,KAAA,EAAA,gBAKJ,qBAAA,kBAAA,mBAGE,cAAA,EAqBF,yBAfI,mDAAA,yDAAA,yDAGE,MAAA,KE7JF,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,UFqKJ,OACE,YAAA,EAAA,IAAA,EAAA,qB
C3HA,mBAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,gBACQ,WAAA,MAAA,EAAA,IAAA,EAAA,sBAAA,EAAA,IAAA,IAAA,gBDsIV,eEtLI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF8KF,aAAA,QAKF,YEvLI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF8KF,aAAA,QAMF,eExLI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF8KF,aAAA,QAOF,cEzLI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF8KF,aAAA,QAeF,UEjMI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFuMJ,cE3MI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFwMJ,sBE5MI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFyMJ,mBE7MI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF0MJ,sBE9MI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF2MJ,qBE/MI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF+MJ,sBElLI,iBAAA,yKACA,iBAAA,oKACA,iBAAA,iKFyLJ,YACE,cAAA,IC9KA,mBAAA,EAAA,IAAA,IAAA,iBACQ,WAAA,EAAA,IAAA,IAAA,iBDgLV,wBAAA,8BAAA,8BAGE,YAAA,EAAA,KAAA,EAAA,QEnOE,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFiOF,aAAA,QALF,+BAAA,qCAAA,qCAQI,YAAA,KAUJ,OCnME,mBAAA,EAAA,IAAA,IAAA,gBACQ,WAAA,EAAA,IAAA,IAAA,gBD4MV,8BE5PI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFyPJ,8BE7PI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF0PJ,8BE9PI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF2PJ,2BE/PI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF4PJ,8BEhQI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SF6PJ,6BEjQI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFoQJ,MExQI,iBAAA,oDACA,iBAAA,+CACA,iBAAA,wEAAA,iBAAA,kDACA,OAAA,+GACA,kBAAA,SFsQF,aAAA,QC3NA,mBAAA,MAAA,EAAA,IAAA,IAAA,gBAAA,EAAA,IAAA,EAAA,qBACQ,WAAA,MAAA,EAAA,IAAA,IAAA,gBAAA,EAAA,IAAA,EAAA"} -------------------------------------------------------------------------------- 
/static/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/FontAwesome.otf -------------------------------------------------------------------------------- /static/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /static/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /static/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /static/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /static/fonts/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/glyphicons-halflings-regular.eot 
-------------------------------------------------------------------------------- /static/fonts/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /static/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /static/fonts/glyphicons-halflings-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/fonts/glyphicons-halflings-regular.woff2 -------------------------------------------------------------------------------- /static/image/others.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/image/others.jpg -------------------------------------------------------------------------------- /static/image/sentiment_1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/image/sentiment_1.gif -------------------------------------------------------------------------------- /static/image/sentiment_2.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/image/sentiment_2.jpg -------------------------------------------------------------------------------- /static/image/sentiment_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/image/sentiment_3.jpg -------------------------------------------------------------------------------- /static/image/topic_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/image/topic_1.jpg -------------------------------------------------------------------------------- /static/image/topic_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/image/topic_3.jpg -------------------------------------------------------------------------------- /static/image/topic_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/image/topic_4.jpg -------------------------------------------------------------------------------- /static/image/topic_8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/static/image/topic_8.jpg -------------------------------------------------------------------------------- /static/js/append_new_iframe.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by hrwhisper on 2016/2/9. 
3 | */ 4 | 5 | function network_retweet_iframe() { 6 | $('body').append(''); 8 | } 9 | 10 | function statistic_hashtag_timeline() { 11 | $('body').append(''); 13 | } 14 | 15 | function statistic_hashtag_compare() { 16 | $('body').append(''); 18 | } 19 | 20 | function statistic_hashtag_pie(){ 21 | $('body').append(''); 23 | 24 | } -------------------------------------------------------------------------------- /static/js/googleMap.js: -------------------------------------------------------------------------------- 1 | var googleMap = { 2 | map: null, 3 | markers: {}, 4 | currentId: 0, 5 | 6 | uniqueId: function () { 7 | return ++this.currentId; 8 | }, 9 | 10 | infowindow: new google.maps.InfoWindow({ 11 | size: new google.maps.Size(150, 50) 12 | }), 13 | 14 | 15 | initialize: function () { 16 | if (this.map) return null; 17 | 18 | var myOptions = { 19 | zoom: 2, 20 | center: new google.maps.LatLng(43.907787, 0), 21 | mapTypeControl: true, 22 | mapTypeControlOptions: {style: google.maps.MapTypeControlStyle.DROPDOWN_MENU}, 23 | navigationControl: true, 24 | mapTypeId: google.maps.MapTypeId.ROADMAP 25 | }; 26 | this.map = new google.maps.Map(document.getElementById("map_canvas"), 27 | myOptions); 28 | 29 | google.maps.event.addListener(this.map, 'click', function () { 30 | googleMap.infowindow.close(); 31 | }); 32 | 33 | google.maps.event.addListener(this.map, 'click', function (event) { 34 | var Latitude = event.latLng.lng().toFixed(2); 35 | var longitude = event.latLng.lat().toFixed(2); 36 | googleMap.addMarker(event.latLng, "name", "Location
" +Latitude +","+ longitude, 37 | Latitude +","+ longitude); 38 | }); 39 | 40 | //google.maps.event.addListener(this.map, 'click', function (event) { 41 | // console.log("Latitude: " + event.latLng.lat() + " " + ", longitude: " + event.latLng.lng()); 42 | //}); 43 | }, 44 | 45 | 46 | addMarker: function (Gpoint, name, contentString, geo) { 47 | var id = this.uniqueId(); // get new id 48 | marker = new google.maps.Marker({ 49 | id: id, 50 | position: Gpoint, 51 | geo: geo, 52 | map: googleMap.map, 53 | draggable: true, 54 | animation: google.maps.Animation.DROP 55 | }); 56 | 57 | google.maps.event.addListener(marker, 'click', function () { 58 | googleMap.infowindow.setPosition(this.position); 59 | googleMap.infowindow.setContent(contentString); 60 | googleMap.infowindow.open(googleMap.map, marker); 61 | }); 62 | google.maps.event.trigger(marker, 'click'); 63 | 64 | googleMap.map.panTo(Gpoint); 65 | 66 | this.markers[id] = marker; 67 | 68 | google.maps.event.addListener(marker, "rightclick", function (point) { 69 | googleMap.delMarker(this.id) 70 | }); 71 | //var res = ''; 72 | //for (i in googleMap.markers){ 73 | // res += googleMap.markers[i].geo + ','; 74 | //} 75 | //res = res.substring(0,res.length-1) 76 | //console.log(res); 77 | }, 78 | 79 | delMarker: function (id) { 80 | this.markers[id].setMap(null); 81 | delete this.markers[id]; 82 | } 83 | }; -------------------------------------------------------------------------------- /static/js/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by hrwhisper on 2016/4/13. 
3 | */ 4 | 5 | (function () { 6 | 7 | $(function () { 8 | $('#myCarousel').on('slide.bs.carousel', function (e) { 9 | // var slideFrom = $(this).find('.active').index(); 10 | var slideTo = $(e.relatedTarget).index(); 11 | // console.log(slideFrom + ' => ' + slideTo); 12 | var change_obj = $("#header_info_button"); 13 | if (slideTo == 1) { 14 | change_obj.attr("href", "/topic"); 15 | change_obj.text("Start topic Now"); 16 | } else { 17 | change_obj.attr("href", "/sentiment"); 18 | change_obj.text("Start sentiment Now"); 19 | } 20 | }); 21 | }); 22 | "use strict"; 23 | 24 | var π = Math.PI; 25 | var τ = 2 * Math.PI; 26 | 27 | var types = { 28 | square: function (n) { 29 | return (((n + 1) % 2) ? 0 : 1) / n; 30 | }, 31 | triangle: function (n) { 32 | if (!(n % 2)) return 0; 33 | return ((n % 4 === 1) ? 1 : -1) / (n * n); 34 | }, 35 | sawtooth: function (n) { 36 | return ((n % 2) ? -1 : 1) / (n + 1); 37 | }, 38 | pulse: function (n) { 39 | return 0.1; 40 | } 41 | }; 42 | 43 | function FT(A, N, φ) { 44 | φ = φ || 0; 45 | return function (x) { 46 | var n = -1, y = 0; 47 | while (++n < N) { 48 | y += A[n] * Math.sin(τ * (n + 1) * x + φ); 49 | } 50 | return y; 51 | } 52 | } 53 | 54 | var 55 | margin = {top: 0, right: 0, bottom: 0, left: 0}, 56 | W = 450, 57 | H = 450, 58 | h = H - margin.top - margin.bottom, 59 | 60 | radius = 140, 61 | theta = 0, 62 | xmax = 1.5, 63 | rate = 1 / 60, 64 | 65 | tDomain = d3.range(0, 1.1, 1 / 1000), // trace domain 66 | gDomain = d3.range(0, xmax, xmax / 1000), // graph domain 67 | 68 | C = types.square, // coeffiecients 69 | L = 6, // size 70 | F = 0.3, // frequence 71 | 72 | yCirc = d3.scale.linear().domain([-1, 1]).range([h / 2 + radius, h / 2 - radius]), 73 | xCirc = d3.scale.linear().domain([-1, 1]).range([0, 2 * radius]), 74 | rAxis = d3.scale.linear().domain([0, 1]).range([0, radius]), 75 | xAxis = d3.scale.linear().range([radius, W - margin.left]), 76 | 77 | Fxy, fx, fy, 78 | 79 | timer, data = []; 80 | 81 | var graph = d3.svg.line() 
82 | .x(function (d) { 83 | return xAxis(d); 84 | }) 85 | .y(function (d) { 86 | return yCirc(fy(theta - d)); 87 | }); 88 | 89 | var proj = d3.svg.line() 90 | .x(function (d) { 91 | return xCirc(d.x); 92 | }) 93 | .y(function (d) { 94 | return yCirc(d.y); 95 | }); 96 | 97 | var trace = d3.svg.line() 98 | .x(function (d) { 99 | return xCirc(fx(d)); 100 | }) 101 | .y(function (d) { 102 | return yCirc(fy(d)); 103 | }); 104 | 105 | function gTransform(d) { 106 | return "translate(" + xCirc(d.x) + "," + yCirc(d.y) + ")"; 107 | } 108 | 109 | var svg = d3.select(".visualization") 110 | .append("svg") 111 | .attr("width", W) 112 | .attr("height", H); 113 | 114 | svg.append("line") 115 | .attr("class", "axis") 116 | .attr("y1", margin.top + yCirc(0)).attr("x1", 0) 117 | .attr("y2", margin.top + yCirc(0)).attr("x2", W); 118 | 119 | svg.append("line") 120 | .attr("class", "axis") 121 | .attr("x1", margin.left + xCirc(0)).attr("y1", 0) 122 | .attr("x2", margin.left + xCirc(0)).attr("y2", H); 123 | 124 | var vis = svg.append("g") 125 | .attr("transform", "translate(" + margin.left + "," + margin.top + ")"); 126 | 127 | var gPath = vis.append("path").attr("class", "graph"); 128 | var tPath = vis.append("path").attr("class", "trace"); 129 | var pPath = vis.append("path").attr("class", "proj"); 130 | 131 | function cache() { 132 | var A; 133 | if (typeof C === "function") { 134 | A = d3.range(1, L + 1).map(C); 135 | } else { 136 | A = C.slice(0, L); 137 | } 138 | 139 | fx = FT(A, L - 1, π / 2); 140 | fy = FT(A, L - 1, 0); 141 | 142 | Fxy = A.map(function (a, i) { 143 | return {X: FT(A, i, π / 2), Y: FT(A, i, 0), r: Math.abs(a)}; 144 | }); 145 | } 146 | 147 | function calc() { 148 | if (!Fxy) cache(); 149 | Fxy.forEach(function (f, i) { 150 | var d = data[i] || (data[i] = {x: 0, y: 0, r: 0}); 151 | d.x = f.X(theta); 152 | d.y = f.Y(theta); 153 | d.r = f.r; 154 | d.f = i + 1; 155 | }); 156 | data.length = Fxy.length; 157 | return data; 158 | } 159 | 160 | function coeff() { 161 | 
var co = vis.selectAll(".coeff").data(calc()); 162 | 163 | // exit 164 | co.exit().remove(); 165 | 166 | // enter 167 | var en = co.enter().append("g").attr("class", "coeff"); 168 | 169 | en.append("circle").attr("class", "circle"); 170 | en.append("circle").attr("class", "dot").attr("r", 3); 171 | 172 | // update 173 | co.classed("last", function (d, i) { 174 | return i === L - 1; 175 | }); 176 | co.classed("first", function (d, i) { 177 | return i === 0; 178 | }); 179 | 180 | co.select(".circle").attr("r", function (d) { 181 | return rAxis(d.r); 182 | }); 183 | 184 | return co; 185 | } 186 | 187 | function drawGraph() { 188 | xAxis.domain([0, xmax]); 189 | coeff().attr("transform", gTransform); 190 | var last = data[data.length - 1]; 191 | pPath.attr("d", proj([last, {x: 0, y: last.y}])); 192 | gPath.attr("d", graph(gDomain)); 193 | tPath.attr("d", trace(tDomain)); 194 | } 195 | 196 | function play() { 197 | if (timer) return; 198 | (function loop() { 199 | drawGraph(); 200 | theta += F * rate; 201 | timer = setTimeout(loop, rate * 1000); 202 | })(); 203 | } 204 | 205 | C = types['sawtooth']; 206 | 207 | play(); 208 | 209 | })(); 210 | -------------------------------------------------------------------------------- /static/js/loading-control.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by hrwhisper on 2016/4/25. 
3 | */ 4 | var loading_control = { 5 | opts: { 6 | // more options: http://fgnass.github.io/spin.js/ 7 | length: 28, 8 | width: 14, 9 | radius: 42, 10 | color: "#fff", 11 | scale: 0.5, 12 | opacity: 0.2, 13 | position: "fixed" 14 | }, 15 | spinner: null, 16 | div_wait: null, 17 | div_wait_bg: null, 18 | 19 | start: function () { 20 | if (!this.div_wait) { 21 | var div = document.createElement("div"); 22 | div.id = "foo"; 23 | document.body.appendChild(div); 24 | this.div_wait = div; 25 | } 26 | 27 | if (!this.div_wait_bg) { 28 | var div = document.createElement("div"); 29 | div.id = "waiting-bg"; 30 | div.style.cssText = "width:100%; height:100%; background-color:#000; filter:alpha(opacity=60);-moz-opacity:0.6; opacity:0.6; position:fixed; left:0px; top:0px; display:none; z-index:1000;"; 31 | 32 | document.body.appendChild(div); 33 | this.div_wait_bg = div; 34 | } 35 | 36 | if (!this.spinner) { 37 | this.spinner = new Spinner(this.opts); 38 | } 39 | 40 | this.div_wait_bg.style.display = "block"; 41 | this.spinner.spin(this.div_wait) 42 | }, 43 | 44 | stop: function () { 45 | if(this.spinner) 46 | this.spinner.stop(); 47 | this.div_wait_bg.style.display = "none"; 48 | } 49 | }; -------------------------------------------------------------------------------- /static/js/npm.js: -------------------------------------------------------------------------------- 1 | // This file is autogenerated via the `commonjs` Grunt task. You can require() this file in a CommonJS environment. 
2 | require('../../js/transition.js') 3 | require('../../js/alert.js') 4 | require('../../js/button.js') 5 | require('../../js/carousel.js') 6 | require('../../js/collapse.js') 7 | require('../../js/dropdown.js') 8 | require('../../js/modal.js') 9 | require('../../js/tooltip.js') 10 | require('../../js/popover.js') 11 | require('../../js/scrollspy.js') 12 | require('../../js/tab.js') 13 | require('../../js/affix.js') -------------------------------------------------------------------------------- /static/js/sentiment.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by hrwhisper on 2016/4/24. 3 | */ 4 | 5 | window.onload = function () { 6 | gauge = $("#gauge"); 7 | gauge.height(gauge.width() / 1.5); 8 | 9 | myChart = echarts.init(document.getElementById('gauge')); 10 | myChart.setOption(option, true); 11 | 12 | $(window).resize(function () { 13 | gauge.height(gauge.width() / 1.5); 14 | myChart.resize(); 15 | }); 16 | }; 17 | 18 | 19 | option = { 20 | tooltip: { 21 | formatter: "{c} {b}" 22 | }, 23 | series: [ 24 | { 25 | name: 'positive', 26 | type: 'gauge', 27 | z: 3, 28 | min: 0, 29 | max: 100, 30 | splitNumber: 10, 31 | radius: '70%', 32 | center: ['50%', '50%'], // 默认全局居中 33 | axisLine: { // 坐标轴线 34 | lineStyle: { // 属性lineStyle控制线条样式 35 | width: 10 36 | } 37 | }, 38 | axisTick: { // 坐标轴小标记 39 | length: 15, // 属性length控制线长 40 | lineStyle: { // 属性lineStyle控制线条样式 41 | color: 'auto' 42 | } 43 | }, 44 | splitLine: { // 分隔线 45 | length: 10, // 属性length控制线长 46 | lineStyle: { // 属性lineStyle(详见lineStyle)控制线条样式 47 | color: 'auto' 48 | } 49 | }, 50 | title: { 51 | textStyle: { // 其余属性默认使用全局文本样式,详见TEXTSTYLE 52 | fontWeight: 'bolder', 53 | fontSize: 20, 54 | fontStyle: 'italic' 55 | } 56 | }, 57 | detail: { 58 | textStyle: { // 其余属性默认使用全局文本样式,详见TEXTSTYLE 59 | fontWeight: 'bolder' 60 | } 61 | }, 62 | data: [{value: 0, name: 'positive'}] 63 | }, 64 | { 65 | name: 'negative', 66 | type: 'gauge', 67 | center: ['18%', 
'55%'], // 默认全局居中 68 | radius: '40%', 69 | min: 0, 70 | max: 100, 71 | endAngle: 45, 72 | splitNumber: 10, 73 | axisLine: { // 坐标轴线 74 | lineStyle: { // 属性lineStyle控制线条样式 75 | width: 8 76 | } 77 | }, 78 | axisTick: { // 坐标轴小标记 79 | length: 12, // 属性length控制线长 80 | lineStyle: { // 属性lineStyle控制线条样式 81 | color: 'auto' 82 | } 83 | }, 84 | splitLine: { // 分隔线 85 | length: 20, // 属性length控制线长 86 | lineStyle: { // 属性lineStyle(详见lineStyle)控制线条样式 87 | color: 'auto' 88 | } 89 | }, 90 | pointer: { 91 | width: 5 92 | }, 93 | title: { 94 | offsetCenter: [0, '-30%'] // x, y,单位px 95 | }, 96 | detail: { 97 | textStyle: { // 其余属性默认使用全局文本样式,详见TEXTSTYLE 98 | fontWeight: 'bolder' 99 | } 100 | }, 101 | data: [{value: 0, name: 'negative'}] 102 | }, 103 | { 104 | name: 'neutral', 105 | type: 'gauge', 106 | center: ['78%', '50%'], // 默认全局居中 107 | radius: '25%', 108 | min: 0, 109 | max: 100, 110 | startAngle: 135, 111 | endAngle: -50, 112 | splitNumber: 2, 113 | axisLine: { // 坐标轴线 114 | lineStyle: { // 属性lineStyle控制线条样式 115 | width: 8 116 | } 117 | }, 118 | axisTick: { // 坐标轴小标记 119 | splitNumber: 5, 120 | length: 10, // 属性length控制线长 121 | lineStyle: { // 属性lineStyle控制线条样式 122 | color: 'auto' 123 | } 124 | }, 125 | splitLine: { // 分隔线 126 | length: 15, // 属性length控制线长 127 | lineStyle: { // 属性lineStyle(详见lineStyle)控制线条样式 128 | color: 'auto' 129 | } 130 | }, 131 | pointer: { 132 | width: 2 133 | }, 134 | detail: { 135 | textStyle: { // 其余属性默认使用全局文本样式,详见TEXTSTYLE 136 | fontSize: 20 137 | } 138 | }, 139 | data: [{value: 0, name: 'neutral'}] 140 | } 141 | ] 142 | }; 143 | 144 | function update_sentiment_result(res) { 145 | // update gauge charts 146 | var positive = res['positive'], 147 | negative = res['negative'], 148 | neutral = res['neutral']; 149 | 150 | option.series[0].data[0].value = (positive['percent'] * 100).toFixed(2) - 0; 151 | option.series[1].data[0].value = (negative['percent'] * 100).toFixed(2) - 0; 152 | option.series[2].data[0].value = (neutral['percent'] * 100).toFixed(2) 
- 0; 153 | myChart.setOption(option, true); 154 | 155 | //add text 156 | $("#positive_sample_result").empty(); 157 | $("#negative_sample_result").empty(); 158 | $("#neutral_sample_result").empty(); 159 | 160 | var positive_text = positive['text'], 161 | negative_text = negative['text'], 162 | neutral_text = neutral['text']; 163 | 164 | for (var i = 0; i < positive_text.length; i++) 165 | update_sentiment_text_sample(positive_text[i], 'positive'); 166 | 167 | for (i = 0; i < negative_text.length; i++) 168 | update_sentiment_text_sample(negative_text[i], 'negative'); 169 | 170 | for (i = 0; i < neutral_text.length; i++) 171 | update_sentiment_text_sample(neutral_text[i], 'neutral'); 172 | 173 | $("#sample_result").show(); //.css("display", "block"); 174 | } 175 | 176 | // update_sentiment_text_sample('text' , false); //just text 177 | function update_sentiment_text_sample(text, mode) { 178 | var tag_head = '
  • ', tag_end = '
  • '; 179 | if (mode == 'positive') 180 | $("#positive_sample_result").append(tag_head + text + tag_end); 181 | else if (mode == 'negative') 182 | $("#negative_sample_result").append(tag_head + text + tag_end); 183 | else 184 | $("#neutral_sample_result").append(tag_head + text + tag_end); 185 | } 186 | 187 | 188 | function get_sentiment_result() { 189 | //TODO check data is empty 190 | var data = { 191 | 'query_str': $('#name').val() 192 | }; 193 | 194 | console.log(data); 195 | loading_control.start(); 196 | 197 | $.ajax({ 198 | url: 'sentiment_query', 199 | data: data, 200 | success: function (v) { 201 | console.log(v); 202 | update_sentiment_result(v); 203 | loading_control.stop(); 204 | }, 205 | error: function (v) { 206 | console.log('------error------' + v); 207 | loading_control.stop(); 208 | }, 209 | dataType: 'json' 210 | }); 211 | } -------------------------------------------------------------------------------- /static/js/spin.min.js: -------------------------------------------------------------------------------- 1 | // http://spin.js.org/#v2.3.2 2 | !function(a,b){"object"==typeof module&&module.exports?module.exports=b():"function"==typeof define&&define.amd?define(b):a.Spinner=b()}(this,function(){"use strict";function a(a,b){var c,d=document.createElement(a||"div");for(c in b)d[c]=b[c];return d}function b(a){for(var b=1,c=arguments.length;c>b;b++)a.appendChild(arguments[b]);return a}function c(a,b,c,d){var e=["opacity",b,~~(100*a),c,d].join("-"),f=.01+c/d*100,g=Math.max(1-(1-a)/b*(100-f),a),h=j.substring(0,j.indexOf("Animation")).toLowerCase(),i=h&&"-"+h+"-"||"";return m[e]||(k.insertRule("@"+i+"keyframes "+e+"{0%{opacity:"+g+"}"+f+"%{opacity:"+a+"}"+(f+.01)+"%{opacity:1}"+(f+b)%100+"%{opacity:"+a+"}100%{opacity:"+g+"}}",k.cssRules.length),m[e]=1),e}function d(a,b){var c,d,e=a.style;if(b=b.charAt(0).toUpperCase()+b.slice(1),void 0!==e[b])return 
b;for(d=0;d',c)}k.addRule(".spin-vml","behavior:url(#default#VML)"),h.prototype.lines=function(a,d){function f(){return e(c("group",{coordsize:k+" "+k,coordorigin:-j+" "+-j}),{width:k,height:k})}function h(a,h,i){b(m,b(e(f(),{rotation:360/d.lines*a+"deg",left:~~h}),b(e(c("roundrect",{arcsize:d.corners}),{width:j,height:d.scale*d.width,left:d.scale*d.radius,top:-d.scale*d.width>>1,filter:i}),c("fill",{color:g(d.color,a),opacity:d.opacity}),c("stroke",{opacity:0}))))}var i,j=d.scale*(d.length+d.width),k=2*d.scale*j,l=-(d.width+d.length)*d.scale*2+"px",m=e(f(),{position:"absolute",top:l,left:l});if(d.shadow)for(i=1;i<=d.lines;i++)h(i,-2,"progid:DXImageTransform.Microsoft.Blur(pixelradius=2,makeshadow=1,shadowopacity=.3)");for(i=1;i<=d.lines;i++)h(i);return b(a,m)},h.prototype.opacity=function(a,b,c,d){var e=a.firstChild;d=d.shadow&&d.lines||0,e&&b+d>1)+"px"})}for(var i,k=0,l=(f.lines-1)*(1-f.direction)/2;k 4 | 5 | 6 | 7 | Twitter数据挖掘及其可视化 8 | 9 | {# use #} 10 | {# or use #} 11 | 12 | 13 | 14 | 15 | 16 | 17 | {% block include_script %} 18 | 19 | {% endblock %} 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | {% include "nav.html" %} 29 | 30 | 31 | {% block mainbody %} 32 |

    original

    33 | {% endblock %} 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /templates/footer.html: -------------------------------------------------------------------------------- 1 | 36 | -------------------------------------------------------------------------------- /templates/header.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "basic.html" %} 2 | 3 | {% block include_script %} 4 | 5 | {% endblock %} 6 | 7 | {% block mainbody %} 8 | 9 |
    10 | 42 | 43 |
    44 |

    Twitter数据挖掘及其可视化

    45 | Start sentiment Now 46 |
    47 | 48 |
    49 | 66 |
    67 |
    68 |
    69 |
    70 |

    话题检测

    71 |
    Topic detection
    72 |

    73 | 使用twitter Stream API 提供的实时数据。 74 | 可以追踪特定关键词的推文、某些用户的推文、在某些特定的地理位置(GEO)的推文。
    75 | 使用变分推断的动态词库的WOLDA算法,挖掘实时主题中的关键词,并跟踪话题趋势。 76 |

    77 |
    78 |
    79 | sentiment 80 |
    81 |
    82 |
    83 |
    84 | 85 | 86 |
    87 |
    88 |
    89 |
    90 | sentiment 91 |
    92 |
    93 |

    情感分析

    94 |
    Sentiment analysis
    95 |

    96 | 使用twitter search API提供的数据进行查询。
    97 | 使用最大熵分类器,训练数据来自 SemEval比赛
    98 | 可以帮助: 99 |

  • 用户查询人们谈论某些特定词时候,带有的感情色彩
  • 100 |
  • 商家查看消费者对于新推出的产品的评价
  • 101 |
    102 |

    103 |
    104 |
    105 |
    106 |
    107 | 108 | 109 |
    110 |
    111 |
    112 |
    113 |

    数据可视化

    114 |
    Data visualization
    115 |

    116 | 利用D3、ECharts 进行结果可视化
    117 | 除了基本的柱状图、饼状图外,还有多种可视化图表:
    如treemap、bubble、heatmap、sunburst等
    118 | 直观地反映出结果
    119 |

    120 |
    121 |
    122 |
    123 | 124 |
    125 |
    126 |
    127 |
    128 | 129 | 130 |
    131 |
    132 |
    133 |
    134 | others 135 |
    136 |
    137 |

    其他

    138 |
    Others
    139 |

    140 | 使用 Django 进行WEB端开发
    141 | 使用 Bootstrap 帮助界面设计
    142 | 使用 MongoDB 存储数据,必要时可以进行数据分片
    143 | 使用 Git 进行版本控制,并托管于 Github
    145 |

    146 |
    147 |
    148 |
    149 |
    150 | 151 | 152 | {% include "footer.html" %} 153 | 154 | {% endblock %} -------------------------------------------------------------------------------- /templates/index2.html: -------------------------------------------------------------------------------- 1 | {% extends "basic.html" %} 2 | 3 | {% block mainbody %} 4 |
    5 | retweet 6 | 7 |
    8 | 9 |
    10 | hashtag: 11 | date: 12 | 13 |
    14 | 15 | 16 |
    17 | hashtag1: 18 | hashtag2: 19 | date: 20 | 21 |
    22 | 23 | 24 |
    25 | date: 26 | 27 |
    28 | 29 | 30 |
    31 | date: 32 | 33 |
    34 | 35 | {% endblock %} -------------------------------------------------------------------------------- /templates/nav.html: -------------------------------------------------------------------------------- 1 | {% load mytag %} 2 | 3 | {#旧版的导航条#} 4 | {##} 17 | 18 | -------------------------------------------------------------------------------- /templates/network/retweet.html: -------------------------------------------------------------------------------- 1 | {% extends "basic.html" %} 2 | 3 | {% block mainbody %} 4 | 5 | 112 | {% endblock %} -------------------------------------------------------------------------------- /templates/sentiment/header.html: -------------------------------------------------------------------------------- 1 | {% extends "header.html" %} 2 | 3 | {% block header_title %} 4 | Sentiment analysis 5 | {% endblock %} 6 | 7 | {% block header_instruction %} 8 | 使用 twitter Search API 数据
    9 | 输入您感兴趣的词查询, 如 Messi 10 | {% endblock %} 11 | 12 | {% block header_image_src %} 13 | /static/image/sentiment_2.jpg 14 | {% endblock %} 15 | 16 | 17 | -------------------------------------------------------------------------------- /templates/sentiment/index.html: -------------------------------------------------------------------------------- 1 | {% extends "basic.html" %} 2 | 3 | {% block include_script %} 4 | 5 | 6 | 7 | 8 | {% endblock %} 9 | 10 | 11 | {% block mainbody %} 12 | {% include "sentiment/header.html" %} 13 | 14 |
    15 |
    16 |
    17 |
    18 | 24 |
    25 |
    26 | 27 |
    28 |
    29 |
    30 | 31 | 54 |
    55 |
    56 | 57 | 58 | 59 | {% include "footer.html" %} 60 | {% endblock %} -------------------------------------------------------------------------------- /templates/statistic/hashtag_compare.html: -------------------------------------------------------------------------------- 1 | {% extends "basic.html" %} 2 | 3 | {% block mainbody %} 4 | 5 | 6 | 7 |
    8 | 9 | 102 | {% endblock %} -------------------------------------------------------------------------------- /templates/statistic/hashtag_timeline.html: -------------------------------------------------------------------------------- 1 | {% extends "basic.html" %} 2 | 3 | {% block mainbody %} 4 | 5 | 6 |
    7 | 8 | 104 | {% endblock %} -------------------------------------------------------------------------------- /templates/statistic/pie.html: -------------------------------------------------------------------------------- 1 | {% extends "basic.html" %} 2 | 3 | {% block mainbody %} 4 | 5 |
    6 | 7 | 65 | {% endblock %} -------------------------------------------------------------------------------- /templates/topic/header.html: -------------------------------------------------------------------------------- 1 | {% extends "header.html" %} 2 | 3 | {% block header_title %} 4 | Topic detection 5 | {% endblock %} 6 | 7 | {% block header_instruction %} 8 | 使用 twitter Stream API 的实时数据
    9 | 进行实时主题挖掘,以及结果可视化 10 | {% endblock %} 11 | 12 | {% block header_image_src %} 13 | /static/image/topic_1.jpg 14 | {% endblock %} 15 | 16 | 17 | -------------------------------------------------------------------------------- /templates/topic/index.html: -------------------------------------------------------------------------------- 1 | {% extends "basic.html" %} 2 | 3 | {% block include_script %} 4 | 5 | 6 | 7 | 8 | 9 | {% endblock %} 10 | 11 | 12 | {% block mainbody %} 13 | 14 | {% include "topic/header.html" %} 15 | {% include "topic/parameters_panel.html" %} 16 | {% include "topic/toolbar.html" %} 17 | 18 |
    19 |
    20 |
    21 |
    22 |
    23 |

    Track

    24 |

    使用逗号分隔的短语
    25 | 空格代表 : ‘the twitter’ is the AND twitter
    26 | 逗号则是 : ‘the,twitter’ is the OR twitter 27 |

    28 |
    29 |
    30 |

    Follow

    31 |

    使用逗号分隔的用户id
    32 | 将会包括: 33 | 该用户 发表转发 的推文
    34 | 该用户 回复 的推文
    35 | 但不会包括 @该用户的推文 36 |

    37 |
    38 |
    39 |

    Locations

    40 |

    使用逗号分隔的 经度纬度 坐标对
    41 | 如-122.75,36.8,-73,41 表示来自San Francisco 或 New York City 的城市
    42 | 注意:先写经度再写纬度
    43 | 注意:转发的推文无 locations 信息 44 |

    45 |
    46 |
    47 |
    48 |
    49 |
    50 |
    51 |

    控制面板

    52 |

    位于最左下角,不影响可视化结果
    53 | 随页面滚动,方便控制
    54 | 点击进行速率控制或者选择可视化图表

    55 |
    56 | 57 |
    58 |

    速率控制

    59 |

    60 | 暂停时,服务器仍继续计算,但页面不更新
    61 | 停止时,服务器也停止计算
    62 | 点击开始恢复计算

    63 |
    64 | 65 |
    66 |

    多种图形

    67 |

    文字结果- 代表性推文
    68 | 话题比例
    69 | 词的比例

    70 |
    71 |
    72 |
    73 |
    74 |
    75 | 76 | 77 |
    78 |
    79 |
    80 | 81 |

    你想知道 现在 人们在讨论什么话题么?

    82 |

    你想跟踪 某些特定用户 谈论的话题么?

    83 |

    你想查看 某些地区 人们谈论的话题么?

    84 | 87 |
    88 |
    89 |
    90 | 91 | 92 | 93 |
    94 |
    95 |
    96 | 97 |
    98 |
    99 | 100 |
    101 | {% endblock %} -------------------------------------------------------------------------------- /templates/topic/toolbar.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
    4 |
    5 | 7 | 8 | 15 |
    16 | 17 |
    18 | 19 | 20 | 39 |
    40 |
    -------------------------------------------------------------------------------- /templates/topic/visualization/result_basic.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Title 6 | 7 | 8 | 9 | 10 | 11 | 14 | 15 | {% block style %} 16 | {# ---style---#} 17 | {% endblock %} 18 | 19 | 20 | 21 | 22 | {% block mainbody %} 23 | {# ------#} 24 | {% endblock %} 25 | 26 | 27 | -------------------------------------------------------------------------------- /templates/topic/visualization/result_bubble.html: -------------------------------------------------------------------------------- 1 | {% extends "topic/visualization/result_basic.html" %} 2 | 3 | {% block style %} 4 | 37 | {% endblock %} 38 | 39 | {% block mainbody %} 40 | 159 | {% endblock %} -------------------------------------------------------------------------------- /templates/topic/visualization/result_funnel.html: -------------------------------------------------------------------------------- 1 | {% extends "topic/visualization/result_basic.html" %} 2 | 3 | 4 | {% block mainbody %} 5 | 6 | 7 |
    8 | 9 | 94 | 95 | 96 | {% endblock %} -------------------------------------------------------------------------------- /templates/topic/visualization/result_hashtags_histogram.html: -------------------------------------------------------------------------------- 1 | {% extends "topic/visualization/result_basic.html" %} 2 | 3 | 4 | {% block mainbody %} 5 | 6 | 7 |
    8 | 9 | 74 | 75 | 76 | {% endblock %} -------------------------------------------------------------------------------- /templates/topic/visualization/result_hashtags_pie.html: -------------------------------------------------------------------------------- 1 | {% extends "topic/visualization/result_basic.html" %} 2 | 3 | 4 | {% block mainbody %} 5 | 6 | 7 |
    8 | 9 | 74 | 75 | 76 | {% endblock %} -------------------------------------------------------------------------------- /templates/topic/visualization/result_hashtags_timeline.html: -------------------------------------------------------------------------------- 1 | {% extends "topic/visualization/result_basic.html" %} 2 | 3 | 4 | {% block mainbody %} 5 | 6 | 7 |
    8 | 9 | 93 | 94 | 95 | {% endblock %} -------------------------------------------------------------------------------- /templates/topic/visualization/result_heatmap.html: -------------------------------------------------------------------------------- 1 | {% extends "topic/visualization/result_basic.html" %} 2 | 3 | 4 | {% block mainbody %} 5 | 6 |
    7 | 8 | 97 | {% endblock %} -------------------------------------------------------------------------------- /templates/topic/visualization/result_text.html: -------------------------------------------------------------------------------- 1 | {% extends "topic/visualization/result_basic.html" %} 2 | 3 | {% block mainbody %} 4 |
    5 |
    当前话题
    6 |
    7 |

    8 | 12 |

    13 |
    14 |

    a fax d wt wra f fx afw

    15 |

    a fax d wt wra f fx afw

    16 |
    17 |
    18 |
    19 |
    20 | 21 | 22 | 43 | {% endblock %} 44 | 45 | -------------------------------------------------------------------------------- /templates/topic/visualization/result_treemap.html: -------------------------------------------------------------------------------- 1 | {% extends "topic/visualization/result_basic.html" %} 2 | 3 | {% block style %} 4 | 19 | {% endblock %} 20 | 21 | 22 | {% block mainbody %} 23 | 164 | {% endblock %} -------------------------------------------------------------------------------- /topic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/topic/__init__.py -------------------------------------------------------------------------------- /topic/models/Lda_text_format.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/2/22. 
4 | # twitter text 预处理 5 | import nltk 6 | import re 7 | 8 | # with open('stopwords.txt', 'r') as f: 9 | with open('topic/models/stopwords.txt', 'r') as f: 10 | stopwords = [word.strip() for word in f.readlines()] 11 | 12 | # english_stopwords = nltk.corpus.stopwords.words('english') # + ['re', "n't"] 13 | english_stopwords = stopwords 14 | english_punctuations = [',', '.', ':', ';', '?', '(', ')', '[', ']', '&', '!', '*', '#', '$', '%', '...'] 15 | expressions = [':-)', ':)', '=)', ':D', ':-(', ':(', '=(', ';('] 16 | remove_words = set(english_stopwords + english_punctuations + expressions) 17 | wnl = nltk.WordNetLemmatizer() 18 | 19 | 20 | # 到时候再看看 twitter_text 21 | def filter_tweet(tweet): 22 | # 替换twitter特殊字符 23 | tweet = tweet.lower() 24 | # 替换tweet Url and user mentions 25 | tweet = re.sub(r"(http[s:…]*(//\S*)?)|(@\w+)", "", tweet) 26 | tweet = [wnl.lemmatize(word) for word in nltk.word_tokenize(tweet)] 27 | tweet = [word for word in tweet if word not in remove_words and len(word) >= 3] 28 | return tweet 29 | 30 | 31 | def filter_tweets(original_tweets): 32 | _filter_tweets = list(map(filter_tweet, original_tweets)) 33 | res_tweets = [] 34 | res_tweets_filter = [] 35 | for i, f_tweet in enumerate(_filter_tweets): 36 | if f_tweet: 37 | res_tweets.append(original_tweets[i]) 38 | res_tweets_filter.append(f_tweet) 39 | return res_tweets, res_tweets_filter 40 | 41 | 42 | 43 | def main(): 44 | txt = "RT @SocialWebMining rta Mining women https://hrwhisper.me 1M+ Tweets @hrwhisper About #Syria http://wp.me/p3QiJd-1I https:…" 45 | print filter_tweet(txt) 46 | txt = "RT @StewySongs: People are people, families are families & lives are lives the world over. 
The UK is shoulder to shoulder with Paris https:…" 47 | print filter_tweet(txt) 48 | 49 | for i, word in enumerate(english_stopwords): 50 | if word not in stopwords: 51 | print word 52 | 53 | print wnl.lemmatize('followed'), wnl.lemmatize('following') 54 | 55 | 56 | if __name__ == '__main__': 57 | main() 58 | -------------------------------------------------------------------------------- /topic/models/TopicParameterManager.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/4/18. 4 | 5 | 6 | class TopicParameterManager(object): 7 | def __init__(self, param): 8 | param = dict(param) 9 | for x, t in param.items(): 10 | if param[x] == '': 11 | del param[x] 12 | 13 | self.mode = int(param.get('mode', 1)) 14 | 15 | # ---------- stream --------- 16 | self.track = param.get('track', None) 17 | self.follow = param.get('follow', None) 18 | self.location = param.get('location', None) 19 | self.storeIntoDB = param.get('storeIntoDB', False) == 'true' 20 | self.storeIntoDBName = param.get('storeIntoDBName', 'stream') 21 | 22 | # ---------- LDA ------------ 23 | self.LDA_k = int(param.get('LDA_k', 15)) 24 | self.LDA_timeWindow = int(param.get('LDA_timeWindow', 30)) 25 | 26 | # ----------- Local ----------- 27 | self.startDate = param.get('startDate', None) 28 | self.endDate = param.get('endDate', None) 29 | self.localCollectionsName = param.get('localCollectionsName', 'stream') 30 | 31 | def __eq__(self, other): 32 | return self.track == other.track and self.follow == other.follow and self.location == other.location \ 33 | and self.storeIntoDB == other.storeIntoDB and self.storeIntoDBName == other.storeIntoDBName \ 34 | and self.LDA_k == other.LDA_k and self.LDA_timeWindow == other.LDA_timeWindow \ 35 | and self.startDate == other.startDate and self.endDate == other.endDate \ 36 | and self.localCollectionsName == other.localCollectionsName and self.mode == other.mode 37 | 38 | def 
__ne__(self, other): 39 | return not self.__eq__(other) 40 | 41 | def __str__(self): 42 | return self.track, self.follow, self.location, self.storeIntoDB, self.storeIntoDBName, \ 43 | self.LDA_k, self.LDA_timeWindow, self.startDate, self.endDate, self.localCollectionsName 44 | -------------------------------------------------------------------------------- /topic/models/TopicTrendsManager.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/4/8. 4 | 5 | import threading 6 | import multiprocessing 7 | import time 8 | from twitterDataMining.model_p.Singleton import Singleton 9 | from twitterDataMining.model_p.twitterApi.LocalStream import LocalStream 10 | from twitterDataMining.model_p.twitterApi.Stream import TwitterStream 11 | from topic.models.Corpus import Corpus 12 | from topic.models.OnlineLDA import OnlineLDA 13 | 14 | 15 | class TopicTrendsManager(object): 16 | __metaclass__ = Singleton 17 | 18 | def __init__(self, param): 19 | self.param = param 20 | self.topics = [] 21 | self.lock = threading.Lock() 22 | self.parent_conn, self.child_conn = multiprocessing.Pipe() 23 | 24 | # self.topic_trends = TopicTrends(param, self.child_conn) 25 | # self.topic_trends.start() 26 | 27 | self.topic_trends = None 28 | 29 | topic_trends_get = threading.Thread(target=self.receive_lda_result) 30 | topic_trends_get.start() 31 | 32 | def get_result(self, param): 33 | """ 34 | get LDA result 35 | :param param: TopicParameterManager 36 | :return: topic_list or None 37 | """ 38 | res = None 39 | 40 | if not self.topic_trends: 41 | self.topic_trends = TopicTrends(param, self.child_conn) 42 | self.topic_trends.start() 43 | self.param = param 44 | return res 45 | 46 | if self.param == param: 47 | if self.lock.acquire(): 48 | if self.topics: 49 | res = self.topics.pop(0) 50 | self.lock.release() 51 | 52 | else: # if self.param != param: 53 | self.param = param 54 | 
self.topic_trends.terminate() 55 | self.topic_trends = TopicTrends(self.param, self.child_conn) 56 | self.topic_trends.start() 57 | 58 | if self.lock.acquire(): 59 | self.topics = [] 60 | self.lock.release() 61 | 62 | return res 63 | 64 | def receive_lda_result(self): 65 | while True: 66 | res = self.parent_conn.recv() 67 | self.lock.acquire() 68 | self.topics.append(res) 69 | self.lock.release() 70 | 71 | def stop(self): 72 | if self.topic_trends: 73 | self.topic_trends.terminate() 74 | self.topic_trends = None 75 | self.topics = [] 76 | # TODO stop receive_lda_result Threads 77 | 78 | 79 | class TopicTrends(multiprocessing.Process): 80 | def __init__(self, param, lda_send_conn, period=60): 81 | super(TopicTrends, self).__init__() 82 | self.param = param 83 | self.period = period 84 | self.lda_send_conn = lda_send_conn 85 | self.parent_conn, self.child_conn = multiprocessing.Pipe() 86 | 87 | self.corpus = None 88 | self.olda = None 89 | 90 | def run(self): 91 | if self.param.mode != 2: # online stream data(use twitter API) 92 | twitter_stream = TwitterStream(self.child_conn) 93 | twitter_stream_thread = threading.Thread(target=twitter_stream.stream_data, 94 | args=(self.param.track, self.param.follow, self.param.location, 95 | self.param.storeIntoDB, self.param.storeIntoDBName,)) 96 | twitter_stream_thread.setDaemon(True) 97 | twitter_stream_thread.start() 98 | 99 | print ' threading.active_count()', threading.active_count() 100 | # TODO error count > 3 kill 101 | while True: 102 | time.sleep(self.period) 103 | twitter_stream.ready_receive() 104 | tweets = self.parent_conn.recv() 105 | t = threading.Thread(target=self.do_some_from_data, args=(tweets,)) 106 | t.setDaemon(True) 107 | t.start() 108 | 109 | else: # local database data 110 | condition = threading.Condition() 111 | local_stream = LocalStream() 112 | local_stream_thread = threading.Thread(target=local_stream.stream_data, 113 | args=(condition, self.param.startDate, self.param.endDate, 114 | 
self.param.localCollectionsName,)) 115 | local_stream_thread.setDaemon(True) 116 | local_stream_thread.start() 117 | print ' threading.active_count()', threading.active_count() 118 | 119 | if condition.acquire(): 120 | while True: 121 | print 'wait to receive' 122 | if local_stream.tweets: 123 | self.do_some_from_data(local_stream.tweets) 124 | local_stream.tweets = [] 125 | condition.notify() 126 | 127 | condition.wait() 128 | 129 | def do_some_from_data(self, tweets): 130 | print 'total_tweets', len(tweets) 131 | # DO something from tweets 132 | 133 | # doc_chunk = [tweet['text'] for tweet in tweets] 134 | print len(tweets) 135 | if not self.olda: 136 | self.corpus = Corpus(tweets, chunk_limit=self.param.LDA_timeWindow) 137 | self.olda = OnlineLDA(self.corpus, K=self.param.LDA_k) 138 | else: 139 | self.olda.fit(tweets) 140 | 141 | res = { 142 | "lda": self.olda.get_lda_info(), 143 | "geo": self.olda.corpus.locations_count, 144 | "hashtags": self.olda.corpus.hashtags_most_common(), 145 | "hashtags_timeline": self.olda.corpus.hashtags_timeline(), 146 | } 147 | print '-------lda complete' 148 | # for topic_id, topic_likelihood, topic_words, topic_tweets in res["lda"]: 149 | # print '{}%\t{}'.format(round(topic_likelihood * 100, 2), topic_words) 150 | # print '\t', topic_tweets 151 | 152 | self.lda_send_conn.send(res) 153 | 154 | def terminate(self): 155 | super(TopicTrends, self).terminate() 156 | self.parent_conn.close() 157 | self.child_conn.close() 158 | 159 | 160 | if __name__ == '__main__': 161 | def main(): 162 | topic_trends = TopicTrendsManager() 163 | while True: 164 | res = topic_trends.get_result() 165 | if res: 166 | print res 167 | else: 168 | print 'None, wait' 169 | time.sleep(10) 170 | 171 | 172 | main() 173 | -------------------------------------------------------------------------------- /topic/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by 
hrwhisper on 2016/4/5. 4 | -------------------------------------------------------------------------------- /topic/models/demo.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/4/7. 4 | # just a multiprocessing and threading demo 5 | 6 | import threading 7 | import multiprocessing 8 | import time 9 | 10 | 11 | class Singleton(type): 12 | _instances = {} 13 | 14 | def __call__(cls, *args, **kwargs): 15 | if cls not in cls._instances: 16 | cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) 17 | return cls._instances[cls] 18 | 19 | 20 | class TopicTrendsManager(object): 21 | __metaclass__ = Singleton 22 | 23 | def __init__(self): 24 | self.topics = [] 25 | self.lock = threading.Lock() 26 | self.parent_conn, self.child_conn = multiprocessing.Pipe() 27 | self.topic_trends = TopicTrends(self.child_conn) 28 | self.topic_trends.start() 29 | topic_trends_get = threading.Thread(target=self.receive_lda_result) 30 | topic_trends_get.start() 31 | 32 | def get_result(self): 33 | res = None 34 | 35 | print 'process count', multiprocessing.active_children() 36 | if self.lock.acquire(): 37 | if self.topics: 38 | res = self.topics.pop(0) 39 | self.lock.release() 40 | # self.topic_trends.terminate() 41 | return res 42 | 43 | def receive_lda_result(self): 44 | while True: 45 | res = self.parent_conn.recv() 46 | self.lock.acquire() 47 | self.topics.append(res) 48 | # print 'receive_lda_result', res 49 | self.lock.release() 50 | 51 | 52 | class TopicTrends(multiprocessing.Process): 53 | def __init__(self, lda_send_conn, period=2): 54 | super(TopicTrends, self).__init__() 55 | self.period = period 56 | self.lda_send_conn = lda_send_conn 57 | self.parent_conn, self.child_conn = multiprocessing.Pipe() 58 | 59 | def run(self): 60 | 61 | twitter_stream = TwitterStream(self.child_conn) 62 | twitter_stream_thread = threading.Thread(target=twitter_stream.run) 63 | 
twitter_stream_thread.setDaemon(True) 64 | twitter_stream_thread.start() 65 | 66 | print ' threading.active_count()', threading.active_count() 67 | # TODO error count > 3 kill 68 | while True: 69 | time.sleep(self.period) 70 | twitter_stream.get() 71 | tweets = self.parent_conn.recv() 72 | t = threading.Thread(target=self.do_some_from_data, args=(tweets,)) 73 | t.setDaemon(True) 74 | t.start() 75 | print 'TopicTrends threading.live : ', list(threading.enumerate()) 76 | 77 | # print sum(tweets) 78 | print 'end' 79 | 80 | def do_some_from_data(self, data): 81 | # DO something from tweets 82 | res = sum(data) 83 | self.lda_send_conn.send(res) 84 | 85 | def terminate(self): 86 | super(TopicTrends, self).terminate() 87 | self.parent_conn.close() 88 | self.child_conn.close() 89 | 90 | 91 | class TwitterStream(object): 92 | def __init__(self, conn): 93 | super(TwitterStream, self).__init__() 94 | self.conn = conn 95 | self.tweets = [] 96 | self.get_data = False 97 | 98 | def run(self): 99 | i = 0 100 | while True: 101 | time.sleep(0.1) 102 | self.tweets.append(i) 103 | i += 1 104 | if self.get_data: 105 | self.get_data = False 106 | self.conn.send(self.tweets) 107 | self.tweets = [] 108 | 109 | def get(self): 110 | self.get_data = True 111 | 112 | 113 | def main(): 114 | topic_trends = TopicTrendsManager() 115 | while True: 116 | res = topic_trends.get_result() 117 | if res: 118 | print res 119 | else: 120 | print 'None, wait' 121 | time.sleep(1) 122 | 123 | 124 | if __name__ == '__main__': 125 | main() 126 | 127 | # time.sleep(5) 128 | # topic_trends.terminate() 129 | -------------------------------------------------------------------------------- /topic/models/stopwords.txt: -------------------------------------------------------------------------------- 1 | ... 2 | .... 
3 | 0 4 | 1 5 | 2 6 | 3 7 | 4 8 | 5 9 | 6 10 | 7 11 | 8 12 | 9 13 | a 14 | about 15 | above 16 | accordingly 17 | across 18 | after 19 | afterwards 20 | again 21 | against 22 | al 23 | all 24 | allows 25 | almost 26 | alone 27 | along 28 | already 29 | also 30 | although 31 | always 32 | am 33 | among 34 | amongst 35 | an 36 | and 37 | another 38 | any 39 | anybody 40 | anyhow 41 | anyone 42 | anything 43 | anywhere 44 | apart 45 | appear 46 | appropriate 47 | are 48 | around 49 | as 50 | aside 51 | associated 52 | at 53 | available 54 | away 55 | awfully 56 | b 57 | back 58 | be 59 | became 60 | because 61 | become 62 | becomes 63 | becoming 64 | been 65 | before 66 | beforehand 67 | behind 68 | being 69 | below 70 | beside 71 | besides 72 | best 73 | better 74 | between 75 | beyond 76 | both 77 | brief 78 | but 79 | by 80 | c 81 | came 82 | can 83 | cannot 84 | cant 85 | cause 86 | causes 87 | certain 88 | changes 89 | co 90 | come 91 | consequently 92 | contain 93 | containing 94 | contains 95 | corresponding 96 | could 97 | currently 98 | d 99 | day 100 | described 101 | did 102 | different 103 | do 104 | does 105 | doing 106 | don 107 | done 108 | down 109 | downwards 110 | during 111 | e 112 | each 113 | eg 114 | eight 115 | either 116 | else 117 | elsewhere 118 | enough 119 | eq 120 | et 121 | etc 122 | even 123 | ever 124 | every 125 | everybody 126 | everyone 127 | everything 128 | everywhere 129 | ex 130 | example 131 | except 132 | f 133 | far 134 | few 135 | fifth 136 | first 137 | five 138 | followed 139 | following 140 | for 141 | former 142 | formerly 143 | forth 144 | four 145 | from 146 | further 147 | furthermore 148 | g 149 | get 150 | gets 151 | given 152 | gives 153 | go 154 | gone 155 | good 156 | got 157 | great 158 | h 159 | had 160 | hardly 161 | has 162 | have 163 | having 164 | he 165 | hence 166 | her 167 | here 168 | hereafter 169 | hereby 170 | herein 171 | hereupon 172 | hers 173 | herself 174 | him 175 | himself 176 | his 177 | 
hither 178 | how 179 | howbeit 180 | however 181 | http 182 | i 183 | ie 184 | if 185 | ignored 186 | immediate 187 | in 188 | inasmuch 189 | inc 190 | indeed 191 | indicate 192 | indicated 193 | indicates 194 | inner 195 | insofar 196 | instead 197 | into 198 | inward 199 | is 200 | it 201 | its 202 | itself 203 | j 204 | just 205 | k 206 | keep 207 | kept 208 | know 209 | l 210 | last 211 | latter 212 | latterly 213 | least 214 | less 215 | lest 216 | let 217 | life 218 | like 219 | little 220 | long 221 | ltd 222 | m 223 | made 224 | make 225 | man 226 | many 227 | may 228 | me 229 | meanwhile 230 | men 231 | might 232 | more 233 | moreover 234 | most 235 | mostly 236 | mr 237 | much 238 | must 239 | my 240 | myself 241 | n 242 | name 243 | namely 244 | near 245 | necessary 246 | neither 247 | never 248 | nevertheless 249 | new 250 | next 251 | nine 252 | no 253 | nobody 254 | none 255 | noone 256 | nor 257 | normally 258 | not 259 | nothing 260 | novel 261 | now 262 | nowhere 263 | o 264 | of 265 | off 266 | often 267 | oh 268 | old 269 | on 270 | once 271 | one 272 | ones 273 | only 274 | onto 275 | or 276 | other 277 | others 278 | otherwise 279 | ought 280 | our 281 | ours 282 | ourselves 283 | out 284 | outside 285 | over 286 | overall 287 | own 288 | p 289 | particular 290 | particularly 291 | people 292 | per 293 | perhaps 294 | placed 295 | please 296 | plus 297 | possible 298 | probably 299 | provides 300 | q 301 | que 302 | quite 303 | r 304 | rather 305 | really 306 | relatively 307 | respectively 308 | right 309 | s 310 | said 311 | same 312 | say 313 | says 314 | second 315 | secondly 316 | see 317 | seem 318 | seemed 319 | seeming 320 | seems 321 | self 322 | selves 323 | sensible 324 | sent 325 | serious 326 | seven 327 | several 328 | shall 329 | she 330 | should 331 | since 332 | six 333 | so 334 | some 335 | somebody 336 | somehow 337 | someone 338 | something 339 | sometime 340 | sometimes 341 | somewhat 342 | somewhere 343 | specified 344 | 
specify 345 | specifying 346 | state 347 | still 348 | sub 349 | such 350 | sup 351 | t 352 | take 353 | taken 354 | than 355 | that 356 | the 357 | their 358 | theirs 359 | them 360 | themselves 361 | then 362 | thence 363 | there 364 | thereafter 365 | thereby 366 | therefore 367 | therein 368 | thereupon 369 | these 370 | they 371 | third 372 | this 373 | thorough 374 | thoroughly 375 | those 376 | though 377 | three 378 | through 379 | throughout 380 | thru 381 | thus 382 | time 383 | to 384 | together 385 | too 386 | toward 387 | towards 388 | twice 389 | two 390 | u 391 | under 392 | unless 393 | until 394 | unto 395 | up 396 | upon 397 | us 398 | use 399 | used 400 | useful 401 | uses 402 | using 403 | usually 404 | v 405 | value 406 | various 407 | very 408 | via 409 | viz 410 | vs 411 | w 412 | was 413 | way 414 | we 415 | well 416 | went 417 | were 418 | what 419 | whatever 420 | when 421 | whence 422 | whenever 423 | where 424 | whereafter 425 | whereas 426 | whereby 427 | wherein 428 | whereupon 429 | wherever 430 | whether 431 | which 432 | while 433 | whither 434 | who 435 | whoever 436 | whole 437 | whom 438 | whose 439 | why 440 | will 441 | with 442 | within 443 | without 444 | work 445 | world 446 | would 447 | x 448 | y 449 | year 450 | years 451 | yet 452 | you 453 | your 454 | yours 455 | yourself 456 | yourselves 457 | z 458 | zero 459 | re 460 | 're 461 | 'rt 462 | rt 463 | via 464 | retweet 465 | twitter 466 | follow 467 | tweet 468 | amp 469 | n't -------------------------------------------------------------------------------- /topic/urls.py: -------------------------------------------------------------------------------- 1 | """twitterDataMining URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/1.9/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. 
Add a URL to urlpatterns: url(r'^$', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.conf.urls import url, include 14 | 2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls')) 15 | """ 16 | from django.conf.urls import url 17 | import topic.views 18 | 19 | urlpatterns = [ 20 | url(r'^$', topic.views.index), 21 | url(r'stream_trends$', topic.views.stream_trends), 22 | url(r'stop_trends$', topic.views.stop_trends), 23 | url(r'text$', topic.views.text), 24 | url(r'bubble$', topic.views.bubble), 25 | url(r'treemap$', topic.views.treemap), 26 | url(r'sunburst$', topic.views.sunburst), 27 | url(r'funnel$', topic.views.funnel), 28 | url(r'heatmap$', topic.views.heatmap), 29 | url(r'hashtags_pie$', topic.views.hashtags_pie), 30 | url(r'hashtags_histogram$', topic.views.hashtags_histogram), 31 | url(r'hashtags_timeline$', topic.views.hashtags_timeline) 32 | ] 33 | -------------------------------------------------------------------------------- /topic/views.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Create your views here. 
3 | import json 4 | from django.http import HttpResponse 5 | from django.shortcuts import render 6 | from topic.models.TopicTrendsManager import TopicTrendsManager 7 | from topic.models.TopicParameterManager import TopicParameterManager 8 | 9 | 10 | def index(request): 11 | return render(request, 'topic/index.html') 12 | 13 | 14 | # TODO 检查参数的合法性, and change to post method 15 | def stream_trends(request): 16 | param_manager = TopicParameterManager(request.GET.items()) 17 | topic_trends = TopicTrendsManager(param_manager) 18 | res = topic_trends.get_result(param_manager) 19 | return HttpResponse(json.dumps(res), content_type="application/json") 20 | 21 | 22 | def stop_trends(request): 23 | topic_trends = TopicTrendsManager(None) 24 | topic_trends.stop() 25 | res = {"stop": "stop success"} 26 | return HttpResponse(json.dumps(res), content_type="application/json") 27 | 28 | 29 | def text(request): 30 | return render(request, 'topic/visualization/result_text.html') 31 | 32 | 33 | def bubble(request): 34 | return render(request, 'topic/visualization/result_bubble.html') 35 | 36 | 37 | def treemap(request): 38 | return render(request, 'topic/visualization/result_treemap.html') 39 | 40 | 41 | def sunburst(request): 42 | return render(request, 'topic/visualization/result_sunburst.html') 43 | 44 | 45 | def funnel(request): 46 | return render(request, 'topic/visualization/result_funnel.html') 47 | 48 | 49 | def heatmap(request): 50 | return render(request, 'topic/visualization/result_heatmap.html') 51 | 52 | 53 | def hashtags_pie(request): 54 | return render(request, 'topic/visualization/result_hashtags_pie.html') 55 | 56 | 57 | def hashtags_histogram(request): 58 | return render(request, 'topic/visualization/result_hashtags_histogram.html') 59 | 60 | 61 | def hashtags_timeline(request): 62 | return render(request, 'topic/visualization/result_hashtags_timeline.html') -------------------------------------------------------------------------------- 
/twitterDataMining/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/twitterDataMining/__init__.py -------------------------------------------------------------------------------- /twitterDataMining/model_p/Singleton.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/5/24. 4 | 5 | 6 | class Singleton(type): 7 | _instances = {} 8 | 9 | def __call__(cls, *args, **kwargs): 10 | if cls not in cls._instances: 11 | cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) 12 | return cls._instances[cls] 13 | -------------------------------------------------------------------------------- /twitterDataMining/model_p/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/twitterDataMining/model_p/__init__.py -------------------------------------------------------------------------------- /twitterDataMining/model_p/analyse/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrwhisper/twitterDataMining/5dc1ec836f2870cd421468d464f5b66ef0ac8931/twitterDataMining/model_p/analyse/__init__.py -------------------------------------------------------------------------------- /twitterDataMining/model_p/analyse/hashtag_co_occur.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2015/12/8. 
4 | 5 | import pymongo 6 | import time 7 | import datetime 8 | import collections 9 | 10 | if __name__ == '__main__': 11 | 12 | starttime = datetime.datetime.now() 13 | # long running 14 | 15 | lower_bound = str(int(time.mktime(datetime.datetime(2015, 11, 15).timetuple())) * 1000) 16 | upper_bound = str(int(time.mktime(datetime.datetime(2015, 11, 17).timetuple())) * 1000) 17 | client = pymongo.MongoClient() 18 | db = client.twitter 19 | cursor = db.stream.aggregate([ 20 | { 21 | '$match': { 22 | 'timestamp_ms': { 23 | '$gt': lower_bound, 24 | '$lt': upper_bound, 25 | }, 26 | 'entities.hashtags.0': { 27 | '$exists': 'true' 28 | } 29 | } 30 | }, 31 | { 32 | '$project': { 33 | 'entities.hashtags': 1 34 | } 35 | } 36 | ]) 37 | cnt = 0 38 | hashtag_dic = collections.defaultdict(lambda: collections.defaultdict(int), {}) 39 | for tweet in cursor: 40 | cnt += 1 41 | hashtags = tweet['entities']['hashtags'] 42 | hashtags_len = len(hashtags) 43 | for i, name1 in enumerate(hashtags): 44 | for j, name2 in enumerate(hashtags): 45 | if name1 < name2: 46 | hashtag_dic[name1][name2] += 1 47 | elif name1 > name2: 48 | hashtag_dic[name2][name1] += 1 49 | print cnt 50 | res = [] 51 | for name, dics in hashtag_dic.items(): 52 | for name2, cnt in dics.items(): 53 | res.append((name, name2, cnt)) 54 | 55 | print len(res) 56 | hashtag_dic = sorted(res, key=lambda x: x[2], reverse=True) 57 | for i in hashtag_dic[:100]: 58 | print i 59 | 60 | # cursor = db.stream.find({ 61 | # 'timestamp_ms': { 62 | # '$lt': upper_bound, 63 | # '$gt': lower_bound 64 | # } 65 | # }) 66 | # hashtag_cnt = collections.defaultdict(int) 67 | # for tweet in cursor: 68 | # for hashtag in tweet['entities']['hashtags']: 69 | # hashtag_cnt[hashtag] += 1 70 | # hashtag_cnt = sorted(hashtag_cnt.items(), key=lambda x: x[1], reverse=True) 71 | # print hashtag_cnt[:100] 72 | endtime = datetime.datetime.now() 73 | print (endtime - starttime).seconds 74 | 
-------------------------------------------------------------------------------- /twitterDataMining/model_p/analyse/hashtag_trend.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2015/12/10. 4 | import pymongo 5 | import datetime 6 | 7 | if __name__ == '__main__': 8 | 9 | starttime = datetime.datetime.now() 10 | # long running 11 | 12 | client = pymongo.MongoClient() 13 | db = client.twitter 14 | 15 | cursor = db.stream.aggregate([ 16 | { 17 | '$match': { 18 | 'entities.hashtags': 'MTVStars' 19 | } 20 | }, 21 | { 22 | '$group': { 23 | '_id': { 24 | 'day': {'$dayOfMonth': '$created_at'}, 25 | 'month': {'$month': '$created_at'}, 26 | 'year': {'$year': '$created_at'} 27 | }, 28 | 'cnt': {'$sum': 1}, 29 | } 30 | }, 31 | ]) 32 | 33 | for i , group in enumerate(cursor): 34 | print i,group 35 | 36 | endtime = datetime.datetime.now() 37 | print (endtime - starttime).seconds 38 | -------------------------------------------------------------------------------- /twitterDataMining/model_p/analyse/mongodb_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/1/24. 4 | import pymongo 5 | 6 | 7 | class mongodbTest(object): 8 | def test(self): 9 | client = pymongo.MongoClient() 10 | db = client.twitter 11 | return db.stream.find().limit(10) 12 | -------------------------------------------------------------------------------- /twitterDataMining/model_p/twitterApi/LocalStream.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/4/18. 
4 | import datetime 5 | from Basic import MongoDb 6 | from topic.models.OnlineLDA import chunkize_serial 7 | 8 | 9 | class LocalStream(object): 10 | def __init__(self): 11 | self.db = MongoDb().get_db() 12 | self.tweets = [] 13 | 14 | def stream_data(self, condition, start_date, end_date, collection_name='stream'): 15 | 16 | start = end = None 17 | try: 18 | start = datetime.datetime.strptime(start_date, '%Y-%m-%d') 19 | end = datetime.datetime.strptime(end_date, '%Y-%m-%d') 20 | except Exception, e: 21 | pass 22 | 23 | match = { 24 | '$match': { 25 | 'date': { 26 | } 27 | }} 28 | if start: 29 | match['$match']['date']['$gt'] = start 30 | if end: 31 | match['$match']['date']['$lt'] = end 32 | 33 | pipeline = [] 34 | if start and end: 35 | pipeline.append(match) 36 | pipeline.append({'$sort': {'date': 1}}) 37 | 38 | cursor = self.db[collection_name].aggregate(pipeline) 39 | 40 | if condition.acquire(): 41 | print 'loading local data' 42 | for doc_chunk in chunkize_serial(cursor, 3000, as_numpy=False): 43 | print doc_chunk[0] 44 | self.tweets = doc_chunk 45 | condition.notify() 46 | condition.wait() 47 | 48 | 49 | if __name__ == '__main__': 50 | def main(): 51 | str_date = '2015-11-13' 52 | t = datetime.datetime.strptime(str_date, '%Y-%m-%d') 53 | print t, type(t) 54 | print datetime.datetime(2015, 11, 13), type(datetime.datetime(2015, 11, 13)) 55 | 56 | 57 | main() 58 | -------------------------------------------------------------------------------- /twitterDataMining/model_p/twitterApi/Stream.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/1/25. 
4 | import time 5 | import datetime 6 | from Basic import TwitterBasic 7 | import twitter 8 | 9 | 10 | class TwitterStream(TwitterBasic): 11 | def __init__(self, conn=None): 12 | TwitterBasic.__init__(self) 13 | 14 | self.conn = conn 15 | self.tweets = [] 16 | self.get_data = False 17 | 18 | def ready_receive(self): 19 | self.get_data = True 20 | 21 | def stream_data(self, track=None, follow=None, locations=None, save_to_db=False, 22 | collection_name='stream'): 23 | """ 24 | https://dev.twitter.com/streaming/reference/post/statuses/filter 25 | The default access level allows up to 400 track keywords, 5,000 follow userids and 25 0.1-360 degree location boxes. 26 | 27 | :param track: str ; 28 | :param follow:list str ; 29 | :param locations: str ; 30 | :param save_to_db: 31 | :param collection_name: 32 | :return: None 33 | """ 34 | 35 | def location_bounding_box(_locations): 36 | t = _locations.split(',') 37 | res = '' 38 | for i in xrange(0, len(t), 2): 39 | x, y = str(float(t[i]) + 1), str(float(t[i + 1]) + 1) 40 | res += t[i] + ',' + t[i + 1] + ',' + x + ',' + y + ',' 41 | return res 42 | 43 | kwg = {'language': 'en'} 44 | 45 | if not track and not follow and not locations: 46 | kwg['track'] = 'twitter' 47 | 48 | if track: 49 | kwg['track'] = track 50 | 51 | if follow: 52 | kwg['follow'] = follow 53 | 54 | if locations: 55 | kwg['locations'] = location_bounding_box(locations) 56 | 57 | print kwg 58 | 59 | twitter_stream = twitter.TwitterStream(auth=self.twitter_api.auth) 60 | stream = twitter_stream.statuses.filter(**kwg) 61 | 62 | for i, tweet in enumerate(stream): 63 | if not i % 200 and 'text' in tweet: print i, datetime.datetime.now(), ' ', tweet["text"] 64 | tweet = dict(tweet) 65 | if 'id' in tweet: 66 | self.tweets.append(tweet) 67 | 68 | if self.get_data: 69 | self.get_data = False 70 | self.conn.send(self.tweets) 71 | self.tweets = [] 72 | 73 | if save_to_db: 74 | self.save_tweets_to_mongodb(tweet, colname=collection_name) 75 | 76 | 77 | if __name__ == 
'__main__': 78 | def get_current_time(): 79 | error_time = int(time.time()) # ->这是时间戳 80 | error_time = time.localtime(error_time) 81 | other_style_time = time.strftime("%Y-%m-%d %H:%M:%S", error_time) 82 | return other_style_time 83 | 84 | 85 | t = TwitterStream() 86 | track = None 87 | # locations = u'-122.75,36.8,-73,41,' 88 | # locations = u'-74.05,40.81,-73.05,41.81,-76.99,38.79,-75.99,39.79' 89 | locations = '-74.05,40.81,-76.99,38.79,-118.30,34.23,-122.39,37.96,-122.03,37.37,2.31,48.98,-0.14,51.52,-2.97,53.46,-1.24,51.76,-3.72,40.43,2.17,41.41,-0.38,39.48' 90 | while True: 91 | try: 92 | t.stream_data(track=track, locations=locations, save_to_db=False) 93 | except Exception, e: 94 | with open('error_log.txt', 'a+') as f: 95 | error_info = get_current_time() + ' ' + str(e) + ' \n' 96 | print error_info 97 | f.write(error_info) 98 | -------------------------------------------------------------------------------- /twitterDataMining/model_p/twitterApi/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/1/25. 4 | -------------------------------------------------------------------------------- /twitterDataMining/models.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/2/5. 
4 | import datetime 5 | import pymongo 6 | 7 | 8 | class MongoDb(object): 9 | def __init__(self): 10 | self._client = pymongo.MongoClient() 11 | self.db = self._client.twitter2 12 | 13 | def getDB(self): 14 | return self.db 15 | 16 | 17 | class TimeCost(object): 18 | def __init__(self): 19 | self._start_time = datetime.datetime.now() 20 | 21 | def timecost(self): 22 | print datetime.datetime.now() - self._start_time 23 | -------------------------------------------------------------------------------- /twitterDataMining/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Django settings for twitterDataMining project. 3 | 4 | Generated by 'django-admin startproject' using Django 1.9.1. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.9/topics/settings/ 8 | 9 | For the full list of settings and their values, see 10 | https://docs.djangoproject.com/en/1.9/ref/settings/ 11 | """ 12 | 13 | import os 14 | 15 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...) 16 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | 18 | 19 | # Quick-start development settings - unsuitable for production 20 | # See https://docs.djangoproject.com/en/1.9/howto/deployment/checklist/ 21 | 22 | # SECURITY WARNING: keep the secret key used in production secret! 23 | SECRET_KEY = '(k@6fi5c!6ij6p13u8wyf&wy&38wan1dm$^po5n!b!o*nuk)*c' 24 | 25 | # SECURITY WARNING: don't run with debug turned on in production! 
26 | DEBUG = True 27 | 28 | ALLOWED_HOSTS = [] 29 | 30 | 31 | # Application definition 32 | 33 | INSTALLED_APPS = [ 34 | 'django.contrib.auth', 35 | 'django.contrib.contenttypes', 36 | 'django.contrib.sessions', 37 | 'django.contrib.messages', 38 | 'django.contrib.staticfiles', 39 | 'network', 40 | 'sentiment', 41 | 'topic', 42 | 'statistic', 43 | 'twitterDataMining' 44 | ] 45 | 46 | MIDDLEWARE_CLASSES = [ 47 | 'django.middleware.security.SecurityMiddleware', 48 | 'django.contrib.sessions.middleware.SessionMiddleware', 49 | 'django.middleware.common.CommonMiddleware', 50 | 'django.middleware.csrf.CsrfViewMiddleware', 51 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 52 | 'django.contrib.auth.middleware.SessionAuthenticationMiddleware', 53 | 'django.contrib.messages.middleware.MessageMiddleware', 54 | 'django.middleware.clickjacking.XFrameOptionsMiddleware', 55 | ] 56 | 57 | ROOT_URLCONF = 'twitterDataMining.urls' 58 | 59 | TEMPLATES = [ 60 | { 61 | 'BACKEND': 'django.template.backends.django.DjangoTemplates', 62 | 'DIRS': [os.path.join(BASE_DIR, 'templates')] 63 | , 64 | 'APP_DIRS': True, 65 | 'OPTIONS': { 66 | 'context_processors': [ 67 | 'django.template.context_processors.debug', 68 | 'django.template.context_processors.request', 69 | 'django.contrib.auth.context_processors.auth', 70 | 'django.contrib.messages.context_processors.messages', 71 | ], 72 | }, 73 | }, 74 | ] 75 | 76 | WSGI_APPLICATION = 'twitterDataMining.wsgi.application' 77 | 78 | 79 | # Database 80 | # https://docs.djangoproject.com/en/1.9/ref/settings/#databases 81 | 82 | DATABASES = { 83 | # 'default': { 84 | # 'ENGINE': 'django.db.backends.sqlite3', 85 | # 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), 86 | # } 87 | } 88 | 89 | 90 | # Password validation 91 | # https://docs.djangoproject.com/en/1.9/ref/settings/#auth-password-validators 92 | 93 | AUTH_PASSWORD_VALIDATORS = [ 94 | { 95 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', 96 | }, 97 | 
{ 98 | 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', 99 | }, 100 | { 101 | 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', 102 | }, 103 | { 104 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', 105 | }, 106 | ] 107 | 108 | 109 | # Internationalization 110 | # https://docs.djangoproject.com/en/1.9/topics/i18n/ 111 | 112 | LANGUAGE_CODE = 'en-us' 113 | 114 | TIME_ZONE = 'UTC' 115 | 116 | USE_I18N = True 117 | 118 | USE_L10N = True 119 | 120 | USE_TZ = True 121 | 122 | 123 | # Static files (CSS, JavaScript, Images) 124 | # https://docs.djangoproject.com/en/1.9/howto/static-files/ 125 | 126 | STATIC_URL = '/static/' 127 | 128 | STATICFILES_DIRS = ( 129 | os.path.join(BASE_DIR, "static"), 130 | ) -------------------------------------------------------------------------------- /twitterDataMining/templatetags/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/4/14. 4 | -------------------------------------------------------------------------------- /twitterDataMining/templatetags/mytag.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/4/14. 
4 | import re 5 | from django.core.urlresolvers import reverse, NoReverseMatch 6 | from django import template 7 | 8 | register = template.Library() 9 | 10 | 11 | @register.simple_tag(takes_context=True) 12 | def active(context, pattern_or_urlname): 13 | try: 14 | pattern = '^' + reverse(pattern_or_urlname) 15 | except NoReverseMatch: 16 | pattern = pattern_or_urlname 17 | path = context['request'].path 18 | if re.search(pattern, path): 19 | return 'active' 20 | return '' 21 | -------------------------------------------------------------------------------- /twitterDataMining/urls.py: -------------------------------------------------------------------------------- 1 | """twitterDataMining URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/1.9/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. Add a URL to urlpatterns: url(r'^$', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.conf.urls import url, include 14 | 2. 
Add a URL to urlpatterns: url(r'^blog/', include('blog.urls')) 15 | """ 16 | from django.conf.urls import url, include 17 | import twitterDataMining.views 18 | import network.views 19 | 20 | urlpatterns = [ 21 | url(r'^$', twitterDataMining.views.index_page), 22 | url(r'^network/', include('network.urls')), 23 | url(r'^statistic/', include('statistic.urls')), 24 | url(r'^topic/', include('topic.urls')), 25 | url(r'^sentiment/', include('sentiment.urls')), 26 | ] 27 | -------------------------------------------------------------------------------- /twitterDataMining/views.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | # Created by hrwhisper on 2016/2/3. 4 | from django.http import HttpResponse 5 | 6 | from django.shortcuts import render 7 | 8 | 9 | def index_page(request): 10 | return render(request, 'index.html') 11 | -------------------------------------------------------------------------------- /twitterDataMining/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for twitterDataMining project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.9/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "twitterDataMining.settings") 15 | 16 | application = get_wsgi_application() 17 | --------------------------------------------------------------------------------