├── gui ├── __init__.py └── socialgui.py ├── parser ├── model │ ├── __init__.py │ └── tweet.py ├── __init__.py └── operation │ ├── __init__.py │ ├── tweet_query.py │ └── tweet_operation.py ├── settings.py ├── images ├── logo.png ├── logov.png ├── analysis.png ├── analysisn.png └── app_icon.png ├── TweetAnalysis.db ├── requirements.txt ├── Dockerfile ├── static └── style.css ├── LICENSE ├── .gitignore ├── templates └── locations.html ├── README.md ├── analysis.py └── tracking.py /gui/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /parser/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .tweet import Tweet 2 | -------------------------------------------------------------------------------- /parser/__init__.py: -------------------------------------------------------------------------------- 1 | from . import model 2 | from . import operation -------------------------------------------------------------------------------- /settings.py: -------------------------------------------------------------------------------- 1 | GOOGLE_MAP_API_KEY = 'YOUR_API_KEY' 2 | PORT = 5000 3 | -------------------------------------------------------------------------------- /parser/model/tweet.py: -------------------------------------------------------------------------------- 1 | class Tweet: 2 | def __init__(self): 3 | pass 4 | -------------------------------------------------------------------------------- /images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/batuhaniskr/twitter-intelligence/HEAD/images/logo.png -------------------------------------------------------------------------------- /TweetAnalysis.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/batuhaniskr/twitter-intelligence/HEAD/TweetAnalysis.db -------------------------------------------------------------------------------- /images/logov.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/batuhaniskr/twitter-intelligence/HEAD/images/logov.png -------------------------------------------------------------------------------- /images/analysis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/batuhaniskr/twitter-intelligence/HEAD/images/analysis.png -------------------------------------------------------------------------------- /images/analysisn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/batuhaniskr/twitter-intelligence/HEAD/images/analysisn.png -------------------------------------------------------------------------------- /images/app_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/batuhaniskr/twitter-intelligence/HEAD/images/app_icon.png -------------------------------------------------------------------------------- /parser/operation/__init__.py: -------------------------------------------------------------------------------- 1 | from .tweet_operation import TweetManager 2 | from .tweet_query import TweetCriteria 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | lxml>=4.2.1 2 | pyquery>=1.4.0 3 | Flask>=0.12.3 4 | pandas>=0.22.0 5 | termcolor>=1.1.0 6 | geopy>=1.11.0 7 | requests>=2.19.1 8 | numpy>=1.14.1 9 | matplotlib>=2.2.2 10 | PyQt5==5.11.2 -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7.1-alpine3.8 2 | 3 | LABEL maintainer Aleksandr.Makhinov 4 | ENV GOOGLE_MAP_API_KEY='YOUR_GOOGLE_MAP_API_KEY' 5 | 6 | WORKDIR /usr/src/app 7 | 8 | COPY requirements.txt ./ 9 | 10 | RUN apk --no-cache --update-cache add gcc gfortran build-base freetype-dev libpng-dev py3-qt5 libxml2-dev libxslt-dev python-dev \ 11 | && pip install --no-cache-dir setuptools \ 12 | && pip install --no-cache-dir -r requirements.txt 13 | 14 | COPY . . 15 | 16 | EXPOSE 5000 17 | 18 | VOLUME ./TweetAnalysis.db 19 | 20 | 21 | ENTRYPOINT ["/bin/sh"] 22 | -------------------------------------------------------------------------------- /static/style.css: -------------------------------------------------------------------------------- 1 | #map { 2 | height: 100%; 3 | } 4 | 5 | html, body { 6 | height: 100%; 7 | margin: 0; 8 | padding: 0; 9 | } 10 | 11 | #floating-panel { 12 | position: absolute; 13 | top: 10px; 14 | left: 25%; 15 | z-index: 5; 16 | background-color: #fff; 17 | padding: 5px; 18 | border: 1px solid #999; 19 | text-align: center; 20 | font-family: 'Roboto', 'sans-serif'; 21 | line-height: 30px; 22 | padding-left: 10px; 23 | } 24 | 25 | #floating-panel { 26 | background-color: #fff; 27 | border: 1px solid #999; 28 | left: 25%; 29 | padding: 5px; 30 | position: absolute; 31 | top: 10px; 32 | z-index: 5; 33 | } -------------------------------------------------------------------------------- /parser/operation/tweet_query.py: -------------------------------------------------------------------------------- 1 | class TweetCriteria: 2 | def __init__(self): 3 | self.maxTweets = 0 4 | 5 | def setUsername(self, username): 6 | self.username = username 7 | return self 8 | 9 | def setSince(self, since): 10 | self.since = since 11 | return self 12 | 13 | def setUntil(self, until): 14 | self.until = until 15 | return self 16 | 17 | def setQuerySearch(self, query): 18 | self.query = query 19 | return self 20 | 21 | def setMaxTweets(self, maxTweets): 22 | self.maxTweets = maxTweets 23 | return self 24 | 25 | def setLang(self, Lang): 26 | self.lang = Lang 27 | return self 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 BATUHAN\Batuhan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | .idea 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # Environments 89 | .env 90 | .venv 91 | env/ 92 | venv/ 93 | ENV/ 94 | env.bak/ 95 | venv.bak/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | .spyproject 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # mkdocs documentation 105 | /site 106 | 107 | # mypy 108 | .mypy_cache/ 109 | 110 | -------------------------------------------------------------------------------- /templates/locations.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Location Analysis 7 | 8 | 12 | 13 | 14 |
15 | 16 | 17 | 18 | 19 |
20 |
21 | 22 | 23 | 82 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Twitter Intelligence 2 | 3 | A project written in Python for twitter tracking and analysis without using Twitter API. 4 | 5 | ## Prerequisites 6 | 14 | 15 | ## Database 16 | 17 | 24 | 25 | ## Usage Example 26 | 27 | Application work view: 28 | 29 | ![screen shot 2018-07-06 at 12 18 51](https://user-images.githubusercontent.com/17202632/42370978-d4ea95a6-8116-11e8-97c9-bd8bf0ac7299.png) 30 | 31 | 56 | 57 | 64 | 65 | ## Analysis 66 | 67 | analysis.py performs analysis processing. User, hashtag, and location analyzes are performed. 68 | 69 |
  • Get help:
  • 70 |
    71 |
    python3 analysis.py -h
    72 | 73 |
  • for location analysis
  • 74 |
    75 |
    python3 analysis py --location
    76 | 77 | ![map](https://user-images.githubusercontent.com/17202632/41524483-5baf98be-72e6-11e8-9130-c6db7380ae5d.png) 78 | 79 | location analysis runs through address http://localhost:5000/locations 80 | 81 | You must write Google Map Api Key in setting.py to display google map. 82 | 83 |
    GOOGLE_MAP_API_KEY='YOUR_GOOGLE_MAP_API_KEY'
    84 | 85 |
  • Runs hashtag analysis.
  • 86 |
    87 |
    python3 analysis.py --hashtag
    88 | 89 | ![hashtag](https://user-images.githubusercontent.com/17202632/43121336-135e21e6-8f26-11e8-93bd-16fe966f8aeb.png) 90 | 91 |
  • Runs user analysis.
  • 92 |
    93 |
    python3 analysis.py --user
    94 | 95 | 96 | ## Graphical User Interface 97 | If you want run gui application, you should change "#PyQt5==5.11.2" to "PyQt5==5.11.2" in requirements.txt and you can run the that command. 98 |
    pip3 install -r requirements.txt
    99 | 100 | socialgui.py used for gui application 101 | 102 | -------------------------------------------------------------------------------- /analysis.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | 4 | import getopt 5 | import json 6 | import os.path 7 | import sqlite3 8 | import sys 9 | from collections import Counter 10 | 11 | import matplotlib.pyplot as pl 12 | import numpy as np 13 | from flask import Flask, render_template 14 | from geopy.geocoders import Nominatim 15 | from termcolor import colored 16 | 17 | import settings 18 | 19 | ROOT_DIR = os.path.dirname(os.pardir) 20 | db_path = os.path.join(ROOT_DIR, "TweetAnalysis.db") 21 | 22 | app = Flask(__name__) 23 | 24 | 25 | def main(argv): 26 | if len(argv) == 1 and argv[0] == '-h': 27 | print(""" 28 | [Analysis] 29 | 30 | [--location] for location analysis 31 | [--hashtag] for hashtag analysis 32 | [--user] for user location analysis 33 | """) 34 | 35 | return 36 | 37 | try: 38 | opts, args = getopt.getopt(argv, "", ("hashtag", "user", "location", "h")) 39 | 40 | for opt, arg in opts: 41 | if opt == '--location': 42 | port = settings.PORT 43 | app.run('127.0.0.1', port=port) 44 | elif opt == '--user': 45 | analysis_user() 46 | elif opt == '--hashtag': 47 | analysis_hashtag() 48 | else: 49 | print('Invalid selection.') 50 | except: 51 | print('You must pass some parameters. Use \"-h\" to help.') 52 | 53 | 54 | @app.route('/locations') 55 | def map(): 56 | location = location_analysis() 57 | api_key = settings.GOOGLE_MAP_API_KEY 58 | url = 'https://maps.googleapis.com/maps/api/js?key=' + api_key + '&libraries=visualization&callback=initMap' 59 | 60 | return render_template('locations.html', location=location, url=url) 61 | 62 | 63 | def analysis_user(): 64 | with sqlite3.connect(db_path) as db: 65 | conn = db 66 | c = conn.cursor() 67 | c.execute("SELECT username, count(*) as tekrar FROM Tweet group by username order by tekrar desc LIMIT 10") 68 | data = c.fetchall() 69 | ilk = [] 70 | y = [] 71 | i = 0 72 | for row in data: 73 | ilk.append(row[0]) 74 | y.append(row[1]) 75 | i = i + 1 76 | pl.figure(1) 77 | x = range(i) 78 | 79 | pl.bar(x, y, align='center') 80 | pl.xticks(x, ilk) 81 | # pl.plot(x, y, "-") 82 | pl.title('User - Tweet Count') 83 | pl.xlabel('Username') 84 | pl.ylabel('Tweet Count') 85 | print(colored("[INFO] Showing graph of user analysis", "green")) 86 | pl.show() 87 | 88 | 89 | def analysis_hashtag(): 90 | with sqlite3.connect(db_path) as db: 91 | conn = db 92 | c = conn.cursor() 93 | c.execute("SELECT hashtag from Tweet") 94 | hashtag_list = [] 95 | for row in c.fetchall(): 96 | if (row != ('',)): 97 | if " " in ''.join(row): 98 | for m in ''.join(row).split(' '): 99 | hashtag_list.append(m) 100 | else: 101 | signle_item = ''.join(row) 102 | hashtag_list.append(signle_item) 103 | 104 | counter = Counter(hashtag_list).most_common(10) 105 | pl.rcdefaults() 106 | 107 | keys = [] 108 | performance = [] 109 | 110 | for i in counter: 111 | performance.append(i[1]) 112 | keys.append(i[0]) 113 | 114 | pl.rcdefaults() 115 | y_pos = np.arange(len(keys)) 116 | error = np.random.rand(len(keys)) 117 | 118 | pl.barh(y_pos, performance, xerr=error, align='center', alpha=0.4, ) 119 | pl.yticks(y_pos, keys) 120 | pl.xlabel('quantity') 121 | pl.title('hashtags') 122 | print(colored("[INFO] Showing graph of hashtag analysis", "green")) 123 | pl.show() 124 | 125 | 126 | def location_analysis(): 127 | with sqlite3.connect(db_path) as db: 128 | conn = db 129 | c = conn.cursor() 130 | 131 | locxy = [] 132 | 133 | c.execute("Select place from location") 134 | loc_array = c.fetchall() 135 | 136 | # mapping 137 | geo_data = { 138 | 139 | "features": [] 140 | } 141 | for x in range(len(loc_array)): 142 | if (loc_array[x] != ''): 143 | geolocator = Nominatim() 144 | location = geolocator.geocode(loc_array[x]) 145 | locxy.append(location.latitude) 146 | locxy.append(location.longitude) 147 | 148 | geo_json_feature = { 149 | "lat": location.latitude, 150 | "lng": location.longitude 151 | } 152 | 153 | geo_data['features'].append(geo_json_feature) 154 | locxy.clear() 155 | 156 | json_location = json.dumps(geo_data) 157 | print(colored("[INFO] Showing graph of location analysis", "green")) 158 | return json_location 159 | 160 | 161 | if __name__ == '__main__': 162 | main(sys.argv[1:]) 163 | -------------------------------------------------------------------------------- /tracking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import getopt 5 | import parser 6 | import sqlite3 7 | import sys 8 | 9 | from termcolor import colored 10 | 11 | conn = sqlite3.connect('TweetAnalysis.db') 12 | conn.row_factory = lambda cursor, row: row[1] 13 | c = conn.cursor() 14 | hash_list = [] 15 | 16 | c.execute("CREATE TABLE IF NOT EXISTS Location (locationid, place)") 17 | c.execute("CREATE TABLE IF NOT EXISTS User (userid, username, locationid)") 18 | c.execute("CREATE TABLE IF NOT EXISTS Hashtag (hashtagid, content)") 19 | c.execute("CREATE TABLE IF NOT EXISTS HashtagTweet (hashtagid, tweetid)") 20 | c.execute( 21 | "CREATE TABLE IF NOT EXISTS Tweet (tweetid, text, username, hashtag, date, time, retweet, favorite, mention, userid, locationid)") 22 | 23 | 24 | def main(argv): 25 | # graph_data() 26 | if len(argv) == 0: 27 | print('You must pass some parameters. Use \"-h\" to help.') 28 | return 29 | 30 | if len(argv) == 1 and argv[0] == '-h': 31 | __tool_logo() 32 | print(""" 33 | \n""" + colored(" Examples:\n", "green") + """ 34 | """ + colored('# Get tweets by username\n', 'green') + 35 | """ 36 | python3 tracking.py --username "HaberSau"\n 37 | 38 | """ + colored('# Get tweets by query\n', 'green') + 39 | """ 40 | python3 tracking.py --query "sakarya"\n 41 | 42 | """ + colored('# Get twit at a specific date range\n', 'green') + """ 43 | python3 tracking.py --username "HaberSau" --since 2015-09-10 --until 2015-09-12 --maxtweets 10\n 44 | 45 | """ + colored(' # Get the last 10 top tweets by username\n', 'green') + """ 46 | python3 tracking.py --username "HaberSau" --maxtweets 10 --toptweets\n""") 47 | return 48 | location_value = False 49 | 50 | try: 51 | opts, args = getopt.getopt(argv, "", 52 | ("username=", "since=", "until=", "query=", "toptweets=", "maxtweets=", "location=")) 53 | 54 | tweet_criteria = parser.operation.TweetCriteria() 55 | 56 | for opt, arg in opts: 57 | if opt == '--username': 58 | tweet_criteria.username = arg 59 | elif opt == '--since': 60 | tweet_criteria.since = arg 61 | elif opt == '--until': 62 | tweet_criteria.until = arg 63 | elif opt == '--query': 64 | tweet_criteria.query = arg 65 | elif opt == '--toptweets': 66 | tweet_criteria.topTweets = True 67 | elif opt == '--maxtweets': 68 | tweet_criteria.maxTweets = int(arg) 69 | elif opt == '--location': 70 | location_value = bool(arg) 71 | print(location_value) 72 | 73 | __tool_logo() 74 | print('\n' + colored('[+] Searching...', 'green') + '\n') 75 | 76 | def receive_buffer(tweets): 77 | locationid = 1; 78 | hashtagid = 1; 79 | for t in tweets: 80 | hashtagstring = t.hashtags 81 | str = hashtagstring.split() 82 | 83 | for hash in str: 84 | hash_list.append(hash) 85 | params_hashtag = (hashtagid, hash) 86 | params_hashag_tweet = (hashtagid, t.id) 87 | if hash != "": 88 | hashtagid = hashtagid + 1 89 | c.execute("SELECT * FROM hashtag where content = '%s'" % hash) 90 | exits = c.fetchone() 91 | if exits is None: 92 | c.execute("SELECT hashtag FROM tweet ") 93 | 94 | c.execute("INSERT OR IGNORE INTO HashtagTweet VALUES (?,?)", params_hashag_tweet) 95 | c.execute("INSERT OR IGNORE INTO Hashtag VALUES (?,?)", params_hashtag) 96 | 97 | params_tweet = ( 98 | t.id, t.text, t.username, t.hashtags, t.date.strftime('%Y-%m-%d'), t.date.strftime('%H:%M'), 99 | t.retweets, 100 | t.favorites, t.mentions, t.user_id, locationid) 101 | 102 | c.execute("SELECT * FROM Tweet where tweetid ='%s'" % t.id) 103 | userexist = c.fetchone() 104 | if userexist is None: 105 | c.execute("INSERT INTO Tweet VALUES (?,?,?,?,?,?,?,?,?,?,?)", params_tweet) 106 | 107 | params_location = (locationid, t.geo) 108 | c.execute("SELECT * FROM location where place = '%s'" % t.geo) 109 | locationexist = c.fetchone() 110 | if locationexist is None and t.geo != '': 111 | c.execute("INSERT INTO Location VALUES(?,?)", params_location) 112 | locationid = locationid + 1 113 | 114 | c.execute("SELECT * FROM location where place = '%s'" % t.geo) 115 | locatuid = c.fetchone() 116 | params_user = (t.user_id, t.username, locatuid) 117 | c.execute("SELECT * FROM user where username ='%s'" % t.username) 118 | userexist = c.fetchone() 119 | if userexist is None: 120 | c.execute("INSERT OR IGNORE INTO User VALUES(?,?,?)", params_user) 121 | 122 | conn.commit() 123 | print(colored('\n[+] %d tweet received...\n' % len(tweets), 'green')) 124 | 125 | parser.operation.TweetManager.get_tweets(tweet_criteria, receive_buffer, location_search=location_value) 126 | 127 | except arg: 128 | print('You must pass some parameters. Use \"-h\" to help.' + arg) 129 | 130 | finally: 131 | print(colored('[+] Succesfully saved to the database.', 'green')) 132 | conn.close() 133 | 134 | 135 | def __tool_logo(): 136 | print(colored('''\n\t\t\033[1m 137 | ___________ .__.__ __ 138 | \__ ___/_ _ _|__|__|/ |_ ___________ 139 | | | \ \/ \/ / | \ __\/ __ \_ __ \ 140 | | | \ /| | || | \ ___/| | \/ 141 | |____| \/\_/ |__|__||__| \___ >__| 142 | .___ __ .__ .__ .__\/ 143 | | | _____/ |_ ____ | | | | |__| ____ ____ ____ ____ ____ 144 | | |/ \ __\/ __ \| | | | | |/ ___\_/ __ \ / \_/ ___\/ __ \ 145 | | | | \ | \ ___/| |_| |_| / /_/ > ___/| | \ \__\ ___/ 146 | |___|___| /__| \___ >____/____/__\___ / \___ >___| /\___ >___ > 147 | \/ \/ /_____/ \/ \/ \/ \/ 148 | /.\ 149 | Y \ 150 | / "L 151 | // "/ 152 | |/ /\_================== 153 | / / 154 | / / 155 | \/''', 'green')) 156 | 157 | 158 | if __name__ == '__main__': 159 | main(sys.argv[1:]) 160 | -------------------------------------------------------------------------------- /parser/operation/tweet_operation.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import http.cookiejar as cookielib 3 | import json 4 | import re 5 | import sys 6 | import urllib 7 | import urllib.request as urllib2 8 | 9 | import requests 10 | from lxml import html 11 | from pyquery import PyQuery 12 | from termcolor import colored 13 | 14 | from .. import model 15 | 16 | 17 | class TweetManager: 18 | 19 | def __init__(self): 20 | pass 21 | 22 | @staticmethod 23 | def get_tweets(tweet_criteria, receive_buffer=None, location_search=False, buffer_length=100, proxy=None): 24 | refresh_cursor = '' 25 | results = [] 26 | results_aux = [] 27 | cookiejar = cookielib.CookieJar() 28 | 29 | if hasattr(tweet_criteria, 'username') and ( 30 | tweet_criteria.username.startswith("\'") or tweet_criteria.username.startswith("\"")) and ( 31 | tweet_criteria.username.endswith("\'") or tweet_criteria.username.endswith("\"")): 32 | tweet_criteria.username = tweet_criteria.username[1:-1] 33 | 34 | active = True 35 | 36 | while active: 37 | try: 38 | json = TweetManager.get_json_response(tweet_criteria, refresh_cursor, cookiejar, proxy) 39 | if len(json['items_html'].strip()) == 0: 40 | break 41 | 42 | refresh_cursor = json['min_position'] 43 | scraped_tweets = PyQuery(json['items_html']) 44 | # Remove incomplete tweets withheld by Twitter Guidelines 45 | scraped_tweets.remove('div.withheld-tweet') 46 | tweets = scraped_tweets('div.js-stream-tweet') 47 | 48 | if len(tweets) == 0: 49 | break 50 | 51 | for tweet_html in tweets: 52 | tweetPQ = PyQuery(tweet_html) 53 | tweet = model.Tweet() 54 | 55 | username_tweet = tweetPQ("span:first.username.u-dir b").text() 56 | txt = re.sub(r"\s+", " ", tweetPQ("p.js-tweet-text").text()) 57 | txt = txt.replace('# ', '#') 58 | txt = txt.replace('@ ', '@') 59 | 60 | print(colored("@" + username_tweet + ": ", "red") + colored(txt, "green") + "\n") 61 | 62 | retweets = int(tweetPQ("span.ProfileTweet-action--retweet span.ProfileTweet-actionCount").attr( 63 | "data-tweet-stat-count").replace(",", "")) 64 | favorites = int(tweetPQ("span.ProfileTweet-action--favorite span.ProfileTweet-actionCount").attr( 65 | "data-tweet-stat-count").replace(",", "")) 66 | dateSec = int(tweetPQ("small.time span.js-short-timestamp").attr("data-time")) 67 | id = tweetPQ.attr("data-tweet-id") 68 | permalink = tweetPQ.attr("data-permalink-path") 69 | user_id = int(tweetPQ("a.js-user-profile-link").attr("data-user-id")) 70 | 71 | if location_search == True: 72 | page = requests.get('https://twitter.com/tubiity/status/' + id) 73 | script_geo = html.fromstring(page.content) 74 | location = script_geo.xpath('//a[@class="u-textUserColor js-nav js-geo-pivot-link"]/text()') 75 | sp_location = ','.join(location) 76 | tweet.geo = sp_location 77 | else: 78 | geo = '' 79 | tweet.geo = geo 80 | 81 | # user-information 82 | ''' If this code block is uncommented, application will be slower due to response time''' 83 | '''result = requests.get("https://twitter.com/" + username_tweet) 84 | c = result.content 85 | 86 | soup = BeautifulSoup(c, "html.parser") 87 | liste = [] 88 | samples = soup.find_all("a", 89 | "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-openSignupDialog js-nonNavigable u-textUserColor") 90 | # Follower, Follow and number of likes in list 91 | for a in samples: 92 | liste.append(a.attrs['title']) 93 | ''' 94 | 95 | tweet.id = id 96 | tweet.permalink = 'https://twitter.com' + permalink 97 | tweet.username = username_tweet 98 | tweet.text = txt 99 | tweet.date = datetime.datetime.fromtimestamp(dateSec) 100 | tweet.retweets = retweets 101 | tweet.favorites = favorites 102 | tweet.mentions = " ".join(re.compile('(@\\w*)').findall(tweet.text)) 103 | tweet.hashtags = " ".join(re.compile('(#\\w*)').findall(tweet.text)) 104 | tweet.user_id = user_id 105 | 106 | results.append(tweet) 107 | results_aux.append(tweet) 108 | 109 | if receive_buffer and len(results_aux) >= buffer_length: 110 | receive_buffer(results_aux) 111 | results_aux = [] 112 | 113 | if tweet_criteria.maxTweets > 0 and len(results) >= tweet_criteria.maxTweets: 114 | active = False 115 | break 116 | 117 | except: 118 | receive_buffer(results_aux) 119 | return 120 | 121 | if receive_buffer and len(results_aux) > 0: 122 | receive_buffer(results_aux) 123 | 124 | return results 125 | 126 | @staticmethod 127 | def get_json_response(tweet_criteria, refresh_cursor, cookiejar, proxy): 128 | url = "https://twitter.com/i/search/timeline?f=tweets&q=%s&src=typd&max_position=%s" 129 | 130 | url_data = '' 131 | 132 | if hasattr(tweet_criteria, 'username'): 133 | url_data += ' from:' + tweet_criteria.username 134 | 135 | if hasattr(tweet_criteria, 'query'): 136 | url_data += ' ' + tweet_criteria.query 137 | 138 | if hasattr(tweet_criteria, 'since'): 139 | url_data += ' since:' + tweet_criteria.since 140 | 141 | if hasattr(tweet_criteria, 'until'): 142 | url_data += ' until:' + tweet_criteria.until 143 | 144 | if hasattr(tweet_criteria, 'topTweets'): 145 | if tweet_criteria.topTweets: 146 | url = "https://twitter.com/i/search/timeline?q=%s&src=typd&max_position=%s" 147 | 148 | url = url % (urllib.parse.quote(url_data), urllib.parse.quote(refresh_cursor)) 149 | 150 | headers = [ 151 | ('Host', "twitter.com"), 152 | ('User-Agent', 153 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36"), 154 | ('Accept', "application/json, text/javascript, */*; q=0.01"), 155 | ('Accept-Language', "de,en-US;q=0.7,en;q=0.3"), 156 | ('X-Requested-With', "XMLHttpRequest"), 157 | ('Referer', url), 158 | ('Connection', "keep-alive") 159 | ] 160 | 161 | if proxy: 162 | opener = urllib2.build_opener(urllib2.ProxyHandler({'http': proxy, 'https': proxy}), 163 | urllib2.HTTPCookieProcessor(cookiejar)) 164 | else: 165 | opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar)) 166 | opener.addheaders = headers 167 | 168 | try: 169 | response = opener.open(url) 170 | json_response = response.read() 171 | except: 172 | print 173 | "Twitter weird response. Try to see on browser: https://twitter.com/search?q=%s&src=typd" % urllib.parse.quote( 174 | url_data) 175 | sys.exit() 176 | return 177 | 178 | data = json.loads(json_response) 179 | 180 | return data 181 | -------------------------------------------------------------------------------- /gui/socialgui.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sqlite3 4 | import sys 5 | 6 | from PyQt5 import QtWidgets 7 | 8 | import analysis 9 | import parser 10 | 11 | conn = sqlite3.connect('../TweetAnalysis.db') 12 | conn.row_factory = lambda cursor, row: row[1] 13 | c = conn.cursor() 14 | from PyQt5.QtWidgets import QApplication, QMainWindow 15 | from PyQt5 import QtCore, QtGui 16 | 17 | try: 18 | _fromUtf8 = QtCore.QString.fromUtf8 19 | except AttributeError: 20 | def _fromUtf8(s): 21 | return s 22 | 23 | try: 24 | _encoding = QtWidgets.QApplication.UnicodeUTF8 25 | 26 | 27 | def _translate(context, text, disambig): 28 | return QtWidgets.QApplication.translate(context, text, disambig, _encoding) 29 | except AttributeError: 30 | def _translate(context, text, disambig): 31 | return QtWidgets.QApplication.translate(context, text, disambig) 32 | 33 | 34 | class Ui_MainWindow(object): 35 | def setupUi(self, So): 36 | So.setObjectName(_fromUtf8("So")) 37 | So.resize(735, 600) 38 | icon = QtGui.QIcon() 39 | icon.addPixmap(QtGui.QPixmap(_fromUtf8("../.designer/backup/socailMediaTrends.jpg")), QtGui.QIcon.Normal, 40 | QtGui.QIcon.Off) 41 | So.setWindowIcon(icon) 42 | So.setStyleSheet(_fromUtf8("background-color:rgb(16, 170, 5)")) 43 | self.centralwidget = QtWidgets.QWidget(So) 44 | self.centralwidget.setObjectName(_fromUtf8("centralwidget")) 45 | self.pushButton = QtWidgets.QPushButton(self.centralwidget) 46 | self.pushButton.setGeometry(QtCore.QRect(360, 380, 99, 27)) 47 | font = QtGui.QFont() 48 | font.setPointSize(12) 49 | font.setBold(True) 50 | font.setWeight(75) 51 | self.pushButton.setFont(font) 52 | self.pushButton.setStyleSheet(_fromUtf8("background-color:rgb(65, 65, 65); color:rgb(255, 255, 255);")) 53 | self.pushButton.setObjectName(_fromUtf8("pushButton")) 54 | self.pushButton_2 = QtWidgets.QPushButton(self.centralwidget) 55 | self.pushButton_2.setGeometry(QtCore.QRect(490, 380, 99, 27)) 56 | font = QtGui.QFont() 57 | font.setPointSize(12) 58 | font.setBold(True) 59 | font.setWeight(75) 60 | self.pushButton_2.setFont(font) 61 | self.pushButton_2.setStyleSheet(_fromUtf8("background-color:rgb(65, 65, 65); color:rgb(255, 255, 255);")) 62 | self.pushButton_2.setObjectName(_fromUtf8("pushButton_2")) 63 | self.lineEdit = QtWidgets.QLineEdit(self.centralwidget) 64 | self.lineEdit.setGeometry(QtCore.QRect(480, 100, 113, 27)) 65 | self.lineEdit.setStyleSheet(_fromUtf8("background-color:rgb(255, 255, 255)")) 66 | self.lineEdit.setObjectName(_fromUtf8("lineEdit")) 67 | self.lineEdit_2 = QtWidgets.QLineEdit(self.centralwidget) 68 | self.lineEdit_2.setGeometry(QtCore.QRect(480, 150, 113, 27)) 69 | self.lineEdit_2.setStyleSheet(_fromUtf8("background-color:rgb(255, 255, 255)")) 70 | self.lineEdit_2.setObjectName(_fromUtf8("lineEdit_2")) 71 | self.label = QtWidgets.QLabel(self.centralwidget) 72 | self.label.setGeometry(QtCore.QRect(360, 100, 68, 17)) 73 | self.label.setObjectName(_fromUtf8("label")) 74 | self.label_2 = QtWidgets.QLabel(self.centralwidget) 75 | self.label_2.setGeometry(QtCore.QRect(360, 150, 71, 17)) 76 | self.label_2.setObjectName(_fromUtf8("label_2")) 77 | self.label_3 = QtWidgets.QLabel(self.centralwidget) 78 | self.label_3.setGeometry(QtCore.QRect(360, 200, 90, 17)) 79 | self.label_3.setObjectName(_fromUtf8("label_3")) 80 | self.label_4 = QtWidgets.QLabel(self.centralwidget) 81 | self.label_4.setGeometry(QtCore.QRect(360, 260, 90, 17)) 82 | self.label_4.setObjectName(_fromUtf8("label_4")) 83 | self.label_5 = QtWidgets.QLabel(self.centralwidget) 84 | self.label_5.setGeometry(QtCore.QRect(360, 320, 68, 17)) 85 | self.label_5.setObjectName(_fromUtf8("label_5")) 86 | self.lineEdit_5 = QtWidgets.QLineEdit(self.centralwidget) 87 | self.lineEdit_5.setGeometry(QtCore.QRect(480, 320, 113, 27)) 88 | self.lineEdit_5.setStyleSheet(_fromUtf8("background-color:rgb(255, 255, 255)")) 89 | self.lineEdit_5.setStyleSheet(_fromUtf8("background-color:rgb(255, 255, 255)")) 90 | self.label.setStyleSheet(_fromUtf8("color:white")) 91 | self.label_2.setStyleSheet(_fromUtf8("color:white")) 92 | self.label_3.setStyleSheet(_fromUtf8("color:white")) 93 | self.label_4.setStyleSheet(_fromUtf8("color:white")) 94 | self.label_5.setStyleSheet(_fromUtf8("color:white")) 95 | self.lineEdit_5.setStyleSheet(_fromUtf8("background-color:rgb(255, 255, 255)")) 96 | self.lineEdit_5.setObjectName(_fromUtf8("lineEdit_5")) 97 | self.lineEdit_3 = QtWidgets.QLineEdit(self.centralwidget) 98 | self.lineEdit_3.setGeometry(QtCore.QRect(480, 200, 113, 27)) 99 | self.lineEdit_3.setStyleSheet(_fromUtf8("background-color:rgb(255, 255, 255)")) 100 | self.lineEdit_3.setObjectName(_fromUtf8("lineEdit_3")) 101 | self.lineEdit_4 = QtWidgets.QLineEdit(self.centralwidget) 102 | self.lineEdit_4.setGeometry(QtCore.QRect(480, 260, 113, 27)) 103 | self.lineEdit_4.setStyleSheet(_fromUtf8("background-color:rgb(255, 255, 255)")) 104 | self.lineEdit_4.setObjectName(_fromUtf8("lineEdit_4")) 105 | self.label_6 = QtWidgets.QLabel(self.centralwidget) 106 | self.label_6.setGeometry(QtCore.QRect(120, 20, 571, 51)) 107 | font = QtGui.QFont() 108 | font.setPointSize(23) 109 | font.setBold(True) 110 | font.setUnderline(False) 111 | font.setWeight(75) 112 | font.setStrikeOut(False) 113 | self.label_6.setFont(font) 114 | self.label_6.setStyleSheet(_fromUtf8("background-color:rgb(16, 170, 5); color: white;")) 115 | self.label_6.setObjectName(_fromUtf8("label_6")) 116 | self.pic_label = QtWidgets.QLabel(self.centralwidget) 117 | self.pic_label.setGeometry(QtCore.QRect(40, 70, 291, 361)) 118 | self.pic_label.setPixmap(QtGui.QPixmap("../" + "/images/app_icon.png")) 119 | 120 | self.pic_label.setObjectName(_fromUtf8("pic_label")) 121 | So.setCentralWidget(self.centralwidget) 122 | self.menubar = QtWidgets.QMenuBar(So) 123 | self.menubar.setGeometry(QtCore.QRect(0, 0, 735, 25)) 124 | self.menubar.setObjectName(_fromUtf8("menubar")) 125 | self.menuFile = QtWidgets.QMenu(self.menubar) 126 | self.menuFile.setObjectName(_fromUtf8("menuFile")) 127 | self.menuHelp = QtWidgets.QMenu(self.menubar) 128 | self.menuHelp.setObjectName(_fromUtf8("menuHelp")) 129 | So.setMenuBar(self.menubar) 130 | self.statusbar = QtWidgets.QStatusBar(So) 131 | self.statusbar.setObjectName(_fromUtf8("statusbar")) 132 | So.setStatusBar(self.statusbar) 133 | self.actionExit = QtWidgets.QAction(So) 134 | self.actionExit.setObjectName(_fromUtf8("actionExit")) 135 | self.actionAnalysis = QtWidgets.QAction(So) 136 | self.actionAnalysis.setObjectName(_fromUtf8("actionAnalysis")) 137 | self.actionExit_2 = QtWidgets.QAction(So) 138 | self.actionExit_2.setObjectName(_fromUtf8("actionExit_2")) 139 | self.pushButton_3 = QtWidgets.QAction(So) 140 | icon1 = QtGui.QIcon() 141 | icon1.addPixmap(QtGui.QPixmap(_fromUtf8(":/newPrefix/socailMediaTrends.jpg")), QtGui.QIcon.Normal, 142 | QtGui.QIcon.Off) 143 | self.pushButton_3.setIcon(icon1) 144 | self.pushButton_3.setObjectName(_fromUtf8("pushButton_3")) 145 | self.menuFile.addAction(self.actionExit) 146 | self.menuFile.addAction(self.actionAnalysis) 147 | self.menuFile.addAction(self.actionExit_2) 148 | self.menuFile.addSeparator() 149 | self.menuFile.addSeparator() 150 | self.menubar.addAction(self.menuFile.menuAction()) 151 | self.menubar.addAction(self.menuHelp.menuAction()) 152 | 153 | self.retranslateUi(So) 154 | QtCore.QMetaObject.connectSlotsByName(So) 155 | 156 | def retranslateUi(self, So): 157 | So.setWindowTitle(_translate("So", "Social Network Tracking And Analysis", None)) 158 | self.pushButton.setText(_translate("So", "Tracking", None)) 159 | self.pushButton.clicked.connect(self.tracking) # When the button is pressed 160 | 161 | self.pushButton_2.setText(_translate("So", "Analysis", None)) 162 | self.pushButton_2.clicked.connect(self.analysis_tweet) # When the button is pressed 163 | 164 | self.label.setText(_translate("So", "Query:", None)) 165 | self.label_2.setText(_translate("So", "Username:", None)) 166 | self.label_3.setText(_translate("So", "Since:(y-d-m)", None)) 167 | self.label_4.setText(_translate("So", "Until:(y-d-m)", None)) 168 | self.label_5.setText(_translate("So", "Quantity:", None)) 169 | self.label_6.setText(_translate("So", "Social Network Tracking And Analysis", None)) 170 | self.pic_label.setText(_translate("So", "", None)) 171 | self.menuFile.setTitle(_translate("So", "File", None)) 172 | self.menuHelp.setTitle(_translate("So", "Help", None)) 173 | self.actionExit.setText(_translate("So", "Tracking", None)) 174 | self.actionAnalysis.setText(_translate("So", "Analysis", None)) 175 | self.actionExit_2.setText(_translate("So", "Exit", None)) 176 | self.pushButton_3.setText(_translate("So", "deneme", None)) 177 | 178 | def analysis_tweet(self): 179 | analysis.analysis_graph() 180 | 181 | def tracking(self): 182 | tweet_criteria = parser.operation.TweetCriteria() 183 | 184 | tweet_criteria.username = self.lineEdit_2.text() 185 | tweet_criteria.query = self.lineEdit.text() 186 | if (self.lineEdit_3.text() != ""): 187 | tweet_criteria.since = self.lineEdit_3.text() 188 | if (self.lineEdit_4.text() != ""): 189 | tweet_criteria.until = self.lineEdit_4.text() 190 | if (self.lineEdit_5.text() != ""): 191 | tweet_criteria.maxTweets = int(self.lineEdit_5.text()) 192 | 193 | print('Searching...\n') 194 | 195 | def receiveBuffer(tweets): 196 | locationid = 1; 197 | hashtagid = 1; 198 | for t in tweets: 199 | hashtagstring = t.hashtags 200 | # userchefck = t.username 201 | str = hashtagstring.split() 202 | # print(usercheck) 203 | # serstr=usercheck.split() 204 | 205 | for hash in str: 206 | # hash_list.append(hash) 207 | paramsHashtag = (hashtagid, hash) 208 | paramsHashagTweet = (hashtagid, t.id) 209 | if hash != "": 210 | hashtagid = hashtagid + 1 211 | c.execute("SELECT * FROM hashtag where content = '%s'" % hash) 212 | # aynı içeriğin olup olmama kontrolü 213 | exits = c.fetchone() 214 | if exits is None: 215 | c.execute("SELECT hashtag FROM tweet ") 216 | 217 | c.execute("INSERT OR IGNORE INTO HashtagTweet VALUES (?,?)", paramsHashagTweet) 218 | c.execute("INSERT OR IGNORE INTO Hashtag VALUES (?,?)", paramsHashtag) 219 | 220 | a = t.date.strftime('%H:%M') 221 | 222 | paramsTweet = ( 223 | t.id, t.text, t.username, t.hashtags, t.date.strftime('%Y-%m-%d'), t.date.strftime('%H:%M'), 224 | t.retweets, 225 | t.favorites, t.mentions, t.user_id, locationid) 226 | 227 | c.execute("SELECT * FROM Tweet where tweetid ='%s'" % t.id) 228 | userexist = c.fetchone() 229 | if userexist is None: 230 | c.execute("INSERT INTO Tweet VALUES (?,?,?,?,?,?,?,?,?,?,?)", paramsTweet) 231 | 232 | # aynı içeriğin olup olmama kontrolü 233 | 234 | # geolocator = Nominatim() 235 | # location = geolocator.geocode("") 236 | # print(location) 237 | paramsLocation = (locationid, t.geo) 238 | c.execute("SELECT * FROM location where place = '%s'" % t.geo) 239 | locationexist = c.fetchone() 240 | if locationexist is None and t.geo != '': 241 | c.execute("INSERT INTO Location VALUES(?,?)", paramsLocation) 242 | locationid = locationid + 1 243 | 244 | c.execute("SELECT *FROM location where place = '%s'" % t.geo) 245 | locatuid = c.fetchone() 246 | paramsUser = (t.user_id, t.username, locatuid, t.follow, t.follower) 247 | c.execute("SELECT * FROM user where username ='%s'" % t.username) 248 | userexist = c.fetchone() 249 | if userexist is None: 250 | c.execute("INSERT OR IGNORE INTO User VALUES(?,?,?,?,?)", paramsUser) 251 | 252 | conn.commit() 253 | print('Veritabanına %d tweet daha kaydedildi...\n' % len(tweets)) 254 | 255 | parser.operation.TweetManager.get_tweets(tweet_criteria, receiveBuffer) 256 | 257 | 258 | if __name__ == "__main__": 259 | app = QApplication(sys.argv) 260 | MainWindow = QMainWindow() 261 | ui = Ui_MainWindow() 262 | ui.setupUi(MainWindow) 263 | MainWindow.show() 264 | sys.exit(app.exec_()) 265 | --------------------------------------------------------------------------------