├── README.md ├── requirements.txt └── stocktalk ├── listen.py ├── scripts ├── mongio.py └── streaming.py ├── app.py ├── static └── js │ ├── helpers.js │ └── charting.js └── templates └── index.html /README.md: -------------------------------------------------------------------------------- 1 | ## Deprecated 2 | 3 | This project is no longer maintained. -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Flask>=0.12.2 2 | pymongo>=3.6.0 3 | nltk>=3.2.2 4 | tornado>=4.4.2 5 | tweepy>=3.5.0 6 | twython>=3.4.0 -------------------------------------------------------------------------------- /stocktalk/listen.py: -------------------------------------------------------------------------------- 1 | from scripts import settings, streaming 2 | 3 | # Query keys categorize tweets 4 | # Each key or category corresponds to an array of keywords 5 | queries = {'ETH': ['ETH', 'Ethereum'], 6 | 'LTC': ['LTC', 'Litecoin'], 7 | 'BTC': ['BTC', 'Bitcoin'], 8 | 'XRP': ['XRP', 'Ripple'], 9 | 'XLM': ['XLM', 'Stellar']} 10 | 11 | # Aggregate volume and sentiment every 15 minutes 12 | refresh = 15*60 13 | 14 | streaming.streamer(settings.credentials, 15 | queries, 16 | refresh, 17 | sentiment=True, 18 | debug=True) 19 | -------------------------------------------------------------------------------- /stocktalk/scripts/mongio.py: -------------------------------------------------------------------------------- 1 | import pymongo 2 | import json 3 | import sys 4 | 5 | # Local Files 6 | sys.path.append("..") 7 | from scripts import settings 8 | 9 | client = pymongo.MongoClient(settings.mongo_server, settings.mongo_id) 10 | db = client[settings.mongo_client] 11 | db.authenticate(settings.mongo_user, settings.mongo_pass) 12 | 13 | def push(query, datatype, data): 14 | d = db.logs.find_one({'query': query}) 15 | if d is not None: 16 | data = json.loads(d[datatype])+[data] 17 | d[datatype] = json.dumps(data) 18 | db.logs.save(d) 19 | else: 20 | db.logs.insert_one({'query': query, datatype: json.dumps([data])}) 21 | 22 | def load(query, datatype): 23 | d = db.logs.find_one({'query': query}) 24 | return json.loads(d[datatype]) -------------------------------------------------------------------------------- /stocktalk/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template 2 | from json import loads 3 | from scripts import mongio 4 | 5 | app = Flask(__name__) 6 | 7 | # ======== Routing =========================================================== # 8 | 9 | @app.route('/', methods=['GET']) 10 | def index(): 11 | volume, sentiment = {}, {} 12 | for document in mongio.db.logs.find(): 13 | logs = loads(document['logs']) 14 | t = [i['timestamp'] for i in logs] 15 | v = [i['volume'] for i in logs] 16 | s = [i['sentiment'] for i in logs] 17 | volume[document['query']] = [{'x': i, 'y': j} for i, j in zip(t, v)] 18 | sentiment[document['query']] = [{'x': i, 'y': j} for i, j in zip(t, s)] 19 | 20 | return render_template('index.html', v=volume, s=sentiment) 21 | 22 | # ======== Main ============================================================== # 23 | 24 | if __name__ == "__main__": 25 | app.run(debug=True) -------------------------------------------------------------------------------- /stocktalk/static/js/helpers.js: -------------------------------------------------------------------------------- 1 | /* 2 | Generates random data for testing volume chart 3 | @param {number} number of datapoints 4 | @param {date} optional date object specifying start date 5 | @return {array} simulated volume data 6 | */ 7 | function randomVolume(n, startDate) { 8 | startDate = startDate || new Date(); 9 | var startYear = startDate.getUTCFullYear(); 10 | var startMonth = startDate.getUTCMonth(); 11 | var startDay = startDate.getUTCDate(); 12 | var toReturn = new Array(n); 13 | var base = Math.random()*100 14 | for (var i = 0; i < n; i++) { 15 | draw = Math.random() 16 | toReturn[i] = { 17 | x: new Date(Date.UTC(startYear, startMonth, startDay + i/100)), 18 | y: draw > 0.9 && draw < 0.92 ? base + Math.random() * 100 : base + Math.random() * 30 19 | }; 20 | }; 21 | return toReturn; 22 | } 23 | 24 | /* 25 | Generates random data for testing sentiment chart 26 | @param {number} number of datapoints 27 | @param {date} optional date object specifying start date 28 | @return {array} simulated sentiment data 29 | */ 30 | function randomSentiment(n, startDate) { 31 | startDate = startDate || new Date(); 32 | var startYear = startDate.getUTCFullYear(); 33 | var startMonth = startDate.getUTCMonth(); 34 | var startDay = startDate.getUTCDate(); 35 | var toReturn = new Array(n); 36 | for (var i = 0; i < n; i++) { 37 | toReturn[i] = { 38 | x: new Date(Date.UTC(startYear, startMonth, startDay + i/100)), 39 | y: Math.random() > 0.5 ? Math.random()/1.5 : -1 * Math.random()/1.5 40 | }; 41 | }; 42 | return toReturn; 43 | } 44 | 45 | /* 46 | Sums an array of numbers 47 | @param {array} 48 | @return {number} sum 49 | */ 50 | function sum(array) { 51 | total = 0; 52 | for (var i = 0; i < array.length; i++) { 53 | total += array[i] 54 | } 55 | return total; 56 | } 57 | 58 | /* 59 | Averages an array of numbers 60 | @param {array} 61 | @return {number} average 62 | */ 63 | function avg(array) { 64 | if (array.length == 0){ 65 | return 0 66 | } 67 | total = 0; 68 | for (var i = 0; i < array.length; i++) { 69 | total += array[i] 70 | } 71 | return total/array.length; 72 | } 73 | 74 | /* 75 | Averages an array of numbers 76 | @param {array} 77 | @return {number} average 78 | */ 79 | function ms(n, tf) { 80 | if (tf == 's') { 81 | return n*1000 82 | } 83 | if (tf == 'm') { 84 | return n*60*1000 85 | } 86 | if (tf == 'h') { 87 | return n*60*60*1000 88 | } 89 | if (tf == 'd') { 90 | return n*24*60*60*1000 91 | } 92 | if (tf == 'w') { 93 | return n*7*24*60*60*1000 94 | } 95 | } 96 | 97 | /* 98 | Converts lower timeframe timeseries data into a higher timeframe 99 | @param {array} An array of dictionaries e.g 100 | data = [{x: Date Object, y: 603}, 101 | {x: Date Object, y: 423}, 102 | {x: Date Object, y: 552}, 103 | ... ] 104 | @param {number} How many sec, min, hour, day, week? 105 | @param {number} And of what higher timeframe? 106 | @param {function} Use sum for volume and avg for sentiment 107 | @return {array} Resampled data 108 | */ 109 | function resample(data, n, tf, method) { 110 | resampled = [] 111 | for (var i = 0; i < data.length; i++) { 112 | aggregated_stamps = [data[i]['x']] 113 | aggregated_values = [data[i]['y']] 114 | try { 115 | while (data[i+1]['x'].getTime() < aggregated_stamps[0].getTime()+ms(n, tf)) { 116 | aggregated_stamps.push(data[i+1]['x']) 117 | aggregated_values.push(data[i+1]['y']) 118 | i ++ 119 | if (i > data.length) { 120 | break 121 | } 122 | } 123 | } catch(TypeError) { 124 | resampled.push({'x': aggregated_stamps[0], 'y': method(aggregated_values)}) 125 | break 126 | } 127 | resampled.push({'x': aggregated_stamps[0], 'y': method(aggregated_values)}) 128 | } 129 | return resampled 130 | } -------------------------------------------------------------------------------- /stocktalk/static/js/charting.js: -------------------------------------------------------------------------------- 1 | /* 2 | Adapted from http://plottablejs.org/examples/finance/ 3 | 4 | What this does is it takes an array of data and looks for a 5 | div element with the passed id. It creates an svg element 6 | inside of the div element where the chart is then created. 7 | 8 | This is how the charts are destroyed and created interactively, 9 | through destroying and creating this svg element. 10 | */ 11 | function newChart(data, id) { 12 | $("#"+id+"-placeholder").append('') 13 | var xScale = new Plottable.Scales.Time(); 14 | var xAxis = new Plottable.Axes.Numeric(xScale, "bottom"); 15 | xAxis.formatter(Plottable.Formatters.multiTime()); 16 | var yScale = new Plottable.Scales.Linear(); 17 | var yAxis = new Plottable.Axes.Numeric(yScale, "left"); 18 | var colorScale = new Plottable.Scales.Color(); 19 | var series = new Plottable.Dataset(data, { name: "series" }); 20 | var plot = new Plottable.Plots.Line(); 21 | plot.x(function(d) { return d.x; }, xScale).y(function(d) { return d.y; }, yScale); 22 | plot.attr("stroke", function(d, i, dataset) { return dataset.metadata().name; }, colorScale); 23 | plot.addDataset(series) 24 | plot.autorangeMode("y"); 25 | var sparklineXScale = new Plottable.Scales.Time(); 26 | var sparklineXAxis = new Plottable.Axes.Time(sparklineXScale, "bottom"); 27 | var sparklineYScale = new Plottable.Scales.Linear(); 28 | var sparkline = new Plottable.Plots.Line(); 29 | sparkline.x(function(d) { return d.x; }, sparklineXScale).y(function(d) { return d.y; }, sparklineYScale); 30 | sparkline.attr("stroke", function(d, i, dataset) { return dataset.metadata().name; }, colorScale); 31 | sparkline.addDataset(series); 32 | var dragBox = new Plottable.Components.XDragBoxLayer(); 33 | dragBox.resizable(true); 34 | dragBox.onDrag(function(bounds) { 35 | var min = sparklineXScale.invert(bounds.topLeft.x); 36 | var max = sparklineXScale.invert(bounds.bottomRight.x); 37 | xScale.domain([min, max]); 38 | }); 39 | dragBox.onDragEnd(function(bounds) { 40 | if (bounds.topLeft.x === bounds.bottomRight.x) { 41 | xScale.domain(sparklineXScale.domain()); 42 | } 43 | }); 44 | xScale.onUpdate(function() { 45 | dragBox.boxVisible(true); 46 | var xDomain = xScale.domain(); 47 | dragBox.bounds({ 48 | topLeft: { x: sparklineXScale.scale(xDomain[0]), y: null }, 49 | bottomRight: { x: sparklineXScale.scale(xDomain[1]), y: null } 50 | }); 51 | }); 52 | var miniChart = new Plottable.Components.Group([sparkline, dragBox]); 53 | var pzi = new Plottable.Interactions.PanZoom(xScale, null); 54 | pzi.attachTo(plot); 55 | var output = d3.select("#hoverFeedback"); 56 | var outputDefaultText = "Closest:" 57 | output.text(outputDefaultText); 58 | var chart = new Plottable.Components.Table([ 59 | [yAxis, plot ], 60 | [null , xAxis ], 61 | [null , miniChart ], 62 | [null , sparklineXAxis] 63 | ]); 64 | chart.rowWeight(2, 0.2); 65 | chart.renderTo('#'+id); 66 | var crosshair = createCrosshair(plot); 67 | var pointer = new Plottable.Interactions.Pointer(); 68 | pointer.onPointerMove(function(p) { 69 | var nearestEntity = plot.entityNearest(p); 70 | if (nearestEntity.datum == null) { 71 | return; 72 | } 73 | crosshair.drawAt(nearestEntity.position); 74 | var datum = nearestEntity.datum; 75 | output.text("Closest: (" + datum.x.toLocaleString() + ", " + datum.y.toFixed(2) + ")"); 76 | }); 77 | pointer.onPointerExit(function() { 78 | crosshair.hide(); 79 | output.text(outputDefaultText); 80 | }); 81 | pointer.attachTo(plot); 82 | function createCrosshair(plot) { 83 | var crosshair = {}; 84 | var crosshairContainer = plot.foreground().append("g").style("visibility", "hidden"); 85 | crosshair.vLine = crosshairContainer.append("line") 86 | .attr("stroke", "black") 87 | .attr("y1", 0) 88 | .attr("y2", plot.height()); 89 | crosshair.circle = crosshairContainer.append("circle") 90 | .attr("stroke", "black") 91 | .attr("fill", "white") 92 | .attr("r", 3); 93 | crosshair.drawAt = function(p) { 94 | crosshair.vLine.attr({ 95 | x1: p.x, 96 | x2: p.x 97 | }); 98 | crosshair.circle.attr({ 99 | cx: p.x, 100 | cy: p.y 101 | }); 102 | crosshairContainer.style("visibility", "visible"); 103 | } 104 | crosshair.hide = function() { 105 | crosshairContainer.style("visibility", "hidden"); 106 | } 107 | return crosshair; 108 | } 109 | } -------------------------------------------------------------------------------- /stocktalk/scripts/streaming.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | import time 4 | import copy 5 | import threading 6 | import codecs 7 | import tweepy 8 | from tweepy.api import API 9 | 10 | # Special Exceptions 11 | from requests.exceptions import Timeout 12 | from requests.exceptions import ConnectionError 13 | from requests.packages.urllib3.exceptions import ReadTimeoutError 14 | 15 | # Local Files 16 | sys.path.append("..") 17 | from scripts import mongio 18 | 19 | def get_tracker(queries): 20 | return {query: {'volume': 0, 'scores': []} for query in queries} 21 | 22 | def get_reverse(queries): 23 | reverse = {} 24 | for query in queries: 25 | for keyword in queries[query]: 26 | reverse[keyword] = query 27 | return reverse 28 | 29 | def elapsed_time(start): 30 | return (time.time()-start) 31 | 32 | def process(text): 33 | text = re.sub("[0-9]+", "number", text) 34 | text = re.sub("#", "", text) 35 | text = re.sub("\n", "", text) 36 | text = re.sub("$[^\s]+", "", text) 37 | text = re.sub("@[^\s]+", "", text) 38 | text = re.sub("(http|https)://[^\s]*", "", text) 39 | text = re.sub("[^\s]+@[^\s]+", "", text) 40 | text = re.sub('[^a-z A-Z]+', '', text) 41 | return text 42 | 43 | class Listener(tweepy.StreamListener): 44 | 45 | def __init__(self, auth, queries, refresh, sentiment=False, debug=False): 46 | self.api = tweepy.API(auth) 47 | self.queries = queries.keys() 48 | self.refresh = refresh 49 | self.sentiment = sentiment 50 | self.processing = False 51 | self.timer = time.time() 52 | self.debug = debug 53 | self.reverse = get_reverse(queries) 54 | self.tracker = get_tracker(self.queries) 55 | 56 | def process(self): 57 | # Reset timer 58 | self.timer = time.time() 59 | 60 | # Copy tracking data to temporary tracker 61 | previous_tracker = copy.deepcopy(self.tracker) 62 | self.tracker = get_tracker(self.queries) 63 | 64 | # Update database 65 | for query in previous_tracker: 66 | 67 | if self.sentiment: 68 | scores = previous_tracker[query]['scores'] 69 | try: 70 | sentiment = round(sum(scores)/len(scores) ,2) 71 | except ZeroDivisionError: 72 | sentiment = 0 73 | else: 74 | sentiment = 0 75 | 76 | volume = previous_tracker[query]['volume'] 77 | timestamp = time.strftime('%m/%d/%Y %H:%M:%S') 78 | 79 | mongio.push(query, 'logs', {'timestamp' : timestamp, 80 | 'volume' : volume, 81 | 'sentiment' : sentiment}) 82 | 83 | if self.debug: 84 | print('Query', query) 85 | print('Timestamp', timestamp) 86 | print('Volume', volume) 87 | print('Sentiment', sentiment) 88 | print('-------\n') 89 | 90 | self.processing = False 91 | 92 | def on_status(self, status): 93 | original_tweet = status.text 94 | 95 | # For every incoming tweet... 96 | for query in self.queries: 97 | if query.lower() in original_tweet.lower(): 98 | 99 | # Categorize tweet 100 | lookup = self.reverse[query] 101 | 102 | # Increment count 103 | self.tracker[lookup]['volume'] += 1 104 | 105 | # Sentiment analysis 106 | if self.sentiment: 107 | processed_tweet = process(original_tweet.lower()) 108 | score = SentimentIntensityAnalyzer().polarity_scores(processed_tweet)['compound'] 109 | self.tracker[lookup]['scores'].append(score) 110 | 111 | # Check refresh 112 | if elapsed_time(self.timer) >= self.refresh: 113 | if not self.processing: 114 | self.processing = True 115 | processing_thread = threading.Thread(target=self.process) 116 | processing_thread.start() 117 | return True 118 | 119 | def on_error(self, status_code): 120 | print("{0} Error: {1}\n".format(time.strftime('%m/%d/%Y %H:%M:%S'), status_code)) 121 | if status_code == 413 or status_code == 420 or status_code == 503: 122 | return False 123 | return True # To continue listening 124 | 125 | def on_timeout(self): 126 | print("Timeout...") 127 | return True # To continue listening 128 | 129 | # Streaming -------------------------------------------------- 130 | 131 | def streamer(credentials, queries, refresh, sentiment=False, debug=False): 132 | keywords = [i for j in queries.values() for i in j] 133 | 134 | # User Error Checks 135 | if len(queries) <= 0: print("Error: You must include at least one query."); return 136 | if len(queries) >= 10: print("Warning: Fewer than ten query recommended.") 137 | if len(keywords) <= 0: print("Error: You must include at least one keyword."); return 138 | if len(keywords) >= 20: print("Warning: Fewer than twenty keywords recommended.") 139 | if refresh <= 0: print("Error: Refresh rate must be greater than 0"); return 140 | 141 | auth = tweepy.OAuthHandler(credentials[0], credentials[1]) 142 | auth.set_access_token(credentials[2], credentials[3]) 143 | 144 | if sentiment: 145 | global SentimentIntensityAnalyzer 146 | from nltk.sentiment.vader import SentimentIntensityAnalyzer 147 | 148 | while True: 149 | 150 | # Start streaming ----------------------------- 151 | try: 152 | print("Streaming Now...") 153 | listener = Listener(auth, queries, refresh, sentiment, debug) 154 | stream = tweepy.Stream(auth, listener) 155 | stream.filter(track=keywords) 156 | 157 | except (Timeout, ConnectionError, ReadTimeoutError): 158 | print("{0} Error: Connection Dropped\n".format(time.strftime('%m/%d/%Y %H:%M:%S'))) 159 | print("Re-establishing Connection...") 160 | 161 | time.sleep((15*60)+1) # Wait at least 15 minutes before restarting listener 162 | 163 | # --------------------------------------------- 164 | -------------------------------------------------------------------------------- /stocktalk/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 |
3 | 4 | 5 |