├── templates ├── layout.html └── cloud.html ├── README.md ├── static ├── style.css └── jquery.js └── newsApp.py /templates/layout.html: -------------------------------------------------------------------------------- 1 | 2 | News Visualization 3 | 4 | 5 | 6 | 7 | 8 |
9 | {% block body %}{% endblock %} 10 |
11 | 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # news-visualization 2 | Watson Discovery News Visualization 3 | 4 | A [D3.js](https://d3js.org/) visualization using the pre-enriched news content available as part of the [Watson Discovery Service](https://www.ibm.com/watson/developercloud/discovery.html) 5 | 6 | To get started, provision an instance of Discovery on [Bluemix](https://console.ng.bluemix.net/) 7 | 8 | Clone this repo and navigate to the news-visualization directory: 9 | ``` 10 | git clone https://github.com/zwalchuk/news-visualization.git 11 | cd news-visualization 12 | ``` 13 | Gather your username, password, environment_id, and collection_id and store them as environment variables (no spaces around `=`): 14 | ``` 15 | export USERNAME='your-username' 16 | export PASSWORD='your-password' 17 | export ENVIRONMENT_ID='your-environment-id' 18 | export COLLECTION_ID='your-collection-id' 19 | ``` 20 | Launch the app: 21 | ``` 22 | python newsApp.py 23 | ``` 24 | 25 | -------------------------------------------------------------------------------- /static/style.css: -------------------------------------------------------------------------------- 1 | body { font-family: sans-serif; background: #eee; } 2 | a, h1, h2 { color: #383838; } 3 | a:link {text-decoration: none;} 4 | a:hover {text-decoration: underline;} 5 | a:active {text-decoration: none;} 6 | a:visited {color:grey;} 7 | h1, h2 { font-family: 'Times', serif; } 8 | h1 { border-bottom: 2px solid #eee; } 9 | h2 { font-size: 1.2em;} 10 | td { width: 33.33%; } 11 | table { padding-top: 0.8em; } 12 | hr { border: 0; height: 0; border-top: 1px solid rgba(0,0,0,0.1); border-bottom: 1px solid rgba(255,255,255,0.3);} 13 | 14 | .page { height: 85vh; margin: 3em; background: white; 15 | box-shadow: 10px 10px 5px #888888;} 16 | .entries { margin: 0; padding: 0; } 17 | .add-entry { font-size: 0.9em; font-weight: bold; padding: 0.8em;} 18 | 19 | .flash 
{ background: #cee5F5; padding: 0.5em; 20 | border: 1px solid #aacbe2; margin-bottom: 1em;} 21 | .error { background: #f0d6d6; padding: 0.5em; } 22 | .node { font-family: Lato;} 23 | 24 | .link { stroke: #999; stroke-width: .5px;} 25 | #loading { display: none;} 26 | #chart { margin: auto; display: none; } 27 | .graph { width: 75%; height: 100%; background: #383838; 28 | float: left} 29 | .headlines { width: 25%; height: 100%; float: left; 30 | overflow-y: auto; 31 | } 32 | .titles { padding: .5em } 33 | -------------------------------------------------------------------------------- /newsApp.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import sys 3 | import os 4 | from flask import Flask, request, session, g, redirect, url_for, \ 5 | abort, render_template, flash, json, jsonify 6 | 7 | 8 | 9 | 10 | app = Flask(__name__) 11 | 12 | username = os.environ.get('USERNAME', None) 13 | password = os.environ.get('PASSWORD', None) 14 | environment_id = os.environ.get('ENVIRONMENT_ID', None) 15 | collection_id = os.environ.get('COLLECTION_ID', None) 16 | endpoint = "https://gateway.watsonplatform.net/discovery/api/v1/environments/"+environment_id+"/collections/"+collection_id+"/query?version=2016-11-07&" 17 | 18 | @app.route('/') 19 | def error(): 20 | 21 | return "Please specify a search term in your URL" 22 | 23 | @app.route('/newHeadlines', methods=['POST']) 24 | def newHeadlines(): 25 | combo = request.json['combo'] 26 | comboWords=combo.replace("\"","").split('|') 27 | 28 | combos=[] 29 | headlines={} 30 | 31 | 32 | try: 33 | get_url = endpoint+"query=title:("+combo+")|enrichedTitle.entities.text:("+combo+")&count=50&return=title,url" 34 | results = requests.get(url=get_url, auth=(username, password)) 35 | response = results.json() 36 | 37 | 38 | for article in response['results']: 39 | combos[:]=[] 40 | for word in comboWords: 41 | if word.upper() in article['title'].upper(): 42 | combos.append(word) 43 | 
comboStr = ''.join(sorted(combos)) 44 | comboLen = len(combos) 45 | if comboLen not in headlines: 46 | headlines[comboLen]={} 47 | if comboStr not in headlines[comboLen]: 48 | headlines[comboLen][comboStr]={} 49 | headlines[comboLen][comboStr][article['title']]=article['url'] 50 | 51 | 52 | except Exception as e: 53 | print e 54 | output = { 'headlines': headlines } 55 | return jsonify(output) 56 | 57 | @app.route('/click', methods=['GET', 'POST']) 58 | def click(): 59 | 60 | 61 | nodes=request.json['nodes'] 62 | links=request.json['links'] 63 | bigWords=request.json['bigWords'] 64 | index=request.json['current'] 65 | 66 | x = nodes[index]['x'] 67 | y = nodes[index]['y'] 68 | text = nodes[index]['text'] 69 | 70 | length = len(nodes) 71 | words={} 72 | headlines={} 73 | combo="" 74 | comboWords=[] 75 | combos=[] 76 | for node in nodes: 77 | words[node['text']] = node['index'] 78 | if node['expand'] == 1: 79 | comboWords.append(node['text']) 80 | for word in comboWords: 81 | combo+="\""+word+"\"|" 82 | combo=combo[:-1] 83 | try: 84 | get_url = endpoint+"query=title:("+combo+")|enrichedTitle.entities.text:("+combo+")&count=50&return=title,url" 85 | results = requests.get(url=get_url, auth=(username, password)) 86 | response = results.json() 87 | 88 | 89 | for article in response['results']: 90 | combos[:]=[] 91 | for word in comboWords: 92 | if word.upper() in article['title'].upper(): 93 | combos.append(word) 94 | comboStr = ''.join(sorted(combos)) 95 | comboLen = len(combos) 96 | if comboLen not in headlines: 97 | headlines[comboLen]={} 98 | if comboStr not in headlines[comboLen]: 99 | headlines[comboLen][comboStr]={} 100 | headlines[comboLen][comboStr][article['title']]=article['url'] 101 | 102 | except Exception as e: 103 | print e 104 | 105 | output = { 'results': { 'nodes': [], 'links': [], 'headlines': headlines, 'combo': combo } } 106 | 107 | try: 108 | get_url = 
endpoint+"query=title:\""+text+"\"&aggregation=nested(enrichedTitle.entities).filter(enrichedTitle.entities.type:Person).term(enrichedTitle.entities.text,count:100)&count=0" 109 | results = requests.get(url=get_url, auth=(username, password)) 110 | response=results.json() 111 | 112 | #add to bigWords 113 | wordList = [] 114 | for kword in response['aggregations'][0]['aggregations'][0]['aggregations'][0]['results']: 115 | wordList.append(kword['key']) 116 | bigWords[text]={'wordList':wordList,'expand':1} 117 | output['results']['bigWords']=bigWords 118 | count1=0 119 | count2=0 120 | 121 | for newWord in bigWords[text]['wordList']: 122 | if newWord in words: 123 | output['results']['links'].append({'source':index,'target':words[newWord]}) 124 | continue 125 | if count2 < 5: 126 | for bigWord in bigWords: 127 | if bigWords[bigWord]['expand']==0: 128 | continue 129 | if bigWord == text: 130 | continue 131 | if newWord in bigWords[bigWord]['wordList']: 132 | if newWord not in words: 133 | output['results']['nodes'].append({'x': x, 'y': y, 'text': newWord, 'size': 1.5, 'color': 'white', 'expand': 0}) 134 | words[newWord]=length 135 | length+=1 136 | count2+=1 137 | output['results']['links'].append({'source':words[newWord],'target':words[bigWord]}) 138 | output['results']['links'].append({'source':words[newWord],'target':index}) 139 | if newWord not in words and count1 < 5: 140 | output['results']['nodes'].append({'x': x, 'y': y, 'text': newWord, 'size': 1.5, 'color': 'white', 'expand': 0}) 141 | output['results']['links'].append({'source':length,'target':index}) 142 | length+=1 143 | count1+=1 144 | 145 | except Exception as e: 146 | print e 147 | 148 | return jsonify(output) 149 | 150 | @app.route('/favicon.ico') 151 | def favicon(): 152 | return "" 153 | 154 | 155 | @app.route('/') 156 | def news_page(keyword): 157 | index=0 158 | nodes=[] 159 | links=[] 160 | headlines={} 161 | headlines[1]={} 162 | headlines[1][keyword]={} 163 | 164 | bigWords={} 165 | 166 | try: 
167 | get_url = endpoint+"query=title:("+keyword+")|enrichedTitle.entities.text:("+keyword+")&count=50&return=title,url" 168 | results = requests.get(url=get_url, auth=(username, password)) 169 | response = results.json() 170 | 171 | for article in response['results']: 172 | headlines[1][keyword][article['title']]=article['url'] 173 | 174 | 175 | except Exception as e: 176 | print e 177 | 178 | try: 179 | get_url = endpoint+"query=title:\""+keyword+"\"&aggregation=nested(enrichedTitle.entities).filter(enrichedTitle.entities.type:Person).term(enrichedTitle.entities.text,count:100)&count=0" 180 | results = requests.get(url=get_url, auth=(username, password)) 181 | response=results.json() 182 | 183 | #add to bigWords 184 | wordList = [] 185 | for kword in response['aggregations'][0]['aggregations'][0]['aggregations'][0]['results']: 186 | wordList.append(kword['key']) 187 | bigWords[keyword]={'wordList':wordList,'expand':1} 188 | except Exception as e: 189 | print e 190 | 191 | count=0 192 | nodes.insert(0, {'x': 300, 'y': 200, 'text': keyword, 'size': 3, 'fixed': 1, 'color': '#0066FF', 'expand': 1}) 193 | for word in bigWords[keyword]['wordList']: 194 | if count > 9: 195 | break 196 | if word == keyword: 197 | continue 198 | else: 199 | nodes.append({'x': 300, 'y': 200, 'text': word, 'size': 1.5, 'color': 'white', 'expand': 0}) 200 | links.append({'source':count + 1,'target':0}) 201 | count+=1 202 | 203 | return render_template('cloud.html', nodes=json.dumps(nodes), links=json.dumps(links), bigWords=json.dumps(bigWords), headlines=json.dumps(headlines)) 204 | 205 | port = os.getenv('VCAP_APP_PORT', '8000') 206 | 207 | if __name__ == '__main__': 208 | app.run(host='0.0.0.0', port=int(port), debug=True) 209 | 210 | -------------------------------------------------------------------------------- /templates/cloud.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block body %} 3 | 4 |
5 | 6 |
7 | 8 | 303 | 304 | {% endblock %} 305 | -------------------------------------------------------------------------------- /static/jquery.js: -------------------------------------------------------------------------------- 1 | !function(a,b){"object"==typeof module&&"object"==typeof module.exports?module.exports=a.document?b(a,!0):function(a){if(!a.document)throw new Error("jQuery requires a window with a document");return b(a)}:b(a)}("undefined"!=typeof window?window:this,function(a,b){function c(a){var b=a.length,c=ab.type(a);return"function"===c||ab.isWindow(a)?!1:1===a.nodeType&&b?!0:"array"===c||0===b||"number"==typeof b&&b>0&&b-1 in a}function d(a,b,c){if(ab.isFunction(b))return ab.grep(a,function(a,d){return!!b.call(a,d,a)!==c});if(b.nodeType)return ab.grep(a,function(a){return a===b!==c});if("string"==typeof b){if(hb.test(b))return ab.filter(b,a,c);b=ab.filter(b,a)}return ab.grep(a,function(a){return U.call(b,a)>=0!==c})}function e(a,b){for(;(a=a[b])&&1!==a.nodeType;);return a}function f(a){var b=ob[a]={};return ab.each(a.match(nb)||[],function(a,c){b[c]=!0}),b}function g(){$.removeEventListener("DOMContentLoaded",g,!1),a.removeEventListener("load",g,!1),ab.ready()}function h(){Object.defineProperty(this.cache={},0,{get:function(){return{}}}),this.expando=ab.expando+Math.random()}function i(a,b,c){var d;if(void 0===c&&1===a.nodeType)if(d="data-"+b.replace(ub,"-$1").toLowerCase(),c=a.getAttribute(d),"string"==typeof c){try{c="true"===c?!0:"false"===c?!1:"null"===c?null:+c+""===c?+c:tb.test(c)?ab.parseJSON(c):c}catch(e){}sb.set(a,b,c)}else c=void 0;return c}function j(){return!0}function k(){return!1}function l(){try{return $.activeElement}catch(a){}}function m(a,b){return ab.nodeName(a,"table")&&ab.nodeName(11!==b.nodeType?b:b.firstChild,"tr")?a.getElementsByTagName("tbody")[0]||a.appendChild(a.ownerDocument.createElement("tbody")):a}function n(a){return a.type=(null!==a.getAttribute("type"))+"/"+a.type,a}function o(a){var b=Kb.exec(a.type);return 
b?a.type=b[1]:a.removeAttribute("type"),a}function p(a,b){for(var c=0,d=a.length;d>c;c++)rb.set(a[c],"globalEval",!b||rb.get(b[c],"globalEval"))}function q(a,b){var c,d,e,f,g,h,i,j;if(1===b.nodeType){if(rb.hasData(a)&&(f=rb.access(a),g=rb.set(b,f),j=f.events)){delete g.handle,g.events={};for(e in j)for(c=0,d=j[e].length;d>c;c++)ab.event.add(b,e,j[e][c])}sb.hasData(a)&&(h=sb.access(a),i=ab.extend({},h),sb.set(b,i))}}function r(a,b){var c=a.getElementsByTagName?a.getElementsByTagName(b||"*"):a.querySelectorAll?a.querySelectorAll(b||"*"):[];return void 0===b||b&&ab.nodeName(a,b)?ab.merge([a],c):c}function s(a,b){var c=b.nodeName.toLowerCase();"input"===c&&yb.test(a.type)?b.checked=a.checked:("input"===c||"textarea"===c)&&(b.defaultValue=a.defaultValue)}function t(b,c){var d=ab(c.createElement(b)).appendTo(c.body),e=a.getDefaultComputedStyle?a.getDefaultComputedStyle(d[0]).display:ab.css(d[0],"display");return d.detach(),e}function u(a){var b=$,c=Ob[a];return c||(c=t(a,b),"none"!==c&&c||(Nb=(Nb||ab("