├── cache └── .gitignore ├── README.markdown └── index.php /cache/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | Important People 2 | ================ 3 | 4 | This script grabs a set of Twitter search results and retrieves the author's follower and update counts. Results are color coded and can be downloaded as a CSV. 5 | 6 | BTW: I wrote this in about half an hour, so there's very little (read: no) error checking. If the fail whale were to rear its ugly head, bad things may happen. 7 | 8 | Based on an idea from [@mulls](http://twitter.com/mulls). 9 | -------------------------------------------------------------------------------- /index.php: -------------------------------------------------------------------------------- 1 | 50) $pages_to_fetch = 50; 11 | if($pages_to_fetch < 1) $pages_to_fetch = 1; 12 | 13 | if(isset($_GET['q'])) 14 | { 15 | $tweets = array(); 16 | $users = array(); 17 | $q = urlencode($_GET['q']); 18 | $page = 1; 19 | $url = "http://search.twitter.com/search.atom?q=$q&rpp=100&page=$page"; 20 | 21 | $pages = 0; 22 | while($url && $pages++ < $pages_to_fetch) 23 | { 24 | // Note: we're caching search results. Feel free to turn off if you want real-time results. 
25 | $xmlstr = geturl($url); 26 | $xml = simplexml_load_string($xmlstr); 27 | foreach($xml->entry as $tweet) 28 | { 29 | $user = array_shift(explode(' ', (string) $tweet->author->name)); 30 | if((strtolower($user) != strtolower($_GET['q'])) && (strtolower("@$user") != strtolower($_GET['q']))) 31 | { 32 | $tweets[] = array('msg' => (string) $tweet->content, 33 | 'user' => $user, 34 | 'link' => (string) $tweet->link[0]['href'], 35 | 'dt' => (string) $tweet->published); 36 | 37 | // Only fetch server-side user data when doing a csv dump 38 | if(isset($_GET['csv']) && !isset($users[$user])) 39 | $users[$user] = user_info($user); 40 | } 41 | } 42 | 43 | // Look for another page of results 44 | $url = false; 45 | foreach($xml->link as $link) 46 | { 47 | if((string) $link['rel'] == 'next') 48 | $url = (string) $link['href']; 49 | } 50 | } 51 | 52 | if(isset($_GET['csv'])) 53 | { 54 | header("Cache-Control: must-revalidate, post-check=0, pre-check=0"); 55 | header("Content-Disposition: attachment; filename=" . $_GET['q'] . ".csv"); 56 | header("Content-Type: text/csv"); 57 | 58 | echo "message,username,permalink,timestamp,following,followers,updates\n"; 59 | $fp = fopen('php://output', 'w'); 60 | foreach($tweets as $t) 61 | { 62 | unset($users[$t['user']]['username']); 63 | $arr = array_merge($t, $users[$t['user']]); 64 | fputcsv($fp, $arr); 65 | } 66 | fclose($fp); 67 | exit; 68 | } 69 | } 70 | 71 | // Check our current API limit 72 | // $xmlstr = geturl('http://twitter.com/account/rate_limit_status.xml'); 73 | // $xml = simplexml_load_string($xmlstr); 74 | // $api_calls_remaining = (string) $xml->{'remaining-hits'}; 75 | // $reset_time = local_time((string) $xml->{'reset-time'}); 76 | 77 | // END OF THE MAIN SCRIPT 78 | // JUST HELPER FUNCTIONS BELOW 79 | 80 | function geturl($url, $cache = true) 81 | { 82 | // Please be nice and cache your requests! 83 | if($cache) 84 | { 85 | $fn = 'cache/' . 
md5($url); 86 | if(file_exists($fn) && filemtime($fn) > (time() - 3600) && !isset($_GET['cb'])) 87 | return file_get_contents($fn); 88 | } 89 | 90 | $ch = curl_init(); 91 | curl_setopt($ch, CURLOPT_URL, $url); 92 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); 93 | curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5); 94 | $data = curl_exec($ch); 95 | curl_close($ch); 96 | 97 | if($cache && !file_put_contents($fn, $data)) 98 | die("Caching is turned on, but it doesn't appear that your cache directory is writable. Tried to write to '$fn'."); 99 | 100 | return $data; 101 | } 102 | 103 | // Let's face it. Twitter's API is often shitty and they limit you to 100 requests per hour. 104 | // With that in mind, I offer three solutions for grabbing user data, which can be controlled 105 | // via the $stupid_twitter parameter. They are... 106 | // 107 | // 1) Scrape the data directly from Twitter's website 108 | // 2) Pull the data using YQL from Yahoo!, which handles caching for us in case twitter.com goes down 109 | // 3) Use Twitter's API directly 110 | // 111 | // I'd suggest using YQL as it's more reliable than the API and a bit nicer on their servers than raw scraping. 
112 | function user_info($username, $stupid_twitter = 'yql') 113 | { 114 | if($stupid_twitter === 'scrape') // Scrape public website 115 | { 116 | // Please leave the cache turned on so we're at least scraping Twitter "nicely" 117 | $html = geturl("http://twitter.com/$username"); 118 | $followers = match('/follower_count.*?([0-9,]+)/ms', $html, 1); 119 | $updates = match('/update_count.*?([0-9,]+)/ms', $html, 1); 120 | 121 | $followers = preg_replace('/[^0-9]/', '', $followers); 122 | $updates = preg_replace('/[^0-9]/', '', $updates); 123 | 124 | return array('followers' => $followers, 'updates' => $updates, 'username' => $username); 125 | } 126 | elseif($stupid_twitter === 'yql') // Go through YQL 127 | { 128 | // YQL: select * from html where url="http://twitter.com/tylerhall/" and xpath='//span[contains(@class, "_count")]' 129 | $user = urlencode($username); 130 | $url = "http://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20html%20where%20url%3D%22http%3A%2F%2Ftwitter.com%2F" . $user . "%2F%22%20and%20xpath%3D'%2F%2Fspan%5Bcontains(%40class%2C%20%22_count%22)%5D'&format=xml"; 131 | $xmlstr = geturl($url); 132 | $uxml = simplexml_load_string($xmlstr); 133 | 134 | $following = preg_replace('/[^0-9]/', '', (string) $uxml->results->span[0]); 135 | $followers = preg_replace('/[^0-9]/', '', (string) $uxml->results->span[1]); 136 | $updates = preg_replace('/[^0-9]/', '', (string) $uxml->results->span[2]); 137 | return array('following' => $following, 'followers' => $followers, 'updates' => $updates, 'username' => $username); 138 | } 139 | else // Use Twitter's API 140 | { 141 | $xmlstr = geturl("http://twitter.com/users/show/" . urlencode($user) . 
".xml"); 142 | $uxml = simplexml_load_string($xmlstr); 143 | return array('followers' => (string) $uxml->followers_count, 'updates' => (string) $uxml->statuses_count, 'username' => $username); 144 | } 145 | } 146 | 147 | // Simple wrapper around preg_match() 148 | function match($regex, $str, $i = 0) 149 | { 150 | if(preg_match($regex, $str, $match) == 1) 151 | return $match[$i]; 152 | else 153 | return false; 154 | } 155 | 156 | // Convert a timestamp or date formatted string to a local time 157 | // I'm sure there's some obscure PHP function I just don't know about 158 | // that already does this. Right? 159 | function local_time($dt, $format = 'm/d/Y g:ia') 160 | { 161 | if(ctype_digit($dt) !== true) 162 | $dt = strtotime($dt); 163 | 164 | $arr = localtime($dt, true); 165 | $local_time = date($format, mktime($arr['tm_hour'], $arr['tm_min'], $arr['tm_sec'], $arr['tm_mon'] + 1, $arr['tm_mday'], $arr['tm_year'])); 166 | return $local_time; 167 | } 168 | ?> 169 | 171 | 172 | 173 |
174 | 175 | 176 |During a conversation at work, @mulls wanted a quick way to see who are the most influential people tweeting about a specific topic. @chadauld and I came up with the simple metric of ranking users by their follower count. And this is the result.
317 | 318 | 319 |Users we consider influential are colored bright yellow. Mid-range users are pink, gradating down to users that no one cares about in white, and ordered by date — most recent on top.
320 | 321 |For the math nerds in the house, we deem important users to be those where
322 |followers > μ + σ / 2323 |
and un-important users are where
324 |followers < μ - σ / 4325 | 326 |
That said, given more time, we should probably develop a better metric. Take into account their updates per day, or how many followers *their* followers have. Something like that.
327 | 328 |Anyway, because Twitter's API occasionally doesn't respond quickly enough, we load the follower counts on the client side by making callbacks to Yahoo!'s YQL service, which pulls the data for us and also serves as a proxy for when Twitter goes down.
329 | 330 |This was written in a couple hours, so don't hate too much. (We know it doesn't currently work in Internet Explorer.) Feel free to download and run it on your own box.
331 | 332 |Update 4/18: We've added an experimental new feature which attempts to highlight (er...ignore) users that we detect are spammy. Our reason for doing this is to try and filter out users that are merely contributing to Twitter's noise. You'll see that these users are greyed out. And here's the formula we're using to detect them:
333 |(followers > μ + σ / 4) && (following > followers * 0.75)334 | 335 | 336 |
Do you find this hack useful? We'd love to get your feedback as we're thinking of incorporating this functionality directly into Sideline. Let us know! Feedback is welcome either at @tylerhall, @chadauld, or @ysideline.
339 | 340 || Users | 361 |Followers | 362 |Following | 363 |Updates | 364 |Date ↓ | 365 |Message | 366 |
|---|---|---|---|---|---|
| 372 | | - | 373 |- | 374 |- | 375 |376 | | 377 | |