43 | settings 44 | 49 |
50 | 69 |Loading from Hacker News |
├── .gitignore ├── README ├── app.yaml ├── feeds.js ├── images ├── h114.png ├── h128.png └── h256.png ├── index.coffee ├── index.html ├── index.less ├── lib ├── badge.js ├── coffee-script.js ├── date.js ├── jquery.deserialize.js ├── jquery.min.js ├── less-1.0.40.js ├── readability.js └── underscore.js ├── main.py ├── start.sh └── up.sh /.gitignore: -------------------------------------------------------------------------------- 1 | **/*.sw* 2 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | A reader for the Hacker News website (http://news.ycombinator.com) built in 2 | HTML5 and demonstrating the "HTML5 Everywhere" approach to contemporary 3 | mobile and desktop app development. 4 | 5 | Overview of the initial version: 6 | http://softwareas.com/is-this-what-the-app-of-2015-looks-like-html5-coffeescript-less-webstore-phonegap-apparatio 7 | -------------------------------------------------------------------------------- /app.yaml: -------------------------------------------------------------------------------- 1 | application: hackynews 2 | version: 1 3 | runtime: python 4 | api_version: 1 5 | 6 | handlers: 7 | - url: / 8 | static_files: index.html 9 | upload: index.html 10 | - url: /index.(.+) 11 | static_files: index.\1 12 | upload: index.(.+) 13 | - url: /lib/(.+) 14 | static_files: lib/\1 15 | upload: lib/(.+) 16 | - url: /images/(.+) 17 | static_files: images/\1 18 | upload: images/(.+) -------------------------------------------------------------------------------- /feeds.js: -------------------------------------------------------------------------------- 1 | google.load('feeds', '1') 2 | google.setOnLoadCallback(function() { 3 | if (window.init) return window.init(); 4 | setTimeout(arguments.callee, 500); 5 | }); 6 | -------------------------------------------------------------------------------- /images/h114.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahemoff/hackernews/45e8052cdaa027b21b80a8359f310e99494312ee/images/h114.png -------------------------------------------------------------------------------- /images/h128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahemoff/hackernews/45e8052cdaa027b21b80a8359f310e99494312ee/images/h128.png -------------------------------------------------------------------------------- /images/h256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahemoff/hackernews/45e8052cdaa027b21b80a8359f310e99494312ee/images/h256.png -------------------------------------------------------------------------------- /index.coffee: -------------------------------------------------------------------------------- 1 | 2 | ############################################################################### 3 | # GENERIC 4 | ############################################################################### 5 | 6 | $.fn.radio = () -> $(this).show().siblings().hide() 7 | $.fn.radioClass = (className) -> 8 | $(this).addClass(className).siblings().removeClass(className) 9 | 10 | $(".toggle span").live "click", () -> $(this).parent().next().slideToggle() 11 | $.fn.src = (url, throttle) -> 12 | $(this).attr("src", url) 13 | return 14 | namespace = arguments.callee 15 | return if ($iframe=$(this)).attr("src")==url 16 | throttle = throttle||200 17 | now = +new Date 18 | clearTimeout(arguments.callee.timer) 19 | timeSinceLastReload = +new Date - (namespace.lastReload||0) 20 | timeTillReload = Math.max(0,throttle-timeSinceLastReload) 21 | namespace.timer = setTimeout( () -> 22 | $iframe.attr("src", url) 23 | namespace.lastReload = now 24 | , timeTillReload) 25 | return $iframe 26 | encode = (s) -> s.replace(":","%3a").replace(/\//g, "%2f") 27 | delay = (ms, 
func) -> setTimeout func, ms 28 | 29 | $.fn.checked = () -> $(this).attr("checked") 30 | 31 | ############################################################################### 32 | # TEMPLATES 33 | ############################################################################### 34 | 35 | storyTemplate = _.template $("#storyTemplate").html() 36 | contentPanelTemplate = _.template($("#contentPanelTemplate").html()) 37 | 38 | ############################################################################### 39 | # LOAD STORIES 40 | ############################################################################### 41 | 42 | Object.prototype.attr = (name) -> 43 | try 44 | return this.xmlNode.getElementsByTagName(name)[0].firstChild.textContent 45 | catch e 46 | return '-' 47 | 48 | Story = (entry) -> 49 | story = _(this).extend 50 | title: entry.attr 'title' 51 | url: entry.attr 'link' 52 | points: entry.attr 'points' 53 | username: entry.attr 'username' 54 | num_comments: entry.attr 'num_comments' 55 | commentsURL: entry.attr 'comments' 56 | created: Date.fromISO8601(entry.attr('create_ts')).getTime() 57 | posterURL: "http://news.ycombinator.com/user?id=#{entry.attr 'username'}" 58 | postedAgo: Date.fromISO8601(entry.attr 'create_ts').timeago() 59 | simpleURL: "http://www.instapaper.com/text?u=#{encode(entry.attr 'link')}" 60 | this 61 | 62 | updateCount = 0 63 | update = () -> 64 | feed = new google.feeds.Feed "http://www.hnsearch.com/rss?nocache=#{Math.round 1e9*Math.random()}" 65 | feed.setResultFormat google.feeds.Feed.MIXED_FORMAT 66 | feed.setNumEntries 30 67 | feed.load (res) -> 68 | return if res.error 69 | stories = res.feed.entries.map (entry) -> new Story(entry) 70 | stories.sort (s1,s2) -> s2.created - s1.created 71 | $("#stories").empty().hide() 72 | _(stories).each (story, i) -> 73 | story.prerender = !updateCount and i<3 #pre-render first 3 stories on first paint 74 | $("
Loading from Hacker News |
Sorry, readability was unable to parse this page for content. If you feel like it should have been able to, please let us know by submitting an issue.
", 107 | (readability.frameHack ? "It appears this page uses frames. Unfortunately, browser security properties often cause Readability to fail on pages that include frames. You may want to try running readability itself on this source page: " + readability.biggestFrame.src + "
" : ""), 108 | "Also, please note that Readability does not play very nicely with front pages. Readability is intended to work on articles with a sizable chunk of text that you'd like to read comfortably. If you're using Readability on a landing page (like nytimes.com for example), please click into an article first before using Readability.
" 109 | ].join(''); 110 | 111 | nextPageLink = null; 112 | } 113 | 114 | overlay.id = "readOverlay"; 115 | innerDiv.id = "readInner"; 116 | 117 | /* Apply user-selected styling */ 118 | document.body.className = readStyle; 119 | document.dir = readability.getSuggestedDirection(articleTitle.innerHTML); 120 | 121 | if (readStyle === "style-athelas" || readStyle === "style-apertura"){ 122 | overlay.className = readStyle + " rdbTypekit"; 123 | } 124 | else { 125 | overlay.className = readStyle; 126 | } 127 | innerDiv.className = readMargin + " " + readSize; 128 | 129 | if(typeof(readConvertLinksToFootnotes) !== 'undefined' && readConvertLinksToFootnotes === true) { 130 | readability.convertLinksToFootnotes = true; 131 | } 132 | 133 | /* Glue the structure of our document together. */ 134 | innerDiv.appendChild( articleTitle ); 135 | innerDiv.appendChild( articleContent ); 136 | innerDiv.appendChild( articleFooter ); 137 | overlay.appendChild( articleTools ); 138 | overlay.appendChild( innerDiv ); 139 | 140 | /* Clear the old HTML, insert the new content. */ 141 | document.body.innerHTML = ""; 142 | document.body.insertBefore(overlay, document.body.firstChild); 143 | document.body.removeAttribute('style'); 144 | 145 | if(readability.frameHack) 146 | { 147 | var readOverlay = document.getElementById('readOverlay'); 148 | readOverlay.style.height = '100%'; 149 | readOverlay.style.overflow = 'auto'; 150 | } 151 | 152 | /** 153 | * If someone tries to use Readability on a site's root page, give them a warning about usage. 154 | **/ 155 | if((window.location.protocol + "//" + window.location.host + "/") === window.location.href) 156 | { 157 | articleContent.style.display = "none"; 158 | var rootWarning = document.createElement('p'); 159 | rootWarning.id = "readability-warning"; 160 | rootWarning.innerHTML = "Readability was intended for use on individual articles and not home pages. 
" + 161 | "If you'd like to try rendering this page anyway, click here to continue."; 162 | 163 | innerDiv.insertBefore( rootWarning, articleContent ); 164 | } 165 | 166 | readability.postProcessContent(articleContent); 167 | 168 | window.scrollTo(0, 0); 169 | 170 | /* If we're using the Typekit library, select the font */ 171 | if (readStyle === "style-athelas" || readStyle === "style-apertura") { 172 | readability.useRdbTypekit(); 173 | } 174 | 175 | if (nextPageLink) { 176 | /** 177 | * Append any additional pages after a small timeout so that people 178 | * can start reading without having to wait for this to finish processing. 179 | **/ 180 | window.setTimeout(function() { 181 | readability.appendNextPage(nextPageLink); 182 | }, 500); 183 | } 184 | 185 | /** Smooth scrolling **/ 186 | document.onkeydown = function(e) { 187 | var code = (window.event) ? event.keyCode : e.keyCode; 188 | if (code === 16) { 189 | readability.reversePageScroll = true; 190 | return; 191 | } 192 | 193 | if (code === 32) { 194 | readability.curScrollStep = 0; 195 | var windowHeight = window.innerHeight ? window.innerHeight : (document.documentElement.clientHeight ? document.documentElement.clientHeight : document.body.clientHeight); 196 | 197 | if(readability.reversePageScroll) { 198 | readability.scrollTo(readability.scrollTop(), readability.scrollTop() - (windowHeight - 50), 20, 10); 199 | } 200 | else { 201 | readability.scrollTo(readability.scrollTop(), readability.scrollTop() + (windowHeight - 50), 20, 10); 202 | } 203 | 204 | return false; 205 | } 206 | }; 207 | 208 | document.onkeyup = function(e) { 209 | var code = (window.event) ? event.keyCode : e.keyCode; 210 | if (code === 16) { 211 | readability.reversePageScroll = false; 212 | return; 213 | } 214 | }; 215 | }, 216 | 217 | /** 218 | * Run any post-process modifications to article content as necessary. 
219 | * 220 | * @param Element 221 | * @return void 222 | **/ 223 | postProcessContent: function(articleContent) { 224 | if(readability.convertLinksToFootnotes && !window.location.href.match(/wikipedia\.org/g)) { 225 | readability.addFootnotes(articleContent); 226 | } 227 | 228 | readability.fixImageFloats(articleContent); 229 | }, 230 | 231 | /** 232 | * Some content ends up looking ugly if the image is too large to be floated. 233 | * If the image is wider than a threshold (currently 55%), no longer float it, 234 | * center it instead. 235 | * 236 | * @param Element 237 | * @return void 238 | **/ 239 | fixImageFloats: function (articleContent) { 240 | var imageWidthThreshold = Math.min(articleContent.offsetWidth, 800) * 0.55, 241 | images = articleContent.getElementsByTagName('img'); 242 | 243 | for(var i=0, il = images.length; i < il; i+=1) { 244 | var image = images[i]; 245 | 246 | if(image.offsetWidth > imageWidthThreshold) { 247 | image.className += " blockImage"; 248 | } 249 | } 250 | }, 251 | 252 | /** 253 | * Get the article tools Element that has buttons like reload, print, email. 254 | * 255 | * @return void 256 | **/ 257 | getArticleTools: function () { 258 | var articleTools = document.createElement("DIV"); 259 | 260 | articleTools.id = "readTools"; 261 | articleTools.innerHTML = 262 | "Reload Original Page" + 263 | "Print Page" + 264 | "Email Page"; 265 | 266 | return articleTools; 267 | }, 268 | 269 | /** 270 | * retuns the suggested direction of the string 271 | * 272 | * @return "rtl" || "ltr" 273 | **/ 274 | getSuggestedDirection: function(text) { 275 | function sanitizeText() { 276 | return text.replace(/@\w+/, ""); 277 | } 278 | 279 | function countMatches(match) { 280 | var matches = text.match(new RegExp(match, "g")); 281 | return matches !== null ? 
matches.length : 0; 282 | } 283 | 284 | function isRTL() { 285 | var count_heb = countMatches("[\\u05B0-\\u05F4\\uFB1D-\\uFBF4]"); 286 | var count_arb = countMatches("[\\u060C-\\u06FE\\uFB50-\\uFEFC]"); 287 | 288 | // if 20% of chars are Hebrew or Arbic then direction is rtl 289 | return (count_heb + count_arb) * 100 / text.length > 20; 290 | } 291 | 292 | text = sanitizeText(text); 293 | return isRTL() ? "rtl" : "ltr"; 294 | }, 295 | 296 | 297 | /** 298 | * Get the article title as an H1. 299 | * 300 | * @return void 301 | **/ 302 | getArticleTitle: function () { 303 | var curTitle = "", 304 | origTitle = ""; 305 | 306 | try { 307 | curTitle = origTitle = document.title; 308 | 309 | if(typeof curTitle !== "string") { /* If they had an element with id "title" in their HTML */ 310 | curTitle = origTitle = readability.getInnerText(document.getElementsByTagName('title')[0]); 311 | } 312 | } 313 | catch(e) {} 314 | 315 | if(curTitle.match(/ [\|\-] /)) 316 | { 317 | curTitle = origTitle.replace(/(.*)[\|\-] .*/gi,'$1'); 318 | 319 | if(curTitle.split(' ').length < 3) { 320 | curTitle = origTitle.replace(/[^\|\-]*[\|\-](.*)/gi,'$1'); 321 | } 322 | } 323 | else if(curTitle.indexOf(': ') !== -1) 324 | { 325 | curTitle = origTitle.replace(/.*:(.*)/gi, '$1'); 326 | 327 | if(curTitle.split(' ').length < 3) { 328 | curTitle = origTitle.replace(/[^:]*[:](.*)/gi,'$1'); 329 | } 330 | } 331 | else if(curTitle.length > 150 || curTitle.length < 15) 332 | { 333 | var hOnes = document.getElementsByTagName('h1'); 334 | if(hOnes.length === 1) 335 | { 336 | curTitle = readability.getInnerText(hOnes[0]); 337 | } 338 | } 339 | 340 | curTitle = curTitle.replace( readability.regexps.trim, "" ); 341 | 342 | if(curTitle.split(' ').length <= 4) { 343 | curTitle = origTitle; 344 | } 345 | 346 | var articleTitle = document.createElement("H1"); 347 | articleTitle.innerHTML = curTitle; 348 | 349 | return articleTitle; 350 | }, 351 | 352 | /** 353 | * Get the footer with the readability mark etc. 
354 | * 355 | * @return void 356 | **/ 357 | getArticleFooter: function () { 358 | var articleFooter = document.createElement("DIV"); 359 | 360 | /** 361 | * For research purposes, generate an img src that contains the chosen readstyle etc, 362 | * so we can generate aggregate stats and change styles based on them in the future 363 | **/ 364 | // var statsQueryParams = "?readStyle=" + encodeURIComponent(readStyle) + "&readMargin=" + encodeURIComponent(readMargin) + "&readSize=" + encodeURIComponent(readSize); 365 | /* TODO: attach this to an image */ 366 | 367 | articleFooter.id = "readFooter"; 368 | articleFooter.innerHTML = [ 369 | " ", 370 | " "].join(''); 381 | 382 | return articleFooter; 383 | }, 384 | 385 | /** 386 | * Prepare the HTML document for readability to scrape it. 387 | * This includes things like stripping javascript, CSS, and handling terrible markup. 388 | * 389 | * @return void 390 | **/ 391 | prepDocument: function () { 392 | /** 393 | * In some cases a body element can't be found (if the HTML is totally hosed for example) 394 | * so we create a new body node and append it to the document. 395 | */ 396 | if(document.body === null) 397 | { 398 | var body = document.createElement("body"); 399 | try { 400 | document.body = body; 401 | } 402 | catch(e) { 403 | document.documentElement.appendChild(body); 404 | dbg(e); 405 | } 406 | } 407 | 408 | document.body.id = "readabilityBody"; 409 | 410 | var frames = document.getElementsByTagName('frame'); 411 | if(frames.length > 0) 412 | { 413 | var bestFrame = null; 414 | var bestFrameSize = 0; /* The frame to try to run readability upon. Must be on same domain. */ 415 | var biggestFrameSize = 0; /* Used for the error message. Can be on any domain. 
*/ 416 | for(var frameIndex = 0; frameIndex < frames.length; frameIndex+=1) 417 | { 418 | var frameSize = frames[frameIndex].offsetWidth + frames[frameIndex].offsetHeight; 419 | var canAccessFrame = false; 420 | try { 421 | var frameBody = frames[frameIndex].contentWindow.document.body; 422 | canAccessFrame = true; 423 | } 424 | catch(eFrames) { 425 | dbg(eFrames); 426 | } 427 | 428 | if(frameSize > biggestFrameSize) { 429 | biggestFrameSize = frameSize; 430 | readability.biggestFrame = frames[frameIndex]; 431 | } 432 | 433 | if(canAccessFrame && frameSize > bestFrameSize) 434 | { 435 | readability.frameHack = true; 436 | 437 | bestFrame = frames[frameIndex]; 438 | bestFrameSize = frameSize; 439 | } 440 | } 441 | 442 | if(bestFrame) 443 | { 444 | var newBody = document.createElement('body'); 445 | newBody.innerHTML = bestFrame.contentWindow.document.body.innerHTML; 446 | newBody.style.overflow = 'scroll'; 447 | document.body = newBody; 448 | 449 | var frameset = document.getElementsByTagName('frameset')[0]; 450 | if(frameset) { 451 | frameset.parentNode.removeChild(frameset); } 452 | } 453 | } 454 | 455 | /* Remove all stylesheets */ 456 | for (var k=0;k < document.styleSheets.length; k+=1) { 457 | if (document.styleSheets[k].href !== null && document.styleSheets[k].href.lastIndexOf("readability") === -1) { 458 | document.styleSheets[k].disabled = true; 459 | } 460 | } 461 | 462 | /* Remove all style tags in head (not doing this on IE) - TODO: Why not? */ 463 | var styleTags = document.getElementsByTagName("style"); 464 | for (var st=0;st < styleTags.length; st+=1) { 465 | styleTags[st].textContent = ""; 466 | } 467 | 468 | /* Turn all double br's into p's */ 469 | /* Note, this is pretty costly as far as processing goes. Maybe optimize later. 
*/ 470 | document.body.innerHTML = document.body.innerHTML.replace(readability.regexps.replaceBrs, '').replace(readability.regexps.replaceFonts, '<$1span>'); 471 | }, 472 | 473 | /** 474 | * For easier reading, convert this document to have footnotes at the bottom rather than inline links. 475 | * @see http://www.roughtype.com/archives/2010/05/experiments_in.php 476 | * 477 | * @return void 478 | **/ 479 | addFootnotes: function(articleContent) { 480 | var footnotesWrapper = document.getElementById('readability-footnotes'), 481 | articleFootnotes = document.getElementById('readability-footnotes-list'); 482 | 483 | if(!footnotesWrapper) { 484 | footnotesWrapper = document.createElement("DIV"); 485 | footnotesWrapper.id = 'readability-footnotes'; 486 | footnotesWrapper.innerHTML = '
tags, etc.
609 | *
610 | * @param Element
611 | * @return void
612 | **/
613 | prepArticle: function (articleContent) {
614 | readability.cleanStyles(articleContent);
615 | readability.killBreaks(articleContent);
616 |
617 | /* Clean out junk from the article content */
618 | readability.cleanConditionally(articleContent, "form");
619 | readability.clean(articleContent, "object");
620 | readability.clean(articleContent, "h1");
621 |
622 | /**
623 | * If there is only one h2, they are probably using it
624 | * as a header and not a subheader, so remove it since we already have a header.
625 | ***/
626 | if(articleContent.getElementsByTagName('h2').length === 1) {
627 | readability.clean(articleContent, "h2");
628 | }
629 | readability.clean(articleContent, "iframe");
630 |
631 | readability.cleanHeaders(articleContent);
632 |
633 | /* Do these last as the previous stuff may have removed junk that will affect these */
634 | readability.cleanConditionally(articleContent, "table");
635 | readability.cleanConditionally(articleContent, "ul");
636 | readability.cleanConditionally(articleContent, "div");
637 |
638 | /* Remove extra paragraphs */
639 | var articleParagraphs = articleContent.getElementsByTagName('p');
640 | for(var i = articleParagraphs.length-1; i >= 0; i-=1) {
641 | var imgCount = articleParagraphs[i].getElementsByTagName('img').length;
642 | var embedCount = articleParagraphs[i].getElementsByTagName('embed').length;
643 | var objectCount = articleParagraphs[i].getElementsByTagName('object').length;
644 |
645 | if(imgCount === 0 && embedCount === 0 && objectCount === 0 && readability.getInnerText(articleParagraphs[i], false) === '') {
646 | articleParagraphs[i].parentNode.removeChild(articleParagraphs[i]);
647 | }
648 | }
649 |
650 | try {
651 | articleContent.innerHTML = articleContent.innerHTML.replace(/
]*>\s*
topCandidate.readability.contentScore) { 852 | topCandidate = candidates[c]; } 853 | } 854 | 855 | /** 856 | * If we still have no top candidate, just use the body as a last resort. 857 | * We also have to copy the body node so it is something we can modify. 858 | **/ 859 | if (topCandidate === null || topCandidate.tagName === "BODY") 860 | { 861 | topCandidate = document.createElement("DIV"); 862 | topCandidate.innerHTML = page.innerHTML; 863 | page.innerHTML = ""; 864 | page.appendChild(topCandidate); 865 | readability.initializeNode(topCandidate); 866 | } 867 | 868 | /** 869 | * Now that we have the top candidate, look through its siblings for content that might also be related. 870 | * Things like preambles, content split by ads that we removed, etc. 871 | **/ 872 | var articleContent = document.createElement("DIV"); 873 | if (isPaging) { 874 | articleContent.id = "readability-content"; 875 | } 876 | var siblingScoreThreshold = Math.max(10, topCandidate.readability.contentScore * 0.2); 877 | var siblingNodes = topCandidate.parentNode.childNodes; 878 | 879 | 880 | for(var s=0, sl=siblingNodes.length; s < sl; s+=1) { 881 | var siblingNode = siblingNodes[s]; 882 | var append = false; 883 | 884 | /** 885 | * Fix for odd IE7 Crash where siblingNode does not exist even though this should be a live nodeList. 886 | * Example of error visible here: http://www.esquire.com/features/honesty0707 887 | **/ 888 | if(!siblingNode) { 889 | continue; 890 | } 891 | 892 | dbg("Looking at sibling node: " + siblingNode + " (" + siblingNode.className + ":" + siblingNode.id + ")" + ((typeof siblingNode.readability !== 'undefined') ? (" with score " + siblingNode.readability.contentScore) : '')); 893 | dbg("Sibling has score " + (siblingNode.readability ? 
siblingNode.readability.contentScore : 'Unknown')); 894 | 895 | if(siblingNode === topCandidate) 896 | { 897 | append = true; 898 | } 899 | 900 | var contentBonus = 0; 901 | /* Give a bonus if sibling nodes and top candidates have the example same classname */ 902 | if(siblingNode.className === topCandidate.className && topCandidate.className !== "") { 903 | contentBonus += topCandidate.readability.contentScore * 0.2; 904 | } 905 | 906 | if(typeof siblingNode.readability !== 'undefined' && (siblingNode.readability.contentScore+contentBonus) >= siblingScoreThreshold) 907 | { 908 | append = true; 909 | } 910 | 911 | if(siblingNode.nodeName === "P") { 912 | var linkDensity = readability.getLinkDensity(siblingNode); 913 | var nodeContent = readability.getInnerText(siblingNode); 914 | var nodeLength = nodeContent.length; 915 | 916 | if(nodeLength > 80 && linkDensity < 0.25) 917 | { 918 | append = true; 919 | } 920 | else if(nodeLength < 80 && linkDensity === 0 && nodeContent.search(/\.( |$)/) !== -1) 921 | { 922 | append = true; 923 | } 924 | } 925 | 926 | if(append) { 927 | dbg("Appending node: " + siblingNode); 928 | 929 | var nodeToAppend = null; 930 | if(siblingNode.nodeName !== "DIV" && siblingNode.nodeName !== "P") { 931 | /* We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident. 
*/ 932 | 933 | dbg("Altering siblingNode of " + siblingNode.nodeName + ' to div.'); 934 | nodeToAppend = document.createElement("DIV"); 935 | try { 936 | nodeToAppend.id = siblingNode.id; 937 | nodeToAppend.innerHTML = siblingNode.innerHTML; 938 | } 939 | catch(er) { 940 | dbg("Could not alter siblingNode to div, probably an IE restriction, reverting back to original."); 941 | nodeToAppend = siblingNode; 942 | s-=1; 943 | sl-=1; 944 | } 945 | } else { 946 | nodeToAppend = siblingNode; 947 | s-=1; 948 | sl-=1; 949 | } 950 | 951 | /* To ensure a node does not interfere with readability styles, remove its classnames */ 952 | nodeToAppend.className = ""; 953 | 954 | /* Append sibling and subtract from our list because it removes the node when you append to another node */ 955 | articleContent.appendChild(nodeToAppend); 956 | } 957 | } 958 | 959 | /** 960 | * So we have all of the content that we need. Now we clean it up for presentation. 961 | **/ 962 | readability.prepArticle(articleContent); 963 | 964 | if (readability.curPageNum === 1) { 965 | articleContent.innerHTML = '
§
'; 1405 | 1406 | document.getElementById("readability-content").appendChild(articlePage); 1407 | 1408 | if(readability.curPageNum > readability.maxPages) { 1409 | var nextPageMarkup = ""; 1410 | 1411 | articlePage.innerHTML = articlePage.innerHTML + nextPageMarkup; 1412 | return; 1413 | } 1414 | 1415 | /** 1416 | * Now that we've built the article page DOM element, get the page content 1417 | * asynchronously and load the cleaned content into the div we created for it. 1418 | **/ 1419 | (function(pageUrl, thisPage) { 1420 | readability.ajax(pageUrl, { 1421 | success: function(r) { 1422 | 1423 | /* First, check to see if we have a matching ETag in headers - if we do, this is a duplicate page. */ 1424 | var eTag = r.getResponseHeader('ETag'); 1425 | if(eTag) { 1426 | if(eTag in readability.pageETags) { 1427 | dbg("Exact duplicate page found via ETag. Aborting."); 1428 | articlePage.style.display = 'none'; 1429 | return; 1430 | } else { 1431 | readability.pageETags[eTag] = 1; 1432 | } 1433 | } 1434 | 1435 | // TODO: this ends up doubling up page numbers on NYTimes articles. Need to generically parse those away. 1436 | var page = document.createElement("DIV"); 1437 | 1438 | /** 1439 | * Do some preprocessing to our HTML to make it ready for appending. 1440 | * • Remove any script tags. Swap and reswap newlines with a unicode character because multiline regex doesn't work in javascript. 1441 | * • Turn any noscript tags into divs so that we can parse them. This allows us to find any next page links hidden via javascript. 1442 | * • Turn all double br's into p's - was handled by prepDocument in the original view. 1443 | * Maybe in the future abstract out prepDocument to work for both the original document and AJAX-added pages. 1444 | **/ 1445 | var responseHtml = r.responseText.replace(/\n/g,'\uffff').replace(/');
1449 | responseHtml = responseHtml.replace(readability.regexps.replaceFonts, '<$1span>');
1450 |
1451 | page.innerHTML = responseHtml;
1452 |
1453 | /**
1454 | * Reset all flags for the next page, as they will search through it and disable as necessary at the end of grabArticle.
1455 | **/
1456 | readability.flags = 0x1 | 0x2 | 0x4;
1457 |
1458 | var nextPageLink = readability.findNextPageLink(page),
1459 | content = readability.grabArticle(page);
1460 |
1461 | if(!content) {
1462 | dbg("No content found in page to append. Aborting.");
1463 | return;
1464 | }
1465 |
1466 | /**
1467 | * Anti-duplicate mechanism. Essentially, get the first paragraph of our new page.
1468 | * Compare it against all of the previous documents we've gotten. If the previous
1469 | * document contains exactly the innerHTML of this first paragraph, it's probably a duplicate.
1470 | **/
1471 | var firstP = content.getElementsByTagName("P").length ? content.getElementsByTagName("P")[0] : null;
1472 | if(firstP && firstP.innerHTML.length > 100) {
1473 | for(var i=1; i <= readability.curPageNum; i+=1) {
1474 | var rPage = document.getElementById('readability-page-' + i);
1475 | if(rPage && rPage.innerHTML.indexOf(firstP.innerHTML) !== -1) {
1476 | dbg('Duplicate of page ' + i + ' - skipping.');
1477 | articlePage.style.display = 'none';
1478 | readability.parsedPages[pageUrl] = true;
1479 | return;
1480 | }
1481 | }
1482 | }
1483 |
1484 | readability.removeScripts(content);
1485 |
1486 | thisPage.innerHTML = thisPage.innerHTML + content.innerHTML;
1487 |
1488 | /**
1489 | * After the page has rendered, post process the content. This delay is necessary because,
1490 | * in webkit at least, offsetWidth is not set in time to determine image width. We have to
1491 | * wait a little bit for reflow to finish before we can fix floating images.
1492 | **/
1493 | window.setTimeout(
1494 | function() { readability.postProcessContent(thisPage); },
1495 | 500
1496 | );
1497 |
1498 | if(nextPageLink) {
1499 | readability.appendNextPage(nextPageLink);
1500 | }
1501 | }
1502 | });
1503 | }(nextPageLink, articlePage));
1504 | },
1505 |
1506 | /**
1507 | * Get an elements class/id weight. Uses regular expressions to tell if this
1508 | * element looks good or bad.
1509 | *
1510 | * @param Element
1511 | * @return number (Integer)
1512 | **/
1513 | getClassWeight: function (e) {
1514 | if(!readability.flagIsActive(readability.FLAG_WEIGHT_CLASSES)) {
1515 | return 0;
1516 | }
1517 |
1518 | var weight = 0;
1519 |
1520 | /* Look for a special classname */
1521 | if (typeof(e.className) === 'string' && e.className !== '')
1522 | {
1523 | if(e.className.search(readability.regexps.negative) !== -1) {
1524 | weight -= 25; }
1525 |
1526 | if(e.className.search(readability.regexps.positive) !== -1) {
1527 | weight += 25; }
1528 | }
1529 |
1530 | /* Look for a special ID */
1531 | if (typeof(e.id) === 'string' && e.id !== '')
1532 | {
1533 | if(e.id.search(readability.regexps.negative) !== -1) {
1534 | weight -= 25; }
1535 |
1536 | if(e.id.search(readability.regexps.positive) !== -1) {
1537 | weight += 25; }
1538 | }
1539 |
1540 | return weight;
1541 | },
1542 |
1543 | nodeIsVisible: function (node) {
1544 | return (node.offsetWidth !== 0 || node.offsetHeight !== 0) && node.style.display.toLowerCase() !== 'none';
1545 | },
1546 |
1547 | /**
1548 | * Remove extraneous break tags from a node.
1549 | *
1550 | * @param Element
1551 | * @return void
1552 | **/
1553 | killBreaks: function (e) {
1554 | try {
1555 | e.innerHTML = e.innerHTML.replace(readability.regexps.killBreaks,'
');
1556 | }
1557 | catch (eBreaks) {
1558 | dbg("KillBreaks failed - this is an IE bug. Ignoring.: " + eBreaks);
1559 | }
1560 | },
1561 |
1562 | /**
1563 | * Clean a node of all elements of type "tag".
1564 | * (Unless it's a youtube/vimeo video. People love movies.)
1565 | *
1566 | * @param Element
1567 | * @param string tag to clean
1568 | * @return void
1569 | **/
1570 | clean: function (e, tag) {
1571 | var targetList = e.getElementsByTagName( tag );
1572 | var isEmbed = (tag === 'object' || tag === 'embed');
1573 |
1574 | for (var y=targetList.length-1; y >= 0; y-=1) {
1575 | /* Allow youtube and vimeo videos through as people usually want to see those. */
1576 | if(isEmbed) {
1577 | var attributeValues = "";
1578 | for (var i=0, il=targetList[y].attributes.length; i < il; i+=1) {
1579 | attributeValues += targetList[y].attributes[i].value + '|';
1580 | }
1581 |
1582 | /* First, check the elements attributes to see if any of them contain youtube or vimeo */
1583 | if (attributeValues.search(readability.regexps.videos) !== -1) {
1584 | continue;
1585 | }
1586 |
1587 | /* Then check the elements inside this element for the same. */
1588 | if (targetList[y].innerHTML.search(readability.regexps.videos) !== -1) {
1589 | continue;
1590 | }
1591 |
1592 | }
1593 |
1594 | targetList[y].parentNode.removeChild(targetList[y]);
1595 | }
1596 | },
1597 |
1598 | /**
1599 | * Clean an element of all tags of type "tag" if they look fishy.
1600 | * "Fishy" is an algorithm based on content length, classnames, link density, number of images & embeds, etc.
1601 | *
1602 | * @return void
1603 | **/
1604 | cleanConditionally: function (e, tag) {
1605 |
1606 | if(!readability.flagIsActive(readability.FLAG_CLEAN_CONDITIONALLY)) {
1607 | return;
1608 | }
1609 |
1610 | var tagsList = e.getElementsByTagName(tag);
1611 | var curTagsLength = tagsList.length;
1612 |
1613 | /**
1614 | * Gather counts for other typical elements embedded within.
1615 | * Traverse backwards so we can remove nodes at the same time without effecting the traversal.
1616 | *
1617 | * TODO: Consider taking into account original contentScore here.
1618 | **/
1619 | for (var i=curTagsLength-1; i >= 0; i-=1) {
1620 | var weight = readability.getClassWeight(tagsList[i]);
1621 | var contentScore = (typeof tagsList[i].readability !== 'undefined') ? tagsList[i].readability.contentScore : 0;
1622 |
1623 | dbg("Cleaning Conditionally " + tagsList[i] + " (" + tagsList[i].className + ":" + tagsList[i].id + ")" + ((typeof tagsList[i].readability !== 'undefined') ? (" with score " + tagsList[i].readability.contentScore) : ''));
1624 |
1625 | if(weight+contentScore < 0)
1626 | {
1627 | tagsList[i].parentNode.removeChild(tagsList[i]);
1628 | }
1629 | else if ( readability.getCharCount(tagsList[i],',') < 10) {
1630 | /**
1631 | * If there are not very many commas, and the number of
1632 | * non-paragraph elements is more than paragraphs or other ominous signs, remove the element.
1633 | **/
1634 | var p = tagsList[i].getElementsByTagName("p").length;
1635 | var img = tagsList[i].getElementsByTagName("img").length;
1636 | var li = tagsList[i].getElementsByTagName("li").length-100;
1637 | var input = tagsList[i].getElementsByTagName("input").length;
1638 |
1639 | var embedCount = 0;
1640 | var embeds = tagsList[i].getElementsByTagName("embed");
1641 | for(var ei=0,il=embeds.length; ei < il; ei+=1) {
1642 | if (embeds[ei].src.search(readability.regexps.videos) === -1) {
1643 | embedCount+=1;
1644 | }
1645 | }
1646 |
1647 | var linkDensity = readability.getLinkDensity(tagsList[i]);
1648 | var contentLength = readability.getInnerText(tagsList[i]).length;
1649 | var toRemove = false;
1650 |
1651 | if ( img > p ) {
1652 | toRemove = true;
1653 | } else if(li > p && tag !== "ul" && tag !== "ol") {
1654 | toRemove = true;
1655 | } else if( input > Math.floor(p/3) ) {
1656 | toRemove = true;
1657 | } else if(contentLength < 25 && (img === 0 || img > 2) ) {
1658 | toRemove = true;
1659 | } else if(weight < 25 && linkDensity > 0.2) {
1660 | toRemove = true;
1661 | } else if(weight >= 25 && linkDensity > 0.5) {
1662 | toRemove = true;
1663 | } else if((embedCount === 1 && contentLength < 75) || embedCount > 1) {
1664 | toRemove = true;
1665 | }
1666 |
1667 | if(toRemove) {
1668 | tagsList[i].parentNode.removeChild(tagsList[i]);
1669 | }
1670 | }
1671 | }
1672 | },
1673 |
1674 | /**
1675 | * Clean out spurious headers from an Element. Checks things like classnames and link density.
1676 | *
1677 | * @param Element
1678 | * @return void
1679 | **/
1680 | cleanHeaders: function (e) {
1681 | for (var headerIndex = 1; headerIndex < 3; headerIndex+=1) {
1682 | var headers = e.getElementsByTagName('h' + headerIndex);
1683 | for (var i=headers.length-1; i >=0; i-=1) {
1684 | if (readability.getClassWeight(headers[i]) < 0 || readability.getLinkDensity(headers[i]) > 0.33) {
1685 | headers[i].parentNode.removeChild(headers[i]);
1686 | }
1687 | }
1688 | }
1689 | },
1690 |
1691 | /*** Smooth scrolling logic ***/
1692 |
1693 | /**
1694 | * easeInOut animation algorithm - returns an integer that says how far to move at this point in the animation.
1695 | * Borrowed from jQuery's easing library.
1696 | * @return integer
1697 | **/
1698 | easeInOut: function(start,end,totalSteps,actualStep) {
1699 | var delta = end - start;
1700 |
1701 | if ((actualStep/=totalSteps/2) < 1) {
1702 | return delta/2*actualStep*actualStep + start;
1703 | }
1704 | actualStep -=1;
1705 | return -delta/2 * ((actualStep)*(actualStep-2) - 1) + start;
1706 | },
1707 |
1708 | /**
1709 | * Helper function to, in a cross compatible way, get or set the current scroll offset of the document.
1710 | * @return mixed integer on get, the result of window.scrollTo on set
1711 | **/
1712 | scrollTop: function(scroll){
1713 | var setScroll = typeof scroll !== 'undefined';
1714 |
1715 | if(setScroll) {
1716 | return window.scrollTo(0, scroll);
1717 | }
1718 | if(typeof window.pageYOffset !== 'undefined') {
1719 | return window.pageYOffset;
1720 | }
1721 | else if(document.documentElement.clientHeight) {
1722 | return document.documentElement.scrollTop;
1723 | }
1724 | else {
1725 | return document.body.scrollTop;
1726 | }
1727 | },
1728 |
1729 | /**
1730 | * scrollTo - Smooth scroll to the point of scrollEnd in the document.
1731 | * @return void
1732 | **/
1733 | curScrollStep: 0,
1734 | scrollTo: function (scrollStart, scrollEnd, steps, interval) {
1735 | if(
1736 | (scrollStart < scrollEnd && readability.scrollTop() < scrollEnd) ||
1737 | (scrollStart > scrollEnd && readability.scrollTop() > scrollEnd)
1738 | ) {
1739 | readability.curScrollStep+=1;
1740 | if(readability.curScrollStep > steps) {
1741 | return;
1742 | }
1743 |
1744 | var oldScrollTop = readability.scrollTop();
1745 |
1746 | readability.scrollTop(readability.easeInOut(scrollStart, scrollEnd, steps, readability.curScrollStep));
1747 |
1748 | // We're at the end of the window.
1749 | if(oldScrollTop === readability.scrollTop()) {
1750 | return;
1751 | }
1752 |
1753 | window.setTimeout(function() {
1754 | readability.scrollTo(scrollStart, scrollEnd, steps, interval);
1755 | }, interval);
1756 | }
1757 | },
1758 |
1759 |
1760 | /**
1761 | * Show the email popup.
1762 | *
1763 | * @return void
1764 | **/
1765 | emailBox: function () {
1766 | var emailContainerExists = document.getElementById('email-container');
1767 | if(null !== emailContainerExists)
1768 | {
1769 | return;
1770 | }
1771 |
1772 | var emailContainer = document.createElement("DIV");
1773 | emailContainer.setAttribute('id', 'email-container');
1774 | emailContainer.innerHTML = '';
1775 |
1776 | document.body.appendChild(emailContainer);
1777 | },
1778 |
1779 | /**
1780 | * Close the email popup. This is a hacktackular way to check if we're in a "close loop".
1781 | * Since we don't have crossdomain access to the frame, we can only know when it has
1782 | * loaded again. If it's loaded over 3 times, we know to close the frame.
1783 | *
1784 | * @return void
1785 | **/
1786 | removeFrame: function () {
1787 | readability.iframeLoads+=1;
1788 | if (readability.iframeLoads > 3)
1789 | {
1790 | var emailContainer = document.getElementById('email-container');
1791 | if (null !== emailContainer) {
1792 | emailContainer.parentNode.removeChild(emailContainer);
1793 | }
1794 |
1795 | readability.iframeLoads = 0;
1796 | }
1797 | },
1798 |
1799 | htmlspecialchars: function (s) {
1800 | if (typeof(s) === "string") {
1801 | s = s.replace(/&/g, "&");
1802 | s = s.replace(/"/g, """);
1803 | s = s.replace(/'/g, "'");
1804 | s = s.replace(//g, ">");
1806 | }
1807 |
1808 | return s;
1809 | },
1810 |
1811 | flagIsActive: function(flag) {
1812 | return (readability.flags & flag) > 0;
1813 | },
1814 |
1815 | addFlag: function(flag) {
1816 | readability.flags = readability.flags | flag;
1817 | },
1818 |
1819 | removeFlag: function(flag) {
1820 | readability.flags = readability.flags & ~flag;
1821 | }
1822 |
1823 | };
1824 |
1825 | readability.init();
--------------------------------------------------------------------------------
/lib/underscore.js:
--------------------------------------------------------------------------------
1 | // Underscore.js 1.1.3
2 | // (c) 2010 Jeremy Ashkenas, DocumentCloud Inc.
3 | // Underscore is freely distributable under the MIT license.
4 | // Portions of Underscore are inspired or borrowed from Prototype,
5 | // Oliver Steele's Functional, and John Resig's Micro-Templating.
6 | // For all details and documentation:
7 | // http://documentcloud.github.com/underscore
8 |
9 | (function() {
10 |
11 | // Baseline setup
12 | // --------------
13 |
14 | // Establish the root object, `window` in the browser, or `global` on the server.
var root = this;

// Remember what `_` referred to before this file ran.
var previousUnderscore = root._;

// Sentinel that iterators return to break out of a loop early.
var breaker = {};

// Short aliases for the core prototypes.
var ArrayProto = Array.prototype;
var ObjProto = Object.prototype;

// Quick references to frequently used prototype methods.
var slice = ArrayProto.slice;
var unshift = ArrayProto.unshift;
var toString = ObjProto.toString;
var hasOwnProperty = ObjProto.hasOwnProperty;

// Native **ECMAScript 5** implementations; each is undefined where the
// host does not provide it.
var nativeForEach = ArrayProto.forEach;
var nativeMap = ArrayProto.map;
var nativeReduce = ArrayProto.reduce;
var nativeReduceRight = ArrayProto.reduceRight;
var nativeFilter = ArrayProto.filter;
var nativeEvery = ArrayProto.every;
var nativeSome = ArrayProto.some;
var nativeIndexOf = ArrayProto.indexOf;
var nativeLastIndexOf = ArrayProto.lastIndexOf;
var nativeIsArray = Array.isArray;
var nativeKeys = Object.keys;
46 |
47 | // Create a safe reference to the Underscore object for use below.
var _ = function(obj) { return new wrapper(obj); };

// Without a CommonJS `module.exports`, attach `_` to the root object.
// Otherwise export it, keeping `_._` for the old `require()` API.
if (typeof module === 'undefined' || !module.exports) {
  root._ = _;
} else {
  module.exports = _;
  _._ = _;
}

// Current version.
_.VERSION = '1.1.3';
62 |
63 | // Collection Functions
64 | // --------------------
65 |
66 | // The cornerstone, an `each` implementation, aka `forEach`.
67 | // Handles objects implementing `forEach`, arrays, and raw objects.
68 | // Delegates to **ECMAScript 5**'s native `forEach` if available.
var each = _.each = _.forEach = function(obj, iterator, context) {
  // Call iterator(value, indexOrKey, obj) with `this` set to context for
  // every element of obj. Always returns undefined.
  //   - native forEach is used when obj exposes it;
  //   - objects with a numeric length are walked by index;
  //   - anything else is walked over its own enumerable keys.
  // In the two manual paths, an iterator returning `breaker` stops the loop.

  // Nothing to iterate for null/undefined.
  if (obj == null) return;

  if (nativeForEach && obj.forEach === nativeForEach) {
    obj.forEach(iterator, context);
  } else if (_.isNumber(obj.length)) {
    for (var i = 0, l = obj.length; i < l; i++) {
      if (iterator.call(context, obj[i], i, obj) === breaker) return;
    }
  } else {
    for (var key in obj) {
      if (hasOwnProperty.call(obj, key)) {
        if (iterator.call(context, obj[key], key, obj) === breaker) return;
      }
    }
  }
};
85 |
86 | // Return the results of applying the iterator to each element.
87 | // Delegates to **ECMAScript 5**'s native `map` if available.
_.map = function(obj, iterator, context) {
  // Prefer the native `map` when obj exposes it.
  var useNative = nativeMap && obj.map === nativeMap;
  if (useNative) return obj.map(iterator, context);

  var mapped = [];
  each(obj, function(value, index, list) {
    mapped.push(iterator.call(context, value, index, list));
  });
  return mapped;
};
96 |
97 | // **Reduce** builds up a single result from a list of values, aka `inject`,
98 | // or `foldl`. Delegates to **ECMAScript 5**'s native `reduce` if available.
_.reduce = _.foldl = _.inject = function(obj, iterator, memo, context) {
  // Fold obj into one value by calling iterator(memo, value, index, list)
  // for each element. When memo is omitted (undefined), the first visited
  // element seeds the accumulator.
  var initial = memo !== void 0;
  if (nativeReduce && obj.reduce === nativeReduce) {
    if (context) iterator = _.bind(iterator, context);
    return initial ? obj.reduce(iterator, memo) : obj.reduce(iterator);
  }
  each(obj, function(value, index, list) {
    if (!initial) {
      // Seed from the first element regardless of its key, so plain objects
      // (whose keys are never the number 0) are handled too.
      memo = value;
      initial = true;
    } else {
      memo = iterator.call(context, memo, value, index, list);
    }
  });
  return memo;
};
114 |
115 | // The right-associative version of reduce, also known as `foldr`.
116 | // Delegates to **ECMAScript 5**'s native `reduceRight` if available.
_.reduceRight = _.foldr = function(obj, iterator, memo, context) {
  // Native path: bind the iterator only when a context was supplied.
  if (nativeReduceRight && obj.reduceRight === nativeReduceRight) {
    var bound = context ? _.bind(iterator, context) : iterator;
    if (memo !== void 0) return obj.reduceRight(bound, memo);
    return obj.reduceRight(bound);
  }

  // Fallback: reduce a reversed copy from the left.
  var copy = _.isArray(obj) ? obj.slice() : _.toArray(obj);
  copy.reverse();
  return _.reduce(copy, iterator, memo, context);
};
125 |
126 | // Return the first value which passes a truth test. Aliased as `detect`.
_.find = _.detect = function(obj, iterator, context) {
  var found;
  // `any` stops at the first truthy return, so only the first match is kept.
  any(obj, function(value, index, list) {
    var hit = iterator.call(context, value, index, list);
    if (hit) {
      found = value;
      return true;
    }
  });
  return found;
};
137 |
138 | // Return all the elements that pass a truth test.
139 | // Delegates to **ECMAScript 5**'s native `filter` if available.
140 | // Aliased as `select`.
_.filter = _.select = function(obj, iterator, context) {
  var useNative = nativeFilter && obj.filter === nativeFilter;
  if (useNative) return obj.filter(iterator, context);

  // Manual path: keep the values the iterator accepts.
  var kept = [];
  each(obj, function(value, index, list) {
    if (iterator.call(context, value, index, list)) {
      kept.push(value);
    }
  });
  return kept;
};
149 |
150 | // Return all the elements for which a truth test fails.
_.reject = function(obj, iterator, context) {
  // Keep only the values the iterator does NOT accept.
  var kept = [];
  each(obj, function(value, index, list) {
    var accepted = iterator.call(context, value, index, list);
    if (!accepted) kept.push(value);
  });
  return kept;
};
158 |
159 | // Determine whether all of the elements match a truth test.
160 | // Delegates to **ECMAScript 5**'s native `every` if available.
161 | // Aliased as `all`.
_.every = _.all = function(obj, iterator, context) {
  var test = iterator || _.identity;
  if (nativeEvery && obj.every === nativeEvery) return obj.every(test, context);

  // Manual path: the running outcome is the raw value of the last test,
  // and iteration stops at the first falsy one.
  var outcome = true;
  each(obj, function(value, index, list) {
    outcome = outcome && test.call(context, value, index, list);
    if (!outcome) return breaker;
  });
  return outcome;
};
171 |
172 | // Determine if at least one element in the object matches a truth test.
173 | // Delegates to **ECMAScript 5**'s native `some` if available.
174 | // Aliased as `any`.
var any = _.some = _.any = function(obj, iterator, context) {
  var test = iterator || _.identity;
  if (nativeSome && obj.some === nativeSome) return obj.some(test, context);

  // Manual path: stop at the first truthy test and return its raw value.
  var outcome = false;
  each(obj, function(value, index, list) {
    outcome = test.call(context, value, index, list);
    if (outcome) return breaker;
  });
  return outcome;
};
184 |
185 | // Determine if a given value is included in the array or object using `===`.
186 | // Aliased as `contains`.
_.include = _.contains = function(obj, target) {
  if (nativeIndexOf && obj.indexOf === nativeIndexOf) {
    return obj.indexOf(target) !== -1;
  }

  // Manual path: strict-equality scan that stops at the first match.
  var found = false;
  any(obj, function(value) {
    found = (value === target);
    if (found) return true;
  });
  return found;
};
195 |
196 | // Invoke a method (with arguments) on every item in a collection.
_.invoke = function(obj, method) {
  // Everything after `method` is forwarded to each call.
  var extra = slice.call(arguments, 2);
  return _.map(obj, function(value) {
    // With a falsy method name, the item itself is called.
    var fn = method ? value[method] : value;
    return fn.apply(value, extra);
  });
};
203 |
204 | // Convenience version of a common use case of `map`: fetching a property.
_.pluck = function(obj, key) {
  // Read the property `key` from one item.
  var readKey = function(value) { return value[key]; };
  return _.map(obj, readKey);
};
208 |
209 | // Return the maximum element (or element-based computation).
_.max = function(obj, iterator, context) {
  // Plain arrays without an iterator go straight to Math.max.
  if (!iterator && _.isArray(obj)) return Math.max.apply(Math, obj);

  var best = {computed : -Infinity};
  each(obj, function(value, index, list) {
    var score = iterator ? iterator.call(context, value, index, list) : value;
    // `>=` means a later element wins a tie.
    if (score >= best.computed) {
      best = {value : value, computed : score};
    }
  });
  return best.value;
};
219 |
220 | // Return the minimum element (or element-based computation).
_.min = function(obj, iterator, context) {
  // Plain arrays without an iterator go straight to Math.min.
  if (!iterator && _.isArray(obj)) return Math.min.apply(Math, obj);

  var best = {computed : Infinity};
  each(obj, function(value, index, list) {
    var score = iterator ? iterator.call(context, value, index, list) : value;
    // Strict `<` means the earliest element wins a tie.
    if (score < best.computed) {
      best = {value : value, computed : score};
    }
  });
  return best.value;
};
230 |
231 | // Sort the object's values by a criterion produced by an iterator.
_.sortBy = function(obj, iterator, context) {
  // Pair each value with its sort criterion.
  var tagged = _.map(obj, function(value, index, list) {
    return {
      value : value,
      criteria : iterator.call(context, value, index, list)
    };
  });

  // Order the pairs by criterion, ascending.
  tagged.sort(function(left, right) {
    var a = left.criteria, b = right.criteria;
    if (a < b) return -1;
    if (a > b) return 1;
    return 0;
  });

  // Strip the criteria again.
  return _.pluck(tagged, 'value');
};
243 |
244 | // Use a comparator function to figure out at what index an object should
245 | // be inserted so as to maintain order. Uses binary search.
_.sortedIndex = function(array, obj, iterator) {
  var rank = iterator || _.identity;
  var low = 0, high = array.length;

  // Binary search for the first slot whose rank is not below obj's.
  while (low < high) {
    var mid = (low + high) >> 1;
    if (rank(array[mid]) < rank(obj)) {
      low = mid + 1;
    } else {
      high = mid;
    }
  }
  return low;
};
255 |
256 | // Safely convert anything iterable into a real, live array.
_.toArray = function(iterable) {
  // Falsy input yields a fresh empty array.
  if (!iterable) return [];
  // Objects providing their own toArray() are trusted to convert themselves.
  if (iterable.toArray) return iterable.toArray();
  // Real arrays are returned as-is (same reference).
  if (_.isArray(iterable)) return iterable;
  return _.isArguments(iterable) ? slice.call(iterable) : _.values(iterable);
};
264 |
265 | // Return the number of elements in an object.
_.size = function(obj) {
  // Count elements by first converting to an array.
  var items = _.toArray(obj);
  return items.length;
};
269 |
270 | // Array Functions
271 | // ---------------
272 |
273 | // Get the first element of an array. Passing **n** will return the first N
274 | // values in the array. Aliased as `head`. The **guard** check allows it to work
275 | // with `_.map`.
_.first = _.head = function(array, n, guard) {
  // With a truthy n (and no guard) return the first n values as an array.
  if (n && !guard) {
    return slice.call(array, 0, n);
  }
  // Otherwise return just the first element.
  return array[0];
};
279 |
280 | // Returns everything but the first entry of the array. Aliased as `tail`.
281 | // Especially useful on the arguments object. Passing an **index** will return
282 | // the rest of the values in the array from that index onward. The **guard**
283 | // check allows it to work with `_.map`.
_.rest = _.tail = function(array, index, guard) {
  // Start at 1 when no index is given or when the guard is set.
  var from = (_.isUndefined(index) || guard) ? 1 : index;
  return slice.call(array, from);
};
287 |
288 | // Get the last element of an array.
_.last = function(array) {
  // For an empty array this reads index -1 and yields undefined.
  var lastIndex = array.length - 1;
  return array[lastIndex];
};
292 |
293 | // Trim out all falsy values from an array.
_.compact = function(array) {
  // Keep only truthy values.
  var isTruthy = function(value) { return Boolean(value); };
  return _.filter(array, isTruthy);
};
297 |
298 | // Return a completely flattened version of an array.
_.flatten = function(array) {
  // Nested arrays are flattened recursively; other values are appended as-is.
  var collect = function(memo, value) {
    if (_.isArray(value)) return memo.concat(_.flatten(value));
    memo.push(value);
    return memo;
  };
  return _.reduce(array, collect, []);
};
306 |
307 | // Return a version of the array that does not contain the specified value(s).
_.without = function(array) {
  // Every argument after the array is a value to exclude.
  var banned = slice.call(arguments, 1);
  return _.filter(array, function(value) {
    var isBanned = _.include(banned, value);
    return !isBanned;
  });
};
312 |
313 | // Produce a duplicate-free version of the array. If the array has already
314 | // been sorted, you have the option of using a faster algorithm.
315 | // Aliased as `unique`.
_.uniq = _.unique = function(array, isSorted) {
  return _.reduce(array, function(memo, el, i) {
    // The first element is always kept. Later ones are compared with the
    // previous kept value (sorted input) or with all kept values.
    var keep = (0 == i);
    if (!keep) {
      keep = (isSorted === true) ? _.last(memo) != el : !_.include(memo, el);
    }
    if (keep) memo.push(el);
    return memo;
  }, []);
};
322 |
323 | // Produce an array that contains every item shared between all the
324 | // passed-in arrays.
_.intersect = function(array) {
  var others = slice.call(arguments, 1);
  // An item survives only when every other array contains it.
  var inAllOthers = function(item) {
    return _.every(others, function(other) {
      return _.indexOf(other, item) >= 0;
    });
  };
  return _.filter(_.uniq(array), inAllOthers);
};
333 |
334 | // Zip together multiple lists into a single array -- elements that share
335 | // an index go together.
_.zip = function() {
  var lists = slice.call(arguments);
  // The result has one row per index of the longest list.
  var longest = _.max(_.pluck(lists, 'length'));
  var zipped = new Array(longest);
  for (var i = 0; i < longest; i++) {
    zipped[i] = _.pluck(lists, String(i));
  }
  return zipped;
};
343 |
344 | // If the browser doesn't supply us with indexOf (I'm looking at you, **MSIE**),
345 | // we need this function. Return the position of the first occurrence of an
346 | // item in an array, or -1 if the item is not included in the array.
347 | // Delegates to **ECMAScript 5**'s native `indexOf` if available.
_.indexOf = function(array, item) {
  if (nativeIndexOf && array.indexOf === nativeIndexOf) {
    return array.indexOf(item);
  }
  // Manual forward scan using strict equality.
  var total = array.length;
  for (var pos = 0; pos < total; pos++) {
    if (array[pos] === item) return pos;
  }
  return -1;
};
353 |
354 |
355 | // Delegates to **ECMAScript 5**'s native `lastIndexOf` if available.
_.lastIndexOf = function(array, item) {
  if (nativeLastIndexOf && array.lastIndexOf === nativeLastIndexOf) {
    return array.lastIndexOf(item);
  }
  // Manual backward scan using strict equality.
  var pos = array.length;
  while (pos--) {
    if (array[pos] === item) {
      return pos;
    }
  }
  return -1;
};
362 |
363 | // Generate an integer Array containing an arithmetic progression. A port of
364 | // the native Python `range()` function. See
365 | // [the Python documentation](http://docs.python.org/library/functions.html#range).
_.range = function(start, stop, step) {
  // With zero or one argument, that argument is the stop and start is 0.
  var given = slice.call(arguments);
  var single = given.length <= 1;
  var from = single ? 0 : given[0];
  var to = single ? given[0] : given[1];
  // A missing (or zero) step falls back to 1.
  var stride = given[2] || 1;

  var count = Math.max(Math.ceil((to - from) / stride), 0);
  var out = new Array(count);
  for (var idx = 0; idx < count; idx++, from += stride) {
    out[idx] = from;
  }
  return out;
};
381 |
382 | // Function (ahem) Functions
383 | // ------------------
384 |
385 | // Create a function bound to a given object (assigning `this`, and arguments,
386 | // optionally). Binding with arguments is also known as `curry`.
_.bind = function(func, obj) {
  // Arguments after `obj` are pre-filled ahead of the call-time arguments.
  var preset = slice.call(arguments, 2);
  return function() {
    var all = preset.concat(slice.call(arguments));
    // A falsy obj is replaced by a fresh empty object on every call.
    return func.apply(obj || {}, all);
  };
};
393 |
394 | // Bind all of an object's methods to that object. Useful for ensuring that
395 | // all callbacks defined on an object belong to it.
_.bindAll = function(obj) {
  // Without explicit names, every function found on obj is bound.
  var names = arguments.length > 1 ? slice.call(arguments, 1) : _.functions(obj);
  each(names, function(name) {
    obj[name] = _.bind(obj[name], obj);
  });
  return obj;
};
402 |
403 | // Memoize an expensive function by storing its results.
_.memoize = function(func, hasher) {
  // Return a caching wrapper around func. The cache key is hasher(...args),
  // or the first argument when no hasher is given (via _.identity).
  var memo = {};
  hasher = hasher || _.identity;
  return function() {
    var key = hasher.apply(this, arguments);
    // Own-property check: keys such as "toString" or "constructor" must not
    // be answered from members inherited from Object.prototype.
    if (hasOwnProperty.call(memo, key)) return memo[key];
    return (memo[key] = func.apply(this, arguments));
  };
};
412 |
413 | // Delays a function for the given number of milliseconds, and then calls
414 | // it with the arguments supplied.
_.delay = function(func, wait) {
  // Arguments after `wait` are handed to func when the timer fires.
  var extra = slice.call(arguments, 2);
  var fire = function(){ return func.apply(func, extra); };
  return setTimeout(fire, wait);
};
419 |
420 | // Defers a function, scheduling it to run after the current call stack has
421 | // cleared.
_.defer = function(func) {
  // Equivalent to _.delay(func, 1, ...extraArguments).
  var delayArgs = slice.call(arguments, 1);
  delayArgs.unshift(func, 1);
  return _.delay.apply(_, delayArgs);
};
425 |
426 | // Internal function used to implement `_.throttle` and `_.debounce`.
var limit = function(func, wait, debounce) {
  // Handle of the pending timer, shared by all calls to the returned function.
  var timer;
  return function() {
    var self = this, received = arguments;
    var run = function() {
      timer = null;
      func.apply(self, received);
    };
    // Debouncing restarts the timer on every call; throttling only starts
    // one when none is pending.
    if (debounce) {
      clearTimeout(timer);
    }
    if (debounce || !timer) {
      timer = setTimeout(run, wait);
    }
  };
};
439 |
440 | // Returns a function, that, when invoked, will only be triggered at most once
441 | // during a given window of time.
_.throttle = function(func, wait) {
  // Throttling is `limit` with debouncing switched off.
  var throttled = limit(func, wait, false);
  return throttled;
};
445 |
446 | // Returns a function, that, as long as it continues to be invoked, will not
447 | // be triggered. The function will be called after it stops being called for
448 | // N milliseconds.
_.debounce = function(func, wait) {
  // Debouncing is `limit` with debouncing switched on.
  var debounced = limit(func, wait, true);
  return debounced;
};
452 |
453 | // Returns the first function passed as an argument to the second,
454 | // allowing you to adjust arguments, run code before and after, and
455 | // conditionally execute the original function.
_.wrap = function(func, wrapper) {
  return function() {
    // The wrapper receives the original function, then the call's arguments.
    var args = slice.call(arguments);
    args.unshift(func);
    return wrapper.apply(wrapper, args);
  };
};
462 |
463 | // Returns a function that is the composition of a list of functions, each
464 | // consuming the return value of the function that follows.
_.compose = function() {
  var funcs = slice.call(arguments);
  return function() {
    // Apply right-to-left; each result becomes the sole argument of the
    // next function to the left.
    var value = slice.call(arguments);
    var i = funcs.length;
    while (i-- > 0) {
      value = [funcs[i].apply(this, value)];
    }
    return value[0];
  };
};
475 |
476 | // Object Functions
477 | // ----------------
478 |
479 | // Retrieve the names of an object's properties.
480 | // Delegates to **ECMAScript 5**'s native `Object.keys`
_.keys = nativeKeys || function(obj) {
  // Arrays report their indexes as numbers.
  if (_.isArray(obj)) return _.range(0, obj.length);

  // Other objects report their own enumerable property names.
  var names = [];
  for (var name in obj) {
    if (hasOwnProperty.call(obj, name)) {
      names.push(name);
    }
  }
  return names;
};
487 |
488 | // Retrieve the values of an object's properties.
_.values = function(obj) {
  // Mapping with the identity function yields the values themselves.
  var passThrough = _.identity;
  return _.map(obj, passThrough);
};
492 |
493 | // Return a sorted list of the function names available on the object.
494 | // Aliased as `methods`
_.functions = _.methods = function(obj) {
  // Keep the keys whose values are functions, then sort the names.
  var names = _.filter(_.keys(obj), function(key) {
    return _.isFunction(obj[key]);
  });
  return names.sort();
};
498 |
499 | // Extend a given object with all the properties in passed-in object(s).
_.extend = function(obj) {
  // Copy every enumerable property (inherited ones included) of each
  // source onto obj; later sources win.
  var sources = slice.call(arguments, 1);
  each(sources, function(source) {
    for (var prop in source) {
      obj[prop] = source[prop];
    }
  });
  return obj;
};
506 |
507 | // Create a (shallow-cloned) duplicate of an object.
_.clone = function(obj) {
  // Arrays are copied with slice(); everything else is copied
  // property-by-property into a fresh object.
  if (_.isArray(obj)) {
    return obj.slice();
  }
  return _.extend({}, obj);
};
511 |
512 | // Invokes interceptor with the obj, and then returns obj.
513 | // The primary purpose of this method is to "tap into" a method chain, in
514 | // order to perform operations on intermediate results within the chain.
_.tap = function(obj, interceptor) {
  // The interceptor's return value is ignored; obj is passed through.
  interceptor(obj);

  return obj;
};
519 |
520 | // Perform a deep comparison to check if two objects are equal.
_.isEqual = function(a, b) {
  // Same reference or identical primitive.
  if (a === b) return true;

  // Values of different types are never equal.
  var typeA = typeof a, typeB = typeof b;
  if (typeA != typeB) return false;

  // Loose equality (watch out for coercions).
  if (a == b) return true;

  // Exactly one of the two is falsy.
  if (!a !== !b) return false;

  // Defer to a custom isEqual() on `a` when it has one.
  if (a.isEqual) return a.isEqual(b);

  // Dates compare by timestamp.
  if (_.isDate(a) && _.isDate(b)) return a.getTime() === b.getTime();

  // NOTE(review): two NaN values are reported as NOT equal here; confirm
  // that this is intended.
  if (_.isNaN(a) && _.isNaN(b)) return false;

  // Regular expressions compare by source and flags.
  if (_.isRegExp(a) && _.isRegExp(b)) {
    return a.source === b.source &&
           a.global === b.global &&
           a.ignoreCase === b.ignoreCase &&
           a.multiline === b.multiline;
  }

  // Anything that is not an object by now cannot be compared further.
  if (typeA !== 'object') return false;

  // Cheap length check before walking the contents.
  if (a.length && (a.length !== b.length)) return false;

  // Different numbers of keys means different objects.
  var keysA = _.keys(a), keysB = _.keys(b);
  if (keysA.length != keysB.length) return false;

  // Recursively compare every enumerable property of `a`.
  for (var key in a) {
    if (!(key in b) || !_.isEqual(a[key], b[key])) return false;
  }
  return true;
};
555 |
556 | // Is a given array or object empty?
_.isEmpty = function(obj) {
  // Arrays and strings are empty when their length is zero.
  var hasLength = _.isArray(obj) || _.isString(obj);
  if (hasLength) return obj.length === 0;

  // Other values are empty when they have no own enumerable property.
  for (var key in obj) {
    if (hasOwnProperty.call(obj, key)) {
      return false;
    }
  }
  return true;
};
562 |
563 | // Is a given value a DOM element?
_.isElement = function(obj) {
  // nodeType is compared loosely with 1.
  return Boolean(obj && obj.nodeType == 1);
};
567 |
568 | // Is a given value an array?
569 | // Delegates to ECMA5's native Array.isArray
_.isArray = nativeIsArray || function(obj) {
  // Duck-typed fallback: has concat and unshift, but no `callee`.
  var looksLikeArray = obj && obj.concat && obj.unshift && !obj.callee;
  return Boolean(looksLikeArray);
};
573 |
574 | // Is a given variable an arguments object?
_.isArguments = function(obj) {
  // Duck-typed: any truthy value with a truthy `callee` qualifies.
  return Boolean(obj && obj.callee);
};
578 |
579 | // Is a given value a function?
_.isFunction = function(obj) {
  // Duck-typed: needs a constructor plus `call` and `apply`.
  var callable = obj && obj.constructor && obj.call && obj.apply;
  return Boolean(callable);
};
583 |
584 | // Is a given value a string?
_.isString = function(obj) {
  // The empty string is falsy, so it is checked on its own.
  if (obj === '') return true;
  // Otherwise duck-typed on charCodeAt and substr.
  return Boolean(obj && obj.charCodeAt && obj.substr);
};
588 |
589 | // Is a given value a number?
_.isNumber = function(obj) {
  // Zero is falsy, so it is checked on its own.
  if (obj === 0) return true;
  // Otherwise duck-typed on toExponential and toFixed; NaN is falsy and
  // therefore reported as not a number.
  return Boolean(obj && obj.toExponential && obj.toFixed);
};
593 |
594 | // Is the given value NaN -- this one is interesting. NaN != NaN, and
595 | // isNaN(undefined) == true, so we make sure it's a number first.
_.isNaN = function(obj) {
  // Only number values can be NaN; the global isNaN alone would also
  // accept undefined and non-numeric strings.
  var isNumberValue = toString.call(obj) === '[object Number]';
  return isNumberValue && isNaN(obj);
};
599 |
600 | // Is a given value a boolean?
_.isBoolean = function(obj) {
  // Only the primitives true and false qualify.
  return typeof obj === 'boolean';
};
604 |
605 | // Is a given value a date?
_.isDate = function(obj) {
  // Duck-typed on two Date methods.
  return Boolean(obj && obj.getTimezoneOffset && obj.setUTCFullYear);
};
609 |
610 | // Is the given value a regular expression?
_.isRegExp = function(obj) {
  // Duck-typed: test/exec methods plus an ignoreCase flag that is either
  // truthy or exactly false.
  var hasMethods = obj && obj.test && obj.exec;
  return Boolean(hasMethods && (obj.ignoreCase || obj.ignoreCase === false));
};
614 |
615 | // Is a given value equal to null?
_.isNull = function(obj) {
  // Strict check: undefined does not count as null.
  var matches = (obj === null);
  return matches;
};
619 |
620 | // Is a given variable undefined?
_.isUndefined = function(obj) {
  // `void 0` always evaluates to the real undefined value.
  var matches = (obj === void 0);
  return matches;
};
624 |
625 | // Utility Functions
626 | // -----------------
627 |
628 | // Run Underscore.js in *noConflict* mode, returning the `_` variable to its
629 | // previous owner. Returns a reference to the Underscore object.
_.noConflict = function() {
  // Hand the `_` slot on the root object back to its previous value.
  root._ = previousUnderscore;

  // `this` is the receiver of the call.
  return this;
};
634 |
635 | // Keep the identity function around for default iterators.
_.identity = function(value) {
  // Returns its argument untouched.
  return value;
};
639 |
640 | // Run a function **n** times.
_.times = function (n, iterator, context) {
  // Call iterator(i) with `this` set to context for i = 0 .. n-1.
  var i = 0;
  while (i < n) {
    iterator.call(context, i);
    i++;
  }
};
644 |
645 | // Add your own custom functions to the Underscore object, ensuring that
646 | // they're correctly added to the OOP wrapper as well.
_.mixin = function(obj) {
  var names = _.functions(obj);
  each(names, function(name) {
    // Install on `_` first, then register the same function on the wrapper.
    var fn = obj[name];
    _[name] = fn;
    addToWrapper(name, fn);
  });
};
652 |
653 | // Generate a unique integer id (unique within the entire client session).
654 | // Useful for temporary DOM ids.
var idCounter = 0;
_.uniqueId = function(prefix) {
  // Take the current counter value, then advance it.
  var id = idCounter;
  idCounter += 1;
  if (prefix) {
    return prefix + id;
  }
  return id;
};
660 |
// Delimiters used by `_.template`. The defaults are ERB-style; replace these
// regexes to use alternative delimiters. Each regex captures the code found
// between its delimiters in group 1.
_.templateSettings = {
  // `<% ... %>`: statements to execute.
  evaluate : /<%([\s\S]+?)%>/g,
  // `<%= ... %>`: an expression whose value is inserted into the output.
  interpolate : /<%=([\s\S]+?)%>/g
};
667 |
668 | // JavaScript micro-templating, similar to John Resig's implementation.
669 | // Underscore templating handles arbitrary delimiters, preserves whitespace,
670 | // and correctly escapes quotes within interpolated code.
671 | _.template = function(str, data) {
672 | var c = _.templateSettings;
673 | var tmpl = 'var __p=[],print=function(){__p.push.apply(__p,arguments);};' +
674 | 'with(obj||{}){__p.push(\'' +
675 | str.replace(/\\/g, '\\\\')
676 | .replace(/'/g, "\\'")
677 | .replace(c.interpolate, function(match, code) {
678 | return "'," + code.replace(/\\'/g, "'") + ",'";
679 | })
680 | .replace(c.evaluate || null, function(match, code) {
681 | return "');" + code.replace(/\\'/g, "'")
682 | .replace(/[\r\n\t]/g, ' ') + "__p.push('";
683 | })
684 | .replace(/\r/g, '\\r')
685 | .replace(/\n/g, '\\n')
686 | .replace(/\t/g, '\\t')
687 | + "');}return __p.join('');";
688 | var func = new Function('obj', tmpl);
689 | return data ? func(data) : func;
690 | };
691 |
692 | // The OOP Wrapper
693 | // ---------------
694 |
// If Underscore is called as a function, it returns a wrapped object that
// can be used OO-style. This constructor builds that object: it simply
// stores the wrapped value on `_wrapped`. The wrapper's prototype holds
// altered versions of the underscore functions, and wrapped objects may be
// chained.
var wrapper = function(obj) {
  this._wrapped = obj;
};

// Expose `wrapper.prototype` as `_.prototype` (the very same object, so
// additions to one are visible through the other).
_.prototype = wrapper.prototype;
702 |
// Helper to continue chaining intermediate results: when `chain` is truthy
// the value is re-wrapped with `_(obj)` and put back into chain mode,
// otherwise the plain value is handed back.
var result = function(obj, chain) {
  if (chain) return _(obj).chain();
  return obj;
};
707 |
// Adds `func` to the OOP wrapper under `name`. The generated method
// prepends the wrapped value to the caller's arguments, applies `func` with
// `_` as `this`, and passes the outcome through `result` so chaining is
// honored.
var addToWrapper = function(name, func) {
  var proto = wrapper.prototype;
  proto[name] = function() {
    var callArgs = slice.call(arguments);
    unshift.call(callArgs, this._wrapped);
    var value = func.apply(_, callArgs);
    return result(value, this._chain);
  };
};
716 |
// Register every function currently on `_` with the wrapper object by
// mixing Underscore into itself.
_.mixin(_);
719 |
// Add all mutator Array functions to the wrapper. Each one applies the
// `ArrayProto` method of the same name to the wrapped value in place; the
// method's own return value is discarded and the (mutated) wrapped value is
// passed through `result` instead.
each(['pop', 'push', 'reverse', 'shift', 'sort', 'splice', 'unshift'], function(methodName) {
  var arrayMethod = ArrayProto[methodName];
  wrapper.prototype[methodName] = function() {
    var target = this._wrapped;
    arrayMethod.apply(target, arguments);
    return result(this._wrapped, this._chain);
  };
});
728 |
// Add all accessor Array functions to the wrapper. Each one applies the
// `ArrayProto` method of the same name to the wrapped value and passes that
// method's return value through `result`.
each(['concat', 'join', 'slice'], function(methodName) {
  var arrayMethod = ArrayProto[methodName];
  wrapper.prototype[methodName] = function() {
    var value = arrayMethod.apply(this._wrapped, arguments);
    return result(value, this._chain);
  };
});
736 |
// Start chaining a wrapped Underscore object: sets the `_chain` flag on the
// wrapper and returns the same wrapper so calls can be strung together.
wrapper.prototype.chain = function() {
  var self = this;
  self._chain = true;
  return self;
};
742 |
// Extracts the result from a wrapped and chained object by returning the
// value stored on `_wrapped`.
wrapper.prototype.value = function() {
  var wrapped = this._wrapped;
  return wrapped;
};
747 |
748 | })();
749 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Copyright 2010 Paul Kinlan.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 |
18 | from google.appengine.ext import webapp
19 | from google.appengine.ext.webapp import util
20 | from google.appengine.api import urlfetch
21 |
22 | import re
23 | import logging
24 | from urlparse import urlparse
25 | from django.utils import simplejson
26 |
27 |
28 | title = "