├── .gitignore
├── package.sh
├── misc
    ├── readability-module.js
    ├── readability-feedflock.js
    └── readability-ori.js
├── test.txt
├── LICENSE.txt
├── test
    ├── bugs
    │   ├── htmlparser.js
    │   └── jsdom-bug.js
    ├── clean-file.js
    ├── grab-pages.rb
    ├── clean-proxy.js
    ├── weird-pages
    │   └── w3c-css-no-closing-head.html
    └── nytime.html
├── README.md
├── package.json
├── notes.txt
└── lib
    └── sprintf.js


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | log/*.log
3 | dist/*
4 | *.tmproj
5 | 
6 | 


--------------------------------------------------------------------------------
/package.sh:
--------------------------------------------------------------------------------
1 | NAME=node-readability
2 | tar -zcf ./dist/readability.tgz -C .. --exclude=".*" --exclude="test*" $NAME/lib $NAME/LICENSE.txt $NAME/README.md $NAME/package.json


--------------------------------------------------------------------------------
/misc/readability-module.js:
--------------------------------------------------------------------------------
 1 | exports.parse = parse;
 2 | var jsdom = require('jsdom');
 3 | var rdom = require('./readability-my2.js');
 4 | var util = require('util');
 5 | 
 6 | function parse(html, url, callback) {
 7 |     //util.debug(html);
 8 |     var doc = jsdom.jsdom(html, null, {url: url});
 9 |     util.log('---DOM created');
10 |     var win = doc.parentWindow;
11 |     if (!doc.body) {
12 |         console.log('empty body');
13 |         return callback({title: '', content: ''});
14 |     }
15 |     
16 |     rdom.start(win, function(html) {
17 |         //console.log(html);
18 |         callback({title: document.title, content: html});
19 |     });
20 | }


--------------------------------------------------------------------------------
/test.txt:
--------------------------------------------------------------------------------
 1 | http://127.0.0.1:3000/?url=http%3A%2F%2Fwww.bbc.co.uk%2Fukchina%2Fsimp%2Fentertainment%2F2010%2F11%2F101103_ent_harrypotter.shtml
 2 | 
 3 | http://127.0.0.1:3000/?url=http://en.wikipedia.org/wiki/Ruby
 4 | http://127.0.0.1:3000/?url=http://buzz.blogger.com/2010/10/safe-browsing-on-blogger.html
 5 | http://127.0.0.1:3000/?url=http://www.ifanr.com/24614
 6 | http://127.0.0.1:3000/?url=http://www.boston.com/news/politics/articles/2010/11/03/patrick_roars_to_a_2d_term/
 7 | 
 8 | 
 9 | problems:
10 | slow
11 | http://127.0.0.1:3000/?url=http://www.gazeta.ru/news/lastnews/
12 | http://127.0.0.1:3000/?url=http://www.sqlite.org/fts3.html
13 | http://127.0.0.1:3000/?url=http://news.google.com.hk/nwshp?hl=zh-tw&tab=in
14 | 
15 | returned html cannot be parsed by browser
16 | http://blog.zacharyvoase.com/2010/11/11/sockets-and-nodes-i/
17 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2010 Arrix Zhou
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining
 4 | a copy of this software and associated documentation files (the
 5 | "Software"), to deal in the Software without restriction, including
 6 | without limitation the rights to use, copy, modify, merge, publish,
 7 | distribute, sublicense, and/or sell copies of the Software, and to
 8 | permit persons to whom the Software is furnished to do so, subject to
 9 | the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/test/bugs/htmlparser.js:
--------------------------------------------------------------------------------
 1 | var request = require('request');
 2 | var jsdom = require('jsdom');
 3 | 
 4 | var url = 'http://www.w3.org/TR/css3-2d-transforms/';
 5 | request({uri:url}, function (error, response, body) {
 6 |     var html = body;
 7 |     var doc = jsdom.jsdom(html, null, {url: url});
 8 |     console.log(doc.head+''); //[ HEAD ]
 9 |     console.log(doc.body === null); //true
10 |     console.log(doc.head.childNodes[9].tagName); //BODY
11 | });
12 | 
13 | var doc = jsdom.jsdom(html, null, {url: ''});
14 | 
15 | 
16 | var HTML5 = require('html5');
17 | var fs = require('fs');
18 | var content = fs.readFileSync('test/css.html', 'utf-8');
19 | var html = content;
20 | var jsdom = require('jsdom');
21 | var browser = jsdom.browserAugmentation(jsdom.defaultLevel);
22 | 
23 | var doc = new browser.HTMLDocument();
24 | var parser = new HTML5.Parser({document: doc});
25 | parser.parse(html);
26 | 
27 | var doc2 = jsdom.jsdom(html, null, {parser: HTML5});
28 | 
29 | 
30 | 
31 | 
32 | var htmlparser = require("htmlparser");
33 | var handler = new htmlparser.DefaultHandler(function (error, dom) {
34 | 
35 | });
36 | var parser = new htmlparser.Parser(handler);
37 | parser.parseComplete(html);
38 | sys.puts(sys.inspect(handler.dom, false, null));


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # node-readability
 2 | [Readability.js by Arc90](http://lab.arc90.com/experiments/readability/) ported to node.js.
 3 | 
 4 | Blog post: [Server side readability with node.js](http://arrix.blogspot.com/2010/11/server-side-readability-with-nodejs.html)
 5 | ## Requirements
 6 | * [node.js](http://nodejs.org/)
 7 | * [jsdom](https://github.com/tmpvar/jsdom)
 8 | * [htmlparser](https://github.com/tautologistics/node-htmlparser)
 9 | 
10 | ## Live demo
11 | I'm working on it...
12 | ## Example
13 | 
14 |     var readability = require('readability');
15 |     //...
16 |     // This is a very early example. The API is subject to change.
17 |     readability.parse(html, url, function(result) {
18 |         console.log(result.title, result.content);
19 |     });
20 | 
21 | ## Performance
22 | In my testing of 140 pages with an average size of **58KB** collected from [digg](http://digg.com/news.rss), [delicious](http://feeds.delicious.com/v2/rss/?count=50) and [hacker news](http://news.ycombinator.com/rss), the average time taken for each page is about **1.1 seconds** on a Mac Mini (2.4G Intel Core 2 Duo).
23 | ## Limitation
24 | * no fetching next pages
25 | * no support for frames
26 | 
27 | ## Plan
28 | * Performance optimization
29 | * Better API, more options
30 | * Support more readability features


--------------------------------------------------------------------------------
/test/bugs/jsdom-bug.js:
--------------------------------------------------------------------------------
 1 | // jsdom bug: Live NodeList isn't updated after DOM manipulation
 2 | // node.js v0.2.4
 3 | // jsdom@0.1.20
 4 | // https://github.com/tmpvar/jsdom/issues/#issue/77
 5 | 
 6 | var jsdom = require('jsdom');
 7 | var html = '<html><body>&nbsp;<p id="p1"></p><p id="p2"></p></body></html>';
 8 | var window = jsdom.jsdom(html).createWindow();
 9 | var document = window.document;
10 | 
11 | var all = document.getElementsByTagName('*');
12 | var i = 2;
13 | var node = all[i];
14 | console.log(''+node); //P#p1
15 | node.parentNode.removeChild(node);
16 | 
17 | console.log(''+all[i]); //still P#p1. the live NodeList wasn't updated properly
18 | all.length; //trigger a refresh. the length getter calls update()
19 | console.log(''+all[i]); //P#p2 OK
20 | 
21 | 
22 | // innerHTML = '' doesn't remove all children
23 | // https://github.com/tmpvar/jsdom/issues/#issue/80
24 | (function() {
25 |   var jsdom = require('jsdom');
26 |   var html = '<html><body><p id="p1"></p><p id="p2"></p></body></html>';
27 |   var doc = jsdom.jsdom(html);
28 |   var win = doc.createWindow();
29 |   var b = doc.body;
30 |   b.innerHTML = '';
31 |   console.log(b.innerHTML); //<p id="p2"></p>
32 | 
33 |   var arr = [0, 1, 2, 3, 4, 5];
34 |   arr.forEach(function(v, i) {
35 |   	console.log('[', i, '] ==', v);
36 |   	arr.splice(i, 1);
37 |   });
38 |   // output
39 |   // [ 0 ] == 0
40 |   // [ 1 ] == 2
41 |   // [ 2 ] == 4
42 | 
43 | })();
44 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "readability",
 3 |     "version": "0.1.0",
 4 |     "description": "Arc90's readability.js adapted to node.js",
 5 |     "keywords": [
 6 |         "readability"
 7 |     ],
 8 |     "maintainers": [
 9 |         {
10 |             "name": "Arrix",
11 |             "email": "arrixzhou@gmail.com",
12 |             "web": "http://arrix.blogspot.com"
13 |         }
14 |     ],
15 |     "contributors": [
16 |         {
17 |             "name": "Arrix",
18 |             "email": "arrixzhou@gmail.com",
19 |             "web": "http://arrix.blogspot.com"
20 |         },
21 |         {
22 |             "name": "Vincent Cao",
23 |             "email": "caojunvincent@gmail.com"
24 |         }
25 |     ],
26 |     "bugs": {
27 |         "mail": "arrixzhou@gmail.com",
28 |         "web": "http://github.com/arrix/node-readability/issues"
29 |     },
30 |     "licenses": [
31 |         {
32 |             "type": "MIT",
33 |             "url": "http://github.com/arrix/node-readability/LICENSE.txt"
34 |         }
35 |     ],
36 |     "repositories": [
37 |         {
38 |             "type": "git",
39 |             "url": "http://github.com/arrix/node-readability.git"
40 |         }
41 |     ],
42 |     "dependencies": {
43 |        "mjsunit.runner": ">=0.1.0",
44 |        "jsdom": ">=0.1.21",
45 |        "htmlparser": ">=1.7.3"
46 |     },
47 |     "engines" : { "node" : ">=0.2.5" },
48 |     "directories": {
49 |         "lib": "lib"
50 |     },
51 |     "main": "./lib/readability"
52 | }
53 | 
54 | 


--------------------------------------------------------------------------------
/test/clean-file.js:
--------------------------------------------------------------------------------
 1 | var http = require('http'),
 2 |     url_mod = require('url'),
 3 |     fs = require('fs');
 4 |         
 5 | var readability = require('../lib/readability.js'),
 6 |     sprintf = readability.sprintf;
 7 | 
 8 | function cleanFile(path, url, cb) {
 9 |     var content = fs.readFileSync(path, 'utf-8');
10 |     readability.parse(content, url, {removeReadabilityArtifacts: false, removeClassNames: false, debug: true, profile: 1}, cb);
11 | }
12 | if (1) {
13 |  cleanFile(__dirname + '/weird-pages/w3c-css-no-closing-head.html', '', function(info) {
14 |      //console.log(info.content);
15 |  });
16 |  
17 |  return;
18 | }
19 | 
20 | function batch_run() {
21 |     var dir = __dirname + '/pages/';
22 |     var files = fs.readdirSync(dir);
23 |     var results = [];
24 |     //files.length = 10;
25 |     files.forEach(function(f) {
26 | 		if (!/\.html/i.test(f)) return;
27 |         console.log('######## Processing file...', f);
28 |         cleanFile(dir + f, '', function(result) {
29 |             results.push({time: result.time, file: f, inputLength: result.inputLength, error: result.error});
30 |         });
31 |     });
32 |     
33 |     var total = 0, totalTime = 0;
34 |     results.filter(function(v) {return !v.error}).sort(function(a, b) {return a.time - b.time;}).forEach(function(r) {
35 |         total++;
36 |         totalTime += r.time;
37 |         console.log(sprintf('%5.2f\t%8d\t%10s', r.time, r.inputLength, r.file));
38 |     });
39 |     console.log('total:', total, "avg time:", totalTime/total);
40 | }
41 | 
42 | batch_run();
43 | 


--------------------------------------------------------------------------------
/test/grab-pages.rb:
--------------------------------------------------------------------------------
 1 | require 'open-uri'
 2 | require 'rexml/document'
 3 | require 'fileutils'
 4 | 
 5 | module Program
 6 |   class << self
 7 |     def fetch_feed(url)
 8 |       content = nil
 9 |       open(url) do |f|
10 |         content = f.read
11 |       end
12 |       content
13 |     end
14 |     
15 |     def fetch_digg_feed
16 |       url = 'http://services.digg.com/2.0/story.getTopNews?type=rss'
17 |       content = fetch_feed(url)
18 |       content.force_encoding('iso-8859-1');
19 |       content.encode('utf-8')
20 |     end
21 |     
22 |     def fetch_hackernews_feed
23 |       url = 'http://news.ycombinator.com/rss'
24 |       fetch_feed(url)
25 |     end
26 |     
27 |     def fetch_delicious_feed
28 |       url = 'http://feeds.delicious.com/v2/rss/?count=30'
29 |       fetch_feed(url)
30 |     end
31 |     
32 |     def parse_rss(feed)
33 |       xml = REXML::Document.new(feed)
34 |       xml.elements.each("//item") do |item|
35 |         link = item.get_elements('link')[0].text.strip
36 |         title = item.get_elements('title')[0].text.strip
37 |         yield link, title
38 |       end
39 |     end
40 |     
41 |     def run
42 |       dir = File.expand_path('../pages', __FILE__)
43 |       FileUtils.mkdir(dir) unless File.exists? dir
44 |       
45 |       [fetch_digg_feed, fetch_hackernews_feed, fetch_delicious_feed].each do |feed|
46 |         parse_rss(feed) do |url, title|
47 |           filename = title.gsub(/\W/, '_') + '.html'
48 |           filepath = File.join(dir, filename)
49 |           puts "fetching #{url} as #{filepath}"
50 |           puts `curl --connect-timeout=5 #{url} > #{filepath} &`
51 |           sleep 1
52 |         end
53 |       end
54 |     end
55 |     
56 |   end
57 | end
58 | 
59 | if __FILE__ == $0
60 |   Program.run
61 | end
62 | 


--------------------------------------------------------------------------------
/notes.txt:
--------------------------------------------------------------------------------
 1 | # live NodeList
 2 | NodeLists returned by node.childNodes and getElementsByXxx() apis are live which means changes to the DOM tree will be reflected in the NodeList when accessed.
 3 | 
 4 | In jsdom's implementation, a live NodeList is updated when item() or length is accessed but not when the [index] is accessed.
 5 | In a live NodeList iteration, you must carefully call list.update() (or just list.length) to trigger an update.
 6 | Beware that NodeList update is very expensive! When possible, prefer DOM traversal over getElementsByXxx();
 7 | 
 8 | If no changes will be made to the subtree, it is a good idea to iterate over an Array.
 9 | var arr = nodeList.toArray(); //toArray() is not in the standards
10 | var arr = Array.prototype.slice.call(nodeList);
11 | 
12 | # nodeList._length
13 | WRONG: In jsdom the length getter property of a NodeList calls .update(), which re-queries the DOM tree. In a read-only loop, it is more efficient to access ._length instead of .length.
14 | var nodes = ele.getElementsByTagName('div'), i, len;
15 | for (i = 0, len = nodes._length; i < len; i++) {
16 |     //does not change the dom structure
17 | }
18 | childNodes._length may not be up to date!!!!!
19 | 
20 | # .textContent
21 | readability.getInnerText is a very frequently used function. My optimization for it reduced the total running time by half.
22 | // hundredfold faster
23 | // use native string.trim
24 | // jsdom's implementation of textContent is innerHTML + strip tags + HTMLDecode
25 | // here we replace it with an optimized tree walker
26 | 
27 | # cleanStyles
28 | cleanStyles is recursive; it accounts for most of the running time of prepArticle
29 | 
30 | # security
31 | arbitrary js
32 | frames
33 | 
34 | # performance
35 | grep TOTAL clean.log|cut -d ' ' -f5|sort -n
36 | 
37 | irb>
38 | s = <<EOT
39 | ...
40 | EOT
41 | a = s.split("\n").map(&:to_f)
42 | avg = a.reduce{|x,y| x+y} / a.size
43 | def hist(array)
44 |   
45 | end
46 | 
47 | 
48 | def avg(s, regex) 
49 |     a = s.scan(regex).flatten.map(&:to_f)
50 |     a.reduce{|x,y| x+y}/a.size
51 | end
52 | 
53 | # sum profiler output
54 | s = <<EOT
55 | 19 Nov 12:56:08 -       0.233 seconds [killBreaks] 
56 | 19 Nov 12:56:08 -       0.069 seconds [cleanConditionally] 
57 | 19 Nov 12:56:09 -       0.071 seconds [clean] 
58 | 19 Nov 12:56:09 -       0.074 seconds [clean] 
59 | 19 Nov 12:56:09 -       0.068 seconds [clean] 
60 | 19 Nov 12:56:09 -       0.139 seconds [cleanHeaders] 
61 | 19 Nov 12:56:09 -       0.241 seconds [cleanConditionally] 
62 | 19 Nov 12:56:09 -       0.088 seconds [cleanConditionally] 
63 | 19 Nov 12:56:09 -       0.233 seconds [cleanConditionally] 
64 | 19 Nov 12:56:10 -       0.507 seconds [prepArticle Remove extra paragraphs] 
65 | 19 Nov 12:56:11 -       0.568 seconds [prepArticle innerHTML replacement]
66 | EOT
67 | s.scan(/\s([.\d]+)\s+seconds/).flatten.map(&:to_f).reduce{|a,b|a+b}


--------------------------------------------------------------------------------
/test/clean-proxy.js:
--------------------------------------------------------------------------------
  1 | var sys = require('sys'), 
  2 |     http = require('http'),
  3 |     url_mod = require('url'),
  4 |     events = require('events');
  5 |     
  6 | var util = require('util');
  7 |     
  8 | var jsdom = require('jsdom');
  9 | 
 10 | var readability2 = require('../lib/readability.js');
 11 |     
 12 | function extend(to, from) {
 13 |     var l,i,o,j;
 14 |     for (i = 1, l = arguments.length; i < l; i++) {
 15 |         o = arguments[i];
 16 |         for (j in o) {
 17 |             to[j] = o[j];
 18 |         }
 19 |     }
 20 |     return to;
 21 | }
 22 | 
 23 | http.createServer(function(req, res) {
 24 |     console.log(req.url);
 25 |     var u = url_mod.parse(req.url, true);
 26 |     
 27 |     
 28 |     var isMainDoc = u.pathname == '/';
 29 |     
 30 |     if (isMainDoc) {
 31 |         var q = u.query;
 32 |         u = (q && q.url) || 'http://en.wikipedia.org/wiki/Ruby'
 33 |         u = url_mod.parse(u);
 34 |     } else {
 35 |         res.writeHead(404);
 36 |         res.end();
 37 |         return;
 38 |     }
 39 |     
 40 |     
 41 |     var client = http.createClient(u.port || 80, u.hostname);
 42 |     var h = {host: u.hostname};
 43 |     'accept user-agent accept-language accept-charset accept-encoding'.split(' ').forEach(function(field) {
 44 |         h[field] = req.headers[field];
 45 |     });
 46 |     
 47 |     extend(h, {'pragma': 'no-cache', 'accept-encoding': 'none'});
 48 |         
 49 |     console.log(h);
 50 |     var request = client.request('GET', u.pathname + (u.search || '') + (u.hash || ''), h);
 51 |     request.end();
 52 |     
 53 |     var result = {};
 54 |     request.on('response', function(response) {
 55 |         console.log('[Response]', response.statusCode, response.headers['content-type']);
 56 |         
 57 |         var contentType = response.headers['content-type'];
 58 |         if (/html/i.test(contentType)) {
 59 |             //is html
 60 |         } else {
 61 |             console.log('NOT HTML');
 62 |             //something else
 63 |             res.writeHead(response.statusCode, response.headers);
 64 |             response.on('data', function(data) {
 65 |                 res.write(data);
 66 |             });
 67 |             response.on('end', function() {
 68 |                 res.end();
 69 |             });
 70 |             return;
 71 |         }
 72 |         
 73 |         response.setEncoding('utf-8');
 74 |         result.statusCode = response.statusCode;
 75 |         result.headers = response.headers;
 76 |         result.headers['content-type'] = 'text/html'; //override xhtml mimetype
 77 |         result.body = null;
 78 |         response.on('data', function(data) {
 79 |             //console.log('==========', data.constructor.name, data);
 80 |             if (result.body === null) 
 81 |                 result.body = data;
 82 |             else
 83 |                 result.body += data;
 84 |         });
 85 |         response.on('end', function() {
 86 |             util.debug('===== response end');
 87 |             res.writeHead(result.statusCode, result.headers);
 88 |             if (result.body !== null) {
 89 |                 // var clean = new CleanReading(result.body);
 90 |                 // clean.on('finish', function(info) {
 91 |                 //     res.write(info.content);
 92 |                 //     res.end();
 93 |                 // });
 94 |                 // clean.clean();
 95 |                 
 96 |                 readability2.parse(result.body, '', {removeReadabilityArtifacts: false, removeClassNames: false, debug: true, profile: true}, function(info) {
 97 |                     res.write(CleanReading.prototype.wrapContent(info.title, info.content));
 98 |                     res.end();
 99 |                 });
100 |             } else {
101 |                 res.end();
102 |             }
103 |         });
104 |     });
105 | }).listen(3000);
106 | 
107 | function CleanReading(html) {
108 |     var z = this;
109 |     events.EventEmitter.call(z);
110 |     z.html = html;
111 | }
112 | CleanReading.super_ = events.EventEmitter;
113 | 
114 | CleanReading.prototype = Object.create(events.EventEmitter.prototype, {
115 |     constructor: {value: CleanReading}
116 | });
117 | 
118 | extend(CleanReading.prototype, {
119 |     clean: function() {
120 |         var z = this;
121 |         // var w = jsdom.jsdom(z.html).createWindow();
122 |         // var doc = w.document;
123 |         
124 |         readability.Client.parse(z.html, function(r) {
125 |             console.log('====== readability ======', r.title);
126 |             z.emit('finish', r);
127 |             //z.emit('finish', {title: r.title, content: z.wrapContent(r.title, r.content)});
128 |         });
129 |         //z.emit('finish', {content: z.html});
130 |     },
131 |     
132 |     wrapContent: function(title, content) {
133 |         return '<!DOCTYPE html><html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /><title>' +
134 |          title + '</title><link rel="stylesheet" href="http://lab.arc90.com/experiments/readability/css/readability.css" type="text/css" media="all" /></head><body>' +
135 |          content +
136 |          '</body></html>'; 
137 |     }
138 | });


--------------------------------------------------------------------------------
/misc/readability-feedflock.js:
--------------------------------------------------------------------------------
  1 | (function() {
  2 |   //require.paths.unshift('./vendor');
  3 |   var sys = require('sys');
  4 |   var jsdom = require('jsdom');
  5 |   //var htmlparser = require('./htmlparser');
  6 |   //var level = jsdom.defaultLevel;
  7 |   // var doc = new (level.Document)();
  8 |   // doc.createWindow = function() {
  9 |   //   window = jsdom.windowAugmentation(level, { document: doc, parser: htmlparser })
 10 |   //   delete window.document.createWindow
 11 |   //   return window
 12 |   // };
 13 |   // var document = doc.createWindow().document;
 14 | 
 15 |   var document;
 16 |   var Client = {
 17 |     parse: function(content, callback) {
 18 |       document = jsdom.jsdom(content).createWindow().document;
 19 |       //document.innerHTML = content;
 20 |       //console.log(document.body);
 21 |       if (!document.body) {
 22 |         callback({content:'',title:''});
 23 |         return;
 24 |       }
 25 |       
 26 |     	// Replace all doubled-up <BR> tags with <P> tags, and remove fonts.
 27 |     	var pattern =  new RegExp ("<br/?>[ \r\n\s]*<br/?>", "g");
 28 |     	document.body.innerHTML = document.body.innerHTML.replace(pattern, "</p><p>").replace(/<\/?font[^>]*>/g, '');
 29 | 
 30 |     	var allParagraphs = document.getElementsByTagName("p");
 31 |     	var contentDiv = null;
 32 |     	var topDivParas =[];
 33 | 
 34 |     	var articleContent = document.createElement("DIV");
 35 |     	var articleTitle = document.title
 36 | 
 37 |     	if (articleTitle)
 38 |     	  articleTitle = articleTitle.replace(/^\s+|\s+$/g, '');
 39 | 
 40 |     	// Study all the paragraphs and find the chunk that has the best score.
 41 |     	// A score is determined by things like: Number of <p>'s, commas, special classes, etc.
 42 |     	for (var j=0; j	< allParagraphs.length; j++) {
 43 |     		var parentNode = allParagraphs[j].parentNode;
 44 | 
 45 |         if(typeof(parentNode) != 'undefined') {
 46 |       		// Initialize readability data
 47 |       		if(typeof parentNode.readability == 'undefined')
 48 |       		{
 49 |       			parentNode.readability = {"contentScore": 0};
 50 | 
 51 |       			// Look for a special classname
 52 |       			if(parentNode.className.match(/(comment|meta|footer|footnote)/))
 53 |       				parentNode.readability.contentScore -= 50;
 54 |       			else if(parentNode.className.match(/((^|\\s)(post|hentry|entry[-]?(content|text|body)?|article[-]?(content|text|body)?)(\\s|$))/))
 55 |       				parentNode.readability.contentScore += 25;
 56 | 
 57 |       			// Look for a special ID
 58 |       			if(parentNode.id.match(/(comment|meta|footer|footnote)/))
 59 |       				parentNode.readability.contentScore -= 50;
 60 |       			else if(parentNode.id.match(/^(post|hentry|entry[-]?(content|text|body)?|article[-]?(content|text|body)?)$/))
 61 |       				parentNode.readability.contentScore += 25;
 62 |       		}
 63 | 
 64 |       		// Add a point for the paragraph found
 65 |       		if(this.getInnerText(allParagraphs[j]).length > 10)
 66 |       			parentNode.readability.contentScore++;
 67 | 
 68 |       		// Add points for any commas within this paragraph
 69 |       		parentNode.readability.contentScore += this.getCharCount(allParagraphs[j]);
 70 | 
 71 |       		topDivParas.push({ 'node': parentNode, 'score': parentNode.readability.contentScore });
 72 |       	}
 73 |     	}
 74 | 
 75 |     	for (var i=0; i	< topDivParas.length; i++) {
 76 |     	  var score = topDivParas[i].score;
 77 |         if (contentDiv == null || score > contentDiv.score) {
 78 |           contentDiv = { 'node': topDivParas[i].node, 'score': score }
 79 |         }
 80 |       }
 81 | 
 82 |       if (contentDiv == null)
 83 |         return callback({ content: '', title: '' });
 84 | 
 85 |       var topDiv = contentDiv.node
 86 | 
 87 |     	this.cleanStyles(topDiv);					// Removes all style attributes
 88 |     	topDiv = this.killDivs(topDiv);		// Goes in and removes DIV's that have more non <p> stuff than <p> stuff
 89 |     	topDiv = this.killBreaks(topDiv);  // Removes any consecutive <br />'s into just one <br />
 90 | 
 91 |     	// Cleans out junk from the topDiv just in case:
 92 |     	topDiv = this.clean(topDiv, "form");
 93 |     	topDiv = this.clean(topDiv, "object");
 94 |     	topDiv = this.clean(topDiv, "table", 250);
 95 |     	topDiv = this.clean(topDiv, "h1");
 96 |     	topDiv = this.clean(topDiv, "h2");
 97 |     	topDiv = this.clean(topDiv, "iframe");
 98 | 
 99 |     	articleContent.appendChild(topDiv);
100 | 
101 |     	return callback({ content: articleContent.innerHTML, title: articleTitle });
102 |     },
103 |     getInnerText: function(e) {
104 |     	return e.textContent;
105 |     },
106 |     getCharCount: function( e,s ) {
107 |       s = s || ",";
108 |       return this.getInnerText(e).split(s).length;
109 |     },
110 |     cleanStyles: function( e ) {
111 |       e = e || document;
112 |       var cur = e.firstChild;
113 | 
114 |     	// If we had a bad node, there's not much we can do.
115 |     	if(!e)
116 |     		return;
117 | 
118 |     	// Remove any root styles, if we're able.
119 |     	if(typeof e.removeAttribute == 'function')
120 |     		e.removeAttribute('style');
121 | 
122 |         // Go until there are no more child nodes
123 |         while ( cur != null ) {
124 |     		if ( cur.nodeType == 1 ) {
125 |     			// Remove style attribute(s) :
126 |     			cur.removeAttribute("style");
127 |     			this.cleanStyles( cur );
128 |     		}
129 |     		cur = cur.nextSibling;
130 |     	}
131 |     },
132 |     killDivs: function ( e ) {
133 |       var divsList = e.getElementsByTagName( "div" );
134 |       var curDivLength = divsList.length;
135 | 
136 |       // Gather counts for other typical elements embedded within.
137 |       // Traverse backwards so we can remove nodes at the same time without effecting the traversal.
138 |       for (var i=curDivLength-1; i >= 0; i--) {
139 |       	var p = divsList[i].getElementsByTagName("p").length;
140 |       	var img = divsList[i].getElementsByTagName("img").length;
141 |       	var li = divsList[i].getElementsByTagName("li").length;
142 |       	var a = divsList[i].getElementsByTagName("a").length;
143 |       	var embed = divsList[i].getElementsByTagName("embed").length;
144 | 
145 |       // If the number of commas is less than 10 (bad sign) ...
146 |       if ( this.getCharCount(divsList[i]) < 10) {
147 |       		// And the number of non-paragraph elements is more than paragraphs
148 |       		// or other ominous signs :
149 |       		if ( img > p || li > p || a > p || p == 0 || embed > 0) {
150 |       			divsList[i].parentNode.removeChild(divsList[i]);
151 |       		}
152 |       	}
153 |       }
154 |       return e;
155 |     },
156 |     killBreaks: function ( e ) {
157 |     	e.innerHTML = e.innerHTML.replace(/(<br\s*\/?>(\s|&nbsp;?)*){1,}/g,'<br />');
158 |     	return e;
159 |     },
160 |     clean: function(e, tags, minWords) {
161 |       var targetList = e.getElementsByTagName( tags );
162 |       minWords = minWords || 1000000;
163 | 
164 |       for (var y=0; y < targetList.length; y++) {
165 |       	// If the text content isn't laden with words, remove the child:
166 |       	if (this.getCharCount(targetList[y], " ") < minWords) {
167 |       		targetList[y].parentNode.removeChild(targetList[y]);
168 |       	}
169 |       }
170 |       return e;
171 |     }
172 |   };
173 |   exports.Client = Client;
174 | })();


--------------------------------------------------------------------------------
/lib/sprintf.js:
--------------------------------------------------------------------------------
  1 | /**
  2 | sprintf() for JavaScript 0.7-beta1
  3 | http://www.diveintojavascript.com/projects/javascript-sprintf
  4 | 
  5 | Copyright (c) Alexandru Marasteanu <alexaholic [at) gmail (dot] com>
  6 | All rights reserved.
  7 | 
  8 | Redistribution and use in source and binary forms, with or without
  9 | modification, are permitted provided that the following conditions are met:
 10 |     * Redistributions of source code must retain the above copyright
 11 |       notice, this list of conditions and the following disclaimer.
 12 |     * Redistributions in binary form must reproduce the above copyright
 13 |       notice, this list of conditions and the following disclaimer in the
 14 |       documentation and/or other materials provided with the distribution.
 15 |     * Neither the name of sprintf() for JavaScript nor the
 16 |       names of its contributors may be used to endorse or promote products
 17 |       derived from this software without specific prior written permission.
 18 | 
 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 22 | DISCLAIMED. IN NO EVENT SHALL Alexandru Marasteanu BE LIABLE FOR ANY
 23 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29 | 
 30 | 
 31 | Changelog:
 32 | 2010.09.06 - 0.7-beta1
 33 |   - features: vsprintf, support for named placeholders
 34 |   - enhancements: format cache, reduced global namespace pollution
 35 | 
 36 | 2010.05.22 - 0.6:
 37 |  - reverted to 0.4 and fixed the bug regarding the sign of the number 0
 38 |  Note:
 39 |  Thanks to Raphael Pigulla <raph (at] n3rd [dot) org> (http://www.n3rd.org/)
 40 |  who warned me about a bug in 0.5, I discovered that the last update was
 41 |  a regression. I apologize for that.
 42 | 
 43 | 2010.05.09 - 0.5:
 44 |  - bug fix: 0 is now preceded with a + sign
 45 |  - bug fix: the sign was not at the right position on padded results (Kamal Abdali)
 46 |  - switched from GPL to BSD license
 47 | 
 48 | 2007.10.21 - 0.4:
 49 |  - unit test and patch (David Baird)
 50 | 
 51 | 2007.09.17 - 0.3:
 52 |  - bug fix: no longer throws exception on empty parameters (Hans Pufal)
 53 | 
 54 | 2007.09.11 - 0.2:
 55 |  - feature: added argument swapping
 56 | 
 57 | 2007.04.03 - 0.1:
 58 |  - initial release
 59 | **/
 60 | 
 61 | var sprintf = (function() {
 62 | 	function get_type(variable) {
 63 | 		return Object.prototype.toString.call(variable).slice(8, -1).toLowerCase();
 64 | 	}
 65 | 	function str_repeat(input, multiplier) {
 66 | 		for (var output = []; multiplier > 0; output[--multiplier] = input) {/* do nothing */}
 67 | 		return output.join('');
 68 | 	}
 69 | 
 70 | 	var str_format = function() {
 71 | 		if (!str_format.cache.hasOwnProperty(arguments[0])) {
 72 | 			str_format.cache[arguments[0]] = str_format.parse(arguments[0]);
 73 | 		}
 74 | 		return str_format.format.call(null, str_format.cache[arguments[0]], arguments);
 75 | 	};
 76 | 
 77 | 	str_format.format = function(parse_tree, argv) {
 78 | 		var cursor = 1, tree_length = parse_tree.length, node_type = '', arg, output = [], i, k, match, pad, pad_character, pad_length;
 79 | 		for (i = 0; i < tree_length; i++) {
 80 | 			node_type = get_type(parse_tree[i]);
 81 | 			if (node_type === 'string') {
 82 | 				output.push(parse_tree[i]);
 83 | 			}
 84 | 			else if (node_type === 'array') {
 85 | 				match = parse_tree[i]; // convenience purposes only
 86 | 				if (match[2]) { // keyword argument
 87 | 					arg = argv[cursor];
 88 | 					for (k = 0; k < match[2].length; k++) {
 89 | 						if (!arg.hasOwnProperty(match[2][k])) {
 90 | 							throw(sprintf('[sprintf] property "%s" does not exist', match[2][k]));
 91 | 						}
 92 | 						arg = arg[match[2][k]];
 93 | 					}
 94 | 				}
 95 | 				else if (match[1]) { // positional argument (explicit)
 96 | 					arg = argv[match[1]];
 97 | 				}
 98 | 				else { // positional argument (implicit)
 99 | 					arg = argv[cursor++];
100 | 				}
101 | 
102 | 				if (/[^s]/.test(match[8]) && (get_type(arg) != 'number')) {
103 | 					throw(sprintf('[sprintf] expecting number but found %s', get_type(arg)));
104 | 				}
105 | 				switch (match[8]) {
106 | 					case 'b': arg = arg.toString(2); break;
107 | 					case 'c': arg = String.fromCharCode(arg); break;
108 | 					case 'd': arg = parseInt(arg, 10); break;
109 | 					case 'e': arg = match[7] ? arg.toExponential(match[7]) : arg.toExponential(); break;
110 | 					case 'f': arg = match[7] ? parseFloat(arg).toFixed(match[7]) : parseFloat(arg); break;
111 | 					case 'o': arg = arg.toString(8); break;
112 | 					case 's': arg = ((arg = String(arg)) && match[7] ? arg.substring(0, match[7]) : arg); break;
113 | 					case 'u': arg = Math.abs(arg); break;
114 | 					case 'x': arg = arg.toString(16); break;
115 | 					case 'X': arg = arg.toString(16).toUpperCase(); break;
116 | 				}
117 | 				arg = (/[def]/.test(match[8]) && match[3] && arg >= 0 ? '+'+ arg : arg);
118 | 				pad_character = match[4] ? match[4] == '0' ? '0' : match[4].charAt(1) : ' ';
119 | 				pad_length = match[6] - String(arg).length;
120 | 				pad = match[6] ? str_repeat(pad_character, pad_length) : '';
121 | 				output.push(match[5] ? arg + pad : pad + arg);
122 | 			}
123 | 		}
124 | 		return output.join('');
125 | 	};
126 | 
127 | 	str_format.cache = {};
128 | 
129 | 	str_format.parse = function(fmt) {
130 | 		var _fmt = fmt, match = [], parse_tree = [], arg_names = 0;
131 | 		while (_fmt) {
132 | 			if ((match = /^[^\x25]+/.exec(_fmt)) !== null) {
133 | 				parse_tree.push(match[0]);
134 | 			}
135 | 			else if ((match = /^\x25{2}/.exec(_fmt)) !== null) {
136 | 				parse_tree.push('%');
137 | 			}
138 | 			else if ((match = /^\x25(?:([1-9]\d*)\$|\(([^\)]+)\))?(\+)?(0|'[^$])?(-)?(\d+)?(?:\.(\d+))?([b-fosuxX])/.exec(_fmt)) !== null) {
139 | 				if (match[2]) {
140 | 					arg_names |= 1;
141 | 					var field_list = [], replacement_field = match[2], field_match = [];
142 | 					if ((field_match = /^([a-z_][a-z_\d]*)/i.exec(replacement_field)) !== null) {
143 | 						field_list.push(field_match[1]);
144 | 						while ((replacement_field = replacement_field.substring(field_match[0].length)) !== '') {
145 | 							if ((field_match = /^\.([a-z_][a-z_\d]*)/i.exec(replacement_field)) !== null) {
146 | 								field_list.push(field_match[1]);
147 | 							}
148 | 							else if ((field_match = /^\[(\d+)\]/.exec(replacement_field)) !== null) {
149 | 								field_list.push(field_match[1]);
150 | 							}
151 | 							else {
152 | 								throw('[sprintf] huh?');
153 | 							}
154 | 						}
155 | 					}
156 | 					else {
157 | 						throw('[sprintf] huh?');
158 | 					}
159 | 					match[2] = field_list;
160 | 				}
161 | 				else {
162 | 					arg_names |= 2;
163 | 				}
164 | 				if (arg_names === 3) {
165 | 					throw('[sprintf] mixing positional and named placeholders is not (yet) supported');
166 | 				}
167 | 				parse_tree.push(match);
168 | 			}
169 | 			else {
170 | 				throw('[sprintf] huh?');
171 | 			}
172 | 			_fmt = _fmt.substring(match[0].length);
173 | 		}
174 | 		return parse_tree;
175 | 	};
176 | 
177 | 	return str_format;
178 | })();
179 | 
/**
 * vsprintf(fmt, argv) -- like sprintf, but takes the data arguments as an array.
 *
 * @param {string} fmt the format string.
 * @param {Array} [argv] values for the placeholders; treated as empty when omitted.
 * @returns {string} the formatted string.
 */
var vsprintf = function(fmt, argv) {
	// Build a fresh argument list rather than unshift()ing into argv, so the
	// caller's array is left untouched and can be reused for further calls.
	return sprintf.apply(null, [fmt].concat(Array.prototype.slice.call(argv || [])));
};
184 | 
// CommonJS exports: publish both formatting entry points on the module's exports object.
exports.sprintf = sprintf;
exports.vsprintf = vsprintf;


--------------------------------------------------------------------------------
/test/weird-pages/w3c-css-no-closing-head.html:
--------------------------------------------------------------------------------
   1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
   2 | "http://www.w3.org/TR/html4/strict.dtd">
   3 | 
   4 | <html lang=en>
   5 |  <head>
   6 |   <title>CSS 2D Transforms Module Level 3</title>
   7 |   <link href=default.css rel=stylesheet type="text/css">
   8 | 
   9 |   <style type="text/css">
  10 |     .rhs { white-space: pre-wrap; }
  11 |     code { font-size: inherit; }
  12 |     #box-shadow-samples td { background: white; color: black; }
  13 |   </style>
  14 |   <link href="http://www.w3.org/StyleSheets/TR/W3C-WD.css" rel=stylesheet
  15 |   type="text/css">
  16 | 
  17 |  <body>
  18 |   <div class=head> <!--begin-logo-->
  19 |    <p><a href="http://www.w3.org/"><img alt=W3C height=48
  20 |     src="http://www.w3.org/Icons/w3c_home" width=72></a> <!--end-logo-->
  21 | 
  22 |    <h1>CSS 2D Transforms Module Level 3</h1>
  23 | 
  24 |    <h2 class="no-num no-toc" id=longstatus-date>W3C Working Draft 01 December
  25 |     2009</h2>
  26 | 
  27 |    <dl>
  28 |     <dt>This version:
  29 | 
  30 |     <dd> <a
  31 |      href="http://www.w3.org/TR/2009/WD-css3-2d-transforms-20091201"><!--http://dev.w3.org/csswg/css3-2d-transforms/-->
  32 |      http://www.w3.org/TR/2009/WD-css3-2d-transforms-20091201</a>
  33 | 
  34 |     <dt>Latest version:
  35 | 
  36 |     <dd><a
  37 |      href="http://www.w3.org/TR/css3-2d-transforms">http://www.w3.org/TR/css3-2d-transforms</a>
  38 |      
  39 | 
  40 |     <dt>Previous version:
  41 | 
  42 |     <dd><a href="http://www.w3.org/TR/2009/WD-css3-2d-transforms-20090320">
  43 |      http://www.w3.org/TR/2009/WD-css3-2d-transforms-20090320</a>
  44 | 
  45 |     <dt id=editors-list>Editors:
  46 | 
  47 |     <dd><a href="mailto:dino@apple.com">Dean Jackson</a> (<a
  48 |      href="http://www.apple.com/">Apple Inc</a>)
  49 | 
  50 |     <dd><a href="mailto:hyatt@apple.com">David Hyatt</a> (<a
  51 |      href="http://www.apple.com/">Apple Inc</a>)
  52 | 
  53 |     <dd><a href="mailto:cmarrin@apple.com">Chris Marrin</a> (<a
  54 |      href="http://www.apple.com/">Apple Inc</a>)
  55 |    </dl>
  56 |    <!--begin-copyright-->
  57 |    <p class=copyright><a
  58 |     href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright"
  59 |     rel=license>Copyright</a> &copy; 2009 <a
  60 |     href="http://www.w3.org/"><acronym title="World Wide Web
  61 |     Consortium">W3C</acronym></a><sup>&reg;</sup> (<a
  62 |     href="http://www.csail.mit.edu/"><acronym title="Massachusetts Institute
  63 |     of Technology">MIT</acronym></a>, <a
  64 |     href="http://www.ercim.org/"><acronym title="European Research Consortium
  65 |     for Informatics and Mathematics">ERCIM</acronym></a>, <a
  66 |     href="http://www.keio.ac.jp/">Keio</a>), All Rights Reserved. W3C <a
  67 |     href="http://www.w3.org/Consortium/Legal/ipr-notice#Legal_Disclaimer">liability</a>,
  68 |     <a
  69 |     href="http://www.w3.org/Consortium/Legal/ipr-notice#W3C_Trademarks">trademark</a>
  70 |     and <a
  71 |     href="http://www.w3.org/Consortium/Legal/copyright-documents">document
  72 |     use</a> rules apply.</p>
  73 |    <!--end-copyright-->
  74 |    <hr title="Separator for header">
  75 |   </div>
  76 | 
  77 |   <h2 class="no-num no-toc" id=abstract>Abstract</h2>
  78 | 
  79 |   <p>CSS 2D Transforms allows elements rendered by CSS to be transformed in
  80 |    two-dimensional space.
  81 | 
  82 |   <h2 class="no-num no-toc" id=status>Status of this document</h2>
  83 |   <!--begin-status-->
  84 | 
  85 |   <p><em>This section describes the status of this document at the time of
  86 |    its publication. Other documents may supersede this document. A list of
  87 |    current W3C publications and the latest revision of this technical report
  88 |    can be found in the <a href="http://www.w3.org/TR/">W3C technical reports
  89 |    index at http://www.w3.org/TR/.</a></em>
  90 | 
  91 |   <p>Publication as a Working Draft does not imply endorsement by the W3C
  92 |    Membership. This is a draft document and may be updated, replaced or
  93 |    obsoleted by other documents at any time. It is inappropriate to cite this
  94 |    document as other than work in progress.
  95 | 
  96 |   <p>The (<a
  97 |    href="http://lists.w3.org/Archives/Public/www-style/">archived</a>) public
  98 |    mailing list <a href="mailto:www-style@w3.org">www-style@w3.org</a> (see
  99 |    <a href="http://www.w3.org/Mail/Request">instructions</a>) is preferred
 100 |    for discussion of this specification. When sending e-mail, please put the
 101 |    text &#8220;css3-2d-transforms&#8221; in the subject, preferably like
 102 |    this: &#8220;[<!---->css3-2d-transforms<!---->] <em>&hellip;summary of
 103 |    comment&hellip;</em>&#8221;
 104 | 
 105 |   <p>This document was produced by the <a
 106 |    href="http://www.w3.org/Style/CSS/members">CSS Working Group</a> (part of
 107 |    the <a href="http://www.w3.org/Style/">Style Activity</a>).
 108 | 
 109 |   <p>This document was produced by a group operating under the <a
 110 |    href="http://www.w3.org/Consortium/Patent-Policy-20040205/">5 February
 111 |    2004 W3C Patent Policy</a>. W3C maintains a <a
 112 |    href="http://www.w3.org/2004/01/pp-impl/32061/status"
 113 |    rel=disclosure>public list of any patent disclosures</a> made in
 114 |    connection with the deliverables of the group; that page also includes
 115 |    instructions for disclosing a patent. An individual who has actual
 116 |    knowledge of a patent which the individual believes contains <a
 117 |    href="http://www.w3.org/Consortium/Patent-Policy-20040205/#def-essential">Essential
 118 |    Claim(s)</a> must disclose the information in accordance with <a
 119 |    href="http://www.w3.org/Consortium/Patent-Policy-20040205/#sec-Disclosure">section
 120 |    6 of the W3C Patent Policy</a>.</p>
 121 |   <!--end-status-->
 122 | 
 123 |   <p> The <a href=ChangeLog>list of changes made to this specification</a> is
 124 |    available.
 125 | 
 126 |   <h2 class="no-num no-toc" id=contents>Table of contents</h2>
 127 |   <!--begin-toc-->
 128 | 
 129 |   <ul class=toc>
 130 |    <li><a href="#introduction"><span class=secno>1. </span>Introduction</a>
 131 | 
 132 |    <li><a href="#transform-property"><span class=secno>2. </span> The <span
 133 |     class=prop-name>&lsquo;<code
 134 |     class=property>transform</code>&rsquo;</span> Property </a>
 135 | 
 136 |    <li><a href="#transform-origin-property"><span class=secno>3. </span> The
 137 |     <span class=prop-name>&lsquo;<code
 138 |     class=property>transform-origin</code>&rsquo;</span> Property </a>
 139 | 
 140 |    <li><a href="#transform-functions"><span class=secno>4. </span> The
 141 |     Transformation Functions </a>
 142 | 
 143 |    <li><a href="#transform-values"><span class=secno>5. </span> Transform
 144 |     Values and Lists </a>
 145 | 
 146 |    <li><a href="#animation"><span class=secno>6. </span> Transitions and
 147 |     animations between transform values </a>
 148 | 
 149 |    <li><a href="#matrix-decomposition"><span class=secno>7. </span> Matrix
 150 |     decomposition for animation </a>
 151 | 
 152 |    <li><a href="#dom-interfaces"><span class=secno>8. </span> DOM Interfaces
 153 |     </a>
 154 |     <ul class=toc>
 155 |      <li><a href="#cssmatrix-interface"><span class=secno>8.1. </span>
 156 |       CSSMatrix </a>
 157 |     </ul>
 158 | 
 159 |    <li><a href="#references"><span class=secno>9. </span>References</a>
 160 |     <ul class=toc>
 161 |      <li class=no-num><a href="#normative-references">Normative
 162 |       references</a>
 163 | 
 164 |      <li class=no-num><a href="#other-references">Other references</a>
 165 |     </ul>
 166 | 
 167 |    <li class=no-num><a href="#property-index">Property index</a>
 168 | 
 169 |    <li class=no-num><a href="#index">Index</a>
 170 |   </ul>
 171 |   <!--end-toc-->
 172 | 
 173 |   <h2 id=introduction><span class=secno>1. </span>Introduction</h2>
 174 | 
 175 |   <p><em>This section is not normative.</em>
 176 | 
 177 |   <p> The CSS <a href="http://www.w3.org/TR/REC-CSS2/visuren.html">visual
 178 |    formatting model</a> describes a coordinate system within which each
 179 |    element is positioned. Positions and sizes in this coordinate space can be
 180 |    thought of as being expressed in pixels, starting in the upper left corner
 181 |    of the parent with positive values proceeding to the right and down.
 182 | 
 183 |   <p> This coordinate space can be modified with the <span
 184 |    class=prop-name>&lsquo;<code class=property><a
 185 |    href="#effects">transform</a></code>&rsquo;</span> property. Using
 186 |    transform, elements can be translated, rotated and scaled in two
 187 |    dimensional space. The coordinate space behaves as described in the <a
 188 |    href="http://www.w3.org/TR/SVG/coords.html#EstablishingANewUserSpace">coordinate
 189 |    system transformations</a> section of the SVG 1.1 specification. This is a
 190 |    coordinate system with two axes: the X axis increases horizontally to the
 191 |    right; the Y axis increases vertically downwards.
 192 | 
 193 |   <p> Specifying a value other than &lsquo;<code
 194 |    class=property>none</code>&rsquo; for the <span
 195 |    class=prop-name>&lsquo;<code class=property><a
 196 |    href="#effects">transform</a></code>&rsquo;</span> property establishes a
 197 |    new <em>local coordinate system</em> at the element that it is applied to.
 198 |    Transformations are cumulative. That is, elements establish their local
 199 |    coordinate system within the coordinate system of their parent. In this
 200 |    way, a <span class=prop-name>&lsquo;<code class=property><a
 201 |    href="#effects">transform</a></code>&rsquo;</span> property effectively
 202 |    accumulates all the <span class=prop-name>&lsquo;<code class=property><a
 203 |    href="#effects">transform</a></code>&rsquo;</span> properties of its
 204 |    ancestors. The accumulation of these transforms defines a <em>current
 205 |    transformation matrix (CTM)</em> for the element.
 206 | 
 207 |   <p> The transform property does not affect the flow of the content
 208 |    surrounding the transformed element. However, the value of the overflow
 209 |    area takes into account transformed elements. This behavior is similar to
 210 |    what happens when elements are translated via relative positioning.
 211 |    Therefore, if the value of the <span class=prop-name>&lsquo;<code
 212 |    class=property>overflow</code>&rsquo;</span> property is <span
 213 |    class=prop-value>&lsquo;<code class=property>scroll</code>&rsquo;</span>
 214 |    or <span class=prop-value>&lsquo;<code
 215 |    class=property>auto</code>&rsquo;</span>, scrollbars will appear as needed
 216 |    to see content that is transformed outside the visible area.
 217 | 
 218 |   <p> Any value other than &lsquo;<code class=property>none</code>&rsquo; for
 219 |    the transform results in the creation of both a stacking context and a
 220 |    containing block. The object acts as a containing block for fixed
 221 |    positioned descendants.
 222 | 
 223 |   <div class=todo> Need to go into more detail here about why fixed
 224 |    positioned objects should do this, i.e., that it's much harder to
 225 |    implement otherwise.</div>
 226 | 
 227 |   <div class=issue> There are two roles for transformations in layout: (1)
 228 |    transformations that adjust the position of the affected content without
 229 |    changing the normal layout of that content (much like relative
 230 |    positioning) and (2) transformation of the content prior to layout that
 231 |    affects the layout of that content. See <a
 232 |    href="http://lists.w3.org/Archives/Public/www-style/2007Oct/0209">http://lists.w3.org/Archives/Public/www-style/2007Oct/0209</a>
 233 |    for examples of both cases. The "transform" property (as defined in this
 234 |    document) is equally useful for both roles. This document is focused on
 235 |    satisfying the first role. There is, however, an architectural question
 236 |    that arises because there needs to be a way to distinguish which role an
 237 |    author of a stylesheet wants. The key question is which is the default
 238 |    behavior/role for the "transform" property and how is the other
 239 |    behavior/role indicated by a stylesheet author. If you have an opinion on
 240 |    this topic, please send feedback.</div>
 241 | 
 242 |   <div class=issue> What do fixed backgrounds do in transforms? They should
 243 |    probably ignore the transform completely, since - even transformed - the
 244 |    object should be acting as "porthole" through which the fixed background
 245 |    can be viewed in its original form.</div>
 246 | 
 247 |   <div class=issue> This property should also be applicable to SVG elements.</div>
 248 | 
 249 |   <div class=issue> We also need to specify that SVG transforms *do* combine
 250 |    with this transform, e.g., if a &lt;foreignObject&gt; is inside
 251 |    transformed SVG and then defines a transform of its own. This means we may
 252 |    potentially have to examine the current SVG transform and combine with it
 253 |    to set the correct transform.</div>
 254 |   <!-- ======================================================================================================= -->
 255 | 
 256 |   <h2 id=transform-property><span class=secno>2. </span> The <span
 257 |    class=prop-name>&lsquo;<code class=property><a
 258 |    href="#effects">transform</a></code>&rsquo;</span> Property</h2>
 259 | 
 260 |   <p> A two-dimensional transformation is applied to an element through the
 261 |    <span class=prop-name>&lsquo;<code class=property><a
 262 |    href="#effects">transform</a></code>&rsquo;</span> property. This property
 263 |    contains a list of <a href="#transform-functions">transform functions</a>.
 264 |    The final transformation value for an element is obtained by performing a
 265 |    matrix concatenation of each entry in the list. The set of transform
 266 |    functions is similar to those allowed by SVG.
 267 | 
 268 |   <table class=propdef>
 269 |    <tbody>
 270 |     <tr>
 271 |      <td> <em>Name:</em>
 272 | 
 273 |      <td> <dfn id=effects>transform</dfn>
 274 | 
 275 |     <tr>
 276 |      <td> <em>Value:</em>
 277 | 
 278 |      <td> none | &lt;transform-function&gt; [ &lt;transform-function&gt; ]*
 279 | 
 280 |     <tr>
 281 |      <td> <em>Initial:</em>
 282 | 
 283 |      <td> none
 284 | 
 285 |     <tr>
 286 |      <td> <em>Applies&nbsp;to:</em>
 287 | 
 288 |      <td> block-level and inline-level elements
 289 | 
 290 |     <tr>
 291 |      <td> <em>Inherited:</em>
 292 | 
 293 |      <td> no
 294 | 
 295 |     <tr>
 296 |      <td> <em>Percentages:</em>
 297 | 
 298 |      <td> refer to the size of the element's box
 299 | 
 300 |     <tr>
 301 |      <td> <em>Media:</em>
 302 | 
 303 |      <td> visual
 304 | 
 305 |     <tr>
 306 |      <td> <em>Computed value:</em>
 307 | 
 308 |      <td> Same as specified value.
 309 |   </table>
 310 |   <!-- ======================================================================================================= -->
 311 | 
 312 |   <h2 id=transform-origin-property><span class=secno>3. </span> The <span
 313 |    class=prop-name>&lsquo;<code class=property><a
 314 |    href="#transform-origin">transform-origin</a></code>&rsquo;</span>
 315 |    Property</h2>
 316 | 
 317 |   <p> The <span class=prop-name>&lsquo;<code class=property><a
 318 |    href="#transform-origin">transform-origin</a></code>&rsquo;</span>
 319 |    property establishes the origin of transformation for an element. This
 320 |    property is applied by first translating the element by the negated value
 321 |    of the property, then applying the element's transform, then translating
 322 |    by the property value. This effectively moves the desired transformation
 323 |    origin of the element to (0,0) in the local coordinate system, then
 324 |    applies the element's transform, then moves the element back to its
 325 |    original position.
 326 | 
 327 |   <p> If only one value is specified, the second value is assumed to be
 328 |    &lsquo;<code class=property>center</code>&rsquo;. If at least one value is
 329 |    not a keyword, then the first value represents the horizontal position and
 330 |    the second represents the vertical position. Negative &lt;percentage&gt;
 331 |    and &lt;length&gt; values are allowed.
 332 | 
 333 |   <table class=propdef>
 334 |    <tbody>
 335 |     <tr>
 336 |      <td> <em>Name:</em>
 337 | 
 338 |      <td> <dfn id=transform-origin>transform-origin</dfn>
 339 | 
 340 |     <tr>
 341 |      <td> <em>Value:</em>
 342 | 
 343 |      <td> [ [ &lt;percentage&gt; | &lt;length&gt; | left | center | right ] [
 344 |       &lt;percentage&gt; | &lt;length&gt; | top | center | bottom ]? ] | [ [
 345 |       left | center | right ] || [ top | center | bottom ] ]
 346 | 
 347 |     <tr>
 348 |      <td> <em>Initial:</em>
 349 | 
 350 |      <td> 50% 50%
 351 | 
 352 |     <tr>
 353 |      <td> <em>Applies&nbsp;to:</em>
 354 | 
 355 |      <td> block-level and inline-level elements
 356 | 
 357 |     <tr>
 358 |      <td> <em>Inherited:</em>
 359 | 
 360 |      <td> no
 361 | 
 362 |     <tr>
 363 |      <td> <em>Percentages:</em>
 364 | 
 365 |      <td> refer to the size of the element's box
 366 | 
 367 |     <tr>
 368 |      <td> <em>Media:</em>
 369 | 
 370 |      <td> visual
 371 | 
 372 |     <tr>
 373 |      <td> <em>Computed value:</em>
 374 | 
 375 |      <td> For &lt;length&gt; the absolute value, otherwise a percentage
 376 |   </table>
 377 |   <!-- ======================================================================================================= -->
 378 | 
 379 |   <h2 id=transform-functions><span class=secno>4. </span> The Transformation
 380 |    Functions</h2>
 381 | 
 382 |   <p> The value of the <a class=prop-name href="#effects">transform</a>
 383 |    property is a list of &lt;transform-functions&gt; applied in the order
 384 |    provided. The individual transform functions are separated by whitespace.
 385 |    The set of allowed transform functions is given below. In this list the
 386 |    type &lt;translation-value&gt; is defined as a &lt;length&gt; or
 387 |    &lt;percentage&gt; value, and the &lt;angle&gt; type is defined by <a
 388 |    href="http://www.w3.org/TR/css3-values/">CSS Values and Units.</a>
 389 | 
 390 |   <dl>
 391 |    <dt> <span class=prop-value>matrix(&lt;number&gt;, &lt;number&gt;,
 392 |     &lt;number&gt;, &lt;number&gt;, &lt;number&gt;, &lt;number&gt;)</span>
 393 | 
 394 |    <dd> specifies a 2D transformation in the form of a <a
 395 |     href="http://www.w3.org/TR/SVG/coords.html#TransformMatrixDefined">transformation
 396 |     matrix</a> of six values. <span
 397 |     class=prop-value>matrix(a,b,c,d,e,f)</span> is equivalent to applying the
 398 |     transformation matrix <strong>[a b c d e f]</strong>.
 399 | 
 400 |    <dt> <span class=prop-value>translate(&lt;translation-value&gt;[,
 401 |     &lt;translation-value&gt;])</span>
 402 | 
 403 |    <dd> specifies a <a
 404 |     href="http://www.w3.org/TR/SVG/coords.html#TranslationDefined">2D
 405 |     translation</a> by the vector [tx, ty], where tx is the first
 406 |     translation-value parameter and ty is the optional second
 407 |     translation-value parameter. If <em>&lt;ty&gt;</em> is not provided, ty
 408 |     has zero as a value.
 409 | 
 410 |    <dt> <span class=prop-value>translateX(&lt;translation-value&gt;)</span>
 411 | 
 412 |    <dd> specifies a <a
 413 |     href="http://www.w3.org/TR/SVG/coords.html#TranslationDefined">translation</a>
 414 |     by the given amount in the X direction.
 415 | 
 416 |    <dt> <span class=prop-value>translateY(&lt;translation-value&gt;)</span>
 417 | 
 418 |    <dd> specifies a <a
 419 |     href="http://www.w3.org/TR/SVG/coords.html#TranslationDefined">translation</a>
 420 |     by the given amount in the Y direction.
 421 | 
 422 |    <dt> <span class=prop-value>scale(&lt;number&gt;[, &lt;number&gt;])</span>
 423 |     
 424 | 
 425 |    <dd> specifies a <a
 426 |     href="http://www.w3.org/TR/SVG/coords.html#ScalingDefined">2D scale</a>
 427 |     operation by the [sx,sy] scaling vector described by the 2 parameters. If
 428 |     the second parameter is not provided, it takes a value equal to the
 429 |     first.
 430 | 
 431 |    <dt> <span class=prop-value>scaleX(&lt;number&gt;)</span>
 432 | 
 433 |    <dd> specifies a scale operation using the [sx,1] scaling vector, where sx
 434 |     is given as the parameter.
 435 | 
 436 |    <dt> <span class=prop-value>scaleY(&lt;number&gt;)</span>
 437 | 
 438 |    <dd> specifies a scale operation using the [1,sy] scaling vector, where sy
 439 |     is given as the parameter.
 440 | 
 441 |    <dt> <span class=prop-value>rotate(&lt;angle&gt;)</span>
 442 | 
 443 |    <dd> specifies a <a
 444 |     href="http://www.w3.org/TR/SVG/coords.html#RotationDefined">2D
 445 |     rotation</a> by the angle specified in the parameter about the origin of
 446 |     the element, as defined by the <em><a
 447 |     href="#transform-origin">transform-origin</a></em> property.
 448 | 
 449 |    <dt> <span class=prop-value>skewX(&lt;angle&gt;)</span>
 450 | 
 451 |    <dd> specifies a <a
 452 |     href="http://www.w3.org/TR/SVG/coords.html#SkewXDefined">skew
 453 |     transformation along the X axis</a> by the given angle.
 454 | 
 455 |    <dt> <span class=prop-value>skewY(&lt;angle&gt;)</span>
 456 | 
 457 |    <dd> specifies a <a
 458 |     href="http://www.w3.org/TR/SVG/coords.html#SkewYDefined">skew
 459 |     transformation along the Y axis</a> by the given angle.
 460 | 
 461 |    <dt> <span class=prop-value>skew(&lt;angle&gt; [, &lt;angle&gt;])</span>
 462 | 
 463 |    <dd> specifies a <a
 464 |     href="http://www.w3.org/TR/SVG/coords.html#SkewXDefined">skew
 465 |     transformation along the X and Y axes</a>. The first angle parameter
 466 |     specifies the skew on the X axis. The second angle parameter specifies
 467 |     the skew on the Y axis. If the second parameter is not given then a value
 468 |     of 0 is used for the Y angle (ie. no skew on the Y axis).
 469 |   </dl>
 470 | 
 471 |   <h2 id=transform-values><span class=secno>5. </span> Transform Values and
 472 |    Lists</h2>
 473 | 
 474 |   <p> The &lt;translation-value&gt; values are defined as [&lt;percentage&gt;
 475 |    | &lt;length&gt;]. All other value types are described <a
 476 |    href="http://www.w3.org/TR/REC-CSS2/syndata.html#values">as CSS types</a>.
 477 |    If a list of transforms is provided, then the net effect is as if each
 478 |    transform had been specified separately in the order provided. For
 479 |    example,
 480 | 
 481 |   <pre>
 482 |   &lt;div style="transform:translate(-10px,-20px) scale(2) rotate(45deg) translate(5px,10px)"/&gt;
 483 |   </pre>
 484 | 
 485 |   <p> is functionally equivalent to:
 486 | 
 487 |   <pre>
 488 |   &lt;div style="transform:translate(-10px,-20px)"&gt;
 489 |     &lt;div style="transform:scale(2)"&gt;
 490 |       &lt;div style="transform:rotate(45deg)"&gt;
 491 |         &lt;div style="transform:translate(5px,10px)"&gt;
 492 |         &lt;/div&gt;
 493 |       &lt;/div&gt;
 494 |     &lt;/div&gt;
 495 |   &lt;/div&gt;
 496 |   </pre>
 497 | 
 498 |   <div class=example>
 499 |    <pre>
 500 |   div {
 501 |       transform: translate(100px, 100px);
 502 |   }
 503 |   </pre>
 504 |    Move the element by 100 pixels in both the X and Y directions.
 505 |    <div class=figure> <img alt="The 100px translation in X and Y"
 506 |     src=transform1.png></div>
 507 |   </div>
 508 | 
 509 |   <div class=example>
 510 |    <pre>
 511 |   div {
 512 |       height: 100px; width: 100px;
 513 |       transform: translate(80px, 80px) scale(1.5, 1.5) rotate(45deg);
 514 |   }
 515 |   </pre>
 516 |    Move the element by 80 pixels in both the X and Y directions, then scale
 517 |    the element by 150%, then rotate it 45 degrees clockwise about the Z axis.
 518 |    Note that the scale and rotate operate about the center of the element,
 519 |    since the element has the default transform-origin of 50% 50%.
 520 |    <div class=figure> <img alt="The transform specified above"
 521 |     src="compound_transform.png"></div>
 522 |   </div>
 523 |   <!-- ======================================================================================================= -->
 524 | 
 525 |   <h2 id=animation><span class=secno>6. </span> Transitions and animations
 526 |    between transform values</h2>
 527 | 
 528 |   <p> When animating or transitioning the value of a transform property the
 529 |    rules described below are applied. The &lsquo;<code
 530 |    class=property>from</code>&rsquo; transform is the transform at the start
 531 |    of the transition or current keyframe. The &lsquo;<code
 532 |    class=property>end</code>&rsquo; transform is the transform at the end of
 533 |    the transition or current keyframe.
 534 | 
 535 |   <ul>
 536 |    <li> If the &lsquo;<code class=property>from</code>&rsquo; and
 537 |     &lsquo;<code class=property>to</code>&rsquo; transforms are both single
 538 |     functions of the same type:
 539 |     <ul>
 540 |      <li> For translate, translateX, translateY, scale, scaleX, scaleY,
 541 |       rotate, skew, skewX and skewY functions:
 542 |       <ul>
 543 |        <li> the individual components of the function are interpolated
 544 |         numerically.
 545 |       </ul>
 546 | 
 547 |      <li> For matrix:
 548 |       <ul>
 549 |        <li> the matrix is decomposed using <a
 550 |         href="http://tog.acm.org/resources/GraphicsGems/gemsii/unmatrix.c">the
 551 |         method described by unmatrix</a> into separate translation, scale,
 552 |         rotation and skew matrices, then each decomposed matrix is
 553 |         interpolated numerically, and finally combined in order to produce a
 554 |         resulting 3x2 matrix.
 555 |       </ul>
 556 |     </ul>
 557 | 
 558 |    <li> If both the &lsquo;<code class=property>from</code>&rsquo; and
 559 |     &lsquo;<code class=property>to</code>&rsquo; transforms are "none":
 560 |     <ul>
 561 |      <li> There is no interpolation necessary
 562 |     </ul>
 563 | 
 564 |    <li> If one of the &lsquo;<code class=property>from</code>&rsquo; or
 565 |     &lsquo;<code class=property>to</code>&rsquo; transforms is "none":
 566 |     <ul>
 567 |      <li> The &lsquo;<code class=property>none</code>&rsquo; is replaced by
 568 |       an equivalent identity function list for the corresponding transform
 569 |       function list.
 570 |       <p> For example, if the &lsquo;<code class=property>from</code>&rsquo;
 571 |        transform is "scale(2)" and the &lsquo;<code
 572 |        class=property>to</code>&rsquo; transform is "none" then the value
 573 |        "scale(1)" will be used as the &lsquo;<code
 574 |        class=property>to</code>&rsquo; value, and animation will proceed
 575 |        using the rule above. Similarly, if the &lsquo;<code
 576 |        class=property>from</code>&rsquo; transform is "none" and the
 577 |        &lsquo;<code class=property>to</code>&rsquo; transform is "scale(2)
 578 |        rotate(50deg)" then the animation will execute as if the &lsquo;<code
 579 |        class=property>from</code>&rsquo; value is "scale(1) rotate(0)".</p>
 580 | 
 581 |       <p> The identity functions are translate(0), translateX(0),
 582 |        translateY(0), scale(1), scaleX(1), scaleY(1), rotate(0), rotateX(0),
 583 |        rotateY(0), skewX(0), skewY(0), skew(0, 0) and matrix(1, 0, 0, 1, 0,
 584 |        0).</p>
 585 |     </ul>
 586 | 
 587 |    <li> If both the &lsquo;<code class=property>from</code>&rsquo; and
 588 |     &lsquo;<code class=property>to</code>&rsquo; transforms have the same
 589 |     number of transform functions and corresponding functions in each
 590 |     transform list are of the same type:
 591 |     <ul>
 592 |      <li> Each transform function is animated with its corresponding
 593 |       destination function in isolation using the rules described above. The
 594 |       individual values are then applied as a list to produce the resulting
 595 |       transform value.
 596 |     </ul>
 597 | 
 598 |    <li> Otherwise:
 599 |     <ul>
 600 |      <li> The transform function lists are each converted into the equivalent
 601 |       matrix value and animation proceeds using the rule for a single
 602 |       function above.
 603 |     </ul>
 604 |   </ul>
 605 | 
 606 |   <p> In some cases, an animation might cause a transformation matrix to be
 607 |    singular or non-invertible. For example, an animation in which scale moves
 608 |    from 1 to -1. At the time when the matrix is in such a state, the
 609 |    transformed element is not rendered.
 610 | 
 611 |   <h2 id=matrix-decomposition><span class=secno>7. </span> Matrix
 612 |    decomposition for animation</h2>
 613 | 
 614 |   <p> When interpolating between 2 matrices, each is decomposed into the
 615 |    corresponding translation, rotation, scale, skew, and perspective values.
 616 |    Not all matrices can be accurately described by these values. Those that
 617 |    can't are decomposed into the most accurate representation possible, using
 618 |    the technique below. This technique is taken from the "unmatrix" method in
 619 |    "Graphics Gems II, edited by Jim Arvo". The pseudocode below works on a
 620 |    4x4 homogeneous matrix. A 3x2 2D matrix is therefore first converted to
 621 |    4x4 homogeneous form.
 622 | 
 623 |   <pre>
 624 |   Input: matrix       ; a 4x4 matrix
 625 |   Output: translation ; a 3 component vector
 626 |           rotation    ; Euler angles, represented as a 3 component vector
 627 |           scale       ; a 3 component vector
 628 |           skew        ; skew factors XY,XZ,YZ represented as a 3 component vector
 629 |           perspective ; a 4 component vector
 630 |   Returns false if the matrix cannot be decomposed, true if it can
 631 | 
 632 |     Supporting functions (point is a 3 component vector, matrix is a 4x4 matrix):
 633 |       float  determinant(matrix)          returns the 4x4 determinant of the matrix
 634 |       matrix inverse(matrix)              returns the inverse of the passed matrix
 635 |       matrix transpose(matrix)            returns the transpose of the passed matrix
 636 |       point  multVecMatrix(point, matrix) multiplies the passed point by the passed matrix 
 637 |                                           and returns the transformed point
 638 |       float  length(point)                returns the length of the passed vector
 639 |       point  normalize(point)             normalizes the length of the passed point to 1
 640 |       float  dot(point, point)            returns the dot product of the passed points
 641 |       float  cos(float)                   returns the cosine of the passed angle in radians
 642 |       float  asin(float)                  returns the arcsine in radians of the passed value
 643 |       float  atan2(float y, float x)      returns the principal value of the arc tangent of 
 644 |                                           y/x, using the signs of both arguments to determine 
 645 |                                           the quadrant of the return value
 646 | 
 647 |     Decomposition also makes use of the following function:
 648 |       point combine(point a, point b, float ascl, float bscl)
 649 |           result[0] = (ascl * a[0]) + (bscl * b[0])
 650 |           result[1] = (ascl * a[1]) + (bscl * b[1])
 651 |           result[2] = (ascl * a[2]) + (bscl * b[2])
 652 |           return result
 653 | 
 654 | 
 655 |     // Normalize the matrix.
 656 |     if (matrix[3][3] == 0)
 657 |         return false
 658 | 
 659 |     for (i = 0; i &lt; 4; i++)
 660 |         for (j = 0; j &lt; 4; j++)
 661 |             matrix[i][j] /= matrix[3][3]
 662 | 
 663 |     // perspectiveMatrix is used to solve for perspective, but it also provides
 664 |     // an easy way to test for singularity of the upper 3x3 component.
 665 |     perspectiveMatrix = matrix
 666 | 
 667 |     for (i = 0; i &lt; 3; i++)
 668 |         perspectiveMatrix[i][3] = 0
 669 | 
 670 |     perspectiveMatrix[3][3] = 1
 671 | 
 672 |     if (determinant(perspectiveMatrix) == 0)
 673 |         return false
 674 | 
 675 |     // First, isolate perspective.
 676 |     if (matrix[0][3] != 0 || matrix[1][3] != 0 || matrix[2][3] != 0)
 677 |         // rightHandSide is the right hand side of the equation.
 678 |         rightHandSide[0] = matrix[0][3];
 679 |         rightHandSide[1] = matrix[1][3];
 680 |         rightHandSide[2] = matrix[2][3];
 681 |         rightHandSide[3] = matrix[3][3];
 682 | 
 683 |         // Solve the equation by inverting perspectiveMatrix and multiplying
 684 |         // rightHandSide by the inverse.
 685 |         inversePerspectiveMatrix = inverse(perspectiveMatrix)
 686 |         transposedInversePerspectiveMatrix = transpose(inversePerspectiveMatrix)
 687 |         perspective = multVecMatrix(rightHandSide, transposedInversePerspectiveMatrix)
 688 | 
 689 |          // Clear the perspective partition
 690 |         matrix[0][3] = matrix[1][3] = matrix[2][3] = 0
 691 |         matrix[3][3] = 1
 692 |     else
 693 |         // No perspective.
 694 |         perspective[0] = perspective[1] = perspective[2] = 0
 695 |         perspective[3] = 1
 696 | 
 697 |     // Next take care of translation
 698 |     translation[0] = matrix[3][0]
 699 |     matrix[3][0] = 0
 700 |     translation[1] = matrix[3][1]
 701 |     matrix[3][1] = 0
 702 |     translation[2] = matrix[3][2]
 703 |     matrix[3][2] = 0
 704 | 
 705 |     // Now get scale and shear. &#x27;row&#x27; is a 3 element array of 3 component vectors
 706 |     for (i = 0; i &lt; 3; i++)
 707 |         row[i][0] = matrix[i][0]
 708 |         row[i][1] = matrix[i][1]
 709 |         row[i][2] = matrix[i][2]
 710 | 
 711 |     // Compute X scale factor and normalize first row.
 712 |     scale[0] = length(row[0])
 713 |     row[0] = normalize(row[0])
 714 | 
 715 |     // Compute XY shear factor and make 2nd row orthogonal to 1st.
 716 |     skew[0] = dot(row[0], row[1])
 717 |     row[1] = combine(row[1], row[0], 1.0, -skew[0])
 718 | 
 719 |     // Now, compute Y scale and normalize 2nd row.
 720 |     scale[1] = length(row[1])
 721 |     row[1] = normalize(row[1])
 722 |     skew[0] /= scale[1];
 723 | 
 724 |     // Compute XZ and YZ shears, orthogonalize 3rd row
 725 |     skew[1] = dot(row[0], row[2])
 726 |     row[2] = combine(row[2], row[0], 1.0, -skew[1])
 727 |     skew[2] = dot(row[1], row[2])
 728 |     row[2] = combine(row[2], row[1], 1.0, -skew[2])
 729 | 
 730 |     // Next, get Z scale and normalize 3rd row.
 731 |     scale[2] = length(row[2])
 732 |     row[2] = normalize(row[2])
 733 |     skew[1] /= scale[2]
 734 |     skew[2] /= scale[2]
 735 | 
 736 |     // At this point, the matrix (in rows) is orthonormal.
 737 |     // Check for a coordinate system flip.  If the determinant
 738 |     // is -1, then negate the matrix and the scaling factors.
 739 |     pdum3 = cross(row[1], row[2])
 740 |     if (dot(row[0], pdum3) &lt; 0)
 741 |         for (i = 0; i &lt; 3; i++)
 742 |             scale[i] *= -1;
 743 |             row[i][0] *= -1
 744 |             row[i][1] *= -1
 745 |             row[i][2] *= -1
 746 | 
 747 |     // Now, get the rotations out
 748 |     rotation[1] = asin(-row[0][2]);
 749 |     if (cos(rotation[1]) != 0)
 750 |        rotation[0] = atan2(row[1][2], row[2][2]);
 751 |        rotation[2] = atan2(row[0][1], row[0][0]);
 752 |     else
 753 |        rotation[0] = atan2(-row[2][0], row[1][1]);
 754 |        rotation[2] = 0;
 755 | 
 756 |     return true;
 757 |     </pre>
 758 | 
 759 |   <p> Each component of each returned value is linearly interpolated with the
 760 |    corresponding component of the other matrix. The resulting components are
 761 |    then recomposed into a final matrix as though combining the following
 762 |    transform functions:
 763 | 
 764 |   <pre>
 765 |         matrix3d(1,0,0,0, 0,1,0,0, 0,0,1,0, perspective[0], perspective[1], perspective[2], perspective[3])
 766 |         translate3d(translation[0], translation[1], translation[2])
 767 |         rotateX(rotation[0]) rotateY(rotation[1]) rotateZ(rotation[2])
 768 |         matrix3d(1,0,0,0, 0,1,0,0, 0,skew[2],1,0, 0,0,0,1)
 769 |         matrix3d(1,0,0,0, 0,1,0,0, skew[1],0,1,0, 0,0,0,1)
 770 |         matrix3d(1,0,0,0, skew[0],1,0,0, 0,0,1,0, 0,0,0,1)
 771 |         scale3d(scale[0], scale[1], scale[2])
 772 |       </pre>
 773 | 
 774 |   <h2 id=dom-interfaces><span class=secno>8. </span> DOM Interfaces</h2>
 775 | 
 776 |   <p> This section describes the interfaces and functionality added to the
 777 |    DOM to support runtime access to the functionality described above.
 778 | 
 779 |   <h3 id=cssmatrix-interface><span class=secno>8.1. </span> CSSMatrix</h3>
 780 | 
 781 |   <dl>
 782 |    <dt> <b>Interface <i><a id=DOM-CSSMatrix
 783 |     name=DOM-CSSMatrix>CSSMatrix</a></i></b>
 784 | 
 785 |    <dd>
 786 |     <p> The <code>CSSMatrix</code> interface represents a 4x4 homogeneous
 787 |      matrix.</p>
 788 | 
 789 |     <dl>
 790 |      <dt> <b>IDL Definition</b>
 791 | 
 792 |      <dd>
 793 |       <div class=idl-code>
 794 |        <pre>
 795 |   interface CSSMatrix {
 796 |       attribute float a;
 797 |       attribute float b;
 798 |       attribute float c;
 799 |       attribute float d;
 800 |       attribute float e;
 801 |       attribute float f;
 802 | 
 803 |       void        setMatrixValue(in DOMString string) raises(DOMException);
 804 |       CSSMatrix   multiply(in CSSMatrix secondMatrix);
 805 |       CSSMatrix   multiplyLeft(in CSSMatrix secondMatrix);
 806 |       CSSMatrix   inverse() raises(DOMException);
 807 |       CSSMatrix   translate(in float x, in float y);
 808 |       CSSMatrix   scale(in float scaleX, in float scaleY);
 809 |       CSSMatrix   skew(in float angleX, in float angleY);
 810 |       CSSMatrix   rotate(in float angle);
 811 |   };</pre>
 812 |       </div>
 813 |       <br>
 814 |      </dd>
 815 |      <!-- IDL -->
 816 | 
 817 |      <dt> <b>Attributes</b>
 818 | 
 819 |      <dd>
 820 |       <dl>
 821 |        <dt> <code class=attribute-name><a id=DOM-CSSMatrix-matrix
 822 |         name=DOM-CSSMatrix-matrix>a-f</a></code> of type <code>float</code>
 823 | 
 824 |        <dd> Each of these attributes represents one of the values in the 3x2
 825 |         matrix.<br>
 826 |       </dl>
 827 |      </dd>
 828 |      <!-- Attributes -->
 829 | 
 830 |      <dt> <b>Methods</b>
 831 | 
 832 |      <dd>
 833 |       <dl><!-- ===================================================== -->
 834 | 
 835 |        <dt> <code class=method-name><a id=DOM-CSSMatrix-setMatrixValue
 836 |         name=DOM-CSSMatrix-setMatrixValue>setMatrixValue</a></code>
 837 | 
 838 |        <dd>
 839 |         <div class=method> The <code>setMatrixValue</code> method replaces
 840 |          the existing matrix with one computed from parsing the passed string
 841 |          as though it had been assigned to the transform property in a CSS
 842 |          style rule.
 843 |          <div class=parameters> <b>Parameters</b>
 844 |           <div class=paramtable>
 845 |            <dl>
 846 |             <dt> <code class=parameter-name>string</code> of type
 847 |              <code>DOMString</code>
 848 | 
 849 |             <dd> The string to parse.<br>
 850 |            </dl>
 851 |           </div>
 852 |          </div>
 853 |          <!-- parameters -->
 854 |          <div class=return-value> <b>No Return Value</b></div>
 855 | 
 856 |          <div> <b>Exceptions</b>
 857 |           <div class=returnvalue>
 858 |            <dl>
 859 |             <dt> <code>DOMException SYNTAX_ERR</code>
 860 | 
 861 |             <dd> Thrown when the provided string can not be parsed into a
 862 |              CSSMatrix.
 863 |            </dl>
 864 |           </div>
 865 |          </div>
 866 |         </div>
 867 |        </dd>
 868 |        <!-- setMatrixValue -->
 869 |        <!-- ===================================================== -->
 870 | 
 871 |        <dt> <code class=method-name><a id=DOM-CSSMatrix-multiply
 872 |         name=DOM-CSSMatrix-multiply>multiply</a></code>
 873 | 
 874 |        <dd>
 875 |         <div class=method> The <code>multiply</code> method returns a new
 876 |          CSSMatrix which is the result of this matrix multiplied by the
 877 |          passed matrix, with the passed matrix to the right. This matrix is
 878 |          not modified.
 879 |          <div class=parameters> <b>Parameters</b>
 880 |           <div class=paramtable>
 881 |            <dl>
 882 |             <dt> <code class=parameter-name>secondMatrix</code> of type
 883 |              <code>CSSMatrix</code>
 884 | 
 885 |             <dd> The matrix to multiply.<br>
 886 |            </dl>
 887 |           </div>
 888 |          </div>
 889 |          <!-- parameters -->
 890 |          <div class=return-value> <b>Return Value</b>
 891 |           <div class=returnvalue>
 892 |            <dl>
 893 |             <dt> <code>CSSMatrix</code>
 894 | 
 895 |             <dd> The result matrix.<br>
 896 |            </dl>
 897 |           </div>
 898 |          </div>
 899 | 
 900 |          <div> <b>No Exceptions</b></div>
 901 |         </div>
 902 |        </dd>
 903 |        <!-- multiply() -->
 904 |        <!-- ===================================================== -->
 905 | 
 906 |        <dt> <code class=method-name><a id=DOM-CSSMatrix-multiplyLeft
 907 |         name=DOM-CSSMatrix-multiplyLeft>multiplyLeft</a></code>
 908 | 
 909 |        <dd>
 910 |         <div class=method> The <code>multiplyLeft</code> method returns a new
 911 |          CSSMatrix which is the result of this matrix multiplied by the
 912 |          passed matrix, with the passed matrix to the left. This matrix is
 913 |          not modified.
 914 |          <div class=parameters> <b>Parameters</b>
 915 |           <div class=paramtable>
 916 |            <dl>
 917 |             <dt> <code class=parameter-name>secondMatrix</code> of type
 918 |              <code>CSSMatrix</code>
 919 | 
 920 |             <dd> The matrix to multiply.<br>
 921 |            </dl>
 922 |           </div>
 923 |          </div>
 924 |          <!-- parameters -->
 925 |          <div class=return-value> <b>Return Value</b>
 926 |           <div class=returnvalue>
 927 |            <dl>
 928 |             <dt> <code>CSSMatrix</code>
 929 | 
 930 |             <dd> The result matrix.<br>
 931 |            </dl>
 932 |           </div>
 933 |          </div>
 934 | 
 935 |          <div> <b>No Exceptions</b></div>
 936 |         </div>
 937 |        </dd>
 938 |        <!-- multiplyLeft() -->
 939 |        <!-- ===================================================== -->
 940 | 
 941 |        <dt> <code class=method-name><a id=DOM-CSSMatrix-inverse
 942 |         name=DOM-CSSMatrix-inverse>inverse</a></code>
 943 | 
 944 |        <dd>
 945 |         <div class=method> The <code>inverse</code> method returns a new
 946 |          matrix which is the inverse of this matrix. This matrix is not
 947 |          modified.
 948 |          <div class=parameters> <b>No Parameters</b></div>
 949 |          <!-- parameters -->
 950 |          <div class=return-value> <b>Return Value</b>
 951 |           <div class=returnvalue>
 952 |            <dl>
 953 |             <dt> <code>CSSMatrix</code>
 954 | 
 955 |             <dd> The inverted matrix.<br>
 956 |            </dl>
 957 |           </div>
 958 |          </div>
 959 | 
 960 |          <div> <b>Exceptions</b>
 961 |           <div class=returnvalue>
 962 |            <dl>
 963 |             <dt> <code>DOMException NOT_SUPPORTED_ERR</code>
 964 | 
 965 |             <dd> Thrown when the CSSMatrix can not be inverted.
 966 |            </dl>
 967 |           </div>
 968 |          </div>
 969 |         </div>
 970 |         <!-- ======================================================================================================= -->
 971 |         </dd>
 972 |        <!-- inverse() -->
 973 |        <!-- ===================================================== -->
 974 | 
 975 |        <dt> <code class=method-name><a id=DOM-CSSMatrix-translate
 976 |         name=DOM-CSSMatrix-translate>translate</a></code>
 977 | 
 978 |        <dd>
 979 |         <div class=method> The <code>translate</code> method returns a new
 980 |          matrix which is this matrix post multiplied by a translation matrix
 981 |          containing the passed values. This matrix is not modified.
 982 |          <div class=parameters> <b>Parameters</b>
 983 |           <div class=paramtable>
 984 |            <dl>
 985 |             <dt> <code class=parameter-name>x</code> of type
 986 |              <code>float</code>
 987 | 
 988 |             <dd> The X component of the translation value.<br>
 989 | 
 990 |             <dt> <code class=parameter-name>y</code> of type
 991 |              <code>float</code>
 992 | 
 993 |             <dd> The Y component of the translation value.<br>
 994 |            </dl>
 995 |           </div>
 996 |          </div>
 997 |          <!-- parameters -->
 998 |          <div class=return-value> <b>Return Value</b>
 999 |           <div class=returnvalue>
1000 |            <dl>
1001 |             <dt> <code>CSSMatrix</code>
1002 | 
1003 |             <dd> The result matrix.<br>
1004 |            </dl>
1005 |           </div>
1006 |          </div>
1007 | 
1008 |          <div> <b>No Exceptions</b></div>
1009 |         </div>
1010 |         <!-- ======================================================================================================= -->
1011 |         </dd>
1012 |        <!-- translate() -->
1013 |        <!-- ===================================================== -->
1014 | 
1015 |        <dt> <code class=method-name><a id=DOM-CSSMatrix-scale
1016 |         name=DOM-CSSMatrix-scale>scale</a></code>
1017 | 
1018 |        <dd>
1019 |         <div class=method> The <code>scale</code> method returns a new matrix
1020 |          which is this matrix post multiplied by a scale matrix containing
1021 |          the passed values. If the y component is undefined, the x component
1022 |          value is used in its place. This matrix is not modified.
1023 |          <div class=parameters> <b>Parameters</b>
1024 |           <div class=paramtable>
1025 |            <dl>
1026 |             <dt> <code class=parameter-name>scaleX</code> of type
1027 |              <code>float</code>
1028 | 
1029 |             <dd> The X component of the scale value.<br>
1030 | 
1031 |             <dt> <code class=parameter-name>scaleY</code> of type
1032 |              <code>float</code>
1033 | 
1034 |             <dd> The (optional) Y component of the scale value.<br>
1035 |            </dl>
1036 |           </div>
1037 |          </div>
1038 |          <!-- parameters -->
1039 |          <div class=return-value> <b>Return Value</b>
1040 |           <div class=returnvalue>
1041 |            <dl>
1042 |             <dt> <code>CSSMatrix</code>
1043 | 
1044 |             <dd> The result matrix.<br>
1045 |            </dl>
1046 |           </div>
1047 |          </div>
1048 | 
1049 |          <div> <b>No Exceptions</b></div>
1050 |         </div>
1051 |         <!-- ======================================================================================================= -->
1052 |         </dd>
1053 |        <!-- scale() -->
1054 |        <!-- ===================================================== -->
1055 | 
1056 |        <dt> <code class=method-name><a id=DOM-CSSMatrix-rotate
1057 |         name=DOM-CSSMatrix-rotate>rotate</a></code>
1058 | 
1059 |        <dd>
1060 |         <div class=method> The <code>rotate</code> method returns a new
1061 |          matrix which is this matrix post multiplied by a rotation matrix.
1062 |          The rotation value is in degrees. This matrix is not modified.
1063 |          <div class=parameters> <b>Parameters</b>
1064 |           <div class=paramtable>
1065 |            <dl>
1066 |             <dt> <code class=parameter-name>angle</code> of type
1067 |              <code>float</code>
1068 | 
1069 |             <dd> The angle of rotation.<br>
1070 |            </dl>
1071 |           </div>
1072 |          </div>
1073 |          <!-- parameters -->
1074 |          <div class=return-value> <b>Return Value</b>
1075 |           <div class=returnvalue>
1076 |            <dl>
1077 |             <dt> <code>CSSMatrix</code>
1078 | 
1079 |             <dd> The result matrix.<br>
1080 |            </dl>
1081 |           </div>
1082 |          </div>
1083 | 
1084 |          <div> <b>No Exceptions</b></div>
1085 |         </div>
1086 |         <!-- ======================================================================================================= -->
1087 |         </dd>
1088 |        <!-- rotate() -->
1089 |        <!-- ===================================================== -->
1090 | 
1091 |        <dt> <code class=method-name><a id=DOM-CSSMatrix-skew
1092 |         name=DOM-CSSMatrix-skew>skew</a></code>
1093 | 
1094 |        <dd>
1095 |         <div class=method> The <code>skew</code> method returns a new matrix
1096 |          which is this matrix post multiplied by a skew matrix. The skew
1097 |          angles are in degrees. This matrix is not modified.
1098 |          <div class=parameters> <b>Parameters</b>
1099 |           <div class=paramtable>
1100 |            <dl>
1101 |             <dt> <code class=parameter-name>angleX</code> of type
1102 |              <code>float</code>
1103 | 
1104 |             <dd> The angle of skew along the X axis.<br>
1105 | 
1106 |             <dt> <code class=parameter-name>angleY</code> of type
1107 |              <code>float</code>
1108 | 
1109 |             <dd> The angle of skew along the Y axis.<br>
1110 |            </dl>
1111 |           </div>
1112 |          </div>
1113 |          <!-- parameters -->
1114 |          <div class=return-value> <b>Return Value</b>
1115 |           <div class=returnvalue>
1116 |            <dl>
1117 |             <dt> <code>CSSMatrix</code>
1118 | 
1119 |             <dd> The result matrix.<br>
1120 |            </dl>
1121 |           </div>
1122 |          </div>
1123 | 
1124 |          <div> <b>No Exceptions</b></div>
1125 |         </div>
1126 |         <!-- ======================================================================================================= -->
1127 |         </dd>
1128 |        <!-- skew() -->
1129 |       </dl>
1130 |       <!-- methods -->
1131 |     </dl>
1132 |    </dd>
1133 |    <!-- Interface CSSMatrix -->
1134 |   </dl>
1135 | 
1136 |   <p> In addition to the interface listed above, the
1137 |    <code>getComputedStyle</code> method of the <code>Window</code> object has
1138 |    been updated. The <code><a href="#effects">transform</a></code> property
1139 |    of the style object returned by <code>getComputedStyle</code> contains a
1140 |    DOMString of the form "matrix(a, b, c, d, e, f)" representing the 3x2
1141 |    matrix that is the result of applying the individual functions listed in
1142 |    the <code><a href="#effects">transform</a></code> property.
1143 | 
1144 |   <h2 id=references><span class=secno>9. </span>References</h2>
1145 | 
1146 |   <h3 class=no-num id=normative-references>Normative references</h3>
1147 |   <!--begin-normative-->
1148 |   <!-- Sorted by label -->
1149 | 
1150 |   <dl class=bibliography>
1151 |    <dt style="display: none"><!-- keeps the doc valid if the DL is empty -->
1152 |     <!---->
1153 |   </dl>
1154 |   <!--end-normative-->
1155 | 
1156 |   <h3 class=no-num id=other-references>Other references</h3>
1157 |   <!--begin-informative-->
1158 |   <!-- Sorted by label -->
1159 | 
1160 |   <dl class=bibliography>
1161 |    <dt style="display: none"><!-- keeps the doc valid if the DL is empty -->
1162 |     <!---->
1163 |   </dl>
1164 |   <!--end-informative-->
1165 | 
1166 |   <h2 class=no-num id=property-index>Property index</h2>
1167 |   <!--begin-properties-->
1168 | 
1169 |   <table class=proptable>
1170 |    <thead>
1171 |     <tr>
1172 |      <th>Property
1173 | 
1174 |      <th>Values
1175 | 
1176 |      <th>Initial
1177 | 
1178 |      <th>Applies&nbsp;to
1179 | 
1180 |      <th>Inh.
1181 | 
1182 |      <th>Percentages
1183 | 
1184 |      <th>Media
1185 | 
1186 |    <tbody>
1187 |     <tr valign=baseline>
1188 |      <td><a class=property href="#effects">transform</a>
1189 | 
1190 |      <td>none | &lt;transform-function&gt; [ &lt;transform-function&gt; ]*
1191 | 
1192 |      <td>none
1193 | 
1194 |      <td>block-level and inline-level elements
1195 | 
1196 |      <td>no
1197 | 
1198 |      <td>refer to the size of the element's box
1199 | 
1200 |      <td>visual
1201 | 
1202 |     <tr valign=baseline>
1203 |      <td><a class=property href="#transform-origin">transform-origin</a>
1204 | 
1205 |      <td>[ [ &lt;percentage&gt; | &lt;length&gt; | left | center | right ] [
1206 |       &lt;percentage&gt; | &lt;length&gt; | top | center | bottom ]? ] | [ [
1207 |       left | center | right ] || [ top | center | bottom ] ]
1208 | 
1209 |      <td>50% 50%
1210 | 
1211 |      <td>block-level and inline-level elements
1212 | 
1213 |      <td>no
1214 | 
1215 |      <td>refer to the size of the element's box
1216 | 
1217 |      <td>visual
1218 |   </table>
1219 |   <!--end-properties-->
1220 | 
1221 |   <h2 class=no-num id=index>Index</h2>
1222 |   <!--begin-index-->
1223 | 
1224 |   <ul class=indexlist>
1225 |    <li>transform, <a href="#effects" title=transform><strong>2.</strong></a>
1226 | 
1227 |    <li>transform-origin, <a href="#transform-origin"
1228 |     title=transform-origin><strong>3.</strong></a>
1229 |   </ul>
1230 |   <!--end-index-->
1231 | </html>
1232 | <!-- Keep this comment at the end of the file
1233 | Local variables:
1234 | mode: sgml
1235 | sgml-default-doctype-name:"html"
1236 | sgml-minimize-attributes:t
1237 | End:
1238 | -->
1239 | 


--------------------------------------------------------------------------------
/test/nytime.html:
--------------------------------------------------------------------------------
  1 | 
  2 |  
  3 |  
  4 |  
  5 |  
  6 |  
  7 |  
  8 |  
  9 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 10 | <html xmlns:og="http://opengraphprotocol.org/schema/" xmlns:fb="http://www.facebook.com/2008/fbml">
 11 | <head>
 12 | <title>‘Decision Points’ Tour Puts Bush in Spotlight - NYTimes.com</title>
 13 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> 
 14 | <meta name="description" content="George W. Bush will end a self-imposed silence about his presidency in an NBC special with Matt Lauer on Monday, the eve of the release of his memoir."> 
 15 | <meta name="keywords" content="Television,Books and Literature,Bush  George W,Lauer  Matt,National Broadcasting Co,NBC News"> 
 16 | <meta name="ROBOTS" content="NOARCHIVE"> 
 17 | <meta name="DISPLAYDATE" content="November 7, 2010"> 
 18 | <meta name="hdl" content="‘Decision Points’ Tour Puts Bush in Spotlight"> 
 19 | <meta name="hdl_p" content="With Book, Bush Is Back in Spotlight"> 
 20 | <meta name="byl" content="By BRIAN STELTER"> 
 21 | <meta name="lp" content="George W. Bush will end a self-imposed silence about his presidency in an NBC special with Matt Lauer on Monday, the eve of the release of his memoir."> 
 22 | <meta name="cre" content="The New York Times"> 
 23 | <meta name="edt" content="NewYork"> 
 24 | <meta name="pdate" content="20101107"> 
 25 | <meta name="ttl" content=""> 
 26 | <meta name="virtloc" content=""> 
 27 | <meta name="des" content="Television;Books and Literature"> 
 28 | <meta name="per" content="Bush, George W;Lauer, Matt"> 
 29 | <meta name="org" content="National Broadcasting Co;NBC News"> 
 30 | <meta name="geo" content=""> 
 31 | <meta name="ticker" content="CBS Corp|CBS|NYSE"> 
 32 | <meta name="misspelling" content=""> 
 33 | <meta name="dat" content="November 7, 2010"> 
 34 | <meta name="tom" content="News"> 
 35 | <meta name="cat" content=""> 
 36 | <meta name="col" content=""> 
 37 | <meta name="dsk" content="Business Day / Media &amp; Advertising"> 
 38 | <meta name="articleid" content="1248069299349"> 
 39 | <meta name="ARTICLE_TEMPLATE_VERSION" CONTENT="700"> 
 40 | <meta name="hdr_img" content="/images/article/header/sect_business.gif"> 
 41 | <meta name="thumbnail" content="images/2010/11/08/business/jpnbc/jpnbc-thumbStandard.jpg"> 
 42 | <meta name="thumbnail_height" content="75"> 
 43 | <meta name="thumbnail_width" content="75"> 
 44 | <meta name="xlarge" content=""> 
 45 | <meta name="xlarge_height" content=""> 
 46 | <meta name="xlarge_width" content=""> 
 47 | <meta name="sectionfront_jsonp" content="http://json8.nytimes.com/pages/business/media/index.jsonp"> 
 48 | <meta name="CG" content="business"> 
 49 | <meta name="SCG" content="media"> 
 50 | <meta name="PT" content="Article"> 
 51 | <meta name="PST" content="News"> 
 52 | <link rel="canonical" href="http://www.nytimes.com/2010/11/08/business/media/08nbc.html"> 
 53 | <meta property="og:url" content="http://www.nytimes.com/2010/11/08/business/media/08nbc.html"/> 
 54 | <meta property="og:type" content="article"/> 
 55 | <meta property="og:title" content="‘Decision Points’ Tour Puts Bush in Spotlight"/> 
 56 | <meta property="og:image" content="http://graphics8.nytimes.com/images/2010/11/08/business/jpnbc/jpnbc-thumbStandard.jpg"/> 
 57 | 
 58 | 
 59 | <link rel="stylesheet" type="text/css" href="http://graphics8.nytimes.com/css/0.1/screen/build/article/2.0/business/styles.css"><!--[if IE]>
 60 |     <style type="text/css">
 61 |         @import url(http://graphics8.nytimes.com/css/0.1/screen/common/ie.css);
 62 |     </style>
 63 | <![endif]-->
 64 | <!--[if IE 6]>
 65 |     <style type="text/css">
 66 |         @import url(http://graphics8.nytimes.com/css/0.1/screen/common/ie6.css);
 67 |     </style>
 68 | <![endif]-->
 69 | <!--
 70 | <script type="text/javascript" src="http://graphics8.nytimes.com/js/common.js"></script>
 71 | <script type="text/javascript" src="http://graphics8.nytimes.com/js/common/screen/DropDown.js"></script>
 72 | <script type="text/javascript" src="http://graphics8.nytimes.com/js/util/tooltip.js"></script>
 73 | <script type="text/javascript" src="http://graphics8.nytimes.com/js/common/screen/altClickToSearch.js"></script>
 74 | <script type="text/javascript" src="http://graphics8.nytimes.com/js/app/article/upNext.js"></script>
 75 | <script type="text/javascript" src="http://graphics8.nytimes.com/js/article/articleShare.js"></script>
 76 | -->
 77 | </head>
 78 |  
 79 |     
 80 |  
 81 | <body class="wideAd"> 
 82 | 
 83 | 
 84 | <a name="top"></a>
 85 | <div id="shell">
 86 | <ul id="memberTools"> 
 87 |  
 88 | <!-- ADXINFO classification="text_ad" campaign="nyt2010-circ-tr-bar1-intl-2week-37483"--><li><a href="http://www.nytimes.com/adx/bin/adx_click.html?type=goto&opzn&page=www.nytimes.com/yr/mo/day/business/media&pos=Bar1&sn2=fef71370/795933d5&sn1=c64f0312/28bb7ca&camp=nyt2010-circ-tr-bar1-intl-2week-37483&ad=081810-bar1-intl-2week-37483&goto=http%3A%2F%2Ftimesreader%2Enytimes%2Ecom%2Fwebapp%2FTimesReader%2Edo%3FpromoCode%3DT9179XQW1%26campaignId%3D37483" target="_blank">Try Times Reader today</a></li> 
 89 |  
 90 |  
 91 |  
 92 |                         <li><a href="http://www.nytimes.com/auth/login?URI=http://">Log In</a></li> 
 93 |             <li><a href="http://www.nytimes.com/gst/regi.html">Register Now</a></li> 
 94 |                             
 95 |  
 96 | <li><a href="http://www.nytimes.com/membercenter/sitehelp.html">Help</a></li> 
 97 | </ul> 
 98 | <div class="tabsContainer"> 
 99 | <ul id="mainTabs" class="tabs"> 
100 | <li class="first mainTabHome"><a href="http://www.nytimes.com">Home Page</a></li> 
101 | <li class="mainTabTodaysPaper"><a href="http://www.nytimes.com/pages/todayspaper/index.html">Today's Paper</a></li> 
102 | <li class="mainTabVideo"><a href="http://www.nytimes.com/video">Video</a></li> 
103 | <li class="mainTabMostPopular"><a href="http://www.nytimes.com/mostpopular">Most Popular</a></li> 
104 | <li class="mainTabTimesTopics"><a href="http://topics.nytimes.com/top/reference/timestopics">Times Topics</a></li> 
105 | </ul> 
106 | </div> 
107 | <div id="page" class="tabContent active">
108 | <div class="clearfix" id="masthead"> 
109 |  
110 | <div id="searchWidget"> 
111 | <div class="inlineSearchControl"> 
112 | <form enctype="application/x-www-form-urlencoded" action="http://query.nytimes.com/search/sitesearch" method="get" name="searchForm" id="searchForm"> 
113 | <input type="hidden" value="full" name="date_select"/> 
114 | <label for="searchQuery">Search All NYTimes.com</label> 
115 | <input type="text" class="text" value="" size="" name="query" id="searchQuery"/> 
116 | <input type="hidden" id="searchAll" name="type" value="nyt"/> 
117 | <input id="searchSubmit" title="Search" width="22" height="19" alt="Search" type="image" src="http://graphics8.nytimes.com/images/global/buttons/go.gif"> 
118 | </form> 
119 | </div> 
120 | </div> 
121 | <div id="branding" > 
122 | <a href="http://www.nytimes.com"><span id="nytIhtMastheadLogo"> 
123 | <a href="http://www.nytimes.com"><img src="http://graphics8.nytimes.com/images/misc/nytlogo152x23.gif" alt="New York Times" id="NYTLogo"/></a> 
124 | </span></a> 
125 | </div> 
126 |  
127 | <h2 class="pageHeaderWithLabel"> 
128 |  
129 |  
130 | <span><a href="http://www.nytimes.com/pages/business/index.html">Business Day</a></span> 
131 |  
132 | <a href="http://www.nytimes.com/pages/business/media/index.html">Media & Advertising</a> 
133 | </h2> 
134 |  
135 | </div> 
136 | <div class="navigation tabsContainer"> 
137 | <ul class="tabs"> 
138 | <li id="navWorld" class="first "> 
139 | <a href="http://www.nytimes.com/pages/world/index.html">World</a> 
140 | </li>	<li id="navUs" > 
141 | <a href="http://www.nytimes.com/pages/national/index.html">U.S.</a> 
142 | </li>	<li id="navNyregion" > 
143 | <a href="http://www.nytimes.com/pages/nyregion/index.html">N.Y. / Region</a> 
144 | </li>	<li id="navBusiness" class="selected"> 
145 | <a href="http://www.nytimes.com/pages/business/index.html">Business</a> 
146 | </li>	<li id="navTechnology" > 
147 | <a href="http://www.nytimes.com/pages/technology/index.html">Technology</a> 
148 | </li>	<li id="navScience" > 
149 | <a href="http://www.nytimes.com/pages/science/index.html">Science</a> 
150 | </li>	<li id="navHealth" > 
151 | <a href="http://www.nytimes.com/pages/health/index.html">Health</a> 
152 | </li>	<li id="navSports" > 
153 | <a href="http://www.nytimes.com/pages/sports/index.html">Sports</a> 
154 | </li>	<li id="navOpinion" > 
155 | <a href="http://www.nytimes.com/pages/opinion/index.html">Opinion</a> 
156 | </li>	<li id="navArts" > 
157 | <a href="http://www.nytimes.com/pages/arts/index.html">Arts</a> 
158 | </li>	<li id="navStyle" > 
159 | <a href="http://www.nytimes.com/pages/style/index.html">Style</a> 
160 | </li>	<li id="navTravel" > 
161 | <a href="http://www.nytimes.com/pages/travel/index.html">Travel</a> 
162 | </li>	<li id="navJobs" > 
163 | <a href="http://www.nytimes.com/pages/jobs/index.html">Jobs</a> 
164 | </li>	<li id="navRealestate" > 
165 | <a href="http://www.nytimes.com/pages/realestate/index.html">Real Estate</a> 
166 | </li>	<li id="navAutomobiles" > 
167 | <a href="http://www.nytimes.com/pages/automobiles/index.html">Autos</a> 
168 | </li></ul> 
169 | </div> 
170 | <div class="subNavigation tabContent active"> 
171 | <div class="column firstColumn"> 
172 | <div id="searchWidget"> 
173 | <div class="inlineSearchControl"> 
174 | <form enctype="application/x-www-form-urlencoded" action="http://query.nytimes.com/search/business/" method="get" name="searchForm" id="searchForm"> 
175 | <input type="hidden" value="full" name="date_select"/> 
176 | <input id="bsearchQuery" type="text" class="text" name="query" autocomplete="off"/> 
177 | <div class="querySuggestions" style="display:none;"></div> 
178 | <input type="hidden" id="searchAll" name="type" value="nyt"/> 
179 | <input id="searchSubmit" title="Search" width="40" height="19" alt="Search" type="image" src="http://graphics8.nytimes.com/images/global/global_search/search_button40x19.gif"> 
180 | </form>  
181 | </div> 
182 | </div><!--close searchWidget --> 
183 | </div><!--close column --> 
184 | <div class="column lastColumn"> 
185 | <ul class="horizontalMenu wrap"> 
186 | <li class="firstItem"><a href="http://www.nytimes.com/pages/business/global/index.html">Global</a></li> 
187 | <li><a href="http://dealbook.blogs.nytimes.com">DealBook</a></li> 
188 | <li><a href="http://markets.on.nytimes.com/research/markets/overview/overview.asp">Markets</a></li> 
189 | <li><a href="http://www.nytimes.com/pages/business/economy/index.html">Economy</a></li> 
190 | <li><a href="http://www.nytimes.com/pages/business/energy-environment/index.html">Energy</a></li> 
191 | <li class="selected">Media</li> 
192 | <li><a href="http://www.nytimes.com/pages/technology/personaltech/index.html">Personal Tech</a></li> 
193 | <li><a href="http://www.nytimes.com/pages/business/smallbusiness/index.html">Small Business</a></li> 
194 | <li class="lastItem"><a href="http://www.nytimes.com/pages/your-money/index.html">Your Money</a></li> 
195 | </ul> 
196 | </div><!--close column --> 
197 | </div><!--close subNavigation --> 	    				    		          
198 |  
199 |  
200 | <div id="main">
201 | <div class="spanAB wrap closing">
202 | <div id="abColumn" class="abColumn"><!--open abColumn -->
203 | <div id="article">
204 | <!--cur: prev:--> 
205 | <div class="columnGroup first">				
206 | <h1 class="articleHeadline"><NYT_HEADLINE  version="1.0" type=" ">With Book, Bush Is Back in Spotlight</NYT_HEADLINE></h1> 
207 | <NYT_BYLINE >	<h6 class="byline">By <a href="http://topics.nytimes.com/top/reference/timestopics/people/s/brian_stelter/index.html?inline=nyt-per" title="More Articles by Brian Stelter" class="meta-per">BRIAN STELTER</a></h6> 
208 | </NYT_BYLINE>	<h6 class="dateline">Published: November 7, 2010</h6> 
209 | <script type="text/javascript"> 
210 | var articleToolsShareData = {"url":"http:\/\/www.nytimes.com\/2010\/11\/08\/business\/media\/08nbc.html","headline":"With Book, Bush Is Back in Spotlight","description":"George W. Bush will end a self-imposed silence about his presidency in an NBC special with Matt Lauer on Monday, the eve of the release of his memoir.","keywords":"Television,Books and Literature,Bush  George W,Lauer  Matt,National Broadcasting Co,NBC News","section":"business","sub_section":"media","section_display":"Business Day","sub_section_display":"Media & Advertising","byline":"By <a href=\"http:\/\/topics.nytimes.com\/top\/reference\/timestopics\/people\/s\/brian_stelter\/index.html?inline=nyt-per\" title=\"More Articles by Brian Stelter\" class=\"meta-per\">BRIAN STELTER<\/a>","pubdate":"November 7, 2010","passkey":null};
211 | function getShareURL() {
212 |     return encodeURIComponent(articleToolsShareData.url);
213 | }
214 | function getShareHeadline() {
215 |     return encodeURIComponent(articleToolsShareData.headline);
216 | }
217 | function getShareDescription() {
218 |     return encodeURIComponent(articleToolsShareData.description);
219 | }
220 | function getShareKeywords() {
221 |     return encodeURIComponent(articleToolsShareData.keywords);
222 | }
223 | function getShareSection() {
224 |     return encodeURIComponent(articleToolsShareData.section);
225 | }
226 | function getShareSubSection() {
227 | 	return encodeURIComponent(articleToolsShareData.sub_section);
228 | }
229 | function getShareSectionDisplay() {
230 |     return encodeURIComponent(articleToolsShareData.section_display);
231 | }
232 | function getShareSubSectionDisplay() {
233 |     return encodeURIComponent(articleToolsShareData.sub_section_display);
234 | }
235 | function getShareByline() {
236 |     return encodeURIComponent(articleToolsShareData.byline);
237 | }
238 | function getSharePubdate() {
239 |     return encodeURIComponent(articleToolsShareData.pubdate);
240 | }
241 | function getSharePasskey() {
242 |     return encodeURIComponent(articleToolsShareData.passkey);
243 | }
244 | </script> 
245 | <div class="articleTools"> 
246 | <div class="box"> 
247 | <div class="inset"> 
248 | <ul id="toolsList" class="toolsList wrap"> 
249 | <li class="email"> 
250 |  
251 | <a id="emailThis" onClick="s_code_linktrack('Article-Tool-EmailSignIn');" 
252 |            href="http://www.nytimes.com/auth/login?URI=http://www.nytimes.com/2010/11/08/business/media/08nbc.html">Sign In to E-Mail</a> 
253 | </li> 
254 | <li class="print"> 
255 | <A HREF="/2010/11/08/business/media/08nbc.html?hp=&pagewanted=print">Print</a> 
256 | </li> 
257 |  <li class="singlePage"> 
258 | <A HREF="/2010/11/08/business/media/08nbc.html?hp=&pagewanted=all"> Single Page</a> 
259 |  </li> 
260 | <NYT_REPRINTS_FORM> 
261 |  
262 | <script name="javascript"> 
263 | 	function submitCCCForm(){
264 | 		PopUp = window.open('', '_Icon','location=no,toolbar=no,status=no,width=650,height=550,scrollbars=yes,resizable=yes');
265 | 		this.document.cccform.submit();
266 | 	}
267 | 	</script> 
268 | <li class="reprints">			<form name="cccform" action="https://s100.copyright.com/CommonApp/LoadingApplication.jsp" target="_Icon"> 
269 | <input type="hidden" name="Title" value="With Book, Bush Is Back in Spotlight"> 
270 | <input type="hidden" name="Author" value="By BRIAN STELTER "> 
271 | <input type="hidden" name="ContentID" value="http://www.nytimes.com/2010/11/08/business/media/08nbc.html"> 
272 | <input type="hidden" name="FormatType" value="default"> 
273 | <input type="hidden" name="PublicationDate" value="November 8, 2010"> 
274 | <input type="hidden" name="PublisherName" value="The New York Times"> 
275 | <input type="hidden" name="Publication" value="nytimes.com"> 
276 | <input type="hidden" name="wordCount" value="12"> 
277 | </form> 
278 | <a href="#" onClick="submitCCCForm()">Reprints</a> 
279 | </li>	        
280 | </NYT_REPRINTS_FORM> 
281 | </ul> 
282 |         </div> 
283 | </div> 
284 | </div> 
285 | <div class="articleBody"> 
286 |  
287 |  
288 |  
289 |  
290 |  
291 | <NYT_TEXT > 
292 |  
293 | <NYT_CORRECTION_TOP> 
294 | </NYT_CORRECTION_TOP> 
295 |     <p> 
296 | <a href="http://topics.nytimes.com/top/reference/timestopics/people/b/george_w_bush/index.html?inline=nyt-per" title="More articles about George W. Bush." class="meta-per">George W. Bush</a> will end a self-imposed silence about his presidency in an <a href="http://topics.nytimes.com/top/news/business/companies/nbc_universal/index.html?inline=nyt-org" title="More articles about NBC Universal." class="meta-org">NBC</a> prime-time special on Monday, the eve of the release of his memoir, &ldquo;Decision Points.&rdquo; That the interviewer will be Matt Lauer, the co-host of the &ldquo;Today&rdquo; show, reveals calculations by Mr. Bush and his advisers, as well as a campaign by NBC.        </p> 
297 | </div> 
298 | <div class="articleInline runaroundLeft"> 
299 |   
300 | <!--forceinline-->   
301 | <div class="inlineImage module"> 
302 | <div class="image"> 
303 | <div class="icon enlargeThis"><a href="javascript:pop_me_up2('http://www.nytimes.com/imagepages/2010/11/08/jpnbc.html','jpnbc_html','width=720,height=581,scrollbars=yes,toolbars=no,resizable=yes')">Enlarge This Image</a></div> 
304 | <a href="javascript:pop_me_up2('http://www.nytimes.com/imagepages/2010/11/08/jpnbc.html','jpnbc_html','width=720,height=581,scrollbars=yes,toolbars=no,resizable=yes')"> 
305 | <img src="http://graphics8.nytimes.com/images/2010/11/08/business/jpnbc/jpnbc-articleInline.jpg" width="190" height="132" alt=""> 
306 | </a> 
307 | </div> 
308 | <h6 class="credit">Peter Kramer/NBC</h6> 
309 | <p class="caption">Matt Lauer of “Today” interviewing George W. Bush in his first one-on-one interview since leaving the White House. The special will be shown on Monday at 8 p.m. Eastern time.                            </p> 
310 | </div> 
311 |   
312 | <div class="columnGroup doubleRule"> 
313 | <h3 class="sectionHeader">Related</h3> 
314 | <ul class="headlinesOnly multiline flush"> 
315 | <li> 
316 | <h6><a href="http://www.nytimes.com/2010/11/07/weekinreview/07baker.html?ref=media"> 
317 | The White House: Now Appearing: George W. Bush</a> 
318 | (November 7, 2010)
319 | </h6> 
320 | </li> 
321 | </ul> 
322 | </div> 
323 | <div id="portfolioInline"> 
324 | <h3 class="sectionHeader">Add to Portfolio</h3> 
325 | <ul class="flush"> 
326 |  
327 |  
328 | 							<li><a href="http://www.nytimes.com/auth/login?URI=http://www.nytimes.com/2010/11/08/business/media/08nbc.html">CBS Corp</a></li> 
329 | 							
330 | </ul> 
331 | <p class="refer"><a href="http://markets.on.nytimes.com/research/portfolio/view/view.asp#sda">Go to your Portfolio &#187;</a></p> 
332 | </div> 
333 |    
334 | </div> 
335 | <div class="articleBody"> 
336 |  <p> 
337 | In the past, the first interview of a controversial ex-president would be expected to go to the nation&rsquo;s top evening news anchor, currently NBC&rsquo;s <a href="http://topics.nytimes.com/top/reference/timestopics/people/w/brian_williams/index.html?inline=nyt-per" title="More articles about Brian Williams." class="meta-per">Brian Williams</a>. By choosing the top morning anchor instead, both sides are essentially endorsing the soft power of Matt Lauer.        </p><p> 
338 | &ldquo;He&rsquo;s an extraordinarily fair interviewer,&rdquo; said Jim Bell, the executive producer of &ldquo;Today&rdquo; and of the prime-time special. &ldquo;We&rsquo;re living in a time when some of television news is partisan, and Matt and the &lsquo;Today&rsquo; show are decidedly not so.&rdquo;        </p><p> 
339 | That was a selling point for Mr. Bush and his advisers, who decided that &ldquo;the first interview should be in a news context, with a network news anchor,&rdquo; said David Drake, a senior vice president of Crown, the publisher of  &ldquo;<a href="http://www.randomhouse.com/catalog/display.pperl?isbn=9780307590619" title="The publisher&rsquo;s profile of the book.">Decision Points</a>.&rdquo;        </p><p> 
340 | For NBC, the interview &mdash; which was taped over the course of two days in Texas late last month &mdash; is a major coup. &ldquo;They talked about every subject under the sun,&rdquo; said Steve Capus, the president of NBC News, who observed that Mr. Bush &ldquo;has things he wants to get off his chest.&rdquo;        </p><p> 
341 | But critics of Mr. Bush &mdash; and there are many, with polls showing that most Americans still hold an unfavorable view of him &mdash; who would like to see a televised confrontation over issues like the Iraq war may come away disappointed. The tone of the prime-time special is conversational, not prosecutorial, and for that reason, &ldquo;Lauer/Bush&rdquo; is not likely to join &ldquo;Frost/Nixon&rdquo; in the public imagination.        </p><p> 
342 | Mr. Bell pointedly called the special &ldquo;a conversation with President Bush about his book,&rdquo; not just his presidency. Many tough questions are asked, and the word &ldquo;torture&rdquo; is used, Mr. Bell emphasized, but it comes down to tone.        </p><p> 
343 | Dana Perino, who was a White House press secretary while Mr. Bush was in office, said  that tone was an important consideration for the TV book tour.        </p><p> 
344 | &ldquo;He&rsquo;s not interested in having a debate about the policies,&rdquo; Ms. Perino said of Mr. Bush. She elaborated later: &ldquo;There&rsquo;s been plenty of debates about the decisions he has made. Now he&rsquo;s trying to explain what he was going through, and the conditions he was working under.&rdquo;        </p><p> 
345 | Doris Kearns Goodwin, the presidential historian, said Mr. Bush&rsquo;s televised interview was not likely to deviate from the words in his memoir. But &ldquo;there is some value in seeing his mood,&rdquo; she said, including his attitude about the memoir itself. (Ms. Goodwin was a paid contributor to NBC until 2008.)        </p><p> 
346 | To get the first shot at the Bush interview, each major television network pieced together its best proposal &mdash; a &ldquo;package,&rdquo; Mr. Drake said &mdash; and at least one other offered a prime-time special like NBC&rsquo;s. He declined to share specifics, but said &ldquo;it was a close decision.&rdquo;        </p><p> 
347 | The NBC interview is the start of a book tour like almost no other. Mr. Bush will sit down with <a href="http://topics.nytimes.com/top/reference/timestopics/people/w/oprah_winfrey/index.html?inline=nyt-per" title="More articles about Oprah Winfrey." class="meta-per">Oprah Winfrey</a> and <a href="http://topics.nytimes.com/top/reference/timestopics/people/l/rush_limbaugh/index.html?inline=nyt-per" title="More articles about Rush Limbaugh." class="meta-per">Rush Limbaugh</a>, as well as with all three prime-time hosts on the Fox News Channel this week. There will be print interviews, too, but the only one announced so far is with AARP The Magazine.        </p><p> 
348 | Andrew Tyndall, who publishes a newsletter about the television news business, The Tyndall Report, said he suspected that Mr. Bush and his aides were striking a balance by selecting Mr. Lauer for the first interview. &ldquo;On the one hand, you&rsquo;re looking for comfort,&rdquo; Mr. Tyndall said. &ldquo;On the other hand, you don&rsquo;t want the interview to be perceived as a series of softballs.&rdquo;        </p><p> 
349 | NBC executives privately agreed with that assessment, and said they thought that Mr. Bush would not have felt as comfortable with the network&rsquo;s other top interviewers.        </p><p> 
350 | Asked whether Mr. Williams or the &ldquo;Meet the Press&rdquo; moderator David Gregory pursued the interview, Mr. Capus said &ldquo;I&rsquo;m sure they did,&rdquo; but that &ldquo;Matt was the official push from NBC News and I&rsquo;m thrilled that we got it.&rdquo; Mr. Capus said that Mr. Lauer had a &ldquo;rapport&rdquo; with Mr. Bush in prior interviews.        </p><p> 
351 | Along with comfort, audience size was  important. &ldquo;Today&rdquo; is both the top-rated morning show and a highly sought-after outlet for authors.        </p><div id="pageLinks"><ul id="pageNumbers"><li> 1 </li><li> <a onClick="s_code_linktrack('Article-MultiPagePageNum2');" title="Page 2" href="/2010/11/08/business/media/08nbc.html?pagewanted=2&hp">2</a> </li></ul><a class="next" onClick="s_code_linktrack('Article-MultiPage-Next');"
352 |             title="Next Page"
353 |             href="/2010/11/08/business/media/08nbc.html?pagewanted=2&hp">Next Page &#x00bb;</a></div><NYT_CORRECTION_BOTTOM>	<div class="articleCorrection"> 
354 | </div> 
355 | </NYT_CORRECTION_BOTTOM><NYT_UPDATE_BOTTOM> 
356 | </NYT_UPDATE_BOTTOM> 
357 | </NYT_TEXT> 
358 | </div>	</div> 
359 | <!--cur: prev:--> 
360 | <div class="columnGroup ">				
361 | <div class="articleFooter"> 
362 | <div class="articleMeta"> 
363 | <div class="opposingFloatControl wrap"> 
364 | <div class="element1"> 
365 | <h6 class="metaFootnote">A version of this article appeared in print on November 8, 2010, on page B1 of the New York edition.</h6> 
366 | </div> 
367 | </div> 
368 | </div> 
369 | </div>	</div> 
370 | <!--cur: prev:--> 
371 | <div class="columnGroup last">				
372 | <div id="articleExtras"> 
373 | <div class="expandedToolsRight"> 
374 | <div class="articleTools"> 
375 | <div class="box"> 
376 | <div class="inset"> 
377 | <ul id="toolsList" class="toolsList wrap"> 
378 | <li class="email"> 
379 | <a id="emailThis" onClick="s_code_linktrack('Article-Tool-EmailSignIn');" 
380 |             href="http://www.nytimes.com/auth/login?URI=http://www.nytimes.com/2010/11/08/business/media/08nbc.html">Sign In to E-Mail</a> 
381 | </li> 
382 | <li class="print"> 
383 | <A HREF="/2010/11/08/business/media/08nbc.html?hp=&pagewanted=print">Print</a> 
384 | </li> 
385 |  <li class="singlePage"> 
386 | <A HREF="/2010/11/08/business/media/08nbc.html?hp=&pagewanted=all"> Single Page</a> 
387 |  </li> 
388 | <NYT_REPRINTS_FORM> 
389 |  
390 | <script name="javascript"> 
391 | 	function submitCCCForm(){
392 | 		PopUp = window.open('', '_Icon','location=no,toolbar=no,status=no,width=650,height=550,scrollbars=yes,resizable=yes');
393 | 		this.document.cccform.submit();
394 | 	}
395 | 	</script> 
396 | <li class="reprints">			<a href="#" onClick="submitCCCForm()">Reprints</a> 
397 | </li>	        
398 | </NYT_REPRINTS_FORM> 
399 | </ul> 
400 | <div class="articleToolsSponsor" id="Frame4A"><!-- ADXINFO classification="feature_position" campaign="foxsearch2010_emailtools_1225555c_nyt5"--><a href="http://www.nytimes.com/adx/bin/adx_click.html?type=goto&opzn&page=www.nytimes.com/yr/mo/day/business/media&pos=Frame4A&sn2=66350d29/867aca44&sn1=15b040a8/f090befe&camp=foxsearch2010_emailtools_1225555c_nyt5&ad=bs_120x60alt_10k_date_DEC3&goto=http%3A%2F%2Fwww%2Efoxsearchlight%2Ecom%2Fblackswan" target="_blank"> 
401 | <img src="http://graphics8.nytimes.com/adx/images/ADS/24/57/ad.245789/bs_120x60alt_10k_date.gif" width="120" height="60" border="0"></a> 
402 | </div>        </div> 
403 | </div> 
404 | </div> 
405 | <script type="text/javascript"> 
406 | writePost();
407 | </script> 
408 | </div> 
409 | </div> 
410 |  
411 | <div class="singleAd" id="Bottom1"> 
412 | <!-- ADXINFO classification="text_ad" campaign="nyt2010-circ-tr-footer-intl-nonhp-2week-3747U"--><p><A HREF="http://www.nytimes.com/adx/bin/adx_click.html?type=goto&opzn&page=www.nytimes.com/yr/mo/day/business/media&pos=Bottom1&sn2=2b5d86a0/52b93ec&sn1=b8a2a708/906d6215&camp=nyt2010-circ-tr-footer-intl-nonhp-2week-3747U&ad=081810-footer-intl-nonhp-2week-3747U&goto=http%3A%2F%2Ftimesreader%2Enytimes%2Ecom%2Fwebapp%2FTimesReader%2Edo%3FpromoCode%3DT9179XQW1%26campaignId%3D3747U" target="_blank">Get the full newspaper experience, and more, delivered to your Mac or PC. 
413 | Times Reader 2.0: Try it FREE for 2 full weeks.  </a></p> 
414 |  
415 |  
416 | </div> 
417 |  
418 | 
419 | <div class="emailAlertModule module"> 
420 | <h5 class="sectionHeaderSm">Get Free E-mail Alerts on These Topics</h5> 
421 | <form action="https://select.nytimes.com/mem/tnt.html" method="GET" enctype="application/x-www-form-urlencoded"> 
422 | <input type="hidden" name="retA" value="http://www.nytimes.com//2010/11/08/business/media/08nbc.html" > 
423 | <input type="hidden" name="retT" value="With Book, Bush Is Back in Spotlight"> 
424 | <input type="hidden" name="module" value="call"> 
425 | <input type="hidden" name="alert_context" value="1"> 
426 | <ul class="flush"> 
427 | <li> 
428 | <input type="hidden" name="topic1" value="Bush%2C+George+W"> 
429 | <input type="hidden" name="topic_field1" value="per"> 
430 | <a class="inTextReferEmail" href="https://select.nytimes.com/mem/tnt.html?module=call&alert_context=1&topic1=Bush%2C+George+W&topic_field1=per&topic1_check=y&retA=&retT=&cskey=" onClick="javascript:s_code_linktrack('Article-RelatedTopics'); dcsMultiTrack('DCS.dcssip','www.nytimes.com','DCS.dcsuri','/newstracker/add.html','WT.ti','Newstracker Add','WT.z_nta','Add','WT.pers','Per','WT.z_dcsm','1');">Bush, George W</a> 
431 | </li> 
432 | <li> 
433 | <input type="hidden" name="topic1" value="Lauer%2C+Matt"> 
434 | <input type="hidden" name="topic_field1" value="per"> 
435 | <a class="inTextReferEmail" href="https://select.nytimes.com/mem/tnt.html?module=call&alert_context=1&topic1=Lauer%2C+Matt&topic_field1=per&topic1_check=y&retA=&retT=&cskey=" onClick="javascript:s_code_linktrack('Article-RelatedTopics'); dcsMultiTrack('DCS.dcssip','www.nytimes.com','DCS.dcsuri','/newstracker/add.html','WT.ti','Newstracker Add','WT.z_nta','Add','WT.pers','Per','WT.z_dcsm','1');">Lauer, Matt</a> 
436 | </li> 
437 | <li> 
438 | <input type="hidden" name="topic1" value="National+Broadcasting+Co"> 
439 | <input type="hidden" name="topic_field1" value="org"> 
440 | <a class="inTextReferEmail" href="https://select.nytimes.com/mem/tnt.html?module=call&alert_context=1&topic1=National+Broadcasting+Co&topic_field1=org&topic1_check=y&retA=&retT=&cskey=" onClick="javascript:s_code_linktrack('Article-RelatedTopics'); dcsMultiTrack('DCS.dcssip','www.nytimes.com','DCS.dcsuri','/newstracker/add.html','WT.ti','Newstracker Add','WT.z_nta','Add','WT.pers','Per','WT.z_dcsm','1');">National Broadcasting Co</a> 
441 | </li> 
442 | <li> 
443 | <input type="hidden" name="topic1" value="Television"> 
444 | <input type="hidden" name="topic_field1" value="des"> 
445 | <a class="inTextReferEmail" href="https://select.nytimes.com/mem/tnt.html?module=call&alert_context=1&topic1=Television&topic_field1=des&topic1_check=y&retA=&retT=&cskey=" onClick="javascript:s_code_linktrack('Article-RelatedTopics'); dcsMultiTrack('DCS.dcssip','www.nytimes.com','DCS.dcsuri','/newstracker/add.html','WT.ti','Newstracker Add','WT.z_nta','Add','WT.pers','Per','WT.z_dcsm','1');">Television</a> 
446 | </li> 
447 | </ul> 
448 | </form> 
449 | </div> 
450 | </div> 
451 | </div>
452 | </div><!--close abColumn -->
453 | <div class="cColumn">
454 |  
455 |  
456 |  
457 | <div class="columnGroup"> 
458 | <div  id="XXL"> 
459 | <!-- ADXINFO classification="banner" campaign="Intel_US10q4CONCPre-1480628-nyt2"--><SCRIPT type="text/javascript" SRC="http://ad.doubleclick.net/adj/N5364.nytimes/B4744290;sz=468x648;pc=nyt144379_243820;ord=2010.11.08.11.10.45;click=http://www.nytimes.com/adx/bin/adx_click.html?type=goto&opzn&page=www.nytimes.com/yr/mo/day/business/media&pos=XXL&camp=Intel_US10q4CONCPre-1480628-nyt2&ad=US10q4CONCPre.Biz.Style.dart468x648&sn2=e8cd4b75/e2f4a291&snr=doubleclick&snx=1289214349&sn1=84e0f53e/b10ae845&goto="> 
460 | </SCRIPT> 
461 | <NOSCRIPT> 
462 | <A HREF="http://www.nytimes.com/adx/bin/adx_click.html?type=goto&opzn&page=www.nytimes.com/yr/mo/day/business/media&pos=XXL&sn2=e8cd4b75/e2f4a291&sn1=9b1c7802/b6159f8&camp=Intel_US10q4CONCPre-1480628-nyt2&ad=US10q4CONCPre.Biz.Style.dart468x648&goto=http://ad.doubleclick.net/jump/N5364.nytimes/B4744290;sz=468x648;pc=nyt144379_243820;ord=2010.11.08.11.10.45" TARGET="_blank">
463 | <IMG SRC="http://ad.doubleclick.net/ad/N5364.nytimes/B4744290;sz=468x648;pc=nyt144379_243820;ord=2010.11.08.11.10.45"
464 |  BORDER=0 WIDTH=468 HEIGHT=648
465 |  ALT="Click Here"></A>
466 | </NOSCRIPT> 
467 | </div> 
468 | </div> 
469 | <div class="columnGroup"> 
470 |  
471 | </div> 
472 | <div class="columnGroup"> 
473 | <div id="mostPopWidget" class="singleRule"> 
474 |  
475 |       <!-- MOST POPULAR MODULE STARTS --> 
476 |       <h4>MOST POPULAR - BUSINESS DAY</h4> 
477 |          <div id="tabsContainer"> 
478 |          <ul class="tabs"> 
479 |             <li class="selected"><a href="#">E-Mailed</a></li> 
480 |             <li><a href="#">Blogged</a></li> 
481 |             
482 |             <li><a href="#">Viewed</a></li> 
483 |             
484 |          </ul> 
485 |          </div> 
486 |             <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"><HEAD><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"></HEAD><div class="tabContent tabContentActive" id="mostEmailed"><ol> 
487 | <li img="http://graphics8.nytimes.com/images/2010/11/07/business/NOVEL-1/NOVEL-1-thumbStandard.jpg"  kicker="Novelties"><a href="http://www.nytimes.com/2010/11/07/business/07novel.html?src=me&amp;ref=business" title="Click to go to this article">Novelties: When a Camcorder Becomes a Life Partner</a></li> 
488 | <li img="http://graphics8.nytimes.com/images/2010/11/07/business/FRACK/FRACK-thumbStandard.jpg"  kicker=""><a href="http://www.nytimes.com/2010/11/07/business/energy-environment/07frack.html?src=me&amp;ref=business" title="Click to go to this article">When a Rig Moves In Next Door</a></li> 
489 | <li kicker=""><a href="http://www.nytimes.com/2010/11/07/business/07lawyers.html?src=me&amp;ref=business" title="Click to go to this article">Taking on a Second Mortgage to Pay the Foreclosure Lawyer</a></li> 
490 | <li img="http://graphics8.nytimes.com/images/2010/11/07/business/UNBOX/UNBOX-thumbStandard.jpg"  kicker="Unboxed"><a href="http://www.nytimes.com/2010/11/07/business/07unboxed.html?src=me&amp;ref=business" title="Click to go to this article">Unboxed: Apple and I.B.M. Aren&rsquo;t All That Different</a></li> 
491 | <li kicker="Fair Game"><a href="http://www.nytimes.com/2010/11/07/business/07gret.html?src=me&amp;ref=business" title="Click to go to this article">Fair Game: He Saw Trouble Coming. Now He Sees It Going.</a></li> 
492 | <li img="http://graphics8.nytimes.com/images/2010/11/05/business/businessspecial5/SUB-CARE/SUB-CARE-thumbStandard.jpg"  kicker="Your Money"><a href="http://www.nytimes.com/2010/11/05/business/businessspecial5/05CARE.html?src=me&amp;ref=business" title="Click to go to this article">Your Money: Ignore Long-Term Care Planning at Your Peril</a></li> 
493 | <li img="http://graphics8.nytimes.com/images/2010/11/08/business/DEBT/DEBT-thumbStandard.jpg"  kicker=""><a href="http://www.nytimes.com/2010/11/08/business/global/08debt.html?src=me&amp;ref=business" title="Click to go to this article">Irish Debt Woes Revive Concern About Europe</a></li> 
494 | <li img="http://graphics8.nytimes.com/images/2010/11/07/business/VIEW/VIEW-thumbStandard.jpg"  kicker="Economic View"><a href="http://www.nytimes.com/2010/11/07/business/economy/07view.html?src=me&amp;ref=business" title="Click to go to this article">Economic View: Estate Tax Issue Offers Quick Test for Congress</a></li> 
495 | <li kicker=""><a href="http://www.nytimes.com/2010/11/08/business/media/08animate.html?src=me&amp;ref=business" title="Click to go to this article">Chinese Animator Seeks a Global Role</a></li> 
496 | <li img="http://graphics8.nytimes.com/images/2010/11/07/business/SUB-CORNER/SUB-CORNER-thumbStandard.jpg"  kicker="Corner Office"><a href="http://www.nytimes.com/2010/11/07/business/07corner.html?src=me&amp;ref=business" title="Click to go to this article">Corner Office: Ask Your Mentor for Help, Not for Brownie Points</a></li> 
497 | </ol> 
498 |         <a class="more" href="http://www.nytimes.com/gst/mostemailed.html">Go to Complete List &#x00bb;</a> 
499 |         </div><!-- #most emailed top10 --> 
500 |  
501 |             <div class="tabContent" id="mostBlogged"> 
502 | <ol> 
503 | <li><a href="http://www.nytimes.com/2010/11/01/business/media/01fox.html?bl" title="Click to go to this article">News Corp. Donation Clouds Fox Coverage of Prop. 24</a></li> 
504 | <li><a href="http://www.nytimes.com/2010/11/03/business/media/03newscorp.html?bl" title="Click to go to this article">Times of London Reports More Than 100,000 Paying Customers for Web Site</a></li> 
505 | <li><a href="http://www.nytimes.com/2010/11/06/business/energy-environment/06shell.html?bl" title="Click to go to this article">Shell Presses for Drilling in Arctic</a></li> 
506 | <li><a href="http://www.nytimes.com/2010/11/07/business/economy/07view.html?bl" title="Click to go to this article">Estate Tax Issue Offers Quick Test for Congress</a></li> 
507 | <li><a href="http://www.nytimes.com/2010/11/03/business/energy-environment/03solar.html?bl" title="Click to go to this article">Solyndra, a Solar-Panel Maker, Will Close a Plant</a></li> 
508 | <li><a href="http://www.nytimes.com/2010/11/06/business/economy/06jobs.html?bl" title="Click to go to this article">U.S. Added Jobs in October, First Time Since May</a></li> 
509 | <li><a href="http://www.nytimes.com/2010/11/04/business/energy-environment/04iht-rbogface.html?bl" title="Click to go to this article">Facebook Under Pressure to Be Greener</a></li> 
510 | <li><a href="http://www.nytimes.com/2010/11/04/business/economy/04fomc.html?bl" title="Click to go to this article">Fed to Buy $600 Billion in Debt; Calls Recovery 'Disappointingly Slow'</a></li> 
511 | <li><a href="http://www.nytimes.com/2010/11/04/business/economy/04fed.html?bl" title="Click to go to this article">Fed to Spend $600 Billion to Speed Up Recovery</a></li> 
512 | <li><a href="http://www.nytimes.com/2010/11/04/business/global/04global.html?bl" title="Click to go to this article">Shift in Washington Stirs Economic Jitters Abroad</a></li> 
513 | </ol> 
514 | <a class="more" href="http://www.nytimes.com/gst/mostblogged.html">Go to Complete List &#x00bb;</a> 
515 | </div><!-- #most blogged top10 --> 
516 |  
517 |             
518 |             <div class="tabContent" id="mostViewed"><ol> 
519 | <li img="http://graphics8.nytimes.com/images/2010/11/08/business/DEBT/DEBT-thumbStandard.jpg"  kicker=""><a href="http://www.nytimes.com/2010/11/08/business/global/08debt.html?src=mv&amp;ref=business" title="Click to go to this article">Irish Debt Woes Revive Concern About Europe</a></li> 
520 | <li img="http://graphics8.nytimes.com/images/2010/11/08/business/OLBERMANN/OLBERMANN-thumbStandard.jpg"  kicker=""><a href="http://www.nytimes.com/2010/11/08/business/media/08olbermann.html?src=mv&amp;ref=business" title="Click to go to this article">MSNBC to Lift Olbermann Suspension on Tuesday</a></li> 
521 | <li img="http://graphics8.nytimes.com/images/2010/11/08/business/jpnbc/jpnbc-thumbStandard.jpg"  kicker=""><a href="http://www.nytimes.com/2010/11/08/business/media/08nbc.html?src=mv&amp;ref=business" title="Click to go to this article">With Book, Bush Is Back in Spotlight</a></li> 
522 | <li kicker=""><a href="http://www.nytimes.com/2010/11/08/business/media/08animate.html?src=mv&amp;ref=business" title="Click to go to this article">Chinese Animator Seeks a Global Role</a></li> 
523 | <li img="http://graphics8.nytimes.com/images/2010/11/07/business/NOVEL-1/NOVEL-1-thumbStandard.jpg"  kicker="Novelties"><a href="http://www.nytimes.com/2010/11/07/business/07novel.html?src=mv&amp;ref=business" title="Click to go to this article">Novelties: When a Camcorder Becomes a Life Partner</a></li> 
524 | <li kicker="News Analysis"><a href="http://www.nytimes.com/2010/11/08/business/economy/08fed.html?src=mv&amp;ref=business" title="Click to go to this article">News Analysis: Friedman Casts Shadow as Economists Meet</a></li> 
525 | <li img="http://graphics8.nytimes.com/images/2010/11/07/business/UNBOX/UNBOX-thumbStandard.jpg"  kicker="Unboxed"><a href="http://www.nytimes.com/2010/11/07/business/07unboxed.html?src=mv&amp;ref=business" title="Click to go to this article">Unboxed: Apple and I.B.M. Aren&rsquo;t All That Different</a></li> 
526 | <li kicker=""><a href="http://www.nytimes.com/2010/11/08/business/media/08nation.html?src=mv&amp;ref=business" title="Click to go to this article">Bad News for Liberals May Be Good News for a Liberal Magazine</a></li> 
527 | <li kicker=""><a href="http://mediadecoder.blogs.nytimes.com/2010/11/05/maddow-says-olbermanns-suspension-shows-difference-between-msnbc-and-fox-news/?src=mv&amp;ref=business" title="Click to go to this article">Maddow Says Olbermann's Suspension Shows Difference Between MSNBC and Fox News</a></li> 
528 | <li kicker="The Media  Equation"><a href="http://www.nytimes.com/2010/11/08/business/media/08carr.html?src=mv&amp;ref=business" title="Click to go to this article">The Media  Equation: Olbermann, Impartiality and MSNBC</a></li> 
529 | </ol> 
530 |         </div><!-- #most viewed top10 --> 
531 |  
532 |             
533 |          <script type="text/javascript">new Accordian("mostPopWidget");</script> 
534 |       <!-- MOST POPULAR MODULE ENDS --> 
535 |    
536 |  
537 | </div><!--close mostPopWidget --> 
538 | </div> 
539 |  
540 |  
541 |  
542 |  
543 | </div>
544 | </div><!--close spanAB -->
545 |  
546 |   <!-- start MOTH --> 
547 |   	<div id="insideNYTimes" class="doubleRule"> 
548 |             <script type="text/javascript" src="http://graphics8.nytimes.com/js/app/moth/moth.js"></script> 
549 |         <div id="insideNYTimesHeader"> 
550 |                     <div class="navigation"><span id="leftArrow"><img id="mothReverse" src="http://i1.nyt.com/images/global/buttons/moth_reverse.gif" /></span>&nbsp;<span id="rightArrow"><img id="mothForward" src="http://i1.nyt.com/images/global/buttons/moth_forward.gif" /></span></div> 
551 |                 <h4> 
552 |             Inside NYTimes.com        </h4> 
553 |     </div> 
554 |     
555 |         
556 |     <div id="insideNYTimesScrollWrapper"> 
557 |         <table id="insideNYTimesBrowser" cellspacing="0"> 
558 |             <tbody> 
559 |                 <tr> 
560 |                                                 <td class="first"> 
561 |         <div class="story"> 
562 |             <h6 class="kicker"> 
563 |                                     <a href="http://movies.nytimes.com/pages/movies/index.html">Movies &raquo;</a> 
564 |                             </h6> 
565 |             <div class="mothImage"> 
566 |                 <a href="http://www.nytimes.com/2010/11/08/movies/08clayburgh.html"><img src="http://i1.nyt.com/images/2010/11/08/movies/08moth_clayburgh/08moth_clayburgh-moth.jpg" alt="Clayburgh&rsquo;s Memorable &lsquo;Unmarried Woman&rsquo;" width="151" height="151" /></a> 
567 |             </div> 
568 |             <h6 class="headline"><a href="http://www.nytimes.com/2010/11/08/movies/08clayburgh.html">Clayburgh&rsquo;s Memorable &lsquo;Unmarried Woman&rsquo;</a></h6> 
569 |         </div> 
570 |     </td> 
571 |                                                 <td> 
572 |         <div class="story"> 
573 |             <h6 class="kicker"> 
574 |                                     <a href="http://www.nytimes.com/pages/opinion/index.html">Opinion &raquo;</a> 
575 |                             </h6> 
576 |             <div class="mothImage"> 
577 |                 <a href="http://opinionator.blogs.nytimes.com/2010/11/07/lincoln-wins-now-what/"><img src="http://i1.nyt.com/images/2010/11/08/opinion/08moth_opinionator/08moth_opinionator-moth.jpg" alt="Disunion: Jamie Malanowski" width="151" height="151" /></a> 
578 |             </div> 
579 |             <h6 class="headline"><a href="http://opinionator.blogs.nytimes.com/2010/11/07/lincoln-wins-now-what/">Disunion: Jamie Malanowski</a></h6> 
580 |         </div> 
581 |     </td> 
582 |                                                 <td> 
583 |         <div class="story"> 
584 |             <h6 class="kicker"> 
585 |                                     <a href="http://www.nytimes.com/pages/weekinreview/index.html">Week in Review &raquo;</a> 
586 |                             </h6> 
587 |             <div class="mothImage"> 
588 |                 <a href="http://www.nytimes.com/2010/11/07/weekinreview/07marsh.html"><img src="http://i1.nyt.com/images/2010/11/07/weekinreview/07moth_marsh/07moth_marsh-moth.jpg" alt="Rightward, March: The Midterm Exit Polls" width="151" height="151" /></a> 
589 |             </div> 
590 |             <h6 class="headline"><a href="http://www.nytimes.com/2010/11/07/weekinreview/07marsh.html">Rightward, March: The Midterm Exit Polls</a></h6> 
591 |         </div> 
592 |     </td> 
593 |                                                 <td> 
594 |         <div class="story"> 
595 |             <h6 class="kicker"> 
596 |                                     <a href="http://www.nytimes.com/pages/national/index.html">U.S. &raquo;</a> 
597 |                             </h6> 
598 |             <div class="mothImage"> 
599 |                 <a href="http://www.nytimes.com/2010/11/08/us/08canton.html"><img src="http://i1.nyt.com/images/2010/11/08/us/08moth_canton/08moth_canton-moth.jpg" alt="Kindness of a Stranger That Still Resonates" width="151" height="151" /></a> 
600 |             </div> 
601 |             <h6 class="headline"><a href="http://www.nytimes.com/2010/11/08/us/08canton.html">Kindness of a Stranger That Still Resonates</a></h6> 
602 |         </div> 
603 |     </td> 
604 |                                                     <td> 
605 |             <div class="story"> 
606 |                 <h6 class="kicker"><a href="http://www.nytimes.com/pages/opinion/index.html">Opinion &raquo;</a></h6> 
607 |                 <h3><a href="http://opinionator.blogs.nytimes.com/2010/11/07/speech-and-harm/">The Stone: Speech and Harm</a></h3> 
608 |                 <p class="summary">What is at the root of the power of slurs to cause unease, shock and pain?</p> 
609 |             </div> 
610 |         </td> 
611 |                                                 <td> 
612 |         <div class="story"> 
613 |             <h6 class="kicker"> 
614 |                                     <a href="http://www.nytimes.com/pages/business/index.html">Business &raquo;</a> 
615 |                             </h6> 
616 |             <div class="mothImage"> 
617 |                 <a href="http://www.nytimes.com/2010/11/08/business/media/08conan.html"><img src="http://i1.nyt.com/images/2010/11/08/business/08moth_conan/08moth_conan-moth.jpg" alt="High Hopes for Conan O&rsquo;Brien&rsquo;s Debut" width="151" height="151" /></a> 
618 |             </div> 
619 |             <h6 class="headline"><a href="http://www.nytimes.com/2010/11/08/business/media/08conan.html">High Hopes for Conan O&rsquo;Brien&rsquo;s Debut</a></h6> 
620 |         </div> 
621 |     </td> 
622 |                                                 <td class="hidden"> 
623 |         <div class="story"> 
624 |             <h6 class="kicker"> 
625 |                                     <a href="http://www.nytimes.com/pages/nyregion/index.html">N.Y. / Region &raquo;</a> 
626 |                             </h6> 
627 |             <div class="mothImage"> 
628 |                 <a href="http://www.nytimes.com/2010/11/08/nyregion/08homework.html"><span class="img" src="http://i1.nyt.com/images/2010/11/08/nyregion/08moth_homework/08moth_homework-moth.jpg" alt="Like a Monitor More Than a Tutor" width="151" height="151" /></a> 
629 |             </div> 
630 |             <h6 class="headline"><a href="http://www.nytimes.com/2010/11/08/nyregion/08homework.html">Like a Monitor More Than a Tutor</a></h6> 
631 |         </div> 
632 |     </td> 
633 |                                                 <td class="hidden"> 
634 |         <div class="story"> 
635 |             <h6 class="kicker"> 
636 |                                     <a href="http://www.nytimes.com/pages/opinion/index.html">Opinion &raquo;</a> 
637 |                             </h6> 
638 |             <div class="mothImage"> 
639 |                 <a href="http://www.nytimes.com/2010/11/08/opinion/08judt.html"><span class="img" src="http://i1.nyt.com/images/2010/11/08/opinion/08moth_oped/08moth_oped-moth.jpg" alt="Op-Ed: Tony Judt" width="151" height="151" /></a> 
640 |             </div> 
641 |             <h6 class="headline"><a href="http://www.nytimes.com/2010/11/08/opinion/08judt.html">Op-Ed: Tony Judt</a></h6> 
642 |         </div> 
643 |     </td> 
644 |                                                 <td class="hidden"> 
645 |         <div class="story"> 
646 |             <h6 class="kicker"> 
647 |                                     <a href="http://www.nytimes.com/pages/sports/index.html">Sports &raquo;</a> 
648 |                             </h6> 
649 |             <div class="mothImage"> 
650 |                 <a href="http://www.nytimes.com/2010/11/08/sports/08injuries.html"><span class="img" src="http://i1.nyt.com/images/2010/11/08/sports/08moth_injuries/08moth_injuries-moth.jpg" alt="Learning From the Sadness" width="151" height="151" /></a> 
651 |             </div> 
652 |             <h6 class="headline"><a href="http://www.nytimes.com/2010/11/08/sports/08injuries.html">Learning From the Sadness</a></h6> 
653 |         </div> 
654 |     </td> 
655 |                                                 <td class="hidden"> 
656 |         <div class="story"> 
657 |             <h6 class="kicker"> 
658 |                                     <a href="http://www.nytimes.com/pages/arts/index.html">Arts &raquo;</a> 
659 |                             </h6> 
660 |             <div class="mothImage"> 
661 |                 <a href="http://www.nytimes.com/2010/11/08/arts/dance/08abt.html"><span class="img" src="http://i1.nyt.com/images/2010/11/08/arts/08moth_abt/08moth_abt-moth.jpg" alt="A Cuban-American Cultural Exchange" width="151" height="151" /></a> 
662 |             </div> 
663 |             <h6 class="headline"><a href="http://www.nytimes.com/2010/11/08/arts/dance/08abt.html">A Cuban-American Cultural Exchange</a></h6> 
664 |         </div> 
665 |     </td> 
666 |                                                     <td class="hidden"> 
667 |             <div class="story"> 
668 |                 <h6 class="kicker"><a href="http://www.nytimes.com/pages/opinion/index.html">Opinion &raquo;</a></h6> 
669 |                 <h3><a href="">What Obama Can Learn From India</a></h3> 
670 |                 <p class="summary">A Room for Debate forum on how India&rsquo;s impressive economic growth offers lessons for the U.S.</p> 
671 |             </div> 
672 |         </td> 
673 |                                                 <td class="hidden"> 
674 |         <div class="story"> 
675 |             <h6 class="kicker"> 
676 |                                     <a href="http://www.nytimes.com/pages/world/index.html">World &raquo;</a> 
677 |                             </h6> 
678 |             <div class="mothImage"> 
679 |                 <a href="http://www.nytimes.com/2010/11/08/world/middleeast/08corrie.html"><span class="img" src="http://i1.nyt.com/images/2010/11/08/world/08moth_corrie/08moth_corrie-moth.jpg" alt="For Family of Activist Killed, the Case Continues" width="151" height="151" /></a> 
680 |             </div> 
681 |             <h6 class="headline"><a href="http://www.nytimes.com/2010/11/08/world/middleeast/08corrie.html">For Family of Activist Killed, the Case Continues</a></h6> 
682 |         </div> 
683 |     </td> 
684 |                                     </tr> 
685 |             </tbody> 
686 |         </table> 
687 |     </div> 
688 |     
689 |     </div><!-- end #insideNYTimes --> 
690 |  
691 |             </div><!--close main -->
692 | <div id="footer"> 
693 | <ul class="first"> 
694 | <li class="first"><a href="http://www.nytimes.com">Home</a></li> 
695 | <li > 
696 | <a href="http://www.nytimes.com/pages/world/index.html">World</a> 
697 | </li> 
698 | <li > 
699 | <a href="http://www.nytimes.com/pages/national/index.html">U.S.</a> 
700 | </li> 
701 | <li > 
702 | <a href="http://www.nytimes.com/pages/nyregion/index.html">N.Y. / Region</a> 
703 | </li> 
704 | <li > 
705 | <a href="http://www.nytimes.com/pages/business/index.html">Business</a> 
706 | </li> 
707 | <li > 
708 | <a href="http://www.nytimes.com/pages/technology/index.html">Technology</a> 
709 | </li> 
710 | <li > 
711 | <a href="http://www.nytimes.com/pages/science/index.html">Science</a> 
712 | </li> 
713 | <li > 
714 | <a href="http://www.nytimes.com/pages/health/index.html">Health</a> 
715 | </li> 
716 | <li > 
717 | <a href="http://www.nytimes.com/pages/sports/index.html">Sports</a> 
718 | </li> 
719 | <li > 
720 | <a href="http://www.nytimes.com/pages/opinion/index.html">Opinion</a> 
721 | </li> 
722 | <li > 
723 | <a href="http://www.nytimes.com/pages/arts/index.html">Arts</a> 
724 | </li> 
725 | <li > 
726 | <a href="http://www.nytimes.com/pages/style/index.html">Style</a> 
727 | </li> 
728 | <li > 
729 | <a href="http://www.nytimes.com/pages/travel/index.html">Travel</a> 
730 | </li> 
731 | <li > 
732 | <a href="http://www.nytimes.com/pages/jobs/index.html">Jobs</a> 
733 | </li> 
734 | <li > 
735 | <a href="http://www.nytimes.com/pages/realestate/index.html">Real Estate</a> 
736 | </li> 
737 | <li > 
738 | <a href="http://www.nytimes.com/pages/automobiles/index.html">Autos</a> 
739 | </li> 
740 | <li><a href="#top">Back to Top</a></li> 
741 | </ul>		<ul> 
742 | <li class="first"><a href="http://www.nytimes.com/ref/membercenter/help/copyright.html">Copyright 2010</a> <a href="http://www.nytco.com/">The New York Times Company</a></li> 
743 | <li><a href="http://www.nytimes.com/privacy">Privacy</a></li> 
744 | <li><a href="http://www.nytimes.com/ref/membercenter/help/agree.html">Terms of Service</a></li> 
745 | <li><a href="http://www.nytimes.com/search">Search</a></li> 
746 | <li><a href="http://www.nytimes.com/corrections.html">Corrections</a></li> 
747 | <li><a class="rssButton" href="http://www.nytimes.com/rss">RSS</a></li> 
748 | <li><a href="http://firstlook.nytimes.com">First Look</a></li> 
749 | <li><a href="http://www.nytimes.com/membercenter/sitehelp.html">Help</a></li> 
750 | <li><a href="http://www.nytimes.com/ref/membercenter/help/infoservdirectory.html">Contact Us</a></li> 
751 | <li><a href="https://careers.nytco.com/TAM/nyt_docs/TAM/candidate.html">Work for Us</a></li> 
752 | <li><a href="http://www.nytimes.whsites.net/mediakit/">Advertise</a></li> 
753 | <li><a href="http://spiderbites.nytimes.com/">Site Map</a></li> 
754 | </ul> 
755 | </div> 
756 | </div><!--close page -->
757 | </div><!--close shell -->
758 | <IMG SRC="/adx/bin/clientside/9e8eea7Q2FQ27Q5BQ3ETKQ27P4Q2BKkv4Q3AQ60Q5BPKaDQ60c.a-5Q7E.Q2BQ5E6Q2BQ5CaB-cQ5CT" height="1" width="3">
759 |    
760 |  
761 |  
762 | </body> 
763 | 
764 |  
765 | 			
766 | 		<!-- Start UPT call --> 
767 | 		<img height="1" width="3" border=0 src="http://up.nytimes.com/?d=0//10&t=2&s=0&ui=0&r=http%3a%2f%2fwww%2enytimes%2ecom%2f2010%2f11%2f08%2fbusiness%2fmedia%2f08nbc%2ehtml%3fhp&u=www%2enytimes%2ecom%2f2010%2f11%2f08%2fbusiness%2fmedia%2f08nbc%2ehtml%3fhp%3d"> 
768 | 		<!-- End UPT call --> 
769 | 	
770 | 		
771 |         <script language="JavaScript"><!--
772 |           var dcsvid="0";
773 |           var regstatus="non-registered";
774 |         //--></script> 
775 |         <script src="http://graphics8.nytimes.com/js/app/analytics/trackingTags_v1.1.js" type="text/javascript"></script> 
776 |         <noscript> 
777 |           <div><img alt="DCSIMG" id="DCSIMG" width="1" height="1" src="http://wt.o.nytimes.com/dcsym57yw10000s1s8g0boozt_9t1x/njs.gif?dcsuri=/nojavascript&amp;WT.js=No&amp;WT.tv=1.0.7"/></div>
778 |         </noscript> 
779 |    
780 | <img src="http://graphics8.nytimes.com/ads/blank.gif">
781 | 
782 | 
783 | </html>
784 | 
785 | 
786 | 
787 |  
788 | 


--------------------------------------------------------------------------------
/misc/readability-ori.js:
--------------------------------------------------------------------------------
   1 | /*jslint undef: true, nomen: true, eqeqeq: true, plusplus: true, newcap: true, immed: true, browser: true, devel: true, passfail: false */
   2 | /*global window: false, readConvertLinksToFootnotes: false, readStyle: false, readSize: false, readMargin: false, Typekit: false, ActiveXObject: false */
   3 | 
   4 | var dbg = (typeof console !== 'undefined') ? function(s) { // debug logger; s is the message to print
   5 |     console.log("Readability: " + s); // every message gets the same prefix
   6 | } : function() {}; // no console object available: dbg becomes a silent no-op
   7 | 
   8 | /*
   9 |  * Readability. An Arc90 Lab Experiment. 
  10 |  * Website: http://lab.arc90.com/experiments/readability
  11 |  * Source:  http://code.google.com/p/arc90labs-readability
  12 |  *
  13 |  * "Readability" is a trademark of Arc90 Inc and may not be used without explicit permission. 
  14 |  *
  15 |  * Copyright (c) 2010 Arc90 Inc
  16 |  * Readability is licensed under the Apache License, Version 2.0.
  17 | **/
  18 | var readability = {
  19 |     version:                '1.7.1',
  20 |     emailSrc:               'http://lab.arc90.com/experiments/readability/email.php',
  21 |     iframeLoads:             0,
  22 |     convertLinksToFootnotes: false, /* Switched on by init() when the global readConvertLinksToFootnotes is defined and exactly true */
  23 |     reversePageScroll:       false, /* True while shift (keyCode 16) is held; init()'s key handler then scrolls up on space instead of down */
  24 |     frameHack:               false, /**
  25 |                                       * The frame hack works around a Firefox bug: if you pull content
  26 |                                       * out of a frame and stick it into the parent element, the scrollbar won't appear.
  27 |                                       * So we fake a scrollbar in the wrapping div (init() gives #readOverlay height 100% and overflow auto).
  28 |                                      **/
  29 |     biggestFrame:            false, /* When frameHack is set, init() reads .src from this to build its "page uses frames" message */
  30 |     bodyCache:               null,   /* Cache of document.body.innerHTML, filled once by init(), in case we need to re-use it later */
  31 |     flags:                   0x1 | 0x2 | 0x4,   /* Bitmask of the FLAG_* constants below. Start with all flags set. */
  32 | 
  33 |     /* constants: one bit each, OR-ed together in `flags` above */
  34 |     FLAG_STRIP_UNLIKELYS:     0x1,
  35 |     FLAG_WEIGHT_CLASSES:      0x2,
  36 |     FLAG_CLEAN_CONDITIONALLY: 0x4,
  37 | 
  38 |     maxPages:    30, /* The maximum number of pages to loop through before we call it quits and just show a link. */
  39 |     parsedPages: {}, /* The pages we've parsed in this call of readability, for autopaging. Used as a key store (URL -> true) for easier searching. */
  40 |     pageETags:   {}, /* The ETag headers of pages we've parsed; if they happen to match, we'll know it's a duplicate. */
  41 |     
  42 |     /**
  43 |      * Every regular expression readability uses, collected in one place and created once so they are not re-instantiated inside loops.
  44 |      * NOTE(review): the patterns carrying the g flag keep lastIndex state if used with test()/exec() - confirm callers only pass them to replace().
  45 |      **/
  46 |     regexps: {
  47 |         unlikelyCandidates:    /combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter/i,
  48 |         okMaybeItsACandidate:  /and|article|body|column|main|shadow/i,
  49 |         positive:              /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i,
  50 |         negative:              /combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i,
  51 |         extraneous:            /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single/i,
  52 |         divToPElements:        /<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i,
  53 |         replaceBrs:            /(<br[^>]*>[ \n\r\t]*){2,}/gi, // two or more <br> tags in a row, allowing whitespace between them
  54 |         replaceFonts:          /<(\/?)font[^>]*>/gi, // opening or closing <font> tag; group 1 captures the optional slash
  55 |         trim:                  /^\s+|\s+$/g, // leading or trailing whitespace
  56 |         normalize:             /\s{2,}/g, // any run of two or more whitespace characters
  57 |         killBreaks:            /(<br\s*\/?>(\s|&nbsp;?)*){1,}/g, // one or more <br> tags, each with any trailing whitespace / &nbsp;
  58 |         videos:                /http:\/\/(www\.)?(youtube|vimeo)\.com/i, // http only: https URLs do not match
  59 |         skipFootnoteLink:      /^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i, // the bare ^ alternative also lets empty / whitespace-only text match
  60 |         nextLink:              /(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i, // Match: next, weiter, continue, >, >>, » but not >|, »| as those usually mean last.
  61 |         prevLink:              /(prev|earl|old|new|<|«)/i
  62 |     },
  63 | 
  64 |     /**
  65 |      * Runs readability on the current document. Reads the globals readStyle, readMargin and readSize without guarding them, so they must be defined.
  66 |      * 
  67 |      * Workflow:
  68 |      *  1. Remove scripts, cache the body HTML, record this URL in parsedPages and look for a next-page link.
  69 |      *  2. Prep the document and build readability's DOM tree (overlay > tools + inner > title, content, footer).
  70 |      *  3. Grab the article content from the current dom tree; if none is found, show an explanatory message instead.
  71 |      *  4. Replace the current DOM tree with the new one and post-process the content.
  72 |      *  5. Schedule loading of any next page, install the space / shift+space scroll handlers, and read peacefully.
  73 |      *
  74 |      * @return void
  75 |      **/
  76 |     init: function() {
  77 |         /* Before we do anything, replace the page's load/unload handlers with no-ops and remove its scripts. */
  78 |         window.onload = window.onunload = function() {};
  79 | 
  80 |         readability.removeScripts(document);
  81 | 
  82 |         if(document.body && !readability.bodyCache) { /* cache only if nothing has been cached yet */
  83 |             readability.bodyCache = document.body.innerHTML;
  84 | 
  85 |         }
  86 |         /* Make sure this document is added to the list of parsed pages first (keyed by URL minus any trailing slash), so we don't double up on the first page */
  87 |         readability.parsedPages[window.location.href.replace(/\/$/, '')] = true;
  88 | 
  89 |         /* Pull out any possible next page link first, i.e. before prepDocument() runs */
  90 |         var nextPageLink = readability.findNextPageLink(document.body);
  91 |         
  92 |         readability.prepDocument();
  93 | 
  94 |         /* Build readability's DOM tree */
  95 |         var overlay        = document.createElement("DIV");
  96 |         var innerDiv       = document.createElement("DIV");
  97 |         var articleTools   = readability.getArticleTools();
  98 |         var articleTitle   = readability.getArticleTitle();
  99 |         var articleContent = readability.grabArticle();
 100 |         var articleFooter  = readability.getArticleFooter();
 101 | 
 102 |         if(!articleContent) { /* grabArticle() returned nothing: substitute an apology / usage message */
 103 |             articleContent    = document.createElement("DIV");
 104 |             articleContent.id = "readability-content";
 105 |             articleContent.innerHTML = [
 106 |                 "<p>Sorry, readability was unable to parse this page for content. If you feel like it should have been able to, please <a href='http://code.google.com/p/arc90labs-readability/issues/entry'>let us know by submitting an issue.</a></p>",
 107 |                 (readability.frameHack ? "<p><strong>It appears this page uses frames.</strong> Unfortunately, browser security properties often cause Readability to fail on pages that include frames. You may want to try running readability itself on this source page: <a href='" + readability.biggestFrame.src + "'>" + readability.biggestFrame.src + "</a></p>" : ""),
 108 |                 "<p>Also, please note that Readability does not play very nicely with front pages. Readability is intended to work on articles with a sizable chunk of text that you'd like to read comfortably. If you're using Readability on a landing page (like nytimes.com for example), please click into an article first before using Readability.</p>"
 109 |             ].join('');
 110 | 
 111 |             nextPageLink = null; /* no article, so the next-page fetch scheduled further down is skipped */
 112 |         }
 113 | 
 114 |         overlay.id              = "readOverlay";
 115 |         innerDiv.id             = "readInner";
 116 | 
 117 |         /* Apply user-selected styling (readStyle, readMargin and readSize are globals supplied from outside this file) */
 118 |         document.body.className = readStyle;
 119 |         document.dir            = readability.getSuggestedDirection(articleTitle.innerHTML);
 120 | 
 121 |         if (readStyle === "style-athelas" || readStyle === "style-apertura"){ /* these two styles additionally get the rdbTypekit class */
 122 |             overlay.className = readStyle + " rdbTypekit";
 123 |         }
 124 |         else {
 125 |             overlay.className = readStyle;
 126 |         }
 127 |         innerDiv.className    = readMargin + " " + readSize;
 128 | 
 129 |         if(typeof(readConvertLinksToFootnotes) !== 'undefined' && readConvertLinksToFootnotes === true) { /* optional global: honoured only when defined and exactly true */
 130 |             readability.convertLinksToFootnotes = true;
 131 |         }
 132 | 
 133 |         /* Glue the structure of our document together. */
 134 |         innerDiv.appendChild( articleTitle   );
 135 |         innerDiv.appendChild( articleContent );
 136 |         innerDiv.appendChild( articleFooter  );
 137 |          overlay.appendChild( articleTools   );
 138 |          overlay.appendChild( innerDiv       );
 139 | 
 140 |         /* Clear the old HTML, insert the new content. */
 141 |         document.body.innerHTML = "";
 142 |         document.body.insertBefore(overlay, document.body.firstChild);
 143 |         document.body.removeAttribute('style');
 144 | 
 145 |         if(readability.frameHack) /* make the overlay itself the scrolling container */
 146 |         {
 147 |             var readOverlay = document.getElementById('readOverlay');
 148 |             readOverlay.style.height = '100%';
 149 |             readOverlay.style.overflow = 'auto';
 150 |         }
 151 | 
 152 |         /**
 153 |          * If someone tries to use Readability on a site's root page (href is exactly protocol//host/), give them a warning about usage.
 154 |         **/
 155 |         if((window.location.protocol + "//" + window.location.host + "/") === window.location.href)
 156 |         {
 157 |             articleContent.style.display = "none"; /* hidden; the warning's link re-shows the element with id readability-content */
 158 |             var rootWarning = document.createElement('p');
 159 |                 rootWarning.id = "readability-warning";
 160 |                 rootWarning.innerHTML = "<em>Readability</em> was intended for use on individual articles and not home pages. " +
 161 |                     "If you'd like to try rendering this page anyway, <a onClick='javascript:document.getElementById(\"readability-warning\").style.display=\"none\";document.getElementById(\"readability-content\").style.display=\"block\";'>click here</a> to continue.";
 162 | 
 163 |             innerDiv.insertBefore( rootWarning, articleContent );
 164 |         }
 165 | 
 166 |         readability.postProcessContent(articleContent);
 167 | 
 168 |         window.scrollTo(0, 0);
 169 | 
 170 |         /* If we're using the Typekit library, select the font */
 171 |         if (readStyle === "style-athelas" || readStyle === "style-apertura") {
 172 |             readability.useRdbTypekit();
 173 |         }
 174 | 
 175 |         if (nextPageLink) {
 176 |             /** 
 177 |              * Append any additional pages after a small timeout (500 ms) so that people
 178 |              * can start reading without having to wait for this to finish processing.
 179 |             **/
 180 |             window.setTimeout(function() {
 181 |                 readability.appendNextPage(nextPageLink);
 182 |             }, 500);
 183 |         }
 184 | 
 185 |         /** Smooth scrolling: space pages down, shift+space pages up **/
 186 |         document.onkeydown = function(e) {
 187 |             var code = (window.event) ? event.keyCode : e.keyCode; /* use the global window.event when present, else the handler argument */
 188 |             if (code === 16) { /* 16 = shift: remember it is held so space scrolls up */
 189 |                 readability.reversePageScroll = true;
 190 |                 return;
 191 |             }
 192 | 
 193 |             if (code === 32) { /* 32 = space */
 194 |                 readability.curScrollStep = 0;
 195 |                 var windowHeight = window.innerHeight ? window.innerHeight : (document.documentElement.clientHeight ? document.documentElement.clientHeight : document.body.clientHeight); /* first available viewport-height source */
 196 | 
 197 |                 if(readability.reversePageScroll) {
 198 |                     readability.scrollTo(readability.scrollTop(), readability.scrollTop() - (windowHeight - 50), 20, 10); /* up by one window height minus 50 */
 199 |                 }
 200 |                 else {
 201 |                     readability.scrollTo(readability.scrollTop(), readability.scrollTop() + (windowHeight - 50), 20, 10); /* down by one window height minus 50 */
 202 |                 }
 203 |                 
 204 |                 return false; /* returning false cancels the browser's own space-bar scrolling */
 205 |             }
 206 |         };
 207 |         
 208 |         document.onkeyup = function(e) {
 209 |             var code = (window.event) ? event.keyCode : e.keyCode;
 210 |             if (code === 16) { /* shift released: space scrolls down again */
 211 |                 readability.reversePageScroll = false;
 212 |                 return;
 213 |             }
 214 |         };
 215 |     },
 216 | 
 217 |     /**
 218 |      * Run any post-process modifications to article content as necessary.
 219 |      * 
 220 |      * @param Element
 221 |      * @return void
 222 |     **/
 223 |     postProcessContent: function(articleContent) {
 224 |         if(readability.convertLinksToFootnotes && !window.location.href.match(/wikipedia\.org/g)) {
 225 |             readability.addFootnotes(articleContent);
 226 |         }
 227 | 
 228 |         readability.fixImageFloats(articleContent);
 229 |     },
 230 | 
 231 |     /**
 232 |      * Some content ends up looking ugly if the image is too large to be floated.
 233 |      * If the image is wider than a threshold (currently 55%), no longer float it,
 234 |      * center it instead.
 235 |      *
 236 |      * @param Element
 237 |      * @return void
 238 |     **/
 239 |     fixImageFloats: function (articleContent) {
 240 |         var imageWidthThreshold = Math.min(articleContent.offsetWidth, 800) * 0.55,
 241 |             images              = articleContent.getElementsByTagName('img');
 242 | 
 243 |         for(var i=0, il = images.length; i < il; i+=1) {
 244 |             var image = images[i];
 245 |             
 246 |             if(image.offsetWidth > imageWidthThreshold) {
 247 |                 image.className += " blockImage";
 248 |             }
 249 |         }
 250 |     },
 251 | 
 252 |     /**
 253 |      * Get the article tools Element that has buttons like reload, print, email.
 254 |      *
 255 |      * @return Element the "readTools" DIV containing the links
 256 |      **/
 257 |     getArticleTools: function () {
 258 |         var articleTools = document.createElement("DIV");
 259 | 
 260 |         articleTools.id        = "readTools";
 261 |         articleTools.innerHTML = 
 262 |             "<a href='#' onclick='return window.location.reload()' title='Reload original page' id='reload-page'>Reload Original Page</a>" +
 263 |             "<a href='#' onclick='javascript:window.print();' title='Print page' id='print-page'>Print Page</a>" +
 264 |             "<a href='#' onclick='readability.emailBox(); return false;' title='Email page' id='email-page'>Email Page</a>";
 265 | 
 266 |         return articleTools;
 267 |     },
 268 | 
 269 |     /**
 270 |      * Returns the suggested text direction of the string
 271 |      *
 272 |      * @return "rtl" || "ltr"
 273 |      **/
 274 |     getSuggestedDirection: function(text) {
 275 |         function sanitizeText() {
 276 |             return text.replace(/@\w+/, "");
 277 |         }
 278 |         
 279 |         function countMatches(match) {
 280 |             var matches = text.match(new RegExp(match, "g"));
 281 |             return matches !== null ? matches.length : 0; 
 282 |         }
 283 |         
 284 |         function isRTL() {            
 285 |             var count_heb =  countMatches("[\\u05B0-\\u05F4\\uFB1D-\\uFBF4]");
 286 |             var count_arb =  countMatches("[\\u060C-\\u06FE\\uFB50-\\uFEFC]");
 287 | 
 288 |             // if 20% of chars are Hebrew or Arbic then direction is rtl
 289 |             return  (count_heb + count_arb) * 100 / text.length > 20;
 290 |         }
 291 | 
 292 |         text  = sanitizeText(text);
 293 |         return isRTL() ? "rtl" : "ltr";
 294 |     },
 295 | 
 296 |     
 297 |     /**
 298 |      * Get the article title as an H1.
 299 |      *
 300 |      * @return Element the H1 element holding the title
 301 |      **/
 302 |     getArticleTitle: function () {
 303 |         var curTitle = "",
 304 |             origTitle = "";
 305 | 
 306 |         try {
 307 |             curTitle = origTitle = document.title;
 308 |             
 309 |             if(typeof curTitle !== "string") { /* If they had an element with id "title" in their HTML */
 310 |                 curTitle = origTitle = readability.getInnerText(document.getElementsByTagName('title')[0]);             
 311 |             }
 312 |         }
 313 |         catch(e) {}
 314 |         
 315 |         if(curTitle.match(/ [\|\-] /))
 316 |         {
 317 |             curTitle = origTitle.replace(/(.*)[\|\-] .*/gi,'$1');
 318 |             
 319 |             if(curTitle.split(' ').length < 3) {
 320 |                 curTitle = origTitle.replace(/[^\|\-]*[\|\-](.*)/gi,'$1');
 321 |             }
 322 |         }
 323 |         else if(curTitle.indexOf(': ') !== -1)
 324 |         {
 325 |             curTitle = origTitle.replace(/.*:(.*)/gi, '$1');
 326 | 
 327 |             if(curTitle.split(' ').length < 3) {
 328 |                 curTitle = origTitle.replace(/[^:]*[:](.*)/gi,'$1');
 329 |             }
 330 |         }
 331 |         else if(curTitle.length > 150 || curTitle.length < 15)
 332 |         {
 333 |             var hOnes = document.getElementsByTagName('h1');
 334 |             if(hOnes.length === 1)
 335 |             {
 336 |                 curTitle = readability.getInnerText(hOnes[0]);
 337 |             }
 338 |         }
 339 | 
 340 |         curTitle = curTitle.replace( readability.regexps.trim, "" );
 341 | 
 342 |         if(curTitle.split(' ').length <= 4) {
 343 |             curTitle = origTitle;
 344 |         }
 345 |         
 346 |         var articleTitle = document.createElement("H1");
 347 |         articleTitle.innerHTML = curTitle;
 348 |         
 349 |         return articleTitle;
 350 |     },
 351 | 
 352 |     /**
 353 |      * Get the footer with the readability mark etc.
 354 |      *
 355 |      * @return Element the "readFooter" DIV
 356 |      **/
 357 |     getArticleFooter: function () {
 358 |         var articleFooter = document.createElement("DIV");
 359 | 
 360 |         /**
 361 |          * For research purposes, generate an img src that contains the chosen readstyle etc,
 362 |          * so we can generate aggregate stats and change styles based on them in the future
 363 |          **/
 364 |         // var statsQueryParams = "?readStyle=" + encodeURIComponent(readStyle) + "&readMargin=" + encodeURIComponent(readMargin) + "&readSize=" + encodeURIComponent(readSize);
 365 |         /* TODO: attach this to an image */
 366 | 
 367 |         articleFooter.id = "readFooter";
 368 |         articleFooter.innerHTML = [
 369 |         "<div id='rdb-footer-print'>Excerpted from <cite>" + document.title + "</cite><br />" + window.location.href + "</div>",
 370 |         "<div id='rdb-footer-wrapper'>",
 371 |              "<div id='rdb-footer-left'>",
 372 |                  "<a href='http://lab.arc90.com/experiments/readability' id='readability-logo'>Readability &mdash;&nbsp;</a>",
 373 |                  "<a href='http://www.arc90.com/' id='arc90-logo'> An Arc90 Laboratory Experiment&nbsp;</a>",
 374 |                  " <span id='readability-url'> http://lab.arc90.com/experiments/readability</span>",
 375 |              "</div>",
 376 |              "<div id='rdb-footer-right'>",
 377 |                  "<a href='http://www.twitter.com/arc90' class='footer-twitterLink'>Follow us on Twitter &raquo;</a>",
 378 |                  "<span class='version'>Readability version " + readability.version + "</span>",
 379 |              "</div>",
 380 |         "</div>"].join('');
 381 |                 
 382 |         return articleFooter;
 383 |     },
 384 |     
 385 |     /**
 386 |      * Prepare the HTML document for readability to scrape it.
 387 |      * This includes things like stripping javascript, CSS, and handling terrible markup.
 388 |      * 
 389 |      * @return void
 390 |      **/
 391 |     prepDocument: function () {
 392 |         /**
 393 |          * In some cases a body element can't be found (if the HTML is totally hosed for example)
 394 |          * so we create a new body node and append it to the document.
 395 |          */
 396 |         if(document.body === null)
 397 |         {
 398 |             var body = document.createElement("body");
 399 |             try {
 400 |                 document.body = body;       
 401 |             }
 402 |             catch(e) {
 403 |                 document.documentElement.appendChild(body);
 404 |                 dbg(e);
 405 |             }
 406 |         }
 407 | 
 408 |         document.body.id = "readabilityBody";
 409 | 
 410 |         var frames = document.getElementsByTagName('frame');
 411 |         if(frames.length > 0)
 412 |         {
 413 |             var bestFrame = null;
 414 |             var bestFrameSize = 0;    /* The frame to try to run readability upon. Must be on same domain. */
 415 |             var biggestFrameSize = 0; /* Used for the error message. Can be on any domain. */
 416 |             for(var frameIndex = 0; frameIndex < frames.length; frameIndex+=1)
 417 |             {
 418 |                 var frameSize = frames[frameIndex].offsetWidth + frames[frameIndex].offsetHeight;
 419 |                 var canAccessFrame = false;
 420 |                 try {
 421 |                     var frameBody = frames[frameIndex].contentWindow.document.body;
 422 |                     canAccessFrame = true;
 423 |                 }
 424 |                 catch(eFrames) {
 425 |                     dbg(eFrames);
 426 |                 }
 427 | 
 428 |                 if(frameSize > biggestFrameSize) {
 429 |                     biggestFrameSize         = frameSize;
 430 |                     readability.biggestFrame = frames[frameIndex];
 431 |                 }
 432 |                 
 433 |                 if(canAccessFrame && frameSize > bestFrameSize)
 434 |                 {
 435 |                     readability.frameHack = true;
 436 |     
 437 |                     bestFrame = frames[frameIndex];
 438 |                     bestFrameSize = frameSize;
 439 |                 }
 440 |             }
 441 | 
 442 |             if(bestFrame)
 443 |             {
 444 |                 var newBody = document.createElement('body');
 445 |                 newBody.innerHTML = bestFrame.contentWindow.document.body.innerHTML;
 446 |                 newBody.style.overflow = 'scroll';
 447 |                 document.body = newBody;
 448 |                 
 449 |                 var frameset = document.getElementsByTagName('frameset')[0];
 450 |                 if(frameset) {
 451 |                     frameset.parentNode.removeChild(frameset); }
 452 |             }
 453 |         }
 454 | 
 455 |         /* Remove all stylesheets */
 456 |         for (var k=0;k < document.styleSheets.length; k+=1) {
 457 |             if (document.styleSheets[k].href !== null && document.styleSheets[k].href.lastIndexOf("readability") === -1) {
 458 |                 document.styleSheets[k].disabled = true;
 459 |             }
 460 |         }
 461 | 
 462 |         /* Remove all style tags in head (not doing this on IE) - TODO: Why not? */
 463 |         var styleTags = document.getElementsByTagName("style");
 464 |         for (var st=0;st < styleTags.length; st+=1) {
 465 |             styleTags[st].textContent = "";
 466 |         }
 467 | 
 468 |         /* Turn all double br's into p's */
 469 |         /* Note, this is pretty costly as far as processing goes. Maybe optimize later. */
 470 |         document.body.innerHTML = document.body.innerHTML.replace(readability.regexps.replaceBrs, '</p><p>').replace(readability.regexps.replaceFonts, '<$1span>');
 471 |     },
 472 | 
 473 |     /**
 474 |      * For easier reading, convert this document to have footnotes at the bottom rather than inline links.
 475 |      * @see http://www.roughtype.com/archives/2010/05/experiments_in.php
 476 |      *
 477 |      * @return void
 478 |     **/
 479 |     addFootnotes: function(articleContent) {
 480 |         var footnotesWrapper = document.getElementById('readability-footnotes'),
 481 |             articleFootnotes = document.getElementById('readability-footnotes-list');
 482 |         
 483 |         if(!footnotesWrapper) {
 484 |             footnotesWrapper               = document.createElement("DIV");
 485 |             footnotesWrapper.id            = 'readability-footnotes';
 486 |             footnotesWrapper.innerHTML     = '<h3>References</h3>';
 487 |             footnotesWrapper.style.display = 'none'; /* Until we know we have footnotes, don't show the references block. */
 488 |             
 489 |             articleFootnotes    = document.createElement('ol');
 490 |             articleFootnotes.id = 'readability-footnotes-list';
 491 |             
 492 |             footnotesWrapper.appendChild(articleFootnotes);
 493 |     
 494 |             var readFooter = document.getElementById('readFooter');
 495 |             
 496 |             if(readFooter) {
 497 |                 readFooter.parentNode.insertBefore(footnotesWrapper, readFooter);
 498 |             }
 499 |         }
 500 | 
 501 |         var articleLinks = articleContent.getElementsByTagName('a');
 502 |         var linkCount    = articleFootnotes.getElementsByTagName('li').length;
 503 |         for (var i = 0; i < articleLinks.length; i+=1)
 504 |         {
 505 |             var articleLink  = articleLinks[i],
 506 |                 footnoteLink = articleLink.cloneNode(true),
 507 |                 refLink      = document.createElement('a'),
 508 |                 footnote     = document.createElement('li'),
 509 |                 linkDomain   = footnoteLink.host ? footnoteLink.host : document.location.host,
 510 |                 linkText     = readability.getInnerText(articleLink);
 511 |             
 512 |             if(articleLink.className && articleLink.className.indexOf('readability-DoNotFootnote') !== -1 || linkText.match(readability.regexps.skipFootnoteLink)) {
 513 |                 continue;
 514 |             }
 515 |             
 516 |             linkCount+=1;
 517 | 
 518 |             /** Add a superscript reference after the article link */
 519 |             refLink.href      = '#readabilityFootnoteLink-' + linkCount;
 520 |             refLink.innerHTML = '<small><sup>[' + linkCount + ']</sup></small>';
 521 |             refLink.className = 'readability-DoNotFootnote';
 522 |             try { refLink.style.color = 'inherit'; } catch(e) {} /* IE7 doesn't like inherit. */
 523 |             
 524 |             if(articleLink.parentNode.lastChild === articleLink) {
 525 |                 articleLink.parentNode.appendChild(refLink);
 526 |             } else {
 527 |                 articleLink.parentNode.insertBefore(refLink, articleLink.nextSibling);
 528 |             }
 529 | 
 530 |             articleLink.name        = 'readabilityLink-' + linkCount;
 531 |             try { articleLink.style.color = 'inherit'; } catch(err) {} /* IE7 doesn't like inherit. */
 532 | 
 533 |             footnote.innerHTML      = "<small><sup><a href='#readabilityLink-" + linkCount + "' title='Jump to Link in Article'>^</a></sup></small> ";
 534 | 
 535 |             footnoteLink.innerHTML  = (footnoteLink.title ? footnoteLink.title : linkText);
 536 |             footnoteLink.name       = 'readabilityFootnoteLink-' + linkCount;
 537 |             
 538 |             footnote.appendChild(footnoteLink);
 539 |             footnote.innerHTML = footnote.innerHTML + "<small> (" + linkDomain + ")</small>";
 540 |             
 541 |             articleFootnotes.appendChild(footnote);
 542 |         }
 543 | 
 544 |         if(linkCount > 0) {
 545 |             footnotesWrapper.style.display = 'block';
 546 |         }
 547 |     },
 548 | 
 549 |     useRdbTypekit: function () {
 550 |         var rdbHead      = document.getElementsByTagName('head')[0];
 551 |         var rdbTKScript  = document.createElement('script');
 552 |         var rdbTKCode    = null;
 553 | 
 554 |         var rdbTKLink    = document.createElement('a');
 555 |             rdbTKLink.setAttribute('class','rdbTK-powered');
 556 |             rdbTKLink.setAttribute('title','Fonts by Typekit');
 557 |             rdbTKLink.innerHTML = "Fonts by <span class='rdbTK'>Typekit</span>";
 558 | 
 559 |         if (readStyle === "style-athelas") {
 560 |             rdbTKCode = "sxt6vzy";
 561 |             dbg("Using Athelas Theme");
 562 | 
 563 |             rdbTKLink.setAttribute('href','http://typekit.com/?utm_source=readability&utm_medium=affiliate&utm_campaign=athelas');
 564 |             rdbTKLink.setAttribute('id','rdb-athelas');
 565 |             document.getElementById("rdb-footer-right").appendChild(rdbTKLink);
 566 |         }
 567 |         if (readStyle === "style-apertura") {
 568 |             rdbTKCode = "bae8ybu";
 569 |             dbg("Using Inverse Theme");
 570 | 
 571 |             rdbTKLink.setAttribute('href','http://typekit.com/?utm_source=readability&utm_medium=affiliate&utm_campaign=inverse');
 572 |             rdbTKLink.setAttribute('id','rdb-inverse');
 573 |             document.getElementById("rdb-footer-right").appendChild(rdbTKLink);
 574 |         }
 575 | 
 576 |         /**
 577 |          * Setting new script tag attributes to pull Typekits libraries
 578 |         **/
 579 |         rdbTKScript.setAttribute('type','text/javascript');
 580 |         rdbTKScript.setAttribute('src',"http://use.typekit.com/" + rdbTKCode + ".js");
 581 |         rdbTKScript.setAttribute('charset','UTF-8');
 582 |         rdbHead.appendChild(rdbTKScript);
 583 | 
 584 |         /**
 585 |          * In the future, maybe try using the following experimental Callback function?:
 586 |          * http://gist.github.com/192350
 587 |          * &
 588 |          * http://getsatisfaction.com/typekit/topics/support_a_pre_and_post_load_callback_function
 589 |         **/
 590 |         var typekitLoader = function() {
 591 |             dbg("Looking for Typekit.");
 592 |             if(typeof Typekit !== "undefined") {
 593 |                 try {
 594 |                     dbg("Caught typekit");
 595 |                     Typekit.load();
 596 |                     clearInterval(window.typekitInterval);
 597 |                 } catch(e) {
 598 |                     dbg("Typekit error: " + e);
 599 |                 }
 600 |             }
 601 |         };
 602 | 
 603 |         window.typekitInterval = window.setInterval(typekitLoader, 100);
 604 |     },
 605 | 
 606 |     /**
 607 |      * Prepare the article node for display. Clean out any inline styles,
 608 |      * iframes, forms, strip extraneous <p> tags, etc.
 609 |      *
 610 |      * @param Element
 611 |      * @return void
 612 |      **/
 613 |     prepArticle: function (articleContent) {
 614 |         readability.cleanStyles(articleContent);
 615 |         readability.killBreaks(articleContent);
 616 | 
 617 |         /* Clean out junk from the article content */
 618 |         readability.cleanConditionally(articleContent, "form");
 619 |         readability.clean(articleContent, "object");
 620 |         readability.clean(articleContent, "h1");
 621 | 
 622 |         /**
 623 |          * If there is only one h2, they are probably using it
 624 |          * as a header and not a subheader, so remove it since we already have a header.
 625 |         ***/
 626 |         if(articleContent.getElementsByTagName('h2').length === 1) {
 627 |             readability.clean(articleContent, "h2");
 628 |         }
 629 |         readability.clean(articleContent, "iframe");
 630 | 
 631 |         readability.cleanHeaders(articleContent);
 632 | 
 633 |         /* Do these last as the previous stuff may have removed junk that will affect these */
 634 |         readability.cleanConditionally(articleContent, "table");
 635 |         readability.cleanConditionally(articleContent, "ul");
 636 |         readability.cleanConditionally(articleContent, "div");
 637 | 
 638 |         /* Remove extra paragraphs */
 639 |         var articleParagraphs = articleContent.getElementsByTagName('p');
 640 |         for(var i = articleParagraphs.length-1; i >= 0; i-=1) {
 641 |             var imgCount    = articleParagraphs[i].getElementsByTagName('img').length;
 642 |             var embedCount  = articleParagraphs[i].getElementsByTagName('embed').length;
 643 |             var objectCount = articleParagraphs[i].getElementsByTagName('object').length;
 644 |             
 645 |             if(imgCount === 0 && embedCount === 0 && objectCount === 0 && readability.getInnerText(articleParagraphs[i], false) === '') {
 646 |                 articleParagraphs[i].parentNode.removeChild(articleParagraphs[i]);
 647 |             }
 648 |         }
 649 | 
 650 |         try {
 651 |             articleContent.innerHTML = articleContent.innerHTML.replace(/<br[^>]*>\s*<p/gi, '<p');      
 652 |         }
 653 |         catch (e) {
 654 |             dbg("Cleaning innerHTML of breaks failed. This is an IE strict-block-elements bug. Ignoring.: " + e);
 655 |         }
 656 |     },
 657 |     
 658 |     /**
 659 |      * Initialize a node with the readability object. Also checks the
 660 |      * className/id for special names to add to its score.
 661 |      *
 662 |      * @param Element
 663 |      * @return void
 664 |     **/
 665 |     initializeNode: function (node) {
 666 |         node.readability = {"contentScore": 0};         
 667 | 
 668 |         switch(node.tagName) {
 669 |             case 'DIV':
 670 |                 node.readability.contentScore += 5;
 671 |                 break;
 672 | 
 673 |             case 'PRE':
 674 |             case 'TD':
 675 |             case 'BLOCKQUOTE':
 676 |                 node.readability.contentScore += 3;
 677 |                 break;
 678 |                 
 679 |             case 'ADDRESS':
 680 |             case 'OL':
 681 |             case 'UL':
 682 |             case 'DL':
 683 |             case 'DD':
 684 |             case 'DT':
 685 |             case 'LI':
 686 |             case 'FORM':
 687 |                 node.readability.contentScore -= 3;
 688 |                 break;
 689 | 
 690 |             case 'H1':
 691 |             case 'H2':
 692 |             case 'H3':
 693 |             case 'H4':
 694 |             case 'H5':
 695 |             case 'H6':
 696 |             case 'TH':
 697 |                 node.readability.contentScore -= 5;
 698 |                 break;
 699 |         }
 700 |        
 701 |         node.readability.contentScore += readability.getClassWeight(node);
 702 |     },
 703 |     
 704 |     /***
 705 |      * grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is
 706 |      *               most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
 707 |      *
 708 |      * @param page a document to run upon. Needs to be a full document, complete with body.
 709 |      * @return Element
 710 |     **/
 711 |     grabArticle: function (page) {
 712 |         var stripUnlikelyCandidates = readability.flagIsActive(readability.FLAG_STRIP_UNLIKELYS),
 713 |             isPaging = (page !== null) ? true: false;
 714 | 
 715 |         page = page ? page : document.body;
 716 | 
 717 |         var pageCacheHtml = page.innerHTML;
 718 | 
 719 |         var allElements = page.getElementsByTagName('*');
 720 | 
 721 |         /**
 722 |          * First, node prepping. Trash nodes that look cruddy (like ones with the class name "comment", etc), and turn divs
 723 |          * into P tags where they have been used inappropriately (as in, where they contain no other block level elements.)
 724 |          *
 725 |          * Note: Assignment from index for performance. See http://www.peachpit.com/articles/article.aspx?p=31567&seqNum=5
 726 |          * TODO: Shouldn't this be a reverse traversal?
 727 |         **/
 728 |         var node = null;
 729 |         var nodesToScore = [];
 730 |         for(var nodeIndex = 0; (node = allElements[nodeIndex]); nodeIndex+=1) {
 731 |             /* Remove unlikely candidates */
 732 |             if (stripUnlikelyCandidates) {
 733 |                 var unlikelyMatchString = node.className + node.id;
 734 |                 if (
 735 |                     (
 736 |                         unlikelyMatchString.search(readability.regexps.unlikelyCandidates) !== -1 &&
 737 |                         unlikelyMatchString.search(readability.regexps.okMaybeItsACandidate) === -1 &&
 738 |                         node.tagName !== "BODY"
 739 |                     )
 740 |                 )
 741 |                 {
 742 |                     dbg("Removing unlikely candidate - " + unlikelyMatchString);
 743 |                     node.parentNode.removeChild(node);
 744 |                     nodeIndex-=1;
 745 |                     continue;
 746 |                 }               
 747 |             }
 748 | 
 749 |             if (node.tagName === "P" || node.tagName === "TD" || node.tagName === "PRE") {
 750 |                 nodesToScore[nodesToScore.length] = node;
 751 |             }
 752 | 
 753 |             /* Turn all divs that don't have children block level elements into p's */
 754 |             if (node.tagName === "DIV") {
 755 |                 if (node.innerHTML.search(readability.regexps.divToPElements) === -1) {
 756 |                     var newNode = document.createElement('p');
 757 |                     try {
 758 |                         newNode.innerHTML = node.innerHTML;             
 759 |                         node.parentNode.replaceChild(newNode, node);
 760 |                         nodeIndex-=1;
 761 | 
 762 |                         nodesToScore[nodesToScore.length] = node;
 763 |                     }
 764 |                     catch(e) {
 765 |                         dbg("Could not alter div to p, probably an IE restriction, reverting back to div.: " + e);
 766 |                     }
 767 |                 }
 768 |                 else
 769 |                 {
 770 |                     /* EXPERIMENTAL */
 771 |                     for(var i = 0, il = node.childNodes.length; i < il; i+=1) {
 772 |                         var childNode = node.childNodes[i];
 773 |                         if(childNode.nodeType === 3) { // Node.TEXT_NODE
 774 |                             var p = document.createElement('p');
 775 |                             p.innerHTML = childNode.nodeValue;
 776 |                             p.style.display = 'inline';
 777 |                             p.className = 'readability-styled';
 778 |                             childNode.parentNode.replaceChild(p, childNode);
 779 |                         }
 780 |                     }
 781 |                 }
 782 |             } 
 783 |         }
 784 | 
 785 |         /**
 786 |          * Loop through all paragraphs, and assign a score to them based on how content-y they look.
 787 |          * Then add their score to their parent node.
 788 |          *
 789 |          * A score is determined by things like number of commas, class names, etc. Maybe eventually link density.
 790 |         **/
 791 |         var candidates = [];
 792 |         for (var pt=0; pt < nodesToScore.length; pt+=1) {
 793 |             var parentNode      = nodesToScore[pt].parentNode;
 794 |             var grandParentNode = parentNode ? parentNode.parentNode : null;
 795 |             var innerText       = readability.getInnerText(nodesToScore[pt]);
 796 | 
 797 |             if(!parentNode || typeof(parentNode.tagName) === 'undefined') {
 798 |                 continue;
 799 |             }
 800 | 
 801 |             /* If this paragraph is less than 25 characters, don't even count it. */
 802 |             if(innerText.length < 25) {
 803 |                 continue; }
 804 | 
 805 |             /* Initialize readability data for the parent. */
 806 |             if(typeof parentNode.readability === 'undefined') {
 807 |                 readability.initializeNode(parentNode);
 808 |                 candidates.push(parentNode);
 809 |             }
 810 | 
 811 |             /* Initialize readability data for the grandparent. */
 812 |             if(grandParentNode && typeof(grandParentNode.readability) === 'undefined' && typeof(grandParentNode.tagName) !== 'undefined') {
 813 |                 readability.initializeNode(grandParentNode);
 814 |                 candidates.push(grandParentNode);
 815 |             }
 816 | 
 817 |             var contentScore = 0;
 818 | 
 819 |             /* Add a point for the paragraph itself as a base. */
 820 |             contentScore+=1;
 821 | 
 822 |             /* Add points for any commas within this paragraph */
 823 |             contentScore += innerText.split(',').length;
 824 |             
 825 |             /* For every 100 characters in this paragraph, add another point. Up to 3 points. */
 826 |             contentScore += Math.min(Math.floor(innerText.length / 100), 3);
 827 |             
 828 |             /* Add the score to the parent. The grandparent gets half. */
 829 |             parentNode.readability.contentScore += contentScore;
 830 | 
 831 |             if(grandParentNode) {
 832 |                 grandParentNode.readability.contentScore += contentScore/2;             
 833 |             }
 834 |         }
 835 | 
 836 |         /**
 837 |          * After we've calculated scores, loop through all of the possible candidate nodes we found
 838 |          * and find the one with the highest score.
 839 |         **/
 840 |         var topCandidate = null;
 841 |         for(var c=0, cl=candidates.length; c < cl; c+=1)
 842 |         {
 843 |             /**
 844 |              * Scale the final candidates score based on link density. Good content should have a
 845 |              * relatively small link density (5% or less) and be mostly unaffected by this operation.
 846 |             **/
 847 |             candidates[c].readability.contentScore = candidates[c].readability.contentScore * (1-readability.getLinkDensity(candidates[c]));
 848 | 
 849 |             dbg('Candidate: ' + candidates[c] + " (" + candidates[c].className + ":" + candidates[c].id + ") with score " + candidates[c].readability.contentScore);
 850 | 
 851 |             if(!topCandidate || candidates[c].readability.contentScore > topCandidate.readability.contentScore) {
 852 |                 topCandidate = candidates[c]; }
 853 |         }
 854 | 
 855 |         /**
 856 |          * If we still have no top candidate, just use the body as a last resort.
 857 |          * We also have to copy the body node so it is something we can modify.
 858 |          **/
 859 |         if (topCandidate === null || topCandidate.tagName === "BODY")
 860 |         {
 861 |             topCandidate = document.createElement("DIV");
 862 |             topCandidate.innerHTML = page.innerHTML;
 863 |             page.innerHTML = "";
 864 |             page.appendChild(topCandidate);
 865 |             readability.initializeNode(topCandidate);
 866 |         }
 867 | 
 868 |         /**
 869 |          * Now that we have the top candidate, look through its siblings for content that might also be related.
 870 |          * Things like preambles, content split by ads that we removed, etc.
 871 |         **/
 872 |         var articleContent        = document.createElement("DIV");
 873 |         if (isPaging) {
 874 |             articleContent.id     = "readability-content";
 875 |         }
 876 |         var siblingScoreThreshold = Math.max(10, topCandidate.readability.contentScore * 0.2);
 877 |         var siblingNodes          = topCandidate.parentNode.childNodes;
 878 | 
 879 | 
 880 |         for(var s=0, sl=siblingNodes.length; s < sl; s+=1) {
 881 |             var siblingNode = siblingNodes[s];
 882 |             var append      = false;
 883 | 
 884 |             /**
 885 |              * Fix for odd IE7 Crash where siblingNode does not exist even though this should be a live nodeList.
 886 |              * Example of error visible here: http://www.esquire.com/features/honesty0707
 887 |             **/
 888 |             if(!siblingNode) {
 889 |                 continue;
 890 |             }
 891 | 
 892 |             dbg("Looking at sibling node: " + siblingNode + " (" + siblingNode.className + ":" + siblingNode.id + ")" + ((typeof siblingNode.readability !== 'undefined') ? (" with score " + siblingNode.readability.contentScore) : ''));
 893 |             dbg("Sibling has score " + (siblingNode.readability ? siblingNode.readability.contentScore : 'Unknown'));
 894 | 
 895 |             if(siblingNode === topCandidate)
 896 |             {
 897 |                 append = true;
 898 |             }
 899 | 
 900 |             var contentBonus = 0;
 901 |             /* Give a bonus if sibling nodes and top candidates have the example same classname */
 902 |             if(siblingNode.className === topCandidate.className && topCandidate.className !== "") {
 903 |                 contentBonus += topCandidate.readability.contentScore * 0.2;
 904 |             }
 905 | 
 906 |             if(typeof siblingNode.readability !== 'undefined' && (siblingNode.readability.contentScore+contentBonus) >= siblingScoreThreshold)
 907 |             {
 908 |                 append = true;
 909 |             }
 910 |             
 911 |             if(siblingNode.nodeName === "P") {
 912 |                 var linkDensity = readability.getLinkDensity(siblingNode);
 913 |                 var nodeContent = readability.getInnerText(siblingNode);
 914 |                 var nodeLength  = nodeContent.length;
 915 |                 
 916 |                 if(nodeLength > 80 && linkDensity < 0.25)
 917 |                 {
 918 |                     append = true;
 919 |                 }
 920 |                 else if(nodeLength < 80 && linkDensity === 0 && nodeContent.search(/\.( |$)/) !== -1)
 921 |                 {
 922 |                     append = true;
 923 |                 }
 924 |             }
 925 | 
 926 |             if(append) {
 927 |                 dbg("Appending node: " + siblingNode);
 928 | 
 929 |                 var nodeToAppend = null;
 930 |                 if(siblingNode.nodeName !== "DIV" && siblingNode.nodeName !== "P") {
 931 |                     /* We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident. */
 932 |                     
 933 |                     dbg("Altering siblingNode of " + siblingNode.nodeName + ' to div.');
 934 |                     nodeToAppend = document.createElement("DIV");
 935 |                     try {
 936 |                         nodeToAppend.id = siblingNode.id;
 937 |                         nodeToAppend.innerHTML = siblingNode.innerHTML;
 938 |                     }
 939 |                     catch(er) {
 940 |                         dbg("Could not alter siblingNode to div, probably an IE restriction, reverting back to original.");
 941 |                         nodeToAppend = siblingNode;
 942 |                         s-=1;
 943 |                         sl-=1;
 944 |                     }
 945 |                 } else {
 946 |                     nodeToAppend = siblingNode;
 947 |                     s-=1;
 948 |                     sl-=1;
 949 |                 }
 950 |                 
 951 |                 /* To ensure a node does not interfere with readability styles, remove its classnames */
 952 |                 nodeToAppend.className = "";
 953 | 
 954 |                 /* Append sibling and subtract from our list because it removes the node when you append to another node */
 955 |                 articleContent.appendChild(nodeToAppend);
 956 |             }
 957 |         }
 958 | 
 959 |         /**
 960 |          * So we have all of the content that we need. Now we clean it up for presentation.
 961 |         **/
 962 |         readability.prepArticle(articleContent);
 963 | 
 964 |         if (readability.curPageNum === 1) {
 965 |             articleContent.innerHTML = '<div id="readability-page-1" class="page">' + articleContent.innerHTML + '</div>';
 966 |         }
 967 | 
 968 |         /**
 969 |          * Now that we've gone through the full algorithm, check to see if we got any meaningful content.
 970 |          * If we didn't, we may need to re-run grabArticle with different flags set. This gives us a higher
 971 |          * likelihood of finding the content, and the sieve approach gives us a higher likelihood of
 972 |          * finding the -right- content.
 973 |         **/
 974 |         if(readability.getInnerText(articleContent, false).length < 250) {
 975 |         page.innerHTML = pageCacheHtml;
 976 | 
 977 |             if (readability.flagIsActive(readability.FLAG_STRIP_UNLIKELYS)) {
 978 |                 readability.removeFlag(readability.FLAG_STRIP_UNLIKELYS);
 979 |                 return readability.grabArticle(page);
 980 |             }
 981 |             else if (readability.flagIsActive(readability.FLAG_WEIGHT_CLASSES)) {
 982 |                 readability.removeFlag(readability.FLAG_WEIGHT_CLASSES);
 983 |                 return readability.grabArticle(page);
 984 |             }
 985 |             else if (readability.flagIsActive(readability.FLAG_CLEAN_CONDITIONALLY)) {
 986 |                 readability.removeFlag(readability.FLAG_CLEAN_CONDITIONALLY);
 987 |                 return readability.grabArticle(page);
 988 |             } else {
 989 |                 return null;
 990 |             }
 991 |         }
 992 |         
 993 |         return articleContent;
 994 |     },
 995 |     
 996 |     /**
 997 |      * Removes script tags from the document.
 998 |      *
 999 |      * @param Element
1000 |     **/
1001 |     removeScripts: function (doc) {
1002 |         var scripts = doc.getElementsByTagName('script');
1003 |         for(var i = scripts.length-1; i >= 0; i-=1)
1004 |         {
1005 |             if(typeof(scripts[i].src) === "undefined" || (scripts[i].src.indexOf('readability') === -1 && scripts[i].src.indexOf('typekit') === -1))
1006 |             {
1007 |                 scripts[i].nodeValue="";
1008 |                 scripts[i].removeAttribute('src');
1009 |                 if (scripts[i].parentNode) {
1010 |                         scripts[i].parentNode.removeChild(scripts[i]);          
1011 |                 }
1012 |             }
1013 |         }
1014 |     },
1015 |     
1016 |     /**
1017 |      * Get the inner text of a node - cross browser compatibly.
1018 |      * This also strips out any excess whitespace to be found.
1019 |      *
1020 |      * @param Element
1021 |      * @return string
1022 |     **/
1023 |     getInnerText: function (e, normalizeSpaces) {
1024 |         var textContent    = "";
1025 | 
1026 |         if(typeof(e.textContent) === "undefined" && typeof(e.innerText) === "undefined") {
1027 |             return "";
1028 |         }
1029 | 
1030 |         normalizeSpaces = (typeof normalizeSpaces === 'undefined') ? true : normalizeSpaces;
1031 | 
1032 |         if (navigator.appName === "Microsoft Internet Explorer") {
1033 |             textContent = e.innerText.replace( readability.regexps.trim, "" ); }
1034 |         else {
1035 |             textContent = e.textContent.replace( readability.regexps.trim, "" ); }
1036 | 
1037 |         if(normalizeSpaces) {
1038 |             return textContent.replace( readability.regexps.normalize, " "); }
1039 |         else {
1040 |             return textContent; }
1041 |     },
1042 | 
1043 |     /**
1044 |      * Get the number of times a string s appears in the node e.
1045 |      *
1046 |      * @param Element
1047 |      * @param string - what to split on. Default is ","
1048 |      * @return number (integer)
1049 |     **/
1050 |     getCharCount: function (e,s) {
1051 |         s = s || ",";
1052 |         return readability.getInnerText(e).split(s).length-1;
1053 |     },
1054 | 
1055 |     /**
1056 |      * Remove the style attribute on every e and under.
1057 |      * TODO: Test if getElementsByTagName(*) is faster.
1058 |      *
1059 |      * @param Element
1060 |      * @return void
1061 |     **/
1062 |     cleanStyles: function (e) {
1063 |         e = e || document;
1064 |         var cur = e.firstChild;
1065 | 
1066 |         if(!e) {
1067 |             return; }
1068 | 
1069 |         // Remove any root styles, if we're able.
1070 |         if(typeof e.removeAttribute === 'function' && e.className !== 'readability-styled') {
1071 |             e.removeAttribute('style'); }
1072 | 
1073 |         // Go until there are no more child nodes
1074 |         while ( cur !== null ) {
1075 |             if ( cur.nodeType === 1 ) {
1076 |                 // Remove style attribute(s) :
1077 |                 if(cur.className !== "readability-styled") {
1078 |                     cur.removeAttribute("style");                   
1079 |                 }
1080 |                 readability.cleanStyles( cur );
1081 |             }
1082 |             cur = cur.nextSibling;
1083 |         }           
1084 |     },
1085 |     
1086 |     /**
1087 |      * Get the density of links as a percentage of the content
1088 |      * This is the amount of text that is inside a link divided by the total text in the node.
1089 |      * 
1090 |      * @param Element
1091 |      * @return number (float)
1092 |     **/
1093 |     getLinkDensity: function (e) {
1094 |         var links      = e.getElementsByTagName("a");
1095 |         var textLength = readability.getInnerText(e).length;
1096 |         var linkLength = 0;
1097 |         for(var i=0, il=links.length; i<il;i+=1)
1098 |         {
1099 |             linkLength += readability.getInnerText(links[i]).length;
1100 |         }       
1101 | 
1102 |         return linkLength / textLength;
1103 |     },
1104 |     
1105 |     /**
1106 |      * Find a cleaned up version of the current URL, to use for comparing links for possible next-pageyness.
1107 |      *
1108 |      * @author Dan Lacy
1109 |      * @return string the base url
1110 |     **/
1111 |     findBaseUrl: function () {
1112 |         var noUrlParams     = window.location.pathname.split("?")[0],
1113 |             urlSlashes      = noUrlParams.split("/").reverse(),
1114 |             cleanedSegments = [],
1115 |             possibleType    = "";
1116 | 
1117 |         for (var i = 0, slashLen = urlSlashes.length; i < slashLen; i+=1) {
1118 |             var segment = urlSlashes[i];
1119 | 
1120 |             // Split off and save anything that looks like a file type.
1121 |             if (segment.indexOf(".") !== -1) {
1122 |                 possibleType = segment.split(".")[1];
1123 | 
1124 |                 /* If the type isn't alpha-only, it's probably not actually a file extension. */
1125 |                 if(!possibleType.match(/[^a-zA-Z]/)) {
1126 |                     segment = segment.split(".")[0];                    
1127 |                 }
1128 |             }
1129 |             
1130 |             /**
1131 |              * EW-CMS specific segment replacement. Ugly.
1132 |              * Example: http://www.ew.com/ew/article/0,,20313460_20369436,00.html
1133 |             **/
1134 |             if(segment.indexOf(',00') !== -1) {
1135 |                 segment = segment.replace(',00', '');
1136 |             }
1137 | 
1138 |             // If our first or second segment has anything looking like a page number, remove it.
1139 |             if (segment.match(/((_|-)?p[a-z]*|(_|-))[0-9]{1,2}$/i) && ((i === 1) || (i === 0))) {
1140 |                 segment = segment.replace(/((_|-)?p[a-z]*|(_|-))[0-9]{1,2}$/i, "");
1141 |             }
1142 | 
1143 | 
1144 |             var del = false;
1145 | 
1146 |             /* If this is purely a number, and it's the first or second segment, it's probably a page number. Remove it. */
1147 |             if (i < 2 && segment.match(/^\d{1,2}$/)) {
1148 |                 del = true;
1149 |             }
1150 |             
1151 |             /* If this is the first segment and it's just "index", remove it. */
1152 |             if(i === 0 && segment.toLowerCase() === "index") {
1153 |                 del = true;
1154 |             }
1155 | 
1156 |             /* If our first or second segment is smaller than 3 characters, and the first segment was purely alphas, remove it. */
1157 |             if(i < 2 && segment.length < 3 && !urlSlashes[0].match(/[a-z]/i)) {
1158 |                 del = true;
1159 |             }
1160 | 
1161 |             /* If it's not marked for deletion, push it to cleanedSegments. */
1162 |             if (!del) {
1163 |                 cleanedSegments.push(segment);
1164 |             }
1165 |         }
1166 | 
1167 |         // This is our final, cleaned, base article URL.
1168 |         return window.location.protocol + "//" + window.location.host + cleanedSegments.reverse().join("/");
1169 |     },
1170 | 
1171 |     /**
1172 |      * Look for any paging links that may occur within the document.
1173 |      * 
1174 |      * @param body
1175 |      * @return object (array)
1176 |     **/
1177 |     findNextPageLink: function (elem) {
1178 |         var possiblePages = {},
1179 |             allLinks = elem.getElementsByTagName('a'),
1180 |             articleBaseUrl = readability.findBaseUrl();
1181 | 
1182 |         /**
1183 |          * Loop through all links, looking for hints that they may be next-page links.
1184 |          * Things like having "page" in their textContent, className or id, or being a child
1185 |          * of a node with a page-y className or id.
1186 |          *
1187 |          * Also possible: levenshtein distance? longest common subsequence?
1188 |          *
1189 |          * After we do that, assign each page a score, and 
1190 |         **/
1191 |         for(var i = 0, il = allLinks.length; i < il; i+=1) {
1192 |             var link     = allLinks[i],
1193 |                 linkHref = allLinks[i].href.replace(/#.*$/, '').replace(/\/$/, '');
1194 | 
1195 |             /* If we've already seen this page, ignore it */
1196 |             if(linkHref === "" || linkHref === articleBaseUrl || linkHref === window.location.href || linkHref in readability.parsedPages) {
1197 |                 continue;
1198 |             }
1199 |             
1200 |             /* If it's on a different domain, skip it. */
1201 |             if(window.location.host !== linkHref.split(/\/+/g)[1]) {
1202 |                 continue;
1203 |             }
1204 |             
1205 |             var linkText = readability.getInnerText(link);
1206 | 
1207 |             /* If the linkText looks like it's not the next page, skip it. */
1208 |             if(linkText.match(readability.regexps.extraneous) || linkText.length > 25) {
1209 |                 continue;
1210 |             }
1211 | 
1212 |             /* If the leftovers of the URL after removing the base URL don't contain any digits, it's certainly not a next page link. */
1213 |             var linkHrefLeftover = linkHref.replace(articleBaseUrl, '');
1214 |             if(!linkHrefLeftover.match(/\d/)) {
1215 |                 continue;
1216 |             }
1217 |             
1218 |             if(!(linkHref in possiblePages)) {
1219 |                 possiblePages[linkHref] = {"score": 0, "linkText": linkText, "href": linkHref};             
1220 |             } else {
1221 |                 possiblePages[linkHref].linkText += ' | ' + linkText;
1222 |             }
1223 | 
1224 |             var linkObj = possiblePages[linkHref];
1225 | 
1226 |             /**
1227 |              * If the articleBaseUrl isn't part of this URL, penalize this link. It could still be the link, but the odds are lower.
1228 |              * Example: http://www.actionscript.org/resources/articles/745/1/JavaScript-and-VBScript-Injection-in-ActionScript-3/Page1.html
1229 |             **/
1230 |             if(linkHref.indexOf(articleBaseUrl) !== 0) {
1231 |                 linkObj.score -= 25;
1232 |             }
1233 | 
1234 |             var linkData = linkText + ' ' + link.className + ' ' + link.id;
1235 |             if(linkData.match(readability.regexps.nextLink)) {
1236 |                 linkObj.score += 50;
1237 |             }
1238 |             if(linkData.match(/pag(e|ing|inat)/i)) {
1239 |                 linkObj.score += 25;
1240 |             }
1241 |             if(linkData.match(/(first|last)/i)) { // -65 is enough to negate any bonuses gotten from a > or » in the text, 
1242 |                 /* If we already matched on "next", last is probably fine. If we didn't, then it's bad. Penalize. */
1243 |                 if(!linkObj.linkText.match(readability.regexps.nextLink)) {
1244 |                     linkObj.score -= 65;
1245 |                 }
1246 |             }
1247 |             if(linkData.match(readability.regexps.negative) || linkData.match(readability.regexps.extraneous)) {
1248 |                 linkObj.score -= 50;
1249 |             }
1250 |             if(linkData.match(readability.regexps.prevLink)) {
1251 |                 linkObj.score -= 200;
1252 |             }
1253 | 
1254 |             /* If a parentNode contains page or paging or paginat */
1255 |             var parentNode = link.parentNode,
1256 |                 positiveNodeMatch = false,
1257 |                 negativeNodeMatch = false;
1258 |             while(parentNode) {
1259 |                 var parentNodeClassAndId = parentNode.className + ' ' + parentNode.id;
1260 |                 if(!positiveNodeMatch && parentNodeClassAndId && parentNodeClassAndId.match(/pag(e|ing|inat)/i)) {
1261 |                     positiveNodeMatch = true;
1262 |                     linkObj.score += 25;
1263 |                 }
1264 |                 if(!negativeNodeMatch && parentNodeClassAndId && parentNodeClassAndId.match(readability.regexps.negative)) {
1265 |                     /* If this is just something like "footer", give it a negative. If it's something like "body-and-footer", leave it be. */
1266 |                     if(!parentNodeClassAndId.match(readability.regexps.positive)) {
1267 |                         linkObj.score -= 25;
1268 |                         negativeNodeMatch = true;                       
1269 |                     }
1270 |                 }
1271 |                 
1272 |                 parentNode = parentNode.parentNode;
1273 |             }
1274 | 
1275 |             /**
1276 |              * If the URL looks like it has paging in it, add to the score.
1277 |              * Things like /page/2/, /pagenum/2, ?p=3, ?page=11, ?pagination=34
1278 |             **/
1279 |             if (linkHref.match(/p(a|g|ag)?(e|ing|ination)?(=|\/)[0-9]{1,2}/i) || linkHref.match(/(page|paging)/i)) {
1280 |                 linkObj.score += 25;
1281 |             }
1282 | 
1283 |             /* If the URL contains negative values, give a slight decrease. */
1284 |             if (linkHref.match(readability.regexps.extraneous)) {
1285 |                 linkObj.score -= 15;
1286 |             }
1287 | 
1288 |             /**
1289 |              * Minor punishment to anything that doesn't match our current URL.
1290 |              * NOTE: I'm finding this to cause more harm than good where something is exactly 50 points.
1291 |              *       Dan, can you show me a counterexample where this is necessary?
1292 |              * if (linkHref.indexOf(window.location.href) !== 0) {
1293 |              *    linkObj.score -= 1;
1294 |              * }
1295 |             **/
1296 | 
1297 |             /**
1298 |              * If the link text can be parsed as a number, give it a minor bonus, with a slight
1299 |              * bias towards lower numbered pages. This is so that pages that might not have 'next'
1300 |              * in their text can still get scored, and sorted properly by score.
1301 |             **/
1302 |             var linkTextAsNumber = parseInt(linkText, 10);
1303 |             if(linkTextAsNumber) {
1304 |                 // Punish 1 since we're either already there, or it's probably before what we want anyways.
1305 |                 if (linkTextAsNumber === 1) {
1306 |                     linkObj.score -= 10;
1307 |                 }
1308 |                 else {
1309 |                     // Todo: Describe this better
1310 |                     linkObj.score += Math.max(0, 10 - linkTextAsNumber);
1311 |                 }
1312 |             }
1313 |         }
1314 | 
1315 |         /**
1316 |          * Loop thrugh all of our possible pages from above and find our top candidate for the next page URL.
1317 |          * Require at least a score of 50, which is a relatively high confidence that this page is the next link.
1318 |         **/
1319 |         var topPage = null;
1320 |         for(var page in possiblePages) {
1321 |             if(possiblePages.hasOwnProperty(page)) {
1322 |                 if(possiblePages[page].score >= 50 && (!topPage || topPage.score < possiblePages[page].score)) {
1323 |                     topPage = possiblePages[page];
1324 |                 }
1325 |             }
1326 |         }
1327 | 
1328 |         if(topPage) {
1329 |             var nextHref = topPage.href.replace(/\/$/,'');
1330 | 
1331 |             dbg('NEXT PAGE IS ' + nextHref);
1332 |             readability.parsedPages[nextHref] = true;
1333 |             return nextHref;            
1334 |         }
1335 |         else {
1336 |             return null;
1337 |         }
1338 |     },
1339 | 
1340 |     /**
1341 |      * Build a simple cross browser compatible XHR.
1342 |      *
1343 |      * TODO: This could likely be simplified beyond what we have here right now. There's still a bit of excess junk.
1344 |     **/
1345 |     xhr: function () {
1346 |         if (typeof XMLHttpRequest !== 'undefined' && (window.location.protocol !== 'file:' || !window.ActiveXObject)) {
1347 |             return new XMLHttpRequest();
1348 |         }
1349 |         else {
1350 |             try { return new ActiveXObject('Msxml2.XMLHTTP.6.0'); } catch(sixerr) { }
1351 |             try { return new ActiveXObject('Msxml2.XMLHTTP.3.0'); } catch(threrr) { }
1352 |             try { return new ActiveXObject('Msxml2.XMLHTTP'); } catch(err) { }
1353 |         }
1354 | 
1355 |         return false;
1356 |     },
1357 | 
1358 |     successfulRequest: function (request) {
1359 |         return (request.status >= 200 && request.status < 300) || request.status === 304 || (request.status === 0 && request.responseText);
1360 |     },
1361 | 
1362 |     ajax: function (url, options) {
1363 |         var request = readability.xhr();
1364 | 
1365 |         function respondToReadyState(readyState) {
1366 |             if (request.readyState === 4) {
1367 |                 if (readability.successfulRequest(request)) {
1368 |                     if (options.success) { options.success(request); }
1369 |                 }
1370 |                 else {
1371 |                     if (options.error) { options.error(request); }
1372 |                 }
1373 |             }
1374 |         }
1375 | 
1376 |         if (typeof options === 'undefined') { options = {}; }
1377 | 
1378 |         request.onreadystatechange = respondToReadyState;
1379 |         
1380 |         request.open('get', url, true);
1381 |         request.setRequestHeader('Accept', 'text/html');
1382 | 
1383 |         try {
1384 |             request.send(options.postBody);
1385 |         }
1386 |         catch (e) {
1387 |             if (options.error) { options.error(); }
1388 |         }
1389 | 
1390 |         return request;
1391 |     },
1392 | 
1393 |     /**
1394 |      * Make an AJAX request for each page and append it to the document.
1395 |     **/
1396 |     curPageNum: 1,
1397 | 
1398 |     appendNextPage: function (nextPageLink) {
1399 |         readability.curPageNum+=1;
1400 | 
1401 |         var articlePage       = document.createElement("DIV");
1402 |         articlePage.id        = 'readability-page-' + readability.curPageNum;
1403 |         articlePage.className = 'page';
1404 |         articlePage.innerHTML = '<p class="page-separator" title="Page ' + readability.curPageNum + '">&sect;</p>';
1405 | 
1406 |         document.getElementById("readability-content").appendChild(articlePage);
1407 | 
1408 |         if(readability.curPageNum > readability.maxPages) {
1409 |             var nextPageMarkup = "<div style='text-align: center'><a href='" + nextPageLink + "'>View Next Page</a></div>";
1410 | 
1411 |             articlePage.innerHTML = articlePage.innerHTML + nextPageMarkup;
1412 |             return;
1413 |         }
1414 |         
1415 |         /**
1416 |          * Now that we've built the article page DOM element, get the page content
1417 |          * asynchronously and load the cleaned content into the div we created for it.
1418 |         **/
1419 |         (function(pageUrl, thisPage) {
1420 |             readability.ajax(pageUrl, {
1421 |                 success: function(r) {
1422 | 
1423 |                     /* First, check to see if we have a matching ETag in headers - if we do, this is a duplicate page. */
1424 |                     var eTag = r.getResponseHeader('ETag');
1425 |                     if(eTag) {
1426 |                         if(eTag in readability.pageETags) {
1427 |                             dbg("Exact duplicate page found via ETag. Aborting.");
1428 |                             articlePage.style.display = 'none';
1429 |                             return;
1430 |                         } else {
1431 |                             readability.pageETags[eTag] = 1;
1432 |                         }                       
1433 |                     }
1434 | 
1435 |                     // TODO: this ends up doubling up page numbers on NYTimes articles. Need to generically parse those away.
1436 |                     var page = document.createElement("DIV");
1437 | 
1438 |                     /**
1439 |                      * Do some preprocessing to our HTML to make it ready for appending.
1440 |                      * • Remove any script tags. Swap and reswap newlines with a unicode character because multiline regex doesn't work in javascript.
1441 |                      * • Turn any noscript tags into divs so that we can parse them. This allows us to find any next page links hidden via javascript.
1442 |                      * • Turn all double br's into p's - was handled by prepDocument in the original view.
1443 |                      *   Maybe in the future abstract out prepDocument to work for both the original document and AJAX-added pages.
1444 |                     **/
1445 |                     var responseHtml = r.responseText.replace(/\n/g,'\uffff').replace(/<script.*?>.*?<\/script>/gi, '');
1446 |                     responseHtml = responseHtml.replace(/\n/g,'\uffff').replace(/<script.*?>.*?<\/script>/gi, '');
1447 |                     responseHtml = responseHtml.replace(/\uffff/g,'\n').replace(/<(\/?)noscript/gi, '<$1div');
1448 |                     responseHtml = responseHtml.replace(readability.regexps.replaceBrs, '</p><p>');
1449 |                     responseHtml = responseHtml.replace(readability.regexps.replaceFonts, '<$1span>');
1450 |                     
1451 |                     page.innerHTML = responseHtml;
1452 | 
1453 |                     /**
1454 |                      * Reset all flags for the next page, as they will search through it and disable as necessary at the end of grabArticle.
1455 |                     **/
1456 |                     readability.flags = 0x1 | 0x2 | 0x4;
1457 | 
1458 |                     var nextPageLink = readability.findNextPageLink(page),
1459 |                         content      =  readability.grabArticle(page);
1460 | 
1461 |                     if(!content) {
1462 |                         dbg("No content found in page to append. Aborting.");
1463 |                         return;
1464 |                     }
1465 | 
1466 |                     /**
1467 |                      * Anti-duplicate mechanism. Essentially, get the first paragraph of our new page.
1468 |                      * Compare it against all of the the previous document's we've gotten. If the previous
1469 |                      * document contains exactly the innerHTML of this first paragraph, it's probably a duplicate.
1470 |                     **/
1471 |                     var firstP = content.getElementsByTagName("P").length ? content.getElementsByTagName("P")[0] : null;
1472 |                     if(firstP && firstP.innerHTML.length > 100) {
1473 |                         for(var i=1; i <= readability.curPageNum; i+=1) {
1474 |                             var rPage = document.getElementById('readability-page-' + i);
1475 |                             if(rPage && rPage.innerHTML.indexOf(firstP.innerHTML) !== -1) {
1476 |                                 dbg('Duplicate of page ' + i + ' - skipping.');
1477 |                                 articlePage.style.display = 'none';
1478 |                                 readability.parsedPages[pageUrl] = true;
1479 |                                 return;
1480 |                             }
1481 |                         }
1482 |                     }
1483 |                     
1484 |                     readability.removeScripts(content);
1485 | 
1486 |                     thisPage.innerHTML = thisPage.innerHTML + content.innerHTML;
1487 | 
1488 |                     /**
1489 |                      * After the page has rendered, post process the content. This delay is necessary because,
1490 |                      * in webkit at least, offsetWidth is not set in time to determine image width. We have to
1491 |                      * wait a little bit for reflow to finish before we can fix floating images.
1492 |                     **/
1493 |                     window.setTimeout(
1494 |                         function() { readability.postProcessContent(thisPage); },
1495 |                         500
1496 |                     );
1497 | 
1498 |                     if(nextPageLink) {
1499 |                         readability.appendNextPage(nextPageLink);
1500 |                     }
1501 |                 }
1502 |             });
1503 |         }(nextPageLink, articlePage));
1504 |     },
1505 |     
1506 |     /**
1507 |      * Get an elements class/id weight. Uses regular expressions to tell if this 
1508 |      * element looks good or bad.
1509 |      *
1510 |      * @param Element
1511 |      * @return number (Integer)
1512 |     **/
1513 |     getClassWeight: function (e) {
1514 |         if(!readability.flagIsActive(readability.FLAG_WEIGHT_CLASSES)) {
1515 |             return 0;
1516 |         }
1517 | 
1518 |         var weight = 0;
1519 | 
1520 |         /* Look for a special classname */
1521 |         if (typeof(e.className) === 'string' && e.className !== '')
1522 |         {
1523 |             if(e.className.search(readability.regexps.negative) !== -1) {
1524 |                 weight -= 25; }
1525 | 
1526 |             if(e.className.search(readability.regexps.positive) !== -1) {
1527 |                 weight += 25; }
1528 |         }
1529 | 
1530 |         /* Look for a special ID */
1531 |         if (typeof(e.id) === 'string' && e.id !== '')
1532 |         {
1533 |             if(e.id.search(readability.regexps.negative) !== -1) {
1534 |                 weight -= 25; }
1535 | 
1536 |             if(e.id.search(readability.regexps.positive) !== -1) {
1537 |                 weight += 25; }
1538 |         }
1539 | 
1540 |         return weight;
1541 |     },
1542 | 
1543 |     nodeIsVisible: function (node) {
1544 |         return (node.offsetWidth !== 0 || node.offsetHeight !== 0) && node.style.display.toLowerCase() !== 'none';
1545 |     },
1546 | 
1547 |     /**
1548 |      * Remove extraneous break tags from a node.
1549 |      *
1550 |      * @param Element
1551 |      * @return void
1552 |      **/
1553 |     killBreaks: function (e) {
1554 |         try {
1555 |             e.innerHTML = e.innerHTML.replace(readability.regexps.killBreaks,'<br />');       
1556 |         }
1557 |         catch (eBreaks) {
1558 |             dbg("KillBreaks failed - this is an IE bug. Ignoring.: " + eBreaks);
1559 |         }
1560 |     },
1561 | 
1562 |     /**
1563 |      * Clean a node of all elements of type "tag".
1564 |      * (Unless it's a youtube/vimeo video. People love movies.)
1565 |      *
1566 |      * @param Element
1567 |      * @param string tag to clean
1568 |      * @return void
1569 |      **/
1570 |     clean: function (e, tag) {
1571 |         var targetList = e.getElementsByTagName( tag );
1572 |         var isEmbed    = (tag === 'object' || tag === 'embed');
1573 |         
1574 |         for (var y=targetList.length-1; y >= 0; y-=1) {
1575 |             /* Allow youtube and vimeo videos through as people usually want to see those. */
1576 |             if(isEmbed) {
1577 |                 var attributeValues = "";
1578 |                 for (var i=0, il=targetList[y].attributes.length; i < il; i+=1) {
1579 |                     attributeValues += targetList[y].attributes[i].value + '|';
1580 |                 }
1581 |                 
1582 |                 /* First, check the elements attributes to see if any of them contain youtube or vimeo */
1583 |                 if (attributeValues.search(readability.regexps.videos) !== -1) {
1584 |                     continue;
1585 |                 }
1586 | 
1587 |                 /* Then check the elements inside this element for the same. */
1588 |                 if (targetList[y].innerHTML.search(readability.regexps.videos) !== -1) {
1589 |                     continue;
1590 |                 }
1591 |                 
1592 |             }
1593 | 
1594 |             targetList[y].parentNode.removeChild(targetList[y]);
1595 |         }
1596 |     },
1597 |     
1598 |     /**
1599 |      * Clean an element of all tags of type "tag" if they look fishy.
1600 |      * "Fishy" is an algorithm based on content length, classnames, link density, number of images & embeds, etc.
1601 |      *
1602 |      * @return void
1603 |      **/
1604 |     cleanConditionally: function (e, tag) {
1605 | 
1606 |         if(!readability.flagIsActive(readability.FLAG_CLEAN_CONDITIONALLY)) {
1607 |             return;
1608 |         }
1609 | 
1610 |         var tagsList      = e.getElementsByTagName(tag);
1611 |         var curTagsLength = tagsList.length;
1612 | 
1613 |         /**
1614 |          * Gather counts for other typical elements embedded within.
1615 |          * Traverse backwards so we can remove nodes at the same time without effecting the traversal.
1616 |          *
1617 |          * TODO: Consider taking into account original contentScore here.
1618 |         **/
1619 |         for (var i=curTagsLength-1; i >= 0; i-=1) {
1620 |             var weight = readability.getClassWeight(tagsList[i]);
1621 |             var contentScore = (typeof tagsList[i].readability !== 'undefined') ? tagsList[i].readability.contentScore : 0;
1622 |             
1623 |             dbg("Cleaning Conditionally " + tagsList[i] + " (" + tagsList[i].className + ":" + tagsList[i].id + ")" + ((typeof tagsList[i].readability !== 'undefined') ? (" with score " + tagsList[i].readability.contentScore) : ''));
1624 | 
1625 |             if(weight+contentScore < 0)
1626 |             {
1627 |                 tagsList[i].parentNode.removeChild(tagsList[i]);
1628 |             }
1629 |             else if ( readability.getCharCount(tagsList[i],',') < 10) {
1630 |                 /**
1631 |                  * If there are not very many commas, and the number of
1632 |                  * non-paragraph elements is more than paragraphs or other ominous signs, remove the element.
1633 |                 **/
1634 |                 var p      = tagsList[i].getElementsByTagName("p").length;
1635 |                 var img    = tagsList[i].getElementsByTagName("img").length;
1636 |                 var li     = tagsList[i].getElementsByTagName("li").length-100;
1637 |                 var input  = tagsList[i].getElementsByTagName("input").length;
1638 | 
1639 |                 var embedCount = 0;
1640 |                 var embeds     = tagsList[i].getElementsByTagName("embed");
1641 |                 for(var ei=0,il=embeds.length; ei < il; ei+=1) {
1642 |                     if (embeds[ei].src.search(readability.regexps.videos) === -1) {
1643 |                       embedCount+=1; 
1644 |                     }
1645 |                 }
1646 | 
1647 |                 var linkDensity   = readability.getLinkDensity(tagsList[i]);
1648 |                 var contentLength = readability.getInnerText(tagsList[i]).length;
1649 |                 var toRemove      = false;
1650 | 
1651 |                 if ( img > p ) {
1652 |                     toRemove = true;
1653 |                 } else if(li > p && tag !== "ul" && tag !== "ol") {
1654 |                     toRemove = true;
1655 |                 } else if( input > Math.floor(p/3) ) {
1656 |                     toRemove = true; 
1657 |                 } else if(contentLength < 25 && (img === 0 || img > 2) ) {
1658 |                     toRemove = true;
1659 |                 } else if(weight < 25 && linkDensity > 0.2) {
1660 |                     toRemove = true;
1661 |                 } else if(weight >= 25 && linkDensity > 0.5) {
1662 |                     toRemove = true;
1663 |                 } else if((embedCount === 1 && contentLength < 75) || embedCount > 1) {
1664 |                     toRemove = true;
1665 |                 }
1666 | 
1667 |                 if(toRemove) {
1668 |                     tagsList[i].parentNode.removeChild(tagsList[i]);
1669 |                 }
1670 |             }
1671 |         }
1672 |     },
1673 | 
1674 |     /**
1675 |      * Clean out spurious headers from an Element. Checks things like classnames and link density.
1676 |      *
1677 |      * @param Element
1678 |      * @return void
1679 |     **/
1680 |     cleanHeaders: function (e) {
1681 |         for (var headerIndex = 1; headerIndex < 3; headerIndex+=1) {
1682 |             var headers = e.getElementsByTagName('h' + headerIndex);
1683 |             for (var i=headers.length-1; i >=0; i-=1) {
1684 |                 if (readability.getClassWeight(headers[i]) < 0 || readability.getLinkDensity(headers[i]) > 0.33) {
1685 |                     headers[i].parentNode.removeChild(headers[i]);
1686 |                 }
1687 |             }
1688 |         }
1689 |     },
1690 | 
1691 |     /*** Smooth scrolling logic ***/
1692 |     
1693 |     /**
1694 |      * easeInOut animation algorithm - returns an integer that says how far to move at this point in the animation.
1695 |      * Borrowed from jQuery's easing library.
1696 |      * @return integer
1697 |     **/
1698 |     easeInOut: function(start,end,totalSteps,actualStep) { 
1699 |         var delta = end - start; 
1700 | 
1701 |         if ((actualStep/=totalSteps/2) < 1) { 
1702 |             return delta/2*actualStep*actualStep + start;
1703 |         }
1704 |         actualStep -=1;
1705 |         return -delta/2 * ((actualStep)*(actualStep-2) - 1) + start;
1706 |     },
1707 |     
1708 |     /**
1709 |      * Helper function to, in a cross compatible way, get or set the current scroll offset of the document.
1710 |      * @return mixed integer on get, the result of window.scrollTo on set
1711 |     **/
1712 |     scrollTop: function(scroll){
1713 |         var setScroll = typeof scroll !== 'undefined';
1714 | 
1715 |         if(setScroll) {
1716 |             return window.scrollTo(0, scroll);
1717 |         }
1718 |         if(typeof window.pageYOffset !== 'undefined') {
1719 |             return window.pageYOffset;
1720 |         }
1721 |         else if(document.documentElement.clientHeight) {
1722 |             return document.documentElement.scrollTop;
1723 |         }
1724 |         else {
1725 |             return document.body.scrollTop;
1726 |         }
1727 |     },
1728 |     
1729 |     /**
1730 |      * scrollTo - Smooth scroll to the point of scrollEnd in the document.
1731 |      * @return void
1732 |     **/
1733 |     curScrollStep: 0,
1734 |     scrollTo: function (scrollStart, scrollEnd, steps, interval) {
1735 |         if(
1736 |             (scrollStart < scrollEnd && readability.scrollTop() < scrollEnd) ||
1737 |             (scrollStart > scrollEnd && readability.scrollTop() > scrollEnd)
1738 |           ) {
1739 |             readability.curScrollStep+=1;
1740 |             if(readability.curScrollStep > steps) {
1741 |                 return;
1742 |             }
1743 | 
1744 |             var oldScrollTop = readability.scrollTop();
1745 |             
1746 |             readability.scrollTop(readability.easeInOut(scrollStart, scrollEnd, steps, readability.curScrollStep));
1747 | 
1748 |             // We're at the end of the window.
1749 |             if(oldScrollTop === readability.scrollTop()) {
1750 |                 return;
1751 |             }
1752 | 
1753 |             window.setTimeout(function() {
1754 |                 readability.scrollTo(scrollStart, scrollEnd, steps, interval);
1755 |             }, interval);
1756 |         }
1757 |     },
1758 | 
1759 |     
1760 |     /**
1761 |      * Show the email popup.
1762 |      *
1763 |      * @return void
1764 |      **/
1765 |     emailBox: function () {
1766 |         var emailContainerExists = document.getElementById('email-container');
1767 |         if(null !== emailContainerExists)
1768 |         {
1769 |             return;
1770 |         }
1771 | 
1772 |         var emailContainer = document.createElement("DIV");
1773 |         emailContainer.setAttribute('id', 'email-container');
1774 |         emailContainer.innerHTML = '<iframe src="'+readability.emailSrc + '?pageUrl='+encodeURIComponent(window.location)+'&pageTitle='+encodeURIComponent(document.title)+'" scrolling="no" onload="readability.removeFrame()" style="width:500px; height: 490px; border: 0;"></iframe>';
1775 | 
1776 |         document.body.appendChild(emailContainer);          
1777 |     },
1778 |     
1779 |     /**
1780 |      * Close the email popup. This is a hacktackular way to check if we're in a "close loop".
1781 |      * Since we don't have crossdomain access to the frame, we can only know when it has
1782 |      * loaded again. If it's loaded over 3 times, we know to close the frame.
1783 |      *
1784 |      * @return void
1785 |      **/
1786 |     removeFrame: function () {
1787 |         readability.iframeLoads+=1;
1788 |         if (readability.iframeLoads > 3)
1789 |         {
1790 |             var emailContainer = document.getElementById('email-container');
1791 |             if (null !== emailContainer) {
1792 |                 emailContainer.parentNode.removeChild(emailContainer);
1793 |             }
1794 | 
1795 |             readability.iframeLoads = 0;
1796 |         }           
1797 |     },
1798 |     
1799 |     htmlspecialchars: function (s) {
1800 |         if (typeof(s) === "string") {
1801 |             s = s.replace(/&/g, "&amp;");
1802 |             s = s.replace(/"/g, "&quot;");
1803 |             s = s.replace(/'/g, "&#039;");
1804 |             s = s.replace(/</g, "&lt;");
1805 |             s = s.replace(/>/g, "&gt;");
1806 |         }
1807 |     
1808 |         return s;
1809 |     },
1810 | 
1811 |     flagIsActive: function(flag) {
1812 |         return (readability.flags & flag) > 0;
1813 |     },
1814 |     
1815 |     addFlag: function(flag) {
1816 |         readability.flags = readability.flags | flag;
1817 |     },
1818 |     
1819 |     removeFlag: function(flag) {
1820 |         readability.flags = readability.flags & ~flag;
1821 |     }
1822 |     
1823 | };
1824 | 
1825 | readability.init();
1826 | 


--------------------------------------------------------------------------------