├── .gitignore
├── package.sh
├── misc
├── readability-module.js
├── readability-feedflock.js
└── readability-ori.js
├── test.txt
├── LICENSE.txt
├── test
├── bugs
│ ├── htmlparser.js
│ └── jsdom-bug.js
├── clean-file.js
├── grab-pages.rb
├── clean-proxy.js
├── weird-pages
│ └── w3c-css-no-closing-head.html
└── nytime.html
├── README.md
├── package.json
├── notes.txt
└── lib
└── sprintf.js
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | log/*.log
3 | dist/*
4 | *.tmproj
5 |
6 |
--------------------------------------------------------------------------------
/package.sh:
--------------------------------------------------------------------------------
# Build dist/readability.tgz containing only the files needed at runtime
# (lib/, licence, README and package.json). Must be run from the repository
# root, and the checkout directory must be named $NAME because the archive is
# built relative to the parent directory.
NAME=node-readability
# dist/ is git-ignored, so it does not exist in a fresh checkout.
mkdir -p ./dist
tar -zcf ./dist/readability.tgz -C .. --exclude=".*" --exclude="test*" "$NAME/lib" "$NAME/LICENSE.txt" "$NAME/README.md" "$NAME/package.json"
--------------------------------------------------------------------------------
/misc/readability-module.js:
--------------------------------------------------------------------------------
1 | exports.parse = parse;
2 | var jsdom = require('jsdom');
3 | var rdom = require('./readability-my2.js');
4 | var util = require('util');
5 |
/**
 * Build a DOM from `html` and run the readability extractor over it.
 *
 * @param {string} html - raw page markup.
 * @param {string} url - page URL, handed to jsdom as the document URL.
 * @param {function({title: string, content: string})} callback - receives the
 *   page title and the extracted article HTML; both are empty strings when
 *   the parsed document has no body.
 */
function parse(html, url, callback) {
  //util.debug(html);
  var doc = jsdom.jsdom(html, null, {url: url});
  util.log('---DOM created');
  if (!doc.body) {
    console.log('empty body');
    return callback({title: '', content: ''});
  }

  var win = doc.parentWindow;
  rdom.start(win, function(content) {
    //console.log(content);
    // The title lives on the jsdom document created above; there is no
    // global `document` in this module.
    callback({title: doc.title, content: content});
  });
}
--------------------------------------------------------------------------------
/test.txt:
--------------------------------------------------------------------------------
1 | http://127.0.0.1:3000/?url=http%3A%2F%2Fwww.bbc.co.uk%2Fukchina%2Fsimp%2Fentertainment%2F2010%2F11%2F101103_ent_harrypotter.shtml
2 |
3 | http://127.0.0.1:3000/?url=http://en.wikipedia.org/wiki/Ruby
4 | http://127.0.0.1:3000/?url=http://buzz.blogger.com/2010/10/safe-browsing-on-blogger.html
5 | http://127.0.0.1:3000/?url=http://www.ifanr.com/24614
6 | http://127.0.0.1:3000/?url=http://www.boston.com/news/politics/articles/2010/11/03/patrick_roars_to_a_2d_term/
7 |
8 |
9 | problems:
10 | slow
11 | http://127.0.0.1:3000/?url=http://www.gazeta.ru/news/lastnews/
12 | http://127.0.0.1:3000/?url=http://www.sqlite.org/fts3.html
13 | http://127.0.0.1:3000/?url=http://news.google.com.hk/nwshp?hl=zh-tw&tab=in
14 |
15 | returned html cannot be parsed by browser
16 | http://blog.zacharyvoase.com/2010/11/11/sockets-and-nodes-i/
17 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2010 Arrix Zhou
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining
4 | a copy of this software and associated documentation files (the
5 | "Software"), to deal in the Software without restriction, including
6 | without limitation the rights to use, copy, modify, merge, publish,
7 | distribute, sublicense, and/or sell copies of the Software, and to
8 | permit persons to whom the Software is furnished to do so, subject to
9 | the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/test/bugs/htmlparser.js:
--------------------------------------------------------------------------------
1 | var request = require('request');
2 | var jsdom = require('jsdom');
3 |
4 | var url = 'http://www.w3.org/TR/css3-2d-transforms/';
5 | request({uri:url}, function (error, response, body) {
6 | var html = body;
7 | var doc = jsdom.jsdom(html, null, {url: url});
8 | console.log(doc.head+''); //[ HEAD ]
9 | console.log(doc.body === null); //true
10 | console.log(doc.head.childNodes[9].tagName); //BODY
11 | });
12 |
13 | var doc = jsdom.jsdom(html, null, {url: ''});
14 |
15 |
16 | var HTML5 = require('html5');
17 | var fs = require('fs');
18 | var content = fs.readFileSync('test/css.html', 'utf-8');
19 | var html = content;
20 | var jsdom = require('jsdom');
21 | var browser = jsdom.browserAugmentation(jsdom.defaultLevel);
22 |
23 | var doc = new browser.HTMLDocument();
24 | var parser = new HTML5.Parser({document: doc});
25 | parser.parse(html);
26 |
27 | var doc2 = jsdom.jsdom(html, null, {parser: HTML5});
28 |
29 |
30 |
31 |
// Parse the same markup with node-htmlparser alone and dump the resulting
// tree, for comparison with the jsdom output above.
var util = require('util');
var htmlparser = require("htmlparser");
var handler = new htmlparser.DefaultHandler(function (error, dom) {
  // Surface parse failures instead of dropping them silently.
  if (error) console.log('htmlparser error:', error);
});
var parser = new htmlparser.Parser(handler);
parser.parseComplete(html);
// `sys` was never required in this file; use util.inspect + console.log.
console.log(util.inspect(handler.dom, false, null));
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # node-readability
2 | [Readability.js by Arc90](http://lab.arc90.com/experiments/readability/) ported to node.js.
3 |
4 | Blog post: [Server side readability with node.js](http://arrix.blogspot.com/2010/11/server-side-readability-with-nodejs.html)
5 | ## Requirements
6 | * [node.js](http://nodejs.org/)
7 | * [jsdom](https://github.com/tmpvar/jsdom)
8 | * [htmlparser](https://github.com/tautologistics/node-htmlparser)
9 |
10 | ## Live demo
11 | I'm working on it...
12 | ## Example
13 |
14 | var readability = require('readability');
15 | //...
16 | // This is a very early example. The API is subject to change.
17 | readability.parse(html, url, function(result) {
18 | console.log(result.title, result.content);
19 | });
20 |
21 | ## Performance
22 | In my testing of 140 pages with an average size of **58KB** collected from [digg](http://digg.com/news.rss), [delicious](http://feeds.delicious.com/v2/rss/?count=50) and [hacker news](http://news.ycombinator.com/rss), the average time taken for each page is about **1.1 seconds** on a Mac Mini (2.4G Intel Core 2 Duo).
23 | ## Limitation
24 | * no fetching next pages
25 | * no support for frames
26 |
27 | ## Plan
28 | * Performance optimization
29 | * Better API, more options
30 | * Support more readability features
--------------------------------------------------------------------------------
/test/bugs/jsdom-bug.js:
--------------------------------------------------------------------------------
1 | // jsdom bug: Live NodeList isn't updated after DOM manipulation
2 | // node.js v0.2.4
3 | // jsdom@0.1.20
4 | // https://github.com/tmpvar/jsdom/issues/#issue/77
5 |
6 | var jsdom = require('jsdom');
7 | var html = '
';
8 | var window = jsdom.jsdom(html).createWindow();
9 | var document = window.document;
10 |
11 | var all = document.getElementsByTagName('*');
12 | var i = 2;
13 | var node = all[i];
14 | console.log(''+node); //P#p1
15 | node.parentNode.removeChild(node);
16 |
17 | console.log(''+all[i]); //still P#p1. the live NodeList wasn't updated properly
18 | all.length; //trigger a refresh. the length getter calls update()
19 | console.log(''+all[i]); //P#p2 OK
20 |
21 |
22 | // innerHTML = '' doesn't remove all children
23 | // https://github.com/tmpvar/jsdom/issues/#issue/80
24 | (function() {
25 | var jsdom = require('jsdom');
26 | var html = '';
27 | var doc = jsdom.jsdom(html);
28 | var win = doc.createWindow();
29 | var b = doc.body;
30 | b.innerHTML = '';
31 | console.log(b.innerHTML); //
32 |
33 | var arr = [0, 1, 2, 3, 4, 5];
34 | arr.forEach(function(v, i) {
35 | console.log('[', i, '] ==', v);
36 | arr.splice(i, 1);
37 | });
38 | // output
39 | // [ 0 ] == 0
40 | // [ 1 ] == 2
41 | // [ 2 ] == 4
42 |
43 | })();
44 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "readability",
3 | "version": "0.1.0",
4 | "description": "Arc90's readability.js adapted to node.js",
5 | "keywords": [
6 | "readability"
7 | ],
8 | "maintainers": [
9 | {
10 | "name": "Arrix",
11 | "email": "arrixzhou@gmail.com",
12 | "web": "http://arrix.blogspot.com"
13 | }
14 | ],
15 | "contributors": [
16 | {
17 | "name": "Arrix",
18 | "email": "arrixzhou@gmail.com",
19 | "web": "http://arrix.blogspot.com"
20 | },
21 | {
22 | "name": "Vincent Cao",
23 | "email": "caojunvincent@gmail.com"
24 | }
25 | ],
26 | "bugs": {
27 | "mail": "arrixzhou@gmail.com",
28 | "web": "http://github.com/arrix/node-readability/issues"
29 | },
30 | "licenses": [
31 | {
32 | "type": "MIT",
33 | "url": "http://github.com/arrix/node-readability/LICENSE.txt"
34 | }
35 | ],
36 | "repositories": [
37 | {
38 | "type": "git",
39 | "url": "http://github.com/arrix/node-readability.git"
40 | }
41 | ],
42 | "dependencies": {
43 | "mjsunit.runner": ">=0.1.0",
44 | "jsdom": ">=0.1.21",
45 | "htmlparser": ">=1.7.3"
46 | },
47 | "engines" : { "node" : ">=0.2.5" },
48 | "directories": {
49 | "lib": "lib"
50 | },
51 | "main": "./lib/readability"
52 | }
53 |
54 |
--------------------------------------------------------------------------------
/test/clean-file.js:
--------------------------------------------------------------------------------
1 | var http = require('http'),
2 | url_mod = require('url'),
3 | fs = require('fs');
4 |
5 | var readability = require('../lib/readability.js'),
6 | sprintf = readability.sprintf;
7 |
/**
 * Read a saved page from disk (UTF-8) and run readability over it with the
 * debugging options used by this test script.
 *
 * @param {string} path - file to read.
 * @param {string} url - URL to report to readability for the page.
 * @param {function(Object)} cb - forwarded to readability.parse.
 */
function cleanFile(path, url, cb) {
  var options = {
    removeReadabilityArtifacts: false,
    removeClassNames: false,
    debug: true,
    profile: 1
  };
  var markup = fs.readFileSync(path, 'utf-8');
  readability.parse(markup, url, options, cb);
}
12 | if (1) {
13 | cleanFile(__dirname + '/weird-pages/w3c-css-no-closing-head.html', '', function(info) {
14 | //console.log(info.content);
15 | });
16 |
17 | return;
18 | }
19 |
/**
 * Run cleanFile over every .html file in test/pages/ and print a timing
 * table (sorted by time, failed pages excluded) followed by the total count
 * and the average time.
 *
 * The summary is printed only once every cleanFile callback has fired, so it
 * is correct whether the callbacks arrive synchronously or later.
 */
function batch_run() {
  var dir = __dirname + '/pages/';
  var files = fs.readdirSync(dir).filter(function(f) {
    return /\.html/i.test(f);
  });
  var results = [];
  var pending = files.length;
  //files.length = 10;

  function report() {
    var total = 0, totalTime = 0;
    results.filter(function(v) {return !v.error}).sort(function(a, b) {return a.time - b.time;}).forEach(function(r) {
      total++;
      totalTime += r.time;
      console.log(sprintf('%5.2f\t%8d\t%10s', r.time, r.inputLength, r.file));
    });
    // Avoid printing NaN when no page was processed successfully.
    console.log('total:', total, "avg time:", total ? totalTime/total : 0);
  }

  if (pending === 0) {
    report();
    return;
  }

  files.forEach(function(f) {
    console.log('######## Processing file...', f);
    cleanFile(dir + f, '', function(result) {
      results.push({time: result.time, file: f, inputLength: result.inputLength, error: result.error});
      if (--pending === 0) report();
    });
  });
}
41 |
42 | batch_run();
43 |
--------------------------------------------------------------------------------
/test/grab-pages.rb:
--------------------------------------------------------------------------------
1 | require 'open-uri'
2 | require 'rexml/document'
3 | require 'fileutils'
4 |
# Downloads the pages linked from a few public RSS feeds into test/pages/ so
# they can be used as input for the readability timing tests.
module Program
  # Seconds curl may spend establishing a connection before giving up.
  CONNECT_TIMEOUT_SECONDS = 5

  class << self
    # Returns the body of +url+ as a String.
    # Uses open-uri's URI#read: Kernel#open no longer opens URLs on Ruby 3.
    def fetch_feed(url)
      URI.parse(url).read
    end

    # The digg feed is served as ISO-8859-1; re-tag it and return a UTF-8 copy.
    def fetch_digg_feed
      url = 'http://services.digg.com/2.0/story.getTopNews?type=rss'
      content = fetch_feed(url)
      content.force_encoding('iso-8859-1')
      content.encode('utf-8')
    end

    def fetch_hackernews_feed
      url = 'http://news.ycombinator.com/rss'
      fetch_feed(url)
    end

    def fetch_delicious_feed
      url = 'http://feeds.delicious.com/v2/rss/?count=30'
      fetch_feed(url)
    end

    # Yields (link, title), both stripped, for every <item> in the RSS text
    # +feed+. Items without a usable <link> or <title> are skipped rather
    # than aborting the whole run.
    def parse_rss(feed)
      xml = REXML::Document.new(feed)
      xml.elements.each('//item') do |item|
        link = item.get_elements('link')[0]
        title = item.get_elements('title')[0]
        next if link.nil? || title.nil? || link.text.nil? || title.text.nil?

        yield link.text.strip, title.text.strip
      end
    end

    # Fetches every feed and starts one background curl download per item,
    # saving it as test/pages/<sanitised title>.html.
    def run
      dir = File.expand_path('../pages', __FILE__)
      FileUtils.mkdir_p(dir) unless File.exist?(dir)

      [fetch_digg_feed, fetch_hackernews_feed, fetch_delicious_feed].each do |feed|
        parse_rss(feed) do |url, title|
          filename = title.gsub(/\W/, '_') + '.html'
          filepath = File.join(dir, filename)
          puts "fetching #{url} as #{filepath}"
          # Pass arguments as a list so no shell is involved: feed URLs
          # routinely contain '&', '?' and other shell metacharacters.
          # curl long options take their value as a separate argument.
          pid = Process.spawn('curl', '--connect-timeout', CONNECT_TIMEOUT_SECONDS.to_s,
                              '-o', filepath, url)
          # Let the download run in the background without leaving a zombie.
          Process.detach(pid)
          sleep 1
        end
      end
    end

  end
end
58 |
59 | if __FILE__ == $0
60 | Program.run
61 | end
62 |
--------------------------------------------------------------------------------
/notes.txt:
--------------------------------------------------------------------------------
1 | # live NodeList
2 | NodeLists returned by node.childNodes and getElementsByXxx() apis are live which means changes to the DOM tree will be reflected in the NodeList when accessed.
3 |
4 | In jsdom's implementation, a live NodeList is updated when item() or length is accessed but not when the [index] is accessed.
5 | In a live NodeList iteration, you must carefully call list.update() (or just list.length) to trigger an update.
6 | Beware that NodeList update is very expensive! When possible, prefer DOM traversal over getElementsByXxx();
7 |
8 | If no changes will be made to the subtree, it is a good idea to iterate over an Array.
9 | var arr = nodeList.toArray(); //toArray() is not in the standards
10 | var arr = Array.prototype.slice.call(nodeList);
11 |
12 | # nodeList._length
13 | WRONG: In jsdom the length getter property of a NodeList calls .update(), which re-queries the DOM tree. In a read-only loop, it is more efficient to access ._length instead of .length.
14 | var nodes = ele.getElementsByTagName('div'), i, len;
15 | for (i = 0, len = nodes._length; i < len; i++) {
16 | //does not change the dom structure
17 | }
18 | childNodes._length may not be up to date!!!!!
19 |
20 | # .textContent
21 | readability.getInnerText is a very frequently used function. My optimization for it reduced the total running time by half.
22 | // hundredfold faster
23 | // use native string.trim
24 | // jsdom's implementation of textContent is innerHTML + strip tags + HTMLDecode
25 | // here we replace it with an optimized tree walker
26 |
27 | # cleanStyles
28 | cleanStyles is recursive; it accounts for most of the running time of prepArticle
29 |
30 | # security
31 | arbitrary js
32 | frames
33 |
34 | # performance
35 | grep TOTAL clean.log|cut -d ' ' -f5|sort -n
36 |
37 | irb>
38 | s = <' +
134 | title + '' +
135 | content +
136 | '';
137 | }
138 | });
--------------------------------------------------------------------------------
/misc/readability-feedflock.js:
--------------------------------------------------------------------------------
1 | (function() {
2 | //require.paths.unshift('./vendor');
3 | var sys = require('sys');
4 | var jsdom = require('jsdom');
5 | //var htmlparser = require('./htmlparser');
6 | //var level = jsdom.defaultLevel;
7 | // var doc = new (level.Document)();
8 | // doc.createWindow = function() {
9 | // window = jsdom.windowAugmentation(level, { document: doc, parser: htmlparser })
10 | // delete window.document.createWindow
11 | // return window
12 | // };
13 | // var document = doc.createWindow().document;
14 |
15 | var document;
16 | var Client = {
17 | parse: function(content, callback) {
18 | document = jsdom.jsdom(content).createWindow().document;
19 | //document.innerHTML = content;
20 | //console.log(document.body);
21 | if (!document.body) {
22 | callback({content:'',title:''});
23 | return;
24 | }
25 |
26 | // Replace all doubled-up tags with
tags, and remove fonts.
27 | var pattern = new RegExp (" [ \r\n\s]* ", "g");
28 | document.body.innerHTML = document.body.innerHTML.replace(pattern, "
").replace(/<\/?font[^>]*>/g, '');
29 |
30 | var allParagraphs = document.getElementsByTagName("p");
31 | var contentDiv = null;
32 | var topDivParas =[];
33 |
34 | var articleContent = document.createElement("DIV");
35 | var articleTitle = document.title
36 |
37 | if (articleTitle)
38 | articleTitle = articleTitle.replace(/^\s+|\s+$/g, '');
39 |
40 | // Study all the paragraphs and find the chunk that has the best score.
41 | // A score is determined by things like: Number of
's, commas, special classes, etc.
42 | for (var j=0; j < allParagraphs.length; j++) {
43 | var parentNode = allParagraphs[j].parentNode;
44 |
45 | if(typeof(parentNode) != 'undefined') {
46 | // Initialize readability data
47 | if(typeof parentNode.readability == 'undefined')
48 | {
49 | parentNode.readability = {"contentScore": 0};
50 |
51 | // Look for a special classname
52 | if(parentNode.className.match(/(comment|meta|footer|footnote)/))
53 | parentNode.readability.contentScore -= 50;
54 | else if(parentNode.className.match(/((^|\\s)(post|hentry|entry[-]?(content|text|body)?|article[-]?(content|text|body)?)(\\s|$))/))
55 | parentNode.readability.contentScore += 25;
56 |
57 | // Look for a special ID
58 | if(parentNode.id.match(/(comment|meta|footer|footnote)/))
59 | parentNode.readability.contentScore -= 50;
60 | else if(parentNode.id.match(/^(post|hentry|entry[-]?(content|text|body)?|article[-]?(content|text|body)?)$/))
61 | parentNode.readability.contentScore += 25;
62 | }
63 |
64 | // Add a point for the paragraph found
65 | if(this.getInnerText(allParagraphs[j]).length > 10)
66 | parentNode.readability.contentScore++;
67 |
68 | // Add points for any commas within this paragraph
69 | parentNode.readability.contentScore += this.getCharCount(allParagraphs[j]);
70 |
71 | topDivParas.push({ 'node': parentNode, 'score': parentNode.readability.contentScore });
72 | }
73 | }
74 |
75 | for (var i=0; i < topDivParas.length; i++) {
76 | var score = topDivParas[i].score;
77 | if (contentDiv == null || score > contentDiv.score) {
78 | contentDiv = { 'node': topDivParas[i].node, 'score': score }
79 | }
80 | }
81 |
82 | if (contentDiv == null)
83 | return callback({ content: '', title: '' });
84 |
85 | var topDiv = contentDiv.node
86 |
87 | this.cleanStyles(topDiv); // Removes all style attributes
88 | topDiv = this.killDivs(topDiv); // Goes in and removes DIV's that have more non
stuff than
stuff
89 | topDiv = this.killBreaks(topDiv); // Removes any consecutive 's into just one
90 |
91 | // Cleans out junk from the topDiv just in case:
92 | topDiv = this.clean(topDiv, "form");
93 | topDiv = this.clean(topDiv, "object");
94 | topDiv = this.clean(topDiv, "table", 250);
95 | topDiv = this.clean(topDiv, "h1");
96 | topDiv = this.clean(topDiv, "h2");
97 | topDiv = this.clean(topDiv, "iframe");
98 |
99 | articleContent.appendChild(topDiv);
100 |
101 | return callback({ content: articleContent.innerHTML, title: articleTitle });
102 | },
103 | getInnerText: function(e) {
104 | return e.textContent;
105 | },
106 | getCharCount: function( e,s ) {
107 | s = s || ",";
108 | return this.getInnerText(e).split(s).length;
109 | },
110 | cleanStyles: function( e ) {
111 | e = e || document;
112 | var cur = e.firstChild;
113 |
114 | // If we had a bad node, there's not much we can do.
115 | if(!e)
116 | return;
117 |
118 | // Remove any root styles, if we're able.
119 | if(typeof e.removeAttribute == 'function')
120 | e.removeAttribute('style');
121 |
122 | // Go until there are no more child nodes
123 | while ( cur != null ) {
124 | if ( cur.nodeType == 1 ) {
125 | // Remove style attribute(s) :
126 | cur.removeAttribute("style");
127 | this.cleanStyles( cur );
128 | }
129 | cur = cur.nextSibling;
130 | }
131 | },
132 | killDivs: function ( e ) {
133 | var divsList = e.getElementsByTagName( "div" );
134 | var curDivLength = divsList.length;
135 |
136 | // Gather counts for other typical elements embedded within.
137 | // Traverse backwards so we can remove nodes at the same time without effecting the traversal.
138 | for (var i=curDivLength-1; i >= 0; i--) {
139 | var p = divsList[i].getElementsByTagName("p").length;
140 | var img = divsList[i].getElementsByTagName("img").length;
141 | var li = divsList[i].getElementsByTagName("li").length;
142 | var a = divsList[i].getElementsByTagName("a").length;
143 | var embed = divsList[i].getElementsByTagName("embed").length;
144 |
145 | // If the number of commas is less than 10 (bad sign) ...
146 | if ( this.getCharCount(divsList[i]) < 10) {
147 | // And the number of non-paragraph elements is more than paragraphs
148 | // or other ominous signs :
149 | if ( img > p || li > p || a > p || p == 0 || embed > 0) {
150 | divsList[i].parentNode.removeChild(divsList[i]);
151 | }
152 | }
153 | }
154 | return e;
155 | },
156 | killBreaks: function ( e ) {
157 | e.innerHTML = e.innerHTML.replace(/( (\s| ?)*){1,}/g,' ');
158 | return e;
159 | },
160 | clean: function(e, tags, minWords) {
161 | var targetList = e.getElementsByTagName( tags );
162 | minWords = minWords || 1000000;
163 |
164 | for (var y=0; y < targetList.length; y++) {
165 | // If the text content isn't laden with words, remove the child:
166 | if (this.getCharCount(targetList[y], " ") < minWords) {
167 | targetList[y].parentNode.removeChild(targetList[y]);
168 | }
169 | }
170 | return e;
171 | }
172 | };
173 | exports.Client = Client;
174 | })();
--------------------------------------------------------------------------------
/lib/sprintf.js:
--------------------------------------------------------------------------------
1 | /**
2 | sprintf() for JavaScript 0.7-beta1
3 | http://www.diveintojavascript.com/projects/javascript-sprintf
4 |
5 | Copyright (c) Alexandru Marasteanu
6 | All rights reserved.
7 |
8 | Redistribution and use in source and binary forms, with or without
9 | modification, are permitted provided that the following conditions are met:
10 | * Redistributions of source code must retain the above copyright
11 | notice, this list of conditions and the following disclaimer.
12 | * Redistributions in binary form must reproduce the above copyright
13 | notice, this list of conditions and the following disclaimer in the
14 | documentation and/or other materials provided with the distribution.
15 | * Neither the name of sprintf() for JavaScript nor the
16 | names of its contributors may be used to endorse or promote products
17 | derived from this software without specific prior written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL Alexandru Marasteanu BE LIABLE FOR ANY
23 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 |
31 | Changelog:
32 | 2010.09.06 - 0.7-beta1
33 | - features: vsprintf, support for named placeholders
34 | - enhancements: format cache, reduced global namespace pollution
35 |
36 | 2010.05.22 - 0.6:
37 | - reverted to 0.4 and fixed the bug regarding the sign of the number 0
38 | Note:
39 | Thanks to Raphael Pigulla (http://www.n3rd.org/)
40 | who warned me about a bug in 0.5, I discovered that the last update was
41 | a regress. I appologize for that.
42 |
43 | 2010.05.09 - 0.5:
44 | - bug fix: 0 is now preceeded with a + sign
45 | - bug fix: the sign was not at the right position on padded results (Kamal Abdali)
46 | - switched from GPL to BSD license
47 |
48 | 2007.10.21 - 0.4:
49 | - unit test and patch (David Baird)
50 |
51 | 2007.09.17 - 0.3:
52 | - bug fix: no longer throws exception on empty paramenters (Hans Pufal)
53 |
54 | 2007.09.11 - 0.2:
55 | - feature: added argument swapping
56 |
57 | 2007.04.03 - 0.1:
58 | - initial release
59 | **/
60 |
/**
 * sprintf(format, ...args) -> string
 *
 * Supports implicit positional (%s), explicit positional (%1$s) and named
 * (%(user.name)s) placeholders with optional '+' sign, padding character,
 * left-justify flag, width and precision. Conversion types: b c d e f o s u
 * x X. Parsed formats are memoised in sprintf.cache.
 *
 * Throws a string (not an Error) on a malformed format, on a missing named
 * property, or when a numeric conversion receives a non-number.
 */
var sprintf = (function() {
  // Borrowed so that lookups keep working even when a key called
  // "hasOwnProperty" is stored on the object being inspected, e.g. after
  // sprintf('hasOwnProperty') has put that key into the format cache.
  var hasOwn = Object.prototype.hasOwnProperty;

  // Lower-cased [[Class]] name of a value: 'string', 'array', 'number', ...
  function get_type(variable) {
    return Object.prototype.toString.call(variable).slice(8, -1).toLowerCase();
  }
  // `input` repeated `multiplier` times; '' when multiplier <= 0.
  function str_repeat(input, multiplier) {
    for (var output = []; multiplier > 0; output[--multiplier] = input) {/* do nothing */}
    return output.join('');
  }

  var str_format = function() {
    if (!hasOwn.call(str_format.cache, arguments[0])) {
      str_format.cache[arguments[0]] = str_format.parse(arguments[0]);
    }
    return str_format.format.call(null, str_format.cache[arguments[0]], arguments);
  };

  // Render a parse tree against argv, where argv[0] is the format string
  // itself and the substitution values start at index 1.
  // Placeholder match groups: [1] explicit position, [2] named field path,
  // [3] '+' flag, [4] pad spec, [5] '-' flag, [6] width, [7] precision,
  // [8] conversion type.
  str_format.format = function(parse_tree, argv) {
    var cursor = 1, tree_length = parse_tree.length, node_type = '', arg, output = [], i, k, match, pad, pad_character, pad_length;
    for (i = 0; i < tree_length; i++) {
      node_type = get_type(parse_tree[i]);
      if (node_type === 'string') {
        output.push(parse_tree[i]);
      }
      else if (node_type === 'array') {
        match = parse_tree[i]; // convenience purposes only
        if (match[2]) { // keyword argument
          arg = argv[cursor];
          for (k = 0; k < match[2].length; k++) {
            if (!hasOwn.call(arg, match[2][k])) {
              throw(sprintf('[sprintf] property "%s" does not exist', match[2][k]));
            }
            arg = arg[match[2][k]];
          }
        }
        else if (match[1]) { // positional argument (explicit)
          arg = argv[match[1]];
        }
        else { // positional argument (implicit)
          arg = argv[cursor++];
        }

        if (/[^s]/.test(match[8]) && (get_type(arg) != 'number')) {
          throw(sprintf('[sprintf] expecting number but found %s', get_type(arg)));
        }
        switch (match[8]) {
          case 'b': arg = arg.toString(2); break;
          case 'c': arg = String.fromCharCode(arg); break;
          case 'd': arg = parseInt(arg, 10); break;
          case 'e': arg = match[7] ? arg.toExponential(match[7]) : arg.toExponential(); break;
          case 'f': arg = match[7] ? parseFloat(arg).toFixed(match[7]) : parseFloat(arg); break;
          case 'o': arg = arg.toString(8); break;
          case 's': arg = ((arg = String(arg)) && match[7] ? arg.substring(0, match[7]) : arg); break;
          case 'u': arg = Math.abs(arg); break;
          case 'x': arg = arg.toString(16); break;
          case 'X': arg = arg.toString(16).toUpperCase(); break;
        }
        arg = (/[def]/.test(match[8]) && match[3] && arg >= 0 ? '+'+ arg : arg);
        // Pad spec is either '0' or a quote followed by the pad character.
        pad_character = match[4] ? match[4] == '0' ? '0' : match[4].charAt(1) : ' ';
        pad_length = match[6] - String(arg).length;
        pad = match[6] ? str_repeat(pad_character, pad_length) : '';
        // '-' flag: pad on the right instead of the left.
        output.push(match[5] ? arg + pad : pad + arg);
      }
    }
    return output.join('');
  };

  str_format.cache = {};

  // Split a format string into literal strings and placeholder match arrays.
  str_format.parse = function(fmt) {
    // arg_names is a bit mask: 1 = named placeholders seen, 2 = positional.
    var _fmt = fmt, match = [], parse_tree = [], arg_names = 0;
    while (_fmt) {
      if ((match = /^[^\x25]+/.exec(_fmt)) !== null) {
        parse_tree.push(match[0]);
      }
      else if ((match = /^\x25{2}/.exec(_fmt)) !== null) {
        parse_tree.push('%');
      }
      else if ((match = /^\x25(?:([1-9]\d*)\$|\(([^\)]+)\))?(\+)?(0|'[^$])?(-)?(\d+)?(?:\.(\d+))?([b-fosuxX])/.exec(_fmt)) !== null) {
        if (match[2]) {
          arg_names |= 1;
          // Break "a.b[0]" into the key path ['a', 'b', '0'].
          var field_list = [], replacement_field = match[2], field_match = [];
          if ((field_match = /^([a-z_][a-z_\d]*)/i.exec(replacement_field)) !== null) {
            field_list.push(field_match[1]);
            while ((replacement_field = replacement_field.substring(field_match[0].length)) !== '') {
              if ((field_match = /^\.([a-z_][a-z_\d]*)/i.exec(replacement_field)) !== null) {
                field_list.push(field_match[1]);
              }
              else if ((field_match = /^\[(\d+)\]/.exec(replacement_field)) !== null) {
                field_list.push(field_match[1]);
              }
              else {
                throw('[sprintf] huh?');
              }
            }
          }
          else {
            throw('[sprintf] huh?');
          }
          match[2] = field_list;
        }
        else {
          arg_names |= 2;
        }
        if (arg_names === 3) {
          throw('[sprintf] mixing positional and named placeholders is not (yet) supported');
        }
        parse_tree.push(match);
      }
      else {
        throw('[sprintf] huh?');
      }
      _fmt = _fmt.substring(match[0].length);
    }
    return parse_tree;
  };

  return str_format;
})();
179 |
/**
 * Like sprintf, but takes the substitution values as an array.
 *
 * @param {string} fmt - format string.
 * @param {Array} argv - values for the placeholders; left unmodified.
 * @returns {string} the formatted string.
 */
var vsprintf = function(fmt, argv) {
  // Build a new argument list rather than unshifting onto the caller's array.
  return sprintf.apply(null, [fmt].concat(argv));
};
184 |
185 | exports.sprintf = sprintf;
186 | exports.vsprintf = vsprintf;
--------------------------------------------------------------------------------
/test/weird-pages/w3c-css-no-closing-head.html:
--------------------------------------------------------------------------------
1 |
3 |
4 |
5 |
6 | CSS 2D Transforms Module Level 3
7 |
8 |
9 |
14 |
16 |
17 |
18 |
CSS 2D Transforms allows elements rendered by CSS to be transformed in
80 | two-dimensional space.
81 |
82 |
Status of this document
83 |
84 |
85 |
This section describes the status of this document at the time of
86 | its publication. Other documents may supersede this document. A list of
87 | current W3C publications and the latest revision of this technical report
88 | can be found in the W3C technical reports
89 | index at http://www.w3.org/TR/.
90 |
91 |
Publication as a Working Draft does not imply endorsement by the W3C
92 | Membership. This is a draft document and may be updated, replaced or
93 | obsoleted by other documents at any time. It is inappropriate to cite this
94 | document as other than work in progress.
95 |
96 |
The (archived) public
98 | mailing list www-style@w3.org (see
99 | instructions) is preferred
100 | for discussion of this specification. When sending e-mail, please put the
101 | text “css3-2d-transforms” in the subject, preferably like
102 | this: “[css3-2d-transforms] …summary of
103 | comment…”
104 |
105 |
The CSS visual
178 | formatting model describes a coordinate system within which each
179 | element is positioned. Positions and sizes in this coordinate space can be
180 | thought of as being expressed in pixels, starting in the upper left corner
181 | of the parent with positive values proceeding to the right and down.
182 |
183 |
This coordinate space can be modified with the ‘transform’ property. Using
186 | transform, elements can be translated, rotated and scaled in two
187 | dimensional space. The coordinate space behaves as described in the coordinate
189 | system transformations section of the SVG 1.1 specification. This is a
190 | coordinate system with two axes: the X axis increases horizontally to the
191 | right; the Y axis increases vertically downwards.
192 |
193 |
Specifying a value other than ‘none’ for the ‘transform’ property establishes a
197 | new local coordinate system at the element that it is applied to.
198 | Transformations are cumulative. That is, elements establish their local
199 | coordinate system within the coordinate system of their parent. In this
200 | way, a ‘transform’ property effectively
202 | accumulates all the ‘transform’ properties of its
204 | ancestors. The accumulation of these transforms defines a current
205 | transformation matrix (CTM) for the element.
206 |
207 |
The transform property does not affect the flow of the content
208 | surrounding the transformed element. However, the value of the overflow
209 | area takes into account transformed elements. This behavior is similar to
210 | what happens when elements are translated via relative positioning.
211 | Therefore, if the value of the ‘overflow’ property is ‘scroll’
214 | or ‘auto’, scrollbars will appear as needed
216 | to see content that is transformed outside the visible area.
217 |
218 |
Any value other than ‘none’ for
219 | the transform results in the creation of both a stacking context and a
220 | containing block. The object acts as a containing block for fixed
221 | positioned descendants.
222 |
223 |
Need to go into more detail here about why fixed
224 | positioned objects should do this, i.e., that it's much harder to
225 | implement otherwise.
226 |
227 |
There are two roles for transformations in layout: (1)
228 | transformations that adjust the position of the affected content without
229 | changing the normal layout of that content (much like relative
230 | positioning) and (2) transformation of the content prior to layout that
231 | affects the layout of that content. See http://lists.w3.org/Archives/Public/www-style/2007Oct/0209
233 | for examples of both cases. The "transform" property (as defined in this
234 | document) is equally useful for both roles. This document is focused on
235 | satisfying the first role. There is, however, an architectural question
236 | that arises because there needs to be a way to distinguish which role an
237 | author of a stylesheet wants. The key question is which is the default
238 | behavior/role for the "transform" property and how is the other
239 | behavior/role indicated by a stylesheet author. If you have an opinion on
240 | this topic, please send feedback.
241 |
242 |
What do fixed backgrounds do in transforms? They should
243 | probably ignore the transform completely, since - even transformed - the
244 | object should be acting as "porthole" through which the fixed background
245 | can be viewed in its original form.
246 |
247 |
This property should also be applicable to SVG elements.
248 |
249 |
We also need to specify that SVG transforms *do* combine
250 | with this transform, e.g., if a <foreignObject> is inside
251 | transformed SVG and then defines a transform of its own. This means we may
252 | potentially have to examine the current SVG transform and combine with it
253 | to set the correct transform.
A two-dimensional transformation is applied to an element through the
261 | ‘transform’ property. This property
263 | contains a list of transform functions.
264 | The final transformation value for an element is obtained by performing a
265 | matrix concatenation of each entry in the list. The set of transform
266 | functions is similar to those allowed by SVG.
267 |
268 |
The ‘transform-origin’
319 | property establishes the origin of transformation for an element. This
320 | property is applied by first translating the element by the negated value
321 | of the property, then applying the element's transform, then translating
322 | by the property value. This effectively moves the desired transformation
323 | origin of the element to (0,0) in the local coordinate system, then
324 | applies the element's transform, then moves the element back to its
325 | original position.
326 |
327 |
If only one value is specified, the second value is assumed to be
328 | ‘center’. If at least one value is
329 | not a keyword, then the first value represents the horizontal position and
330 | the second represents the vertical position. Negative <percentage>
331 | and <length> values are allowed.
332 |
333 |
334 |
335 |
336 |
Name:
337 |
338 |
transform-origin
339 |
340 |
341 |
Value:
342 |
343 |
[ [ <percentage> | <length> | left | center | right ] [
344 | <percentage> | <length> | top | center | bottom ]? ] | [ [
345 | left | center | right ] || [ top | center | bottom ] ]
346 |
347 |
348 |
Initial:
349 |
350 |
50% 50%
351 |
352 |
353 |
Applies to:
354 |
355 |
block-level and inline-level elements
356 |
357 |
358 |
Inherited:
359 |
360 |
no
361 |
362 |
363 |
Percentages:
364 |
365 |
refer to the size of the element's box
366 |
367 |
368 |
Media:
369 |
370 |
visual
371 |
372 |
373 |
Computed value:
374 |
375 |
For <length> the absolute value, otherwise a percentage
376 |
377 |
378 |
379 |
4. The Transformation
380 | Functions
381 |
382 |
The value of the transform
383 | property is a list of <transform-functions> applied in the order
384 | provided. The individual transform functions are separated by whitespace.
385 | The set of allowed transform functions is given below. In this list the
386 | type <translation-value> is defined as a <length> or
387 | <percentage> value, and the <angle> type is defined by CSS Values and Units.
389 |
390 |
specifies a 2D transformation in the form of a transformation
396 | matrix of six values. matrix(a,b,c,d,e,f) is equivalent to applying the
398 | transformation matrix [a b c d e f].
399 |
400 |
specifies a 2D
405 | translation by the vector [tx, ty], where tx is the first
406 | translation-value parameter and ty is the optional second
407 | translation-value parameter. If <ty> is not provided, ty
408 | has zero as a value.
409 |
410 |
translateX(<translation-value>)
411 |
412 |
specifies a translation
414 | by the given amount in the X direction.
415 |
416 |
translateY(<translation-value>)
417 |
418 |
specifies a translation
420 | by the given amount in the Y direction.
421 |
422 |
scale(<number>[, <number>])
423 |
424 |
425 |
specifies a 2D scale
427 | operation by the [sx,sy] scaling vector described by the 2 parameters. If
428 | the second parameter is not provided, it takes a value equal to the
429 | first.
430 |
431 |
scaleX(<number>)
432 |
433 |
specifies a scale operation using the [sx,1] scaling vector, where sx
434 | is given as the parameter.
435 |
436 |
scaleY(<number>)
437 |
438 |
specifies a scale operation using the [1,sy] scaling vector, where sy
439 | is given as the parameter.
440 |
441 |
rotate(<angle>)
442 |
443 |
specifies a 2D
445 | rotation by the angle specified in the parameter about the origin of
446 | the element, as defined by the transform-origin property.
448 |
449 |
specifies a skew
465 | transformation along the X and Y axes. The first angle parameter
466 | specifies the skew on the X axis. The second angle parameter specifies
467 | the skew on the Y axis. If the second parameter is not given then a value
468 | of 0 is used for the Y angle (ie. no skew on the Y axis).
469 |
470 |
471 |
5. Transform Values and
472 | Lists
473 |
474 |
The <translation-value> values are defined as [<percentage>
475 | | <length>]. All other value types are described as CSS types.
477 | If a list of transforms is provided, then the net effect is as if each
478 | transform had been specified separately in the order provided. For
479 | example,
480 |
481 |
516 | Move the element by 80 pixels in both the X and Y directions, then scale
517 | the element by 150%, then rotate it 45 degrees clockwise about the Z axis.
518 | Note that the scale and rotate operate about the center of the element,
519 | since the element has the default transform-origin of 50% 50%.
520 |
522 |
523 |
524 |
525 |
6. Transitions and animations
526 | between transform values
527 |
528 |
When animating or transitioning the value of a transform property the
529 | rules described below are applied. The ‘from’ transform is the transform at the start
531 | of the transition or current keyframe. The ‘to’ transform is the transform at the end of
533 | the transition or current keyframe.
534 |
535 |
536 |
If the ‘from’ and
537 | ‘to’ transforms are both single
538 | functions of the same type:
539 |
540 |
For translate, translateX, translateY, scale, scaleX, scaleY,
541 | rotate, skew, skewX and skewY functions:
542 |
543 |
the individual components of the function are interpolated
544 | numerically.
545 |
546 |
547 |
For matrix:
548 |
549 |
the matrix is decomposed using the
551 | method described by unmatrix into separate translation, scale,
552 | rotation and skew matrices, then each decomposed matrix is
553 | interpolated numerically, and finally combined in order to produce a
554 | resulting 3x2 matrix.
555 |
556 |
557 |
558 |
If both the ‘from’ and
559 | ‘to’ transforms are "none":
560 |
561 |
There is no interpolation necessary
562 |
563 |
564 |
If one of the ‘from’ or
565 | ‘to’ transforms is "none":
566 |
567 |
The ‘none’ is replaced by
568 | an equivalent identity function list for the corresponding transform
569 | function list.
570 |
For example, if the ‘from’
571 | transform is "scale(2)" and the ‘to’ transform is "none" then the value
573 | "scale(1)" will be used as the ‘to’ value, and animation will proceed
575 | using the rule above. Similarly, if the ‘from’ transform is "none" and the
577 | ‘to’ transform is "scale(2)
578 | rotate(50deg)" then the animation will execute as if the ‘from’ value is "scale(1) rotate(0)".
580 |
581 |
The identity functions are translate(0), translateX(0),
582 | translateY(0), scale(1), scaleX(1), scaleY(1), rotate(0), rotateX(0),
583 | rotateY(0), skewX(0), skewY(0), skew(0, 0) and matrix(1, 0, 0, 1, 0,
584 | 0).
585 |
586 |
587 |
If both the ‘from’ and
588 | ‘to’ transforms have the same
589 | number of transform functions and corresponding functions in each
590 | transform list are of the same type:
591 |
592 |
Each transform function is animated with its corresponding
593 | destination function in isolation using the rules described above. The
594 | individual values are then applied as a list to produce resulting
595 | transform value.
596 |
597 |
598 |
Otherwise:
599 |
600 |
The transform function lists are each converted into the equivalent
601 | matrix value and animation proceeds using the rule for a single
602 | function above.
603 |
604 |
605 |
606 |
In some cases, an animation might cause a transformation matrix to be
607 | singular or non-invertible. For example, an animation in which scale moves
608 | from 1 to -1. At the time when the matrix is in such a state, the
609 | transformed element is not rendered.
610 |
611 |
7. Matrix
612 | decomposition for animation
613 |
614 |
When interpolating between 2 matrices, each is decomposed into the
615 | corresponding translation, rotation, scale, skew, and perspective values.
616 | Not all matrices can be accurately described by these values. Those that
617 | can't are decomposed into the most accurate representation possible, using
618 | the technique below. This technique is taken from the "unmatrix" method in
619 | "Graphics Gems II, edited by Jim Arvo". The pseudocode below works on a
620 | 4x4 homogeneous matrix. A 3x2 2D matrix is therefore first converted to
621 | 4x4 homogeneous form.
622 |
623 |
624 | Input: matrix ; a 4x4 matrix
625 | Output: translation ; a 3 component vector
626 | rotation ; Euler angles, represented as a 3 component vector
627 | scale ; a 3 component vector
628 | skew ; skew factors XY,XZ,YZ represented as a 3 component vector
629 | perspective ; a 4 component vector
630 | Returns false if the matrix cannot be decomposed, true if it can
631 |
632 | Supporting functions (point is a 3 component vector, matrix is a 4x4 matrix):
633 | float determinant(matrix) returns the 4x4 determinant of the matrix
634 | matrix inverse(matrix) returns the inverse of the passed matrix
635 | matrix transpose(matrix) returns the transpose of the passed matrix
636 | point multVecMatrix(point, matrix) multiplies the passed point by the passed matrix
637 | and returns the transformed point
638 | float length(point) returns the length of the passed vector
639 | point normalize(point) normalizes the length of the passed point to 1
640 | float dot(point, point) returns the dot product of the passed points
641 | float cos(float) returns the cosine of the passed angle in radians
642 | float asin(float) returns the arcsine in radians of the passed value
643 | float atan2(float y, float x) returns the principal value of the arc tangent of
644 | y/x, using the signs of both arguments to determine
645 | the quadrant of the return value
646 |
647 | Decomposition also makes use of the following function:
648 | point combine(point a, point b, float ascl, float bscl)
649 | result[0] = (ascl * a[0]) + (bscl * b[0])
650 | result[1] = (ascl * a[1]) + (bscl * b[1])
651 | result[2] = (ascl * a[2]) + (bscl * b[2])
652 | return result
653 |
654 |
655 | // Normalize the matrix.
656 | if (matrix[3][3] == 0)
657 | return false
658 |
659 | for (i = 0; i < 4; i++)
660 | for (j = 0; j < 4; j++)
661 | matrix[i][j] /= matrix[3][3]
662 |
663 | // perspectiveMatrix is used to solve for perspective, but it also provides
664 | // an easy way to test for singularity of the upper 3x3 component.
665 | perspectiveMatrix = matrix
666 |
667 | for (i = 0; i < 3; i++)
668 | perspectiveMatrix[i][3] = 0
669 |
670 | perspectiveMatrix[3][3] = 1
671 |
672 | if (determinant(perspectiveMatrix) == 0)
673 | return false
674 |
675 | // First, isolate perspective.
676 | if (matrix[0][3] != 0 || matrix[1][3] != 0 || matrix[2][3] != 0)
677 | // rightHandSide is the right hand side of the equation.
678 | rightHandSide[0] = matrix[0][3];
679 | rightHandSide[1] = matrix[1][3];
680 | rightHandSide[2] = matrix[2][3];
681 | rightHandSide[3] = matrix[3][3];
682 |
683 | // Solve the equation by inverting perspectiveMatrix and multiplying
684 | // rightHandSide by the inverse.
685 | inversePerspectiveMatrix = inverse(perspectiveMatrix)
686 | transposedInversePerspectiveMatrix = transposeMatrix4(inversePerspectiveMatrix)
687 | perspective = multVecMatrix(rightHandSide, transposedInversePerspectiveMatrix)
688 |
689 | // Clear the perspective partition
690 | matrix[0][3] = matrix[1][3] = matrix[2][3] = 0
691 | matrix[3][3] = 1
692 | else
693 | // No perspective.
694 | perspective[0] = perspective[1] = perspective[2] = 0
695 | perspective[3] = 1
696 |
697 | // Next take care of translation
698 | translate[0] = matrix[3][0]
699 | matrix[3][0] = 0
700 | translate[1] = matrix[3][1]
701 | matrix[3][1] = 0
702 | translate[2] = matrix[3][2]
703 | matrix[3][2] = 0
704 |
705 | // Now get scale and shear. 'row' is a 3 element array of 3 component vectors
706 | for (i = 0; i < 3; i++)
707 | row[i][0] = matrix[i][0]
708 | row[i][1] = matrix[i][1]
709 | row[i][2] = matrix[i][2]
710 |
711 | // Compute X scale factor and normalize first row.
712 | scale[0] = length(row[0])
713 | row[0] = normalize(row[0])
714 |
715 | // Compute XY shear factor and make 2nd row orthogonal to 1st.
716 | skew[0] = dot(row[0], row[1])
717 | row[1] = combine(row[1], row[0], 1.0, -skew[0])
718 |
719 | // Now, compute Y scale and normalize 2nd row.
720 | scale[1] = length(row[1])
721 | row[1] = normalize(row[1])
722 | skew[0] /= scale[1];
723 |
724 | // Compute XZ and YZ shears, orthogonalize 3rd row
725 | skew[1] = dot(row[0], row[2])
726 | row[2] = combine(row[2], row[0], 1.0, -skew[1])
727 | skew[2] = dot(row[1], row[2])
728 | row[2] = combine(row[2], row[1], 1.0, -skew[2])
729 |
730 | // Next, get Z scale and normalize 3rd row.
731 | scale[2] = length(row[2])
732 | row[2] = normalize(row[2])
733 | skew[1] /= scale[2]
734 | skew[2] /= scale[2]
735 |
736 | // At this point, the matrix (in rows) is orthonormal.
737 | // Check for a coordinate system flip. If the determinant
738 | // is -1, then negate the matrix and the scaling factors.
739 | pdum3 = cross(row[1], row[2])
740 | if (dot(row[0], pdum3) < 0)
741 | for (i = 0; i < 3; i++)
742 | scale[i] *= -1
743 | row[i][0] *= -1
744 | row[i][1] *= -1
745 | row[i][2] *= -1
746 |
747 | // Now, get the rotations out
748 | rotate[1] = asin(-row[0][2]);
749 | if (cos(rotate[1]) != 0)
750 | rotate[0] = atan2(row[1][2], row[2][2]);
751 | rotate[2] = atan2(row[0][1], row[0][0]);
752 | else
753 | rotate[0] = atan2(-row[2][0], row[1][1]);
754 | rotate[2] = 0;
755 |
756 | return true;
757 |
758 |
759 |
Each component of each returned value is linearly interpolated with the
760 | corresponding component of the other matrix. The resulting components are
761 | then recomposed into a final matrix as though combining the following
762 | transform functions:
763 |
764 |
This section describes the interfaces and functionality added to the
777 | DOM to support runtime access to the functionality described above.
778 |
779 |
The setMatrixValue method replaces
840 | the existing matrix with one computed from parsing the passed string
841 | as though it had been assigned to the transform property in a CSS
842 | style rule.
843 |
Parameters
844 |
845 |
846 |
string of type
847 | DOMString
848 |
849 |
The string to parse.
850 |
851 |
852 |
853 |
854 |
No Return Value
855 |
856 |
Exceptions
857 |
858 |
859 |
DOMException SYNTAX_ERR
860 |
861 |
Thrown when the provided string can not be parsed into a
862 | CSSMatrix.
863 |
The multiply method returns a new
876 | CSSMatrix which is the result of this matrix multiplied by the
877 | passed matrix, with the passed matrix to the right. This matrix is
878 | not modified.
879 |
The multiplyLeft method returns a new
911 | CSSMatrix which is the result of this matrix multiplied by the
912 | passed matrix, with the passed matrix to the left. This matrix is
913 | not modified.
914 |
The translate method returns a new
980 | matrix which is this matrix post multiplied by a translation matrix
981 | containing the passed values. This matrix is not modified.
982 |
Parameters
983 |
984 |
985 |
x of type
986 | float
987 |
988 |
The X component of the translation value.
989 |
990 |
The scale method returns a new matrix
1020 | which is this matrix post multiplied by a scale matrix containing
1021 | the passed values. If the y component is undefined, the x component
1022 | value is used in its place. This matrix is not modified.
1023 |
Parameters
1024 |
1025 |
1026 |
scaleX of type
1027 | float
1028 |
1029 |
The X component of the scale value.
1030 |
1031 |
scaleY of type
1032 | float
1033 |
1034 |
The (optional) Y component of the scale value.
1035 |
The rotate method returns a new
1061 | matrix which is this matrix post multiplied by a rotation matrix.
1062 | The rotation value is in degrees. This matrix is not modified.
1063 |
The skew method returns a new matrix
1096 | which is this matrix post multiplied by a skew matrix. The rotation
1097 | value is in degrees. This matrix is not modified.
1098 |
Parameters
1099 |
1100 |
1101 |
angleX of type
1102 | float
1103 |
1104 |
The angle of skew along the X axis.
1105 |
1106 |
angleY of type
1107 | float
1108 |
1109 |
The angle of skew along the Y axis.
1110 |
1111 |
1112 |
1113 |
1114 |
Return Value
1115 |
1116 |
1117 |
CSSMatrix
1118 |
1119 |
The result matrix.
1120 |
1121 |
1122 |
1123 |
1124 |
No Exceptions
1125 |
1126 |
1127 |
1128 |
1129 |
1130 |
1131 |
1132 |
1133 |
1134 |
1135 |
1136 |
In addition to the interface listed above, the
1137 | getComputedStyle method of the Window object has
1138 | been updated. The transform property
1139 | of the style object returned by getComputedStyle contains a
1140 | DOMString of the form "matrix(a, b, c, d, e, f)" representing the 3x2
1141 | matrix that is the result of applying the individual functions listed in
1142 | the transform property.
1143 |
1144 |
[ [ <percentage> | <length> | left | center | right ] [
1206 | <percentage> | <length> | top | center | bottom ]? ] | [ [
1207 | left | center | right ] || [ top | center | bottom ] ]
1208 |
1209 |
50% 50%
1210 |
1211 |
block-level and inline-level elements
1212 |
1213 |
no
1214 |
1215 |
refer to the size of the element's box
1216 |
1217 |
296 | George W. Bush will end a self-imposed silence about his presidency in an NBC prime-time special on Monday, the eve of the release of his memoir, “Decision Points.” That the interviewer will be Matt Lauer, the co-host of the “Today” show, reveals calculations by Mr. Bush and his advisers, as well as a campaign by NBC.
Matt Lauer of “Today” interviewing George W. Bush in his first one-on-one interview since leaving the White House. The special will be shown on Monday at 8 p.m. Eastern time.
337 | In the past, the first interview of a controversial ex-president would be expected to go to the nation’s top evening news anchor, currently NBC’s Brian Williams. By choosing the top morning anchor instead, both sides are essentially endorsing the soft power of Matt Lauer.
338 | “He’s an extraordinarily fair interviewer,” said Jim Bell, the executive producer of “Today” and of the prime-time special. “We’re living in a time when some of television news is partisan, and Matt and the ‘Today’ show are decidedly not so.”
339 | That was a selling point for Mr. Bush and his advisers, who decided that “the first interview should be in a news context, with a network news anchor,” said David Drake, a senior vice president of Crown, the publisher of “Decision Points.”
340 | For NBC, the interview — which was taped over the course of two days in Texas late last month — is a major coup. “They talked about every subject under the sun,” said Steve Capus, the president of NBC News, who observed that Mr. Bush “has things he wants to get off his chest.”
341 | But critics of Mr. Bush — and there are many, with polls showing that most Americans still hold an unfavorable view of him — who would like to see a televised confrontation over issues like the Iraq war may come away disappointed. The tone of the prime-time special is conversational, not prosecutorial, and for that reason, “Lauer/Bush” is not likely to join “Frost/Nixon” in the public imagination.
342 | Mr. Bell pointedly called the special “a conversation with President Bush about his book,” not just his presidency. Many tough questions are asked, and the word “torture” is used, Mr. Bell emphasized, but it comes down to tone.
343 | Dana Perino, who was a White House press secretary while Mr. Bush was in office, said that tone was an important consideration for the TV book tour.
344 | “He’s not interested in having a debate about the policies,” Ms. Perino said of Mr. Bush. She elaborated later: “There’s been plenty of debates about the decisions he has made. Now he’s trying to explain what he was going through, and the conditions he was working under.”
345 | Doris Kearns Goodwin, the presidential historian, said Mr. Bush’s televised interview was not likely to deviate from the words in his memoir. But “there is some value in seeing his mood,” she said, including his attitude about the memoir itself. (Ms. Goodwin was a paid contributor to NBC until 2008.)
346 | To get the first shot at the Bush interview, each major television network pieced together its best proposal — a “package,” Mr. Drake said — and at least one other offered a prime-time special like NBC’s. He declined to share specifics, but said “it was a close decision.”
347 | The NBC interview is the start of a book tour like almost no other. Mr. Bush will sit down with Oprah Winfrey and Rush Limbaugh, as well as with all three prime-time hosts on the Fox News Channel this week. There will be print interviews, too, but the only one announced so far is with AARP The Magazine.
348 | Andrew Tyndall, who publishes a newsletter about the television news business, The Tyndall Report, said he suspected that Mr. Bush and his aides were striking a balance by selecting Mr. Lauer for the first interview. “On the one hand, you’re looking for comfort,” Mr. Tyndall said. “On the other hand, you don’t want the interview to be perceived as a series of softballs.”
349 | NBC executives privately agreed with that assessment, and said they thought that Mr. Bush would not have felt as comfortable with the network’s other top interviewers.
350 | Asked whether Mr. Williams or the “Meet the Press” moderator David Gregory pursued the interview, Mr. Capus said “I’m sure they did,” but that “Matt was the official push from NBC News and I’m thrilled that we got it.” Mr. Capus said that Mr. Lauer had a “rapport” with Mr. Bush in prior interviews.
351 | Along with comfort, audience size was important. “Today” is both the top-rated morning show and a highly sought-after outlet for authors.
758 |
759 |
760 |
761 |
762 |
763 |
764 |
765 |
766 |
767 |
768 |
769 |
770 |
771 |
775 |
776 |
779 |
780 |
781 |
782 |
783 |
784 |
785 |
786 |
787 |
788 |
--------------------------------------------------------------------------------
/misc/readability-ori.js:
--------------------------------------------------------------------------------
1 | /*jslint undef: true, nomen: true, eqeqeq: true, plusplus: true, newcap: true, immed: true, browser: true, devel: true, passfail: false */
2 | /*global window: false, readConvertLinksToFootnotes: false, readStyle: false, readSize: false, readMargin: false, Typekit: false, ActiveXObject: false */
3 |
/*
 * Debug logger. When a console object exists, writes the given value to
 * console.log prefixed with "Readability: "; otherwise it is a no-op.
 */
var dbg = function() {};
if (typeof console !== 'undefined') {
    dbg = function(message) {
        console.log("Readability: " + message);
    };
}
7 |
8 | /*
9 | * Readability. An Arc90 Lab Experiment.
10 | * Website: http://lab.arc90.com/experiments/readability
11 | * Source: http://code.google.com/p/arc90labs-readability
12 | *
13 | * "Readability" is a trademark of Arc90 Inc and may not be used without explicit permission.
14 | *
15 | * Copyright (c) 2010 Arc90 Inc
16 | * Readability is licensed under the Apache License, Version 2.0.
17 | **/
18 | var readability = {
19 | version: '1.7.1',
20 | emailSrc: 'http://lab.arc90.com/experiments/readability/email.php',
21 | iframeLoads: 0,
22 | convertLinksToFootnotes: false,
23 | reversePageScroll: false, /* If they hold shift and hit space, scroll up */
24 | frameHack: false, /**
25 | * The frame hack is to workaround a firefox bug where if you
26 | * pull content out of a frame and stick it into the parent element, the scrollbar won't appear.
27 | * So we fake a scrollbar in the wrapping div.
28 | **/
29 | biggestFrame: false,
30 | bodyCache: null, /* Cache the body HTML in case we need to re-use it later */
31 | flags: 0x1 | 0x2 | 0x4, /* Start with all flags set. */
32 |
33 | /* constants */
34 | FLAG_STRIP_UNLIKELYS: 0x1,
35 | FLAG_WEIGHT_CLASSES: 0x2,
36 | FLAG_CLEAN_CONDITIONALLY: 0x4,
37 |
38 | maxPages: 30, /* The maximum number of pages to loop through before we call it quits and just show a link. */
39 | parsedPages: {}, /* The list of pages we've parsed in this call of readability, for autopaging. As a key store for easier searching. */
40 | pageETags: {}, /* A list of the ETag headers of pages we've parsed, in case they happen to match, we'll know it's a duplicate. */
41 |
42 | /**
43 | * All of the regular expressions in use within readability.
44 | * Defined up here so we don't instantiate them repeatedly in loops.
45 | **/
46 | regexps: {
47 | unlikelyCandidates: /combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter/i,
48 | okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
49 | positive: /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i,
50 | negative: /combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i,
51 | extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single/i,
52 | divToPElements: /<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i,
53 | replaceBrs: /( ]*>[ \n\r\t]*){2,}/gi,
54 | replaceFonts: /<(\/?)font[^>]*>/gi,
55 | trim: /^\s+|\s+$/g,
56 | normalize: /\s{2,}/g,
57 | killBreaks: /( (\s| ?)*){1,}/g,
58 | videos: /http:\/\/(www\.)?(youtube|vimeo)\.com/i,
59 | skipFootnoteLink: /^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i,
60 | nextLink: /(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i, // Match: next, continue, >, >>, » but not >|, »| as those usually mean last.
61 | prevLink: /(prev|earl|old|new|<|«)/i
62 | },
63 |
64 | /**
65 | * Runs readability.
66 | *
67 | * Workflow:
68 | * 1. Prep the document by removing script tags, css, etc.
69 | * 2. Build readability's DOM tree.
70 | * 3. Grab the article content from the current dom tree.
71 | * 4. Replace the current DOM tree with the new one.
72 | * 5. Read peacefully.
73 | *
74 | * @return void
75 | **/
76 | init: function() {
77 | /* Before we do anything, remove all scripts that are not readability. */
78 | window.onload = window.onunload = function() {};
79 |
80 | readability.removeScripts(document);
81 |
82 | if(document.body && !readability.bodyCache) {
83 | readability.bodyCache = document.body.innerHTML;
84 |
85 | }
86 | /* Make sure this document is added to the list of parsed pages first, so we don't double up on the first page */
87 | readability.parsedPages[window.location.href.replace(/\/$/, '')] = true;
88 |
89 | /* Pull out any possible next page link first */
90 | var nextPageLink = readability.findNextPageLink(document.body);
91 |
92 | readability.prepDocument();
93 |
94 | /* Build readability's DOM tree */
95 | var overlay = document.createElement("DIV");
96 | var innerDiv = document.createElement("DIV");
97 | var articleTools = readability.getArticleTools();
98 | var articleTitle = readability.getArticleTitle();
99 | var articleContent = readability.grabArticle();
100 | var articleFooter = readability.getArticleFooter();
101 |
102 | if(!articleContent) {
103 | articleContent = document.createElement("DIV");
104 | articleContent.id = "readability-content";
105 | articleContent.innerHTML = [
106 | "
Sorry, readability was unable to parse this page for content. If you feel like it should have been able to, please let us know by submitting an issue.
",
107 | (readability.frameHack ? "
It appears this page uses frames. Unfortunately, browser security properties often cause Readability to fail on pages that include frames. You may want to try running readability itself on this source page: " + readability.biggestFrame.src + "
" : ""),
108 | "
Also, please note that Readability does not play very nicely with front pages. Readability is intended to work on articles with a sizable chunk of text that you'd like to read comfortably. If you're using Readability on a landing page (like nytimes.com for example), please click into an article first before using Readability.
"
109 | ].join('');
110 |
111 | nextPageLink = null;
112 | }
113 |
114 | overlay.id = "readOverlay";
115 | innerDiv.id = "readInner";
116 |
117 | /* Apply user-selected styling */
118 | document.body.className = readStyle;
119 | document.dir = readability.getSuggestedDirection(articleTitle.innerHTML);
120 |
121 | if (readStyle === "style-athelas" || readStyle === "style-apertura"){
122 | overlay.className = readStyle + " rdbTypekit";
123 | }
124 | else {
125 | overlay.className = readStyle;
126 | }
127 | innerDiv.className = readMargin + " " + readSize;
128 |
129 | if(typeof(readConvertLinksToFootnotes) !== 'undefined' && readConvertLinksToFootnotes === true) {
130 | readability.convertLinksToFootnotes = true;
131 | }
132 |
133 | /* Glue the structure of our document together. */
134 | innerDiv.appendChild( articleTitle );
135 | innerDiv.appendChild( articleContent );
136 | innerDiv.appendChild( articleFooter );
137 | overlay.appendChild( articleTools );
138 | overlay.appendChild( innerDiv );
139 |
140 | /* Clear the old HTML, insert the new content. */
141 | document.body.innerHTML = "";
142 | document.body.insertBefore(overlay, document.body.firstChild);
143 | document.body.removeAttribute('style');
144 |
145 | if(readability.frameHack)
146 | {
147 | var readOverlay = document.getElementById('readOverlay');
148 | readOverlay.style.height = '100%';
149 | readOverlay.style.overflow = 'auto';
150 | }
151 |
152 | /**
153 | * If someone tries to use Readability on a site's root page, give them a warning about usage.
154 | **/
155 | if((window.location.protocol + "//" + window.location.host + "/") === window.location.href)
156 | {
157 | articleContent.style.display = "none";
158 | var rootWarning = document.createElement('p');
159 | rootWarning.id = "readability-warning";
160 | rootWarning.innerHTML = "Readability was intended for use on individual articles and not home pages. " +
161 | "If you'd like to try rendering this page anyway, click here to continue.";
162 |
163 | innerDiv.insertBefore( rootWarning, articleContent );
164 | }
165 |
166 | readability.postProcessContent(articleContent);
167 |
168 | window.scrollTo(0, 0);
169 |
170 | /* If we're using the Typekit library, select the font */
171 | if (readStyle === "style-athelas" || readStyle === "style-apertura") {
172 | readability.useRdbTypekit();
173 | }
174 |
175 | if (nextPageLink) {
176 | /**
177 | * Append any additional pages after a small timeout so that people
178 | * can start reading without having to wait for this to finish processing.
179 | **/
180 | window.setTimeout(function() {
181 | readability.appendNextPage(nextPageLink);
182 | }, 500);
183 | }
184 |
185 | /** Smooth scrolling **/
186 | document.onkeydown = function(e) {
187 | var code = (window.event) ? event.keyCode : e.keyCode;
188 | if (code === 16) {
189 | readability.reversePageScroll = true;
190 | return;
191 | }
192 |
193 | if (code === 32) {
194 | readability.curScrollStep = 0;
195 | var windowHeight = window.innerHeight ? window.innerHeight : (document.documentElement.clientHeight ? document.documentElement.clientHeight : document.body.clientHeight);
196 |
197 | if(readability.reversePageScroll) {
198 | readability.scrollTo(readability.scrollTop(), readability.scrollTop() - (windowHeight - 50), 20, 10);
199 | }
200 | else {
201 | readability.scrollTo(readability.scrollTop(), readability.scrollTop() + (windowHeight - 50), 20, 10);
202 | }
203 |
204 | return false;
205 | }
206 | };
207 |
208 | document.onkeyup = function(e) {
209 | var code = (window.event) ? event.keyCode : e.keyCode;
210 | if (code === 16) {
211 | readability.reversePageScroll = false;
212 | return;
213 | }
214 | };
215 | },
216 |
217 | /**
218 | * Run any post-process modifications to article content as necessary.
219 | *
220 | * @param Element
221 | * @return void
222 | **/
223 | postProcessContent: function(articleContent) {
224 | if(readability.convertLinksToFootnotes && !window.location.href.match(/wikipedia\.org/g)) {
225 | readability.addFootnotes(articleContent);
226 | }
227 |
228 | readability.fixImageFloats(articleContent);
229 | },
230 |
231 | /**
232 | * Some content ends up looking ugly if the image is too large to be floated.
233 | * If the image is wider than a threshold (currently 55%), no longer float it,
234 | * center it instead.
235 | *
236 | * @param Element
237 | * @return void
238 | **/
239 | fixImageFloats: function (articleContent) {
240 | var imageWidthThreshold = Math.min(articleContent.offsetWidth, 800) * 0.55,
241 | images = articleContent.getElementsByTagName('img');
242 |
243 | for(var i=0, il = images.length; i < il; i+=1) {
244 | var image = images[i];
245 |
246 | if(image.offsetWidth > imageWidthThreshold) {
247 | image.className += " blockImage";
248 | }
249 | }
250 | },
251 |
252 | /**
253 | * Get the article tools Element that has buttons like reload, print, email.
254 | *
255 | * @return void
256 | **/
257 | getArticleTools: function () {
258 | var articleTools = document.createElement("DIV");
259 |
260 | articleTools.id = "readTools";
261 | articleTools.innerHTML =
262 | "Reload Original Page" +
263 | "Print Page" +
264 | "Email Page";
265 |
266 | return articleTools;
267 | },
268 |
269 | /**
270 | * retuns the suggested direction of the string
271 | *
272 | * @return "rtl" || "ltr"
273 | **/
274 | getSuggestedDirection: function(text) {
275 | function sanitizeText() {
276 | return text.replace(/@\w+/, "");
277 | }
278 |
279 | function countMatches(match) {
280 | var matches = text.match(new RegExp(match, "g"));
281 | return matches !== null ? matches.length : 0;
282 | }
283 |
284 | function isRTL() {
285 | var count_heb = countMatches("[\\u05B0-\\u05F4\\uFB1D-\\uFBF4]");
286 | var count_arb = countMatches("[\\u060C-\\u06FE\\uFB50-\\uFEFC]");
287 |
288 | // if 20% of chars are Hebrew or Arbic then direction is rtl
289 | return (count_heb + count_arb) * 100 / text.length > 20;
290 | }
291 |
292 | text = sanitizeText(text);
293 | return isRTL() ? "rtl" : "ltr";
294 | },
295 |
296 |
297 | /**
298 | * Get the article title as an H1.
299 | *
300 | * @return void
301 | **/
302 | getArticleTitle: function () {
303 | var curTitle = "",
304 | origTitle = "";
305 |
306 | try {
307 | curTitle = origTitle = document.title;
308 |
309 | if(typeof curTitle !== "string") { /* If they had an element with id "title" in their HTML */
310 | curTitle = origTitle = readability.getInnerText(document.getElementsByTagName('title')[0]);
311 | }
312 | }
313 | catch(e) {}
314 |
315 | if(curTitle.match(/ [\|\-] /))
316 | {
317 | curTitle = origTitle.replace(/(.*)[\|\-] .*/gi,'$1');
318 |
319 | if(curTitle.split(' ').length < 3) {
320 | curTitle = origTitle.replace(/[^\|\-]*[\|\-](.*)/gi,'$1');
321 | }
322 | }
323 | else if(curTitle.indexOf(': ') !== -1)
324 | {
325 | curTitle = origTitle.replace(/.*:(.*)/gi, '$1');
326 |
327 | if(curTitle.split(' ').length < 3) {
328 | curTitle = origTitle.replace(/[^:]*[:](.*)/gi,'$1');
329 | }
330 | }
331 | else if(curTitle.length > 150 || curTitle.length < 15)
332 | {
333 | var hOnes = document.getElementsByTagName('h1');
334 | if(hOnes.length === 1)
335 | {
336 | curTitle = readability.getInnerText(hOnes[0]);
337 | }
338 | }
339 |
340 | curTitle = curTitle.replace( readability.regexps.trim, "" );
341 |
342 | if(curTitle.split(' ').length <= 4) {
343 | curTitle = origTitle;
344 | }
345 |
346 | var articleTitle = document.createElement("H1");
347 | articleTitle.innerHTML = curTitle;
348 |
349 | return articleTitle;
350 | },
351 |
352 | /**
353 | * Get the footer with the readability mark etc.
354 | *
355 | * @return void
356 | **/
357 | getArticleFooter: function () {
358 | var articleFooter = document.createElement("DIV");
359 |
360 | /**
361 | * For research purposes, generate an img src that contains the chosen readstyle etc,
362 | * so we can generate aggregate stats and change styles based on them in the future
363 | **/
364 | // var statsQueryParams = "?readStyle=" + encodeURIComponent(readStyle) + "&readMargin=" + encodeURIComponent(readMargin) + "&readSize=" + encodeURIComponent(readSize);
365 | /* TODO: attach this to an image */
366 |
367 | articleFooter.id = "readFooter";
368 | articleFooter.innerHTML = [
369 | "",
370 | ""].join('');
381 |
382 | return articleFooter;
383 | },
384 |
385 | /**
386 | * Prepare the HTML document for readability to scrape it.
387 | * This includes things like stripping javascript, CSS, and handling terrible markup.
388 | *
389 | * @return void
390 | **/
391 | prepDocument: function () {
392 | /**
393 | * In some cases a body element can't be found (if the HTML is totally hosed for example)
394 | * so we create a new body node and append it to the document.
395 | */
396 | if(document.body === null)
397 | {
398 | var body = document.createElement("body");
399 | try {
400 | document.body = body;
401 | }
402 | catch(e) {
403 | document.documentElement.appendChild(body);
404 | dbg(e);
405 | }
406 | }
407 |
408 | document.body.id = "readabilityBody";
409 |
410 | var frames = document.getElementsByTagName('frame');
411 | if(frames.length > 0)
412 | {
413 | var bestFrame = null;
414 | var bestFrameSize = 0; /* The frame to try to run readability upon. Must be on same domain. */
415 | var biggestFrameSize = 0; /* Used for the error message. Can be on any domain. */
416 | for(var frameIndex = 0; frameIndex < frames.length; frameIndex+=1)
417 | {
418 | var frameSize = frames[frameIndex].offsetWidth + frames[frameIndex].offsetHeight;
419 | var canAccessFrame = false;
420 | try {
421 | var frameBody = frames[frameIndex].contentWindow.document.body;
422 | canAccessFrame = true;
423 | }
424 | catch(eFrames) {
425 | dbg(eFrames);
426 | }
427 |
428 | if(frameSize > biggestFrameSize) {
429 | biggestFrameSize = frameSize;
430 | readability.biggestFrame = frames[frameIndex];
431 | }
432 |
433 | if(canAccessFrame && frameSize > bestFrameSize)
434 | {
435 | readability.frameHack = true;
436 |
437 | bestFrame = frames[frameIndex];
438 | bestFrameSize = frameSize;
439 | }
440 | }
441 |
442 | if(bestFrame)
443 | {
444 | var newBody = document.createElement('body');
445 | newBody.innerHTML = bestFrame.contentWindow.document.body.innerHTML;
446 | newBody.style.overflow = 'scroll';
447 | document.body = newBody;
448 |
449 | var frameset = document.getElementsByTagName('frameset')[0];
450 | if(frameset) {
451 | frameset.parentNode.removeChild(frameset); }
452 | }
453 | }
454 |
455 | /* Remove all stylesheets */
456 | for (var k=0;k < document.styleSheets.length; k+=1) {
457 | if (document.styleSheets[k].href !== null && document.styleSheets[k].href.lastIndexOf("readability") === -1) {
458 | document.styleSheets[k].disabled = true;
459 | }
460 | }
461 |
462 | /* Remove all style tags in head (not doing this on IE) - TODO: Why not? */
463 | var styleTags = document.getElementsByTagName("style");
464 | for (var st=0;st < styleTags.length; st+=1) {
465 | styleTags[st].textContent = "";
466 | }
467 |
468 | /* Turn all double br's into p's */
469 | /* Note, this is pretty costly as far as processing goes. Maybe optimize later. */
470 | document.body.innerHTML = document.body.innerHTML.replace(readability.regexps.replaceBrs, '
').replace(readability.regexps.replaceFonts, '<$1span>');
471 | },
472 |
473 | /**
474 | * For easier reading, convert this document to have footnotes at the bottom rather than inline links.
475 | * @see http://www.roughtype.com/archives/2010/05/experiments_in.php
476 | *
477 | * @return void
478 | **/
479 | addFootnotes: function(articleContent) {
480 | var footnotesWrapper = document.getElementById('readability-footnotes'),
481 | articleFootnotes = document.getElementById('readability-footnotes-list');
482 |
483 | if(!footnotesWrapper) {
484 | footnotesWrapper = document.createElement("DIV");
485 | footnotesWrapper.id = 'readability-footnotes';
486 | footnotesWrapper.innerHTML = '
";
1410 |
1411 | articlePage.innerHTML = articlePage.innerHTML + nextPageMarkup;
1412 | return;
1413 | }
1414 |
1415 | /**
1416 | * Now that we've built the article page DOM element, get the page content
1417 | * asynchronously and load the cleaned content into the div we created for it.
1418 | **/
1419 | (function(pageUrl, thisPage) {
1420 | readability.ajax(pageUrl, {
1421 | success: function(r) {
1422 |
1423 | /* First, check to see if we have a matching ETag in headers - if we do, this is a duplicate page. */
1424 | var eTag = r.getResponseHeader('ETag');
1425 | if(eTag) {
1426 | if(eTag in readability.pageETags) {
1427 | dbg("Exact duplicate page found via ETag. Aborting.");
1428 | articlePage.style.display = 'none';
1429 | return;
1430 | } else {
1431 | readability.pageETags[eTag] = 1;
1432 | }
1433 | }
1434 |
1435 | // TODO: this ends up doubling up page numbers on NYTimes articles. Need to generically parse those away.
1436 | var page = document.createElement("DIV");
1437 |
1438 | /**
1439 | * Do some preprocessing to our HTML to make it ready for appending.
1440 | * • Remove any script tags. Swap and reswap newlines with a unicode character because multiline regex doesn't work in javascript.
1441 | * • Turn any noscript tags into divs so that we can parse them. This allows us to find any next page links hidden via javascript.
1442 | * • Turn all double br's into p's - was handled by prepDocument in the original view.
1443 | * Maybe in the future abstract out prepDocument to work for both the original document and AJAX-added pages.
1444 | **/
1445 | var responseHtml = r.responseText.replace(/\n/g,'\uffff').replace(/.*?<\/script>/gi, '');
1446 | responseHtml = responseHtml.replace(/\n/g,'\uffff').replace(/.*?<\/script>/gi, '');
1447 | responseHtml = responseHtml.replace(/\uffff/g,'\n').replace(/<(\/?)noscript/gi, '<$1div');
1448 | responseHtml = responseHtml.replace(readability.regexps.replaceBrs, '
');
1449 | responseHtml = responseHtml.replace(readability.regexps.replaceFonts, '<$1span>');
1450 |
1451 | page.innerHTML = responseHtml;
1452 |
1453 | /**
1454 | * Reset all flags for the next page, as they will search through it and disable as necessary at the end of grabArticle.
1455 | **/
1456 | readability.flags = 0x1 | 0x2 | 0x4;
1457 |
1458 | var nextPageLink = readability.findNextPageLink(page),
1459 | content = readability.grabArticle(page);
1460 |
1461 | if(!content) {
1462 | dbg("No content found in page to append. Aborting.");
1463 | return;
1464 | }
1465 |
1466 | /**
1467 | * Anti-duplicate mechanism. Essentially, get the first paragraph of our new page.
1468 | * Compare it against all of the the previous document's we've gotten. If the previous
1469 | * document contains exactly the innerHTML of this first paragraph, it's probably a duplicate.
1470 | **/
1471 | var firstP = content.getElementsByTagName("P").length ? content.getElementsByTagName("P")[0] : null;
1472 | if(firstP && firstP.innerHTML.length > 100) {
1473 | for(var i=1; i <= readability.curPageNum; i+=1) {
1474 | var rPage = document.getElementById('readability-page-' + i);
1475 | if(rPage && rPage.innerHTML.indexOf(firstP.innerHTML) !== -1) {
1476 | dbg('Duplicate of page ' + i + ' - skipping.');
1477 | articlePage.style.display = 'none';
1478 | readability.parsedPages[pageUrl] = true;
1479 | return;
1480 | }
1481 | }
1482 | }
1483 |
1484 | readability.removeScripts(content);
1485 |
1486 | thisPage.innerHTML = thisPage.innerHTML + content.innerHTML;
1487 |
1488 | /**
1489 | * After the page has rendered, post process the content. This delay is necessary because,
1490 | * in webkit at least, offsetWidth is not set in time to determine image width. We have to
1491 | * wait a little bit for reflow to finish before we can fix floating images.
1492 | **/
1493 | window.setTimeout(
1494 | function() { readability.postProcessContent(thisPage); },
1495 | 500
1496 | );
1497 |
1498 | if(nextPageLink) {
1499 | readability.appendNextPage(nextPageLink);
1500 | }
1501 | }
1502 | });
1503 | }(nextPageLink, articlePage));
1504 | },
1505 |
1506 | /**
1507 | * Get an elements class/id weight. Uses regular expressions to tell if this
1508 | * element looks good or bad.
1509 | *
1510 | * @param Element
1511 | * @return number (Integer)
1512 | **/
1513 | getClassWeight: function (e) {
1514 | if(!readability.flagIsActive(readability.FLAG_WEIGHT_CLASSES)) {
1515 | return 0;
1516 | }
1517 |
1518 | var weight = 0;
1519 |
1520 | /* Look for a special classname */
1521 | if (typeof(e.className) === 'string' && e.className !== '')
1522 | {
1523 | if(e.className.search(readability.regexps.negative) !== -1) {
1524 | weight -= 25; }
1525 |
1526 | if(e.className.search(readability.regexps.positive) !== -1) {
1527 | weight += 25; }
1528 | }
1529 |
1530 | /* Look for a special ID */
1531 | if (typeof(e.id) === 'string' && e.id !== '')
1532 | {
1533 | if(e.id.search(readability.regexps.negative) !== -1) {
1534 | weight -= 25; }
1535 |
1536 | if(e.id.search(readability.regexps.positive) !== -1) {
1537 | weight += 25; }
1538 | }
1539 |
1540 | return weight;
1541 | },
1542 |
1543 | nodeIsVisible: function (node) {
1544 | return (node.offsetWidth !== 0 || node.offsetHeight !== 0) && node.style.display.toLowerCase() !== 'none';
1545 | },
1546 |
1547 | /**
1548 | * Remove extraneous break tags from a node.
1549 | *
1550 | * @param Element
1551 | * @return void
1552 | **/
1553 | killBreaks: function (e) {
1554 | try {
1555 | e.innerHTML = e.innerHTML.replace(readability.regexps.killBreaks,' ');
1556 | }
1557 | catch (eBreaks) {
1558 | dbg("KillBreaks failed - this is an IE bug. Ignoring.: " + eBreaks);
1559 | }
1560 | },
1561 |
1562 | /**
1563 | * Clean a node of all elements of type "tag".
1564 | * (Unless it's a youtube/vimeo video. People love movies.)
1565 | *
1566 | * @param Element
1567 | * @param string tag to clean
1568 | * @return void
1569 | **/
1570 | clean: function (e, tag) {
1571 | var targetList = e.getElementsByTagName( tag );
1572 | var isEmbed = (tag === 'object' || tag === 'embed');
1573 |
1574 | for (var y=targetList.length-1; y >= 0; y-=1) {
1575 | /* Allow youtube and vimeo videos through as people usually want to see those. */
1576 | if(isEmbed) {
1577 | var attributeValues = "";
1578 | for (var i=0, il=targetList[y].attributes.length; i < il; i+=1) {
1579 | attributeValues += targetList[y].attributes[i].value + '|';
1580 | }
1581 |
1582 | /* First, check the elements attributes to see if any of them contain youtube or vimeo */
1583 | if (attributeValues.search(readability.regexps.videos) !== -1) {
1584 | continue;
1585 | }
1586 |
1587 | /* Then check the elements inside this element for the same. */
1588 | if (targetList[y].innerHTML.search(readability.regexps.videos) !== -1) {
1589 | continue;
1590 | }
1591 |
1592 | }
1593 |
1594 | targetList[y].parentNode.removeChild(targetList[y]);
1595 | }
1596 | },
1597 |
1598 | /**
1599 | * Clean an element of all tags of type "tag" if they look fishy.
1600 | * "Fishy" is an algorithm based on content length, classnames, link density, number of images & embeds, etc.
1601 | *
1602 | * @return void
1603 | **/
1604 | cleanConditionally: function (e, tag) {
1605 |
1606 | if(!readability.flagIsActive(readability.FLAG_CLEAN_CONDITIONALLY)) {
1607 | return;
1608 | }
1609 |
1610 | var tagsList = e.getElementsByTagName(tag);
1611 | var curTagsLength = tagsList.length;
1612 |
1613 | /**
1614 | * Gather counts for other typical elements embedded within.
1615 | * Traverse backwards so we can remove nodes at the same time without effecting the traversal.
1616 | *
1617 | * TODO: Consider taking into account original contentScore here.
1618 | **/
1619 | for (var i=curTagsLength-1; i >= 0; i-=1) {
1620 | var weight = readability.getClassWeight(tagsList[i]);
1621 | var contentScore = (typeof tagsList[i].readability !== 'undefined') ? tagsList[i].readability.contentScore : 0;
1622 |
1623 | dbg("Cleaning Conditionally " + tagsList[i] + " (" + tagsList[i].className + ":" + tagsList[i].id + ")" + ((typeof tagsList[i].readability !== 'undefined') ? (" with score " + tagsList[i].readability.contentScore) : ''));
1624 |
1625 | if(weight+contentScore < 0)
1626 | {
1627 | tagsList[i].parentNode.removeChild(tagsList[i]);
1628 | }
1629 | else if ( readability.getCharCount(tagsList[i],',') < 10) {
1630 | /**
1631 | * If there are not very many commas, and the number of
1632 | * non-paragraph elements is more than paragraphs or other ominous signs, remove the element.
1633 | **/
1634 | var p = tagsList[i].getElementsByTagName("p").length;
1635 | var img = tagsList[i].getElementsByTagName("img").length;
1636 | var li = tagsList[i].getElementsByTagName("li").length-100;
1637 | var input = tagsList[i].getElementsByTagName("input").length;
1638 |
1639 | var embedCount = 0;
1640 | var embeds = tagsList[i].getElementsByTagName("embed");
1641 | for(var ei=0,il=embeds.length; ei < il; ei+=1) {
1642 | if (embeds[ei].src.search(readability.regexps.videos) === -1) {
1643 | embedCount+=1;
1644 | }
1645 | }
1646 |
1647 | var linkDensity = readability.getLinkDensity(tagsList[i]);
1648 | var contentLength = readability.getInnerText(tagsList[i]).length;
1649 | var toRemove = false;
1650 |
1651 | if ( img > p ) {
1652 | toRemove = true;
1653 | } else if(li > p && tag !== "ul" && tag !== "ol") {
1654 | toRemove = true;
1655 | } else if( input > Math.floor(p/3) ) {
1656 | toRemove = true;
1657 | } else if(contentLength < 25 && (img === 0 || img > 2) ) {
1658 | toRemove = true;
1659 | } else if(weight < 25 && linkDensity > 0.2) {
1660 | toRemove = true;
1661 | } else if(weight >= 25 && linkDensity > 0.5) {
1662 | toRemove = true;
1663 | } else if((embedCount === 1 && contentLength < 75) || embedCount > 1) {
1664 | toRemove = true;
1665 | }
1666 |
1667 | if(toRemove) {
1668 | tagsList[i].parentNode.removeChild(tagsList[i]);
1669 | }
1670 | }
1671 | }
1672 | },
1673 |
1674 | /**
1675 | * Clean out spurious headers from an Element. Checks things like classnames and link density.
1676 | *
1677 | * @param Element
1678 | * @return void
1679 | **/
1680 | cleanHeaders: function (e) {
1681 | for (var headerIndex = 1; headerIndex < 3; headerIndex+=1) {
1682 | var headers = e.getElementsByTagName('h' + headerIndex);
1683 | for (var i=headers.length-1; i >=0; i-=1) {
1684 | if (readability.getClassWeight(headers[i]) < 0 || readability.getLinkDensity(headers[i]) > 0.33) {
1685 | headers[i].parentNode.removeChild(headers[i]);
1686 | }
1687 | }
1688 | }
1689 | },
1690 |
1691 | /*** Smooth scrolling logic ***/
1692 |
1693 | /**
1694 | * easeInOut animation algorithm - returns an integer that says how far to move at this point in the animation.
1695 | * Borrowed from jQuery's easing library.
1696 | * @return integer
1697 | **/
1698 | easeInOut: function(start,end,totalSteps,actualStep) {
1699 | var delta = end - start;
1700 |
1701 | if ((actualStep/=totalSteps/2) < 1) {
1702 | return delta/2*actualStep*actualStep + start;
1703 | }
1704 | actualStep -=1;
1705 | return -delta/2 * ((actualStep)*(actualStep-2) - 1) + start;
1706 | },
1707 |
1708 | /**
1709 | * Helper function to, in a cross compatible way, get or set the current scroll offset of the document.
1710 | * @return mixed integer on get, the result of window.scrollTo on set
1711 | **/
1712 | scrollTop: function(scroll){
1713 | var setScroll = typeof scroll !== 'undefined';
1714 |
1715 | if(setScroll) {
1716 | return window.scrollTo(0, scroll);
1717 | }
1718 | if(typeof window.pageYOffset !== 'undefined') {
1719 | return window.pageYOffset;
1720 | }
1721 | else if(document.documentElement.clientHeight) {
1722 | return document.documentElement.scrollTop;
1723 | }
1724 | else {
1725 | return document.body.scrollTop;
1726 | }
1727 | },
1728 |
1729 | /**
1730 | * scrollTo - Smooth scroll to the point of scrollEnd in the document.
1731 | * @return void
1732 | **/
1733 | curScrollStep: 0,
1734 | scrollTo: function (scrollStart, scrollEnd, steps, interval) {
1735 | if(
1736 | (scrollStart < scrollEnd && readability.scrollTop() < scrollEnd) ||
1737 | (scrollStart > scrollEnd && readability.scrollTop() > scrollEnd)
1738 | ) {
1739 | readability.curScrollStep+=1;
1740 | if(readability.curScrollStep > steps) {
1741 | return;
1742 | }
1743 |
1744 | var oldScrollTop = readability.scrollTop();
1745 |
1746 | readability.scrollTop(readability.easeInOut(scrollStart, scrollEnd, steps, readability.curScrollStep));
1747 |
1748 | // We're at the end of the window.
1749 | if(oldScrollTop === readability.scrollTop()) {
1750 | return;
1751 | }
1752 |
1753 | window.setTimeout(function() {
1754 | readability.scrollTo(scrollStart, scrollEnd, steps, interval);
1755 | }, interval);
1756 | }
1757 | },
1758 |
1759 |
1760 | /**
1761 | * Show the email popup.
1762 | *
1763 | * @return void
1764 | **/
1765 | emailBox: function () {
1766 | var emailContainerExists = document.getElementById('email-container');
1767 | if(null !== emailContainerExists)
1768 | {
1769 | return;
1770 | }
1771 |
1772 | var emailContainer = document.createElement("DIV");
1773 | emailContainer.setAttribute('id', 'email-container');
1774 | emailContainer.innerHTML = '';
1775 |
1776 | document.body.appendChild(emailContainer);
1777 | },
1778 |
1779 | /**
1780 | * Close the email popup. This is a hacktackular way to check if we're in a "close loop".
1781 | * Since we don't have crossdomain access to the frame, we can only know when it has
1782 | * loaded again. If it's loaded over 3 times, we know to close the frame.
1783 | *
1784 | * @return void
1785 | **/
1786 | removeFrame: function () {
1787 | readability.iframeLoads+=1;
1788 | if (readability.iframeLoads > 3)
1789 | {
1790 | var emailContainer = document.getElementById('email-container');
1791 | if (null !== emailContainer) {
1792 | emailContainer.parentNode.removeChild(emailContainer);
1793 | }
1794 |
1795 | readability.iframeLoads = 0;
1796 | }
1797 | },
1798 |
1799 | htmlspecialchars: function (s) {
1800 | if (typeof(s) === "string") {
1801 | s = s.replace(/&/g, "&");
1802 | s = s.replace(/"/g, """);
1803 | s = s.replace(/'/g, "'");
1804 | s = s.replace(//g, ">");
1806 | }
1807 |
1808 | return s;
1809 | },
1810 |
1811 | flagIsActive: function(flag) {
1812 | return (readability.flags & flag) > 0;
1813 | },
1814 |
1815 | addFlag: function(flag) {
1816 | readability.flags = readability.flags | flag;
1817 | },
1818 |
1819 | removeFlag: function(flag) {
1820 | readability.flags = readability.flags & ~flag;
1821 | }
1822 |
1823 | };
1824 |
1825 | readability.init();
1826 |
--------------------------------------------------------------------------------