├── VERSION.txt ├── .gitignore ├── tests ├── runserver.bat ├── data.js ├── test_site │ ├── ajax_content.html │ ├── img_input.html │ ├── page1.html │ ├── page4.html │ ├── loop1.html │ ├── loop2.html │ ├── jquery_versions.html │ ├── page2.html │ ├── page3.html │ ├── index.html │ ├── duplicates.html │ ├── ready.html │ ├── csv_page.html │ ├── pjscrape_client.js │ ├── jquery-1.3.1.js │ └── jquery-1.4.1.min.js ├── base_config.js ├── test_img_input.js ├── test_ready.js ├── test_loadscript.js ├── test_recursive_nomaxdepth.js ├── test_recursive_maxdepth.js ├── test_recursive_noloop.js ├── test_basic.js ├── test_prescrape.js ├── test_recursive_allowrepeat.js ├── test_multiple_urls.js ├── test_multiple_suites.js ├── test_csv_autofields.js ├── test_jquery_versions.js ├── test_csv.js ├── test_ignore_duplicates.js ├── test_ignore_duplicates_id.js ├── test_csv_autofields_obj.js ├── test_syntax.js └── runtests.py ├── bin └── pjscrape.bat ├── LICENSE.txt ├── README.md ├── client └── pjscrape_client.js ├── lib └── md5.js └── pjscrape.js /VERSION.txt: -------------------------------------------------------------------------------- 1 | 0.1 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo -------------------------------------------------------------------------------- /tests/runserver.bat: -------------------------------------------------------------------------------- 1 | python -m SimpleHTTPServer 8888 -------------------------------------------------------------------------------- /tests/data.js: -------------------------------------------------------------------------------- 1 | var myVar = "test1"; 2 | _pjs.myVar = "test2"; -------------------------------------------------------------------------------- /tests/test_site/ajax_content.html: -------------------------------------------------------------------------------- 1 |
Page 1 content
10 | 11 | 12 | -------------------------------------------------------------------------------- /tests/test_site/page4.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 |Page 4 content
10 | 11 | 12 | -------------------------------------------------------------------------------- /tests/test_site/loop1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 |Page 2 content
10 | 11 |
Page 3 content
10 | 11 |
| 1 | 12 |string | 13 |string's | 14 |a "quoted" string | 15 |111 | 16 |
| 2 | 19 |string | 20 |string's | 21 |a "quoted" string | 22 |222 | 23 |
| 3 | 26 |string | 27 |string's | 28 |a "quoted" string | 29 |333 | 30 |
_pjs or window._pjs.
11 | * @name _pjs
12 | */
window._pjs = (function($) {

    // munge the location
    var loc = window.location,
        base = loc.protocol + '//' + loc.hostname + (loc.port ? ':' + loc.port : ''),
        path = loc.pathname.split('/').slice(0,-1).join('/') + '/';

    /**
     * Check whether a URL is local to this site
     * @name _pjs.isLocalUrl
     * @param {String} url URL to check
     * @return {Boolean} Whether this URL is local
     */
    function isLocalUrl(url) {
        // BUG FIX: protocol-relative URLs ("//host/path") carry their own host,
        // so they are local only if that host matches this site's base;
        // previously the https?:// regex missed them and they always looked local
        if (url && url.indexOf('//') === 0) {
            return (loc.protocol + url).indexOf(base) === 0;
        }
        return !url.match(/^(https?:\/\/|mailto:)/) || url.indexOf(base) === 0;
    }

    /**
     * Convert a local URL to a fully qualified URL (with domain name, etc)
     * @name _pjs.toFullUrl
     * @param {String} url URL to convert
     * @return {String} Fully qualified URL
     */
    function toFullUrl(url) {
        // non-existent, or fully qualified already
        if (!url || url.indexOf(base) === 0 || !isLocalUrl(url)) return url;
        // BUG FIX: protocol-relative url - reuse the current protocol rather
        // than blindly prepending base (which produced "http://host//host/...")
        if (url.indexOf('//') === 0) return loc.protocol + url;
        // absolute url
        if (url.charAt(0) === '/') return base + url;
        // relative url - browser can figure out ..
        return base + path + url;
    }

    /**
     * Convenience function - find all anchor tags on the page matching the given
     * selector (or jQuery object) and return an array of fully qualified URLs
     * @name _pjs.getAnchorUrls
     * @param {String|jQuery} selector Selector or jQuery object to find anchor elements
     * @param {Boolean} includeOffsite Whether to include off-site links
     * @return {String[]} Array of fully qualified URLs
     */
    function getAnchorUrls(selector, includeOffsite) {
        return $(selector).map(function() {
            var href = $(this).attr('href');
            // skip missing hrefs and pure fragment links; off-site links are
            // kept only when requested
            return (href && href.indexOf('#') !== 0 && (includeOffsite || isLocalUrl(href))) ?
                toFullUrl(href) : undefined;
        }).toArray();
    }

    /**
     * Convenience function - find all tags on the page matching the given
     * selector (or jQuery object) and return inner text for each
     * @name _pjs.getText
     * @param {String|jQuery} selector Selector or jQuery object to find elements
     * @return {String[]} Array of text contents
     */
    function getText(selector) {
        return $(selector).map(function() {
            return $(this).text();
        }).toArray();
    }

    /**
     * Flag that will be set to true when $(document).ready is called.
     * Generally your code will not need to deal with this - use the "ready"
     * configuration parameter instead.
     * @type Boolean
     * @name _pjs.ready
     */

    return {
        isLocalUrl: isLocalUrl,
        toFullUrl: toFullUrl,
        getAnchorUrls: getAnchorUrls,
        getText: getText,
        /**
         * Reference to jQuery. This is guaranteed to be
         * the pjscrape.js version of the jQuery library.
         * Scrapers using the 'noConflict' config option
         * should use this reference in their code.
         * @type jQuery
         * @name _pjs.$
         */
        '$': $
    };
}(_pjs$));

// bind to .ready()
window._pjs.$(function() {
    window._pjs.ready = true;
});

// for reasons I can't fathom, omitting this line throws an
// error on pages with <input type="image">. Go figure.
console.log('Client-side code initialized');
--------------------------------------------------------------------------------
/tests/runtests.py:
--------------------------------------------------------------------------------
1 | import SimpleHTTPServer
2 | import SocketServer
3 | import threading
4 | import unittest
5 | import subprocess
6 | import os
7 |
# Port the local fixture web server listens on; the test configs are
# expected to point at http://localhost:PORT/.
PORT = 8888
# Base command line: run pjscrape.js under pyphantomjs with the shared config;
# each test appends its own script file.
COMMAND_BASE = ["pyphantomjs", os.path.join('..', 'pjscrape.js'), 'base_config.js']

def getPjscrapeOutput(script_name):
    """Run one pjscrape test script via pyphantomjs and return stripped stdout."""
    return subprocess.check_output(COMMAND_BASE + [script_name]).strip()

class QuietHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
    # Request handler that suppresses per-request logging so test output
    # stays readable. NOTE(review): SimpleHTTPServer is Python 2 only.
    def log_message(self, format, *args):
        pass
18 |
class TestPjscrape(unittest.TestCase):
    """End-to-end tests: serve tests/ over HTTP, run pjscrape with each
    test_*.js config, and compare the raw stdout against expected output."""

    @classmethod
    def setUpClass(cls):
        # set up server (daemon thread so it dies with the test process)
        cls.httpd = SocketServer.TCPServer(("", PORT), QuietHTTPRequestHandler)
        httpd_thread = threading.Thread(target=cls.httpd.serve_forever)
        httpd_thread.setDaemon(True)
        httpd_thread.start()

    @classmethod
    def tearDownClass(cls):
        # tear down server
        cls.httpd.shutdown()

    def test_basic(self):
        out = getPjscrapeOutput('test_basic.js')
        self.assertEqual(out, '["Test Page: Index","Page 1","Page 2"]')

    def test_multiple_urls(self):
        out = getPjscrapeOutput('test_multiple_urls.js')
        self.assertEqual(out, '["Test Page: Index","Test Page: Page 1","Test Page: Page 2"]')

    def test_multiple_suites(self):
        out = getPjscrapeOutput('test_multiple_suites.js')
        self.assertEqual(out, '["Test Page: Index","Test Page: Page 1","Test Page: Page 2"]')

    def test_recursive_maxdepth(self):
        # maxDepth should stop the crawl before pages 3 and 4
        out = getPjscrapeOutput('test_recursive_maxdepth.js')
        self.assertEqual(out, '["Test Page: Index","Test Page: Page 1","Test Page: Page 2"]')

    def test_recursive_nomaxdepth(self):
        out = getPjscrapeOutput('test_recursive_nomaxdepth.js')
        self.assertEqual(out, '["Test Page: Index","Test Page: Page 1","Test Page: Page 2","Test Page: Page 3","Test Page: Page 4"]')

    def test_recursive_noloop(self):
        # loop1 <-> loop2 link to each other; each should be visited once
        out = getPjscrapeOutput('test_recursive_noloop.js')
        self.assertEqual(out, '["Test Page: Loop 1","Test Page: Loop 2"]')

    def test_recursive_allowrepeat(self):
        out = getPjscrapeOutput('test_recursive_allowrepeat.js')
        self.assertEqual(out, '["Test Page: Loop 1","Test Page: Loop 2","Test Page: Loop 1","Test Page: Loop 2","Test Page: Loop 1"]')

    def test_csv(self):
        out = getPjscrapeOutput('test_csv.js')
        # not sure why stdout uses \r\r\n, but that seems to be the case
        self.assertEqual(out, '"a","b","c","d","e"\r\r\n"1","string","string\'s","a ""quoted"" string","111"\r\r\n"2","string","string\'s","a ""quoted"" string","222"\r\r\n"3","string","string\'s","a ""quoted"" string","333"')

    def test_csv_autofields(self):
        # with no csvFields configured, headers are auto-generated
        out = getPjscrapeOutput('test_csv_autofields.js')
        self.assertEqual(out, '"Column 1","Column 2","Column 3","Column 4","Column 5"\r\r\n"1","string","string\'s","a ""quoted"" string","111"\r\r\n"2","string","string\'s","a ""quoted"" string","222"\r\r\n"3","string","string\'s","a ""quoted"" string","333"')

    def test_csv_autofields_obj(self):
        # object items: headers are derived from the first item's keys
        out = getPjscrapeOutput('test_csv_autofields_obj.js')
        self.assertEqual(out, '"a","b","c","d","e"\r\r\n"1","string","string\'s","a ""quoted"" string","111"\r\r\n"2","string","string\'s","a ""quoted"" string","222"\r\r\n"3","string","string\'s","a ""quoted"" string","333"')

    def test_prescrape(self):
        out = getPjscrapeOutput('test_prescrape.js')
        self.assertEqual(out, '["test1","test2"]')

    def test_loadscript(self):
        out = getPjscrapeOutput('test_loadscript.js')
        self.assertEqual(out, '["test1","test2"]')

    def test_syntax(self):
        out = getPjscrapeOutput('test_syntax.js')
        self.assertEqual(out, '["Test Page: Index","Page 1","Test Page: Index","Page 1","Page 2"]')

    def test_ready(self):
        # the "ready" option should delay scraping until AJAX content loads
        out = getPjscrapeOutput('test_ready.js')
        self.assertEqual(out, '["Content 1","Content 2"]')

    def test_jquery_versions(self):
        out = getPjscrapeOutput('test_jquery_versions.js')
        self.assertEqual(out, '["1.6.1","1.6.1","1.4.1","1.6.1"]')

    def test_ignore_duplicates(self):
        out = getPjscrapeOutput('test_ignore_duplicates.js')
        self.assertEqual(out, '[{"a":"test","b":"1"},{"a":"test","b":"2"}]')

    def test_ignore_duplicates_id(self):
        out = getPjscrapeOutput('test_ignore_duplicates_id.js')
        # keys in alphabetical order due to http://code.google.com/p/phantomjs/issues/detail?id=170
        self.assertEqual(out, '[{"a":"test","i":0,"id":"1"},{"a":"test","i":1,"id":"2"}]')

    def test_img_input(self):
        out = getPjscrapeOutput('test_img_input.js')
        self.assertEqual(out, '["Test Page: Weird Image Input Issue"]')

if __name__ == '__main__':
    # run tests
    suite = unittest.TestLoader().loadTestsFromTestCase(TestPjscrape)
    unittest.TextTestRunner(verbosity=2).run(suite)
--------------------------------------------------------------------------------
/lib/md5.js:
--------------------------------------------------------------------------------
1 | /*!
2 | * Javascript MD5 implementation by Joseph Myers
3 | * http://www.myersdaily.org/joseph/javascript/md5-text.html
4 | */
5 | var md5 = (function() {
// One MD5 compression cycle: folds a 16-word (512-bit) message block k into
// the 4-word state x in place. Four rounds of 16 steps each, using the ff/gg/
// hh/ii round functions with fixed per-step rotate amounts and additive
// constants (the standard MD5 schedule).
function md5cycle(x, k) {
var a = x[0], b = x[1], c = x[2], d = x[3];

// round 1: F function, rotates 7/12/17/22, message words in order
a = ff(a, b, c, d, k[0], 7, -680876936);
d = ff(d, a, b, c, k[1], 12, -389564586);
c = ff(c, d, a, b, k[2], 17, 606105819);
b = ff(b, c, d, a, k[3], 22, -1044525330);
a = ff(a, b, c, d, k[4], 7, -176418897);
d = ff(d, a, b, c, k[5], 12, 1200080426);
c = ff(c, d, a, b, k[6], 17, -1473231341);
b = ff(b, c, d, a, k[7], 22, -45705983);
a = ff(a, b, c, d, k[8], 7, 1770035416);
d = ff(d, a, b, c, k[9], 12, -1958414417);
c = ff(c, d, a, b, k[10], 17, -42063);
b = ff(b, c, d, a, k[11], 22, -1990404162);
a = ff(a, b, c, d, k[12], 7, 1804603682);
d = ff(d, a, b, c, k[13], 12, -40341101);
c = ff(c, d, a, b, k[14], 17, -1502002290);
b = ff(b, c, d, a, k[15], 22, 1236535329);

// round 2: G function, rotates 5/9/14/20, words in (1 + 5i) mod 16 order
a = gg(a, b, c, d, k[1], 5, -165796510);
d = gg(d, a, b, c, k[6], 9, -1069501632);
c = gg(c, d, a, b, k[11], 14, 643717713);
b = gg(b, c, d, a, k[0], 20, -373897302);
a = gg(a, b, c, d, k[5], 5, -701558691);
d = gg(d, a, b, c, k[10], 9, 38016083);
c = gg(c, d, a, b, k[15], 14, -660478335);
b = gg(b, c, d, a, k[4], 20, -405537848);
a = gg(a, b, c, d, k[9], 5, 568446438);
d = gg(d, a, b, c, k[14], 9, -1019803690);
c = gg(c, d, a, b, k[3], 14, -187363961);
b = gg(b, c, d, a, k[8], 20, 1163531501);
a = gg(a, b, c, d, k[13], 5, -1444681467);
d = gg(d, a, b, c, k[2], 9, -51403784);
c = gg(c, d, a, b, k[7], 14, 1735328473);
b = gg(b, c, d, a, k[12], 20, -1926607734);

// round 3: H function, rotates 4/11/16/23, words in (5 + 3i) mod 16 order
a = hh(a, b, c, d, k[5], 4, -378558);
d = hh(d, a, b, c, k[8], 11, -2022574463);
c = hh(c, d, a, b, k[11], 16, 1839030562);
b = hh(b, c, d, a, k[14], 23, -35309556);
a = hh(a, b, c, d, k[1], 4, -1530992060);
d = hh(d, a, b, c, k[4], 11, 1272893353);
c = hh(c, d, a, b, k[7], 16, -155497632);
b = hh(b, c, d, a, k[10], 23, -1094730640);
a = hh(a, b, c, d, k[13], 4, 681279174);
d = hh(d, a, b, c, k[0], 11, -358537222);
c = hh(c, d, a, b, k[3], 16, -722521979);
b = hh(b, c, d, a, k[6], 23, 76029189);
a = hh(a, b, c, d, k[9], 4, -640364487);
d = hh(d, a, b, c, k[12], 11, -421815835);
c = hh(c, d, a, b, k[15], 16, 530742520);
b = hh(b, c, d, a, k[2], 23, -995338651);

// round 4: I function, rotates 6/10/15/21, words in (7i) mod 16 order
a = ii(a, b, c, d, k[0], 6, -198630844);
d = ii(d, a, b, c, k[7], 10, 1126891415);
c = ii(c, d, a, b, k[14], 15, -1416354905);
b = ii(b, c, d, a, k[5], 21, -57434055);
a = ii(a, b, c, d, k[12], 6, 1700485571);
d = ii(d, a, b, c, k[3], 10, -1894986606);
c = ii(c, d, a, b, k[10], 15, -1051523);
b = ii(b, c, d, a, k[1], 21, -2054922799);
a = ii(a, b, c, d, k[8], 6, 1873313359);
d = ii(d, a, b, c, k[15], 10, -30611744);
c = ii(c, d, a, b, k[6], 15, -1560198380);
b = ii(b, c, d, a, k[13], 21, 1309151649);
a = ii(a, b, c, d, k[4], 6, -145523070);
d = ii(d, a, b, c, k[11], 10, -1120210379);
c = ii(c, d, a, b, k[2], 15, 718787259);
b = ii(b, c, d, a, k[9], 21, -343485551);

// add this cycle's result back into the running state (mod 2^32)
x[0] = add32(a, x[0]);
x[1] = add32(b, x[1]);
x[2] = add32(c, x[2]);
x[3] = add32(d, x[3]);

}
83 |
// Common inner step shared by all four MD5 round functions:
// sum the state word, round-function output, message word, and constant
// (all mod 2^32), rotate left by s, then add b.
function cmn(q, a, b, x, s, t) {
    var sum = add32(add32(a, q), add32(x, t));
    return add32((sum << s) | (sum >>> (32 - s)), b);
}
88 |
// The four MD5 round functions; each mixes b, c, d with its bitwise
// function and hands off to cmn() for the add-rotate-add step.

// F(b,c,d) = (b & c) | (~b & d), written in the equivalent mux form
function ff(a, b, c, d, x, s, t) {
    return cmn(d ^ (b & (c ^ d)), a, b, x, s, t);
}

// G(b,c,d) = (b & d) | (c & ~d), written in the equivalent mux form
function gg(a, b, c, d, x, s, t) {
    return cmn(c ^ (d & (b ^ c)), a, b, x, s, t);
}

// H(b,c,d) = b XOR c XOR d
function hh(a, b, c, d, x, s, t) {
    return cmn(b ^ c ^ d, a, b, x, s, t);
}

// I(b,c,d) = c XOR (b OR ~d)
function ii(a, b, c, d, x, s, t) {
    return cmn(c ^ (b | (~d)), a, b, x, s, t);
}
104 |
105 | function md51(s) {
106 | txt = '';
107 | var n = s.length,
108 | state = [1732584193, -271733879, -1732584194, 271733878], i;
109 | for (i=64; i<=s.length; i+=64) {
110 | md5cycle(state, md5blk(s.substring(i-64, i)));
111 | }
112 | s = s.substring(i-64);
113 | var tail = [0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0];
114 | for (i=0; iScraping harness for PhantomJS. Requires PhantomJS or PyPhantomJS v.1.2 9 | * (with saveToFile() support, if you want to use the file writer or logger).
10 | * 11 | * @name pjscrape.js 12 | * @author Nick Rabinowitz (www.nickrabinowitz.com) 13 | * @version 0.1 14 | */ 15 | 16 | /* 17 | TODO: 18 | - Get the HTTP response code for the loaded page and fail if it's 40x or 50x 19 | - tests for client utilities? 20 | - docs for the Logger, Formatter, Writer, and HashFunction interfaces? 21 | - test for file writes 22 | - Some sort of test harness (as a bookmarklet, maybe?) to do client-side scraper dev 23 | (could call in a file that's hosted on github, or just do the whole thing in 24 | a bookmarklet - not much code I think) - I'm thinking either pop-up window or just 25 | code injection + console. pjs.addSuite or pjs.addScraper would run immediately, returning 26 | scraper results. pjs.config() would be moot, I think. 27 | - Better docs, obviously. 28 | */ 29 | 30 | phantom.injectJs('lib/md5.js'); 31 | 32 | function fail(msg) { 33 | console.log('FATAL ERROR: ' + msg); 34 | phantom.exit(); 35 | }; 36 | 37 | /** 38 | * @namespace 39 | * Root namespace for PhantomJS-side code 40 | * @name pjs 41 | */ 42 | var pjs = (function(){ 43 | var config = { 44 | timeoutInterval: 100, 45 | timeoutLimit: 3000, 46 | log: 'stdout', 47 | writer: 'stdout', 48 | format: 'json', 49 | logFile: 'pjscrape_log.txt', 50 | outFile: 'pjscrape_out.txt' 51 | }; 52 | 53 | var suites = []; 54 | 55 | 56 | // utils 57 | function isFunction(f) { 58 | return typeof f === 'function'; 59 | } 60 | function funcify(f) { 61 | return isFunction(f) ? f : function() { return f }; 62 | } 63 | function isArray(a) { 64 | return Array.isArray(a); 65 | } 66 | function arrify(a) { 67 | return isArray(a) ? a : a ? 
[a] : []; 68 | } 69 | function getKeys(o) { 70 | var keys = []; 71 | for (var key in o) keys.push(key); 72 | return keys; 73 | } 74 | function extend(obj) { 75 | Array.prototype.slice.call(arguments, 1).forEach(function(source) { 76 | for (var prop in source) { 77 | if (source[prop] !== void 0) obj[prop] = source[prop]; 78 | } 79 | }); 80 | return obj; 81 | }; 82 | 83 | /** 84 | * @name pjs.loggers 85 | * @namespace 86 | * Logger namespace. You can add new loggers here; new logger classes 87 | * should probably extend pjs.loggers.base and redefine the 88 | *log method.
89 | * @example
90 | // create a new logger
91 | pjs.loggers.myLogger = function() {
92 | return new pjs.loggers.base(function(msg) {
93 | // do some special logging stuff
94 | });
95 | };
96 | // tell pjscrape to use your logger
97 | pjs.config({
98 | log: 'myLogger'
99 | });
100 | */
101 | var loggers = {
102 |
103 | /**
104 | * @name pjs.loggers.base
105 | * @class Abstract base logger class
106 | * @private
107 | */
108 | base: function(logf) {
109 | var log = this;
110 | log.log = logf || function(msg) { console.log(msg) };
111 | log.msg = function(msg) { log.log('* ' + msg) };
112 | log.alert = function(msg) { log.log('! ' + msg) };
113 | log.error = function(msg) { log.log('ERROR: ' + msg) };
114 | },
115 |
116 | /**
117 | * Log to config.logFile
118 | * @name pjs.loggers.file
119 | * @type Logger
120 | */
121 | file: function() {
122 | return new loggers.base(function(msg) {
123 | phantom.saveToFile(msg + "\n", config.logFile, 'a');
124 | });
125 | },
126 |
127 | /**
128 | * Disable logging
129 | * @name pjs.loggers.none
130 | * @type Logger
131 | */
132 | none: function() {
133 | return new loggers.base(function() {});
134 | }
135 | };
136 |
137 | /**
138 | * Log to STDOUT
139 | * @name pjs.loggers.stdout
140 | * @type Logger
141 | */
142 | loggers.stdout = loggers.base;
143 |
144 | /**
145 | * @name pjs.formatters
146 | * @namespace
147 | * Formatter namespace. You can add new formatters here; new formatter classes
148 | * should have the properties start, end, and
149 | * delimiter, and the method format(item). You might
150 | * save some time by inheriting from formatters.raw or formatters.json.
151 | * @example
152 | // create a new formatter
153 | pjs.formatters.pipe = function() {
154 | var f = new pjs.formatters.raw();
155 | f.delimiter = '|';
156 | return f;
157 | };
158 | // tell pjscrape to use your formatter
159 | pjs.config({
160 | format: 'pipe'
161 | });
162 | */
163 | var formatters = {
164 |
165 | /**
166 | * Raw formatter - just uses toString()
167 | * @name pjs.formatters.raw
168 | * @type Formatter
169 | */
        raw: function() {
            var f = this;
            // no prefix, suffix, or separator between items
            f.start = f.end = f.delimiter = '';
            // NOTE(review): item.toString() throws on null/undefined items -
            // assumes scrapers only emit real values; confirm against callers
            f.format = function(item) {
                return item.toString();
            };
        },
177 |
178 | /**
179 | * Format output as a JSON array
180 | * @name pjs.formatters.json
181 | * @type Formatter
182 | */
        json: function() {
            var f = this;
            // emit all items as one JSON array: [item,item,...]
            f.start = '[';
            f.end = ']';
            f.delimiter = ',';
            f.format = function(item) {
                return JSON.stringify(item);
            };
        },
192 |
193 | /**
194 | * CSV formatter - takes arrays or objects, fields defined by
195 | * config.csvFields or auto-generated based on first item
196 | * @name pjs.formatters.csv
197 | * @type Formatter
198 | */
199 | csv: function() {
200 | var f = this,
201 | fields = config.csvFields,
202 | makeRow = function(a) { return a.map(JSON.stringify).join(',') };
203 |
204 | f.delimiter = "\r\n";
205 | f.start = fields ? makeRow(fields) + f.delimiter : '';
206 | f.end = '';
207 | f.format = function(item) {
208 | if (item && typeof item == 'object') {
209 | var out = '';
210 | // make fields if not defined
211 | if (!fields) {
212 | if (isArray(item)) {
213 | fields = [];
214 | for (var i=0; iwrite method.
250 | * Items returned by scrapers will be added to the output via
251 | * Writer.add(item), which can take any type of object. If
252 | * an array is provided, multipled items will be added.
253 | * @example
254 | // create a new writer
255 | pjs.writer.myWriter = function(log) {
256 | var w = new pjs.writers.base(log);
257 | w.write = function(s) {
258 | // write s to some special place
259 | }
260 | return w;
261 | };
262 | // tell pjscrape to use your writer
263 | pjs.config({
264 | writer: 'myWriter'
265 | });
266 | */
267 | var writers = {
268 | /**
269 | * @name pjs.writers.base
270 | * @class Abstract base writer class
271 | * @private
272 | */
273 | base: function(log) {
274 | var w = this,
275 | count = 0,
276 | items = [],
277 | batchSize = config.batchSize,
278 | format = config.format || 'json',
279 | firstWrite = true,
280 | lastWrite = false;
281 |
282 | // init formatter
283 | var formatter = new formatters[format]();
284 |
285 | // write output
286 | var writeBatch = function(batch) {
287 | log.msg('Writing ' + batch.length + ' items');
288 | w.write(
289 | (firstWrite ? formatter.start : formatter.delimiter) +
290 | batch.map(formatter.format).join(formatter.delimiter) +
291 | (lastWrite ? formatter.end : '')
292 | );
293 | firstWrite = false;
294 | };
295 |
296 | /**
297 | * Add an item to be written to output
298 | * @name pjs.writers.base#add
299 | * @function
300 | * @param {Object|String|Array} Item to add
301 | */
302 | w.add = function(i) {
303 | // add to items
304 | if (i) {
305 | i = arrify(i);
306 | items = items.concat(i);
307 | count += i.length;
308 | // write if necessary
309 | if (batchSize && items.length > batchSize) {
310 | writeBatch(items.splice(0, batchSize));
311 | }
312 | }
313 | };
314 |
315 | /**
316 | * Finish up writing output
317 | * @name pjs.writers.base#finish
318 | * @function
319 | */
320 | w.finish = function() {
321 | lastWrite = true;
322 | writeBatch(items);
323 | };
324 |
325 | /**
326 | * Get the number of items written to output
327 | * @name pjs.writers.base#count
328 | * @function
329 | * @return {Number} Number of items written
330 | */
331 | w.count = function() {
332 | return count;
333 | };
334 |
335 | /**
336 | * Write a string to output
337 | * @name pjs.writers.base#write
338 | * @function
339 | * @param {String} s String to write
340 | */
341 | w.write = function(s) {
342 | console.log(s);
343 | };
344 | },
345 |
346 | /**
347 | * Writes output to config.outFile
348 | * @name pjs.writers.file
349 | * @type Writer
350 | */
351 | file: function(log) {
352 | var w = new writers.base(log);
353 | // clear file
354 | phantom.saveToFile('', config.outFile, 'w');
355 | // write method
356 | w.write = function(s) {
357 | phantom.saveToFile(s, config.outFile, 'a');
358 | };
359 | return w;
360 | },
361 |
362 | /**
363 | * Writes output to one file per item. Items may be provided
364 | * in the format { filename: "file.txt", content: "string" }
365 | * if you'd like to specify the filename in the scraper. Otherwise,
366 | * files are written to config.outFile with serial numbering.
367 | * @name pjs.writers.itemfile
368 | * @type Writer
369 | */
370 | itemfile: function(log) {
371 | var w = this,
372 | count = 0,
373 | format = config.format || 'raw',
374 | formatter = new formatters[format]();
375 |
376 | w.add = function(items) {
377 | // add to items
378 | if (items) {
379 | items = arrify(items);
380 | // write to separate files
381 | items.forEach(function(item) {
382 | var filename;
383 | // support per-item filename syntax
384 | if (item.filename && item.content) {
385 | filename = item.filename;
386 | item = item.content;
387 | }
388 | // otherwise add a serial number to config.outFile
389 | else {
390 | var fileparts = config.outFile.split('.'),
391 | ext = fileparts.pop();
392 | filename = fileparts.join('.') + '-' + (count++) + '.' + ext;
393 | }
394 | phantom.saveToFile(formatter.format(item), filename, 'w');
395 | count++;
396 | });
397 | }
398 | };
399 |
400 | w.finish = function() {};
401 |
402 | w.count = function() {
403 | return count;
404 | };
405 | },
406 | };
407 |
408 | /**
409 | * Write output to STDOUT
410 | * @name pjs.writers.stdout
411 | * @type Writer
412 | */
413 | writers.stdout = writers.base;
414 |
415 | /**
416 | * @name pjs.hashFunctions
417 | * @namespace
418 | * Hash function namespace. You can add new hash functions here; hash functions
419 | * should take an item and return a unique (or unique-enough) string.
420 | * @example
421 | // create a new hash function
422 | pjs.hashFunctions.myHash = function(item) {
423 | return item.mySpecialUID;
424 | };
425 | // tell pjscrape to ignore dupes
426 | pjs.config({
427 | ignoreDuplicates: true
428 | });
429 | // tell pjscrape to use your hash function
430 | pjs.addScraper({
431 | hashFunction: 'myHash',
432 | // etc
433 | });
434 | */
435 | var hashFunctions = {
436 | /** UID hash - assumes item.id; falls back on md5
437 | * @name pjs.hashFunctions.id
438 | * @type HashFunction
439 | */
440 | id: function(item) {
441 | return ('id' in item) ? item.id : hashFunctions.md5(item);
442 | },
443 | /** md5 hash - collisions are possible
444 | * @name pjs.hashFunctions.md5
445 | * @type HashFunction
446 | */
447 | md5: function(item) {
448 | return md5(JSON.stringify(item));
449 | }
450 | };
451 |
452 |
453 | // suite runner
454 | var runner = (function() {
455 | var visited = {},
456 | itemHashes = {},
457 | log,
458 | writer;
459 |
460 | /**
461 | * @class
462 | * Singleton: Manage multiple suites
463 | * @private
464 | */
465 | var SuiteManager = new function() {
466 | var mgr = this,
467 | complete,
468 | suiteq = [];
469 |
470 | // create a single WebPage object for reuse
471 | var page = new WebPage();
472 | // set up console output
473 | page.onConsoleMessage = function(msg, line, id) {
474 | id = id || 'injected code';
475 | if (line) msg += ' (' + id + ' line ' + line + ')';
476 | log.msg('CLIENT: ' + msg);
477 | };
478 | page.onAlert = function(msg) { log.alert('CLIENT: ' + msg) };
479 |
480 | mgr.getPage = function() {
481 | return page;
482 | };
483 |
484 | // set the completion callback
485 | mgr.setComplete = function(cb) {
486 | complete = cb;
487 | };
488 |
489 | // add a ScraperSuite
490 | mgr.add = function(suite) {
491 | suiteq.push(suite);
492 | };
493 |
494 | // run the next ScraperSuite in the queue
495 | mgr.runNext = function() {
496 | var suite = suiteq.shift();
497 | if (suite) suite.run();
498 | else complete();
499 | };
500 | }();
501 |
502 | /**
503 | * @class
504 | * Scraper suite class - represents a set of urls to scrape
505 | * @private
506 | * @param {String} title Title for verbose output
507 | * @param {String[]} urls Urls to scrape
508 | * @param {Object} opts Configuration object
509 | */
510 | var ScraperSuite = function(title, urls, opts) {
511 | var s = this,
512 | truef = function() { return true };
513 | // set up options
514 | s.title = title;
515 | s.urls = urls;
516 | s.opts = extend({
517 | ready: function() { return _pjs.ready; },
518 | scrapable: truef,
519 | preScrape: truef,
520 | hashFunction: hashFunctions.id
521 | }, opts);
522 | // deal with potential arrays and syntax variants
523 | s.opts.loadScript = arrify(opts.loadScripts || opts.loadScript);
524 | s.opts.scrapers = arrify(opts.scrapers || opts.scraper);
525 | // set up completion callback
526 | s.complete = function() {
527 | log.msg(s.title + " complete");
528 | SuiteManager.runNext();
529 | };
530 | s.depth = 0;
531 | }
532 |
533 | ScraperSuite.prototype = {
534 |
535 | /**
536 | * Add an item, checking for duplicates as necessary
537 | * @param {Object|Array} items Item(s) to add
538 | * @private
539 | */
540 | addItem: function(items) {
541 | var s = this;
542 | if (items && config.ignoreDuplicates) {
543 | // ensure array
544 | items = arrify(items);
545 | items = items.filter(function(item) {
546 | var hash = s.opts.hashFunction(item);
547 | if (!itemHashes[hash]) {
548 | // hash miss - new item
549 | itemHashes[hash] = true;
550 | return true;
551 | } else {
552 | // hash hit - likely duplicate
553 | // Could do a second-layer check against the actual object,
554 | // but that requires retaining items in memory - skip for now
555 | return false;
556 | }
557 | });
558 | }
559 | writer.add(items);
560 | },
561 |
562 | /**
563 | * Run the suite, scraping each url
564 | * @private
565 | */
566 | run: function() {
567 | var s = this,
568 | scrapers = s.opts.scrapers,
569 | i = 0;
570 | log.msg(s.title + " starting");
571 | // set up scraper functions
572 | var scrapePage = function(page) {
573 | if (page.evaluate(s.opts.scrapable)) {
574 | // load script(s) if necessary
575 | if (s.opts.loadScript) {
576 | s.opts.loadScript.forEach(function(script) {
577 | page.injectJs(script);
578 | })
579 | }
580 | // run prescrape
581 | page.evaluate(s.opts.preScrape);
582 | // run each scraper and send any results to writer
583 | if (scrapers && scrapers.length) {
584 | scrapers.forEach(function(scraper) {
585 | s.addItem(page.evaluate(scraper))
586 | });
587 | }
588 | }
589 | },
590 | // get base URL for avoiding repeat visits and recursion loops
591 | baseUrl = function(url) {
592 | return s.opts.newHashNewPage ? url.split('#')[0] : url;
593 | },
594 | // completion callback
595 | complete = function(page) {
596 | // recurse if necessary
597 | if (page && s.opts.moreUrls) {
598 | // look for more urls on this page
599 | var moreUrls = page.evaluate(s.opts.moreUrls);
600 | if (moreUrls && (!s.opts.maxDepth || s.depth < s.opts.maxDepth)) {
601 | if (moreUrls.length) {
602 | log.msg('Found ' + moreUrls.length + ' additional urls to scrape');
603 | // make a new sub-suite
604 | var ss = new ScraperSuite(s.title + '-sub' + i++, moreUrls, s.opts);
605 | ss.depth = s.depth + 1;
606 | SuiteManager.add(ss);
607 | }
608 | }
609 | }
610 | runNext();
611 | };
612 | // run each
613 | var runCounter = 0
614 | function runNext() {
615 | if (runCounter < s.urls.length) {
616 | url = baseUrl(s.urls[runCounter++]);
617 | // avoid repeat visits
618 | if (!config.allowRepeatUrls && url in visited) {
619 | runNext();
620 | } else {
621 | // scrape this url
622 | s.scrape(url, scrapePage, complete);
623 | }
624 | } else {
625 | s.complete();
626 | }
627 | }
628 | runNext();
629 | },
630 |
/**
 * Scrape a single page. Opens the URL in a PhantomJS page, injects
 * jQuery (immediately put into noConflict mode and stashed as
 * window._pjs$) and the pjscrape client script, then runs the
 * scraper(s) either right away or after polling the suite's ready()
 * predicate until it passes or config.timeoutLimit elapses.
 * @param {String} url Url of page to scrape
 * @param {Function} scrapePage Function to scrape page with
 * @param {Function} complete Callback function to run when complete;
 *      receives the page on success, or false if the page failed to load
 * @private
 */
scrape: function(url, scrapePage, complete) {
    var opts = this.opts,
        page = SuiteManager.getPage();
    log.msg('Opening ' + url);
    // run the scrape
    page.open(url, function(status) {
        if (status === "success") {
            // mark as visited (even if the ready() poll later times out)
            visited[url] = true;
            log.msg('Scraping ' + url);
            // load jQuery
            page.injectJs('client/jquery.js');
            // stash our jQuery copy where the page's own code can't clobber it
            page.evaluate(function() {
                window._pjs$ = jQuery.noConflict(true);
            });
            // load pjscrape client-side code
            page.injectJs('client/pjscrape_client.js');
            // reset the global jQuery vars unless the suite asked to
            // preserve the page's own $/jQuery (opts.noConflict)
            if (!opts.noConflict) {
                page.evaluate(function() {
                    window.$ = window.jQuery = window._pjs$;
                });
            }
            // run scraper(s)
            if (page.evaluate(opts.ready)) {
                // run immediately
                scrapePage(page);
                complete(page);
            } else {
                // poll ready() until timeout or success; on timeout the
                // page is scraped anyway with whatever content is present
                var elapsed = 0,
                    timeoutId = window.setInterval(function() {
                        if (page.evaluate(opts.ready) || elapsed > config.timeoutLimit) {
                            if (elapsed > config.timeoutLimit) {
                                // ~~ truncates the float to whole seconds
                                log.alert('Ready timeout after ' + ~~(elapsed / 1000) + ' seconds');
                            }
                            scrapePage(page);
                            window.clearInterval(timeoutId);
                            complete(page);
                        } else {
                            elapsed += config.timeoutInterval;
                        }
                    }, config.timeoutInterval);
            }
        } else {
            log.error('Page did not load: ' + url);
            // signal failure; complete(false) still advances the queue
            complete(false);
        }
    });
}
688 | };
689 |
/**
 * Run the set of configured scraper suites. Validates the
 * configuration, instantiates the logger and writer, registers every
 * configured suite with the SuiteManager, and starts the first one.
 * @name pjs.init
 */
function init() {
    // bail out early on an unusable configuration
    if (!suites.length) fail('No suites configured');
    if (!(config.log in loggers)) fail('Could not find logger: "' + config.log + '"');
    if (!(config.writer in writers)) fail('Could not find writer "' + config.writer + '"');

    // instantiate the configured logger, then the writer (which logs)
    log = new loggers[config.log]();
    writer = new writers[config.writer](log);

    // when the last suite finishes: flush output and exit PhantomJS
    SuiteManager.setComplete(function() {
        writer.finish();
        log.msg('Saved ' + writer.count() + ' items');
        phantom.exit();
    });

    // register each configured suite, defaulting its title to its index
    for (var idx = 0; idx < suites.length; idx++) {
        var suiteConfig = suites[idx];
        SuiteManager.add(new ScraperSuite(
            suiteConfig.title || "Suite " + idx,
            arrify(suiteConfig.url || suiteConfig.urls),
            suiteConfig
        ));
    }

    // start the suite manager
    SuiteManager.runNext();
}
723 |
724 | return {
725 | init: init
726 | }
727 | }());
728 |
729 | // expose namespaces and API functions
730 | return {
731 | loggers: loggers,
732 | formatters: formatters,
733 | writers: writers,
734 | hashFunctions: hashFunctions,
735 | init: runner.init,
736 |
737 | /**
738 | * Set one or more config variables, applying to all suites
739 | * @name pjs.config
740 | * @param {String|Object} key Either a key to set or an object with
741 | * multiple values to set
742 | * @param {mixed} [val] Value to set if using config(key, val) syntax
743 | */
744 | config: function(key, val) {
745 | if (!key) {
746 | return config;
747 | } else if (typeof key == 'object') {
748 | extend(config, key);
749 | } else if (val) {
750 | config[key] = val;
751 | }
752 | },
753 |
754 | /**
755 | * Add one or more scraper suites to be run.
756 | * @name pjs.addSuite
757 | * @param {Object} suite Scraper suite configuration object
758 | * @param {Object} [...] More suite configuration objects
759 | */
760 | addSuite: function() {
761 | suites = Array.prototype.concat.apply(suites, arguments);
762 | },
763 |
764 | /**
765 | * Shorthand function to add a simple scraper suite.
766 | * @name pjs.addScraper
767 | * @param {String|String[]} url URL or array of URLs to scrape
768 | * @param {Function|Function[]} Scraper function or array of scraper functions
769 | */
770 | addScraper: function(url, scraper) {
771 | suites.push({url:url, scraper:scraper});
772 | }
773 | };
774 | }());
775 |
776 |
777 | // make sure we have a config file
778 | if (!phantom.args.length) {
779 | // die
780 | console.log('Usage: pjscrape.js ","
"]||!O.indexOf("",""]||(!O.indexOf(" "," "]||!O.indexOf(""&&O.indexOf("=0;--M){if(o.nodeName(N[M],"tbody")&&!N[M].childNodes.length){N[M].parentNode.removeChild(N[M])}}}if(!o.support.leadingWhitespace&&/^\s/.test(R)){L.insertBefore(K.createTextNode(R.match(/^\s*/)[0]),L.firstChild)}R=o.makeArray(L.childNodes)}if(R.nodeType){G.push(R)}else{G=o.merge(G,R)}});if(I){for(var J=0;G[J];J++){if(o.nodeName(G[J],"script")&&(!G[J].type||G[J].type.toLowerCase()==="text/javascript")){E.push(G[J].parentNode?G[J].parentNode.removeChild(G[J]):G[J])}else{if(G[J].nodeType===1){G.splice.apply(G,[J+1,0].concat(o.makeArray(G[J].getElementsByTagName("script"))))}I.appendChild(G[J])}}return E}return G},attr:function(J,G,K){if(!J||J.nodeType==3||J.nodeType==8){return g}var H=!o.isXMLDoc(J),L=K!==g;G=H&&o.props[G]||G;if(J.tagName){var F=/href|src|style/.test(G);if(G=="selected"&&J.parentNode){J.parentNode.selectedIndex}if(G in J&&H&&!F){if(L){if(G=="type"&&o.nodeName(J,"input")&&J.parentNode){throw"type property can't be changed"}J[G]=K}if(o.nodeName(J,"form")&&J.getAttributeNode(G)){return J.getAttributeNode(G).nodeValue}if(G=="tabIndex"){var I=J.getAttributeNode("tabIndex");return I&&I.specified?I.value:J.nodeName.match(/(button|input|object|select|textarea)/i)?0:J.nodeName.match(/^(a|area)$/i)&&J.href?0:g}return J[G]}if(!o.support.style&&H&&G=="style"){return o.attr(J.style,"cssText",K)}if(L){J.setAttribute(G,""+K)}var E=!o.support.hrefNormalized&&H&&F?J.getAttribute(G,2):J.getAttribute(G);return E===null?g:E}if(!o.support.opacity&&G=="opacity"){if(L){J.zoom=1;J.filter=(J.filter||"").replace(/alpha\([^)]*\)/,"")+(parseInt(K)+""=="NaN"?"":"alpha(opacity="+K*100+")")}return J.filter&&J.filter.indexOf("opacity=")>=0?(parseFloat(J.filter.match(/opacity=([^)]*)/)[1])/100)+"":""}G=G.replace(/-([a-z])/ig,function(M,N){return N.toUpperCase()});if(L){J[G]=K}return J[G]},trim:function(E){return(E||"").replace(/^\s+|\s+$/g,"")},makeArray:function(G){var E=[];if(G!=null){var 
F=G.length;if(F==null||typeof G==="string"||o.isFunction(G)||G.setInterval){E[0]=G}else{while(F){E[--F]=G[F]}}}return E},inArray:function(G,H){for(var E=0,F=H.length;E
=0){if(!U){S.push(X)}}else{if(U){T[W]=false}}}}return false},ID:function(S){return S[1].replace(/\\/g,"")},TAG:function(T,S){for(var U=0;S[U]===false;U++){}return S[U]&&P(S[U])?T[1]:T[1].toUpperCase()},CHILD:function(S){if(S[1]=="nth"){var T=/(-?)(\d*)n((?:\+|-)?\d*)/.exec(S[2]=="even"&&"2n"||S[2]=="odd"&&"2n+1"||!/\D/.test(S[2])&&"0n+"+S[2]||S[2]);S[2]=(T[1]+(T[2]||1))-0;S[3]=T[3]-0}S[0]="done"+(K++);return S},ATTR:function(T){var S=T[1].replace(/\\/g,"");if(H.attrMap[S]){T[1]=H.attrMap[S]}if(T[2]==="~="){T[4]=" "+T[4]+" "}return T},PSEUDO:function(W,T,U,S,X){if(W[1]==="not"){if(W[3].match(Q).length>1){W[3]=F(W[3],null,null,T)}else{var V=F.filter(W[3],T,U,true^X);if(!U){S.push.apply(S,V)}return false}}else{if(H.match.POS.test(W[0])){return true}}return W},POS:function(S){S.unshift(true);return S}},filters:{enabled:function(S){return S.disabled===false&&S.type!=="hidden"},disabled:function(S){return S.disabled===true},checked:function(S){return S.checked===true},selected:function(S){S.parentNode.selectedIndex;return S.selected===true},parent:function(S){return !!S.firstChild},empty:function(S){return !S.firstChild},has:function(U,T,S){return !!F(S[3],U).length},header:function(S){return/h\d/i.test(S.nodeName)},text:function(S){return"text"===S.type},radio:function(S){return"radio"===S.type},checkbox:function(S){return"checkbox"===S.type},file:function(S){return"file"===S.type},password:function(S){return"password"===S.type},submit:function(S){return"submit"===S.type},image:function(S){return"image"===S.type},reset:function(S){return"reset"===S.type},button:function(S){return"button"===S.type||S.nodeName.toUpperCase()==="BUTTON"},input:function(S){return/input|select|textarea|button/i.test(S.nodeName)}},setFilters:{first:function(T,S){return S===0},last:function(U,T,S,V){return T===V.length-1},even:function(T,S){return S%2===0},odd:function(T,S){return S%2===1},lt:function(U,T,S){return 
T";if(T.querySelectorAll&&T.querySelectorAll(".TEST").length===0){return}F=function(X,W,U,V){W=W||document;if(!V&&W.nodeType===9&&!P(W)){try{return E(W.querySelectorAll(X),U)}catch(Y){}}return S(X,W,U,V)};F.find=S.find;F.filter=S.filter;F.selectors=S.selectors;F.matches=S.matches})()}if(document.getElementsByClassName&&document.documentElement.getElementsByClassName){H.order.splice(1,0,"CLASS");H.find.CLASS=function(S,T){return T.getElementsByClassName(S[1])}}function O(T,Z,Y,ac,aa,ab){for(var W=0,U=ac.length;W0){W=S;break}}}S=S[T]}ab[V]=W}}}var J=document.compareDocumentPosition?function(T,S){return T.compareDocumentPosition(S)&16}:function(T,S){return T!==S&&(T.contains?T.contains(S):true)};var P=function(S){return S.nodeType===9&&S.documentElement.nodeName!=="HTML"||!!S.ownerDocument&&P(S.ownerDocument)};var I=function(S,Z){var V=[],W="",X,U=Z.nodeType?[Z]:Z;while((X=H.match.PSEUDO.exec(S))){W+=X[0];S=S.replace(H.match.PSEUDO,"")}S=H.relative[S]?S+"*":S;for(var Y=0,T=U.length;YS[3]-0},nth:function(U,T,S){return S[3]-0==T},eq:function(U,T,S){return S[3]-0==T}},filter:{CHILD:function(S,V){var Y=V[1],Z=S.parentNode;var X=V[0];if(Z&&(!Z[X]||!S.nodeIndex)){var W=1;for(var T=Z.firstChild;T;T=T.nextSibling){if(T.nodeType==1){T.nodeIndex=W++}}Z[X]=W-1}if(Y=="first"){return S.nodeIndex==1}else{if(Y=="last"){return S.nodeIndex==Z[X]}else{if(Y=="only"){return Z[X]==1}else{if(Y=="nth"){var ab=false,U=V[2],aa=V[3];if(U==1&&aa==0){return true}if(U==0){if(S.nodeIndex==aa){ab=true}}else{if((S.nodeIndex-aa)%U==0&&(S.nodeIndex-aa)/U>=0){ab=true}}return ab}}}}},PSEUDO:function(Y,U,V,Z){var T=U[1],W=H.filters[T];if(W){return W(Y,V,U,Z)}else{if(T==="contains"){return(Y.textContent||Y.innerText||"").indexOf(U[3])>=0}else{if(T==="not"){var X=U[3];for(var V=0,S=X.length;V=0:V==="~="?(" "+X+" ").indexOf(T)>=0:!U[4]?S:V==="!="?X!=T:V==="^="?X.indexOf(T)===0:V==="$="?X.substr(X.length-T.length)===T:V==="|="?X===T||X.substr(0,T.length+1)===T+"-":false},POS:function(W,T,U,X){var 
S=T[2],V=H.setFilters[S];if(V){return V(W,U,T,X)}}}};var L=H.match.POS;for(var N in H.match){H.match[N]=RegExp(H.match[N].source+/(?![^\[]*\])(?![^\(]*\))/.source)}var E=function(T,S){T=Array.prototype.slice.call(T);if(S){S.push.apply(S,T);return S}return T};try{Array.prototype.slice.call(document.documentElement.childNodes)}catch(M){E=function(W,V){var T=V||[];if(G.call(W)==="[object Array]"){Array.prototype.push.apply(T,W)}else{if(typeof W.length==="number"){for(var U=0,S=W.length;U";var S=document.documentElement;S.insertBefore(T,S.firstChild);if(!!document.getElementById(U)){H.find.ID=function(W,X,Y){if(typeof X.getElementById!=="undefined"&&!Y){var V=X.getElementById(W[1]);return V?V.id===W[1]||typeof V.getAttributeNode!=="undefined"&&V.getAttributeNode("id").nodeValue===W[1]?[V]:g:[]}};H.filter.ID=function(X,V){var W=typeof X.getAttributeNode!=="undefined"&&X.getAttributeNode("id");return X.nodeType===1&&W&&W.nodeValue===V}}S.removeChild(T)})();(function(){var S=document.createElement("div");S.appendChild(document.createComment(""));if(S.getElementsByTagName("*").length>0){H.find.TAG=function(T,X){var W=X.getElementsByTagName(T[1]);if(T[1]==="*"){var V=[];for(var U=0;W[U];U++){if(W[U].nodeType===1){V.push(W[U])}}W=V}return W}}S.innerHTML="";if(S.firstChild&&S.firstChild.getAttribute("href")!=="#"){H.attrHandle.href=function(T){return T.getAttribute("href",2)}}})();if(document.querySelectorAll){(function(){var S=F,T=document.createElement("div");T.innerHTML="a';var 
H=K.getElementsByTagName("*"),E=K.getElementsByTagName("a")[0];if(!H||!H.length||!E){return}o.support={leadingWhitespace:K.firstChild.nodeType==3,tbody:!K.getElementsByTagName("tbody").length,objectAll:!!K.getElementsByTagName("object")[0].getElementsByTagName("*").length,htmlSerialize:!!K.getElementsByTagName("link").length,style:/red/.test(E.getAttribute("style")),hrefNormalized:E.getAttribute("href")==="/a",opacity:E.style.opacity==="0.5",cssFloat:!!E.style.cssFloat,scriptEval:false,noCloneEvent:true,boxModel:null};G.type="text/javascript";try{G.appendChild(document.createTextNode("window."+J+"=1;"))}catch(I){}F.insertBefore(G,F.firstChild);if(l[J]){o.support.scriptEval=true;delete l[J]}F.removeChild(G);if(K.attachEvent&&K.fireEvent){K.attachEvent("onclick",function(){o.support.noCloneEvent=false;K.detachEvent("onclick",arguments.callee)});K.cloneNode(true).fireEvent("onclick")}o(function(){var L=document.createElement("div");L.style.width="1px";L.style.paddingLeft="1px";document.body.appendChild(L);o.boxModel=o.support.boxModel=L.offsetWidth===2;document.body.removeChild(L)})})();var w=o.support.cssFloat?"cssFloat":"styleFloat";o.props={"for":"htmlFor","class":"className","float":w,cssFloat:w,styleFloat:w,readonly:"readOnly",maxlength:"maxLength",cellspacing:"cellSpacing",rowspan:"rowSpan",tabindex:"tabIndex"};o.fn.extend({_load:o.fn.load,load:function(G,J,K){if(typeof G!=="string"){return this._load(G)}var I=G.indexOf(" ");if(I>=0){var E=G.slice(I,G.length);G=G.slice(0,I)}var H="GET";if(J){if(o.isFunction(J)){K=J;J=null}else{if(typeof J==="object"){J=o.param(J);H="POST"}}}var F=this;o.ajax({url:G,type:H,dataType:"html",data:J,complete:function(M,L){if(L=="success"||L=="notmodified"){F.html(E?o("").append(M.responseText.replace(/