├── README.md
├── doc
└── element-events.txt
├── index.html
├── index.less
├── less.js
├── main.js
└── prettify.js
/README.md:
--------------------------------------------------------------------------------
1 | # webkit-editor
2 |
3 | This is an experiment in creating a text editor (for computer code) in Safari/WebKit.
4 |
5 | **Requires Safari 4 or Chrome**
6 |
7 | > Disclaimer: This is just for fun and not meant to be a useful editor.
8 |
9 | ## Usage
10 |
11 | Open `index.html` or try it out online.
12 |
13 | ### Disabling Prettify (syntax highlighting)
14 |
15 | This thing currently uses [Google Prettify](http://code.google.com/p/google-code-prettify/) for syntax highlighting, and that integration is very buggy. You can disable Prettify (and thus enable things like undo/redo, which break with Prettify) by changing the following line in `index.html`:
16 |
17 |
18 |
19 | to:
20 |
21 |
22 |
23 | ## MIT license
24 |
25 | Copyright (c) 2010 Rasmus Andersson
21 | I'm sorry, but this little experiment only functions properly in 22 | Safari 4 or Chrome. 23 |
24 |25 | Try it anyway 26 |
27 |var sys = require('sys'); 36 | var events = require("events"), 37 | Buffer = require('buffer').Buffer; 38 | 39 | // execute fn (in the local process), finalized by invocation of callback 40 | function execute(ctx, fn, callback) { 41 | var result; 42 | try { 43 | result = fn(ctx, callback); 44 | } catch (err) { 45 | // fn caused some trouble 46 | return callback(err); 47 | } 48 | if (result === callback || result === exports.LATER) { 49 | // fn is async and will invoke callback when done 50 | return callback; 51 | } else if (result === undefined) { 52 | // undefined (no) return value means the context is to be returned 53 | result = ctx; 54 | } 55 | callback(null, result); 56 | } 57 | 58 | exports.LATER = 1; 59 | 60 | if (!Array.prototype.map) { 61 | Array.prototype.map = function(fun /*, thisp*/) { 62 | var len = this.length >>> 0; 63 | var res = new Array(len); 64 | var thisp = arguments[1]; 65 | 66 | for (var i = 0; i < len; i++) { 67 | if (i in this) { 68 | res[i] = fun.call(thisp, this[i], i, this); 69 | } 70 | } 71 | return res; 72 | }; 73 | } 74 | if (!Array.prototype.filter) { 75 | Array.prototype.filter = function (block /*, thisp */) { 76 | var values = []; 77 | var thisp = arguments[1]; 78 | for (var i = 0; i < this.length; i++) { 79 | if (block.call(thisp, this[i])) { 80 | values.push(this[i]); 81 | } 82 | } 83 | return values; 84 | }; 85 | } 86 |87 |
[-1]{0}
',
2455 | '[0]{current}
',
2456 | '[1]{2}
',
2457 | '' + href + " " + 2464 | 'on line ' + e.line + ', column ' + (e.column + 1) + ':
' + 2465 | template.replace(/\[(-?\d)\]/g, function (_, i) { 2466 | return (parseInt(e.line) + parseInt(i)) || ''; 2467 | }).replace(/\{(\d)\}/g, function (_, i) { 2468 | return e.extract[parseInt(i)] || ''; 2469 | }).replace(/\{current\}/, e.extract[1].slice(0, e.column) + 2470 | '' + 2471 | e.extract[1].slice(e.column) + 2472 | ''); 2473 | // CSS for error messages 2474 | createCSS([ 2475 | '.less-error-message span {', 2476 | 'margin-right: 15px;', 2477 | '}', 2478 | '.less-error-message pre {', 2479 | 'color: #ee4444;', 2480 | 'padding: 4px 0;', 2481 | 'margin: 0;', 2482 | '}', 2483 | '.less-error-message pre.ctx {', 2484 | 'color: #dd7777;', 2485 | '}', 2486 | '.less-error-message h3 {', 2487 | 'padding: 15px 0 5px 0;', 2488 | 'margin: 0;', 2489 | '}', 2490 | '.less-error-message a {', 2491 | 'color: #10a', 2492 | '}', 2493 | '.less-error-message .error {', 2494 | 'color: red;', 2495 | 'font-weight: bold;', 2496 | 'padding-bottom: 2px;', 2497 | 'border-bottom: 1px dashed red;', 2498 | '}' 2499 | ].join('\n'), { title: 'error-message' }); 2500 | 2501 | elem.style.cssText = [ 2502 | "font-family: Arial, sans-serif", 2503 | "border: 1px solid #e00", 2504 | "background-color: #eee", 2505 | "border-radius: 5px", 2506 | "-webkit-border-radius: 5px", 2507 | "-moz-border-radius: 5px", 2508 | "color: #e00", 2509 | "padding: 15px", 2510 | "margin-bottom: 15px" 2511 | ].join(';'); 2512 | 2513 | if (less.env == 'development') { 2514 | timer = setInterval(function () { 2515 | if (document.body) { 2516 | if (document.getElementById(id)) { 2517 | document.body.replaceChild(elem, document.getElementById(id)); 2518 | } else { 2519 | document.body.insertBefore(elem, document.body.firstChild); 2520 | } 2521 | clearInterval(timer); 2522 | } 2523 | }, 10); 2524 | } 2525 | } 2526 | 2527 | })(window); -------------------------------------------------------------------------------- /main.js: -------------------------------------------------------------------------------- 1 | 
(function(exports){ 2 | // ------------------------------------------------------------------------- 3 | // utils 4 | 5 | function prop(obj, name, getter, setter) { 6 | var m = {}; 7 | if (getter) m.get = getter; 8 | if (setter) m.set = setter; 9 | Object.defineProperty(obj, name, m); 10 | } 11 | 12 | /*function getSelectionState() { 13 | var sel = window.getSelection(); 14 | // the Selection object returned is volatile -- make a copy of its state 15 | // See https://developer.mozilla.org/en/DOM/selection for details 16 | return { 17 | anchorNode: sel.anchorNode, // the node in which the selection begins. 18 | anchorOffset: sel.anchorOffset, // number of characters that the selection's 19 | // anchor is offset within the anchorNode. 20 | focusNode: sel.focusNode, // node in which the selection ends. 21 | focusOffset: sel.focusOffset, // number of characters that the selection's 22 | // focus is offset within the focusNode. 23 | baseNode: sel.baseNode, 24 | baseOffset: sel.baseOffset, 25 | isCollapsed: sel.isCollapsed, // whether the selection's start and end 26 | // points are at the same position. 27 | type: sel.type, 28 | extentNode: sel.extentNode, 29 | extentOffset: sel.extentOffset, 30 | //rangeCount: sel.rangeCount, // number of ranges in the selection. 31 | }; 32 | }*/ 33 | 34 | if (!Array.prototype.unique) 35 | Array.prototype.unique = function () { 36 | var buf = [], value; 37 | for (var i=0,L=this.length;i20 | * 21 | * For a fairly comprehensive set of languages see the 22 | * README 23 | * file that came with this source. At a minimum, the lexer should work on a 24 | * number of languages including C and friends, Java, Python, Bash, SQL, HTML, 25 | * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk 26 | * and a subset of Perl, but, because of commenting conventions, doesn't work on 27 | * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class. 28 | *
29 | * Usage:
33 | * mark the {@code <pre>} and {@code <code>} tags in your source with
34 | * {@code class=prettyprint.}
35 | * You can also use the (html deprecated) {@code <xmp>} tag, but the pretty
36 | * printer needs to do more substantial DOM manipulations to support that, so
37 | * some css styles may not be preserved.
38 | *
41 | * You can add another class to the {@code <pre>} or {@code <code>} element to specify the
42 | * language, as in {@code <pre class="prettyprint lang-java">}. Any class that
43 | * starts with "lang-" followed by a file extension, specifies the file type.
44 | * See the "lang-*.js" files in this directory for code that implements
45 | * per-language file handlers.
46 | *
47 | * Change log:
48 | * cbeust, 2006/08/22
49 | *
50 | * Java annotations (start with "@") are now captured as literals ("lit")
51 | *
52 | * @requires console
53 | * @overrides window
54 | */
55 |
56 | // JSLint declarations
57 | /*global console, document, navigator, setTimeout, window */
58 |
/**
 * Split {@code prettyPrint} into multiple timeouts so as not to interfere with
 * UI events.
 * If set to {@code false}, {@code prettyPrint()} is synchronous.
 */
window['PR_SHOULD_USE_CONTINUATION'] = true;

/** the number of characters between tab columns */
window['PR_TAB_WIDTH'] = 8;

// Pre-declare the public entry points as undefined. The statements are listed
// in the order in which the original chained assignment stored the values
// (right to left), so the properties are created in the same sequence.

/** Find all the {@code <pre>} and {@code <code>} tags in the DOM with
 * {@code class=prettyprint} and prettify them.
 * @param {Function?} opt_whenDone if specified, called when the last entry
 *     has been finished.
 */
window['prettyPrint'] = void 0;

/** Pretty print a chunk of code.
 *
 * @param {string} sourceCodeHtml code as html
 * @return {string} code as html, but prettier
 */
window['prettyPrintOne'] = void 0;

/** Contains functions for creating and registering new language handlers.
 * @type {Object}
 */
window['PR'] = void 0;

/** Walks the DOM returning a properly escaped version of innerHTML.
 * @param {Node} node
 * @param {Array.<string>} out output buffer that receives chunks of HTML.
 */
window['PR_normalizedHtml'] = void 0;
92 |
/**
 * Browser detection. @extern
 * Only "MSIE 6.", "MSIE 7." and "MSIE 8." user agent strings are recognised.
 * The answer is computed on the first call; the function then replaces itself
 * on {@code window} with a version that returns the cached answer.
 * @returns false if not IE, otherwise the major version.
 */
window['_pr_isIE6'] = function () {
  var found = navigator && navigator.userAgent &&
      navigator.userAgent.match(/\bMSIE ([678])\./);
  var cachedVersion = found ? +found[1] : false;
  window['_pr_isIE6'] = function () { return cachedVersion; };
  return cachedVersion;
};
101 |
102 |
103 | (function () {
// Keyword lists for various languages.
// Every list is a string of space separated words that ends with a single
// space, so that lists can be concatenated directly.
var FLOW_CONTROL_KEYWORDS = [
  'break continue do else for if return while'
].join(' ') + ' ';

var C_KEYWORDS = FLOW_CONTROL_KEYWORDS + [
  'auto case char const default',
  'double enum extern float goto int long register short signed sizeof',
  'static struct switch typedef union unsigned void volatile'
].join(' ') + ' ';

var COMMON_KEYWORDS = C_KEYWORDS + [
  'catch class delete false import',
  'new operator private protected public this throw true try typeof'
].join(' ') + ' ';

var CPP_KEYWORDS = COMMON_KEYWORDS + [
  'alignof align_union asm axiom bool',
  'concept concept_map const_cast constexpr decltype',
  'dynamic_cast explicit export friend inline late_check',
  'mutable namespace nullptr reinterpret_cast static_assert static_cast',
  'template typeid typename using virtual wchar_t where'
].join(' ') + ' ';

var JAVA_KEYWORDS = COMMON_KEYWORDS + [
  'abstract boolean byte extends final finally implements import',
  'instanceof null native package strictfp super synchronized throws',
  'transient'
].join(' ') + ' ';

var CSHARP_KEYWORDS = JAVA_KEYWORDS + [
  'as base by checked decimal delegate descending event',
  'fixed foreach from group implicit in interface internal into is lock',
  'object out override orderby params partial readonly ref sbyte sealed',
  'stackalloc string select uint ulong unchecked unsafe ushort var'
].join(' ') + ' ';

var JSCRIPT_KEYWORDS = COMMON_KEYWORDS + [
  'debugger eval export function get null set undefined var with',
  'Infinity NaN'
].join(' ') + ' ';

var PERL_KEYWORDS = [
  'caller delete die do dump elsif eval exit foreach for',
  'goto if import last local my next no our print package redo require',
  'sub undef unless until use wantarray while BEGIN END'
].join(' ') + ' ';

var PYTHON_KEYWORDS = FLOW_CONTROL_KEYWORDS + [
  'and as assert class def del',
  'elif except exec finally from global import in is lambda',
  'nonlocal not or pass print raise try with yield',
  'False True None'
].join(' ') + ' ';

var RUBY_KEYWORDS = FLOW_CONTROL_KEYWORDS + [
  'alias and begin case class def',
  'defined elsif end ensure false in module next nil not or redo rescue',
  'retry self super then true undef unless until when yield BEGIN END'
].join(' ') + ' ';

var SH_KEYWORDS = FLOW_CONTROL_KEYWORDS + [
  'case done elif esac eval fi',
  'function in local set then until'
].join(' ') + ' ';

// Union of every list above (duplicates are not removed).
var ALL_KEYWORDS = [
  CPP_KEYWORDS, CSHARP_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS,
  PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS
].join('');
144 |
// Token style names. Each value is the css class applied to tokens of that
// kind.

// -- source code tokens --
/** string literal */
var PR_STRING = 'str';
/** keyword */
var PR_KEYWORD = 'kwd';
/** comment */
var PR_COMMENT = 'com';
/** type name */
var PR_TYPE = 'typ';
/** literal value, e.g. 1, null, true */
var PR_LITERAL = 'lit';
/** punctuation string */
var PR_PUNCTUATION = 'pun';
/** plain text */
var PR_PLAIN = 'pln';

// -- markup tokens --
/** sgml tag */
var PR_TAG = 'tag';
/** markup declaration such as a DOCTYPE */
var PR_DECLARATION = 'dec';
/** embedded source */
var PR_SOURCE = 'src';
/** sgml attribute name */
var PR_ATTRIB_NAME = 'atn';
/** sgml attribute value */
var PR_ATTRIB_VALUE = 'atv';

/**
 * Class marking a section of markup that is not code, e.g. to allow
 * embedding of line numbers within code listings.
 */
var PR_NOCODE = 'nocode';
177 |
/** Source text of a regular expression that matches the tokens which can
 * precede a regular expression literal in javascript.
 * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full
 * list, but ones that might be problematic when seen in languages that don't
 * support regular expression literals have been removed.
 *
 * Specifically, keywords that can't precede a regexp literal in a
 * syntactically legal javascript program are left out, and so is the "in"
 * keyword since it's not a keyword in many languages, and might be used as a
 * count of inches.
 *
 * The link above does not accurately describe EcmaScript rules since
 * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works
 * very well in practice.
 *
 * CAVEAT: this does not properly handle the case where a regular expression
 * immediately follows another since a regular expression may have flags for
 * case-sensitivity and the like. Having regexp tokens adjacent is not valid
 * in any language the original author was aware of, so that case is punted.
 * TODO: maybe style special characters inside a regexp as punctuation.
 *
 * @private
 */
var REGEXP_PRECEDER_PATTERN = (function () {
  // "+" and "-" are covered by the leading [+-] alternative; ".", ".." and
  // "..." are deliberately absent ("handled below" per the original note).
  var preceders = [
    "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=",
    "&=", "(", "*", "*=", "+=", ",", "-=",
    "->", "/", "/=", ":", "::", ";",
    "<", "<<", "<<=", "<=", "=", "==", "===", ">",
    ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[",
    "^", "^=", "^^", "^^=", "{", "|", "|=", "||",
    "||=", "~" /* handles =~ and !~ */,
    "break", "case", "continue", "delete",
    "do", "else", "finally", "instanceof",
    "return", "throw", "try", "typeof"
  ];
  var alternatives = ['(?:^^', '[+-]'];
  for (var k = 0; k < preceders.length; ++k) {
    // Backslash-escape everything except = < > : & and lower case letters.
    alternatives.push(preceders[k].replace(/([^=<>:&a-z])/g, '\\$1'));
  }
  // The trailing \s* matches at end, and matches the empty string.
  return alternatives.join('|') + ')\\s*';
}());
220 |
// Define regexps here so that the interpreter doesn't have to create an
// object each time the function containing them is called.
// The language spec requires a new object created even if you don't access
// the $1 members.
var pr_amp = /&/g;
var pr_lt = /</g;
var pr_gt = />/g;
var pr_quot = /\"/g;

/**
 * Like textToHtml but also escapes double quotes, so the result is safe to
 * place inside a double-quoted attribute value.
 * @param {string} str plain text.
 * @return {string} str with &, <, > and " replaced by character entities.
 */
function attribToHtml(str) {
  // '&' is replaced first so the ampersands introduced by the later
  // replacements are not escaped a second time.
  return str.replace(pr_amp, '&amp;')
      .replace(pr_lt, '&lt;')
      .replace(pr_gt, '&gt;')
      .replace(pr_quot, '&quot;');
}

/**
 * Escapes html special characters to html.
 * @param {string} str plain text.
 * @return {string} str with &, < and > replaced by character entities.
 */
function textToHtml(str) {
  return str.replace(pr_amp, '&amp;')
      .replace(pr_lt, '&lt;')
      .replace(pr_gt, '&gt;');
}
243 |
244 |
var pr_ltEnt = /&lt;/g;
var pr_gtEnt = /&gt;/g;
var pr_aposEnt = /&apos;/g;
var pr_quotEnt = /&quot;/g;
var pr_ampEnt = /&amp;/g;
var pr_nbspEnt = /&nbsp;/g;

/**
 * Unescapes html to plain text.
 *
 * Decimal (&#65;) and hexadecimal (&#x41; / &#X41;) numeric entities are
 * decoded first, then the named entities lt, gt, apos, quot, nbsp and amp.
 * Any other entity is left untouched.
 *
 * @param {string} html text that may contain character entities.
 * @return {string} the text with the supported entities decoded.
 */
function htmlToText(html) {
  var pos = html.indexOf('&');
  if (pos < 0) { return html; }  // no entities at all: nothing to do
  // Handle numeric entities specially.  We can't use functional substitution
  // since that doesn't work in older versions of Safari.
  // These should be rare since most browsers convert them to normal chars.
  for (--pos; (pos = html.indexOf('&#', pos + 1)) >= 0;) {
    var end = html.indexOf(';', pos);
    if (end >= 0) {
      // The digits start right after the two characters "&#".
      var num = html.substring(pos + 2, end);
      var radix = 10;
      if (num && num.charAt(0).toLowerCase() === 'x') {
        num = num.substring(1);
        radix = 16;
      }
      // Only decode a well formed run of digits; parseInt alone would accept
      // a numeric prefix and swallow unrelated text up to the next ';'.
      var wellFormed = (radix === 16 ? /^[0-9a-f]+$/i : /^[0-9]+$/).test(num);
      var codePoint = wellFormed ? parseInt(num, radix) : NaN;
      if (!isNaN(codePoint)) {
        // NOTE: String.fromCharCode only handles values up to 0xFFFF.
        html = (html.substring(0, pos) + String.fromCharCode(codePoint) +
                html.substring(end + 1));
      }
    }
  }

  // &amp; is decoded last so that "&amp;lt;" yields "&lt;" rather than "<".
  return html.replace(pr_ltEnt, '<')
      .replace(pr_gtEnt, '>')
      .replace(pr_aposEnt, "'")
      .replace(pr_quotEnt, '"')
      .replace(pr_nbspEnt, ' ')
      .replace(pr_ampEnt, '&');
}
282 |
/**
 * Is the given node's innerHTML normally unescaped?
 * True only when the node's tagName is exactly 'XMP'.
 */
function isRawContent(node) {
  var tag = node.tagName;
  return tag === 'XMP';
}
287 |
var newlineRe = /[\r\n]/g;
/**
 * Are newlines and adjacent spaces significant in the given node's innerHTML?
 *
 * @param {Object} node the element to inspect; its tagName, currentStyle or
 *     computed style are consulted.
 * @param {string} content the node's innerHTML.
 * @return {boolean} true for PRE elements, for content without any newline
 *     (the answer does not matter then), when no white-space style can be
 *     determined, or when the white-space style is exactly 'pre'.
 */
function isPreformatted(node, content) {
  // PRE means preformatted, and is a very common case, so don't create
  // unnecessary computed style objects.
  if ('PRE' === node.tagName) { return true; }
  // newlineRe is global, so test() resumes from lastIndex.  Reset it, or a
  // match in a previous call could make this call skip the start of content
  // and miss a newline.
  newlineRe.lastIndex = 0;
  var hasNewline = newlineRe.test(content);
  newlineRe.lastIndex = 0;
  if (!hasNewline) { return true; }  // Don't care
  var whitespace = '';
  // For disconnected nodes, IE has no currentStyle.
  if (node.currentStyle) {
    whitespace = node.currentStyle.whiteSpace;
  } else if (window.getComputedStyle) {
    // Firefox makes a best guess if node is disconnected whereas Safari
    // returns the empty string.
    whitespace = window.getComputedStyle(node, null).whiteSpace;
  }
  return !whitespace || whitespace === 'pre';
}
308 |
/**
 * Walks node and its descendants, appending an escaped html serialization to
 * out.  Elements (nodeType 1) are written as lower case tags with their
 * specified attributes; text and CDATA nodes (nodeType 3 and 4) are written
 * through textToHtml; every other node type is ignored.
 *
 * @param {Node} node the node to serialize.
 * @param {Array} out output buffer that receives chunks of html.
 * @param {boolean=} opt_sortAttrs if truthy, attributes are emitted sorted by
 *     name.
 */
function normalizedHtml(node, out, opt_sortAttrs) {
  var type = node.nodeType;
  if (type === 3 || type === 4) {  // text
    out.push(textToHtml(node.nodeValue));
    return;
  }
  if (type !== 1) { return; }  // not an element

  var name = node.tagName.toLowerCase();
  out.push('<', name);

  var attrs = node.attributes;
  var n = attrs.length;
  if (n) {
    if (opt_sortAttrs) {
      // Copy into a real array so it can be sorted.
      var sortedAttrs = [];
      for (var k = 0; k < n; ++k) { sortedAttrs[k] = attrs[k]; }
      sortedAttrs.sort(function (a, b) {
        if (a.name < b.name) { return -1; }
        return a.name === b.name ? 0 : 1;
      });
      attrs = sortedAttrs;
    }
    for (var j = 0; j < n; ++j) {
      var attr = attrs[j];
      if (attr.specified) {
        out.push(' ', attr.name.toLowerCase(),
                 '="', attribToHtml(attr.value), '"');
      }
    }
  }
  out.push('>');

  var child = node.firstChild;
  while (child) {
    normalizedHtml(child, out, opt_sortAttrs);
    child = child.nextSibling;
  }

  // Childless br, link and img elements get no end tag.
  var isEmptyVoidTag = !node.firstChild && /^(?:br|link|img)$/.test(name);
  if (!isEmptyVoidTag) {
    out.push('<\/', name, '>');
  }
}
346 |
/**
 * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
 * matches the union of the sets of strings matched by the input RegExp.
 * Since it matches globally, if the input strings have a start-of-input
 * anchor (/^.../), it is ignored for the purposes of unioning.
 *
 * Capturing groups that are never back-referenced are turned into
 * non-capturing groups; the remaining groups and the back-references to them
 * are renumbered so they stay consistent inside the combined expression.
 *
 * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
 * @return {RegExp} a global regex.
 * @throws {Error} if any input regex is global or multiline.
 */
function combinePrefixPatterns(regexs) {
  // Number of capturing groups emitted so far, across all input regexs.
  var capturedGroupIndex = 0;

  // Case folding is needed as soon as one case-sensitive regex contains a
  // letter: the combined regex then cannot carry the 'i' flag, so the
  // case-insensitive inputs have their letters expanded to [Xx] instead.
  var needToFoldCase = false;
  var ignoreCase = false;
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.ignoreCase) {
      ignoreCase = true;
    } else if (/[a-z]/i.test(regex.source.replace(
                   /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
      needToFoldCase = true;
      ignoreCase = false;
      break;
    }
  }

  /** Returns the character code denoted by one member of a character set. */
  function decodeEscape(charsetPart) {
    if (charsetPart.charAt(0) !== '\\') { return charsetPart.charCodeAt(0); }
    switch (charsetPart.charAt(1)) {
      case 'b': return 8;
      case 't': return 9;
      case 'n': return 0xa;
      case 'v': return 0xb;
      case 'f': return 0xc;
      case 'r': return 0xd;
      case 'u': case 'x':
        return parseInt(charsetPart.substring(2), 16)
            || charsetPart.charCodeAt(1);
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7':
        return parseInt(charsetPart.substring(1), 8);
      default: return charsetPart.charCodeAt(1);
    }
  }

  /** Returns text that denotes charCode inside a character set. */
  function encodeEscape(charCode) {
    if (charCode < 0x20) {
      return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
    }
    var ch = String.fromCharCode(charCode);
    if (ch === '\\' || ch === '-' || ch === '[' || ch === ']') {
      ch = '\\' + ch;
    }
    return ch;
  }

  /**
   * Rewrites a character set such as [a-c] so that it matches both cases of
   * every letter it contains.
   */
  function caseFoldCharset(charSet) {
    var charsetParts = charSet.substring(1, charSet.length - 1).match(
        new RegExp(
            '\\\\u[0-9A-Fa-f]{4}'
            + '|\\\\x[0-9A-Fa-f]{2}'
            + '|\\\\[0-3][0-7]{0,2}'
            + '|\\\\[0-7]{1,2}'
            + '|\\\\[\\s\\S]'
            + '|-'
            + '|[^-\\\\]',
            'g'));
    var groups = [];
    var ranges = [];
    var inverse = charsetParts[0] === '^';
    for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
      var p = charsetParts[i];
      switch (p) {
        case '\\B': case '\\b':
        case '\\D': case '\\d':
        case '\\S': case '\\s':
        case '\\W': case '\\w':
          // Character classes are copied through unchanged.
          groups.push(p);
          continue;
      }
      var start = decodeEscape(p);
      var end;
      if (i + 2 < n && '-' === charsetParts[i + 1]) {
        end = decodeEscape(charsetParts[i + 2]);
        i += 2;
      } else {
        end = start;
      }
      ranges.push([start, end]);
      // If the range might intersect letters, then expand it.
      if (!(end < 65 || start > 122)) {
        if (!(end < 65 || start > 90)) {
          // upper case part of the range -> add the lower case equivalent
          ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
        }
        if (!(end < 97 || start > 122)) {
          // lower case part of the range -> add the upper case equivalent
          ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
        }
      }
    }

    // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
    // -> [[1, 12], [14, 14], [16, 17]]
    ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); });
    var consolidatedRanges = [];
    var lastRange = [NaN, NaN];
    for (var i = 0; i < ranges.length; ++i) {
      var range = ranges[i];
      if (range[0] <= lastRange[1] + 1) {
        lastRange[1] = Math.max(lastRange[1], range[1]);
      } else {
        consolidatedRanges.push(lastRange = range);
      }
    }

    var out = ['['];
    if (inverse) { out.push('^'); }
    out.push.apply(out, groups);
    for (var i = 0; i < consolidatedRanges.length; ++i) {
      var range = consolidatedRanges[i];
      out.push(encodeEscape(range[0]));
      if (range[1] > range[0]) {
        out.push('-');
        out.push(encodeEscape(range[1]));
      }
    }
    out.push(']');
    return out.join('');
  }

  /**
   * Returns the source of regex with prefix anchors removed, unused
   * capturing groups made non-capturing, used groups and back-references
   * renumbered, and (when required) letters expanded to both cases.
   */
  function allowAnywhereFoldCaseAndRenumberGroups(regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    var parts = regex.source.match(
        new RegExp(
            '(?:'
            + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
            + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
            + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
            + '|\\\\[0-9]+'  // a back-reference or octal escape
            + '|\\\\[^ux0-9]'  // other escape sequence
            + '|\\(\\?[:!=]'  // start of a non-capturing group
            + '|[\\(\\)\\^]'  // start/end of a group, or line start
            + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
            + ')',
            'g'));
    var n = parts.length;

    // Maps captured group numbers to the number they will occupy in
    // the output or to -1 if that has not been determined, or to
    // undefined if they need not be capturing in the output.
    var capturedGroups = [];

    // Walk over and identify back references to build the capturedGroups
    // mapping.
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        // groups are 1-indexed, so max group index is count of '('
        ++groupIndex;
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          capturedGroups[decimalValue] = -1;
        }
      }
    }

    // Renumber groups and reduce capturing groups to non-capturing groups
    // where possible.
    for (var i = 1; i < capturedGroups.length; ++i) {
      if (-1 === capturedGroups[i]) {
        capturedGroups[i] = ++capturedGroupIndex;
      }
    }
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        ++groupIndex;
        if (capturedGroups[groupIndex] === undefined) {
          parts[i] = '(?:';
        }
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          // Look up the group the back-reference points at (decimalValue),
          // not the most recently opened group (groupIndex).
          parts[i] = '\\' + capturedGroups[decimalValue];
        }
      }
    }

    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; }
    }

    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if (regex.ignoreCase && needToFoldCase) {
      for (var i = 0; i < n; ++i) {
        var p = parts[i];
        var ch0 = p.charAt(0);
        if (p.length >= 2 && ch0 === '[') {
          parts[i] = caseFoldCharset(p);
        } else if (ch0 !== '\\') {
          // TODO: handle letters in numeric escapes.
          parts[i] = p.replace(
              /[a-zA-Z]/g,
              function (ch) {
                var cc = ch.charCodeAt(0);
                return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
              });
        }
      }
    }

    return parts.join('');
  }

  var rewritten = [];
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.global || regex.multiline) { throw new Error('' + regex); }
    rewritten.push(
        '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')');
  }

  return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
}
575 |
576 | var PR_innerHtmlWorks = null;
577 | function getInnerHtml(node) {
578 | // inner html is hopelessly broken in Safari 2.0.4 when the content is
579 | // an html description of well formed XML and the containing tag is a PRE
580 | // tag, so we detect that case and emulate innerHTML.
581 | if (null === PR_innerHtmlWorks) {
582 | var testNode = document.createElement('PRE');
583 | testNode.appendChild(
584 | document.createTextNode('\n '));
585 | PR_innerHtmlWorks = !/)[\r\n]+/g, '$1')
595 | .replace(/(?:[\r\n]+[ \t]*)+/g, ' ');
596 | }
597 | return content;
598 | }
599 |
600 | var out = [];
601 | for (var child = node.firstChild; child; child = child.nextSibling) {
602 | normalizedHtml(child, out);
603 | }
604 | return out.join('');
605 | }
606 |
/**
 * Builds a function that expands tabs to spaces.  The returned function can
 * be fed successive chunks of text; the current column is remembered between
 * calls and is reset by every '\n'.
 * @param {number} tabWidth the number of characters between tab columns.
 * @return {function (string) : string} a function that takes
 *   plain text and return the text with tabs expanded.
 * @private
 */
function makeTabExpander(tabWidth) {
  var SPACES = ' ';
  var charInLine = 0;  // column reached so far, shared across calls

  return function (plainText) {
    var chunks = null;    // stays null until the first tab is seen
    var copiedUpTo = 0;   // index of the first character not yet copied
    var len = plainText.length;
    for (var idx = 0; idx < len; ++idx) {
      var c = plainText.charAt(idx);
      if (c === '\n') {
        charInLine = 0;
      } else if (c !== '\t') {
        ++charInLine;
      } else {
        if (!chunks) { chunks = []; }
        chunks.push(plainText.substring(copiedUpTo, idx));
        // Padding needed to reach the next column, where columns occur at
        // multiples of tabWidth.
        var pad = tabWidth - (charInLine % tabWidth);
        charInLine += pad;
        // Emit the padding in pieces of at most SPACES.length characters.
        while (pad >= 0) {
          chunks.push(SPACES.substring(0, pad));
          pad -= SPACES.length;
        }
        copiedUpTo = idx + 1;
      }
    }
    if (!chunks) { return plainText; }
    chunks.push(plainText.substring(copiedUpTo));
    return chunks.join('');
  };
}
654 |
655 | var pr_chunkPattern = new RegExp(
656 | '[^<]+' // A run of characters other than '<'
657 | + '|<\!--[\\s\\S]*?--\>' // an HTML comment
658 | + '|' // a CDATA section
659 | // a probable tag that should not be highlighted
660 | + '|<\/?[a-zA-Z](?:[^>\"\']|\'[^\']*\'|\"[^\"]*\")*>'
661 | + '|<', // A '<' that does not begin a larger chunk
662 | 'g');
663 | var pr_commentPrefix = /^<\!--/;
664 | var pr_cdataPrefix = /^) into their textual equivalent.
671 | *
672 | * @param {string} s html where whitespace is considered significant.
673 | * @return {Object} source code and extracted tags.
674 | * @private
675 | */
/**
 * Splits html into the plain source text it contains and the tags that were
 * removed from it.
 *
 * @param {string} s html where whitespace is considered significant.
 * @return {Object} {@code source} is the text with tags removed and entities
 *     decoded; {@code tags} is a flat list of [position, html] pairs where
 *     position is the index in source at which the html was removed.
 * @private
 */
function extractTags(s) {
  // since the pattern has the 'g' modifier and defines no capturing groups,
  // this will return a list of all chunks which we then classify.
  var chunks = s.match(pr_chunkPattern);
  var count = chunks ? chunks.length : 0;
  var sourceBuf = [];
  var sourceBufLen = 0;
  var extractedTags = [];

  for (var i = 0; i < count; ++i) {
    var chunk = chunks[i];

    // Anything that is not a multi-character chunk starting with '<' is text.
    if (!(chunk.length > 1 && chunk.charAt(0) === '<')) {
      var literalText = htmlToText(chunk);
      sourceBuf.push(literalText);
      sourceBufLen += literalText.length;
      continue;
    }

    // Comments are dropped.
    if (pr_commentPrefix.test(chunk)) { continue; }

    if (pr_cdataPrefix.test(chunk)) {
      // strip CDATA prefix and suffix.  Don't unescape since it's CDATA
      sourceBuf.push(chunk.substring(9, chunk.length - 3));
      sourceBufLen += chunk.length - 12;
      continue;
    }

    if (pr_brPrefix.test(chunk)) {
      // <br> tags are lexically significant so convert them to text.
      // This is undone later.
      sourceBuf.push('\n');
      ++sourceBufLen;
      continue;
    }

    if (!(chunk.indexOf(PR_NOCODE) >= 0 && isNoCodeTag(chunk))) {
      // An ordinary tag: remember it together with its position.
      extractedTags.push(sourceBufLen, chunk);
      continue;
    }

    // A tag carrying the nocode class starts a section that should be
    // ignored.  Continue walking the list until we see a matching end tag.
    var name = chunk.match(pr_tagNameRe)[2];
    var depth = 1;
    var j = i + 1;
    for (; j < count; ++j) {
      var other = chunks[j].match(pr_tagNameRe);
      if (!other || other[2] !== name) { continue; }
      if (other[1] !== '/') {
        ++depth;
      } else if (--depth === 0) {
        break;
      }
    }
    if (j < count) {
      // Store the whole section as a single extracted chunk and skip it.
      extractedTags.push(sourceBufLen, chunks.slice(i, j + 1).join(''));
      i = j;
    } else {  // Ignore unclosed sections.
      extractedTags.push(sourceBufLen, chunk);
    }
  }

  return { source: sourceBuf.join(''), tags: extractedTags };
}
737 |
/**
 * True if the given tag contains a class attribute with the nocode class.
 * @param {string} tag the html text of a single tag.
 * @return {boolean}
 */
function isNoCodeTag(tag) {
  // First canonicalize the representation of attributes, so that every
  // value ends up inside double quotes.
  var canonical = tag.replace(
      /\s(\w+)\s*=\s*(?:\"([^\"]*)\"|'([^\']*)'|(\S+))/g,
      ' $1="$2$3$4"');
  // Then look for the attribute we want.
  var found = canonical.match(/[cC][lL][aA][sS][sS]=\"[^\"]*\bnocode\b/);
  return found !== null;
}
747 |
/**
 * Runs langHandler over sourceCode and appends the decorations it produces
 * to out.  Nothing happens when sourceCode is empty.
 * @param {number} basePos the index of sourceCode within the chunk of source
 *    whose decorations are already present on out.
 * @param {string} sourceCode the text to decorate.
 * @param {Function} langHandler called with a job object holding
 *    {@code source} and {@code basePos}; whatever it stores on
 *    {@code job.decorations} is appended to out.
 * @param {Array} out the decoration list to extend.
 */
function appendDecorations(basePos, sourceCode, langHandler, out) {
  if (sourceCode) {
    var job = { source: sourceCode, basePos: basePos };
    langHandler(job);
    out.push.apply(out, job.decorations);
  }
}
763 |
764 | /** Given triples of [style, pattern, context] returns a lexing function,
765 | * The lexing function interprets the patterns to find token boundaries and
766 | * returns a decoration list of the form
767 | * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
768 | * where index_n is an index into the sourceCode, and style_n is a style
769 | * constant like PR_PLAIN. index_n-1 <= index_n, and style_n-1 applies to
770 | * all characters in sourceCode[index_n-1:index_n].
771 | *
772 | * The stylePatterns is a list whose elements have the form
773 | * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
774 | *
775 | * Style is a style constant like PR_PLAIN, or can be a string of the
776 | * form 'lang-FOO', where FOO is a language extension describing the
777 | * language of the portion of the token in $1 after pattern executes.
778 | * E.g., if style is 'lang-lisp', and group 1 contains the text
779 | * '(hello (world))', then that portion of the token will be passed to the
780 | * registered lisp handler for formatting.
781 | * The text before and after group 1 will be restyled using this decorator
782 | * so decorators should take care that this doesn't result in infinite
783 | * recursion. For example, the HTML lexer rule for SCRIPT elements looks
784 | * something like ['lang-js', /<[s]cript>(.+?)<\/script>/]. This may match
785 | * '