└── hyperlight ├── .htaccess ├── collect-filetypes.php ├── colors ├── vibrant-ink.css └── zenburn.css ├── doc ├── faq │ └── index.php ├── index.php └── user-guide │ ├── code1.php │ ├── index.php │ └── mini-zenburn.css ├── examples ├── index.php └── theme_switcher.js ├── graphics ├── body-background.png ├── head-backback.png └── head-background.png ├── hyperlight.php ├── index.php ├── jquery-1.2.6.min.js ├── languages ├── cpp.php ├── csharp.php ├── css.php ├── filetypes ├── iphp.php ├── php.php ├── python.php ├── vb.php └── xml.php ├── line-numbers.html ├── plugins └── wordpress │ ├── hyperlight.php │ └── readme.txt ├── preg_helper.php ├── style.css ├── test.php ├── tests.php └── tests ├── csharp ├── module1.vb ├── pizzachili_api.h ├── preg_helper.php ├── python ├── simple.css ├── style.css ├── vb ├── xml └── xml2 /hyperlight/.htaccess: -------------------------------------------------------------------------------- 1 | DirectoryIndex index.php 2 | Order deny,allow 3 | Allow from all 4 | Options +Indexes 5 | -------------------------------------------------------------------------------- /hyperlight/collect-filetypes.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | extensions()); 34 | } 35 | 36 | ?> 37 | Updated . 38 | -------------------------------------------------------------------------------- /hyperlight/colors/vibrant-ink.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2008 Konrad Rudolph 3 | * All rights reserved. 4 | * 5 | * Colour scheme based on the Vibrant Ink scheme by Justin Palmer for the 6 | * TextMate text editor. 
7 | * http://alternateidea.com/blog/articles/2006/1/3/textmate-vibrant-ink-theme-and-prototype-bundle 8 | */ 9 | 10 | .source-code { 11 | background: black; 12 | color: white; 13 | } 14 | 15 | .source-code .keyword { color: #F60; font-weight: bold; } 16 | .source-code .keyword.literal { color: #FC0; } 17 | .source-code .keyword.type { color: #FC0; } 18 | .source-code .keyword.builtin { color: #44B4CC; } 19 | .source-code .preprocessor { color: #996; } 20 | .source-code .comment { color: #93C; } 21 | .source-code .comment .doc { color: #399; font-weight: bold; } 22 | .source-code .identifier { color: white; } 23 | .source-code .string, .source-code .char { color: #6F0; } 24 | .source-code .escaped { color: #AAA; } 25 | .source-code .number, .source-code .tag { color: #FFEE98; } 26 | .source-code .regex, .source-code .attribute { color: #44B4CC; } 27 | .source-code .operator { color: #888; } 28 | .source-code .keyword.operator { color: #F60; } 29 | .source-code .whitespace { background: #333; } 30 | .source-code .error { border-bottom: 1px solid red; } 31 | 32 | .source-code .tag .attribute { font-style: italic; } 33 | .source-code.xml .preprocessor .keyword { color: #996; } 34 | .source-code.xml .preprocessor .keyword { color: #996; } 35 | .source-code.xml .meta, .source-code.xml .meta .keyword { color: #399; } 36 | 37 | .source-code.cpp .preprocessor .identifier { color: #996; } 38 | 39 | .source-code::-moz-selection, .source-code span::-moz-selection { 40 | background: yellow; 41 | color: black; 42 | } 43 | 44 | .source-code::selection, .source-code span::selection { 45 | background: yellow; 46 | color: black; 47 | } 48 | -------------------------------------------------------------------------------- /hyperlight/colors/zenburn.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2008 Konrad Rudolph 3 | * All rights reserved. 
4 | * 5 | * Color scheme for code is a simplified version of the VIM Zenburn scheme: 6 | * http://slinky.imukuppi.org/zenburn/ 7 | */ 8 | 9 | .source-code { 10 | background: #3F3F3F; 11 | color: #DCDCCC; 12 | } 13 | 14 | .source-code .comment { 15 | color: #7F9F7F; 16 | font-style: italic; 17 | } 18 | 19 | .source-code .comment .todo { 20 | color: #DFDFDF; 21 | font-weight: bold; 22 | } 23 | 24 | .source-code .tag { 25 | color: #EFEF8F; 26 | } 27 | 28 | .source-code .identifier { 29 | color: #EFDCBC; 30 | } 31 | 32 | .source-code .keyword { 33 | color: #F0DFAF; 34 | font-weight: bold; 35 | } 36 | 37 | .source-code .keyword.builtin { 38 | color: #EFEF8F; 39 | font-weight: normal; 40 | } 41 | 42 | .source-code .keyword.operator { 43 | color: #FFCFAF; 44 | } 45 | 46 | .source-code .number { 47 | color: #8CD0D3; 48 | } 49 | 50 | .source-code .string { 51 | color: #CC9393; 52 | } 53 | 54 | .source-code::-moz-selection, .source-code span::-moz-selection { 55 | background: #70D2B3; 56 | color: #233322; 57 | } 58 | 59 | .source-code::selection, .source-code span::selection { 60 | background: yellow; 61 | color: black; 62 | } 63 | -------------------------------------------------------------------------------- /hyperlight/doc/faq/index.php: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | ‹? Hyperlight ?› Documentation – FAQ 10 | 11 | 12 | 13 | 14 | 15 | 20 |
21 |
22 |
23 |

Frequently Asked Questions

24 |
25 |

The PHP highlighter is broken, right?

26 |
27 |

Yes – unfortunately. This is by design. In our defence, no correct PHP highlighter exists. To see why, consider the following code fragment, which is completely valid PHP:

28 | >foo

', 'php'); ?> 29 |

(On a related note, this code fragment actually breaks the editor this text was written in.)

30 |

Executing this code might produce valid HTML – or it might not, depending on the outcome of the coinflip. The editor (or, in our case, highlighter) has no idea about the outcome without executing the code. And even then, the result changes with every subsequent execution. But even if the above code were more predictable, it would have to be executed in order to determine how to highlight the code. This is not done by Hyperlight or any other highlighting engine. The output is therefore wrong in a few cases. Fortunately, such cases should be relatively rare.

31 |
32 |
33 |
34 |
35 | 36 | 37 | 39 | -------------------------------------------------------------------------------- /hyperlight/doc/index.php: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | ‹? Hyperlight ?› Documentation 10 | 11 | 12 | 13 | 14 | 15 | 20 |
21 |
22 |
23 |

Content

24 | 49 | 50 |

Preface

51 |
52 | Does anybody know anything cool to put here? 53 |
54 |

Good syntax highlighting is crucial for many different kinds of content providers. Therefore, syntax highlighting libraries for the web have always been a central part of web development. However, requirements have changed. With more sophistication in web design came the demand for libraries that create not only high-quality highlighting but also high-quality HTML and CSS code and that are easy to use and to extend.

55 |

Two libraries have raised the bar considerably: Pygments for Python, and CodeRay for Ruby. For PHP, on the other hand, there’s no modern library that fulfills all of these requirements. This is therefore an attempt to offer a remedy.

56 | 59 |
60 |
61 | 62 | 63 | 65 | -------------------------------------------------------------------------------- /hyperlight/doc/user-guide/code1.php: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Very simple test for Hyperlight 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /hyperlight/doc/user-guide/index.php: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | ‹? Hyperlight ?› Documentation – User Guide 10 | 11 | 12 | 13 | 14 | 15 | 20 |
21 |
22 |
23 |

User Guide

24 |

Introduction

25 | 31 |

The user guide aims to ease the first steps in using Hyperlight. Luckily, the interface is really easy to use, mainly because it’s also really small (remember: this is a good thing). Most of the configuration goes on behind the scenes or in the CSS.

32 |

The first part will focus on the end user. However, most users will probably want to customize behaviour in one way or another. We will therefore also discuss how to modify or create themes and syntax definitions. And so, without further ado …

33 | 34 |

Getting Started

35 |

At this point, let’s assume that you have already downloaded and unzipped the package into its target location because let’s face it, who wants to have an umpteenth description of how to unzip an archive?

36 |

To use Hyperlight, all you have to do is to include the main file into your PHP source code and invoke the highlighting function. To highlight a piece of source code, this is all you need to do:

37 | 38 |

To put this in some more context, imagine that you want to highlight the current file. Our program might not be self-replicating or self-modifying but it sure is self-embellishing.

39 | 40 |

It really can’t get much simpler than that.

41 |

Notice that we didn’t have to put special HTML tags around our code. hyperlight does this for us. But don’t worry about lack of control. This function has two more optional arguments that you can use to control what these surrounding tags should look like. The first controls which surrounding tag to use and defaults to – what a surprise – ', 'xml', 'code'); ?>. The second argument controls the attributes that the tag should have (in addition to the class). For a detailed description of how to use these arguments, read the reference entry on the hyperlight function.

42 | 43 |
44 |

Regardless of the fourth argument, the class attribute is always present and can’t be removed – and shouldn’t be: it’s necessary for the CSS themes to work.

45 |
46 | 47 |

PHP is something of a special case; it requires a <?php to start a PHP block. However, when posting code, this is often omitted because only a short snippet is posted. That’s fine. Hyperlight offers a special language tag for this rather unique case: iphp.

48 | 49 |

Another special case occurs when we want to highlight a file. Hyperlight provides a shortcut to do this: hyperlight_file. As an added bonus, if you pass a regular file to this function, you don’t need to specify the file’s language explicitly. Hyperlight tries to figure the right language out by itself, based on the filename extension.

50 |

Now, that’s all there really is to it. Told you it was easy. ;-) But trust me, it gets more interesting once we want to create our own themes or language definitions.

51 | 52 |

Creating Themes

53 |

The whole visual appearance of the highlighted code in Hyperlight is based on a few simple CSS rules. The strength of Hyperlight lies in the fact that these rules are controlled by class names that are the same across all language definitions, thereby making it easy to adapt one theme for all languages.

54 |

At the same time, a finer degree of control might be needed because one size doesn’t fit all. This is possible in three ways. First, language definitions can define mappings between different class names. Secondly, rules can be combined and nested. Lastly, if all else fails, code is also tagged with a language-specific class name. This can be used to establish a specific rule for one language only. Of course, these should be used sparingly because they make it much harder to develop colour themes that are usable across all language definitions. We will examine all these techniques in due course.

55 |

The Theme File

56 |

A theme is just a CSS stylesheet that defines a set of rules based on class names. Therefore, in order to write a theme you need to know the rudiments of CSS. To limit the scope of the styles and make the theme definitions interoperate nicely with other, existing styles, it’s recommended that you prefix all theme-specific rules with .

57 | 58 |

An Example

59 |

Let’s look at a small example theme file, actually a fragment of zenburn.css, which is used for this document.

60 | 61 |

Here, we can see three things:

62 |
    63 |
  1. 64 |

    The first rule sets up the environment. However, refrain from setting more specific information here. In particular, don’t set a border or a font face. These are settings that may be set elsewhere.

    65 |
  2. 66 |
  3. 67 |

    The third rule is nested: it applies only to “todo”s nested inside a comment. As an example, it might apply to

    68 | TODO: i18n', 'xml'); ?> 69 |
  4. 70 |
  5. 71 |

    The last rule, on the other hand, is a specialization. It applies only to built-in keywords and overrides the more general keyword styles. It applies to:

    72 | isset', 'xml'); ?> 73 |
  6. 74 |
75 | 76 |

Core Theme Classes

77 |

Hyperlight uses mappings to unify the kinds of CSS classes used. This drastically reduces the number of possible class names across all languages, while still preserving a representative subset. All theme files should at least be aware of this subset. Notice that this doesn’t mean they should provide different styles for all possible rules – this would probably be a bad idea since it clutters the visual appearance needlessly.

78 |

Here is an alphabetically sorted list of these core class names:

79 |
    80 |
  • char – a character literal
  • 81 |
  • comment – a source code comment
  • 82 |
  • doc – a documentation tag; usually nested inside a comment
  • 83 |
  • escaped – some escaped entity; usually nested inside a string
  • 84 |
  • identifier – an identifier such as a variable or a function name
  • 85 |
  • keyword – any keyword or reserved word in the language 86 |
      87 |
    • builtin – a built-in function, such as
    • 88 |
    • literal – a built-in literal, such as
    • 89 |
    • operator – an operator keyword, such as
    • 90 |
    • preprocessor – a preprocessor statement, such as in C++
    • 91 |
    • type – a built-in data type, such as
    • 92 |
    93 |
  • 94 |
  • number – a numeric literal
  • 95 |
  • regex – a regular expression literal
  • 96 |
  • string – a string literal
  • 97 |
  • tag – a tag; this is mostly used in HTML but also elsewhere
  • 98 |
  • todo – a “todo”-like annotation in a comment
  • 99 |
100 | 101 |

Since languages such as HTML or CSS in particular use very different syntactical elements from other languages, it’s reasonable to reuse the above classes in other contexts. For example, HTML may redefine the keyword class for tag names. 102 |

103 |
104 | 105 | 106 | 108 | -------------------------------------------------------------------------------- /hyperlight/doc/user-guide/mini-zenburn.css: -------------------------------------------------------------------------------- 1 | .source-code { 2 | background: #3F3F3F; 3 | color: #DCDCCC; 4 | } 5 | 6 | .source-code .comment { 7 | color: #7F9F7F; 8 | font-style: italic; 9 | } 10 | 11 | .source-code .comment .todo { 12 | color: #DFDFDF; 13 | font-weight: bold; 14 | } 15 | 16 | .source-code .identifier { 17 | color: #EFDCBC; 18 | } 19 | 20 | .source-code .keyword { 21 | color: #F0DFAF; 22 | font-weight: bold; 23 | } 24 | 25 | .source-code .keyword.builtin { 26 | color: #EFEF8F; 27 | font-weight: normal; 28 | } 29 | -------------------------------------------------------------------------------- /hyperlight/examples/index.php: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | ‹? Hyperlight ?› Examples 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 22 |
23 |
24 |
25 | 29 | 30 | 49 |
50 |
51 | 52 | 53 | 55 | -------------------------------------------------------------------------------- /hyperlight/examples/theme_switcher.js: -------------------------------------------------------------------------------- 1 | /* Theme switcher: clicking a link inside #switch-buttons points link#theme at another stylesheet. */ $(document).ready(function() { 2 | $('#switch-buttons a').each(function (i, btt) { 3 | $(btt).click(function () { 4 | /* Clicking the already-active theme does nothing. */ if ($(this).hasClass('active')) 5 | return false; 6 | $('#switch-buttons a').each(function (i, btt) { $(btt).removeClass('active'); }); 7 | /* The stylesheet path is built from the link id with its "theme-" part removed. */ var cssPath = '../colors/' + this.id.replace('theme-', '') + '.css'; 8 | var cssAlreadyLoaded = false; 9 | $('pre.source-code').each(function (j, code) { 10 | $(code).fadeOut('normal', function() { 11 | /* Several code blocks fade out; only the first callback to run swaps the stylesheet. */ if (!cssAlreadyLoaded) { 12 | cssAlreadyLoaded = true; 13 | $('link#theme').attr({href: cssPath}); 14 | } 15 | }); 16 | }); 17 | $('pre.source-code').each(function (j, code) { $(code).fadeIn('normal'); }); 18 | $(this).addClass('active'); 19 | /* Returning false from the handler keeps the link from being followed. */ return false; 20 | }); 21 | }); 22 | }); 23 | -------------------------------------------------------------------------------- /hyperlight/graphics/body-background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klmr/hyperlight/f32704a74c386fcd24b52dad91390b36398eaa86/hyperlight/graphics/body-background.png -------------------------------------------------------------------------------- /hyperlight/graphics/head-backback.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klmr/hyperlight/f32704a74c386fcd24b52dad91390b36398eaa86/hyperlight/graphics/head-backback.png -------------------------------------------------------------------------------- /hyperlight/graphics/head-background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klmr/hyperlight/f32704a74c386fcd24b52dad91390b36398eaa86/hyperlight/graphics/head-background.png 
-------------------------------------------------------------------------------- /hyperlight/hyperlight.php: -------------------------------------------------------------------------------- 1 | "> 34 | * 35 | * (Remove space between `?` and `>`). 36 | * Although this no longer occurs, it is fixed by checking for `$token === ''` 37 | * in the `emit*` methods. This should never happen anyway. Probably something 38 | * to do with the zero-width lookahead in the PHP syntax definition. 39 | * 40 | * - `hyperlight_calculate_fold_marks`: refactor, write proper handler 41 | * 42 | * - Line numbers (on client-side?) 43 | * 44 | */ 45 | 46 | /** 47 | * Hyperlight source code highlighter for PHP. 48 | * @package hyperlight 49 | */ 50 | 51 | /** @ignore */ 52 | require_once('preg_helper.php'); 53 | 54 | if (!function_exists('array_peek')) { 55 | /** 56 | * @internal 57 | * This does exactly what you think it does. */ 58 | function array_peek(array &$array) { 59 | $cnt = count($array); 60 | return $cnt === 0 ? null : $array[$cnt - 1]; 61 | } 62 | } 63 | 64 | /** 65 | * @internal 66 | * For internal debugging purposes. 67 | */ 68 | function dump($obj, $descr = null) { 69 | if ($descr !== null) 70 | echo "

$descr

"; 71 | ob_start(); 72 | var_dump($obj); 73 | $dump = ob_get_clean(); 74 | ?>
errorSurrounding($code, $position) 88 | ); 89 | } 90 | 91 | // Try to extract the location of the error more or less precisely. 92 | // Only used for a comprehensive display. 93 | private function errorSurrounding($code, $pos) { 94 | $size = 10; 95 | $begin = $pos < $size ? 0 : $pos - $size; 96 | $end = $pos + $size > strlen($code) ? strlen($code) : $pos + $size; 97 | $offs = $pos - $begin; 98 | return substr($code, $begin, $end - $begin) . "\n" . sprintf("%{$offs}s", '^'); 99 | } 100 | } 101 | 102 | /** 103 | * Represents a nesting rule in the grammar of a language definition. 104 | * 105 | * Individual rules can either be represented by raw strings ("simple" rules) or 106 | * by a nesting rule. Nesting rules specify where they can start and end. Inside 107 | * a nesting rule, other rules may be applied (both simple and nesting). 108 | * For example, a nesting rule may define a string literal. Inside that string, 109 | * other rules may be applied that recognize escape sequences. 110 | * 111 | * To use a nesting rule, supply how it may start and end, e.g.: 112 | * 113 | * $string_rule = array('string' => new Rule('/"/', '/"/')); 114 | * 115 | * You also need to specify nested states: 116 | * 117 | * $string_states = array('string' => 'escaped'); 118 | * 119 | * Now you can add another rule for escaped: 120 | * 121 | * $escaped_rule = array('escaped' => '/\\(x\d{1,4}|.)/'); 122 | * 123 | */ 124 | class Rule { 125 | /** 126 | * Common rules. 
127 | */ 128 | 129 | const ALL_WHITESPACE = '/(\s|\r|\n)+/'; 130 | const C_IDENTIFIER = '/[a-z_][a-z0-9_]*/i'; 131 | const C_COMMENT = '#//.*?\n|/\*.*?\*/#s'; 132 | const C_MULTILINECOMMENT = '#/\*.*?\*/#s'; 133 | const DOUBLEQUOTESTRING = '/"(?:\\\\"|.)*?"/s'; 134 | const SINGLEQUOTESTRING = "/'(?:\\\\'|.)*?'/s"; 135 | const C_DOUBLEQUOTESTRING = '/L?"(?:\\\\"|.)*?"/s'; 136 | const C_SINGLEQUOTESTRING = "/L?'(?:\\\\'|.)*?'/s"; 137 | const STRING = '/"(?:\\\\"|.)*?"|\'(?:\\\\\'|.)*?\'/s'; 138 | const C_NUMBER = '/ 139 | (?: # Integer followed by optional fractional part. 140 | (?: 141 | 0(?: 142 | x[0-9a-f]+ 143 | | 144 | [0-7]* 145 | ) 146 | | 147 | \d+ 148 | ) 149 | (?:\.\d*)? 150 | (?:e[+-]?\d+)? # Optional exponent, the sign is not required, as in 1e5. 151 | ) 152 | | 153 | (?: # Just the fractional part. 154 | (?:\.\d+) 155 | (?:e[+-]?\d+)? 156 | ) 157 | /ix'; 158 | 159 | private $_start; 160 | private $_end; 161 | 162 | /** @ignore */ 163 | public function __construct($start, $end = null) { 164 | $this->_start = $start; 165 | $this->_end = $end; 166 | } 167 | 168 | /** 169 | * Returns the pattern with which this rule starts. 170 | * @return string 171 | */ 172 | public function start() { 173 | return $this->_start; 174 | } 175 | 176 | /** 177 | * Returns the pattern with which this rule may end. 178 | * @return string 179 | */ 180 | public function end() { 181 | return $this->_end; 182 | } 183 | } 184 | 185 | /** 186 | * Abstract base class of all Hyperlight language definitions. 187 | * 188 | * In order to define a new language definition, this class is inherited. 189 | * The only function that needs to be overridden is the constructor. Helper 190 | * functions from the base class can then be called to construct the grammar 191 | * and store additional information. 192 | * The name of the subclass must be of the schema {Lang}Language, 193 | * where {Lang} is a short, unique name for the language starting 194 | * with a capital letter and continuing in lower case. For example, 195 | * PhpLanguage is a valid name. 
The language definition must 196 | * reside in a file located at languages/{lang}.php. Here, 197 | * {lang} is the all-lowercase spelling of the name, e.g. 198 | * languages/php.php. 199 | * 200 | */ 201 | abstract class HyperLanguage { 202 | private $_states = array(); 203 | private $_rules = array(); 204 | private $_mappings = array(); 205 | private $_info = array(); 206 | private $_extensions = array(); 207 | private $_caseInsensitive = false; 208 | private $_postProcessors = array(); 209 | 210 | private static $_languageCache = array(); 211 | private static $_compiledLanguageCache = array(); 212 | private static $_filetypes; 213 | 214 | /** 215 | * Indices for information. 216 | */ 217 | 218 | const NAME = 1; 219 | const VERSION = 2; 220 | const AUTHOR = 10; 221 | const WEBSITE = 5; 222 | const EMAIL = 6; 223 | 224 | /** 225 | * Retrieves a language definition name based on a file extension. 226 | * 227 | * Uses the contents of the languages/filetypes file to 228 | * guess the language definition name from a file name extension. 229 | * This file has to be generated using the 230 | * collect-filetypes.php script every time the language 231 | * definitions have been changed. 232 | * 233 | * @param string $ext the file name extension. 234 | * @return string The language definition name or NULL. 235 | */ 236 | public static function nameFromExt($ext) { 237 | if (self::$_filetypes === null) { 238 | $ft_content = file('languages/filetypes', FILE_USE_INCLUDE_PATH | FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); 239 | $ft_data = array(); /* Start from an empty map so an empty or unreadable file still caches an array. */ 240 | foreach ($ft_content as $line) { 241 | list ($name, $extensions) = explode(':', trim($line)); 242 | $extensions = explode(',', $extensions); 243 | // Inverse lookup. 244 | foreach ($extensions as $extension) 245 | $ft_data[$extension] = $name; 246 | } 247 | self::$_filetypes = $ft_data; 248 | } 249 | $ext = strtolower($ext); 250 | return 251 | array_key_exists($ext, self::$_filetypes) ? 
252 | self::$_filetypes[$ext] : null; 253 | } 254 | 255 | public static function compile(HyperLanguage $lang) { 256 | $id = $lang->id(); 257 | if (!isset(self::$_compiledLanguageCache[$id])) 258 | self::$_compiledLanguageCache[$id] = $lang->makeCompiledLanguage(); 259 | return self::$_compiledLanguageCache[$id]; 260 | } 261 | 262 | public static function compileFromName($lang) { 263 | return self::compile(self::fromName($lang)); 264 | } 265 | 266 | protected static function exists($lang) { 267 | return isset(self::$_languageCache[$lang]) or 268 | file_exists("languages/$lang.php"); 269 | } 270 | 271 | protected static function fromName($lang) { 272 | if (!isset(self::$_languageCache[$lang])) { 273 | require_once("languages/$lang.php"); 274 | $klass = ucfirst("{$lang}Language"); 275 | self::$_languageCache[$lang] = new $klass(); 276 | } 277 | return self::$_languageCache[$lang]; 278 | } 279 | 280 | public function id() { 281 | $klass = get_class($this); 282 | return strtolower(substr($klass, 0, strlen($klass) - strlen('Language'))); 283 | } 284 | 285 | protected function setCaseInsensitive($value) { 286 | $this->_caseInsensitive = $value; 287 | } 288 | 289 | protected function addStates(array $states) { 290 | $this->_states = self::mergeProperties($this->_states, $states); 291 | } 292 | 293 | protected function getState($key) { 294 | return $this->_states[$key]; 295 | } 296 | 297 | protected function removeState($key) { 298 | unset($this->_states[$key]); 299 | } 300 | 301 | protected function addRules(array $rules) { 302 | $this->_rules = self::mergeProperties($this->_rules, $rules); 303 | } 304 | 305 | protected function getRule($key) { 306 | return $this->_rules[$key]; 307 | } 308 | 309 | protected function removeRule($key) { 310 | unset($this->_rules[$key]); 311 | } 312 | 313 | protected function addMappings(array $mappings) { 314 | // TODO Implement nested mappings. 
315 | $this->_mappings = array_merge($this->_mappings, $mappings); 316 | } 317 | 318 | protected function getMapping($key) { 319 | return $this->_mappings[$key]; 320 | } 321 | 322 | protected function removeMapping($key) { 323 | unset($this->_mappings[$key]); 324 | } 325 | 326 | protected function setInfo(array $info) { 327 | $this->_info = $info; 328 | } 329 | 330 | protected function setExtensions(array $extensions) { 331 | $this->_extensions = $extensions; 332 | } 333 | 334 | protected function addPostprocessing($rule, HyperLanguage $language) { 335 | $this->_postProcessors[$rule] = $language; 336 | } 337 | 338 | // protected function addNestedLanguage(HyperLanguage $language, $hoistBackRules) { 339 | // $prefix = get_class($language); 340 | // if (!is_array($hoistBackRules)) 341 | // $hoistBackRules = array($hoistBackRules); 342 | // 343 | // $states = array(); // Step 1: states 344 | // 345 | // foreach ($language->_states as $stateName => $state) { 346 | // $prefixedRules = array(); 347 | // 348 | // if (strstr($stateName, ' ')) { 349 | // $parts = explode(' ', $stateName); 350 | // $prefixed = array(); 351 | // foreach ($parts as $part) 352 | // $prefixed[] = "$prefix$part"; 353 | // $stateName = implode(' ', $prefixed); 354 | // } 355 | // else 356 | // $stateName = "$prefix$stateName"; 357 | // 358 | // foreach ($state as $key => $rule) { 359 | // if (is_string($key) and is_array($rule)) { 360 | // $nestedRules = array(); 361 | // foreach ($rule as $nestedRule) 362 | // $nestedRules[] = ($nestedRule === '') ? 
'' : 363 | // "$prefix$nestedRule"; 364 | // 365 | // $prefixedRules["$prefix$key"] = $nestedRules; 366 | // } 367 | // else 368 | // $prefixedRules[] = "$prefix$rule"; 369 | // } 370 | // 371 | // if ($stateName === 'init') 372 | // $prefixedRules = array_merge($hoistBackRules, $prefixedRules); 373 | // 374 | // $states[$stateName] = $prefixedRules; 375 | // } 376 | // 377 | // $rules = array(); // Step 2: rules 378 | // // Mappings need to set up already! 379 | // $mappings = array(); 380 | // 381 | // foreach ($language->_rules as $ruleName => $rule) { 382 | // if (is_array($rule)) { 383 | // $nestedRules = array(); 384 | // foreach ($rule as $nestedName => $nestedRule) { 385 | // if (is_string($nestedName)) { 386 | // $nestedRules["$prefix$nestedName"] = $nestedRule; 387 | // $mappings["$prefix$nestedName"] = $nestedName; 388 | // } 389 | // else 390 | // $nestedRules[] = $nestedRule; 391 | // } 392 | // $rules["$prefix$ruleName"] = $nestedRules; 393 | // } 394 | // else { 395 | // $rules["$prefix$ruleName"] = $rule; 396 | // $mappings["$prefix$ruleName"] = $ruleName; 397 | // } 398 | // } 399 | // 400 | // // Step 3: mappings. 401 | // 402 | // foreach ($language->_mappings as $ruleName => $cssClass) { 403 | // if (strstr($ruleName, ' ')) { 404 | // $parts = explode(' ', $ruleName); 405 | // $prefixed = array(); 406 | // foreach ($parts as $part) 407 | // $prefixed[] = "$prefix$part"; 408 | // $mappings[implode(' ', $prefixed)] = $cssClass; 409 | // } 410 | // else 411 | // $mappings["$prefix$ruleName"] = $cssClass; 412 | // } 413 | // 414 | // $this->addStates($states); 415 | // $this->addRules($rules); 416 | // $this->addMappings($mappings); 417 | // 418 | // return $prefix . 
'init'; 419 | // } 420 | 421 | private function makeCompiledLanguage() { 422 | return new HyperlightCompiledLanguage( 423 | $this->id(), 424 | $this->_info, 425 | $this->_extensions, 426 | $this->_states, 427 | $this->_rules, 428 | $this->_mappings, 429 | $this->_caseInsensitive, 430 | $this->_postProcessors 431 | ); 432 | } 433 | /** Merges $new into $old and returns the result. A string key whose existing value is an array gets the new value merged into that array; any other string key is overwritten; integer-keyed values are appended. */ 434 | private static function mergeProperties(array $old, array $new) { 435 | foreach ($new as $key => $value) { 436 | if (is_string($key)) { 437 | if (isset($old[$key]) and is_array($old[$key])) 438 | $old[$key] = array_merge($old[$key], is_array($value) ? $value : array($value)); /* Merge this key's value only, not the whole $new array. */ 439 | else 440 | $old[$key] = $value; 441 | } 442 | else 443 | $old[] = $value; 444 | } 445 | 446 | return $old; 447 | } 448 | } 449 | 450 | class HyperlightCompiledLanguage { 451 | private $_id; 452 | private $_info; 453 | private $_extensions; 454 | private $_states; 455 | private $_rules; 456 | private $_mappings; 457 | private $_caseInsensitive; 458 | private $_postProcessors = array(); 459 | 460 | public function __construct($id, $info, $extensions, $states, $rules, $mappings, $caseInsensitive, $postProcessors) { 461 | $this->_id = $id; 462 | $this->_info = $info; 463 | $this->_extensions = $extensions; 464 | $this->_caseInsensitive = $caseInsensitive; 465 | $this->_states = $this->compileStates($states); 466 | $this->_rules = $this->compileRules($rules); 467 | $this->_mappings = $mappings; 468 | 469 | foreach ($postProcessors as $ppkey => $ppvalue) 470 | $this->_postProcessors[$ppkey] = HyperLanguage::compile($ppvalue); 471 | } 472 | 473 | public function id() { 474 | return $this->_id; 475 | } 476 | 477 | public function name() { 478 | return $this->_info[HyperLanguage::NAME]; 479 | } 480 | 481 | public function authorName() { 482 | if (!array_key_exists(HyperLanguage::AUTHOR, $this->_info)) 483 | return null; 484 | $author = $this->_info[HyperLanguage::AUTHOR]; 485 | if (is_string($author)) 486 | return $author; 487 | if (!array_key_exists(HyperLanguage::NAME, $author)) 488 | return null; 
489 | return $author[HyperLanguage::NAME]; 490 | } 491 | 492 | public function authorWebsite() { 493 | if (!array_key_exists(HyperLanguage::AUTHOR, $this->_info) or 494 | !is_array($this->_info[HyperLanguage::AUTHOR]) or 495 | !array_key_exists(HyperLanguage::WEBSITE, $this->_info[HyperLanguage::AUTHOR])) 496 | return null; 497 | return $this->_info[HyperLanguage::AUTHOR][HyperLanguage::WEBSITE]; 498 | } 499 | 500 | public function authorEmail() { 501 | if (!array_key_exists(HyperLanguage::AUTHOR, $this->_info) or 502 | !is_array($this->_info[HyperLanguage::AUTHOR]) or 503 | !array_key_exists(HyperLanguage::EMAIL, $this->_info[HyperLanguage::AUTHOR])) 504 | return null; 505 | return $this->_info[HyperLanguage::AUTHOR][HyperLanguage::EMAIL]; 506 | } 507 | 508 | public function authorContact() { 509 | $email = $this->authorEmail(); 510 | return $email !== null ? $email : $this->authorWebsite(); 511 | } 512 | 513 | public function extensions() { 514 | return $this->_extensions; 515 | } 516 | 517 | public function state($stateName) { 518 | return $this->_states[$stateName]; 519 | } 520 | 521 | public function rule($ruleName) { 522 | return $this->_rules[$ruleName]; 523 | } 524 | 525 | public function className($state) { 526 | if (array_key_exists($state, $this->_mappings)) 527 | return $this->_mappings[$state]; 528 | else if (strstr($state, ' ') === false) 529 | // No mapping for state. 530 | return $state; 531 | else { 532 | // Try mapping parts of nested state name. 
533 | $parts = explode(' ', $state); 534 | $ret = array(); 535 | 536 | foreach ($parts as $part) { 537 | if (array_key_exists($part, $this->_mappings)) 538 | $ret[] = $this->_mappings[$part]; 539 | else 540 | $ret[] = $part; 541 | } 542 | 543 | return implode(' ', $ret); 544 | } 545 | } 546 | 547 | public function postProcessors() { 548 | return $this->_postProcessors; 549 | } 550 | 551 | private function compileStates($states) { 552 | $ret = array(); 553 | 554 | foreach ($states as $name => $state) { 555 | $newstate = array(); 556 | 557 | if (!is_array($state)) 558 | $state = array($state); 559 | 560 | foreach ($state as $key => $elem) { 561 | if ($elem === null) 562 | continue; 563 | if (is_string($key)) { 564 | if (!is_array($elem)) 565 | $elem = array($elem); 566 | 567 | foreach ($elem as $el2) { 568 | if ($el2 === '') 569 | $newstate[] = $key; 570 | else 571 | $newstate[] = "$key $el2"; 572 | } 573 | } 574 | else 575 | $newstate[] = $elem; 576 | } 577 | 578 | $ret[$name] = $newstate; 579 | } 580 | 581 | return $ret; 582 | } 583 | 584 | private function compileRules($rules) { 585 | $tmp = array(); 586 | 587 | // Preprocess keyword list and flatten nested lists: 588 | 589 | // End of regular expression matching keywords. 590 | $end = $this->_caseInsensitive ? ')\b/i' : ')\b/'; 591 | 592 | foreach ($rules as $name => $rule) { 593 | if (is_array($rule)) { 594 | if (self::isAssocArray($rule)) { 595 | // Array is a nested list of rules. 596 | foreach ($rule as $key => $value) { 597 | if (is_array($value)) 598 | // Array represents a list of keywords. 599 | $value = '/\b(?:' . implode('|', $value) . $end; 600 | 601 | if (!is_string($key) or strlen($key) === 0) 602 | $tmp[$name] = $value; 603 | else 604 | $tmp["$name $key"] = $value; 605 | } 606 | } 607 | else { 608 | // Array represents a list of keywords. 609 | $rule = '/\b(?:' . implode('|', $rule) . 
$end; 610 | $tmp[$name] = $rule; 611 | } 612 | } 613 | else { 614 | $tmp[$name] = $rule; 615 | } // if (is_array($rule)) 616 | } // foreach 617 | 618 | $ret = array(); 619 | 620 | foreach ($this->_states as $name => $state) { 621 | $regex_rules = array(); 622 | $regex_names = array(); 623 | $nesting_rules = array(); 624 | 625 | foreach ($state as $rule_name) { 626 | $rule = $tmp[$rule_name]; 627 | if ($rule instanceof Rule) 628 | $nesting_rules[$rule_name] = $rule; 629 | else { 630 | $regex_rules[] = $rule; 631 | $regex_names[] = $rule_name; 632 | } 633 | } 634 | 635 | $ret[$name] = array_merge( 636 | array(preg_merge('|', $regex_rules, $regex_names)), 637 | $nesting_rules 638 | ); 639 | } 640 | 641 | return $ret; 642 | } 643 | 644 | private static function isAssocArray(array $array) { 645 | foreach($array as $key => $_) 646 | if (is_string($key)) 647 | return true; 648 | return false; 649 | } 650 | } 651 | 652 | class Hyperlight { 653 | private $_lang; 654 | private $_result; 655 | private $_states; 656 | private $_omitSpans; 657 | private $_postProcessors = array(); 658 | 659 | public function __construct($lang) { 660 | if (is_string($lang)) 661 | $this->_lang = HyperLanguage::compileFromName(strtolower($lang)); 662 | else if ($lang instanceof HyperlightCompiledLanguage) 663 | $this->_lang = $lang; 664 | else if ($lang instanceof HyperLanguage) 665 | $this->_lang = HyperLanguage::compile($lang); 666 | else 667 | trigger_error( 668 | 'Invalid argument type for $lang to Hyperlight::__construct', 669 | E_USER_ERROR 670 | ); 671 | 672 | foreach ($this->_lang->postProcessors() as $ppkey => $ppvalue) 673 | $this->_postProcessors[$ppkey] = new Hyperlight($ppvalue); 674 | 675 | $this->reset(); 676 | } 677 | 678 | public function language() { 679 | return $this->_lang; 680 | } 681 | 682 | public function reset() { 683 | $this->_states = array('init'); 684 | $this->_omitSpans = array(); 685 | } 686 | 687 | public function render($code) { 688 | // Normalize line breaks. 
689 | $this->_code = preg_replace('/\r\n?/', "\n", $code); 690 | $fm = hyperlight_calculate_fold_marks($this->_code, $this->language()->id()); 691 | return hyperlight_apply_fold_marks($this->renderCode(), $fm); 692 | } 693 | 694 | public function renderAndPrint($code) { 695 | echo $this->render($code); 696 | } 697 | 698 | 699 | private function renderCode() { 700 | $code = $this->_code; 701 | $pos = 0; 702 | $len = strlen($code); 703 | $this->_result = ''; 704 | $state = array_peek($this->_states); 705 | 706 | // If there are open states (reentrant parsing), open the corresponding 707 | // tags first: 708 | 709 | for ($i = 1; $i < count($this->_states); ++$i) 710 | if (!$this->_omitSpans[$i - 1]) { 711 | $class = $this->_lang->className($this->_states[$i]); 712 | $this->write(""); 713 | } 714 | 715 | // Emergency break to catch faulty rules. 716 | $prev_pos = -1; 717 | 718 | while ($pos < $len) { 719 | // The token next to the current position, after the inner loop completes. 720 | // i.e. $closest_hit = array($matched_text, $position) 721 | $closest_hit = array('', $len); 722 | // The rule that found this token. 723 | $closest_rule = null; 724 | $rules = $this->_lang->rule($state); 725 | 726 | foreach ($rules as $name => $rule) { 727 | if ($rule instanceof Rule) 728 | $this->matchIfCloser( 729 | $rule->start(), $name, $pos, $closest_hit, $closest_rule 730 | ); 731 | else if (preg_match($rule, $code, $matches, PREG_OFFSET_CAPTURE, $pos) == 1) { 732 | // Search which of the sub-patterns matched. 733 | 734 | foreach ($matches as $group => $match) { 735 | if (!is_string($group)) 736 | continue; 737 | if ($match[1] !== -1) { 738 | $closest_hit = $match; 739 | $closest_rule = str_replace('_', ' ', $group); 740 | break; 741 | } 742 | } 743 | } 744 | } // foreach ($rules) 745 | 746 | // If we're currently inside a rule, check whether we've come to the 747 | // end of it, or the end of any other rule we're nested in. 
748 | 749 | if (count($this->_states) > 1) { 750 | $n = count($this->_states) - 1; 751 | do { 752 | $rule = $this->_lang->rule($this->_states[$n - 1]); 753 | $rule = $rule[$this->_states[$n]]; 754 | --$n; 755 | if ($n < 0) 756 | throw new NoMatchingRuleException($this->_states, $pos, $code); 757 | } while ($rule->end() === null); 758 | 759 | $this->matchIfCloser($rule->end(), $n + 1, $pos, $closest_hit, $closest_rule); 760 | } 761 | 762 | // We take the closest hit: 763 | 764 | if ($closest_hit[1] > $pos) 765 | $this->emit(substr($code, $pos, $closest_hit[1] - $pos)); 766 | 767 | $prev_pos = $pos; 768 | $pos = $closest_hit[1] + strlen($closest_hit[0]); 769 | 770 | if ($prev_pos === $pos and is_string($closest_rule)) 771 | if (array_key_exists($closest_rule, $this->_lang->rule($state))) { 772 | array_push($this->_states, $closest_rule); 773 | $state = $closest_rule; 774 | $this->emitPartial('', $closest_rule); 775 | } 776 | 777 | if ($closest_hit[1] === $len) 778 | break; 779 | else if (!is_string($closest_rule)) { 780 | // Pop state. 781 | if (count($this->_states) <= $closest_rule) 782 | throw new NoMatchingRuleException($this->_states, $pos, $code); 783 | 784 | while (count($this->_states) > $closest_rule + 1) { 785 | $lastState = array_pop($this->_states); 786 | $this->emitPop('', $lastState); 787 | } 788 | $lastState = array_pop($this->_states); 789 | $state = array_peek($this->_states); 790 | $this->emitPop($closest_hit[0], $lastState); 791 | } 792 | else if (array_key_exists($closest_rule, $this->_lang->rule($state))) { 793 | // Push state. 
794 | array_push($this->_states, $closest_rule); 795 | $state = $closest_rule; 796 | $this->emitPartial($closest_hit[0], $closest_rule); 797 | } 798 | else 799 | $this->emit($closest_hit[0], $closest_rule); 800 | } // while ($pos < $len) 801 | 802 | // Close any tags that are still open (can happen in incomplete code 803 | // fragments that don't necessarily signify an error (consider PHP 804 | // embedded in HTML, or a C++ preprocessor code not ending on newline). 805 | 806 | $omitSpansBackup = $this->_omitSpans; 807 | for ($i = count($this->_states); $i > 1; --$i) 808 | $this->emitPop(); 809 | $this->_omitSpans = $omitSpansBackup; 810 | 811 | return $this->_result; 812 | } 813 | 814 | private function matchIfCloser($expr, $next, $pos, &$closest_hit, &$closest_rule) { 815 | $matches = array(); 816 | if (preg_match($expr, $this->_code, $matches, PREG_OFFSET_CAPTURE, $pos) == 1) { 817 | if ( 818 | ( 819 | // Two hits at same position -- compare length 820 | // For equal lengths: first come, first serve. 
821 | $matches[0][1] == $closest_hit[1] and 822 | strlen($matches[0][0]) > strlen($closest_hit[0]) 823 | ) or 824 | $matches[0][1] < $closest_hit[1] 825 | ) { 826 | $closest_hit = $matches[0]; 827 | $closest_rule = $next; 828 | } 829 | } 830 | } 831 | 832 | private function processToken($token) { 833 | if ($token === '') 834 | return ''; 835 | $nest_lang = array_peek($this->_states); 836 | if (array_key_exists($nest_lang, $this->_postProcessors)) 837 | return $this->_postProcessors[$nest_lang]->render($token); 838 | else 839 | #return self::htmlentities($token); 840 | return htmlspecialchars($token, ENT_NOQUOTES); 841 | } 842 | 843 | private function emit($token, $class = '') { 844 | $token = $this->processToken($token); 845 | if ($token === '') 846 | return; 847 | $class = $this->_lang->className($class); 848 | if ($class === '') 849 | $this->write($token); 850 | else 851 | $this->write("$token"); 852 | } 853 | 854 | private function emitPartial($token, $class) { 855 | $token = $this->processToken($token); 856 | $class = $this->_lang->className($class); 857 | if ($class === '') { 858 | if ($token !== '') 859 | $this->write($token); 860 | array_push($this->_omitSpans, true); 861 | } 862 | else { 863 | $this->write("$token"); 864 | array_push($this->_omitSpans, false); 865 | } 866 | } 867 | 868 | private function emitPop($token = '', $class = '') { 869 | $token = $this->processToken($token); 870 | if (array_pop($this->_omitSpans)) 871 | $this->write($token); 872 | else 873 | $this->write("$token"); 874 | } 875 | 876 | private function write($text) { 877 | $this->_result .= $text; 878 | } 879 | 880 | // // DAMN! What did I need them for? Something to do with encoding … 881 | // // but why not use the `$charset` argument on `htmlspecialchars`? 
882 | // private static function htmlentitiesCallback($match) { 883 | // switch ($match[0]) { 884 | // case '<': return '<'; 885 | // case '>': return '>'; 886 | // case '&': return '&'; 887 | // } 888 | // } 889 | // 890 | // private static function htmlentities($text) { 891 | // return htmlspecialchars($text, ENT_NOQUOTES); 892 | // return preg_replace_callback( 893 | // '/[<>&]/', array('Hyperlight', 'htmlentitiesCallback'), $text 894 | // ); 895 | // } 896 | } // class Hyperlight 897 | 898 | /** 899 | * echos a highlighted code. 900 | * 901 | * For example, the following 902 | * 903 | * hyperlight('', 'php'); 904 | * 905 | * results in: 906 | * 907 | *
...
908 | *
909 | * 910 | * @param string $code The code. 911 | * @param string $lang The language of the code. 912 | * @param string $tag The surrounding tag to use. Optional. 913 | * @param array $attributes Attributes to decorate {@link $tag} with. 914 | * If no tag is given, this argument can be passed in its place. This 915 | * behaviour will be assumed if the third argument is an array. 916 | * Attributes must be given as a hash of key value pairs. 917 | */ 918 | function hyperlight($code, $lang, $tag = 'pre', array $attributes = array()) { 919 | if ($code == '') 920 | die("`hyperlight` needs a code to work on!"); 921 | if ($lang == '') 922 | die("`hyperlight` needs to know the code's language!"); 923 | if (is_array($tag) and !empty($attributes)) 924 | die("Can't pass array arguments for \$tag *and* \$attributes to `hyperlight`!"); 925 | if ($tag == '') 926 | $tag = 'pre'; 927 | if (is_array($tag)) { 928 | $attributes = $tag; 929 | $tag = 'pre'; 930 | } 931 | $lang = htmlspecialchars(strtolower($lang)); 932 | $class = "source-code $lang"; 933 | 934 | $attr = array(); 935 | foreach ($attributes as $key => $value) { 936 | if ($key == 'class') 937 | $class .= ' ' . htmlspecialchars($value); 938 | else 939 | $attr[] = htmlspecialchars($key) . '="' . 940 | htmlspecialchars($value) . '"'; 941 | } 942 | 943 | $attr = empty($attr) ? '' : ' ' . implode(' ', $attr); 944 | 945 | $hl = new Hyperlight($lang); 946 | echo "<$tag class=\"$class\"$attr>"; 947 | $hl->renderAndPrint(trim($code)); 948 | echo ""; 949 | } 950 | 951 | /** 952 | * Is the same as: 953 | * 954 | * hyperlight(file_get_contents($filename), $lang, $tag, $attributes); 955 | * 956 | * @see hyperlight() 957 | */ 958 | function hyperlight_file($filename, $lang = null, $tag = 'pre', array $attributes = array()) { 959 | if ($lang == '') { 960 | // Try to guess it from file extension. 
961 | $pos = strrpos($filename, '.'); 962 | if ($pos !== false) { 963 | $ext = substr($filename, $pos + 1); 964 | $lang = HyperLanguage::nameFromExt($ext); 965 | } 966 | } 967 | hyperlight(file_get_contents($filename), $lang, $tag, $attributes); 968 | } 969 | 970 | if (defined('HYPERLIGHT_SHORTCUT')) { 971 | function hy() { 972 | $args = func_get_args(); 973 | call_user_func_array('hyperlight', $args); 974 | } 975 | function hyf() { 976 | $args = func_get_args(); 977 | call_user_func_array('hyperlight_file', $args); 978 | } 979 | } 980 | 981 | function hyperlight_calculate_fold_marks($code, $lang) { 982 | $supporting_languages = array('csharp', 'vb'); 983 | 984 | if (!in_array($lang, $supporting_languages)) 985 | return array(); 986 | 987 | $fold_begin_marks = array('/^\s*#Region/', '/^\s*#region/'); 988 | $fold_end_marks = array('/^\s*#End Region/', '/\s*#endregion/'); 989 | 990 | $lines = preg_split('/\r|\n|\r\n/', $code); 991 | 992 | $fold_begin = array(); 993 | foreach ($fold_begin_marks as $fbm) 994 | $fold_begin = $fold_begin + preg_grep($fbm, $lines); 995 | 996 | $fold_end = array(); 997 | foreach ($fold_end_marks as $fem) 998 | $fold_end = $fold_end + preg_grep($fem, $lines); 999 | 1000 | if (count($fold_begin) !== count($fold_end) or count($fold_begin) === 0) 1001 | return array(); 1002 | 1003 | $fb = array(); 1004 | $fe = array(); 1005 | foreach ($fold_begin as $line => $_) 1006 | $fb[] = $line; 1007 | 1008 | foreach ($fold_end as $line => $_) 1009 | $fe[] = $line; 1010 | 1011 | $ret = array(); 1012 | for ($i = 0; $i < count($fb); $i++) 1013 | $ret[$fb[$i]] = $fe[$i]; 1014 | 1015 | return $ret; 1016 | } 1017 | 1018 | function hyperlight_apply_fold_marks($code, array $fold_marks) { 1019 | if ($fold_marks === null or count($fold_marks) === 0) 1020 | return $code; 1021 | 1022 | $lines = explode("\n", $code); 1023 | 1024 | foreach ($fold_marks as $begin => $end) { 1025 | $lines[$begin] = '' . $lines[$begin] . ' '; 1026 | $lines[$begin + 1] = '' . 
$lines[$begin + 1]; 1027 | $lines[$end + 1] = '' . $lines[$end + 1]; 1028 | } 1029 | 1030 | return implode("\n", $lines); 1031 | } 1032 | 1033 | ?> 1034 | -------------------------------------------------------------------------------- /hyperlight/index.php: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | ‹? Hyperlight ?› Code Highlighting for PHP 10 | 11 | 12 | 13 | 14 | 15 | 20 |
21 |
22 |
23 |

Why use Hyperlight?

24 |
    25 |
  • 26 |

    Easy to use. There’s no configuration. The following code will highlight your source code. Nothing more needs to be said or done.

    27 | theResult();', 'iphp'); ?> 30 |

    Even easier, there’s a handy function hyperlight for lightweight usage, especially in HTML templates:

    31 | ', 'php'); ?> 32 |

    This code creates a <pre> container around the code. This can be controlled with a third argument to the function.

    33 |
  • 34 |
  • 35 |

    Easy to extend. The syntax definitions are written in PHP but only a very basic grasp of the language is needed. Syntax definitions are concise and for most tasks, existing templates can be used and it’s enough to customize a basic set of features.

    36 |
  • 37 |
  • 38 |

    Powerful. The syntax definitions use regular expressions but they support stateful parsing through a very simple mechanism. This makes implementing context-free grammars effortless.

    39 |
  • 40 |
  • 41 |

    Full CSS support. One single CSS file can be used for all languages to give a consistent look & feel. Elements may be nested for refinements (e.g. highlighting “TODO” items in comments):

    42 | 44 |

    Further refinements are possible in order to differentiate similar elements. Consider the different classes of keywords:

    45 | 48 |
  • 49 |
  • 50 |

    Colour schemes! – This is basically the same as “full CSS support” but it sounds waaay cooler. Since CSS support is naturally included in Hyperlight and syntax files can define appropriate mappings for their lexemes, usage and creation of professional colour schemes is effortless.

    51 |
  • 52 |
53 | 54 |

Why not use something else?

55 |

Sure, there are alternatives. Unfortunately, they are surprisingly few for PHP:

56 | 57 |

Geshi

58 |

If you’re forced to work with PHP version < 5.0, sure, use Geshi. But be prepared: each syntax brings its own (ugly) style, the lack of conventions makes the use of one CSS for all languages impossible (because they use the same CSS class names for completely different things), a lot of badly documented configuration is necessary to get the desired result, HTML garbage is produced and the CSS class names are gibberish.

59 |

Furthermore, many of the syntax definitions are badly realized and/or have bugs. Creating your own highlighting isn't trivial because the API is quite complicated, not very powerful and lacks documentation.

60 |

If that doesn't matter to you, Geshi is perhaps not such a bad choice.

61 | 62 |

Pear_TextHighlighter

63 |

Syntax definitions must be given as cumbersome XML files. Need I say more?

64 |
65 |
66 | 67 | 68 | 70 | -------------------------------------------------------------------------------- /hyperlight/languages/cpp.php: -------------------------------------------------------------------------------- 1 | setInfo(array( 11 | parent::NAME => 'C++', 12 | parent::VERSION => '0.4', 13 | parent::AUTHOR => array( 14 | parent::NAME => 'Konrad Rudolph', 15 | parent::WEBSITE => 'madrat.net', 16 | parent::EMAIL => 'konrad_rudolph@madrat.net' 17 | ) 18 | )); 19 | 20 | $this->setExtensions(array('c', 'cc', 'cpp', 'h', 'hpp', 'icl', 'ipp')); 21 | 22 | $keyword = array('keyword' => array('', 'type', 'literal', 'operator')); 23 | $common = array( 24 | 'string', 'char', 'number', 'comment', 25 | 'keyword' => array('', 'type', 'literal', 'operator'), 26 | 'identifier', 27 | 'operator' 28 | ); 29 | 30 | $this->addStates(array( 31 | 'init' => array_merge(array('include', 'preprocessor'), $common), 32 | 'include' => array('incpath'), 33 | 'preprocessor' => array_merge($common, array('pp_newline')), 34 | )); 35 | 36 | $this->addRules(array( 37 | 'whitespace' => RULE::ALL_WHITESPACE, 38 | 'operator' => '/<:|:>|<%|%>|%:|%:%:|\+\+|--|&&|\|\||::|<<|>>|##|\.\.\.|\.\*|->|->*|[-+*\/%^&|!~<>.=,;:?()\[\]\{\}]|[-+*\/%^&|=!~<>]=|<<=|>>=/', 39 | 'include' => new Rule('/#\s*include/', '/\n/'), 40 | 'preprocessor' => new Rule('/#\s*\w+/', '/\n/'), 41 | //'pp_newline' => '/[^\\\\](?\\\\*?)(?P=bs)\\\\\n/', 42 | 'pp_newline' => '/(? 
'/<[^>]*>|"[^"]*"/', 44 | 'string' => Rule::C_DOUBLEQUOTESTRING, 45 | 'char' => Rule::C_SINGLEQUOTESTRING, 46 | 'number' => Rule::C_NUMBER, 47 | 'comment' => Rule::C_COMMENT, 48 | 'keyword' => array( 49 | array( 50 | 'asm', 'auto', 'break', 'case', 'catch', 'class', 'const', 51 | 'const_cast', 'continue', 'default', 'do', 'dynamic_cast', 52 | 'else', 'enum', 'explicit', 'export', 'extern', 'for', 53 | 'firend', 'goto', 'if', 'inline', 'mutable', 'namespace', 54 | 'operator', 'private', 'protected', 'public', 'register', 55 | 'reinterpret_cast', 'return', 'sizeof', 'static', 56 | 'static_cast', 'struct', 'switch', 'template', 'throw', 57 | 'try', 'typedef', 'typename', 'union', 'using', 'virtual', 58 | 'volatile', 'while' 59 | ), 60 | 'type' => array( 61 | 'bool', 'char', 'double', 'float', 'int', 'long', 'short', 62 | 'signed', 'unsigned', 'void', 'wchar_t' 63 | ), 64 | 'literal' => array( 65 | 'false', 'this', 'true' 66 | ), 67 | 'operator' => array( 68 | 'and', 'and_eq', 'bitand', 'bitor', 'compl', 'delete', 69 | 'new', 'not', 'not_eq', 'or', 'or_eq', 'typeid', 'xor', 70 | 'xor_eq' 71 | ), 72 | ), 73 | 'identifier' => Rule::C_IDENTIFIER, 74 | )); 75 | 76 | $this->addMappings(array( 77 | 'operator' => '', 78 | 'include' => 'preprocessor', 79 | 'incpath' => 'tag', 80 | )); 81 | } 82 | } 83 | 84 | ?> 85 | -------------------------------------------------------------------------------- /hyperlight/languages/csharp.php: -------------------------------------------------------------------------------- 1 | setInfo(array( 6 | parent::NAME => 'C#', 7 | parent::VERSION => '0.3', 8 | parent::AUTHOR => array( 9 | parent::NAME => 'Konrad Rudolph', 10 | parent::WEBSITE => 'madrat.net', 11 | parent::EMAIL => 'konrad_rudolph@madrat.net' 12 | ) 13 | )); 14 | 15 | $this->setExtensions(array('cs')); 16 | 17 | $this->setCaseInsensitive(false); 18 | 19 | $this->addStates(array( 20 | 'init' => array( 21 | 'string', 22 | 'char', 23 | 'number', 24 | 'comment' => array('', 'doc'), 25 | 
'keyword' => array('', 'type', 'literal', 'operator', 'preprocessor'), 26 | 'identifier', 27 | 'operator', 28 | 'whitespace', 29 | ), 30 | 'comment doc' => 'doc', 31 | )); 32 | 33 | $this->addRules(array( 34 | 'whitespace' => Rule::ALL_WHITESPACE, 35 | 'operator' => '/[-+*\/%&|^!~=<>?{}()\[\].,:;]|&&|\|\||<<|>>|[-=!<>+*\/%&|^]=|<<=|>>=|->/', 36 | 'string' => Rule::C_DOUBLEQUOTESTRING, 37 | 'char' => Rule::C_SINGLEQUOTESTRING, 38 | 'number' => Rule::C_NUMBER, 39 | 'comment' => array( 40 | '#//(?:[^/].*?)?\n|/\*.*?\*/#s', 41 | 'doc' => new Rule('#///#', '/$/m') 42 | ), 43 | 'doc' => '/<(?:".*?"|\'.*?\'|[^>])*>/', 44 | 'keyword' => array( 45 | array( 46 | 'abstract', 'break', 'case', 'catch', 'checked', 'class', 47 | 'const', 'continue', 'default', 'delegate', 'do', 'else', 48 | 'enum', 'event', 'explicit', 'extern', 'finally', 'fixed', 49 | 'for', 'foreach', 'goto', 'if', 'implicit', 'in', 'interface', 50 | 'internal', 'lock', 'namespace', 'operator', 'out', 'override', 51 | 'params', 'private', 'protected', 'public', 'readonly', 'ref', 52 | 'return', 'sealed', 'static', 'struct', 'switch', 'throw', 53 | 'try', 'unchecked', 'unsafe', 'using', 'var', 'virtual', 54 | 'volatile', 'while' 55 | ), 56 | 'type' => array( 57 | 'bool', 'byte', 'char', 'decimal', 'double', 'float', 'int', 58 | 'long', 'object', 'sbyte', 'short', 'string', 'uint', 'ulong', 59 | 'ushort', 'void' 60 | ), 61 | 'literal' => array( 62 | 'base', 'false', 'null', 'this', 'true', 63 | ), 64 | 'operator' => array( 65 | 'as', 'is', 'new', 'sizeof', 'stackallock', 'typeof', 66 | ), 67 | 'preprocessor' => '/#(?:if|else|elif|endif|define|undef|warning|error|line|region|endregion)/' 68 | ), 69 | 'identifier' => '/@?[a-z_][a-z0-9_]*/i', 70 | )); 71 | 72 | $this->addMappings(array( 73 | 'whitespace' => '', 74 | 'operator' => '', 75 | )); 76 | } 77 | } 78 | 79 | ?> 80 | -------------------------------------------------------------------------------- /hyperlight/languages/css.php: 
-------------------------------------------------------------------------------- 1 | setInfo(array( 6 | parent::NAME => 'CSS', 7 | parent::VERSION => '0.8', 8 | parent::AUTHOR => array( 9 | parent::NAME => 'Konrad Rudolph', 10 | parent::WEBSITE => 'madrat.net', 11 | parent::EMAIL => 'konrad_rudolph@madrat.net' 12 | ) 13 | )); 14 | 15 | $this->setExtensions(array('css')); 16 | 17 | // The following does not conform to the specs but it is necessary 18 | // else numbers wouldn't be recognized any more. 19 | $nmstart = '-?[a-z]'; 20 | $nmchar = '[a-z0-9-]'; 21 | $hex = '[0-9a-f]'; 22 | list($string, $strmod) = preg_strip(Rule::STRING); 23 | $strmod = implode('', $strmod); 24 | 25 | $this->addStates(array( 26 | 'init' => array('comment', 'uri', 'meta', 'id', 'class', 'pseudoclass', 'element', 'block', 'constraint', 'string'), 27 | 'block' => array('comment', 'attribute', 'value'), 28 | 'constraint' => array('identifier', 'string'), 29 | 'value' => array('comment', 'string', 'color', 'number', 'uri', 'identifier', 'important'), 30 | )); 31 | 32 | $this->addRules(array( 33 | 'attribute' => "/$nmstart$nmchar*/i", 34 | 'value' => new Rule('/:/', '/;|(?=\})/'), 35 | 'comment' => Rule::C_MULTILINECOMMENT, 36 | 'meta' => "/@$nmstart$nmchar*/i", 37 | 'id' => "/#$nmstart$nmchar*/i", 38 | 'class' => "/\.$nmstart$nmchar*/", 39 | // Pay attention not to match rules such as ::selection! 40 | 'pseudoclass' => "/(? 
"/$nmstart$nmchar*/i", 42 | 'block' => new Rule('/\{/', '/\}/'), 43 | 'constraint' => new Rule('/\[/', '/\]/'), 44 | 'number' => '/[+-]?(?:\d+(\.\d+)?|\d*\.\d+)(%|em|ex|px|pt|in|cm|mm|pc|deg|g?rad|m?s|k?Hz)?/', 45 | 'uri' => "/url\(\s*(?:$string|[^\)]*)\s*\)/$strmod", 46 | 'identifier' => "/$nmstart$nmchar*/i", 47 | 'string' => "/$string/$strmod", 48 | 'color' => "/#$hex{3}(?:$hex{3})?/i", 49 | 'important' => '/!\s*important/', 50 | )); 51 | 52 | $this->addMappings(array( 53 | 'element' => 'keyword', 54 | 'id' => 'keyword type', 55 | 'class' => 'keyword builtin', 56 | 'pseudoclass' => 'preprocessor', 57 | 'block' => '', 58 | 'constraint' => '', 59 | 'value' => '', 60 | 'color' => 'string', 61 | 'uri' => 'char', 62 | 'meta' => 'keyword', 63 | )); 64 | } 65 | } 66 | 67 | ?> 68 | -------------------------------------------------------------------------------- /hyperlight/languages/filetypes: -------------------------------------------------------------------------------- 1 | cpp:c,cc,cpp,h,hpp,icl,ipp 2 | css:css 3 | iphp:php,php3,php4,php5,inc 4 | php:php,php3,php4,php5,inc 5 | vb:vb 6 | xml:xml,xsl,xslt,xsd,manifest -------------------------------------------------------------------------------- /hyperlight/languages/iphp.php: -------------------------------------------------------------------------------- 1 | setExtensions(array()); // Not a whole file, just a fragment. 
9 | $this->removeState('init'); 10 | $this->addStates(array('init' => $this->getState('php'))); 11 | } 12 | } 13 | 14 | ?> 15 | -------------------------------------------------------------------------------- /hyperlight/languages/php.php: -------------------------------------------------------------------------------- 1 | setInfo(array( 15 | parent::NAME => 'PHP', 16 | parent::VERSION => '0.3', 17 | parent::AUTHOR => array( 18 | parent::NAME => 'Konrad Rudolph', 19 | parent::WEBSITE => 'madrat.net', 20 | parent::EMAIL => 'konrad_rudolph@madrat.net' 21 | ) 22 | )); 23 | 24 | $this->setExtensions(array('php', 'php3', 'php4', 'php5', 'inc')); 25 | 26 | $this->addPostProcessing('html', HyperLanguage::fromName('xml')); 27 | 28 | $this->addStates(array( 29 | 'init' => array('php', 'html'), 30 | 'php' => array( 31 | 'comment', 'string', 'char', 'number', 32 | 'keyword' => array('', 'type', 'literal', 'operator', 'builtin'), 33 | 'identifier', 'variable'), 34 | 'variable' => array('identifier'), 35 | 'html' => array() 36 | )); 37 | 38 | $this->addRules(array( 39 | 'php' => new Rule('/<\?php/', '/\?>/'), 40 | 'html' => new Rule('/(?=.)/', '/(?=<\?php)/'), 41 | 'comment' => Rule::C_COMMENT, 42 | 'string' => Rule::C_DOUBLEQUOTESTRING, 43 | 'char' => Rule::C_SINGLEQUOTESTRING, 44 | 'number' => Rule::C_NUMBER, 45 | 'identifier' => Rule::C_IDENTIFIER, 46 | 'variable' => new Rule('/\$/', '//'), 47 | 'keyword' => array( 48 | array('break', 'case', 'class', 'const', 'continue', 'declare', 'default', 'do', 'else', 'elseif', 'enddeclare', 'endfor', 'endforeach', 'endif', 'endswitch', 'endwhile', 'extends', 'for', 'foreach', 'function', 'global', 'if', 'return', 'static', 'switch', 'use', 'var', 'while', 'final', 'interface', 'implements', 'public', 'private', 'protected', 'abstract', 'try', 'catch', 'throw', 'final', 'namespace'), 49 | 'type' => array('exception', 'int'), 50 | 'literal' => array('false', 'null', 'true', 'this'), 51 | 'operator' => array('and', 'as', 'or', 'xor', 
'new', 'instanceof', 'clone'), 52 | 'builtin' => array('array', 'die', 'echo', 'empty', 'eval', 'exit', 'include', 'include_once', 'isset', 'list', 'print', 'require', 'require_once', 'unset') 53 | ), 54 | )); 55 | 56 | $this->addMappings(array( 57 | 'char' => 'string', 58 | 'variable' => 'tag', 59 | 'html' => 'preprocessor', 60 | )); 61 | } 62 | } 63 | 64 | ?> 65 | -------------------------------------------------------------------------------- /hyperlight/languages/python.php: -------------------------------------------------------------------------------- 1 | 8 | 9 | class PythonLanguage extends HyperLanguage { 10 | public function __construct() { 11 | $this->setInfo(array( 12 | parent::NAME => 'Python', 13 | parent::VERSION => '0.1', 14 | parent::AUTHOR => array( 15 | parent::NAME => 'Konrad Rudolph', 16 | parent::WEBSITE => 'madrat.net', 17 | parent::EMAIL => 'konrad_rudolph@madrat.net' 18 | ) 19 | )); 20 | 21 | $this->setExtensions(array('py')); 22 | 23 | $this->setCaseInsensitive(false); 24 | 25 | $this->addStates(array( 26 | 'init' => array( 27 | 'string', 28 | 'bytes', 29 | 'number', 30 | 'comment', 31 | 'keyword' => array('', 'literal', 'operator'), 32 | 'identifier' 33 | ), 34 | )); 35 | 36 | $this->addRules(array( 37 | 'string' => Rule::C_DOUBLEQUOTESTRING, 38 | 'bytes' => Rule::C_SINGLEQUOTESTRING, 39 | 'number' => Rule::C_NUMBER, 40 | 'comment' => '/#.*/', 41 | 'keyword' => array( 42 | array( 43 | 'assert', 'break', 'class', 'continue', 'def', 'del', 44 | 'elif', 'else', 'except', 'finally', 'for', 'from', 45 | 'global', 'if', 'import', 'in', 'lambda', 'nonlocal', 46 | 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield' 47 | ), 48 | 'literal' => array( 49 | 'False', 'None', 'True' 50 | ), 51 | 'operator' => array( 52 | 'and', 'as', 'is', 'not', 'or' 53 | ) 54 | ), 55 | 'identifier' => Rule::C_IDENTIFIER, 56 | )); 57 | 58 | $this->addMappings(array( 59 | 'bytes' => 'char' 60 | )); 61 | } 62 | } 63 | 64 | ?> 65 | 
-------------------------------------------------------------------------------- /hyperlight/languages/vb.php: -------------------------------------------------------------------------------- 1 | setInfo(array( 6 | parent::NAME => 'VB', 7 | parent::VERSION => '1.4', 8 | parent::AUTHOR => array( 9 | parent::NAME => 'Konrad Rudolph', 10 | parent::WEBSITE => 'madrat.net', 11 | parent::EMAIL => 'konrad_rudolph@madrat.net' 12 | ) 13 | )); 14 | 15 | $this->setExtensions(array('vb')); 16 | 17 | $this->setCaseInsensitive(true); 18 | 19 | $this->addStates(array( 20 | 'init' => array( 21 | 'string', 22 | 'number', 23 | 'comment' => array('', 'doc'), 24 | 'keyword' => array('', 'type', 'literal', 'operator', 'preprocessor'), 25 | 'date', 26 | 'identifier', 27 | 'operator', 28 | 'whitespace', 29 | ), 30 | 'string' => 'escaped', 31 | 'comment doc' => 'doc', 32 | )); 33 | 34 | $this->addRules(array( 35 | 'whitespace' => Rule::ALL_WHITESPACE, 36 | 'operator' => '/[-+*\/\\\\^&.=,()<>{}]/', 37 | 'string' => new Rule('/"/', '/"c?/i'), 38 | 'number' => '/(?: # Integer followed by optional fractional part. 39 | (?:&(?:H[0-9a-f]+|O[0-7]+)|\d+) 40 | (?:\.\d*)? 41 | (?:e[+-]\d+)? 42 | U?[SILDFR%@!#&]? 43 | ) 44 | | 45 | (?: # Just the fractional part. 46 | (?:\.\d+) 47 | (?:e[+-]\d+)? 48 | [FR!#]? 
49 | ) 50 | /ix', 51 | 'escaped' => '/""/', 52 | 'keyword' => array( 53 | array( 54 | 'addhandler', 'addressof', 'alias', 'as', 'byref', 'byval', 55 | 'call', 'case', 'catch', 'cbool', 'cbyte', 'cchar', 56 | 'cdate', 'cdec', 'cdbl', 'cint', 'class', 'clng', 'cobj', 57 | 'const', 'continue', 'csbyte', 'cshort', 'csng', 'cstr', 58 | 'ctype', 'cuint', 'culng', 'cushort', 'declare', 'default', 59 | 'delegate', 'dim', 'directcast', 'do', 'each', 'else', 60 | 'elseif', 'end', 'endif', 'enum', 'erase', 'error', 61 | 'event', 'exit', 'finally', 'for', 'friend', 'function', 62 | 'get', 'gettype', 'getxmlnamespace', 'global', 'gosub', 63 | 'goto', 'handles', 'if', 'implements', 'imports', 'in', 64 | 'inherits', 'interface', 'let', 'lib', 'loop', 'module', 65 | 'mustinherit', 'mustoverride', 'namespace', 'narrowing', 66 | 'next', 'notinheritable', 'notoverridable', 'of', 'on', 67 | 'operator', 'option', 'optional', 'overloads', 68 | 'overridable', 'overrides', 'paramarray', 'partial', 69 | 'private', 'property', 'protected', 'public', 'raiseevent', 70 | 'readonly', 'redim', 'removehandler', 'resume', 'return', 71 | 'select', 'set', 'shadows', 'shared', 'static', 'step', 72 | 'stop', 'structure', 'sub', 'synclock', 'then', 'throw', 73 | 'to', 'try', 'trycast', 'wend', 'using', 'when', 'while', 74 | 'widening', 'with', 'withevents', 'writeonly' 75 | ), 76 | 'type' => array( 77 | 'boolean', 'byte', 'char', 'date', 'decimal', 'double', 78 | 'long', 'integer', 'object', 'sbyte', 'short', 'single', 79 | 'string', 'variant', 'uinteger', 'ulong', 'ushort' 80 | ), 81 | 'literal' => array( 82 | 'false', 'me', 'mybase', 'myclass', 'nothing', 'true' 83 | ), 84 | 'operator' => array( 85 | 'and', 'andalso', 'is', 'isnot', 'like', 'mod', 'new', 86 | 'not', 'or', 'orelse', 'typeof', 'xor' 87 | ), 88 | 'preprocessor' => '/#(?:const|else|elseif|end if|end region|if|region)/i' 89 | ), 90 | 'comment' => array( 91 | "/(?:'{1,2}[^']|rem\s).*/i", 92 | 'doc' => new Rule("/'''/", '/$/m') 93 | ), 94 | 
'date' => '/#.+?#/', 95 | 'identifier' => '/[a-z_][a-z_0-9]*|\[.+?\]/i', 96 | 'doc' => '/<(?:".*?"|\'.*?\'|[^>])*>/', 97 | )); 98 | 99 | $this->addMappings(array( 100 | 'whitespace' => '', 101 | 'operator' => '', 102 | 'date' => 'tag', 103 | )); 104 | } 105 | } 106 | 107 | ?> 108 | -------------------------------------------------------------------------------- /hyperlight/languages/xml.php: -------------------------------------------------------------------------------- 1 | setInfo(array( 6 | parent::NAME => 'XML', 7 | parent::VERSION => '0.3', 8 | parent::AUTHOR => array( 9 | parent::NAME => 'Konrad Rudolph', 10 | parent::WEBSITE => 'madrat.net', 11 | parent::EMAIL => 'konrad_rudolph@madrat.net' 12 | ) 13 | )); 14 | 15 | $this->setExtensions(array('xml', 'xsl', 'xslt', 'xsd', 'manifest')); 16 | 17 | $inline = array('entity'); 18 | $common = array('tagname', 'attribute', 'value' => array('double', 'single')); 19 | 20 | $this->addStates(array( 21 | 'init' => array_merge(array('comment', 'cdata', 'tag'), $inline), 22 | 'tag' => array_merge(array('preprocessor', 'meta'), $common), 23 | 'preprocessor' => $common, 24 | 'meta' => $common, 25 | 'value double' => $inline, 26 | 'value single' => $inline, 27 | )); 28 | 29 | $this->addRules(array( 30 | 'comment' => '//s', 31 | 'cdata' => '//', 32 | 'tag' => new Rule('//'), 33 | 'tagname' => '#(?:(?<=<)|(?<= '/[a-z0-9:-]+/i', 35 | 'preprocessor' => new Rule('/\?/'), 36 | 'meta' => new Rule('/!/'), 37 | 'value' => array( 38 | 'double' => new Rule('/"/', '/"/'), 39 | 'single' => new Rule("/'/", "/'/") 40 | ), 41 | 'entity' => '/&.*?;/', 42 | )); 43 | 44 | $this->addMappings(array( 45 | 'attribute' => 'keyword type', 46 | 'cdata' => '', 47 | 'value double' => 'string', 48 | 'value single' => 'string', 49 | 'entity' => 'escaped', 50 | 'tagname' => 'keyword' 51 | )); 52 | } 53 | } 54 | 55 | ?> 56 | -------------------------------------------------------------------------------- /hyperlight/line-numbers.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | This is a test 7 | 37 | 38 | 49 | 50 | 51 | 52 |

Test with line numbers

53 | 54 |
    55 |
  1.  
  2. 56 |
  3.  
  4. 57 |
  5.  
  6. 58 |
  7.  
  8. 59 |
  9.  
  10. 60 |
  11.  
  12. 61 |
  13.  
  14. 62 |
  15.  
  16. 63 |
  17.  
  18. 64 |
  19.  
  20. 65 |
  21.  
  22. 66 |
  23.  
  24. 67 |
  25.  
  26. 68 |
  27.  
  28. 69 |
  29.  
  30. 70 |
  31.  
  32. 71 |
  33.  
  34. 72 |
  35.  
  36. 73 |
  37.  
  38. 74 |
  39.  
  40. 75 |
  41.  
  42. 76 |
  43.  
  44. 77 |
  45.  
  46. 78 |
  47.  
  48. 79 |
  49.  
  50. 80 |
  51.  
  52. 81 |
  53.  
  54. 82 |
  55.  
  56. 83 |
  57.  
  58. 84 |
  59.  
  60. 85 |
  61.  
  62. 86 |
  63.  
  64. 87 |
88 |
 89 | template <typename T, typename BinaryOperator>  // Element-wise kernel: out[i] = op(a[i], b[i]) for i in [0, len).
 90 | __global__ void dev_vector_op(T* a, T* b, T* out, unsigned int len) {
 91 |     unsigned int const idx = blockDim.x * blockIdx.x + threadIdx.x;  // one element per thread, indexed across blocks
 92 |     BinaryOperator op;  // default-constructed functor applied to each element pair
 93 |     if (idx < len) out[idx] = op(a[idx], b[idx]);  // guard: threads past the end (grid rounded up beyond len) must not touch memory
 94 | }
 95 | 
 96 | template <typename T, typename BinOp>  // In-block pairwise reduction of gdata with BinOp; thread 0 stores sdata[len - 1] into *result.
 97 | __global__ void dev_reduce_block(T* gdata, T* result, unsigned int const len) {
 98 |     extern __shared__ T sdata[];  // unsized shared-memory scratch buffer for the block
 99 |     unsigned int const offset = threadIdx.x;  // this thread's index within the block
100 |     BinOp const op;  // default-constructed combining functor
101 | 
102 |     sdata[2 * offset] = gdata[2 * offset];  // each thread stages two adjacent input elements ...
103 |     sdata[2 * offset + 1] = gdata[2 * offset + 1];  // ... into shared memory (no bounds check against len here)
104 | 
105 | #define IDX(x) ((x) + 2 * offset) * d - 1
106 | 
107 |     for (unsigned int d = 1; d <= len / 2; d *= 2) {  // stride d doubles every pass: 1, 2, 4, ...
108 |         __syncthreads();  // barrier so the previous pass (or the initial staging) is visible to all threads
109 |         unsigned int const ai = IDX(1);  // expands to ((1) + 2 * offset) * d - 1
110 |         if (ai >= len) continue;  // no pair for this thread at this stride; it still reaches the next barrier
111 |         unsigned int const bi = IDX(2);  // expands to ((2) + 2 * offset) * d - 1, i.e. ai + d
112 |         sdata[bi] = op(sdata[ai], sdata[bi]);  // combine the left partner into the right slot
113 |     }
114 | 
115 | #undef IDX
116 |     
117 |     __syncthreads();  // make the final pass visible before the result is read
118 |     if (threadIdx.x == 0)  // a single thread publishes the result
119 |         *result = sdata[len - 1];  // NOTE(review): presumably expects len to be a power of two and len / 2 threads per block - confirm with the launcher
120 | }
121 | 
122 | 123 | 124 | -------------------------------------------------------------------------------- /hyperlight/plugins/wordpress/hyperlight.php: -------------------------------------------------------------------------------- 1 | ! 33 | function hyperlight_before_filter($content) { 34 | return preg_replace_callback( 35 | '#(.*?)#is', 36 | 'hyperlight_highlight_block', 37 | $content 38 | ); 39 | } 40 | 41 | // Replace the hyperlight replace tokens with the corresponding highlighted 42 | // source code. 43 | function hyperlight_after_filter($content) { 44 | global $hyperlight_replace_token; 45 | return preg_replace_callback( 46 | "/$hyperlight_replace_token\d+/", 47 | 'hyperlight_insert_block', 48 | $content 49 | ); 50 | } 51 | 52 | function hyperlight_highlight_block($match) { 53 | global $hyperlight_codes, $hyperlight_replace_token, $hyperlight_code_index; 54 | 55 | // Notice: a key and its value must NOT be separated by space! 56 | $attributes = preg_split('/\s+/s', trim($match[1])); 57 | $code = $match[2]; 58 | 59 | if (count($attributes) > 0) { 60 | $new_attr = array(); 61 | foreach ($attributes as $attr) { 62 | list($name, $value) = explode('=', $attr); 63 | $new_attr[$name] = $value; 64 | } 65 | $attributes = $new_attr; 66 | } 67 | 68 | if (array_key_exists('lang', $attributes)) { 69 | $lang = trim($attributes['lang'], '"\''); 70 | $attributes = array_diff_key($attributes, array('lang' => '')); 71 | } 72 | 73 | if (!isset($lang)) return $match[0]; // No language given: don't highlight. 74 | 75 | $quote = '"'; 76 | $class = "source-code $lang"; 77 | 78 | if (array_key_exists('class', $attributes)) { 79 | $oldclass = $attributes['class']; 80 | if (substr($oldclass, 0, 1) === "'") 81 | $quote = "'"; 82 | $class .= ' ' . trim($oldclass, '"\''); 83 | } 84 | 85 | $attributes['class'] = "$quote$class$quote"; 86 | 87 | $new_attr = array(); 88 | foreach ($attributes as $key => $value) 89 | $new_attr[] = "$key=$value"; 90 | 91 | $attributes = ' ' . 
implode(' ', $new_attr); 92 | $hyperlight = new Hyperlight($lang); 93 | $code = $hyperlight->render($code); 94 | $index = "$hyperlight_replace_token$hyperlight_code_index"; 95 | ++$hyperlight_code_index; 96 | $hyperlight_codes[$index] = "$code"; 97 | return $index; 98 | } 99 | 100 | function hyperlight_insert_block($match) { 101 | global $hyperlight_codes; 102 | return $hyperlight_codes[$match[0]]; 103 | } 104 | 105 | ?> 106 | -------------------------------------------------------------------------------- /hyperlight/plugins/wordpress/readme.txt: -------------------------------------------------------------------------------- 1 | === Plugin Name === 2 | Contributors: Konrad Rudolph 3 | Tags: syntax highlighting, syntax highlight, syntax formatting, code formatting, code, formatting, highlight, syntax 4 | Requires at least: 2.0.2 5 | Tested up to: 2.8.6 6 | Stable tag: trunk 7 | 8 | A code highlighting plugin for WordPress that just works, and is highly configurable using CSS. 9 | 10 | == Description == 11 | 12 | Hyperlight highlights source code, pure and simple. It's 13 | 14 | * **Easy to use** -- using it is a matter of one function call. 15 | * **Easy to extend** -- write your own language definitions in PHP using regular expressions. 16 | * **Powerful** -- since the parser supports states, it can do so much more than just regular languages. 17 | * **Compliant** -- Hyperlight produces valid, semantic strict XHTML. 18 | * **Configurable** -- Hyperlight produces logical CSS rules which can be used by beautiful colour themes. 19 | 20 | == Installation == 21 | 22 | 1. Create a folder called `hyperlight` in the `/wp-content/plugins/` directory. 23 | 1. Copy the file `hyperlight.php` from the current folder into this folder. 24 | 1. Create (yet another!) a sub-folder called `hyperlight`. 25 | 1. Copy all the hyperlight files there. 
26 | 27 | You should end up with a directory structure like this: 28 | 29 | * `hyperlight/` 30 | * `hyperlight.php` -- The WordPress plugin file 31 | * `readme.txt` -- This file 32 | * `hyperlight/` 33 | * ... all Hyperlight files, in particular: 34 | * `hyperlight.php` -- The main Hyperlight include file 35 | 36 | == Frequently Asked Questions == 37 | 38 | = How do I highlight code? = 39 | 40 | Code (in `
<pre>` tags) is highlighted automatically if its `<pre>` tag has an attribute `lang`. For example:
41 | 
42 |     
43 |        
44 |     
45 | 46 | = Code doesn't appear formatted! = 47 | 48 | Hyperlight only parses the code and adds appropriate CSS class tags to the HTML output. In order for the code to appear coloured you need to use a Hyperlight colour scheme which consists of a single CSS file that you can drop into your WordPress theme. 49 | Refer to the [Hyperlight documentation](http://code.google.com/p/hyperlight/wiki/UserGuide) for more detail. 50 | 51 | == Changelog == 52 | 53 | = 0.1 = 54 | * Initial WordPress version. 55 | -------------------------------------------------------------------------------- /hyperlight/preg_helper.php: -------------------------------------------------------------------------------- 1 | different modifiers on the individual expressions. The order of 36 | * sub-matches is preserved as well. Numbered back-references are adapted to 37 | * the new overall sub-match count. This means that it's safe to use numbered 38 | * back-references in the individual expressions! 39 | * If {@link $names} is given, the individual expressions are captured in 40 | * named sub-matches using the contents of that array as names. 41 | * Matching pair-delimiters (e.g. "{…}") are currently 42 | * not supported. 43 | * 44 | * The function assumes that all regular expressions are well-formed. 45 | * Behaviour is undefined if they aren't. 46 | * 47 | * This function was created after a 48 | * {@link http://stackoverflow.com/questions/244959/ StackOverflow discussion}. 49 | * Much of it was written or thought of by “porneL” and “eyelidlessness”. Many 50 | * thanks to both of them. 51 | * 52 | * @param string $glue A string to insert between the individual expressions. 53 | * This should usually be either the empty string, indicating 54 | * concatenation, or the pipe ("|"), indicating alternation. 55 | * Notice that this string might have to be escaped since it is treated 56 | * as a normal character in a regular expression (i.e. 
"/" will 57 | * end the expression and result in an invalid output). 58 | * @param array $expressions The expressions to merge. The expressions may 59 | * have arbitrary different delimiters and modifiers. 60 | * @param array $names Optional. This is either an empty array or an array of 61 | * strings of the same length as {@link $expressions}. In that case, 62 | * the strings of this array are used to create named sub-matches for the 63 | * expressions. 64 | * @return string An string representing a regular expression equivalent to the 65 | * merged expressions. Returns FALSE if an error occurred. 66 | */ 67 | function preg_merge($glue, array $expressions, array $names = array()) { 68 | // … then, a miracle occurs. 69 | 70 | // Sanity check … 71 | 72 | $use_names = ($names !== null and count($names) !== 0); 73 | 74 | if ( 75 | $use_names and count($names) !== count($expressions) or 76 | !is_string($glue) 77 | ) 78 | return false; 79 | 80 | $result = array(); 81 | // For keeping track of the names for sub-matches. 82 | $names_count = 0; 83 | // For keeping track of *all* captures to re-adjust backreferences. 84 | $capture_count = 0; 85 | 86 | foreach ($expressions as $expression) { 87 | if ($use_names) 88 | $name = str_replace(' ', '_', $names[$names_count++]); 89 | 90 | // Get delimiters and modifiers: 91 | 92 | $stripped = preg_strip($expression); 93 | 94 | if ($stripped === false) 95 | return false; 96 | 97 | list($sub_expr, $modifiers) = $stripped; 98 | 99 | // Re-adjust backreferences: 100 | // TODO What about \R backreferences (\0 isn't allowed, though)? 101 | 102 | // We assume that the expression is correct and therefore don't check 103 | // for matching parentheses. 104 | 105 | $number_of_captures = preg_match_all('/\([^?]|\(\?[^:]/', $sub_expr, $_); 106 | 107 | if ($number_of_captures === false) 108 | return false; 109 | 110 | if ($number_of_captures > 0) { 111 | $backref_expr = '/ 112 | (?" 
: '?:'; 135 | $new_expr = "($sub_name$sub_modifiers$sub_expr)"; 136 | $result[] = $new_expr; 137 | } 138 | 139 | return '/' . implode($glue, $result) . '/'; 140 | } 141 | 142 | /** 143 | * Strips a regular expression string off its delimiters and modifiers. 144 | * Additionally, normalizes the delimiters (i.e. reformats the pattern so that 145 | * it could have used "/" as delimiter). 146 | * 147 | * @param string $expression The regular expression string to strip. 148 | * @return array An array whose first entry is the expression itself, the 149 | * second an array of delimiters. If the argument is not a valid regular 150 | * expression, returns FALSE. 151 | * 152 | */ 153 | function preg_strip($expression) { 154 | if (preg_match('/^(.)(.*)\\1([imsxeADSUXJu]*)$/s', $expression, $matches) !== 1) 155 | return false; 156 | 157 | $delim = $matches[1]; 158 | $sub_expr = $matches[2]; 159 | if ($delim !== '/') { 160 | // Replace occurrences by the escaped delimiter by its unescaped 161 | // version and escape new delimiter. 162 | $sub_expr = str_replace("\\$delim", $delim, $sub_expr); 163 | $sub_expr = str_replace('/', '\\/', $sub_expr); 164 | } 165 | $modifiers = $matches[3] === '' ? 
array() : str_split(trim($matches[3])); 166 | 167 | return array($sub_expr, $modifiers); 168 | } 169 | 170 | ?> 171 | -------------------------------------------------------------------------------- /hyperlight/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: Verdana; 3 | margin: 0; 4 | } 5 | 6 | #head h1, #content h2 { 7 | margin-top: 0; 8 | } 9 | 10 | #head { 11 | background: url(graphics/head-backback.png) repeat-x; 12 | color:#A67B5B; 13 | height:215px; 14 | } 15 | 16 | #head .text { 17 | background: transparent url(graphics/head-background.png) no-repeat 50% 0%; 18 | height: 100%; 19 | width: 100%; 20 | } 21 | 22 | #head .text h1 { 23 | margin: 0 auto; 24 | width: 600px; 25 | font: italic normal normal 36pt/215px Palatino, Times New Roman, serif; 26 | } 27 | 28 | #content { 29 | background: #BC7B57; 30 | } 31 | 32 | #swoosh { 33 | background: #BC7B57 url(graphics/body-background.png) no-repeat 50% 0%; 34 | width: 100%; 35 | height: 75px; 36 | position: absolute; 37 | z-index: 99; 38 | } 39 | 40 | #guide-menu { 41 | } 42 | 43 | #content .text { 44 | position: relative; 45 | padding: 1em 0; 46 | z-index: 100; 47 | } 48 | 49 | div.text { 50 | width: 600px; 51 | margin: 0 auto; 52 | } 53 | 54 | p { 55 | text-align: justify; 56 | } 57 | 58 | .source-code { 59 | border: 1px solid black; 60 | font-family: Consolas, Courier New; 61 | font-size: 0.9em; 62 | } 63 | 64 | pre { 65 | padding: 0.2em; 66 | } 67 | 68 | #switch-buttons { 69 | list-style: none; 70 | height: 2em; 71 | margin: 0; 72 | padding: 0; 73 | width: 100%; 74 | } 75 | 76 | #switch-buttons li { 77 | display: block; 78 | float: left; 79 | margin: 0.2em 0.4em 0.2em 0; 80 | padding: 0; 81 | width: 7em; 82 | } 83 | 84 | #switch-buttons li a { 85 | background: white; 86 | display: block; 87 | height: 2em; 88 | line-height: 2em; 89 | text-align: center; 90 | text-decoration: none; 91 | width: 100%; 92 | } 93 | 94 | #switch-buttons li a.active 
{ 95 | background: black; 96 | } 97 | -------------------------------------------------------------------------------- /hyperlight/test.php: -------------------------------------------------------------------------------- 1 | language()->name(); 15 | $title = $file === $lang ? 16 | "

Test for language {$pretty_name}

" : 17 | "

Test with file “{$file}” for language {$pretty_name}

"; 18 | echo "$title\n"; 19 | #$lines = count(explode("\n", $code)) - 1; 20 | #echo '
    '; 21 | #for ($i = 0; $i < $lines; $i++) 22 | # echo '
  1.  
  2. '; 23 | #echo '
'; 24 | ?>
renderAndPrint($code); ?>
29 | 30 | 31 | 32 | <?php echo $title; ?> 33 | 34 | 43 | 44 | 45 | 57 | 58 | 59 | 60 |

61 | '; 66 | } 67 | 68 | if (__FILE__ === realpath($_SERVER['SCRIPT_FILENAME'])): 69 | 70 | $default_colorscheme = 'vibrant-ink'; 71 | if (isset($_GET['style'])) { 72 | $colorscheme = $_GET['style']; 73 | if (!file_exists("colors/$colorscheme.css")) 74 | $colorscheme = $default_colorscheme; 75 | } 76 | else 77 | $colorscheme = $default_colorscheme; 78 | 79 | write_prolog('Hyperlight tests', $colorscheme, isset($_GET['debug'])); 80 | ?> 81 |

A few small tests:

82 | 83 |

Look, ma: Inline code. Start off by writing ', 'cpp', 'code'); ?> 84 | at the beginning of your newly-created main.cpp file. 85 | Then you can insert the following code below:

86 | 89 |

Next, let's compile this code and execute it. This is done easily on the console:

90 |
$ g++ -Wall -pedantic -o main main.cpp
 91 | $ ./main
 92 | Hello, world!
93 | Congratulations! You've just run your first C++ program. 94 | '', 'style' => '')); 98 | $args = array_keys($args); 99 | 100 | $tests = empty($args) ? array() : explode(',', implode(',', $args)); 101 | 102 | if (!empty($tests)) 103 | echo '

Showing only test(s) ' . implode(', ', $tests) . '.

'; 104 | 105 | hyperlight_test('python'); 106 | hyperlight_test('csharp'); 107 | hyperlight_test('VB'); 108 | hyperlight_test('simple.css', 'css'); 109 | hyperlight_test('../' . basename(__FILE__), 'php'); 110 | hyperlight_test('preg_helper.php', 'php'); 111 | hyperlight_test('pizzachili_api.h', 'cpp'); 112 | hyperlight_test('XML'); 113 | hyperlight_test('style.css', 'css'); 114 | 115 | ?> 116 |

Test runs

117 |
127 | -------------------------------------------------------------------------------- /hyperlight/tests.php: -------------------------------------------------------------------------------- 1 | getMethods() as $method) { 17 | if ($method->isPublic() and $method->isStatic()) { 18 | $name = $method->getName(); 19 | if (self::$trace) 20 | echo "$kname::$name:\n"; 21 | 22 | $method->invoke(null); 23 | if (self::$trace) 24 | echo "\n"; 25 | } 26 | } 27 | if (self::$trace) 28 | echo "
"; 29 | } 30 | 31 | if (self::$trace) 32 | echo "All tests passed."; 33 | } 34 | 35 | public static function assert($cond, $description = '', $message = '') { 36 | $passed = $cond ? 37 | "passed." : 38 | "failed!"; 39 | if (self::$trace) 40 | echo " $description … $passed\n"; 41 | 42 | if ($cond) 43 | return true; 44 | 45 | // Get stack trace. 46 | $bt = debug_backtrace(); 47 | while ($bt[0]['class'] == 'Test') 48 | array_shift($bt); 49 | 50 | // Exit with error. 51 | $caller = $bt[0]; 52 | $function = $caller['function']; 53 | ob_start(); 54 | var_dump($bt); 55 | $trace = ob_get_clean(); 56 | exit(<<Assertion failed in $function. 58 | $message 59 | Complete stack trace: $trace 60 | FAIL 61 | ); 62 | } 63 | 64 | public static function assertEqual($a, $b, $message = '') { 65 | self::assert($a === $b, "$a === $b", $message); 66 | } 67 | 68 | public static function assertSimilar($a, $b, $message = '') { 69 | self::assert($a == $b, "$a == $b", $message); 70 | } 71 | 72 | public static function assertException($code, $message = '') { 73 | try { 74 | eval($code); 75 | self::assert(false, "No exception", $message); 76 | } 77 | catch (Exception $ex) { 78 | self::assert(true, "Expected exception $ex", $message); 79 | } 80 | } 81 | 82 | public static function assertNoException($code, $message = '') { 83 | try { 84 | eval($code); 85 | self::assert(true, "No exception", $message); 86 | } 87 | catch (Exception $ex) { 88 | self::assert(false, "Unexpected exception $ex", $message); 89 | } 90 | } 91 | 92 | public static function assertMatch($regex, $text, $message = '') { 93 | self::assert( 94 | preg_match($regex, $text) === 1, 95 | "Match \"$text\" with $regex", $message 96 | ); 97 | } 98 | 99 | public static function assertNoMatch($regex, $text, $message = '') { 100 | self::assert( 101 | preg_match($regex, $text) !== 1, 102 | "Do not match \"$text\" with $regex", $message 103 | ); 104 | } 105 | 106 | public static function assertMatchEqual($regex, $text, $match, $group = 0, 
$message = '') { 107 | self::assert( 108 | preg_match($regex, $text, $matches) === 1, 109 | "Match \"$text\" with $regex", $message 110 | ); 111 | self::assert( 112 | $matches[$group] === $match, 113 | " … with index '$group' against \"$match\"", $message 114 | ); 115 | } 116 | 117 | public static function assertMatchNotEqual($regex, $text, $match, $group = 0, $message = '') { 118 | self::assert( 119 | preg_match($regex, $text, $matches) === 1, 120 | "Match \"$text\" with $regex", $message 121 | ); 122 | self::assert( 123 | $matches[$group] !== $match, 124 | " … with index '$group' not against \"$match\"", $message 125 | ); 126 | } 127 | } 128 | 129 | class PregMergeTests { 130 | static function basic() { 131 | $r = preg_merge('', array('/a/', '/b/')); 132 | 133 | Test::assert($r, 'preg_merge'); 134 | Test::assertMatch($r, 'ab'); 135 | Test::assertNoMatch($r, 'a'); 136 | Test::assertNoMatch($r, 'b'); 137 | 138 | $r = preg_merge('|', array('/a/', '/b/')); 139 | 140 | Test::assert($r, 'preg_merge'); 141 | Test::assertMatch($r, 'ab'); 142 | Test::assertMatch($r, 'a'); 143 | Test::assertMatch($r, 'b'); 144 | Test::assertNoMatch($r, 'x'); 145 | 146 | $r = preg_merge('|', array('/^a/', '/bc$/', '/d/', '/^e/')); 147 | 148 | Test::assert($r, 'preg_merge'); 149 | Test::assertMatch($r, 'ade'); 150 | Test::assertMatch($r, 'de'); 151 | Test::assertMatch($r, 'exc'); 152 | Test::assertMatch($r, 'foobc'); 153 | Test::assertMatch($r, 'abc'); 154 | Test::assertNoMatch($r, 'bca'); 155 | Test::assertNoMatch($r, 'xe'); 156 | } 157 | 158 | static function delims() { 159 | $r = preg_merge('', array('/a/', '#b#')); 160 | 161 | Test::assert($r, 'preg_merge'); 162 | Test::assertMatch($r, 'ab'); 163 | 164 | $r = preg_merge('', array('/a#/', '#/b#')); 165 | 166 | Test::assert($r, 'preg_merge'); 167 | Test::assertMatch($r, 'a#/b'); 168 | 169 | $r = preg_merge('', array(',1,', '|2|', '/3|4/')); 170 | 171 | Test::assert($r, 'preg_merge'); 172 | Test::assertMatch($r, '01234'); 173 | 
Test::assertMatch($r, '01245'); 174 | Test::assertNoMatch($r, '0134'); 175 | } 176 | 177 | static function modifiers() { 178 | $r = preg_merge('', array('/a/i', '/b/')); 179 | 180 | Test::assert($r, 'preg_merge'); 181 | Test::assertMatch($r, 'ab'); 182 | Test::assertMatch($r, 'Ab'); 183 | Test::assertNoMatch($r, 'aB'); 184 | Test::assertNoMatch($r, 'AB'); 185 | 186 | $r = preg_merge('', array('/a b/x', '/c /')); 187 | 188 | Test::assert($r, 'preg_merge'); 189 | Test::assertMatch($r, 'abc '); 190 | Test::assertNoMatch($r, 'abc'); 191 | Test::assertNoMatch($r, 'a bc '); 192 | 193 | $r = preg_merge('', array('/a/', '/bc./si', '/d./')); 194 | 195 | Test::assert($r, 'preg_merge'); 196 | Test::assertMatch($r, "abc\nde"); 197 | Test::assertMatch($r, "aBc\nde"); 198 | Test::assertNoMatch($r, "Abc\nde"); 199 | Test::assertNoMatch($r, "abc\nDe"); 200 | Test::assertNoMatch($r, "abc\nd\n"); 201 | 202 | $r = preg_merge('', array('/.*/', '/a./')); 203 | 204 | Test::assert($r, 'preg_merge'); 205 | Test::assertMatchEqual($r, 'cbaax', 'cbaax'); 206 | Test::assertNoMatch($r, 'cba'); 207 | Test::assertMatchNotEqual($r, 'cbaax', 'cbaa'); 208 | 209 | $r = preg_merge('', array('/.*/U', '/a./')); 210 | 211 | Test::assert($r, 'preg_merge'); 212 | Test::assertMatchEqual($r, 'cbaax', 'cbaa'); 213 | Test::assertNoMatch($r, 'cba'); 214 | Test::assertMatchNotEqual($r, 'cbaax', 'cbaax'); 215 | } 216 | 217 | static function names() { 218 | $r = preg_merge('', array('/a/', '/b/'), array('one')); 219 | 220 | Test::assert(!$r, 'preg_merge invalid'); 221 | 222 | $r = preg_merge('', array('/a/', '/b/'), array('one', 'two')); 223 | 224 | Test::assert($r, 'preg_merge'); 225 | Test::assertMatchEqual($r, 'xabc', 'a', 'one'); 226 | Test::assertMatchEqual($r, 'xabc', 'b', 'two'); 227 | } 228 | 229 | static function backrefs() { 230 | $r = preg_merge('', array('/(a)/', '/(b)\\1/')); 231 | 232 | Test::assert($r, 'preg_merge'); 233 | Test::assertNoMatch($r, 'aba'); 234 | Test::assertMatch($r, 'abb'); 235 | 
Test::assertNoMatch($r, 'ab'); 236 | Test::assertNoMatch($r, 'xbb'); 237 | 238 | $r = preg_merge('', array('/(a)/', '/(b)\\\\1/')); 239 | 240 | Test::assert($r, 'preg_merge'); 241 | Test::assertMatch($r, 'ab\\1'); 242 | 243 | $r = preg_merge('', array('/(a)/', '/(b)\\\\\\1/')); 244 | var_dump('/(b)\\\\\\1/'); 245 | var_dump($r); 246 | 247 | Test::assert($r, 'preg_merge'); 248 | Test::assertMatch($r, 'ab\\b'); 249 | 250 | $r = preg_merge('', array('/(a)/', '/(b)\\\\\\1/', '/c((d)\\2)/')); 251 | 252 | Test::assert($r, 'preg_merge'); 253 | Test::assertMatch($r, 'ab\\bcdd'); 254 | } 255 | } 256 | 257 | ?> 258 | -------------------------------------------------------------------------------- /hyperlight/tests/csharp: -------------------------------------------------------------------------------- 1 | /// This is a doc comment 2 | static bool ContainsFragment(string[] paths, string fragment) 3 | { 4 | #region Foobar 5 | 6 | /* 7 | * Test comment spanning some lines. 8 | * And then some … 9 | */ 10 | if (paths.Length == 0) return false; 11 | // paths **must** be pre-sorted via Array.Sort(paths); 12 | int index = Array.BinarySearch(paths, fragment); 13 | // we want the index of the *next highest* path 14 | if (index < 0) { // no match 15 | index = ~index; 16 | } else { // exact match 17 | index++; // for strict substring (non-equal) 18 | } 19 | var @return = 123; 20 | return index < paths.Length && paths[index].StartsWith(fragment); 21 | 22 | #endregion 23 | } 24 | -------------------------------------------------------------------------------- /hyperlight/tests/module1.vb: -------------------------------------------------------------------------------- 1 | Option Strict On 2 | 3 | Imports System.Data.OleDb 4 | Imports System.Collections.Specialized 5 | Imports System.Net 6 | Imports System.IO 7 | Imports System.Text 8 | Imports System.Text.RegularExpressions 9 | 10 | Public Class Crawler 11 | 12 | ' Public 13 | Public Mode As String 14 | Public ProjectName As String 15 | 
16 | ' Private 17 | Dim Log As Logger 18 | Public Conn As OleDbConnection 19 | Dim DB As SpiderDB 20 | Dim SystemConfig As MyDictionary 21 | Dim MaxDepth As Integer 22 | Dim MaxRetries As Integer 23 | Dim SpiderOnly As Boolean 24 | Dim ProjectEncoding As Encoding 25 | Dim ProjectConfig As MyDictionary 26 | Dim HTMLEntities As MyDictionary 27 | Dim DBContentTypes As MyDictionary 28 | Dim LinksProcessed As StringDictionary 29 | Dim SystemDateTags As String 30 | Dim ProjectUnwantedTags As String 31 | Dim SleepTime As Integer = 0 32 | Public Total As Integer = 0 33 | Dim CrawlMax As Integer = 0 34 | 35 | Public Sub New(ByVal Mode As String, ByVal Log As Logger, ByVal SystemConfig As MyDictionary, ByVal ProjectName As String, ByVal CrawlMax As Integer) 36 | Me.Mode = Mode 37 | Me.Log = Log 38 | Me.SystemConfig = SystemConfig 39 | Me.ProjectName = ProjectName 40 | Me.CrawlMax = CrawlMax 41 | End Sub 42 | 43 | #Region "Init()" 44 | Sub Init() 45 | 46 | ''''''''''''''''''''''''''''''''''''''''' 47 | 48 | ' Database Connection 49 | ''''''''''''''''''''''''''''''''''''''''' 50 | 51 | Log.dpi("Connecting to DB...") 52 | Conn = New OleDbConnection(SystemConfig("connection")) 53 | Try 54 | Conn.Open() 55 | Log.dpi("Connected to DB...") 56 | Catch e As Exception 57 | Terminate("Unable to connect to database: " & e.Message) 58 | End Try 59 | 60 | ''''''''''''''''''''''''''''''''''''''''' 61 | 62 | ' Project Configuration 63 | ''''''''''''''''''''''''''''''''''''''''' 64 | 65 | ProjectConfig = New MyDictionary 66 | Log.dpi("Loading Project Configuration...") 67 | Try 68 | ProjectConfig.LoadColumns(Conn, "SELECT * FROM [project] WHERE [project]=" & foms(ProjectName)) 69 | Log.dpd("Project Configuration:" & vbNewLine & ProjectConfig.ToString()) 70 | Catch e As Exception 71 | Terminate("Unable to load configuration for project '" & ProjectName & "': " & e.Message) 72 | End Try 73 | 74 | ''''''''''''''''''''''''''''''''''''''''' 75 | 76 | ' Validate Project Configuration 77 | 
''''''''''''''''''''''''''''''''''''''''' 78 | 79 | ' Validate start URL 80 | If LCase(URLFunctions.GetProtocol(ProjectConfig("start_url"))) <> "http" Or Not URLFunctions.IsValid(ProjectConfig("start_url")) Then 81 | Terminate("Invalid start_url protocol. Must be a valid http URL: " & ProjectConfig("start_url")) 82 | End If 83 | ' Validate domain settings 84 | If ProjectConfig("domains_allow") = "" And ProjectConfig("domains_reject") = "" Then 85 | ProjectConfig("domains_allow") = URLFunctions.GetDomain(ProjectConfig("start_url")) 86 | Log.dpi("Limiting crawl to domain: " & ProjectConfig("domains_allow")) 87 | End If 88 | SystemDateTags = SystemConfig("DateTags") 89 | ProjectUnwantedTags = SystemConfig("UnwantedTags") 90 | SleepTime = ParseInt(SystemConfig("SleepTime"), 0) 91 | 92 | ' Other settings 93 | MaxDepth = ParseInt(ProjectConfig("max_depth"), 0) 94 | MaxRetries = ParseInt(ProjectConfig("max_retries"), 0) 95 | SpiderOnly = (ParseInt(ProjectConfig("mode"), 0) = 1) 96 | 97 | ' Validate charset 98 | Try 99 | ProjectEncoding = Encoding.GetEncoding(ProjectConfig("charset")) 100 | Catch e As Exception 101 | If ProjectConfig("charset") <> "" Then 102 | Log.dpw("Invalid charset setting '" & ProjectConfig("charset") & "'") 103 | End If 104 | ProjectEncoding = Encoding.ASCII 105 | Log.dpw("Defaulting charset setting to '" & ProjectEncoding.BodyName & "'") 106 | End Try 107 | 108 | ''''''''''''''''''''''''''''''''''''''''' 109 | 110 | ' Spider DB Functions 111 | ''''''''''''''''''''''''''''''''''''''''' 112 | 113 | Log.dpi("Initialising DB...") 114 | DB = New SpiderDB(Conn, ProjectName, Mode, CSVInStr(SystemConfig("UnicodeCharsets"), ProjectEncoding.BodyName)) 115 | DB.CommandTimeout = ParseInt(SystemConfig("CommandTimeout"), 30) 116 | DB.Log = Log 117 | 118 | ''''''''''''''''''''''''''''''''''''''''' 119 | 120 | ' Load content_types table 121 | ''''''''''''''''''''''''''''''''''''''''' 122 | 123 | Log.dpi("Loading Content Types...") 124 | DBContentTypes = New 
MyDictionary 125 | Try 126 | DBContentTypes.LoadRows(Conn, "SELECT [content_type], [extension] FROM [content_type]") 127 | Catch e As Exception 128 | Terminate("Unable to load content types: " & e.Message) 129 | End Try 130 | If DBContentTypes.Records.Count = 0 Then 131 | Terminate("Zero content types loaded. content_types table is empty.") 132 | End If 133 | 134 | ''''''''''''''''''''''''''''''''''''''''' 135 | 136 | ' Load html_entity table 137 | ''''''''''''''''''''''''''''''''''''''''' 138 | 139 | Log.dpi("Loading HTML Entities...") 140 | HTMLEntities = New MyDictionary 141 | Try 142 | HTMLEntities.LoadRows(Conn, "SELECT [entity], [character] FROM [html_entity]") 143 | Catch e As Exception 144 | Terminate("Unable to load html entities: " & e.Message) 145 | End Try 146 | If DBContentTypes.Records.Count = 0 Then 147 | Terminate("Zero html entities loaded. html_entity table is empty.") 148 | End If 149 | 150 | 151 | End Sub 152 | #End Region 153 | 154 | #Region "Crawl()" 155 | Sub Crawl() 156 | 157 | 158 | ''''''''''''''''''''''''''''''''''''''''' 159 | 160 | ' Main Program 161 | ''''''''''''''''''''''''''''''''''''''''' 162 | 163 | Log.dpi("Program Starting...") 164 | 165 | ' Crawl Mode 166 | If Mode = "resume" Then 167 | ' Resume previous crawl 168 | Log.dpa("Resume mode - Crawling incomplete pages in page store") 169 | ElseIf Mode = "refresh" Then 170 | ' Refresh pages 171 | Log.dpa("Refresh mode - Re-crawling existing pages and updating page store") 172 | Try 173 | DB.ResetPageStore(CrawlMax) 174 | Catch e As Exception 175 | Terminate("Unable to reset page store: " & e.Message) 176 | End Try 177 | Log.dpi("Page Store has been reset") 178 | Else 179 | ' Clear all pages 180 | Log.dpa("Recrawl mode - Clearing page store and recrawling all pages") 181 | Try 182 | DB.ClearPageStore() 183 | Catch e As Exception 184 | Terminate("Unable to clear page store: " & e.Message) 185 | End Try 186 | Log.dpi("Page Store has been cleared") 187 | End If 188 | 189 | ' Maintain 
Depth 190 | Dim SpiderDepth As Integer = 0 191 | 192 | ' Maintain list of links processed in memory 193 | LinksProcessed = New StringDictionary 194 | 195 | If Mode = "recrawl" Then 196 | ' Save starting page to DB for crawling 197 | Dim arrStartURLs As String() 198 | arrStartURLs = Split(ProjectConfig("start_url"), "|") 199 | Dim StartURL As String 200 | For Each StartURL In arrStartURLs 201 | If Trim(StartURL) <> "" Then 202 | DB.AddtoCrawl(StartURL, "", SpiderDepth, CrawlMax) 203 | LinksProcessed(StartURL) = "1" 204 | End If 205 | Next 206 | End If 207 | 208 | ' Fetch list of pages 209 | Dim SQLString As String 210 | SQLString = "SELECT" 211 | If CrawlMax > 0 Then SQLString = SQLString & " TOP " & CrawlMax 212 | SQLString = SQLString & " [id], [url], [referer], [depth], [state], [crawl_date], DATALENGTH([binary_content]) As datalen FROM [page]" 213 | ' Pages for this project 214 | SQLString = SQLString & " WHERE [project]=" & foms(ProjectName) 215 | ' Pages not yet retrieved 216 | SQLString = SQLString & " AND [state] > -3" 217 | ' Pages that have not been retried to often 218 | SQLString = SQLString & " AND [state] <= " & fomn(MaxRetries) 219 | ' Pages at maximum depth or shallower 220 | If MaxDepth >= 0 Then SQLString = SQLString & " AND [depth]<=" & fomn(MaxDepth) 221 | ' Fetch shallow pages first 222 | SQLString = SQLString & " ORDER BY [depth]" 223 | Log.dpd(SQLString) 224 | 225 | ' Fetch pages 226 | Dim DA As New OleDbDataAdapter 227 | DA.SelectCommand = New OleDbCommand(SQLString, Conn) 228 | Dim RS As New System.Data.DataSet 229 | DA.Fill(RS) 230 | 231 | If RS.Tables(0).Rows.Count > 0 Then 232 | ' Process table until no new pages 233 | Dim myRow1, myRow As System.Data.DataRow 234 | Do While RS.Tables(0).Rows.Count > 0 235 | 236 | ' Loop through all new pages 237 | For Each myRow In RS.Tables(0).Rows 238 | 239 | ' New URL 240 | Dim URL, Referer, Filename As String 241 | URL = myRow("url").ToString() 242 | SpiderDepth = CInt(myRow("depth").ToString()) 243 | 
Referer = myRow("Referer").ToString() 244 | 245 | Log.dpi("Crawling URL: " & URL & " at depth " & SpiderDepth & " RS(state)=" & myRow("state").ToString()) 246 | 247 | ' Update DB 248 | DB.MarkPageAsAttempted(URL) 249 | 250 | ' HTTP Headers 251 | Dim HTTPHeaders As String 252 | HTTPHeaders = ProjectConfig("http_headers") 253 | Dim CacheDate As DateTime = New Date(0) 254 | ' Don't GET page if we already 255 | If IsCachedResource(SystemConfig("CachedExtensions"), URL) And Not IsDBNull(myRow("crawl_date")) And Not IsDBNull(myRow("datalen")) Then 256 | CacheDate = ParseDate(myRow("crawl_date").ToString(), New Date(0)) 257 | End If 258 | 259 | ' Download page HTML 260 | Dim HTTPResponse As HttpWebResponse 261 | Dim HTTPStatus As Integer = 0 262 | Dim HTTPError As String = "" 263 | 264 | HTTPResponse = GetHTTPResponse(URL, HTTPHeaders, SystemConfig("UserAgent"), CacheDate, Referer, HTTPError, HTTPStatus) 265 | 266 | If IsReference(HTTPResponse) And HTTPError = "" Then 267 | 268 | ' Check data size 269 | If HTTPResponse.ContentLength = 0 And HTTPStatus = 200 Then 270 | Log.dpw("Empty response body from URL: " & URL) 271 | End If 272 | 273 | ' Update DB 274 | DB.MarkPageAsGot(URL, HTTPStatus) 275 | 276 | If Not IsRedirect(HTTPStatus) Then 277 | 278 | If IsBinaryResource(HTTPResponse) Then 279 | 280 | ' Binary content 281 | If Not SpiderOnly And HTTPStatus = 200 Then 282 | Log.dpd("Updating binary resource " & URL) 283 | ' Determine filename 284 | Filename = DetermineFilenameBinary(URL, HTTPResponse) 285 | 286 | ' Save binary content to database 287 | DB.SaveBinaryResource(URL, Filename, HTTPResponse.GetResponseStream(), HTTPResponse.ContentType, HTTPResponse.ContentLength, HTTPResponse.LastModified) 288 | 289 | ElseIf Not SpiderOnly Then 290 | If HTTPStatus = 304 Then 291 | Log.dpd("Keeping cached version of " & URL & " because of status " & HTTPStatus & " (unchanged).") 292 | Else 293 | Log.dpd("Not updating URL " & URL & " with status of " & HTTPStatus) 294 | End If 295 | 
End If 296 | 297 | Else 298 | ' Text content 299 | Dim HTMLResponseText As String 300 | 301 | ' Extract and decode response body 302 | Dim ResponseEncoding As Encoding = DetermineEncoding(HTTPResponse, ProjectEncoding) 303 | Dim SReader As New StreamReader(HTTPResponse.GetResponseStream, ResponseEncoding) 304 | HTMLResponseText = SReader.ReadToEnd 305 | 306 | If Not SpiderOnly Then 307 | 308 | ' Determine filename 309 | Filename = DetermineFilenameText(URL, HTTPResponse, ProjectConfig("default_document")) 310 | 311 | ' Save text content to database 312 | ExtractMetaDataToDB(URL, Filename, HTMLResponseText, HTTPResponse.GetResponseHeader("Content-Type"), HTTPResponse.ContentLength, HTTPResponse.Headers("Last-Modified")) 313 | End If 314 | 315 | ' Parse out links into DB 316 | ExtractLinksToDB(URL, HTMLResponseText, SpiderDepth, SystemConfig("RegExLinks")) 317 | 318 | End If 319 | 320 | Else 321 | 322 | ' Log redirect 323 | Log.dpi("Redirected to URL: " & HTTPResponse.GetResponseHeader("Location")) 324 | 325 | ' Add link to redirect 326 | AddFoundLink(URL, HTTPResponse.GetResponseHeader("Location"), SpiderDepth) 327 | 328 | End If 329 | 330 | ' Finished with this page 331 | DB.MarkPageAsParsed(URL) 332 | Total = Total + 1 333 | 334 | ' Sleep 335 | If SleepTime > 0 Then System.Threading.Thread.Sleep(SleepTime) 336 | 337 | Else 338 | 339 | ' HTTP Errors 340 | If HTTPStatus = 0 Then 341 | Log.dpe(HTTPError) 342 | Else 343 | Log.dpw(HTTPError) 344 | End If 345 | DB.MarkPageStatus(URL, HTTPStatus) 346 | 347 | End If 348 | 349 | ' Clean up 350 | If Not HTTPResponse Is Nothing Then 351 | HTTPResponse.Close() 352 | HTTPResponse = Nothing 353 | End If 354 | 355 | ' Maintain Max 356 | If CrawlMax <> 0 And Total >= CrawlMax Then Exit For 357 | 358 | Next 359 | 360 | ' Close data set 361 | RS.Reset() 362 | 363 | ' Get new urls 364 | If CrawlMax <> 0 And Total >= CrawlMax Then 365 | Exit Do 366 | Else 367 | Log.dpd(SQLString) 368 | DA.Fill(RS) 369 | End If 370 | Loop 371 | 372 | 
Else 373 | 374 | ' No Start URL found! 375 | Log.dpe("No URLs found to crawl. Use the recrawl option or check configuration of project (start_url, max_depth, max_retries).") 376 | 377 | End If 378 | 379 | ' Delete pages which were not found 380 | 'If (Mode = "refresh" And CrawlMax = 0) Or Mode = "recrawl" Then 381 | ' Log.dpi("Deleting unrefreshed pages") 382 | ' DB.DeleteUnrefreshedPages() 383 | 'End If 384 | 385 | ''''''''''''''''''''''''''''''''''''''''' 386 | 387 | ' Shutdown 388 | ''''''''''''''''''''''''''''''''''''''''' 389 | 390 | Log.dpi("Cleaning up...") 391 | LinksProcessed.Clear() 392 | RS.Reset() 393 | Log.dpi("Closing DB Connection...") 394 | Conn.Close() 395 | Log.dpi("Spider Completed...") 396 | 397 | End Sub 398 | #End Region 399 | 400 | #Region "Private Functions" 401 | Private Sub ExtractLinksToDB(ByVal URL As String, ByRef HTML As String, ByVal Depth As Integer, ByVal RegExLinks As String) 402 | ' Extract all links from the HTML of a URL 403 | 404 | ' Meta Robots Tag 405 | Dim RobotTag As String 406 | RobotTag = HTMLParser.ExtractMetaContents(HTML, "robots") 407 | If InStr(LCase(RobotTag), "nofollow") > 0 Then 408 | Log.dpi("Robots 'nofollow' tag in URL: " & URL) 409 | Exit Sub 410 | End If 411 | 412 | ' Remove comments & unwanted tags 413 | HTML = HTMLParser.RemoveComments(HTML) 414 | HTML = HTMLParser.RemoveTags(HTML, ProjectUnwantedTags) 415 | 416 | Dim LinkTags As MatchCollection 417 | LinkTags = RegExFunctions.GetMatches(HTML, RegExLinks) 418 | Log.dpi("Found " & LinkTags.Count & " links in URL: " & URL) 419 | 420 | ' Do we have at least 1 link 421 | If LinkTags.Count > 0 Then 422 | Dim LinkTag As Match 423 | For Each LinkTag In LinkTags 424 | If LinkTag.Groups.Count >= 1 Then 425 | AddFoundLink(URL, LinkTag.Groups(1).ToString(), Depth) 426 | End If 427 | Next 428 | End If 429 | 430 | End Sub 431 | 432 | Private Sub ExtractMetaDataToDB(ByVal URL As String, ByVal Filename As String, ByRef HTML As String, ByVal ContentType As String, ByVal 
ContentLength As Long, ByVal LastModified As String) 433 | ' Extract title, text and modified date from the HTML of a URL and save them to the DB 434 | 435 | ' Sometimes Content-Length is not defined 436 | If ContentLength = -1 Then 437 | ContentLength = Len(HTML) 438 | End If 439 | 440 | ' Meta Robots Tag 441 | Dim RobotTag As String 442 | RobotTag = HTMLParser.ExtractMetaContents(HTML, "robots") 443 | If InStr(LCase(RobotTag), "noindex") > 0 Then 444 | Log.dpi("Robots 'noindex' tag in URL: " & URL) 445 | DB.MarkPageAsIndexed(URL) 446 | Exit Sub 447 | End If 448 | 449 | ' Determine Modified date 450 | ' 1. First try meta tags 451 | Dim DateTag, DateString, DateTags() As String 452 | DateTags = Split(SystemDateTags, ",") 453 | For Each DateTag In DateTags 454 | DateString = HTMLParser.ExtractMetaContents(HTML, DateTag) 455 | If IsDate(DateString) Then Exit For 456 | Next 457 | Dim ModifiedDate As Date 458 | ModifiedDate = ParseDate(DateString, New Date(0)) 459 | ' 2. Fall back on the HTTP Header 460 | If ModifiedDate.Ticks = 0 Then ModifiedDate = ParseDate(LastModified, New Date(0)) 461 | ' 3. Fall back on today's date 462 | If ModifiedDate.Ticks = 0 Then ModifiedDate = Now() 463 | 464 | ' Extract the contents of the title tag 465 | Dim HTMLTitle As String 466 | HTMLTitle = HTMLParser.ExtractTagContents(HTML, "title") 467 | HTMLTitle = HTMLParser.Decode(HTMLTitle, HTMLEntities.Records) 468 | 469 | ' Extract the raw text 470 | Dim HTMLText As String = HTML 471 | HTMLParser.ExtractText(HTMLText, ProjectUnwantedTags) 472 | HTMLText = HTMLParser.Decode(HTMLText, HTMLEntities.Records) 473 | 474 | ' Compact text by removing unnecessary whitespace 475 | HTMLText = RegExFunctions.Replace(HTMLText, "\s+", " ") 476 | HTMLText = Trim(HTMLText) 477 | 478 | ' Save to DB 479 | Try 480 | DB.SaveMetaDataToDB(URL, Filename, HTMLTitle, HTMLText, ContentType, ContentLength, ModifiedDate) 481 | Catch e As Exception 482 | Log.dpe("Error saving meta data. 
Possible corrupt file at URL: " & URL) 483 | End Try 484 | 485 | ' Mark URL as indexed 486 | DB.MarkPageAsIndexed(URL) 487 | 488 | End Sub 489 | 490 | Private Sub AddFoundLink(ByVal URL As String, ByVal LinkURL As String, ByVal Depth As Integer) 491 | ' Validate and add link for crawling 492 | 493 | ' Check Depth 494 | If Depth >= MaxDepth And MaxDepth <> -1 Then 495 | Log.dpi("Maximum depth exceeded, ignoring link to : " & LinkURL & " from " & URL) 496 | Exit Sub 497 | End If 498 | 499 | ' Get absolute link 500 | LinkURL = URLFunctions.GetAbsoluteLink(URL, LinkURL) 501 | 502 | ' Decode from HTML 503 | LinkURL = URLFunctions.DecodeHTMLURL(LinkURL) 504 | 505 | ' Remove parameters if required 506 | If ProjectConfig("params_remove") <> "" And InStr(LinkURL, "?") > 0 Then 507 | Try 508 | LinkURL = RegExFunctions.Replace(LinkURL, "&(" & MakePattern(ProjectConfig("params_remove")) & ")=[^&]+", "") 509 | LinkURL = RegExFunctions.Replace(LinkURL, "\?(" & MakePattern(ProjectConfig("params_remove")) & ")=[^&]+", "?") 510 | Catch e As Exception 511 | Log.dpe("Error in Crawler.AddFoundLink() with RegEx Pattern params_remove='" & ProjectConfig("params_remove") & "': " & e.Message) 512 | End Try 513 | LinkURL = URLFunctions.Normalise(LinkURL) 514 | End If 515 | 516 | ' Check already fetched 517 | If LinksProcessed(LinkURL) = "1" Then 518 | Log.dpd("Already analysed, ignoring link to : " & LinkURL & " from " & URL) 519 | Exit Sub 520 | End If 521 | ' Remember that we've seen this link 522 | LinksProcessed(LinkURL) = "1" 523 | 524 | ' Validate 525 | If IsValidLink(URL, LinkURL) Then 526 | ' Save link to database 527 | DB.SaveLinkToDB(URL, LinkURL) 528 | 529 | ' Save page to database for fetching 530 | DB.AddtoCrawl(LinkURL, URL, Depth + 1, CrawlMax) 531 | End If 532 | 533 | End Sub 534 | 535 | Private Function IsValidLink(ByVal URL As String, ByVal LinkURL As String) As Boolean 536 | ' Returns true if the link should be crawled 537 | 538 | ' Check domain 539 | If Not 
IsLinkMatchPos(URLFunctions.GetDomain(LinkURL), ProjectConfig("domains_allow")) Then 540 | Log.dpi("No match of domains_allow, ignoring link to : " & LinkURL & " from " & URL) 541 | Return False 542 | End If 543 | If IsLinkMatchNeg(URLFunctions.GetDomain(LinkURL), ProjectConfig("domains_reject")) Then 544 | Log.dpi("Match of domains_reject, ignoring link to : " & LinkURL & " from " & URL) 545 | Return False 546 | End If 547 | 548 | ' Check path 549 | If Not IsLinkMatchPos(LinkURL, ProjectConfig("urls_allow")) Then 550 | Log.dpi("No match of urls_allow, ignoring link to : " & LinkURL & " from " & URL) 551 | Return False 552 | End If 553 | If IsLinkMatchNeg(URLFunctions.GetRelative(LinkURL), ProjectConfig("urls_reject")) Then 554 | Log.dpi("Match of urls_reject, ignoring link to : " & LinkURL & " from " & URL) 555 | Return False 556 | End If 557 | 558 | ' Check extension 559 | Dim Ext As String 560 | Ext = URLFunctions.GetExtension(LinkURL) 561 | If Ext = "" Then Ext = URLFunctions.GetExtension(URLFunctions.GetRelativeStem(LinkURL) & ProjectConfig("default_document")) 562 | If Not IsLinkMatchPos(Ext, ProjectConfig("extensions_allow")) Then 563 | Log.dpi("No match of extensions_allow, ignoring link to : " & LinkURL & " from " & URL) 564 | Return False 565 | End If 566 | If IsLinkMatchNeg(Ext, ProjectConfig("extensions_reject")) Then 567 | Log.dpi("Match of extensions_reject, ignoring link to : " & LinkURL & " from " & URL) 568 | Return False 569 | End If 570 | 571 | ' Stay in http protocol 572 | If Left(LCase(LinkURL), 7) <> "http://" Then 573 | Log.dpi("Ignoring non http:// link URL: " & LinkURL) 574 | Return False 575 | End If 576 | 577 | ' Passed all tests 578 | Return True 579 | 580 | End Function 581 | 582 | Private Function IsLinkMatchPos(ByVal URL As String, ByVal Pattern As String) As Boolean 583 | ' Returns true if pattern is empty, else does regex 584 | 585 | If Pattern = "" Then 586 | Return True 587 | Else 588 | Try 589 | Pattern = MakePattern(Pattern) 590 
| Return RegExFunctions.IsMatch(URL, Pattern) 591 | Catch e As Exception 592 | Throw New Exception("Error in IsLinkMatchPos(" & URL & ", " & Pattern & ") : " & e.Message) 593 | End Try 594 | End If 595 | 596 | End Function 597 | 598 | Private Function IsLinkMatchNeg(ByVal URL As String, ByVal Pattern As String) As Boolean 599 | ' Returns false if pattern is empty, else does regex 600 | 601 | If Pattern = "" Then 602 | Return False 603 | Else 604 | Try 605 | Pattern = MakePattern(Pattern) 606 | Return RegExFunctions.IsMatch(URL, Pattern) 607 | Catch e As Exception 608 | Throw New Exception("Error in IsLinkMatchNeg(" & URL & ", " & Pattern & ") : " & e.Message) 609 | End Try 610 | End If 611 | 612 | End Function 613 | 614 | Private Function MakePattern(ByVal Pattern As String) As String 615 | ' Ensure this is a RegEx pattern 616 | 617 | ' This is a CSV List not a RegEx 618 | Pattern = Replace(Pattern, ", ", ",") 619 | Pattern = Replace(Pattern, ",", "|") 620 | Return Pattern 621 | 622 | End Function 623 | 624 | Private Function IsBinaryResource(ByVal HTTPResponse As HttpWebResponse) As Boolean 625 | ' Determines if the resource is binary based on Mime type 626 | 627 | ' Must catch errors in case header is missing 628 | Dim ContentType As String 629 | Try 630 | ContentType = HTTPResponse.GetResponseHeader("Content-Type") 631 | Catch e As Exception 632 | ' Who cares if it's missing? 
633 | End Try 634 | 635 | If Left(LCase(ContentType), 4) = "text" Then 636 | Return False 637 | Else 638 | Return True 639 | End If 640 | 641 | End Function 642 | 643 | Private Function IsCachedResource(ByVal CachedExtensions As String, ByVal URL As String) As Boolean 644 | ' Determines if the URL could be cached (binaries) 645 | 646 | Dim Ext As String = LCase(URLFunctions.GetExtension(URL)) 647 | If CSVInStr(CachedExtensions, Ext) Then 648 | Return True 649 | Else 650 | Return False 651 | End If 652 | 653 | End Function 654 | 655 | Private Function IsRedirect(ByVal Status As Integer) As Boolean 656 | ' Determine if a redirect occured 657 | 658 | If Status = 301 Or Status = 302 Then 659 | Return True 660 | Else 661 | Return False 662 | End If 663 | 664 | End Function 665 | 666 | Private Function CheckPageStatus(ByVal Status As Integer, ByVal URL As String) As Boolean 667 | ' Determine if status of a HTTP Request is OK 668 | 669 | CheckPageStatus = True 670 | 671 | ' Check HTTP status code 672 | Log.dpd("HTTP status code (" & Status & ") accessing URL: " & URL) 673 | ' Catch 40x and 50x errors 674 | If Status >= 400 And Status < 600 Then 675 | Log.dpw("HTTP Error code (" & Status & ") accessing URL: " & URL) 676 | CheckPageStatus = False 677 | End If 678 | 679 | End Function 680 | 681 | Private Function DetermineFilenameText(ByVal URL As String, ByVal HTTPResponse As HttpWebResponse, ByVal DefaultDoc As String) As String 682 | ' Determine the filename of a text resource 683 | 684 | ' Try get filename from URL 685 | Dim Filename As String 686 | Filename = URLFunctions.GetFilename(URL) 687 | 688 | ' If we got no filename check default document 689 | If Filename = "" Then Filename = DefaultDoc 690 | 691 | ' Add standard extension based on content type header 692 | Filename = AddStandardExtension(Filename, HTTPResponse.GetResponseHeader("Content-Type")) 693 | 694 | ' If we got no filename, assume index.htm 695 | If Filename = "" Then Filename = "index.htm" 696 | 697 | 
Return Filename 698 | 699 | End Function 700 | 701 | Private Function DetermineFilenameBinary(ByVal URL As String, ByVal HTTPResponse As HttpWebResponse) As String 702 | ' Determine the filename of a binary resource 703 | 704 | ' Try the content disposition header 705 | Dim Filename As String 706 | ' Header may not exist 707 | Try 708 | Filename = HTTPResponse.GetResponseHeader("Content-Disposition") 709 | Filename = After(Filename, "filename=") 710 | Catch e As Exception 711 | End Try 712 | 713 | ' Try URL 714 | If Filename = "" Then Filename = URLFunctions.Decode(URLFunctions.GetFilename(URL)) 715 | 716 | ' Add standard extension based on content type header 717 | If Filename = "" Then Filename = AddStandardExtension(Filename, HTTPResponse.GetResponseHeader("Content-Type")) 718 | 719 | Return Filename 720 | 721 | End Function 722 | 723 | Private Function AddStandardExtension(ByVal Filename As String, ByVal ContentType As String) As String 724 | 725 | If InStr(ContentType, ";") > 0 Then ContentType = Before(ContentType, ";") 726 | 727 | Dim StandardExtension As String 728 | StandardExtension = DBContentTypes(ContentType) 729 | 730 | If LCase(URLFunctions.GetExtension(Filename)) <> StandardExtension And StandardExtension <> "" Then 731 | Filename = Filename & "." 
& StandardExtension 732 | ElseIf StandardExtension = "" Then 733 | Log.dpw("Unknown content type " & ContentType) 734 | End If 735 | 736 | Return Filename 737 | 738 | End Function 739 | 740 | Private Function DetermineEncoding(ByVal HTTPResponse As HttpWebResponse, ByVal ProjectEncoding As Encoding) As Encoding 741 | Try 742 | Return Encoding.GetEncoding(HTTPResponse.CharacterSet) 743 | Catch e As Exception 744 | End Try 745 | 746 | Try 747 | Return Encoding.GetEncoding(After(HTTPResponse.ContentType, "charset=")) 748 | Catch e As Exception 749 | End Try 750 | 751 | Return ProjectEncoding 752 | End Function 753 | 754 | Private Function GetHTTPResponse(ByVal URL As String, ByVal CustomHeaders As String, ByVal UserAgent As String, ByVal IfModifiedSince As DateTime, ByVal Referer As String, ByRef ErrorMsg As String, ByRef HTTPStatus As Integer) As HttpWebResponse 755 | 756 | Dim HTTPClient As HttpWebRequest = CType(WebRequest.Create(URL), HttpWebRequest) 757 | 758 | HTTPClient.Timeout = 10000 759 | 760 | ' Standard headers 761 | If UserAgent <> "" Then HTTPClient.UserAgent = UserAgent 762 | If Referer <> "" Then HTTPClient.Referer = Referer 763 | If IfModifiedSince.Ticks > 0 Then HTTPClient.IfModifiedSince = IfModifiedSince 764 | 765 | ' Don't allow automatic redirections 766 | HTTPClient.AllowAutoRedirect = False 767 | 768 | ' Custom headers 769 | ' Set request headers 770 | Dim arrHeaders() As String 771 | Dim header As String 772 | arrHeaders = Split(CustomHeaders, vbNewLine) 773 | For Each header In arrHeaders 774 | If Trim(header) <> "" And InStr(header, ":") > 1 Then 775 | Try 776 | HTTPClient.Headers.Add(Trim(Before(header, ":")), Trim(After(header, ":"))) 777 | Catch e As ArgumentException 778 | Log.dpw("Unable to add header in GetHTTPResponse() : " & header & vbNewLine & e.Message) 779 | End Try 780 | End If 781 | Next 782 | 783 | Dim HTTPResponse As HttpWebResponse 784 | Try 785 | HTTPResponse = CType(HTTPClient.GetResponse(), HttpWebResponse) 786 | 
HTTPStatus = CInt(HTTPResponse.StatusCode) 787 | Catch e As Exception 788 | HTTPStatus = ParseInt(Between(e.Message, "(", ")")) 789 | ErrorMsg = "Error in GetHTTPResponse(" & URL & ") accessing GetResponse(): " & e.Message 790 | End Try 791 | 792 | Return HTTPResponse 793 | 794 | End Function 795 | 796 | Private Sub Terminate(ByVal Msg As String) 797 | ' Terminate in error 798 | 799 | Me.Dispose() 800 | Throw New Exception(Msg) 801 | 802 | End Sub 803 | 804 | Sub Dispose() 805 | If Conn.State = ConnectionState.Open Then Conn.Close() 806 | End Sub 807 | 808 | #End Region 809 | 810 | End Class 811 | -------------------------------------------------------------------------------- /hyperlight/tests/pizzachili_api.h: -------------------------------------------------------------------------------- 1 | /*========================================================================== 2 | SeqAn - The Library for Sequence Analysis 3 | http://www.seqan.de 4 | ============================================================================ 5 | Copyright (C) 2007 6 | 7 | This library is free software; you can redistribute it and/or 8 | modify it under the terms of the GNU Lesser General Public 9 | License as published by the Free Software Foundation; either 10 | version 3 of the License, or (at your option) any later version. 11 | 12 | This library is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | Lesser General Public License for more details. 16 | 17 | ============================================================================ 18 | $Id$ 19 | ==========================================================================*/ 20 | 21 | //SEQAN_xNO_GENERATED_FORWARDS: no forwards are generated for this file 22 | 23 | // Some tests. 
24 | 25 | char c = '"'; 26 | wchar_t w = L'\''; 27 | 28 | string s = "foob\"ar"; 29 | wstring a = L"abdc\xab\uabcdd"; 30 | 31 | 32 | 1234 33 | 01238 34 | 0xaffedead123 35 | 0.12 36 | 1.2e+4 37 | .3e-2 38 | .54321432143214321 39 | 40 | #ifndef SEQAN_HEADER_PIZZACHILI_API_H 41 | # define SEQAN_HEADER_PIZZACHILI_API_H 42 | 43 | # include <seqan/basic.h> 44 | 45 | namespace SEQAN_NAMESPACE_MAIN { 46 | 47 | namespace impl { 48 | typedef unsigned char uchar_t; 49 | typedef unsigned long ulong_t; 50 | typedef void* index_t; 51 | typedef int error_t; 52 | } // namespace impl 53 | 54 | struct InvalidPizzaChiliSpec; 55 | 56 | template <typename TSpec> 57 | struct PizzaChiliCodeProvider { 58 | typedef InvalidPizzaChiliSpec Type; 59 | }; 60 | 61 | /** 62 | .Tag.Pizza & Chili Index Tags 63 | ..summary:Tag specifying the Pizza & Chili library to use. 64 | ..remarks:More information for all the index libraries can be found in the 65 | @http://pizzachili.dcc.uchile.cl|original documentation@ (or the 66 | @http://pizzachili.di.unipi.it|Italian mirror@). 67 | ..cat:Index 68 | ..tag.PizzaChili_AF:The alphabet-friendly FM index. 69 | ..tag.PizzaChili_CCSA:The compressed compact suffix array index. 70 | ..tag.PizzaChili_FM: The FM (full-text in minute space) index. 71 | ..tag.PizzaChili_RSA:The repair suffix array index. 72 | ...remarks:The index cannot be saved and loaded. 73 | ..tag.PizzaChili_SA: The simple suffix array index. 74 | ...remarks:The index cannot be saved and loaded. 75 | ..tag.PizzaChili_SADA: the compressed suffix array index. 76 | ...remarks:The index cannot be saved and loaded. 77 | ..see:Spec.Pizza & Chili Index 78 | ..see:Spec.Pizza & Chili String 79 | */ 80 | 81 | // We need to declare these explicitly instead of through macro expansion in order 82 | // for them to be included in the forward generated declarations. 
83 | 84 | struct _PizzaChili_AF; 85 | typedef Tag<_PizzaChili_AF> const PizzaChili_AF; 86 | 87 | struct _PizzaChili_CCSA; 88 | typedef Tag<_PizzaChili_CCSA> const PizzaChili_CCSA; 89 | 90 | struct _PizzaChili_FM; 91 | typedef Tag<_PizzaChili_FM> const PizzaChili_FM; 92 | 93 | struct _PizzaChili_LZ; 94 | typedef Tag<_PizzaChili_LZ> const PizzaChili_LZ; 95 | 96 | struct _PizzaChili_RSA; 97 | typedef Tag<_PizzaChili_RSA> const PizzaChili_RSA; 98 | 99 | struct _PizzaChili_RLFM; 100 | typedef Tag<_PizzaChili_RLFM> const PizzaChili_RLFM; 101 | 102 | struct _PizzaChili_SA; 103 | typedef Tag<_PizzaChili_SA> const PizzaChili_SA; 104 | 105 | struct _PizzaChili_SADA; 106 | typedef Tag<_PizzaChili_SADA> const PizzaChili_SADA; 107 | 108 | struct _PizzaChili_SSA; 109 | typedef Tag<_PizzaChili_SSA> const PizzaChili_SSA; 110 | 111 | struct _PizzaChili_Test; 112 | typedef Tag<_PizzaChili_Test> const PizzaChili_Test; 113 | 114 | # define SEQAN_MAKE_PIZZACHILI_PROVIDER(name) \ 115 | class PizzaChiliApi##name { \ 116 | public: \ 117 | static char* error_index(impl::error_t e); \ 118 | static int build_index( \ 119 | impl::uchar_t* text, \ 120 | impl::ulong_t length, \ 121 | char* build_options, \ 122 | impl::index_t* index \ 123 | ); \ 124 | static int save_index(impl::index_t index, char* filename); \ 125 | static int load_index(char* filename, impl::index_t* index); \ 126 | static int free_index(impl::index_t index); \ 127 | static int index_size(impl::index_t index, impl::ulong_t* size); \ 128 | static int count( \ 129 | impl::index_t index, \ 130 | impl::uchar_t* pattern, \ 131 | impl::ulong_t length, \ 132 | impl::ulong_t* numocc \ 133 | ); \ 134 | static int locate( \ 135 | impl::index_t index, \ 136 | impl::uchar_t* pattern, \ 137 | impl::ulong_t length, \ 138 | impl::ulong_t** occ, \ 139 | impl::ulong_t* numocc \ 140 | ); \ 141 | static int get_length(impl::index_t index, impl::ulong_t* length); \ 142 | static int extract( \ 143 | impl::index_t index, \ 144 | impl::ulong_t 
from, \ 145 | impl::ulong_t to, \ 146 | impl::uchar_t** snippet, \ 147 | impl::ulong_t* snippet_length \ 148 | ); \ 149 | static int display( \ 150 | impl::index_t index, \ 151 | impl::uchar_t* pattern, \ 152 | impl::ulong_t length, \ 153 | impl::ulong_t numc, \ 154 | impl::ulong_t* numocc, \ 155 | impl::uchar_t** snippet_text, \ 156 | impl::ulong_t** snippet_length \ 157 | ); \ 158 | static int init_ds_ssort(int adist, int bs_ratio); \ 159 | }; \ 160 | \ 161 | /*struct _PizzaChili_##name; \ 162 | typedef Tag<_PizzaChili_##name> const PizzaChili_##name;*/ \ 163 | \ 164 | template <> \ 165 | struct PizzaChiliCodeProvider<PizzaChili_##name> { \ 166 | typedef PizzaChiliApi##name Type; \ 167 | }; 168 | 169 | SEQAN_MAKE_PIZZACHILI_PROVIDER(AF) 170 | SEQAN_MAKE_PIZZACHILI_PROVIDER(CCSA) 171 | SEQAN_MAKE_PIZZACHILI_PROVIDER(FM) 172 | SEQAN_MAKE_PIZZACHILI_PROVIDER(LZ) 173 | SEQAN_MAKE_PIZZACHILI_PROVIDER(RSA) 174 | SEQAN_MAKE_PIZZACHILI_PROVIDER(RLFM) 175 | SEQAN_MAKE_PIZZACHILI_PROVIDER(SA) 176 | SEQAN_MAKE_PIZZACHILI_PROVIDER(SADA) 177 | SEQAN_MAKE_PIZZACHILI_PROVIDER(SSA) 178 | SEQAN_MAKE_PIZZACHILI_PROVIDER(Test) 179 | 180 | # undef SEQAN_MAKE_PIZZACHILI_PROVIDER 181 | 182 | } // namespace SEQAN_NAMESPACE_MAIN 183 | 184 | #endif // SEQAN_HEADER_PIZZACHILI_API_H 185 | 186 | -------------------------------------------------------------------------------- /hyperlight/tests/preg_helper.php: -------------------------------------------------------------------------------- 1 | <?php 2 | 3 | /* 4 | * Copyright 2008 Konrad Rudolph 5 | * All rights reserved. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | 26 | /** 27 | * Merges several regular expressions into one, using the indicated 'glue'. 28 | * 29 | * This function takes care of individual modifiers so it's safe to use 30 | * <i>different</i> modifiers on the individual expressions. The order of 31 | * sub-matches is preserved as well. Numbered back-references are adapted to 32 | * the new overall sub-match count. This means that it's safe to use numbered 33 | * back-references in the individual expressions! 34 | * If {@link $names} is given, the individual expressions are captured in 35 | * named sub-matches using the contents of that array as names. 36 | * Matching pair-delimiters (e.g. <var>"{…}"</var>) are currently 37 | * <b>not</b> supported. 38 | * 39 | * The function assumes that all regular expressions are well-formed. 40 | * Behaviour is undefined if they aren't. 
41 | * 42 | * This function was created after a 43 | * {@link http://stackoverflow.com/questions/244959/ StackOverflow discussion}. 44 | * Much of it was written or thought of by “porneL” and “eyelidlessness”. Many 45 | * thanks to both of them. 46 | * 47 | * @param string $glue A string to insert between the individual expressions. 48 | * This should usually be either the empty string, indicating 49 | * concatenation, or the pipe (<var>"|"</var>), indicating alternation. 50 | * Notice that this string might have to be escaped since it is treated 51 | * as a normal character in a regular expression (i.e. <var>"/"</var> will 52 | * end the expression and result in an invalid output). 53 | * @param array $expressions The expressions to merge. The expressions may 54 | * have arbitrary different delimiters and modifiers. 55 | * @param array $names Optional. This is either an empty array or an array of 56 | * strings of the same length as {@link $expressions}. In that case, 57 | * the strings of this array are used to create named sub-matches for the 58 | * expressions. 59 | * @return string A string representing a regular expression equivalent to the 60 | * merged expressions. Returns <var>FALSE</var> if an error occurred. 61 | */ 62 | function preg_merge($glue, array $expressions, array $names = array()) { 63 | // … then, a miracle occurs. 64 | 65 | // Sanity check … 66 | 67 | $use_names = ($names !== null and count($names) !== 0); 68 | 69 | if ( 70 | $use_names and count($names) !== count($expressions) or 71 | !is_string($glue) 72 | ) 73 | return false; 74 | 75 | $result = array(); 76 | // For keeping track of the names for sub-matches. 77 | $names_count = 0; 78 | // For keeping track of *all* captures to re-adjust backreferences. 
79 | $capture_count = 0; 80 | 81 | foreach ($expressions as $expression) { 82 | if ($use_names) 83 | $name = str_replace(' ', '_', $names[$names_count++]); 84 | 85 | // Get delimiters and modifiers: 86 | 87 | $stripped = preg_strip($expression); 88 | 89 | if ($stripped === false) 90 | return false; 91 | 92 | list($sub_expr, $modifiers) = $stripped; 93 | 94 | // Re-adjust backreferences: 95 | 96 | // We assume that the expression is correct and therefore don't check 97 | // for matching parentheses. 98 | 99 | $number_of_captures = preg_match_all('/\([^?]|\(\?[^:]/', $sub_expr, $_); 100 | 101 | if ($number_of_captures === false) 102 | return false; 103 | 104 | if ($number_of_captures > 0) { 105 | // NB: This looks NP-hard. Consider replacing. 106 | $backref_expr = '/ 107 | ( # Only match when not escaped: 108 | [^\\\\] # guarantee an even number of backslashes 109 | (\\\\*?)\\2 # (twice n, preceded by something else). 110 | ) 111 | \\\\ (\d) # Backslash followed by a digit. 112 | /x'; 113 | $sub_expr = preg_replace_callback( 114 | $backref_expr, 115 | create_function( 116 | '$m', 117 | 'return $m[1] . "\\\\" . ((int)$m[3] + ' . $capture_count . ');' 118 | ), 119 | $sub_expr 120 | ); 121 | $capture_count += $number_of_captures; 122 | } 123 | 124 | // Last, construct the new sub-match: 125 | 126 | $modifiers = implode('', $modifiers); 127 | $sub_modifiers = "(?$modifiers)"; 128 | if ($sub_modifiers === '(?)') 129 | $sub_modifiers = ''; 130 | 131 | $sub_name = $use_names ? "?<$name>" : '?:'; 132 | $new_expr = "($sub_name$sub_modifiers$sub_expr)"; 133 | $result[] = $new_expr; 134 | } 135 | 136 | return '/' . implode($glue, $result) . '/'; 137 | } 138 | 139 | /** 140 | * Strips a regular expression string off its delimiters and modifiers. 141 | * Additionally, normalizes the delimiters (i.e. reformats the pattern so that 142 | * it could have used <var>"/"</var> as delimiter). 143 | * 144 | * @param string $expression The regular expression string to strip. 
145 | * @return array An array whose first entry is the expression itself, the 146 | * second an array of delimiters. If the argument is not a valid regular 147 | * expression, returns <var>FALSE</var>. 148 | * 149 | */ 150 | function preg_strip($expression) { 151 | if (preg_match('/^(.)(.*)\\1([imsxeADSUXJu]*)$/s', $expression, $matches) !== 1) 152 | return false; 153 | 154 | $delim = $matches[1]; 155 | $sub_expr = $matches[2]; 156 | if ($delim !== '/') { 157 | // Replace occurrences by the escaped delimiter by its unescaped 158 | // version and escape new delimiter. 159 | $sub_expr = str_replace("\\$delim", $delim, $sub_expr); 160 | $sub_expr = str_replace('/', '\\/', $sub_expr); 161 | } 162 | $modifiers = $matches[3] === '' ? array() : str_split(trim($matches[3])); 163 | 164 | return array($sub_expr, $modifiers); 165 | } 166 | 167 | ?> 168 | -------------------------------------------------------------------------------- /hyperlight/tests/python: -------------------------------------------------------------------------------- 1 | # vim:fileencoding=utf-8 2 | 3 | if 1900 < year < 2100 and 1 <= month <= 12 \ 4 | and 1 <= day <= 31 and 0 <= hour < 24 \ 5 | and 0 <= minute < 60 and 0 <= second < 60: # Looks like a valid date 6 | return 1 7 | 8 | month_names = ['Januari', 'Februari', 'Maart', # These are the 9 | 'April', 'Mei', 'Juni', # Dutch names 10 | 'Juli', 'Augustus', 'September', # for the months 11 | 'Oktober', 'November', 'December'] # of the year 12 | 13 | def perm(l): 14 | # Compute the list of all permutations of l 15 | if len(l) <= 1: 16 | return [l] 17 | r = [] 18 | for i in range(len(l)): 19 | s = l[:i] + l[i+1:] 20 | p = perm(s) 21 | for x in p: 22 | r.append(l[i:i+1] + x) 23 | return r 24 | 25 | 7 2147483647 0o177 0b100110111 26 | 3 79228162514264337593543950336 0o377 0x100000000 27 | 79228162514264337593543950336 0xdeadbeef 28 | 29 | 3.14 10. 
.001 1e100 3.14e-10 0e0 30 | 31 | 3.14j 10.j 10j .001j 1e100j 3.14e-10j 32 | 33 | 'foobar \'x ' 34 | "foobar \"x " 35 | "" 36 | "\N{AMPERSAND}" 37 | '''longstring''' 38 | """also very 39 | long""" 40 | r'foo\' \U1234' 41 | R'foo\' \U1234' 42 | b'cfg' 43 | B"axc" 44 | -------------------------------------------------------------------------------- /hyperlight/tests/simple.css: -------------------------------------------------------------------------------- 1 | body > div.class { 2 | background: #F4F url(foo.bar) no-repeat; 3 | } 4 | -------------------------------------------------------------------------------- /hyperlight/tests/style.css: -------------------------------------------------------------------------------- 1 | /* 2 | Theme name: Plain 3 | Author: Konrad Rudolph 4 | */ 5 | 6 | @import url(reset); 7 | @import url(layout); 8 | @import url(code); 9 | 10 | /* 11 | * $background 12 | */ 13 | 14 | #all, input, textarea { 15 | background: white; 16 | } 17 | 18 | #sidebar > div, #content .summary, #content .comment-list li.alt { 19 | } 20 | 21 | /* 22 | * $color 23 | */ 24 | 25 | #all, input, textarea { 26 | color: #393939; 27 | } 28 | 29 | /* 30 | * $font 31 | */ 32 | 33 | body, label, input, #sidebar, #content, #content .summary h4 { 34 | font-family: Lucida Grande, Lucida Sans Unicode, Verdana, sans-serif; 35 | } 36 | 37 | h1, #content h2, #content h3, #content h4, #content h5 { 38 | font-family: Georgia, Hoefler Text, Constantia, serif; 39 | } 40 | 41 | code, pre { 42 | font-family: Consolas, Andale Mono, Lucida Console, monospace; 43 | font-family: Consolas, Andale Mono, Lucida Console; 44 | } 45 | 46 | /* 47 | * &common 48 | */ 49 | 50 | body { 51 | height: 100%; 52 | line-height: 1em; 53 | } 54 | 55 | a { 56 | color: inherit; 57 | text-decoration: inherit; 58 | } 59 | 60 | h1 { 61 | background: url(gfx/logo-small) no-repeat top right; 62 | color: #4E672E; 63 | font: 6em/150px Georgia, Hoefler Text, Constantia, serif; 64 | letter-spacing: -0.1em; 65 | 
height: 150px; 66 | padding: 0 0 0 0.5em; 67 | text-transform: lowercase; 68 | word-spacing: 0.25em; 69 | } 70 | 71 | /************************************ 72 | * §ions 73 | ************************************/ 74 | 75 | #main { 76 | } 77 | 78 | /* 79 | * &menu 80 | */ 81 | 82 | #menu { } 83 | 84 | #menu ul { 85 | background: #4E672E; 86 | color: white; 87 | height: 2.5em; 88 | line-height: 2.5em; 89 | list-style: none; 90 | -moz-border-radius: 0.2em; 91 | -webkit-border-radius: 0.2em; 92 | border-radius: 0.2em; 93 | } 94 | 95 | #menu ul li { 96 | display: block; 97 | float: left; 98 | height: 100%; 99 | width: 10em; 100 | } 101 | 102 | #menu ul li a { 103 | background: url(gfx/menu-border) no-repeat right bottom; 104 | display: block; 105 | height: 100%; 106 | font-weight: bold; 107 | text-align: center; 108 | top: 50%; 109 | -moz-border-radius-topleft: 0.2em; 110 | -moz-border-radius-bottomleft: 0.2em; 111 | -webkit-border-top-left-radius: 0.2em; 112 | -webkit-border-bottom-left-radius: 0.2em; 113 | border-bottom-top-radius: 0.2em; 114 | border-bottom-left-radius: 0.2em; 115 | } 116 | 117 | #menu ul li + li a { 118 | -moz-border-radius: 0; 119 | -webkit-border-radius: 0; 120 | border-radius: 0; 121 | } 122 | 123 | #menu ul li a:hover { 124 | /*background: url(gfx/menu-button-hover) repeat-x top;*/ 125 | background-color: #607F36; 126 | } 127 | 128 | #menu ul li a:active { 129 | /*background: #798C61 url(gfx/menu-button-pressed) repeat-x top;*/ 130 | background-color: #425824; 131 | } 132 | 133 | /* 134 | * &sidebar 135 | */ 136 | 137 | #sidebar { 138 | font-size: 0.8em; 139 | } 140 | 141 | #sidebar a { 142 | color: #2D4C66; 143 | } 144 | 145 | #sidebar a:hover { 146 | /* 147 | background-color: #4E672E; 148 | color: white; 149 | */ 150 | color: inherit; 151 | } 152 | 153 | #sidebar > div { 154 | padding: 1em; 155 | } 156 | 157 | #sidebar > div.first { 158 | padding-top: 0; 159 | } 160 | 161 | #sidebar > div h2 { 162 | font-size: 1.5em; 163 | margin: 0 0 0.5em; 
164 | } 165 | 166 | #sidebar ul { 167 | list-style: square; 168 | margin: 0; 169 | padding-left: 1em; 170 | } 171 | 172 | #sidebar #searchform { 173 | margin: 0; 174 | padding: 0; 175 | } 176 | 177 | #sidebar #searchform div { 178 | border: 1px solid; 179 | padding: 0.2em 0 0.2em 0.5em; 180 | width: auto; 181 | border-radius: 1em; 182 | -moz-border-radius: 1em; 183 | -webkit-border-radius: 0.9em; 184 | text-align: center; 185 | } 186 | 187 | #sidebar #searchform div #s { 188 | border: none; 189 | padding: 0; 190 | height: 16px; 191 | width: 80%; 192 | vertical-align: bottom; 193 | } 194 | 195 | #sidebar #searchform div #searchsubmit { 196 | background: url(gfx/icon-search) no-repeat center center; 197 | border: none; 198 | height: 16px; 199 | margin: 0; 200 | padding: 0; 201 | width: 16px; 202 | vertical-align: bottom; 203 | } 204 | 205 | #sidebar #tagcloud { 206 | line-height: 1.5em; 207 | list-style: none; 208 | margin: 0; 209 | padding: 0 !important; 210 | text-align: center; 211 | } 212 | 213 | #sidebar #tagcloud li { 214 | display: inline; 215 | } 216 | 217 | #sidebar #tagcloud li a { 218 | vertical-align: middle; 219 | } 220 | 221 | #sidebar #tagcloud .tag.low { 222 | font-size: x-small; 223 | } 224 | 225 | #sidebar #tagcloud .tag.level1 { 226 | font-size: large; 227 | font-weight: bold; 228 | } 229 | 230 | #sidebar #tagcloud .tag.level2 { 231 | font-size: x-large; 232 | font-weight: bold; 233 | } 234 | 235 | #sidebar #tagcloud .tag.level3 { 236 | font-size: xx-large; 237 | font-weight: bold; 238 | } 239 | 240 | /* 241 | * &content 242 | */ 243 | 244 | #content { 245 | font-size: 0.8em; 246 | padding: 0 1em; 247 | } 248 | 249 | #content .error { 250 | font-weight: bold; 251 | } 252 | 253 | #content a { 254 | color: #662D2D; 255 | } 256 | 257 | #content a[href^="http://"]:before { 258 | content: '› '; 259 | } 260 | 261 | #content a:hover { 262 | color: inherit; 263 | } 264 | 265 | #content .hint, #content .notice { 266 | /*font-size: 0.8em;*/ 267 | margin: 
0.5em 0; 268 | } 269 | 270 | #content .error { 271 | line-height: 1.5em; 272 | font-size: 1.5em !important; 273 | } 274 | 275 | #content .notice { 276 | border: 3px solid #662D2D; 277 | } 278 | 279 | #content div.notice { 280 | padding: 0.5em; 281 | text-align: center; 282 | } 283 | 284 | #content h2 { 285 | background: url(gfx/dna-bullet) no-repeat left center; 286 | font-size: 1.5em; 287 | line-height: 1em; 288 | height: 1em; 289 | margin: 1em 0; 290 | padding-left: 1em; 291 | padding-left: 24px; 292 | } 293 | 294 | #content h3, 295 | #content h4 { 296 | font-size: 1.3em; 297 | margin: 2em 0 1em; 298 | padding-left: 24px; 299 | } 300 | 301 | #content h4 a { 302 | color: #B29595; 303 | display: none; 304 | } 305 | 306 | #content h4:hover a { 307 | display: inline; 308 | } 309 | 310 | #content h4:hover a:hover { 311 | color: #9B9B9B; 312 | color: #662D2D; 313 | } 314 | 315 | #content .summary { 316 | margin-bottom: 2em; 317 | } 318 | 319 | #content .summary h4 { 320 | font-size: 1em; 321 | margin-top: 1em; 322 | } 323 | 324 | #content .summary h4:hover a { 325 | display: none; 326 | } 327 | 328 | #content .summary > * { 329 | padding-left: 0; 330 | } 331 | 332 | #content .post-meta, 333 | #content blockquote, 334 | #content pre, 335 | #content table, 336 | #content thead, 337 | #content .comment-list li.odd, 338 | #content .comment-list li .comment-head { 339 | border: 1px dotted #9B9B9B; 340 | } 341 | 342 | #content .post-meta { 343 | background: #F6F6F6; 344 | font-size: x-small; 345 | padding: 0.2em; 346 | text-align: right; 347 | } 348 | 349 | #content .post-meta p { 350 | margin: 0; 351 | padding: 0.5em 0; 352 | text-align: right; 353 | } 354 | 355 | #content .post-meta p .tags { 356 | background: url(gfx/icon-tag) no-repeat left bottom; 357 | padding: 2px 0 2px 16px; 358 | } 359 | 360 | #content .post-meta p .comments { 361 | background: url(gfx/icon-comment) no-repeat left bottom; 362 | padding: 2px 0 2px 16px; 363 | } 364 | 365 | #content .post-meta p a.tag 
{ 366 | border: 1px outset; 367 | padding: 0.1em 0.3em; 368 | border-radius: 0.5em; 369 | -moz-border-radius: 0.5em; 370 | -webkit-border-radius: 0.5em; 371 | } 372 | 373 | #content .post-meta p a.tag:hover { 374 | background: #662D2D; 375 | color: #F6F6F6; 376 | } 377 | 378 | #content .post-meta p a.tag:active { 379 | border-style: inset; 380 | } 381 | 382 | 383 | #content p, #content blockquote, #content pre, #content ul, #content ol, #content dl { 384 | line-height: 1.2em; 385 | margin: 1em 0; 386 | padding-left: 24px; 387 | } 388 | 389 | #content p { 390 | text-align: justify; 391 | } 392 | 393 | #content p code { 394 | color: #666 395 | } 396 | 397 | #content blockquote, #content pre { 398 | border-width: 1px; 399 | border-style: dotted none; 400 | padding: 0.5em 0; 401 | padding-left: 24px; 402 | color: #666; 403 | } 404 | 405 | #content blockquote { 406 | background: url(gfx/icon-blockquote) no-repeat left top; 407 | } 408 | 409 | #content blockquote p { 410 | margin: 0; 411 | padding-left: 0; 412 | } 413 | 414 | #content pre { 415 | background: url(gfx/icon-pre) no-repeat left top; 416 | overflow: auto; 417 | } 418 | 419 | #content * ul, #content * ol, #content * dl { 420 | margin: 0; 421 | } 422 | 423 | #content ul { 424 | list-style-image: url(gfx/bullet); 425 | } 426 | 427 | #content ol { 428 | list-style-type: decimal; 429 | } 430 | 431 | #content dl { } 432 | 433 | #content dl dt { 434 | color: #9B9B9B; 435 | font-weight: bold; 436 | margin-left: -24px; 437 | } 438 | 439 | /* 440 | #content dl dt:before { 441 | color: #9B9B9B; 442 | content: '¶\0A'; 443 | } 444 | */ 445 | 446 | #content p > img { 447 | display: block; 448 | margin: 0 auto; 449 | } 450 | 451 | #content table { 452 | border-style: dotted none; 453 | margin: 1em auto; 454 | } 455 | 456 | #content p + table, #content blockquote + table, #content pre + table, #content table + table { 457 | margin-top: 0; 458 | } 459 | 460 | #content table thead { 461 | border-style: none none dotted; 462 | 
} 463 | 464 | #content table tr.odd td { 465 | background: #F6F6F6; 466 | } 467 | 468 | #content table th, #content table td { 469 | padding: 0.2em 0.5em; 470 | } 471 | 472 | #content table .align-left { 473 | text-align: left; 474 | } 475 | 476 | #content table .align-center { 477 | text-align: center; 478 | } 479 | 480 | #content table .align-right { 481 | text-align: right; 482 | } 483 | 484 | #content .comment-list { 485 | list-style: none; 486 | list-style-image: none; 487 | margin: 0; 488 | padding: 0; 489 | } 490 | 491 | #content .comment-list li { 492 | padding: 0.5em; 493 | } 494 | 495 | #content .comment-list li.author { 496 | background: #F6F6F6; 497 | } 498 | 499 | #content .comment-list li.odd { 500 | border-style: solid none; 501 | } 502 | 503 | #content .comment-list li .comment-head, 504 | #content .comment-list li .comment-body { } 505 | 506 | #content .comment-list li .comment-head { 507 | border-style: none none dotted; 508 | overflow: hidden; 509 | } 510 | 511 | #content .comment-list li .comment-head img { 512 | vertical-align: middle; 513 | } 514 | 515 | #content .comment-list li .comment-head span { 516 | display: block; 517 | height: 32px; 518 | line-height: 32px; 519 | } 520 | 521 | #content .comment-list li .comment-head .comment-meta { 522 | float: right; 523 | } 524 | 525 | #content form { } 526 | 527 | #content form label { 528 | font-size: 0.8em; 529 | } 530 | 531 | #content form div { 532 | margin: 0.2em 0; 533 | vertical-align: bottom; 534 | } 535 | 536 | #content form input, #content form .textarea { 537 | border: 1px solid; 538 | padding: 0.2em; 539 | } 540 | 541 | #content form #openid_enabled_link:before { 542 | content: '' !important; 543 | } 544 | 545 | #content form #comment { 546 | border: none; 547 | width: 100%; 548 | } 549 | 550 | /* 551 | * Footer 552 | */ 553 | 554 | #footer { 555 | text-align: center; 556 | } 557 | 558 | #footer p { 559 | font-size: 0.75em; 560 | } 561 | 
-------------------------------------------------------------------------------- /hyperlight/tests/vb: -------------------------------------------------------------------------------- 1 | #Region " This is all an elaborate scam " 2 | ''' <summary id="a>b">This is a doc comment</summary> 3 | Protected Sub Page_Load(ByVal sender As Object, ByVal e As EventArgs) Handles Me.Load 4 | Dim [Dim] As [Enum] = Parallel.For(1, 10, Function (x) x) 5 | 'set page title 6 | Page.Title = "Something" 7 | Dim r As String = "Say ""Hello""" 8 | Dim i As Integer = 1234 9 | Dim d As Double = 1.23 10 | #Region " Test region " 11 | Dim s As Single = .123F 12 | Dim l As Long = 123L 13 | Dim ul As ULong = 123UL 14 | Dim c As Char = "x"c 15 | #End Region 16 | Dim h As Integer = &H0 17 | Dim t As Date = #5/31/1993 1:15:30 PM# 18 | Dim f As Single = 1.32e-5F 19 | Rem specialities 20 | Dim mysub = True OrElse False 21 | Dim sub2 = False 22 | Dim [sub] = Nothing 23 | End Sub 24 | #End Region 25 | -------------------------------------------------------------------------------- /hyperlight/tests/xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="utf-8" ?> 2 | 3 | <!DOCTYPE html> 4 | 5 | <html xml:lang="en"> 6 | <head> 7 | <!-- A colourful ---- 8 | ---- comment spanning multiple lines--> 9 | <title>Nö 10 | 11 | 14 | 15 | 16 | 17 |

& Don't do it!"

18 | nothing here to see!]> ]] >]]> 19 | 20 | 21 | --------------------------------------------------------------------------------