├── README.md
├── ReflectionTypeHint.php
├── ReflectionTypeHint_example.php
├── Text
    └── Censure.php
├── UTF8-CHANGELOG.txt
└── UTF8.php


/README.md:
--------------------------------------------------------------------------------
 1 | php-censure
 2 | ===========
 3 | 
 4 | Клон одноименной библиотеки антимата с google-code
 5 | 
 6 | Оригинал можно найти по ссылке: http://code.google.com/p/php-censure/
 7 | 
 8 | Библиотека позволяет определить наличие в тексте на русском языке мата (в том числе многие криптованные варианты) 
 9 | и/или заменить его произвольным набором символов
10 | 
11 | Оригинальное описание
12 | 
13 |     Алгоритм достаточно надёжен и быстр, в т.ч. на больших объёмах данных
14 |     Метод обнаружения мата основывается на корнях и предлогах русского языка, а не на словаре
15 |     Слова "лох", "хер", "залупа", "сука" матерными словами не считаются (см. словарь Даля)
16 |     Разработка ведётся с 2005 года 
17 | 
18 | Согласно статье 20.1 КоАП РФ нецензурная брань в общественных местах (интернет — место общественное) 
19 | расценивается как мелкое хулиганство, за что установлена административная ответственность — наложение 
20 | штрафа в размере от пятисот до одной тысячи рублей или административный арест на срок до пятнадцати суток. 
21 | 


--------------------------------------------------------------------------------
/ReflectionTypeHint.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * A class for validating method parameters to allowed types via reflection.
  4 |  *
  5 |  * Purpose
  6 |  *   Used as a more convenient replacement for multiple assert() calls placed right after a method declaration.
  7 |  *
  8 |  * Features and advantages
  9 |  *   * Very easy to use
 10 |  *   * Ability to turn off on the production server
 11 |  *
 12 |  * WARNING
 13 |  *   On a production server it is important to disable assert in order to save server resources.
 14 |  *   For this, use the assert_options(ASSERT_ACTIVE, false) or INI setting "assert.active 0".
 15 |  *   In this case ReflectionTypeHint::isValid() always returns TRUE!
 16 |  *
 17 |  * Useful links
 18 |  *   http://www.ilia.ws/archives/205-Type-hinting-for-PHP-5.3.html
 19 |  *   http://php.net/manual/en/language.oop5.typehinting.php
 20 |  * 
 21 |  * @example  ReflectionTypeHint_example.php
 22 |  * @link     http://code.google.com/p/php5-reflection-type-hint/
 23 |  * @license  http://creativecommons.org/licenses/by-sa/3.0/
 24 |  * @author   Nasibullin Rinat
 25 |  * @version  1.1.0
 26 |  */
 27 | class ReflectionTypeHint
 28 | {
 29 | 	protected static $hints = array(  #phpDoc type name (lowercase) => name of the function that checks a value
 30 | 		'int'      => 'is_int',
 31 | 		'integer'  => 'is_int',
 32 | 		'digit'    => 'ctype_digit',  #checked with ctype_digit(), i.e. meant for strings of digits
 33 | 		'number'   => 'ctype_digit',
 34 | 		'float'    => 'is_float',
 35 | 		'double'   => 'is_float',
 36 | 		'real'     => 'is_float',
 37 | 		'numeric'  => 'is_numeric',
 38 | 		'str'      => 'is_string',
 39 | 		'string'   => 'is_string',
 40 | 		'char'     => 'is_string',  #only checked as a string, the length is not verified
 41 | 		'bool'     => 'is_bool',
 42 | 		'boolean'  => 'is_bool',
 43 | 		'null'     => 'is_null',
 44 | 		'array'    => 'is_array',
 45 | 		'obj'      => 'is_object',
 46 | 		'object'   => 'is_object',
 47 | 		'res'      => 'is_resource',
 48 | 		'resource' => 'is_resource',
 49 | 		'scalar'   => 'is_scalar',  #integer, float, string or boolean
 50 | 		'cb'       => 'is_callable',
 51 | 		'callback' => 'is_callable',
 52 | 	);
 53 | 
 54 | 	#private constructor: the class cannot be instantiated from outside, its methods are called statically
 55 | 	private function __construct() {}
 56 | 
 57 | 	public static function isValid()
 58 | 	{
 59 | 		#Validates the arguments of the calling method against the types listed in its phpDoc @param tags.
 60 | 		#Returns TRUE if they match (or if assert is disabled), otherwise triggers E_USER_WARNING and returns FALSE.
 61 | 		if (! assert_options(ASSERT_ACTIVE)) return true;
 62 | 		$bt = self::debugBacktrace(null, 1);  #frame of the method that called isValid()
 63 | 		#only a class method can be reflected below; a frame without these keys cannot be checked
 64 | 		if (! isset($bt['class'], $bt['type'], $bt['function'])) return true;
 65 | 		list($class, $type, $function) = array($bt['class'], $bt['type'], $bt['function']);
 66 | 		$args = isset($bt['args']) ? $bt['args'] : array();
 67 | 		$file = isset($bt['file']) ? $bt['file'] : '';
 68 | 		$line = isset($bt['line']) ? $bt['line'] : 0;
 69 | 		if (! $args) return true; #speed improve
 70 | 		$r = new ReflectionMethod($class, $function);
 71 | 		$doc = $r->getDocComment();
 72 | 		preg_match_all('~	[\r\n]++ [\x20\t]++ \* [\x20\t]++
 73 | 							@param
 74 | 							[\x20\t]++
 75 | 							\K #memory reduce
 76 | 							( [_a-z]++[_a-z\d]*+
 77 | 								(?>[|/,][_a-z]+[_a-z\d]*)*+
 78 | 							) #1 types
 79 | 							[\x20\t]++
 80 | 							&?+\$([_a-z]++[_a-z\d]*+) #2 name
 81 | 						~sixSX', $doc, $params, PREG_SET_ORDER);
 82 | 		$parameters = $r->getParameters();
 83 | 		if (count($parameters) > count($params))
 84 | 		{
 85 | 			$message = 'phpDoc %d piece(s) @param description expected in %s%s%s(), %s given, ' . PHP_EOL
 86 | 					 . 'called in %s on line %d ' . PHP_EOL
 87 | 					 . 'and defined in %s on line %d';
 88 | 			trigger_error(sprintf($message, count($parameters), $class, $type, $function, count($params), $file, $line, $r->getFileName(), $r->getStartLine()), E_USER_NOTICE);
 89 | 		}
 90 | 		foreach ($args as $i => $value)
 91 | 		{
 92 | 			#stop at extra arguments: the ones without a @param tag or beyond the declared parameters
 93 | 			if (! isset($params[$i], $parameters[$i])) return true;
 94 | 			if ($parameters[$i]->name !== $params[$i][2])
 95 | 			{
 96 | 				$message = 'phpDoc @param %d in %s%s%s() must be named as $%s, $%s given, ' . PHP_EOL
 97 | 						 . 'called in %s on line %d ' . PHP_EOL
 98 | 						 . 'and defined in %s on line %d';
 99 | 				trigger_error(sprintf($message, $i + 1, $class, $type, $function, $parameters[$i]->name, $params[$i][2], $file, $line, $r->getFileName(), $r->getStartLine()), E_USER_NOTICE);
100 | 			}
101 | 			$hints = preg_split('~[|/,]~sSX', $params[$i][1]);  #a type list may be separated by "|", "/" or ","
102 | 			if (! self::checkValueTypes($hints, $value))
103 | 			{
104 | 				$message = 'Argument %d passed to %s%s%s() must be an %s, %s given, ' . PHP_EOL
105 | 						 . 'called in %s on line %d ' . PHP_EOL
106 | 						 . 'and defined in %s on line %d';
107 | 				trigger_error(sprintf($message, $i + 1, $class, $type, $function, implode('|', $hints), (is_object($value) ? get_class($value) . ' ' : '') . gettype($value), $file, $line, $r->getFileName(), $r->getStartLine()), E_USER_WARNING);
108 | 				return false;
109 | 			}
110 | 		}
111 | 		return true;
112 | 	}
113 | 
114 | 	/**
115 | 	 * Returns the stack trace. Works correctly with call_user_func*()
116 | 	 * (their frames are skipped entirely and the caller references are corrected).
117 | 	 * If $return_frame is given, returns only the frame with that index instead of the whole stack trace.
118 | 	 *
119 | 	 * @param   string|null  $re_ignore     frames whose enclosing "Class->method" name matches this regexp are skipped, example: '~^' . preg_quote(__CLASS__, '~') . '(?![a-zA-Z\d])~sSX'
120 | 	 * @param   int|null     $return_frame  zero-based index of the single frame to return
121 | 	 * @return  array        one frame if $return_frame was reached, otherwise the list of collected frames
122 | 	 */
123 | 	public static function debugBacktrace($re_ignore = null, $return_frame = null)
124 | 	{
125 | 		$trace = debug_backtrace();
126 | 
127 | 		$a = array();
128 | 		$frames = 0;
129 | 		for ($i = 0, $n = count($trace); $i < $n; $i++)
130 | 		{
131 | 			$t = $trace[$i];
132 | 			if (! $t) continue; // frame was set to null below because it has been merged into the previous one
133 | 
134 | 			// Next frame (null for the last one).
135 | 			$next = isset($trace[$i+1])? $trace[$i+1] : null;
136 | 
137 | 			// A frame without 'file' is treated as the dummy frame that precedes a call_user_func*() frame.
138 | 			if (! isset($t['file']) && $next)
139 | 			{
140 | 				$t['over_function'] = $trace[$i+1]['function'];
141 | 				$t = $t + $trace[$i+1]; // array union: keys missing in this frame are taken from the next one
142 | 				$trace[$i+1] = null; // skip call_user_func on next iteration
143 | 			}
144 | 
145 | 			// Skip the first processed frame (NOTE(review): presumably the call of debugBacktrace() itself).
146 | 			if (++$frames < 2) continue;
147 | 
148 | 			// The 'class' and 'function' fields of the next frame tell inside which function this frame was generated.
149 | 			// Skip the frames generated inside functions that match $re_ignore.
150 | 			if ($re_ignore && $next)
151 | 			{
152 | 				// Name of the function "inside which" the frame was generated.
153 | 				$frame_caller = (isset($next['class']) ? $next['class'] . $next['type'] : '')
154 | 							  . (isset($next['function']) ? $next['function'] : '');
155 | 				if (preg_match($re_ignore, $frame_caller)) continue;
156 | 			}
157 | 
158 | 			// Return the frame as soon as its index in the result equals $return_frame, otherwise collect it.
159 | 			if (count($a) === $return_frame) return $t;
160 | 			$a[] = $t;
161 | 		}
162 | 		return $a;
163 | 	}
164 | 
165 | 	/**
166 | 	 * Checks whether a value matches at least one of the allowed types.
167 | 	 *
168 | 	 * @param   array  $types  type names: keys of self::$hints, 'mixed' or class/interface names (case-insensitive)
169 | 	 * @param   mixed  $value  value to check
170 | 	 * @return  bool   TRUE if at least one type accepts the value; 'digit'/'number' accept only strings of digits
171 | 	 */
172 | 	public static function checkValueTypes(array $types, $value)
173 | 	{
174 | 		foreach ($types as $type)
175 | 		{
176 | 			$type = strtolower($type);
177 | 			if ($type === 'mixed' || $value instanceof $type) return true;  #instanceof is FALSE for non-objects and unknown class names
178 | 			$check = isset(self::$hints[$type]) ? self::$hints[$type] : null;
179 | 			if ($check && ($check !== 'ctype_digit' || is_string($value)) && call_user_func($check, $value)) return true;  #ctype_digit() reads an integer as an ASCII code
180 | 		}
181 | 		return false;
182 | 	}
183 | }


--------------------------------------------------------------------------------
/ReflectionTypeHint_example.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | class Example
 3 | {
 4 | 	/**
 5 | 	 * This is myMethod!
 6 | 	 *
 7 | 	 * @param   string|array  $s  param1
 8 | 	 * @param   int           $i  param2
 9 | 	 * @param   Example|null  $e  param3
10 | 	 * @param   bool          $b  param4
11 | 	 * @param   array/null    $a  param5
12 | 	 * @return  array|bool    returns FALSE if error occurred
13 | 	 */
14 | 	public function myMethod($s, $i, $e = null, $b = true, array $a = null)
15 | 	{
16 | 		if (! ReflectionTypeHint::isValid()) return false;  //checks the passed arguments against the @param tags above
17 | 		//... (the actual method body goes here)
18 | 	}
19 | }
20 | 
21 | $e = new Example();  //create the instance first: myMethod() is not static and $e is passed as an argument
22 | $e->myMethod('sss', 75467, $e, true);
23 | //$e->myMethod('sss', 75467, new Exception(), true);  //argument 3 is declared as Example|null, an Exception does not match it
24 | 


--------------------------------------------------------------------------------
/Text/Censure.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Определение наличия мата (нецензурных слов) в тексте, матотест
  4 |  *
  5 |  * Алгоритм достаточно надёжен и быстр, в т.ч. на больших объёмах данных
  6 |  * Метод обнаружения мата основывается на корнях и предлогах русского языка, а не на словаре
  7 |  * Слова "лох", "хер", "залупа", "сука" матерными словами не считаются (см. словарь Даля)
  8 |  * Разработка ведётся с 2005 года
  9 |  *
 10 |  * Класс явл. хорошим учебным пособием по изучению регулярных выражений и... русского мата! =)
 11 |  *
 12 |  * http://www.google.com/search?q=%F2%EE%EB%EA%EE%E2%FB%E9%20%F1%EB%EE%E2%E0%F0%FC%20%F0%F3%F1%F1%EA%EE%E3%EE%20%EC%E0%F2%E0&ie=cp1251&oe=UTF-8
 13 |  * http://www.awd.ru/dic.htm (Толковый словарь русского мата)
 14 |  *
 15 |  * Согласно статье 20.1 КоАП РФ нецензурная брань в общественных местах (интернет — место общественное) расценивается как мелкое хулиганство,
 16 |  * за что установлена административная ответственность — наложение штрафа в размере от пятисот до одной тысячи рублей или административный арест на срок до пятнадцати суток.
 17 |  *
 18 |  * TODO
 19 |  *   * добавить цифровую подделку с нулём под букву O
 20 |  *
 21 |  * @link     http://code.google.com/p/php-censure/
 22 |  * @license  http://creativecommons.org/licenses/by-sa/3.0/
 23 |  * @author   Nasibullin Rinat
 24 |  * @version  3.2.7
 25 |  */
 26 | class Text_Censure
 27 | {
 28 | 	#instantiation is forbidden: the methods of this class are called only statically!
 29 | 	private function __construct() {}
 30 | 
 31 | 	/**
 32 | 	 * Detects obscene words in a text and either returns the fragment found or replaces every such word.
 33 | 	 * @param    string|null  $s         string to check
 34 | 	 * @param    int|string   $delta     width of the returned fragment in words
 35 | 	 *                                   (number of words to the left and to the right of the obscene word, 10 at most)
 36 | 	 * @param    string       $continue  string inserted at the beginning and at the end of the fragment
 37 | 	 * @param    bool         $is_html   treat the string as HTML code?
 38 | 	 *                                   if $is_html === TRUE, the HTML markup is ignored and HTML entities are converted to "pure" UTF-8
 39 | 	 * @param    string|null  $replace   string the obscene fragment is replaced with, for example: '[ой]' ($replace must be in the $charset encoding)
 40 | 	 *                                   the option works in PHP >= 5.2.0
 41 | 	 * @param    string       $charset   character encoding (the native one is UTF-8, other encodings are converted transparently)
 42 | 	 * @return   bool|string|int|null    If $replace === NULL, returns FALSE if nothing obscene was found, otherwise a text fragment with the obscene word.
 43 | 	 *                                   If $replace !== NULL, returns the source string with the obscene fragments replaced by $replace.
 44 | 	 *                                   On error returns an error code > 0 (integer):
 45 | 	 *                                     * PREG_INTERNAL_ERROR
 46 | 	 *                                     * PREG_BACKTRACK_LIMIT_ERROR (see also pcre.backtrack_limit)
 47 | 	 *                                     * PREG_RECURSION_LIMIT_ERROR (see also pcre.recursion_limit)
 48 | 	 *                                     * PREG_BAD_UTF8_ERROR
 49 | 	 *                                     * PREG_BAD_UTF8_OFFSET_ERROR (since PHP 5.3.0)
 50 | 	 *                                   Or -1 if ReflectionTypeHint reported an error
 51 | 	 */
 52 | 	public static function parse(
 53 | 		$s,
 54 | 		$delta = 3,
 55 | 		$continue = "\xe2\x80\xa6",
 56 | 		$is_html = true,
 57 | 		$replace = null,
 58 | 		$charset = 'UTF-8')
 59 | 	{
 60 | 		if (! ReflectionTypeHint::isValid()) return -1;
 61 | 		if ($s === null) return null;
 62 | 
 63 | 		static $re_badwords = null;
 64 | 
 65 | 		if ($re_badwords === null)
 66 | 		{
 67 | 			#Russian prefixes (prepositions):
 68 | 			#[всуо]|
 69 | 			#по|за|на|об|до|от|вы|вс|вз|из|ис|
 70 | 			#под|про|при|над|низ|раз|рас|воз|вос|
 71 | 			#пооб|повы|пона|поза|недо|пере|одно|
 72 | 			#полуза|произ|пораз|много|
 73 | 			$pretext = array(
 74 | 				#1
 75 | 				'[уyоoаa]_?      (?=[еёeхx])',        #у, о   (уебать, охуеть, ахуеть)
 76 | 				'[вvbсc]_?       (?=[хпбмгжxpmgj])',  #в, с   (впиздячить, схуярить)
 77 | 				'[вvbсc]_?[ъь]_? (?=[еёe])',          #въ, съ (съебаться, въебать)
 78 | 				'ё_?             (?=[бb6])',          #ё      (ёбля)
 79 | 				#2
 80 | 				'[вvb]_?[ыi]_?',      #вы
 81 | 				'[зz3]_?[аa]_?',      #за
 82 | 				'[нnh]_?[аaеeиi]_?',  #на, не, ни
 83 | 				'[вvb]_?[сc]_?          (?=[хпбмгжxpmgj])',  #вс (вспизднуть)
 84 | 				'[оo]_?[тtбb6]_?        (?=[хпбмгжxpmgj])',  #от, об
 85 | 				'[оo]_?[тtбb6]_?[ъь]_?  (?=[еёe])',          #отъ, объ
 86 | 				'[иiвvb]_?[зz3]_?       (?=[хпбмгжxpmgj])',  #[ив]з
 87 | 				'[иiвvb]_?[зz3]_?[ъь]_? (?=[еёe])',          #[ив]зъ
 88 | 				'[иi]_?[сc]_?           (?=[хпбмгжxpmgj])',  #ис
 89 | 				'[пpдdg]_?[оo]_? (?> [бb6]_?         (?=[хпбмгжxpmgj])
 90 |                                | [бb6]_?  [ъь]_? (?=[еёe])
 91 |                                | [зz3]_? [аa] _?
 92 |                              )?',  #по, до, пообъ, дообъ, поза, доза (doubled characters are cut out!)
 93 | 				#3
 94 | 				'[пp]_?[рr]_?[оoиi]_?',  #пр[ои]
 95 | 				'[зz3]_?[лl]_?[оo]_?',   #зло (злоебучая)
 96 | 				'[нnh]_?[аa]_?[дdg]_?         (?=[хпбмгжxpmgj])',  #над
 97 | 				'[нnh]_?[аa]_?[дdg]_?[ъь]_?   (?=[еёe])',          #надъ
 98 | 				'[пp]_?[оoаa]_?[дdg]_?        (?=[хпбмгжxpmgj])',  #под
 99 | 				'[пp]_?[оoаa]_?[дdg]_?[ъь]_?  (?=[еёe])',          #подъ
100 | 				'[рr]_?[аa]_?[зz3сc]_?        (?=[хпбмгжxpmgj])',  #ра[зс]
101 | 				'[рr]_?[аa]_?[зz3сc]_?[ъь]_?  (?=[еёe])',          #ра[зс]ъ
102 | 				'[вvb]_?[оo]_?[зz3сc]_?       (?=[хпбмгжxpmgj])',  #во[зс]
103 | 				'[вvb]_?[оo]_?[зz3сc]_?[ъь]_? (?=[еёe])',          #во[зс]ъ
104 | 				#4
105 | 				'[нnh]_?[еe]_?[дdg]_?[оo]_?',    #недо
106 | 				'[пp]_?[еe]_?[рr]_?[еe]_?',      #пере
107 | 				'[oо]_?[дdg]_?[нnh]_?[оo]_?',    #одно
108 | 				'[кk]_?[oо]_?[нnh]_?[оo]_?',     #коно    (коноебиться)
109 | 				'[мm]_?[уy]_?[дdg]_?[oоaа]_?',   #муд[оа] (мудаёб)
110 | 				'[oо]_?[сc]_?[тt]_?[оo]_?',      #осто    (остопиздело)
111 | 				'[дdg]_?[уy]_?[рpr]_?[оoаa]_?',  #дур[оа]
112 | 				'[хx]_?[уy]_?[дdg]_?[оoаa]_?',   #худ[оа] (худоебина)
113 | 				#5
114 | 				'[мm]_?[нnh]_?[оo]_?[гg]_?[оo]_?',    #много
115 | 				'[мm]_?[оo]_?[рpr]_?[дdg]_?[оoаa]_?', #морд[оа]
116 | 				'[мm]_?[оo]_?[зz3]_?[гg]_?[оoаa]_?',  #мозг[оа]
117 | 				'[дdg]_?[оo]_?[лl]_?[бb6]_?[оoаa]_?', #долб[оа]
118 | 				'[оo]_?[сc]_?[тt]_?[рpr]_?[оo]_?',    #остро
119 | 			);
120 | 
121 | 			$badwords = array(
122 | 				#The word starting with Х
123 | 				'(?<=\PL) %RE_PRETEXT%?
124 |                       [hхx]_?[уyu]_?[ийiеeёяюju]     #хуй, хуя, хую, хуем, хуёвый, охуительный
125 |                       #исключения:
126 |                       (?<! _hue(?=_)     #HUE     -- цветовая палитра
127 |                          | _hue(?=so_)   #hueso   -- испанское слово
128 |                          | _хуе(?=дин)   #Хуедин  -- город в Румынии
129 |                          | _hyu(?=ndai_) #Hyundai -- марка корейского автомобиля
130 |                       )',
131 | 				'(?<=\PL) ([з3][бb6][сc])\b',// збс
132 | 				'(?<=\PL) ([ф][а][к])\b',// фак
133 | 				'([бb6][лl]_?[тt])',//блять
134 | 				'\bблет\b',//блет
135 | 				'\bблэт\b',//блет
136 | 				'([т]_?[в]_?[о]_?[ю]_?[м]_?[а]_?[т]_?[ь])',  #твою мать
137 | 				'\b[нn][аa][хx]\b',  #нах
138 | 
139 | 				#The word starting with П
140 | 				'(?<=\PL) %RE_PRETEXT%?
141 |                       [пp]_?[иieеё]_?[зz3]_?[дd](?=_?[:vowel:])',  #п[ие]зда, пизде, пиздёж, пизду, пиздюлина, пиздобол, опиздинеть, пиздых, подпёздывать
142 | 
143 | 				#The word starting with Е
144 | 				'(?<=\PL) %RE_PRETEXT%?
145 |                       [eеё]_?
146 | 							#исключения
147 | 							(?<!н[eе][её]_|т_е_)    #неё, т.е. большие
148 |                       [бb6]_? (?= [уyиi]_                       #ебу, еби
149 |                                 | [ыиiоoaаеeёуy]_?[:consonant:] #ебут, ебать, ебись, ебёт, поеботина, выебываться, ёбарь
150 |                                    #исключения
151 |                                   (?<!_ebo[kt](?=_)|буд)        #ebook, eboot, ее будут
152 |                                 | [лl](?:[оoаaыиiя]|ya)         #ебло, ебла, ебливая, еблись, еблысь, ёбля
153 |                                 | [нn]_?[уy]                    #ёбнул, ёбнутый
154 |                                 | [кk]_?[аa]                    #взъёбка
155 |                                 | [сc]_?[тt]                    #ебсти
156 |                                )',
157 | 
158 | 				#The word starting with Е (with a mandatory prefix of 2 or more letters!)
159 | 				'(?<=\PL) %RE_PRETEXT%
160 |                       (?<= \pL\pL|\pL_\pL_)
161 |                       [eеё]_?[бb6]    #долбоёб, дураёб, изъёб, заёб, заебай, разъебай, мудоёбы
162 |             ',
163 | 
164 | 				#The word starting with Е
165 | 				'(?<=\PL) ёб (?=\PL)',  #ёб твою мать
166 | 
167 | 				#The word starting with Б
168 | 				'(?<=\PL) %RE_PRETEXT%?
169 |                       [бb6]_?[лl]_?(?:я|ya)(?: _         #бля
170 |                                              | _?[тдtd]  #блять, бляди
171 |                                            )',
172 | 
173 | 				#ПИДОР
174 | 				'(?<=\PL) [пp]_?[иieе]_?[дdg]_?[eеaаoо]_?[rpр]',  #п[ие]д[оеа]р
175 | 
176 | 				#МУДАК
177 | 				'(?<=\PL) [мm]_?[уy]_?[дdg]_?[аa]  #мудак, мудачок
178 |                       #исключения:
179 |                       (?<!_myda(?=s_))  #Chelonia mydas -- морская зеленая (суповая) черепаха
180 |             ',
181 | 
182 | 				#ЖОПА
183 | 				'(?<=\PL) [zж]_?h?_?[оo]_?[pп]_?[aаyуыiеeoо]',  #жоп[ауыео]
184 | 
185 | 				#МАНДА
186 | 				#exceptions: the city of Mandalay, the district of Mandal, the Mandan people, the surname Mandel, mandarin
187 | 				'(?<=\PL) [мm]_?[аa]_?[нnh]_?[дdg]_?[aаyуыiеeoо]  #манд[ауыео]
188 |                       #исключения:
189 |                       (?<! манда(?=[лн]|рин)
190 |                          | manda(?=[ln]|rin)
191 |                          | манде(?=ль)
192 |                       )',
193 | 
194 | 				#ГОВНО
195 | 				'(?<=\PL) [гg]_?[оo]_?[вvb]_?[нnh]_?[оoаaяеeyу]',  #говн[оаяеу]
196 | 
197 | 				#FUCK
198 | 				'(?<=\PL) f_?u_?[cс]_?k',  #fuck, fucking
199 | 
200 | 				/* disabled patterns: the class description says these words are not treated as obscene
201 | 				#ЛОХ
202 | 				' л_?[оo]_?[хx]',
203 | 
204 | 				#СУКА
205 | 				'[^р]_?[scс]_?[yуu]_?[kк]_?[aаiи]', #сука (кроме слова "барсука" - это животное-грызун)
206 | 				'[^р]_?[scс]_?[yуu]_?[4ч]_?[кk]',   #сучк(и) (кроме слова "барсучка")
207 | 
208 | 				#ХЕР
209 | 				' %RE_PRETEXT%?[хxh]_?[еe]_?[рpr](_?[нnh]_?(я|ya)| )', #%RE_PRETEXT%хер(ня)
210 | 
211 | 				#ЗАЛУПА
212 | 				' [зz3]_?[аa]_?[лl]_?[уy]_?[пp]_?[аa]',
213 | 				*/
214 | 			);
215 | 
216 | 			$trans = array(
217 | 				'_'             => '\x20',                       #space
218 | 				'\pL'           => '[^\x20\d]',                  #letter
219 | 				'\PL'           => '[\x20\d]',                   #not a letter
220 | 				'[:vowel:]'     => '[аеиоуыэюяёaeioyu]',         #vowels
221 | 				'[:consonant:]' => '[^аеиоуыэюяёaeioyu\x20\d]',  #consonants
222 | 			);
223 | 
224 | 			$re_badwords = str_replace(
225 | 				'%RE_PRETEXT%',
226 | 				'(?:' . implode('|', $pretext) . ')',  #a once-only (atomic) group with alternatives must not be used here!
227 | 				'~' . implode('|', $badwords) . '~sxuSX'
228 | 			);
229 | 			$re_badwords = strtr($re_badwords, $trans);
230 | 		}
231 | 
232 | 		$s       = UTF8::convert_from($s,       $charset);
233 | 		$replace = UTF8::convert_from($replace, $charset);
234 | 
235 | 		$ss = $s;  #saves original string
236 | 
237 | 		if ($is_html)
238 | 		{
239 | 			#scripts are not cut out, because javascript code can be used as a workaround:
240 | 			#<script>document.write('сло'+'во')</script>
241 | 			#although letting users submit javascript code is a bad idea anyway
242 | 			$s = is_callable(array('HTML', 'strip_tags')) ? HTML::strip_tags($s, null, true, array('comment', 'style', 'map', 'frameset', 'object', 'applet'))
243 | 														  : strip_tags($s);
244 | 			#convert HTML entities to "pure" UTF-8
245 | 			$s = UTF8::html_entity_decode($s, $is_htmlspecialchars = true);
246 | 		}
247 | 
248 | 		if (strtoupper(substr($charset, 0, 3)) === 'UTF')  #UTF-8, UTF-16, UTF-32
249 | 		{
250 | 			#remove combining diactrical marks
251 | 			$additional_chars = array(
252 | 				"\xc2\xad",  #soft hyphens (&shy;)
253 | 			);
254 | 			$s = UTF8::diactrical_remove($s, $additional_chars);
255 | 		}
256 | 
257 | 		#put a space between words glued together in CamelCase: ВотБ/\яПидорыОхуелиБлятьНахуйПохуйПи3децПолный
258 | 		if (version_compare(PHP_VERSION, '5.2.0', '>='))
259 | 		{
260 | 			$s = preg_replace('~     [\p{Lu}3] (?>\p{Ll}+|/\\\\|[@36]+)++   #Вот
261 | 								 (?= [\p{Lu}3] (?:\p{Ll} |/\\\\|[@36] ) )   #Бля
262 | 							   ~sxuSX', '$0 ', $s);
263 | 		}
264 | 
265 | 		$s = mb_strtolower($s, 'UTF-8');  #explicit encoding: the result must not depend on mb_internal_encoding()
266 | 
267 | 		#collect only letters and digits into an array
268 | 		#"с_л@о#во,с\xc2\xa7лово.Слово" -> "с л о во с лово слово слово слово слово"
269 | 		preg_match_all('~(?> \xd0[\xb0-\xbf]|\xd1[\x80-\x8f\x91]  #[а-я]
270 | 						  |  /\\\\     #л
271 | 						  |  @         #а
272 | 						  |  [a-z\d]+
273 | 						  )+
274 | 						~sxSX', $s, $m);
275 | 		$s = ' ' . implode(' ', $m[0]) . ' ';
276 | 
277 | 		$trans = array(
278 | 			'/\\' => 'л',  #Б/\ЯТЬ --> БЛЯТЬ
279 | 			'@'   => 'а',  #пизд@  --> пизда
280 | 		);
281 | 		$s = strtr($s, $trans);
282 | 
283 | 		#digits that fake letters
284 | 		$trans = array(
285 | 			'~ [3з]++ [3з\x20]*+ ~sxuSX' => 'з',
286 | 			'~ [6б]++ [6б\x20]*+ ~sxuSX' => 'б',
287 | 		);
288 | 		$s = preg_replace(array_keys($trans), array_values($trans), $s);
289 | 
290 | 		#remove all repeated characters to catch tricks like "х-у-у-й"
291 | 		#"сллоооовоо   слово  х у у й" --> "слово слово х у й"
292 | 		$s = preg_replace('/(  [\xd0\xd1][\x80-\xbf] \x20?  #optimized [а-я]
293 |                              | [a-z\d] \x20?
294 |                              ) \\1+
295 |                            /sxSX', '$1', $s);
296 | 
297 | 		if ($replace === null || version_compare(PHP_VERSION, '5.2.0', '<'))
298 | 		{
299 | 			$result = preg_match($re_badwords, $s, $m, PREG_OFFSET_CAPTURE);
300 | 			if (function_exists('preg_last_error') && preg_last_error() !== PREG_NO_ERROR) return preg_last_error();
301 | 			if ($result === false) return 1;  #PREG_INTERNAL_ERROR = 1
302 | 			if ($result && $replace === null)
303 | 			{
304 | 				list($word, $offset) = $m[0];
305 | 				$s1 = substr($s, 0, $offset);
306 | 				$s2 = substr($s, $offset + strlen($word));
307 | 				$delta = intval($delta);
308 | 				if ($delta === 0) $fragment = '[' . trim($word) . ']';
309 | 				else
310 | 				{
311 | 					if ($delta < 1 || $delta > 10) $delta = 3;
312 | 					$before = preg_match('/  (?> \x20 (?>[\xd0\xd1][\x80-\xbf]|[a-z\d]+)++ ){1,' . $delta . '}+
313 |                                    \x20?+
314 |                                 $/sxSX', $s1, $m1) ? $m1[0] : '';  #up to $delta words before the found word
315 | 					$after = preg_match('/^ (?>[\xd0\xd1][\x80-\xbf]|[a-z\d]+)*+  #ending
316 |                                    \x20?+
317 |                                    (?> (?>[\xd0\xd1][\x80-\xbf]|[a-z\d]+)++ \x20 ){0,' . $delta . '}+
318 |                                 /sxSX', $s2, $m2) ? $m2[0] : '';  #the rest of the word and up to $delta words after it
319 | 					$fragment = (ltrim($before) !== ltrim($s1) ? $continue : '') .
320 | 						trim($before . '[' . trim($word) . ']' . $after) .
321 | 						(rtrim($after) !== rtrim($s2) ? $continue : '');
322 | 				}
323 | 				return UTF8::convert_to($fragment, $charset);
324 | 			}
325 | 			return false;
326 | 		}
327 | 
328 | 		$result = preg_match_all($re_badwords, $s, $m);
329 | 		if (function_exists('preg_last_error') && preg_last_error() !== PREG_NO_ERROR) return preg_last_error();
330 | 		if ($result === false) return 1;  #PREG_INTERNAL_ERROR = 1
331 | 		if ($result > 0)
332 | 		{
333 | 			$s = $ss;
334 | 			#replace every obscene fragment found with $replace in the original text
335 | 			foreach ($m[0] as $w)
336 | 			{
337 | 				$re_w = '~' . preg_replace_callback('~(?:/\\\\|[^\x20])~suSX', array(__CLASS__, '_make_regexp_callback'), $w) . '~sxuiSX';
338 | 				$ss = preg_replace($re_w, $replace, $ss);
339 | 				if ($ss === null) return preg_last_error() !== PREG_NO_ERROR ? preg_last_error() : 1;  #PREG_INTERNAL_ERROR = 1
340 | 			}
341 | 			#check the result again until nothing changes any more
342 | 			while (is_string($ss) && $ss !== $s) $ss = self::parse($s = $ss, $delta, $continue, $is_html, $replace, 'UTF-8');
343 | 			if (! is_string($ss)) return $ss;  #error code returned by the nested call
344 | 		}
345 | 		return UTF8::convert_to($ss, $charset);
346 | 	}
346 | 
347 | 	private static function _make_regexp_callback(array $m)
348 | 	{
349 | 		#Builds a regexp fragment that matches one character of a found word in the original text.
350 | 		$holes = '(?!/\\\\)[^\p{L}\d]';  #non letter, non digit, non '/\'
351 | 		$templates = array(  #characters that have look-alike substitutes
352 | 			'а' => '[@аА]++           (?>[:holes:]|[@аА]+)*+',
353 | 			'з' => '[3зЗ]++           (?>[:holes:]|[3зЗ]+)*+',
354 | 			'б' => '[6бБ]++           (?>[:holes:]|[6бБ]+)*+',
355 | 			'л' => '(?>[лЛ]+|/\\\\)++ (?>[:holes:]|[лЛ]+|/\\\\)*+',
356 | 		);
357 | 		if (isset($templates[$m[0]])) $re = $templates[$m[0]];
358 | 		else
359 | 		{
360 | 			#in PCRE-7.2 the /i flag combined with /u does not work for some reason (BUG?), so both letter cases are listed
361 | 			$char = '[' . preg_quote($m[0] . UTF8::uppercase($m[0]), '~') . ']';
362 | 			$re = str_replace('$0', $char, '$0++ (?>[:holes:]|$0+)*+');
363 | 		}
364 | 		return str_replace('[:holes:]', $holes, $re . "\r\n");
365 | 	}
365 | }
366 | 


--------------------------------------------------------------------------------
/UTF8-CHANGELOG.txt:
--------------------------------------------------------------------------------
 1 | 2.2.2 / 2011-06-24
 2 | 
 3 | 	* Convert case functions improved: from all russian charsets to UTF8 native support was added
 4 | 	* UTF8::stripos() speed improved
 5 | 	* constant REPLACEMENT_CHAR added
 6 | 
 7 | 2.2.1 / 2011-06-08
 8 | 
 9 | 	* UTF8::preg_quote_case_insensitive() added
10 | 	* UTF8::stripos() speed improved
11 | 
12 | 2.2.0 / 2011-06-06
13 | 
14 | 	* UTF8::strlen(), UTF8::substr(), UTF8::strpos(),
15 | 	  UTF8::html_entity_encode(), UTF8::html_entity_decode(),
16 |       UTF8::convert_case(), UTF8::lowercase(), UTF8::uppercase() speed improved
17 | 	* UTF8::stripos(), UTF8::to_unicode(), UTF8::from_unicode() added
18 | 	* UTF8::strtolower(), UTF8::strtoupper() as wrapper to UTF8::convert_case() added
19 | 	* Unicode character database to 6.0.0 (2010-06-04) updated
20 | 	* UTF8::$convert_case_table improved
21 | 
22 | 2.1.3 / 2011-05-31
23 | 
24 | 	* UTF8::truncate() small bug fixed
25 | 
26 | 2.1.2 / 2011-03-25
27 | 
28 | 	* Класс требует PHP-5.3.x
29 | 	* UTF8::$char_re deprecated
30 | 	* Добавлен метод UTF8::tests(), который тестирует методы класса на правильность работы
31 | 	* Добавлены методы UTF8::strcmp(), UTF8::strncmp(), UTF8::strcasecmp()
32 | 	* UTF8::is_utf8(), UTF8::str_limit(), UTF8::str_split() speed improved
33 | 	* Добавлен 2-й параметр в UTF8::html_entity_encode()
34 | 	* Добавлен 3-й параметр в UTF8::ucwords()
35 | 	* Методы UTF8::convert_case(), UTF8::lowercase(), UTF8::uppercase() могут принимать массив в 1-м параметре
36 | 	* Мелкие улучшения в UTF8::strtr()
37 | 	* Модернизирован класс ReflectionTypeHint
38 | 
39 | 2.1.1 / 2010-07-19
40 | 
41 | 	* Добавлены методы array_change_key_case(), range(), strtr()
42 | 	* Улучшен метод convert_files_from()
43 | 	* Unicode Character Database 5.2.0
44 | 	* Исправлены ошибки в trim(), ltrim(), rtrim(), str_pad(), которые могут возникать в некоторых случаях
45 | 
46 | 2.1.0 / 2010-03-26
47 | 
48 | 	* Удалён метод unescape_recursive()
49 | 	* Добавлен метод convert_files_from()
50 | 	* Несколько методов теперь могут принимать массив и делать их обход рекурсивно
51 | 	* Почти все методы для обработки строк могут принимать и возвращать NULL
52 | 
53 | 2.0.2 / 2010-02-13
54 | 
55 | 	* Новые методы is_ascii(), ltrim(), rtrim(), trim(), str_pad(), strspn()
56 | 	* Исправлена небольшая ошибка в str_limit()
57 | 	* Исправлена ошибка в методах convert_from() и convert_to(): они ошибочно возвращали FALSE,
58 | 	  если подать на вход массив, содержащий элементы типа boolean со значением FALSE
59 | 
60 | 2.0.1 / 2010-02-08
61 | 
62 | 	* Удалён метод convert_from_cp1259(), используйте convert_from('cp1251')
63 | 	* Метод convert_from_utf16() теперь приватный, используйте convert_from('UTF-16')
64 | 	* Добавлены методы convert_to(), diactrical_remove(), diactrical_restore()
65 | 	* Другие мелкие исправления
66 | 


--------------------------------------------------------------------------------
/UTF8.php:
--------------------------------------------------------------------------------
   1 | <?php
   2 | /**
   3 |  * PHP5 UTF-8 is a UTF-8 aware library of functions mirroring PHP's own string functions.
   4 |  *
   5 |  * The powerful solution/contribution for UTF-8 support in your framework/CMS, written on PHP.
   6 |  * This library is an advancement of http://sourceforge.net/projects/phputf8 (last updated in 2007).
   7 |  *
   8 |  * UTF-8 support in PHP 5.
   9 |  *
  10 |  * Features and benefits of using this class
  11 |  *   * Compatibility with the interface standard PHP functions that deal with single-byte encodings
  12 |  *   * Ability to work without the PHP extensions ICONV and MBSTRING; if they are available, they are actively used!
  13 |  *   * Useful features that are missing from ICONV and MBSTRING
  14 |  *   * The methods that take and return a string, are able to take and return null (useful for selects from a database)
  15 |  *   * Several methods are able to process arrays recursively
  16 |  *   * A single interface and encapsulation (you can inherit and override)
  17 |  *   * High performance, reliability and quality code
  18 |  *   * PHP >= 5.3.x
  19 |  *
  20 |  * In Russian:
  21 |  *
  22 |  * Поддержка UTF-8 в PHP 5.
  23 |  *
  24 |  * Возможности и преимущества использования этого класса
  25 |  *   * Совместимость с интерфейсом стандартных PHP функций, работающих с однобайтовыми кодировками
  26 |  *   * Возможность работы без PHP расширений ICONV и MBSTRING, если они есть, то активно используются!
  27 |  *   * Полезные функции, отсутствующие в ICONV и MBSTRING
  28 |  *   * Методы, которые принимают и возвращают строку, умеют принимать и возвращать null (удобно при выборках значений из базы данных)
  29 |  *   * Несколько методов умеют обрабатывать массивы рекурсивно
  30 |  *   * Единый интерфейс и инкапсуляция (можно унаследоваться и переопределить методы)
  31 |  *   * Высокая производительность, надёжность и качественный код
  32 |  *   * PHP >= 5.3.x
  33 |  *
  34 |  * Example:
  35 |  *   $s = 'Hello, Привет';
  36 |  *   if (UTF8::is_utf8($s)) echo UTF8::strlen($s);
  37 |  *
  38 |  * UTF-8 encoding scheme:
  39 |  *   2^7   0x00000000 — 0x0000007F  0xxxxxxx
  40 |  *   2^11  0x00000080 — 0x000007FF  110xxxxx 10xxxxxx
  41 |  *   2^16  0x00000800 — 0x0000FFFF  1110xxxx 10xxxxxx 10xxxxxx
  42 |  *   2^21  0x00010000 — 0x001FFFFF  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  43 |  *   1-4 bytes length: 2^7 + 2^11 + 2^16 + 2^21 = 2 164 864
  44 |  *
  45 |  * If I were the owner of the world, I would leave only 2 encodings: UTF-8 and UTF-32 ;-)
  46 |  *
  47 |  * Useful links
  48 |  *   http://ru.wikipedia.org/wiki/UTF8
  49 |  *   http://www.madore.org/~david/misc/unitest/   A Unicode Test Page
  50 |  *   http://www.unicode.org/
  51 |  *   http://www.unicode.org/reports/
  52 |  *   http://www.unicode.org/reports/tr10/      Unicode Collation Algorithm
  53 |  *   http://www.unicode.org/Public/UCA/6.0.0/  Unicode Collation Algorithm
  54 |  *   http://www.unicode.org/reports/tr6/       A Standard Compression Scheme for Unicode
  55 |  *   http://www.fileformat.info/info/unicode/char/search.htm  Unicode Character Search
  56 |  *
  57 |  * @link     http://code.google.com/p/php5-utf8/
  58 |  * @license  http://creativecommons.org/licenses/by-sa/3.0/
  59 |  * @author   Nasibullin Rinat
  60 |  * @version  2.2.2
  61 |  */
  62 | class UTF8
  63 | {
  64 | 	#REPLACEMENT CHARACTER (for broken char)
  65 | 	const REPLACEMENT_CHAR = "\xEF\xBF\xBD"; #U+FFFD
  66 | 
  67 | 	/**
  68 | 	 * Regular expression matching a single UTF-8 character without the /u flag (needs the /x flag: the pattern contains whitespace and # comments)
  69 | 	 * @deprecated  Use a dot (".") together with the /u flag instead, it works faster!
  70 | 	 * @var string
  71 | 	 */
  72 | 	public static $char_re = '  [\x09\x0A\x0D\x20-\x7E]           # ASCII strict
  73 |                               # [\x00-\x7F]                       # ASCII non-strict (including control chars)
  74 |                               | [\xC2-\xDF][\x80-\xBF]            # non-overlong 2-byte
  75 |                               |  \xE0[\xA0-\xBF][\x80-\xBF]       # excluding overlongs
  76 |                               | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
  77 |                               |  \xED[\x80-\x9F][\x80-\xBF]       # excluding surrogates
  78 |                               |  \xF0[\x90-\xBF][\x80-\xBF]{2}    # planes 1-3
  79 |                               | [\xF1-\xF3][\x80-\xBF]{3}         # planes 4-15
  80 |                               |  \xF4[\x80-\x8F][\x80-\xBF]{2}    # plane 16
  81 |                              ';
  82 | 
  83 | 	/**
  84 | 	 * PCRE fragment (byte-wise, for use with the /x flag) matching one combining diacritical mark (Unicode 5.1).
  85 | 	 * The first alternative covers the whole block U+0300 — U+036F, i.e. the bytes CC 80..CC BF and CD 80..CD AF.
  86 | 	 * For example, Russian letters in composed form: "Ё" (U+0401), "Й" (U+0419),
  87 | 	 * decomposed form: (U+0415 U+0308), (U+0418 U+0306)
  88 | 	 *
  89 | 	 * @link http://www.unicode.org/charts/PDF/U0300.pdf
  90 | 	 * @link http://www.unicode.org/charts/PDF/U1DC0.pdf
  91 | 	 * @link http://www.unicode.org/charts/PDF/UFE20.pdf
  92 | 	 * @var  string
  93 | 	 */
  94 | 	#public static $diactrical_re = '\p{M}'; #alternative, but only with /u flag
  95 | 	public static $diactrical_re = '  \xcc[\x80-\xbf]|\xcd[\x80-\xaf]  #UNICODE range: U+0300 — U+036F (for letters)
  96 |                                     | \xe2\x83[\x90-\xbf]              #UNICODE range: U+20D0 — U+20FF (for symbols)
  97 |                                     | \xe1\xb7[\x80-\xbf]              #UNICODE range: U+1DC0 — U+1DFF (supplement)
  98 |                                     | \xef\xb8[\xa0-\xaf]              #UNICODE range: U+FE20 — U+FE2F (combining half marks)
  99 |                                    ';
 100 | 
 101 | 	/**
 102 | 	 * @var  array
 103 | 	 */
 104 | 	public static $html_special_chars_table = array(
 105 | 		'&quot;' => "\x22",  #U+0022 ["] &#34; quotation mark = APL quote
 106 | 		'&amp;'  => "\x26",  #U+0026 [&] &#38; ampersand
 107 | 		'&lt;'   => "\x3c",  #U+003C [<] &#60; less-than sign
 108 | 		'&gt;'   => "\x3e",  #U+003E [>] &#62; greater-than sign
 109 | 	);
 110 | 
 111 | 	/**
 112 | 	 * @link http://www.fileformat.info/format/w3c/entitytest.htm?sort=Unicode%20Character  HTML Entity Browser Test Page
 113 | 	 * @var  array
 114 | 	 */
 115 | 	public static $html_entity_table = array(
 116 | 		#Latin-1 Entities:
 117 | 		'&nbsp;'   => "\xc2\xa0",  #U+00A0 [ ] no-break space = non-breaking space
 118 | 		'&iexcl;'  => "\xc2\xa1",  #U+00A1 [¡] inverted exclamation mark
 119 | 		'&cent;'   => "\xc2\xa2",  #U+00A2 [¢] cent sign
 120 | 		'&pound;'  => "\xc2\xa3",  #U+00A3 [£] pound sign
 121 | 		'&curren;' => "\xc2\xa4",  #U+00A4 [¤] currency sign
 122 | 		'&yen;'    => "\xc2\xa5",  #U+00A5 [¥] yen sign = yuan sign
 123 | 		'&brvbar;' => "\xc2\xa6",  #U+00A6 [¦] broken bar = broken vertical bar
 124 | 		'&sect;'   => "\xc2\xa7",  #U+00A7 [§] section sign
 125 | 		'&uml;'    => "\xc2\xa8",  #U+00A8 [¨] diaeresis = spacing diaeresis
 126 | 		'&copy;'   => "\xc2\xa9",  #U+00A9 [©] copyright sign
 127 | 		'&ordf;'   => "\xc2\xaa",  #U+00AA [ª] feminine ordinal indicator
 128 | 		'&laquo;'  => "\xc2\xab",  #U+00AB [«] left-pointing double angle quotation mark = left pointing guillemet
 129 | 		'&not;'    => "\xc2\xac",  #U+00AC [¬] not sign
 130 | 		'&shy;'    => "\xc2\xad",  #U+00AD [ ] soft hyphen = discretionary hyphen
 131 | 		'&reg;'    => "\xc2\xae",  #U+00AE [®] registered sign = registered trade mark sign
 132 | 		'&macr;'   => "\xc2\xaf",  #U+00AF [¯] macron = spacing macron = overline = APL overbar
 133 | 		'&deg;'    => "\xc2\xb0",  #U+00B0 [°] degree sign
 134 | 		'&plusmn;' => "\xc2\xb1",  #U+00B1 [±] plus-minus sign = plus-or-minus sign
 135 | 		'&sup2;'   => "\xc2\xb2",  #U+00B2 [²] superscript two = superscript digit two = squared
 136 | 		'&sup3;'   => "\xc2\xb3",  #U+00B3 [³] superscript three = superscript digit three = cubed
 137 | 		'&acute;'  => "\xc2\xb4",  #U+00B4 [´] acute accent = spacing acute
 138 | 		'&micro;'  => "\xc2\xb5",  #U+00B5 [µ] micro sign
 139 | 		'&para;'   => "\xc2\xb6",  #U+00B6 [¶] pilcrow sign = paragraph sign
 140 | 		'&middot;' => "\xc2\xb7",  #U+00B7 [·] middle dot = Georgian comma = Greek middle dot
 141 | 		'&cedil;'  => "\xc2\xb8",  #U+00B8 [¸] cedilla = spacing cedilla
 142 | 		'&sup1;'   => "\xc2\xb9",  #U+00B9 [¹] superscript one = superscript digit one
 143 | 		'&ordm;'   => "\xc2\xba",  #U+00BA [º] masculine ordinal indicator
 144 | 		'&raquo;'  => "\xc2\xbb",  #U+00BB [»] right-pointing double angle quotation mark = right pointing guillemet
 145 | 		'&frac14;' => "\xc2\xbc",  #U+00BC [¼] vulgar fraction one quarter = fraction one quarter
 146 | 		'&frac12;' => "\xc2\xbd",  #U+00BD [½] vulgar fraction one half = fraction one half
 147 | 		'&frac34;' => "\xc2\xbe",  #U+00BE [¾] vulgar fraction three quarters = fraction three quarters
 148 | 		'&iquest;' => "\xc2\xbf",  #U+00BF [¿] inverted question mark = turned question mark
 149 | 		#Latin capital letter
 150 | 		'&Agrave;' => "\xc3\x80",  #Latin capital letter A with grave = Latin capital letter A grave
 151 | 		'&Aacute;' => "\xc3\x81",  #Latin capital letter A with acute
 152 | 		'&Acirc;'  => "\xc3\x82",  #Latin capital letter A with circumflex
 153 | 		'&Atilde;' => "\xc3\x83",  #Latin capital letter A with tilde
 154 | 		'&Auml;'   => "\xc3\x84",  #Latin capital letter A with diaeresis
 155 | 		'&Aring;'  => "\xc3\x85",  #Latin capital letter A with ring above = Latin capital letter A ring
 156 | 		'&AElig;'  => "\xc3\x86",  #Latin capital letter AE = Latin capital ligature AE
 157 | 		'&Ccedil;' => "\xc3\x87",  #Latin capital letter C with cedilla
 158 | 		'&Egrave;' => "\xc3\x88",  #Latin capital letter E with grave
 159 | 		'&Eacute;' => "\xc3\x89",  #Latin capital letter E with acute
 160 | 		'&Ecirc;'  => "\xc3\x8a",  #Latin capital letter E with circumflex
 161 | 		'&Euml;'   => "\xc3\x8b",  #Latin capital letter E with diaeresis
 162 | 		'&Igrave;' => "\xc3\x8c",  #Latin capital letter I with grave
 163 | 		'&Iacute;' => "\xc3\x8d",  #Latin capital letter I with acute
 164 | 		'&Icirc;'  => "\xc3\x8e",  #Latin capital letter I with circumflex
 165 | 		'&Iuml;'   => "\xc3\x8f",  #Latin capital letter I with diaeresis
 166 | 		'&ETH;'    => "\xc3\x90",  #Latin capital letter ETH
 167 | 		'&Ntilde;' => "\xc3\x91",  #Latin capital letter N with tilde
 168 | 		'&Ograve;' => "\xc3\x92",  #Latin capital letter O with grave
 169 | 		'&Oacute;' => "\xc3\x93",  #Latin capital letter O with acute
 170 | 		'&Ocirc;'  => "\xc3\x94",  #Latin capital letter O with circumflex
 171 | 		'&Otilde;' => "\xc3\x95",  #Latin capital letter O with tilde
 172 | 		'&Ouml;'   => "\xc3\x96",  #Latin capital letter O with diaeresis
 173 | 		'&times;'  => "\xc3\x97",  #U+00D7 [×] multiplication sign
 174 | 		'&Oslash;' => "\xc3\x98",  #Latin capital letter O with stroke = Latin capital letter O slash
 175 | 		'&Ugrave;' => "\xc3\x99",  #Latin capital letter U with grave
 176 | 		'&Uacute;' => "\xc3\x9a",  #Latin capital letter U with acute
 177 | 		'&Ucirc;'  => "\xc3\x9b",  #Latin capital letter U with circumflex
 178 | 		'&Uuml;'   => "\xc3\x9c",  #Latin capital letter U with diaeresis
 179 | 		'&Yacute;' => "\xc3\x9d",  #Latin capital letter Y with acute
 180 | 		'&THORN;'  => "\xc3\x9e",  #Latin capital letter THORN
 181 | 		#Latin small letter
 182 | 		'&szlig;'  => "\xc3\x9f",  #Latin small letter sharp s = ess-zed
 183 | 		'&agrave;' => "\xc3\xa0",  #Latin small letter a with grave = Latin small letter a grave
 184 | 		'&aacute;' => "\xc3\xa1",  #Latin small letter a with acute
 185 | 		'&acirc;'  => "\xc3\xa2",  #Latin small letter a with circumflex
 186 | 		'&atilde;' => "\xc3\xa3",  #Latin small letter a with tilde
 187 | 		'&auml;'   => "\xc3\xa4",  #Latin small letter a with diaeresis
 188 | 		'&aring;'  => "\xc3\xa5",  #Latin small letter a with ring above = Latin small letter a ring
 189 | 		'&aelig;'  => "\xc3\xa6",  #Latin small letter ae = Latin small ligature ae
 190 | 		'&ccedil;' => "\xc3\xa7",  #Latin small letter c with cedilla
 191 | 		'&egrave;' => "\xc3\xa8",  #Latin small letter e with grave
 192 | 		'&eacute;' => "\xc3\xa9",  #Latin small letter e with acute
 193 | 		'&ecirc;'  => "\xc3\xaa",  #Latin small letter e with circumflex
 194 | 		'&euml;'   => "\xc3\xab",  #Latin small letter e with diaeresis
 195 | 		'&igrave;' => "\xc3\xac",  #Latin small letter i with grave
 196 | 		'&iacute;' => "\xc3\xad",  #Latin small letter i with acute
 197 | 		'&icirc;'  => "\xc3\xae",  #Latin small letter i with circumflex
 198 | 		'&iuml;'   => "\xc3\xaf",  #Latin small letter i with diaeresis
 199 | 		'&eth;'    => "\xc3\xb0",  #Latin small letter eth
 200 | 		'&ntilde;' => "\xc3\xb1",  #Latin small letter n with tilde
 201 | 		'&ograve;' => "\xc3\xb2",  #Latin small letter o with grave
 202 | 		'&oacute;' => "\xc3\xb3",  #Latin small letter o with acute
 203 | 		'&ocirc;'  => "\xc3\xb4",  #Latin small letter o with circumflex
 204 | 		'&otilde;' => "\xc3\xb5",  #Latin small letter o with tilde
 205 | 		'&ouml;'   => "\xc3\xb6",  #Latin small letter o with diaeresis
 206 | 		'&divide;' => "\xc3\xb7",  #U+00F7 [÷] division sign
 207 | 		'&oslash;' => "\xc3\xb8",  #Latin small letter o with stroke = Latin small letter o slash
 208 | 		'&ugrave;' => "\xc3\xb9",  #Latin small letter u with grave
 209 | 		'&uacute;' => "\xc3\xba",  #Latin small letter u with acute
 210 | 		'&ucirc;'  => "\xc3\xbb",  #Latin small letter u with circumflex
 211 | 		'&uuml;'   => "\xc3\xbc",  #Latin small letter u with diaeresis
 212 | 		'&yacute;' => "\xc3\xbd",  #Latin small letter y with acute
 213 | 		'&thorn;'  => "\xc3\xbe",  #Latin small letter thorn
 214 | 		'&yuml;'   => "\xc3\xbf",  #Latin small letter y with diaeresis
 215 | 		#Symbols and Greek Letters:
 216 | 		'&fnof;'    => "\xc6\x92",  #U+0192 [ƒ] Latin small f with hook = function = florin
 217 | 		'&Alpha;'   => "\xce\x91",  #Greek capital letter alpha
 218 | 		'&Beta;'    => "\xce\x92",  #Greek capital letter beta
 219 | 		'&Gamma;'   => "\xce\x93",  #Greek capital letter gamma
 220 | 		'&Delta;'   => "\xce\x94",  #Greek capital letter delta
 221 | 		'&Epsilon;' => "\xce\x95",  #Greek capital letter epsilon
 222 | 		'&Zeta;'    => "\xce\x96",  #Greek capital letter zeta
 223 | 		'&Eta;'     => "\xce\x97",  #Greek capital letter eta
 224 | 		'&Theta;'   => "\xce\x98",  #Greek capital letter theta
 225 | 		'&Iota;'    => "\xce\x99",  #Greek capital letter iota
 226 | 		'&Kappa;'   => "\xce\x9a",  #Greek capital letter kappa
 227 | 		'&Lambda;'  => "\xce\x9b",  #Greek capital letter lambda
 228 | 		'&Mu;'      => "\xce\x9c",  #Greek capital letter mu
 229 | 		'&Nu;'      => "\xce\x9d",  #Greek capital letter nu
 230 | 		'&Xi;'      => "\xce\x9e",  #Greek capital letter xi
 231 | 		'&Omicron;' => "\xce\x9f",  #Greek capital letter omicron
 232 | 		'&Pi;'      => "\xce\xa0",  #Greek capital letter pi
 233 | 		'&Rho;'     => "\xce\xa1",  #Greek capital letter rho
 234 | 		'&Sigma;'   => "\xce\xa3",  #Greek capital letter sigma
 235 | 		'&Tau;'     => "\xce\xa4",  #Greek capital letter tau
 236 | 		'&Upsilon;' => "\xce\xa5",  #Greek capital letter upsilon
 237 | 		'&Phi;'     => "\xce\xa6",  #Greek capital letter phi
 238 | 		'&Chi;'     => "\xce\xa7",  #Greek capital letter chi
 239 | 		'&Psi;'     => "\xce\xa8",  #Greek capital letter psi
 240 | 		'&Omega;'   => "\xce\xa9",  #Greek capital letter omega
 241 | 		'&alpha;'   => "\xce\xb1",  #Greek small letter alpha
 242 | 		'&beta;'    => "\xce\xb2",  #Greek small letter beta
 243 | 		'&gamma;'   => "\xce\xb3",  #Greek small letter gamma
 244 | 		'&delta;'   => "\xce\xb4",  #Greek small letter delta
 245 | 		'&epsilon;' => "\xce\xb5",  #Greek small letter epsilon
 246 | 		'&zeta;'    => "\xce\xb6",  #Greek small letter zeta
 247 | 		'&eta;'     => "\xce\xb7",  #Greek small letter eta
 248 | 		'&theta;'   => "\xce\xb8",  #Greek small letter theta
 249 | 		'&iota;'    => "\xce\xb9",  #Greek small letter iota
 250 | 		'&kappa;'   => "\xce\xba",  #Greek small letter kappa
 251 | 		'&lambda;'  => "\xce\xbb",  #Greek small letter lambda
 252 | 		'&mu;'      => "\xce\xbc",  #Greek small letter mu
 253 | 		'&nu;'      => "\xce\xbd",  #Greek small letter nu
 254 | 		'&xi;'      => "\xce\xbe",  #Greek small letter xi
 255 | 		'&omicron;' => "\xce\xbf",  #Greek small letter omicron
 256 | 		'&pi;'      => "\xcf\x80",  #Greek small letter pi
 257 | 		'&rho;'     => "\xcf\x81",  #Greek small letter rho
 258 | 		'&sigmaf;'  => "\xcf\x82",  #Greek small letter final sigma
 259 | 		'&sigma;'   => "\xcf\x83",  #Greek small letter sigma
 260 | 		'&tau;'     => "\xcf\x84",  #Greek small letter tau
 261 | 		'&upsilon;' => "\xcf\x85",  #Greek small letter upsilon
 262 | 		'&phi;'     => "\xcf\x86",  #Greek small letter phi
 263 | 		'&chi;'     => "\xcf\x87",  #Greek small letter chi
 264 | 		'&psi;'     => "\xcf\x88",  #Greek small letter psi
 265 | 		'&omega;'   => "\xcf\x89",  #Greek small letter omega
 266 | 		'&thetasym;'=> "\xcf\x91",  #Greek small letter theta symbol
 267 | 		'&upsih;'   => "\xcf\x92",  #Greek upsilon with hook symbol
 268 | 		'&piv;'     => "\xcf\x96",  #U+03D6 [ϖ] Greek pi symbol
 269 | 
 270 | 		'&bull;'    => "\xe2\x80\xa2",  #U+2022 [•] bullet = black small circle
 271 | 		'&hellip;'  => "\xe2\x80\xa6",  #U+2026 […] horizontal ellipsis = three dot leader
 272 | 		'&prime;'   => "\xe2\x80\xb2",  #U+2032 [′] prime = minutes = feet (для обозначения минут и футов)
 273 | 		'&Prime;'   => "\xe2\x80\xb3",  #U+2033 [″] double prime = seconds = inches (для обозначения секунд и дюймов).
 274 | 		'&oline;'   => "\xe2\x80\xbe",  #U+203E [‾] overline = spacing overscore
 275 | 		'&frasl;'   => "\xe2\x81\x84",  #U+2044 [⁄] fraction slash
 276 | 		'&weierp;'  => "\xe2\x84\x98",  #U+2118 [℘] script capital P = power set = Weierstrass p
 277 | 		'&image;'   => "\xe2\x84\x91",  #U+2111 [ℑ] blackletter capital I = imaginary part
 278 | 		'&real;'    => "\xe2\x84\x9c",  #U+211C [ℜ] blackletter capital R = real part symbol
 279 | 		'&trade;'   => "\xe2\x84\xa2",  #U+2122 [™] trade mark sign
 280 | 		'&alefsym;' => "\xe2\x84\xb5",  #U+2135 [ℵ] alef symbol = first transfinite cardinal
 281 | 		'&larr;'    => "\xe2\x86\x90",  #U+2190 [←] leftwards arrow
 282 | 		'&uarr;'    => "\xe2\x86\x91",  #U+2191 [↑] upwards arrow
 283 | 		'&rarr;'    => "\xe2\x86\x92",  #U+2192 [→] rightwards arrow
 284 | 		'&darr;'    => "\xe2\x86\x93",  #U+2193 [↓] downwards arrow
 285 | 		'&harr;'    => "\xe2\x86\x94",  #U+2194 [↔] left right arrow
 286 | 		'&crarr;'   => "\xe2\x86\xb5",  #U+21B5 [↵] downwards arrow with corner leftwards = carriage return
 287 | 		'&lArr;'    => "\xe2\x87\x90",  #U+21D0 [⇐] leftwards double arrow
 288 | 		'&uArr;'    => "\xe2\x87\x91",  #U+21D1 [⇑] upwards double arrow
 289 | 		'&rArr;'    => "\xe2\x87\x92",  #U+21D2 [⇒] rightwards double arrow
 290 | 		'&dArr;'    => "\xe2\x87\x93",  #U+21D3 [⇓] downwards double arrow
 291 | 		'&hArr;'    => "\xe2\x87\x94",  #U+21D4 [⇔] left right double arrow
 292 | 		'&forall;'  => "\xe2\x88\x80",  #U+2200 [∀] for all
 293 | 		'&part;'    => "\xe2\x88\x82",  #U+2202 [∂] partial differential
 294 | 		'&exist;'   => "\xe2\x88\x83",  #U+2203 [∃] there exists
 295 | 		'&empty;'   => "\xe2\x88\x85",  #U+2205 [∅] empty set = null set = diameter
 296 | 		'&nabla;'   => "\xe2\x88\x87",  #U+2207 [∇] nabla = backward difference
 297 | 		'&isin;'    => "\xe2\x88\x88",  #U+2208 [∈] element of
 298 | 		'&notin;'   => "\xe2\x88\x89",  #U+2209 [∉] not an element of
 299 | 		'&ni;'      => "\xe2\x88\x8b",  #U+220B [∋] contains as member
 300 | 		'&prod;'    => "\xe2\x88\x8f",  #U+220F [∏] n-ary product = product sign
 301 | 		'&sum;'     => "\xe2\x88\x91",  #U+2211 [∑] n-ary sumation
 302 | 		'&minus;'   => "\xe2\x88\x92",  #U+2212 [−] minus sign
 303 | 		'&lowast;'  => "\xe2\x88\x97",  #U+2217 [∗] asterisk operator
 304 | 		'&radic;'   => "\xe2\x88\x9a",  #U+221A [√] square root = radical sign
 305 | 		'&prop;'    => "\xe2\x88\x9d",  #U+221D [∝] proportional to
 306 | 		'&infin;'   => "\xe2\x88\x9e",  #U+221E [∞] infinity
 307 | 		'&ang;'     => "\xe2\x88\xa0",  #U+2220 [∠] angle
 308 | 		'&and;'     => "\xe2\x88\xa7",  #U+2227 [∧] logical and = wedge
 309 | 		'&or;'      => "\xe2\x88\xa8",  #U+2228 [∨] logical or = vee
 310 | 		'&cap;'     => "\xe2\x88\xa9",  #U+2229 [∩] intersection = cap
 311 | 		'&cup;'     => "\xe2\x88\xaa",  #U+222A [∪] union = cup
 312 | 		'&int;'     => "\xe2\x88\xab",  #U+222B [∫] integral
 313 | 		'&there4;'  => "\xe2\x88\xb4",  #U+2234 [∴] therefore
 314 | 		'&sim;'     => "\xe2\x88\xbc",  #U+223C [∼] tilde operator = varies with = similar to
 315 | 		'&cong;'    => "\xe2\x89\x85",  #U+2245 [≅] approximately equal to
 316 | 		'&asymp;'   => "\xe2\x89\x88",  #U+2248 [≈] almost equal to = asymptotic to
 317 | 		'&ne;'      => "\xe2\x89\xa0",  #U+2260 [≠] not equal to
 318 | 		'&equiv;'   => "\xe2\x89\xa1",  #U+2261 [≡] identical to
 319 | 		'&le;'      => "\xe2\x89\xa4",  #U+2264 [≤] less-than or equal to
 320 | 		'&ge;'      => "\xe2\x89\xa5",  #U+2265 [≥] greater-than or equal to
 321 | 		'&sub;'     => "\xe2\x8a\x82",  #U+2282 [⊂] subset of
 322 | 		'&sup;'     => "\xe2\x8a\x83",  #U+2283 [⊃] superset of
 323 | 		'&nsub;'    => "\xe2\x8a\x84",  #U+2284 [⊄] not a subset of
 324 | 		'&sube;'    => "\xe2\x8a\x86",  #U+2286 [⊆] subset of or equal to
 325 | 		'&supe;'    => "\xe2\x8a\x87",  #U+2287 [⊇] superset of or equal to
 326 | 		'&oplus;'   => "\xe2\x8a\x95",  #U+2295 [⊕] circled plus = direct sum
 327 | 		'&otimes;'  => "\xe2\x8a\x97",  #U+2297 [⊗] circled times = vector product
 328 | 		'&perp;'    => "\xe2\x8a\xa5",  #U+22A5 [⊥] up tack = orthogonal to = perpendicular
 329 | 		'&sdot;'    => "\xe2\x8b\x85",  #U+22C5 [⋅] dot operator
 330 | 		'&lceil;'   => "\xe2\x8c\x88",  #U+2308 [⌈] left ceiling = APL upstile
 331 | 		'&rceil;'   => "\xe2\x8c\x89",  #U+2309 [⌉] right ceiling
 332 | 		'&lfloor;'  => "\xe2\x8c\x8a",  #U+230A [⌊] left floor = APL downstile
 333 | 		'&rfloor;'  => "\xe2\x8c\x8b",  #U+230B [⌋] right floor
 334 | 		'&lang;'    => "\xe2\x8c\xa9",  #U+2329 [〈] left-pointing angle bracket = bra
 335 | 		'&rang;'    => "\xe2\x8c\xaa",  #U+232A [〉] right-pointing angle bracket = ket
 336 | 		'&loz;'     => "\xe2\x97\x8a",  #U+25CA [◊] lozenge
 337 | 		'&spades;'  => "\xe2\x99\xa0",  #U+2660 [♠] black spade suit
 338 | 		'&clubs;'   => "\xe2\x99\xa3",  #U+2663 [♣] black club suit = shamrock
 339 | 		'&hearts;'  => "\xe2\x99\xa5",  #U+2665 [♥] black heart suit = valentine
 340 | 		'&diams;'   => "\xe2\x99\xa6",  #U+2666 [♦] black diamond suit
 341 | 		#Other Special Characters:
 342 | 		'&OElig;'  => "\xc5\x92",  #U+0152 [Œ] Latin capital ligature OE
 343 | 		'&oelig;'  => "\xc5\x93",  #U+0153 [œ] Latin small ligature oe
 344 | 		'&Scaron;' => "\xc5\xa0",  #U+0160 [Š] Latin capital letter S with caron
 345 | 		'&scaron;' => "\xc5\xa1",  #U+0161 [š] Latin small letter s with caron
 346 | 		'&Yuml;'   => "\xc5\xb8",  #U+0178 [Ÿ] Latin capital letter Y with diaeresis
 347 | 		'&circ;'   => "\xcb\x86",  #U+02C6 [ˆ] modifier letter circumflex accent
 348 | 		'&tilde;'  => "\xcb\x9c",  #U+02DC [˜] small tilde
 349 | 		'&ensp;'   => "\xe2\x80\x82",  #U+2002 [ ] en space
 350 | 		'&emsp;'   => "\xe2\x80\x83",  #U+2003 [ ] em space
 351 | 		'&thinsp;' => "\xe2\x80\x89",  #U+2009 [ ] thin space
 352 | 		'&zwnj;'   => "\xe2\x80\x8c",  #U+200C [‌] zero width non-joiner
 353 | 		'&zwj;'    => "\xe2\x80\x8d",  #U+200D [‍] zero width joiner
 354 | 		'&lrm;'    => "\xe2\x80\x8e",  #U+200E [‎] left-to-right mark
 355 | 		'&rlm;'    => "\xe2\x80\x8f",  #U+200F [‏] right-to-left mark
 356 | 		'&ndash;'  => "\xe2\x80\x93",  #U+2013 [–] en dash
 357 | 		'&mdash;'  => "\xe2\x80\x94",  #U+2014 [—] em dash
 358 | 		'&lsquo;'  => "\xe2\x80\x98",  #U+2018 [‘] left single quotation mark
 359 | 		'&rsquo;'  => "\xe2\x80\x99",  #U+2019 [’] right single quotation mark (and apostrophe!)
 360 | 		'&sbquo;'  => "\xe2\x80\x9a",  #U+201A [‚] single low-9 quotation mark
 361 | 		'&ldquo;'  => "\xe2\x80\x9c",  #U+201C [“] left double quotation mark
 362 | 		'&rdquo;'  => "\xe2\x80\x9d",  #U+201D [”] right double quotation mark
 363 | 		'&bdquo;'  => "\xe2\x80\x9e",  #U+201E [„] double low-9 quotation mark
 364 | 		'&dagger;' => "\xe2\x80\xa0",  #U+2020 [†] dagger
 365 | 		'&Dagger;' => "\xe2\x80\xa1",  #U+2021 [‡] double dagger
 366 | 		'&permil;' => "\xe2\x80\xb0",  #U+2030 [‰] per mille sign
 367 | 		'&lsaquo;' => "\xe2\x80\xb9",  #U+2039 [‹] single left-pointing angle quotation mark
 368 | 		'&rsaquo;' => "\xe2\x80\xba",  #U+203A [›] single right-pointing angle quotation mark
 369 | 		'&euro;'   => "\xe2\x82\xac",  #U+20AC [€] euro sign
 370 | 	);
 371 | 
 372 | 	/**
 373 | 	 * This table describes how cp1259 characters map to Unicode (UTF-8).
 374 | 	 * The cp1259 map describes the standard Tatar Cyrillic charset and is based on the cp1251 table.
 375 | 	 * cp1259 is an outdated single-byte encoding for the Tatar language,
 376 | 	 * which includes all the Russian letters from cp1251.
 377 | 	 *
 378 | 	 * @link  http://search.cpan.org/CPAN/authors/id/A/AM/AMICHAUER/Lingua-TT-Yanalif-0.08.tar.gz
 379 | 	 * @link  http://www.unicode.org/charts/PDF/U0400.pdf
 380 | 	 */
 381 | 	public static $cp1259_table = array(
 382 | 		#bytes from 0x00 to 0x7F (ASCII) saved as is
 383 | 		"\x80" => "\xd3\x98",      #U+04d8 CYRILLIC CAPITAL LETTER SCHWA
 384 | 		"\x81" => "\xd0\x83",      #U+0403 CYRILLIC CAPITAL LETTER GJE
 385 | 		"\x82" => "\xe2\x80\x9a",  #U+201a SINGLE LOW-9 QUOTATION MARK
 386 | 		"\x83" => "\xd1\x93",      #U+0453 CYRILLIC SMALL LETTER GJE
 387 | 		"\x84" => "\xe2\x80\x9e",  #U+201e DOUBLE LOW-9 QUOTATION MARK
 388 | 		"\x85" => "\xe2\x80\xa6",  #U+2026 HORIZONTAL ELLIPSIS
 389 | 		"\x86" => "\xe2\x80\xa0",  #U+2020 DAGGER
 390 | 		"\x87" => "\xe2\x80\xa1",  #U+2021 DOUBLE DAGGER
 391 | 		"\x88" => "\xe2\x82\xac",  #U+20ac EURO SIGN
 392 | 		"\x89" => "\xe2\x80\xb0",  #U+2030 PER MILLE SIGN
 393 | 		"\x8a" => "\xd3\xa8",      #U+04e8 CYRILLIC CAPITAL LETTER BARRED O
 394 | 		"\x8b" => "\xe2\x80\xb9",  #U+2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
 395 | 		"\x8c" => "\xd2\xae",      #U+04ae CYRILLIC CAPITAL LETTER STRAIGHT U
 396 | 		"\x8d" => "\xd2\x96",      #U+0496 CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
 397 | 		"\x8e" => "\xd2\xa2",      #U+04a2 CYRILLIC CAPITAL LETTER EN WITH HOOK
 398 | 		"\x8f" => "\xd2\xba",      #U+04ba CYRILLIC CAPITAL LETTER SHHA
 399 | 		"\x90" => "\xd3\x99",      #U+04d9 CYRILLIC SMALL LETTER SCHWA
 400 | 		"\x91" => "\xe2\x80\x98",  #U+2018 LEFT SINGLE QUOTATION MARK
 401 | 		"\x92" => "\xe2\x80\x99",  #U+2019 RIGHT SINGLE QUOTATION MARK
 402 | 		"\x93" => "\xe2\x80\x9c",  #U+201c LEFT DOUBLE QUOTATION MARK
 403 | 		"\x94" => "\xe2\x80\x9d",  #U+201d RIGHT DOUBLE QUOTATION MARK
 404 | 		"\x95" => "\xe2\x80\xa2",  #U+2022 BULLET
 405 | 		"\x96" => "\xe2\x80\x93",  #U+2013 EN DASH
 406 | 		"\x97" => "\xe2\x80\x94",  #U+2014 EM DASH
 407 | 		#"\x98"                    #UNDEFINED
 408 | 		"\x99" => "\xe2\x84\xa2",  #U+2122 TRADE MARK SIGN
 409 | 		"\x9a" => "\xd3\xa9",      #U+04e9 CYRILLIC SMALL LETTER BARRED O
 410 | 		"\x9b" => "\xe2\x80\xba",  #U+203a SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
 411 | 		"\x9c" => "\xd2\xaf",      #U+04af CYRILLIC SMALL LETTER STRAIGHT U
 412 | 		"\x9d" => "\xd2\x97",      #U+0497 CYRILLIC SMALL LETTER ZHE WITH DESCENDER
 413 | 		"\x9e" => "\xd2\xa3",      #U+04a3 CYRILLIC SMALL LETTER EN WITH HOOK
 414 | 		"\x9f" => "\xd2\xbb",      #U+04bb CYRILLIC SMALL LETTER SHHA
 415 | 		"\xa0" => "\xc2\xa0",      #U+00a0 NO-BREAK SPACE
 416 | 		"\xa1" => "\xd0\x8e",      #U+040e CYRILLIC CAPITAL LETTER SHORT U
 417 | 		"\xa2" => "\xd1\x9e",      #U+045e CYRILLIC SMALL LETTER SHORT U
 418 | 		"\xa3" => "\xd0\x88",      #U+0408 CYRILLIC CAPITAL LETTER JE
 419 | 		"\xa4" => "\xc2\xa4",      #U+00a4 CURRENCY SIGN
 420 | 		"\xa5" => "\xd2\x90",      #U+0490 CYRILLIC CAPITAL LETTER GHE WITH UPTURN
 421 | 		"\xa6" => "\xc2\xa6",      #U+00a6 BROKEN BAR
 422 | 		"\xa7" => "\xc2\xa7",      #U+00a7 SECTION SIGN
 423 | 		"\xa8" => "\xd0\x81",      #U+0401 CYRILLIC CAPITAL LETTER IO
 424 | 		"\xa9" => "\xc2\xa9",      #U+00a9 COPYRIGHT SIGN
 425 | 		"\xaa" => "\xd0\x84",      #U+0404 CYRILLIC CAPITAL LETTER UKRAINIAN IE
 426 | 		"\xab" => "\xc2\xab",      #U+00ab LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 427 | 		"\xac" => "\xc2\xac",      #U+00ac NOT SIGN
 428 | 		"\xad" => "\xc2\xad",      #U+00ad SOFT HYPHEN
 429 | 		"\xae" => "\xc2\xae",      #U+00ae REGISTERED SIGN
 430 | 		"\xaf" => "\xd0\x87",      #U+0407 CYRILLIC CAPITAL LETTER YI
 431 | 		"\xb0" => "\xc2\xb0",      #U+00b0 DEGREE SIGN
 432 | 		"\xb1" => "\xc2\xb1",      #U+00b1 PLUS-MINUS SIGN
 433 | 		"\xb2" => "\xd0\x86",      #U+0406 CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
 434 | 		"\xb3" => "\xd1\x96",      #U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
 435 | 		"\xb4" => "\xd2\x91",      #U+0491 CYRILLIC SMALL LETTER GHE WITH UPTURN
 436 | 		"\xb5" => "\xc2\xb5",      #U+00b5 MICRO SIGN
 437 | 		"\xb6" => "\xc2\xb6",      #U+00b6 PILCROW SIGN
 438 | 		"\xb7" => "\xc2\xb7",      #U+00b7 MIDDLE DOT
 439 | 		"\xb8" => "\xd1\x91",      #U+0451 CYRILLIC SMALL LETTER IO
 440 | 		"\xb9" => "\xe2\x84\x96",  #U+2116 NUMERO SIGN
 441 | 		"\xba" => "\xd1\x94",      #U+0454 CYRILLIC SMALL LETTER UKRAINIAN IE
 442 | 		"\xbb" => "\xc2\xbb",      #U+00bb RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 443 | 		"\xbc" => "\xd1\x98",      #U+0458 CYRILLIC SMALL LETTER JE
 444 | 		"\xbd" => "\xd0\x85",      #U+0405 CYRILLIC CAPITAL LETTER DZE
 445 | 		"\xbe" => "\xd1\x95",      #U+0455 CYRILLIC SMALL LETTER DZE
 446 | 		"\xbf" => "\xd1\x97",      #U+0457 CYRILLIC SMALL LETTER YI
 447 | 		"\xc0" => "\xd0\x90",      #U+0410 CYRILLIC CAPITAL LETTER A
 448 | 		"\xc1" => "\xd0\x91",      #U+0411 CYRILLIC CAPITAL LETTER BE
 449 | 		"\xc2" => "\xd0\x92",      #U+0412 CYRILLIC CAPITAL LETTER VE
 450 | 		"\xc3" => "\xd0\x93",      #U+0413 CYRILLIC CAPITAL LETTER GHE
 451 | 		"\xc4" => "\xd0\x94",      #U+0414 CYRILLIC CAPITAL LETTER DE
 452 | 		"\xc5" => "\xd0\x95",      #U+0415 CYRILLIC CAPITAL LETTER IE
 453 | 		"\xc6" => "\xd0\x96",      #U+0416 CYRILLIC CAPITAL LETTER ZHE
 454 | 		"\xc7" => "\xd0\x97",      #U+0417 CYRILLIC CAPITAL LETTER ZE
 455 | 		"\xc8" => "\xd0\x98",      #U+0418 CYRILLIC CAPITAL LETTER I
 456 | 		"\xc9" => "\xd0\x99",      #U+0419 CYRILLIC CAPITAL LETTER SHORT I
 457 | 		"\xca" => "\xd0\x9a",      #U+041a CYRILLIC CAPITAL LETTER KA
 458 | 		"\xcb" => "\xd0\x9b",      #U+041b CYRILLIC CAPITAL LETTER EL
 459 | 		"\xcc" => "\xd0\x9c",      #U+041c CYRILLIC CAPITAL LETTER EM
 460 | 		"\xcd" => "\xd0\x9d",      #U+041d CYRILLIC CAPITAL LETTER EN
 461 | 		"\xce" => "\xd0\x9e",      #U+041e CYRILLIC CAPITAL LETTER O
 462 | 		"\xcf" => "\xd0\x9f",      #U+041f CYRILLIC CAPITAL LETTER PE
 463 | 		"\xd0" => "\xd0\xa0",      #U+0420 CYRILLIC CAPITAL LETTER ER
 464 | 		"\xd1" => "\xd0\xa1",      #U+0421 CYRILLIC CAPITAL LETTER ES
 465 | 		"\xd2" => "\xd0\xa2",      #U+0422 CYRILLIC CAPITAL LETTER TE
 466 | 		"\xd3" => "\xd0\xa3",      #U+0423 CYRILLIC CAPITAL LETTER U
 467 | 		"\xd4" => "\xd0\xa4",      #U+0424 CYRILLIC CAPITAL LETTER EF
 468 | 		"\xd5" => "\xd0\xa5",      #U+0425 CYRILLIC CAPITAL LETTER HA
 469 | 		"\xd6" => "\xd0\xa6",      #U+0426 CYRILLIC CAPITAL LETTER TSE
 470 | 		"\xd7" => "\xd0\xa7",      #U+0427 CYRILLIC CAPITAL LETTER CHE
 471 | 		"\xd8" => "\xd0\xa8",      #U+0428 CYRILLIC CAPITAL LETTER SHA
 472 | 		"\xd9" => "\xd0\xa9",      #U+0429 CYRILLIC CAPITAL LETTER SHCHA
 473 | 		"\xda" => "\xd0\xaa",      #U+042a CYRILLIC CAPITAL LETTER HARD SIGN
 474 | 		"\xdb" => "\xd0\xab",      #U+042b CYRILLIC CAPITAL LETTER YERU
 475 | 		"\xdc" => "\xd0\xac",      #U+042c CYRILLIC CAPITAL LETTER SOFT SIGN
 476 | 		"\xdd" => "\xd0\xad",      #U+042d CYRILLIC CAPITAL LETTER E
 477 | 		"\xde" => "\xd0\xae",      #U+042e CYRILLIC CAPITAL LETTER YU
 478 | 		"\xdf" => "\xd0\xaf",      #U+042f CYRILLIC CAPITAL LETTER YA
 479 | 		"\xe0" => "\xd0\xb0",      #U+0430 CYRILLIC SMALL LETTER A
 480 | 		"\xe1" => "\xd0\xb1",      #U+0431 CYRILLIC SMALL LETTER BE
 481 | 		"\xe2" => "\xd0\xb2",      #U+0432 CYRILLIC SMALL LETTER VE
 482 | 		"\xe3" => "\xd0\xb3",      #U+0433 CYRILLIC SMALL LETTER GHE
 483 | 		"\xe4" => "\xd0\xb4",      #U+0434 CYRILLIC SMALL LETTER DE
 484 | 		"\xe5" => "\xd0\xb5",      #U+0435 CYRILLIC SMALL LETTER IE
 485 | 		"\xe6" => "\xd0\xb6",      #U+0436 CYRILLIC SMALL LETTER ZHE
 486 | 		"\xe7" => "\xd0\xb7",      #U+0437 CYRILLIC SMALL LETTER ZE
 487 | 		"\xe8" => "\xd0\xb8",      #U+0438 CYRILLIC SMALL LETTER I
 488 | 		"\xe9" => "\xd0\xb9",      #U+0439 CYRILLIC SMALL LETTER SHORT I
 489 | 		"\xea" => "\xd0\xba",      #U+043a CYRILLIC SMALL LETTER KA
 490 | 		"\xeb" => "\xd0\xbb",      #U+043b CYRILLIC SMALL LETTER EL
 491 | 		"\xec" => "\xd0\xbc",      #U+043c CYRILLIC SMALL LETTER EM
 492 | 		"\xed" => "\xd0\xbd",      #U+043d CYRILLIC SMALL LETTER EN
 493 | 		"\xee" => "\xd0\xbe",      #U+043e CYRILLIC SMALL LETTER O
 494 | 		"\xef" => "\xd0\xbf",      #U+043f CYRILLIC SMALL LETTER PE
 495 | 		"\xf0" => "\xd1\x80",      #U+0440 CYRILLIC SMALL LETTER ER
 496 | 		"\xf1" => "\xd1\x81",      #U+0441 CYRILLIC SMALL LETTER ES
 497 | 		"\xf2" => "\xd1\x82",      #U+0442 CYRILLIC SMALL LETTER TE
 498 | 		"\xf3" => "\xd1\x83",      #U+0443 CYRILLIC SMALL LETTER U
 499 | 		"\xf4" => "\xd1\x84",      #U+0444 CYRILLIC SMALL LETTER EF
 500 | 		"\xf5" => "\xd1\x85",      #U+0445 CYRILLIC SMALL LETTER HA
 501 | 		"\xf6" => "\xd1\x86",      #U+0446 CYRILLIC SMALL LETTER TSE
 502 | 		"\xf7" => "\xd1\x87",      #U+0447 CYRILLIC SMALL LETTER CHE
 503 | 		"\xf8" => "\xd1\x88",      #U+0448 CYRILLIC SMALL LETTER SHA
 504 | 		"\xf9" => "\xd1\x89",      #U+0449 CYRILLIC SMALL LETTER SHCHA
 505 | 		"\xfa" => "\xd1\x8a",      #U+044a CYRILLIC SMALL LETTER HARD SIGN
 506 | 		"\xfb" => "\xd1\x8b",      #U+044b CYRILLIC SMALL LETTER YERU
 507 | 		"\xfc" => "\xd1\x8c",      #U+044c CYRILLIC SMALL LETTER SOFT SIGN
 508 | 		"\xfd" => "\xd1\x8d",      #U+044d CYRILLIC SMALL LETTER E
 509 | 		"\xfe" => "\xd1\x8e",      #U+044e CYRILLIC SMALL LETTER YU
 510 | 		"\xff" => "\xd1\x8f",      #U+044f CYRILLIC SMALL LETTER YA
 511 | 	);
 512 | 
 513 | 	/**
 514 | 	 * UTF-8 case lookup table.
 515 | 	 *
 516 | 	 * Maps each upper case letter (key) to its corresponding lower case
 517 | 	 * letter (value); keys and values are raw UTF-8 byte sequences.
 518 | 	 *
 519 | 	 * @author Andreas Gohr <andi@splitbrain.org>
 520 | 	 */
 521 | 	public static $convert_case_table = array(
 522 | 		#CASE_UPPER => case_lower
 523 | 		"\x41" => "\x61", #A a
 524 | 		"\x42" => "\x62", #B b
 525 | 		"\x43" => "\x63", #C c
 526 | 		"\x44" => "\x64", #D d
 527 | 		"\x45" => "\x65", #E e
 528 | 		"\x46" => "\x66", #F f
 529 | 		"\x47" => "\x67", #G g
 530 | 		"\x48" => "\x68", #H h
 531 | 		"\x49" => "\x69", #I i
 532 | 		"\x4a" => "\x6a", #J j
 533 | 		"\x4b" => "\x6b", #K k
 534 | 		"\x4c" => "\x6c", #L l
 535 | 		"\x4d" => "\x6d", #M m
 536 | 		"\x4e" => "\x6e", #N n
 537 | 		"\x4f" => "\x6f", #O o
 538 | 		"\x50" => "\x70", #P p
 539 | 		"\x51" => "\x71", #Q q
 540 | 		"\x52" => "\x72", #R r
 541 | 		"\x53" => "\x73", #S s
 542 | 		"\x54" => "\x74", #T t
 543 | 		"\x55" => "\x75", #U u
 544 | 		"\x56" => "\x76", #V v
 545 | 		"\x57" => "\x77", #W w
 546 | 		"\x58" => "\x78", #X x
 547 | 		"\x59" => "\x79", #Y y
 548 | 		"\x5a" => "\x7a", #Z z
 549 | 		"\xc3\x80" => "\xc3\xa0", #U+00C0.. Latin-1 Supplement
 550 | 		"\xc3\x81" => "\xc3\xa1",
 551 | 		"\xc3\x82" => "\xc3\xa2",
 552 | 		"\xc3\x83" => "\xc3\xa3",
 553 | 		"\xc3\x84" => "\xc3\xa4",
 554 | 		"\xc3\x85" => "\xc3\xa5",
 555 | 		"\xc3\x86" => "\xc3\xa6",
 556 | 		"\xc3\x87" => "\xc3\xa7",
 557 | 		"\xc3\x88" => "\xc3\xa8",
 558 | 		"\xc3\x89" => "\xc3\xa9",
 559 | 		"\xc3\x8a" => "\xc3\xaa",
 560 | 		"\xc3\x8b" => "\xc3\xab",
 561 | 		"\xc3\x8c" => "\xc3\xac",
 562 | 		"\xc3\x8d" => "\xc3\xad",
 563 | 		"\xc3\x8e" => "\xc3\xae",
 564 | 		"\xc3\x8f" => "\xc3\xaf",
 565 | 		"\xc3\x90" => "\xc3\xb0",
 566 | 		"\xc3\x91" => "\xc3\xb1",
 567 | 		"\xc3\x92" => "\xc3\xb2",
 568 | 		"\xc3\x93" => "\xc3\xb3",
 569 | 		"\xc3\x94" => "\xc3\xb4",
 570 | 		"\xc3\x95" => "\xc3\xb5",
 571 | 		"\xc3\x96" => "\xc3\xb6",
 572 | 		"\xc3\x98" => "\xc3\xb8",
 573 | 		"\xc3\x99" => "\xc3\xb9",
 574 | 		"\xc3\x9a" => "\xc3\xba",
 575 | 		"\xc3\x9b" => "\xc3\xbb",
 576 | 		"\xc3\x9c" => "\xc3\xbc",
 577 | 		"\xc3\x9d" => "\xc3\xbd",
 578 | 		"\xc3\x9e" => "\xc3\xbe",
 579 | 		"\xc4\x80" => "\xc4\x81", #U+0100.. Latin Extended-A
 580 | 		"\xc4\x82" => "\xc4\x83",
 581 | 		"\xc4\x84" => "\xc4\x85",
 582 | 		"\xc4\x86" => "\xc4\x87",
 583 | 		"\xc4\x88" => "\xc4\x89",
 584 | 		"\xc4\x8a" => "\xc4\x8b",
 585 | 		"\xc4\x8c" => "\xc4\x8d",
 586 | 		"\xc4\x8e" => "\xc4\x8f",
 587 | 		"\xc4\x90" => "\xc4\x91",
 588 | 		"\xc4\x92" => "\xc4\x93",
 589 | 		"\xc4\x94" => "\xc4\x95",
 590 | 		"\xc4\x96" => "\xc4\x97",
 591 | 		"\xc4\x98" => "\xc4\x99",
 592 | 		"\xc4\x9a" => "\xc4\x9b",
 593 | 		"\xc4\x9c" => "\xc4\x9d",
 594 | 		"\xc4\x9e" => "\xc4\x9f",
 595 | 		"\xc4\xa0" => "\xc4\xa1",
 596 | 		"\xc4\xa2" => "\xc4\xa3",
 597 | 		"\xc4\xa4" => "\xc4\xa5",
 598 | 		"\xc4\xa6" => "\xc4\xa7",
 599 | 		"\xc4\xa8" => "\xc4\xa9",
 600 | 		"\xc4\xaa" => "\xc4\xab",
 601 | 		"\xc4\xac" => "\xc4\xad",
 602 | 		"\xc4\xae" => "\xc4\xaf",
 603 | 		"\xc4\xb2" => "\xc4\xb3",
 604 | 		"\xc4\xb4" => "\xc4\xb5",
 605 | 		"\xc4\xb6" => "\xc4\xb7",
 606 | 		"\xc4\xb9" => "\xc4\xba",
 607 | 		"\xc4\xbb" => "\xc4\xbc",
 608 | 		"\xc4\xbd" => "\xc4\xbe",
 609 | 		"\xc4\xbf" => "\xc5\x80",
 610 | 		"\xc5\x81" => "\xc5\x82",
 611 | 		"\xc5\x83" => "\xc5\x84",
 612 | 		"\xc5\x85" => "\xc5\x86",
 613 | 		"\xc5\x87" => "\xc5\x88",
 614 | 		"\xc5\x8a" => "\xc5\x8b",
 615 | 		"\xc5\x8c" => "\xc5\x8d",
 616 | 		"\xc5\x8e" => "\xc5\x8f",
 617 | 		"\xc5\x90" => "\xc5\x91",
 618 | 		"\xc5\x92" => "\xc5\x93",
 619 | 		"\xc5\x94" => "\xc5\x95",
 620 | 		"\xc5\x96" => "\xc5\x97",
 621 | 		"\xc5\x98" => "\xc5\x99",
 622 | 		"\xc5\x9a" => "\xc5\x9b",
 623 | 		"\xc5\x9c" => "\xc5\x9d",
 624 | 		"\xc5\x9e" => "\xc5\x9f",
 625 | 		"\xc5\xa0" => "\xc5\xa1",
 626 | 		"\xc5\xa2" => "\xc5\xa3",
 627 | 		"\xc5\xa4" => "\xc5\xa5",
 628 | 		"\xc5\xa6" => "\xc5\xa7",
 629 | 		"\xc5\xa8" => "\xc5\xa9",
 630 | 		"\xc5\xaa" => "\xc5\xab",
 631 | 		"\xc5\xac" => "\xc5\xad",
 632 | 		"\xc5\xae" => "\xc5\xaf",
 633 | 		"\xc5\xb0" => "\xc5\xb1",
 634 | 		"\xc5\xb2" => "\xc5\xb3",
 635 | 		"\xc5\xb4" => "\xc5\xb5",
 636 | 		"\xc5\xb6" => "\xc5\xb7",
 637 | 		"\xc5\xb8" => "\xc3\xbf",
 638 | 		"\xc5\xb9" => "\xc5\xba",
 639 | 		"\xc5\xbb" => "\xc5\xbc",
 640 | 		"\xc5\xbd" => "\xc5\xbe",
 641 | 		"\xc6\x81" => "\xc9\x93", #U+0181.. Latin Extended-B
 642 | 		"\xc6\x82" => "\xc6\x83",
 643 | 		"\xc6\x84" => "\xc6\x85",
 644 | 		"\xc6\x86" => "\xc9\x94",
 645 | 		"\xc6\x87" => "\xc6\x88",
 646 | 		"\xc6\x89" => "\xc9\x96",
 647 | 		"\xc6\x8a" => "\xc9\x97",
 648 | 		"\xc6\x8b" => "\xc6\x8c",
 649 | 		"\xc6\x8e" => "\xc7\x9d",
 650 | 		"\xc6\x8f" => "\xc9\x99",
 651 | 		"\xc6\x90" => "\xc9\x9b",
 652 | 		"\xc6\x91" => "\xc6\x92",
 653 | 		"\xc6\x94" => "\xc9\xa3",
 654 | 		"\xc6\x96" => "\xc9\xa9",
 655 | 		"\xc6\x97" => "\xc9\xa8",
 656 | 		"\xc6\x98" => "\xc6\x99",
 657 | 		"\xc6\x9c" => "\xc9\xaf",
 658 | 		"\xc6\x9d" => "\xc9\xb2",
 659 | 		"\xc6\x9f" => "\xc9\xb5",
 660 | 		"\xc6\xa0" => "\xc6\xa1",
 661 | 		"\xc6\xa2" => "\xc6\xa3",
 662 | 		"\xc6\xa4" => "\xc6\xa5",
 663 | 		"\xc6\xa6" => "\xca\x80",
 664 | 		"\xc6\xa7" => "\xc6\xa8",
 665 | 		"\xc6\xa9" => "\xca\x83",
 666 | 		"\xc6\xac" => "\xc6\xad",
 667 | 		"\xc6\xae" => "\xca\x88",
 668 | 		"\xc6\xaf" => "\xc6\xb0",
 669 | 		"\xc6\xb1" => "\xca\x8a",
 670 | 		"\xc6\xb2" => "\xca\x8b",
 671 | 		"\xc6\xb3" => "\xc6\xb4",
 672 | 		"\xc6\xb5" => "\xc6\xb6",
 673 | 		"\xc6\xb7" => "\xca\x92",
 674 | 		"\xc6\xb8" => "\xc6\xb9",
 675 | 		"\xc6\xbc" => "\xc6\xbd",
 676 | 		"\xc7\x85" => "\xc7\x86",
 677 | 		"\xc7\x88" => "\xc7\x89",
 678 | 		"\xc7\x8b" => "\xc7\x8c",
 679 | 		"\xc7\x8d" => "\xc7\x8e",
 680 | 		"\xc7\x8f" => "\xc7\x90",
 681 | 		"\xc7\x91" => "\xc7\x92",
 682 | 		"\xc7\x93" => "\xc7\x94",
 683 | 		"\xc7\x95" => "\xc7\x96",
 684 | 		"\xc7\x97" => "\xc7\x98",
 685 | 		"\xc7\x99" => "\xc7\x9a",
 686 | 		"\xc7\x9b" => "\xc7\x9c",
 687 | 		"\xc7\x9e" => "\xc7\x9f",
 688 | 		"\xc7\xa0" => "\xc7\xa1",
 689 | 		"\xc7\xa2" => "\xc7\xa3",
 690 | 		"\xc7\xa4" => "\xc7\xa5",
 691 | 		"\xc7\xa6" => "\xc7\xa7",
 692 | 		"\xc7\xa8" => "\xc7\xa9",
 693 | 		"\xc7\xaa" => "\xc7\xab",
 694 | 		"\xc7\xac" => "\xc7\xad",
 695 | 		"\xc7\xae" => "\xc7\xaf",
 696 | 		"\xc7\xb2" => "\xc7\xb3",
 697 | 		"\xc7\xb4" => "\xc7\xb5",
 698 | 		"\xc7\xb6" => "\xc6\x95",
 699 | 		"\xc7\xb7" => "\xc6\xbf",
 700 | 		"\xc7\xb8" => "\xc7\xb9",
 701 | 		"\xc7\xba" => "\xc7\xbb",
 702 | 		"\xc7\xbc" => "\xc7\xbd",
 703 | 		"\xc7\xbe" => "\xc7\xbf",
 704 | 		"\xc8\x80" => "\xc8\x81",
 705 | 		"\xc8\x82" => "\xc8\x83",
 706 | 		"\xc8\x84" => "\xc8\x85",
 707 | 		"\xc8\x86" => "\xc8\x87",
 708 | 		"\xc8\x88" => "\xc8\x89",
 709 | 		"\xc8\x8a" => "\xc8\x8b",
 710 | 		"\xc8\x8c" => "\xc8\x8d",
 711 | 		"\xc8\x8e" => "\xc8\x8f",
 712 | 		"\xc8\x90" => "\xc8\x91",
 713 | 		"\xc8\x92" => "\xc8\x93",
 714 | 		"\xc8\x94" => "\xc8\x95",
 715 | 		"\xc8\x96" => "\xc8\x97",
 716 | 		"\xc8\x98" => "\xc8\x99",
 717 | 		"\xc8\x9a" => "\xc8\x9b",
 718 | 		"\xc8\x9c" => "\xc8\x9d",
 719 | 		"\xc8\x9e" => "\xc8\x9f",
 720 | 		"\xc8\xa0" => "\xc6\x9e",
 721 | 		"\xc8\xa2" => "\xc8\xa3",
 722 | 		"\xc8\xa4" => "\xc8\xa5",
 723 | 		"\xc8\xa6" => "\xc8\xa7",
 724 | 		"\xc8\xa8" => "\xc8\xa9",
 725 | 		"\xc8\xaa" => "\xc8\xab",
 726 | 		"\xc8\xac" => "\xc8\xad",
 727 | 		"\xc8\xae" => "\xc8\xaf",
 728 | 		"\xc8\xb0" => "\xc8\xb1",
 729 | 		"\xc8\xb2" => "\xc8\xb3",
 730 | 		"\xce\x86" => "\xce\xac", #U+0386.. Greek and Coptic
 731 | 		"\xce\x88" => "\xce\xad",
 732 | 		"\xce\x89" => "\xce\xae",
 733 | 		"\xce\x8a" => "\xce\xaf",
 734 | 		"\xce\x8c" => "\xcf\x8c",
 735 | 		"\xce\x8e" => "\xcf\x8d",
 736 | 		"\xce\x8f" => "\xcf\x8e",
 737 | 		"\xce\x91" => "\xce\xb1",
 738 | 		"\xce\x92" => "\xce\xb2",
 739 | 		"\xce\x93" => "\xce\xb3",
 740 | 		"\xce\x94" => "\xce\xb4",
 741 | 		"\xce\x95" => "\xce\xb5",
 742 | 		"\xce\x96" => "\xce\xb6",
 743 | 		"\xce\x97" => "\xce\xb7",
 744 | 		"\xce\x98" => "\xce\xb8",
 745 | 		"\xce\x99" => "\xce\xb9",
 746 | 		"\xce\x9a" => "\xce\xba",
 747 | 		"\xce\x9b" => "\xce\xbb",
 748 | 		"\xce\x9c" => "\xce\xbc", #U+039C MU => U+03BC SMALL MU (was U+00B5 MICRO SIGN, which is not the lower case of MU)
 749 | 		"\xce\x9d" => "\xce\xbd",
 750 | 		"\xce\x9e" => "\xce\xbe",
 751 | 		"\xce\x9f" => "\xce\xbf",
 752 | 		"\xce\xa0" => "\xcf\x80",
 753 | 		"\xce\xa1" => "\xcf\x81",
 754 | 		"\xce\xa3" => "\xcf\x83", #U+03A3 SIGMA => U+03C3 SMALL SIGMA (was U+03C2 FINAL SIGMA, valid only at the end of a word)
 755 | 		"\xce\xa4" => "\xcf\x84",
 756 | 		"\xce\xa5" => "\xcf\x85",
 757 | 		"\xce\xa6" => "\xcf\x86",
 758 | 		"\xce\xa7" => "\xcf\x87",
 759 | 		"\xce\xa8" => "\xcf\x88",
 760 | 		"\xce\xa9" => "\xcf\x89",
 761 | 		"\xce\xaa" => "\xcf\x8a",
 762 | 		"\xce\xab" => "\xcf\x8b",
 763 | 		"\xcf\x98" => "\xcf\x99",
 764 | 		"\xcf\x9a" => "\xcf\x9b",
 765 | 		"\xcf\x9c" => "\xcf\x9d",
 766 | 		"\xcf\x9e" => "\xcf\x9f",
 767 | 		"\xcf\xa0" => "\xcf\xa1",
 768 | 		"\xcf\xa2" => "\xcf\xa3",
 769 | 		"\xcf\xa4" => "\xcf\xa5",
 770 | 		"\xcf\xa6" => "\xcf\xa7",
 771 | 		"\xcf\xa8" => "\xcf\xa9",
 772 | 		"\xcf\xaa" => "\xcf\xab",
 773 | 		"\xcf\xac" => "\xcf\xad",
 774 | 		"\xcf\xae" => "\xcf\xaf",
 775 | 		"\xd0\x80" => "\xd1\x90", #U+0400.. Cyrillic
 776 | 		"\xd0\x81" => "\xd1\x91",
 777 | 		"\xd0\x82" => "\xd1\x92",
 778 | 		"\xd0\x83" => "\xd1\x93",
 779 | 		"\xd0\x84" => "\xd1\x94",
 780 | 		"\xd0\x85" => "\xd1\x95",
 781 | 		"\xd0\x86" => "\xd1\x96",
 782 | 		"\xd0\x87" => "\xd1\x97",
 783 | 		"\xd0\x88" => "\xd1\x98",
 784 | 		"\xd0\x89" => "\xd1\x99",
 785 | 		"\xd0\x8a" => "\xd1\x9a",
 786 | 		"\xd0\x8b" => "\xd1\x9b",
 787 | 		"\xd0\x8c" => "\xd1\x9c",
 788 | 		"\xd0\x8d" => "\xd1\x9d",
 789 | 		"\xd0\x8e" => "\xd1\x9e",
 790 | 		"\xd0\x8f" => "\xd1\x9f",
 791 | 		"\xd0\x90" => "\xd0\xb0",
 792 | 		"\xd0\x91" => "\xd0\xb1",
 793 | 		"\xd0\x92" => "\xd0\xb2",
 794 | 		"\xd0\x93" => "\xd0\xb3",
 795 | 		"\xd0\x94" => "\xd0\xb4",
 796 | 		"\xd0\x95" => "\xd0\xb5",
 797 | 		"\xd0\x96" => "\xd0\xb6",
 798 | 		"\xd0\x97" => "\xd0\xb7",
 799 | 		"\xd0\x98" => "\xd0\xb8",
 800 | 		"\xd0\x99" => "\xd0\xb9",
 801 | 		"\xd0\x9a" => "\xd0\xba",
 802 | 		"\xd0\x9b" => "\xd0\xbb",
 803 | 		"\xd0\x9c" => "\xd0\xbc",
 804 | 		"\xd0\x9d" => "\xd0\xbd",
 805 | 		"\xd0\x9e" => "\xd0\xbe",
 806 | 		"\xd0\x9f" => "\xd0\xbf",
 807 | 		"\xd0\xa0" => "\xd1\x80",
 808 | 		"\xd0\xa1" => "\xd1\x81",
 809 | 		"\xd0\xa2" => "\xd1\x82",
 810 | 		"\xd0\xa3" => "\xd1\x83",
 811 | 		"\xd0\xa4" => "\xd1\x84",
 812 | 		"\xd0\xa5" => "\xd1\x85",
 813 | 		"\xd0\xa6" => "\xd1\x86",
 814 | 		"\xd0\xa7" => "\xd1\x87",
 815 | 		"\xd0\xa8" => "\xd1\x88",
 816 | 		"\xd0\xa9" => "\xd1\x89",
 817 | 		"\xd0\xaa" => "\xd1\x8a",
 818 | 		"\xd0\xab" => "\xd1\x8b",
 819 | 		"\xd0\xac" => "\xd1\x8c",
 820 | 		"\xd0\xad" => "\xd1\x8d",
 821 | 		"\xd0\xae" => "\xd1\x8e",
 822 | 		"\xd0\xaf" => "\xd1\x8f",
 823 | 		"\xd1\xa0" => "\xd1\xa1",
 824 | 		"\xd1\xa2" => "\xd1\xa3",
 825 | 		"\xd1\xa4" => "\xd1\xa5",
 826 | 		"\xd1\xa6" => "\xd1\xa7",
 827 | 		"\xd1\xa8" => "\xd1\xa9",
 828 | 		"\xd1\xaa" => "\xd1\xab",
 829 | 		"\xd1\xac" => "\xd1\xad",
 830 | 		"\xd1\xae" => "\xd1\xaf",
 831 | 		"\xd1\xb0" => "\xd1\xb1",
 832 | 		"\xd1\xb2" => "\xd1\xb3",
 833 | 		"\xd1\xb4" => "\xd1\xb5",
 834 | 		"\xd1\xb6" => "\xd1\xb7",
 835 | 		"\xd1\xb8" => "\xd1\xb9",
 836 | 		"\xd1\xba" => "\xd1\xbb",
 837 | 		"\xd1\xbc" => "\xd1\xbd",
 838 | 		"\xd1\xbe" => "\xd1\xbf",
 839 | 		"\xd2\x80" => "\xd2\x81",
 840 | 		"\xd2\x8a" => "\xd2\x8b",
 841 | 		"\xd2\x8c" => "\xd2\x8d",
 842 | 		"\xd2\x8e" => "\xd2\x8f",
 843 | 		"\xd2\x90" => "\xd2\x91",
 844 | 		"\xd2\x92" => "\xd2\x93",
 845 | 		"\xd2\x94" => "\xd2\x95",
 846 | 		"\xd2\x96" => "\xd2\x97",
 847 | 		"\xd2\x98" => "\xd2\x99",
 848 | 		"\xd2\x9a" => "\xd2\x9b",
 849 | 		"\xd2\x9c" => "\xd2\x9d",
 850 | 		"\xd2\x9e" => "\xd2\x9f",
 851 | 		"\xd2\xa0" => "\xd2\xa1",
 852 | 		"\xd2\xa2" => "\xd2\xa3",
 853 | 		"\xd2\xa4" => "\xd2\xa5",
 854 | 		"\xd2\xa6" => "\xd2\xa7",
 855 | 		"\xd2\xa8" => "\xd2\xa9",
 856 | 		"\xd2\xaa" => "\xd2\xab",
 857 | 		"\xd2\xac" => "\xd2\xad",
 858 | 		"\xd2\xae" => "\xd2\xaf",
 859 | 		"\xd2\xb0" => "\xd2\xb1",
 860 | 		"\xd2\xb2" => "\xd2\xb3",
 861 | 		"\xd2\xb4" => "\xd2\xb5",
 862 | 		"\xd2\xb6" => "\xd2\xb7",
 863 | 		"\xd2\xb8" => "\xd2\xb9",
 864 | 		"\xd2\xba" => "\xd2\xbb",
 865 | 		"\xd2\xbc" => "\xd2\xbd",
 866 | 		"\xd2\xbe" => "\xd2\xbf",
 867 | 		"\xd3\x81" => "\xd3\x82",
 868 | 		"\xd3\x83" => "\xd3\x84",
 869 | 		"\xd3\x85" => "\xd3\x86",
 870 | 		"\xd3\x87" => "\xd3\x88",
 871 | 		"\xd3\x89" => "\xd3\x8a",
 872 | 		"\xd3\x8b" => "\xd3\x8c",
 873 | 		"\xd3\x8d" => "\xd3\x8e",
 874 | 		"\xd3\x90" => "\xd3\x91",
 875 | 		"\xd3\x92" => "\xd3\x93",
 876 | 		"\xd3\x94" => "\xd3\x95",
 877 | 		"\xd3\x96" => "\xd3\x97",
 878 | 		"\xd3\x98" => "\xd3\x99",
 879 | 		"\xd3\x9a" => "\xd3\x9b",
 880 | 		"\xd3\x9c" => "\xd3\x9d",
 881 | 		"\xd3\x9e" => "\xd3\x9f",
 882 | 		"\xd3\xa0" => "\xd3\xa1",
 883 | 		"\xd3\xa2" => "\xd3\xa3",
 884 | 		"\xd3\xa4" => "\xd3\xa5",
 885 | 		"\xd3\xa6" => "\xd3\xa7",
 886 | 		"\xd3\xa8" => "\xd3\xa9",
 887 | 		"\xd3\xaa" => "\xd3\xab",
 888 | 		"\xd3\xac" => "\xd3\xad",
 889 | 		"\xd3\xae" => "\xd3\xaf",
 890 | 		"\xd3\xb0" => "\xd3\xb1",
 891 | 		"\xd3\xb2" => "\xd3\xb3",
 892 | 		"\xd3\xb4" => "\xd3\xb5",
 893 | 		"\xd3\xb8" => "\xd3\xb9",
 894 | 		"\xd4\x80" => "\xd4\x81", #U+0500.. Cyrillic Supplement
 895 | 		"\xd4\x82" => "\xd4\x83",
 896 | 		"\xd4\x84" => "\xd4\x85",
 897 | 		"\xd4\x86" => "\xd4\x87",
 898 | 		"\xd4\x88" => "\xd4\x89",
 899 | 		"\xd4\x8a" => "\xd4\x8b",
 900 | 		"\xd4\x8c" => "\xd4\x8d",
 901 | 		"\xd4\x8e" => "\xd4\x8f",
 902 | 		"\xd4\xb1" => "\xd5\xa1", #U+0531.. Armenian
 903 | 		"\xd4\xb2" => "\xd5\xa2",
 904 | 		"\xd4\xb3" => "\xd5\xa3",
 905 | 		"\xd4\xb4" => "\xd5\xa4",
 906 | 		"\xd4\xb5" => "\xd5\xa5",
 907 | 		"\xd4\xb6" => "\xd5\xa6",
 908 | 		"\xd4\xb7" => "\xd5\xa7",
 909 | 		"\xd4\xb8" => "\xd5\xa8",
 910 | 		"\xd4\xb9" => "\xd5\xa9",
 911 | 		"\xd4\xba" => "\xd5\xaa",
 912 | 		"\xd4\xbb" => "\xd5\xab",
 913 | 		"\xd4\xbc" => "\xd5\xac",
 914 | 		"\xd4\xbd" => "\xd5\xad",
 915 | 		"\xd4\xbe" => "\xd5\xae",
 916 | 		"\xd4\xbf" => "\xd5\xaf",
 917 | 		"\xd5\x80" => "\xd5\xb0",
 918 | 		"\xd5\x81" => "\xd5\xb1",
 919 | 		"\xd5\x82" => "\xd5\xb2",
 920 | 		"\xd5\x83" => "\xd5\xb3",
 921 | 		"\xd5\x84" => "\xd5\xb4",
 922 | 		"\xd5\x85" => "\xd5\xb5",
 923 | 		"\xd5\x86" => "\xd5\xb6",
 924 | 		"\xd5\x87" => "\xd5\xb7",
 925 | 		"\xd5\x88" => "\xd5\xb8",
 926 | 		"\xd5\x89" => "\xd5\xb9",
 927 | 		"\xd5\x8a" => "\xd5\xba",
 928 | 		"\xd5\x8b" => "\xd5\xbb",
 929 | 		"\xd5\x8c" => "\xd5\xbc",
 930 | 		"\xd5\x8d" => "\xd5\xbd",
 931 | 		"\xd5\x8e" => "\xd5\xbe",
 932 | 		"\xd5\x8f" => "\xd5\xbf",
 933 | 		"\xd5\x90" => "\xd6\x80",
 934 | 		"\xd5\x91" => "\xd6\x81",
 935 | 		"\xd5\x92" => "\xd6\x82",
 936 | 		"\xd5\x93" => "\xd6\x83",
 937 | 		"\xd5\x94" => "\xd6\x84",
 938 | 		"\xd5\x95" => "\xd6\x85",
 939 | 		"\xd5\x96" => "\xd6\x86",
 940 | 		"\xe1\xb8\x80" => "\xe1\xb8\x81", #U+1E00.. Latin Extended Additional
 941 | 		"\xe1\xb8\x82" => "\xe1\xb8\x83",
 942 | 		"\xe1\xb8\x84" => "\xe1\xb8\x85",
 943 | 		"\xe1\xb8\x86" => "\xe1\xb8\x87",
 944 | 		"\xe1\xb8\x88" => "\xe1\xb8\x89",
 945 | 		"\xe1\xb8\x8a" => "\xe1\xb8\x8b",
 946 | 		"\xe1\xb8\x8c" => "\xe1\xb8\x8d",
 947 | 		"\xe1\xb8\x8e" => "\xe1\xb8\x8f",
 948 | 		"\xe1\xb8\x90" => "\xe1\xb8\x91",
 949 | 		"\xe1\xb8\x92" => "\xe1\xb8\x93",
 950 | 		"\xe1\xb8\x94" => "\xe1\xb8\x95",
 951 | 		"\xe1\xb8\x96" => "\xe1\xb8\x97",
 952 | 		"\xe1\xb8\x98" => "\xe1\xb8\x99",
 953 | 		"\xe1\xb8\x9a" => "\xe1\xb8\x9b",
 954 | 		"\xe1\xb8\x9c" => "\xe1\xb8\x9d",
 955 | 		"\xe1\xb8\x9e" => "\xe1\xb8\x9f",
 956 | 		"\xe1\xb8\xa0" => "\xe1\xb8\xa1",
 957 | 		"\xe1\xb8\xa2" => "\xe1\xb8\xa3",
 958 | 		"\xe1\xb8\xa4" => "\xe1\xb8\xa5",
 959 | 		"\xe1\xb8\xa6" => "\xe1\xb8\xa7",
 960 | 		"\xe1\xb8\xa8" => "\xe1\xb8\xa9",
 961 | 		"\xe1\xb8\xaa" => "\xe1\xb8\xab",
 962 | 		"\xe1\xb8\xac" => "\xe1\xb8\xad",
 963 | 		"\xe1\xb8\xae" => "\xe1\xb8\xaf",
 964 | 		"\xe1\xb8\xb0" => "\xe1\xb8\xb1",
 965 | 		"\xe1\xb8\xb2" => "\xe1\xb8\xb3",
 966 | 		"\xe1\xb8\xb4" => "\xe1\xb8\xb5",
 967 | 		"\xe1\xb8\xb6" => "\xe1\xb8\xb7",
 968 | 		"\xe1\xb8\xb8" => "\xe1\xb8\xb9",
 969 | 		"\xe1\xb8\xba" => "\xe1\xb8\xbb",
 970 | 		"\xe1\xb8\xbc" => "\xe1\xb8\xbd",
 971 | 		"\xe1\xb8\xbe" => "\xe1\xb8\xbf",
 972 | 		"\xe1\xb9\x80" => "\xe1\xb9\x81",
 973 | 		"\xe1\xb9\x82" => "\xe1\xb9\x83",
 974 | 		"\xe1\xb9\x84" => "\xe1\xb9\x85",
 975 | 		"\xe1\xb9\x86" => "\xe1\xb9\x87",
 976 | 		"\xe1\xb9\x88" => "\xe1\xb9\x89",
 977 | 		"\xe1\xb9\x8a" => "\xe1\xb9\x8b",
 978 | 		"\xe1\xb9\x8c" => "\xe1\xb9\x8d",
 979 | 		"\xe1\xb9\x8e" => "\xe1\xb9\x8f",
 980 | 		"\xe1\xb9\x90" => "\xe1\xb9\x91",
 981 | 		"\xe1\xb9\x92" => "\xe1\xb9\x93",
 982 | 		"\xe1\xb9\x94" => "\xe1\xb9\x95",
 983 | 		"\xe1\xb9\x96" => "\xe1\xb9\x97",
 984 | 		"\xe1\xb9\x98" => "\xe1\xb9\x99",
 985 | 		"\xe1\xb9\x9a" => "\xe1\xb9\x9b",
 986 | 		"\xe1\xb9\x9c" => "\xe1\xb9\x9d",
 987 | 		"\xe1\xb9\x9e" => "\xe1\xb9\x9f",
 988 | 		"\xe1\xb9\xa0" => "\xe1\xb9\xa1",
 989 | 		"\xe1\xb9\xa2" => "\xe1\xb9\xa3",
 990 | 		"\xe1\xb9\xa4" => "\xe1\xb9\xa5",
 991 | 		"\xe1\xb9\xa6" => "\xe1\xb9\xa7",
 992 | 		"\xe1\xb9\xa8" => "\xe1\xb9\xa9",
 993 | 		"\xe1\xb9\xaa" => "\xe1\xb9\xab",
 994 | 		"\xe1\xb9\xac" => "\xe1\xb9\xad",
 995 | 		"\xe1\xb9\xae" => "\xe1\xb9\xaf",
 996 | 		"\xe1\xb9\xb0" => "\xe1\xb9\xb1",
 997 | 		"\xe1\xb9\xb2" => "\xe1\xb9\xb3",
 998 | 		"\xe1\xb9\xb4" => "\xe1\xb9\xb5",
 999 | 		"\xe1\xb9\xb6" => "\xe1\xb9\xb7",
1000 | 		"\xe1\xb9\xb8" => "\xe1\xb9\xb9",
1001 | 		"\xe1\xb9\xba" => "\xe1\xb9\xbb",
1002 | 		"\xe1\xb9\xbc" => "\xe1\xb9\xbd",
1003 | 		"\xe1\xb9\xbe" => "\xe1\xb9\xbf",
1004 | 		"\xe1\xba\x80" => "\xe1\xba\x81",
1005 | 		"\xe1\xba\x82" => "\xe1\xba\x83",
1006 | 		"\xe1\xba\x84" => "\xe1\xba\x85",
1007 | 		"\xe1\xba\x86" => "\xe1\xba\x87",
1008 | 		"\xe1\xba\x88" => "\xe1\xba\x89",
1009 | 		"\xe1\xba\x8a" => "\xe1\xba\x8b",
1010 | 		"\xe1\xba\x8c" => "\xe1\xba\x8d",
1011 | 		"\xe1\xba\x8e" => "\xe1\xba\x8f",
1012 | 		"\xe1\xba\x90" => "\xe1\xba\x91",
1013 | 		"\xe1\xba\x92" => "\xe1\xba\x93",
1014 | 		"\xe1\xba\x94" => "\xe1\xba\x95",
1015 | 		"\xe1\xba\xa0" => "\xe1\xba\xa1",
1016 | 		"\xe1\xba\xa2" => "\xe1\xba\xa3",
1017 | 		"\xe1\xba\xa4" => "\xe1\xba\xa5",
1018 | 		"\xe1\xba\xa6" => "\xe1\xba\xa7",
1019 | 		"\xe1\xba\xa8" => "\xe1\xba\xa9",
1020 | 		"\xe1\xba\xaa" => "\xe1\xba\xab",
1021 | 		"\xe1\xba\xac" => "\xe1\xba\xad",
1022 | 		"\xe1\xba\xae" => "\xe1\xba\xaf",
1023 | 		"\xe1\xba\xb0" => "\xe1\xba\xb1",
1024 | 		"\xe1\xba\xb2" => "\xe1\xba\xb3",
1025 | 		"\xe1\xba\xb4" => "\xe1\xba\xb5",
1026 | 		"\xe1\xba\xb6" => "\xe1\xba\xb7",
1027 | 		"\xe1\xba\xb8" => "\xe1\xba\xb9",
1028 | 		"\xe1\xba\xba" => "\xe1\xba\xbb",
1029 | 		"\xe1\xba\xbc" => "\xe1\xba\xbd",
1030 | 		"\xe1\xba\xbe" => "\xe1\xba\xbf",
1031 | 		"\xe1\xbb\x80" => "\xe1\xbb\x81",
1032 | 		"\xe1\xbb\x82" => "\xe1\xbb\x83",
1033 | 		"\xe1\xbb\x84" => "\xe1\xbb\x85",
1034 | 		"\xe1\xbb\x86" => "\xe1\xbb\x87",
1035 | 		"\xe1\xbb\x88" => "\xe1\xbb\x89",
1036 | 		"\xe1\xbb\x8a" => "\xe1\xbb\x8b",
1037 | 		"\xe1\xbb\x8c" => "\xe1\xbb\x8d",
1038 | 		"\xe1\xbb\x8e" => "\xe1\xbb\x8f",
1039 | 		"\xe1\xbb\x90" => "\xe1\xbb\x91",
1040 | 		"\xe1\xbb\x92" => "\xe1\xbb\x93",
1041 | 		"\xe1\xbb\x94" => "\xe1\xbb\x95",
1042 | 		"\xe1\xbb\x96" => "\xe1\xbb\x97",
1043 | 		"\xe1\xbb\x98" => "\xe1\xbb\x99",
1044 | 		"\xe1\xbb\x9a" => "\xe1\xbb\x9b",
1045 | 		"\xe1\xbb\x9c" => "\xe1\xbb\x9d",
1046 | 		"\xe1\xbb\x9e" => "\xe1\xbb\x9f",
1047 | 		"\xe1\xbb\xa0" => "\xe1\xbb\xa1",
1048 | 		"\xe1\xbb\xa2" => "\xe1\xbb\xa3",
1049 | 		"\xe1\xbb\xa4" => "\xe1\xbb\xa5",
1050 | 		"\xe1\xbb\xa6" => "\xe1\xbb\xa7",
1051 | 		"\xe1\xbb\xa8" => "\xe1\xbb\xa9",
1052 | 		"\xe1\xbb\xaa" => "\xe1\xbb\xab",
1053 | 		"\xe1\xbb\xac" => "\xe1\xbb\xad",
1054 | 		"\xe1\xbb\xae" => "\xe1\xbb\xaf",
1055 | 		"\xe1\xbb\xb0" => "\xe1\xbb\xb1",
1056 | 		"\xe1\xbb\xb2" => "\xe1\xbb\xb3",
1057 | 		"\xe1\xbb\xb4" => "\xe1\xbb\xb5",
1058 | 		"\xe1\xbb\xb6" => "\xe1\xbb\xb7",
1059 | 		"\xe1\xbb\xb8" => "\xe1\xbb\xb9",
1060 | 		"\xe1\xbc\x88" => "\xe1\xbc\x80", #U+1F08.. Greek Extended
1061 | 		"\xe1\xbc\x89" => "\xe1\xbc\x81",
1062 | 		"\xe1\xbc\x8a" => "\xe1\xbc\x82",
1063 | 		"\xe1\xbc\x8b" => "\xe1\xbc\x83",
1064 | 		"\xe1\xbc\x8c" => "\xe1\xbc\x84",
1065 | 		"\xe1\xbc\x8d" => "\xe1\xbc\x85",
1066 | 		"\xe1\xbc\x8e" => "\xe1\xbc\x86",
1067 | 		"\xe1\xbc\x8f" => "\xe1\xbc\x87",
1068 | 		"\xe1\xbc\x98" => "\xe1\xbc\x90",
1069 | 		"\xe1\xbc\x99" => "\xe1\xbc\x91",
1070 | 		"\xe1\xbc\x9a" => "\xe1\xbc\x92",
1071 | 		"\xe1\xbc\x9b" => "\xe1\xbc\x93",
1072 | 		"\xe1\xbc\x9c" => "\xe1\xbc\x94",
1073 | 		"\xe1\xbc\x9d" => "\xe1\xbc\x95",
1074 | 		"\xe1\xbc\xa9" => "\xe1\xbc\xa1",
1075 | 		"\xe1\xbc\xaa" => "\xe1\xbc\xa2",
1076 | 		"\xe1\xbc\xab" => "\xe1\xbc\xa3",
1077 | 		"\xe1\xbc\xac" => "\xe1\xbc\xa4",
1078 | 		"\xe1\xbc\xad" => "\xe1\xbc\xa5",
1079 | 		"\xe1\xbc\xae" => "\xe1\xbc\xa6",
1080 | 		"\xe1\xbc\xaf" => "\xe1\xbc\xa7",
1081 | 		"\xe1\xbc\xb8" => "\xe1\xbc\xb0",
1082 | 		"\xe1\xbc\xb9" => "\xe1\xbc\xb1",
1083 | 		"\xe1\xbc\xba" => "\xe1\xbc\xb2",
1084 | 		"\xe1\xbc\xbb" => "\xe1\xbc\xb3",
1085 | 		"\xe1\xbc\xbc" => "\xe1\xbc\xb4",
1086 | 		"\xe1\xbc\xbd" => "\xe1\xbc\xb5",
1087 | 		"\xe1\xbc\xbe" => "\xe1\xbc\xb6",
1088 | 		"\xe1\xbc\xbf" => "\xe1\xbc\xb7",
1089 | 		"\xe1\xbd\x88" => "\xe1\xbd\x80",
1090 | 		"\xe1\xbd\x89" => "\xe1\xbd\x81",
1091 | 		"\xe1\xbd\x8a" => "\xe1\xbd\x82",
1092 | 		"\xe1\xbd\x8b" => "\xe1\xbd\x83",
1093 | 		"\xe1\xbd\x8c" => "\xe1\xbd\x84",
1094 | 		"\xe1\xbd\x8d" => "\xe1\xbd\x85",
1095 | 		"\xe1\xbd\x99" => "\xe1\xbd\x91",
1096 | 		"\xe1\xbd\x9b" => "\xe1\xbd\x93",
1097 | 		"\xe1\xbd\x9d" => "\xe1\xbd\x95",
1098 | 		"\xe1\xbd\x9f" => "\xe1\xbd\x97",
1099 | 		"\xe1\xbd\xa9" => "\xe1\xbd\xa1",
1100 | 		"\xe1\xbd\xaa" => "\xe1\xbd\xa2",
1101 | 		"\xe1\xbd\xab" => "\xe1\xbd\xa3",
1102 | 		"\xe1\xbd\xac" => "\xe1\xbd\xa4",
1103 | 		"\xe1\xbd\xad" => "\xe1\xbd\xa5",
1104 | 		"\xe1\xbd\xae" => "\xe1\xbd\xa6",
1105 | 		"\xe1\xbd\xaf" => "\xe1\xbd\xa7",
1106 | 		"\xe1\xbe\x88" => "\xe1\xbe\x80",
1107 | 		"\xe1\xbe\x89" => "\xe1\xbe\x81",
1108 | 		"\xe1\xbe\x8a" => "\xe1\xbe\x82",
1109 | 		"\xe1\xbe\x8b" => "\xe1\xbe\x83",
1110 | 		"\xe1\xbe\x8c" => "\xe1\xbe\x84",
1111 | 		"\xe1\xbe\x8d" => "\xe1\xbe\x85",
1112 | 		"\xe1\xbe\x8e" => "\xe1\xbe\x86",
1113 | 		"\xe1\xbe\x8f" => "\xe1\xbe\x87",
1114 | 		"\xe1\xbe\x98" => "\xe1\xbe\x90",
1115 | 		"\xe1\xbe\x99" => "\xe1\xbe\x91",
1116 | 		"\xe1\xbe\x9a" => "\xe1\xbe\x92",
1117 | 		"\xe1\xbe\x9b" => "\xe1\xbe\x93",
1118 | 		"\xe1\xbe\x9c" => "\xe1\xbe\x94",
1119 | 		"\xe1\xbe\x9d" => "\xe1\xbe\x95",
1120 | 		"\xe1\xbe\x9e" => "\xe1\xbe\x96",
1121 | 		"\xe1\xbe\x9f" => "\xe1\xbe\x97",
1122 | 		"\xe1\xbe\xa9" => "\xe1\xbe\xa1",
1123 | 		"\xe1\xbe\xaa" => "\xe1\xbe\xa2",
1124 | 		"\xe1\xbe\xab" => "\xe1\xbe\xa3",
1125 | 		"\xe1\xbe\xac" => "\xe1\xbe\xa4",
1126 | 		"\xe1\xbe\xad" => "\xe1\xbe\xa5",
1127 | 		"\xe1\xbe\xae" => "\xe1\xbe\xa6",
1128 | 		"\xe1\xbe\xaf" => "\xe1\xbe\xa7",
1129 | 		"\xe1\xbe\xb8" => "\xe1\xbe\xb0",
1130 | 		"\xe1\xbe\xb9" => "\xe1\xbe\xb1",
1131 | 		"\xe1\xbe\xba" => "\xe1\xbd\xb0",
1132 | 		"\xe1\xbe\xbb" => "\xe1\xbd\xb1",
1133 | 		"\xe1\xbe\xbc" => "\xe1\xbe\xb3",
1134 | 		"\xe1\xbf\x88" => "\xe1\xbd\xb2",
1135 | 		"\xe1\xbf\x89" => "\xe1\xbd\xb3",
1136 | 		"\xe1\xbf\x8a" => "\xe1\xbd\xb4",
1137 | 		"\xe1\xbf\x8b" => "\xe1\xbd\xb5",
1138 | 		"\xe1\xbf\x8c" => "\xe1\xbf\x83",
1139 | 		"\xe1\xbf\x98" => "\xe1\xbf\x90",
1140 | 		"\xe1\xbf\x99" => "\xe1\xbf\x91",
1141 | 		"\xe1\xbf\x9a" => "\xe1\xbd\xb6",
1142 | 		"\xe1\xbf\x9b" => "\xe1\xbd\xb7",
1143 | 		"\xe1\xbf\xa9" => "\xe1\xbf\xa1",
1144 | 		"\xe1\xbf\xaa" => "\xe1\xbd\xba",
1145 | 		"\xe1\xbf\xab" => "\xe1\xbd\xbb",
1146 | 		"\xe1\xbf\xac" => "\xe1\xbf\xa5",
1147 | 		"\xe1\xbf\xb8" => "\xe1\xbd\xb8",
1148 | 		"\xe1\xbf\xb9" => "\xe1\xbd\xb9",
1149 | 		"\xe1\xbf\xba" => "\xe1\xbd\xbc",
1150 | 		"\xe1\xbf\xbb" => "\xe1\xbd\xbd",
1151 | 		"\xe1\xbf\xbc" => "\xe1\xbf\xb3",
1152 | 		"\xef\xbc\xa1" => "\xef\xbd\x81", #U+FF21.. Halfwidth and Fullwidth Forms
1153 | 		"\xef\xbc\xa2" => "\xef\xbd\x82",
1154 | 		"\xef\xbc\xa3" => "\xef\xbd\x83",
1155 | 		"\xef\xbc\xa4" => "\xef\xbd\x84",
1156 | 		"\xef\xbc\xa5" => "\xef\xbd\x85",
1157 | 		"\xef\xbc\xa6" => "\xef\xbd\x86",
1158 | 		"\xef\xbc\xa7" => "\xef\xbd\x87",
1159 | 		"\xef\xbc\xa8" => "\xef\xbd\x88",
1160 | 		"\xef\xbc\xa9" => "\xef\xbd\x89",
1161 | 		"\xef\xbc\xaa" => "\xef\xbd\x8a",
1162 | 		"\xef\xbc\xab" => "\xef\xbd\x8b",
1163 | 		"\xef\xbc\xac" => "\xef\xbd\x8c",
1164 | 		"\xef\xbc\xad" => "\xef\xbd\x8d",
1165 | 		"\xef\xbc\xae" => "\xef\xbd\x8e",
1166 | 		"\xef\xbc\xaf" => "\xef\xbd\x8f",
1167 | 		"\xef\xbc\xb0" => "\xef\xbd\x90",
1168 | 		"\xef\xbc\xb1" => "\xef\xbd\x91",
1169 | 		"\xef\xbc\xb2" => "\xef\xbd\x92",
1170 | 		"\xef\xbc\xb3" => "\xef\xbd\x93",
1171 | 		"\xef\xbc\xb4" => "\xef\xbd\x94",
1172 | 		"\xef\xbc\xb5" => "\xef\xbd\x95",
1173 | 		"\xef\xbc\xb6" => "\xef\xbd\x96",
1174 | 		"\xef\xbc\xb7" => "\xef\xbd\x97",
1175 | 		"\xef\xbc\xb8" => "\xef\xbd\x98",
1176 | 		"\xef\xbc\xb9" => "\xef\xbd\x99",
1177 | 		"\xef\xbc\xba" => "\xef\xbd\x9a",
1178 | 	);
1179 | 
1180 | 	#Unicode Character Database 6.0.0 (2010-06-04)
1181 | 	#autogenerated by unicode_blocks_txt2php() PHP function at 2011-06-04 00:19:39, 209 blocks total
1182 | 	public static $unicode_blocks = array(
1183 | 		'Basic Latin' => array(
1184 | 			0 => 0x0000,
1185 | 			1 => 0x007F,
1186 | 			2 => 0,
1187 | 		),
1188 | 		'Latin-1 Supplement' => array(
1189 | 			0 => 0x0080,
1190 | 			1 => 0x00FF,
1191 | 			2 => 1,
1192 | 		),
1193 | 		'Latin Extended-A' => array(
1194 | 			0 => 0x0100,
1195 | 			1 => 0x017F,
1196 | 			2 => 2,
1197 | 		),
1198 | 		'Latin Extended-B' => array(
1199 | 			0 => 0x0180,
1200 | 			1 => 0x024F,
1201 | 			2 => 3,
1202 | 		),
1203 | 		'IPA Extensions' => array(
1204 | 			0 => 0x0250,
1205 | 			1 => 0x02AF,
1206 | 			2 => 4,
1207 | 		),
1208 | 		'Spacing Modifier Letters' => array(
1209 | 			0 => 0x02B0,
1210 | 			1 => 0x02FF,
1211 | 			2 => 5,
1212 | 		),
1213 | 		'Combining Diacritical Marks' => array(
1214 | 			0 => 0x0300,
1215 | 			1 => 0x036F,
1216 | 			2 => 6,
1217 | 		),
1218 | 		'Greek and Coptic' => array(
1219 | 			0 => 0x0370,
1220 | 			1 => 0x03FF,
1221 | 			2 => 7,
1222 | 		),
1223 | 		'Cyrillic' => array(
1224 | 			0 => 0x0400,
1225 | 			1 => 0x04FF,
1226 | 			2 => 8,
1227 | 		),
1228 | 		'Cyrillic Supplement' => array(
1229 | 			0 => 0x0500,
1230 | 			1 => 0x052F,
1231 | 			2 => 9,
1232 | 		),
1233 | 		'Armenian' => array(
1234 | 			0 => 0x0530,
1235 | 			1 => 0x058F,
1236 | 			2 => 10,
1237 | 		),
1238 | 		'Hebrew' => array(
1239 | 			0 => 0x0590,
1240 | 			1 => 0x05FF,
1241 | 			2 => 11,
1242 | 		),
1243 | 		'Arabic' => array(
1244 | 			0 => 0x0600,
1245 | 			1 => 0x06FF,
1246 | 			2 => 12,
1247 | 		),
1248 | 		'Syriac' => array(
1249 | 			0 => 0x0700,
1250 | 			1 => 0x074F,
1251 | 			2 => 13,
1252 | 		),
1253 | 		'Arabic Supplement' => array(
1254 | 			0 => 0x0750,
1255 | 			1 => 0x077F,
1256 | 			2 => 14,
1257 | 		),
1258 | 		'Thaana' => array(
1259 | 			0 => 0x0780,
1260 | 			1 => 0x07BF,
1261 | 			2 => 15,
1262 | 		),
1263 | 		'NKo' => array(
1264 | 			0 => 0x07C0,
1265 | 			1 => 0x07FF,
1266 | 			2 => 16,
1267 | 		),
1268 | 		'Samaritan' => array(
1269 | 			0 => 0x0800,
1270 | 			1 => 0x083F,
1271 | 			2 => 17,
1272 | 		),
1273 | 		'Mandaic' => array(
1274 | 			0 => 0x0840,
1275 | 			1 => 0x085F,
1276 | 			2 => 18,
1277 | 		),
1278 | 		'Devanagari' => array(
1279 | 			0 => 0x0900,
1280 | 			1 => 0x097F,
1281 | 			2 => 19,
1282 | 		),
1283 | 		'Bengali' => array(
1284 | 			0 => 0x0980,
1285 | 			1 => 0x09FF,
1286 | 			2 => 20,
1287 | 		),
1288 | 		'Gurmukhi' => array(
1289 | 			0 => 0x0A00,
1290 | 			1 => 0x0A7F,
1291 | 			2 => 21,
1292 | 		),
1293 | 		'Gujarati' => array(
1294 | 			0 => 0x0A80,
1295 | 			1 => 0x0AFF,
1296 | 			2 => 22,
1297 | 		),
1298 | 		'Oriya' => array(
1299 | 			0 => 0x0B00,
1300 | 			1 => 0x0B7F,
1301 | 			2 => 23,
1302 | 		),
1303 | 		'Tamil' => array(
1304 | 			0 => 0x0B80,
1305 | 			1 => 0x0BFF,
1306 | 			2 => 24,
1307 | 		),
1308 | 		'Telugu' => array(
1309 | 			0 => 0x0C00,
1310 | 			1 => 0x0C7F,
1311 | 			2 => 25,
1312 | 		),
1313 | 		'Kannada' => array(
1314 | 			0 => 0x0C80,
1315 | 			1 => 0x0CFF,
1316 | 			2 => 26,
1317 | 		),
1318 | 		'Malayalam' => array(
1319 | 			0 => 0x0D00,
1320 | 			1 => 0x0D7F,
1321 | 			2 => 27,
1322 | 		),
1323 | 		'Sinhala' => array(
1324 | 			0 => 0x0D80,
1325 | 			1 => 0x0DFF,
1326 | 			2 => 28,
1327 | 		),
1328 | 		'Thai' => array(
1329 | 			0 => 0x0E00,
1330 | 			1 => 0x0E7F,
1331 | 			2 => 29,
1332 | 		),
1333 | 		'Lao' => array(
1334 | 			0 => 0x0E80,
1335 | 			1 => 0x0EFF,
1336 | 			2 => 30,
1337 | 		),
1338 | 		'Tibetan' => array(
1339 | 			0 => 0x0F00,
1340 | 			1 => 0x0FFF,
1341 | 			2 => 31,
1342 | 		),
1343 | 		'Myanmar' => array(
1344 | 			0 => 0x1000,
1345 | 			1 => 0x109F,
1346 | 			2 => 32,
1347 | 		),
1348 | 		'Georgian' => array(
1349 | 			0 => 0x10A0,
1350 | 			1 => 0x10FF,
1351 | 			2 => 33,
1352 | 		),
1353 | 		'Hangul Jamo' => array(
1354 | 			0 => 0x1100,
1355 | 			1 => 0x11FF,
1356 | 			2 => 34,
1357 | 		),
1358 | 		'Ethiopic' => array(
1359 | 			0 => 0x1200,
1360 | 			1 => 0x137F,
1361 | 			2 => 35,
1362 | 		),
1363 | 		'Ethiopic Supplement' => array(
1364 | 			0 => 0x1380,
1365 | 			1 => 0x139F,
1366 | 			2 => 36,
1367 | 		),
1368 | 		'Cherokee' => array(
1369 | 			0 => 0x13A0,
1370 | 			1 => 0x13FF,
1371 | 			2 => 37,
1372 | 		),
1373 | 		'Unified Canadian Aboriginal Syllabics' => array(
1374 | 			0 => 0x1400,
1375 | 			1 => 0x167F,
1376 | 			2 => 38,
1377 | 		),
1378 | 		'Ogham' => array(
1379 | 			0 => 0x1680,
1380 | 			1 => 0x169F,
1381 | 			2 => 39,
1382 | 		),
1383 | 		'Runic' => array(
1384 | 			0 => 0x16A0,
1385 | 			1 => 0x16FF,
1386 | 			2 => 40,
1387 | 		),
1388 | 		'Tagalog' => array(
1389 | 			0 => 0x1700,
1390 | 			1 => 0x171F,
1391 | 			2 => 41,
1392 | 		),
1393 | 		'Hanunoo' => array(
1394 | 			0 => 0x1720,
1395 | 			1 => 0x173F,
1396 | 			2 => 42,
1397 | 		),
1398 | 		'Buhid' => array(
1399 | 			0 => 0x1740,
1400 | 			1 => 0x175F,
1401 | 			2 => 43,
1402 | 		),
1403 | 		'Tagbanwa' => array(
1404 | 			0 => 0x1760,
1405 | 			1 => 0x177F,
1406 | 			2 => 44,
1407 | 		),
1408 | 		'Khmer' => array(
1409 | 			0 => 0x1780,
1410 | 			1 => 0x17FF,
1411 | 			2 => 45,
1412 | 		),
1413 | 		'Mongolian' => array(
1414 | 			0 => 0x1800,
1415 | 			1 => 0x18AF,
1416 | 			2 => 46,
1417 | 		),
1418 | 		'Unified Canadian Aboriginal Syllabics Extended' => array(
1419 | 			0 => 0x18B0,
1420 | 			1 => 0x18FF,
1421 | 			2 => 47,
1422 | 		),
1423 | 		'Limbu' => array(
1424 | 			0 => 0x1900,
1425 | 			1 => 0x194F,
1426 | 			2 => 48,
1427 | 		),
1428 | 		'Tai Le' => array(
1429 | 			0 => 0x1950,
1430 | 			1 => 0x197F,
1431 | 			2 => 49,
1432 | 		),
1433 | 		'New Tai Lue' => array(
1434 | 			0 => 0x1980,
1435 | 			1 => 0x19DF,
1436 | 			2 => 50,
1437 | 		),
1438 | 		'Khmer Symbols' => array(
1439 | 			0 => 0x19E0,
1440 | 			1 => 0x19FF,
1441 | 			2 => 51,
1442 | 		),
1443 | 		'Buginese' => array(
1444 | 			0 => 0x1A00,
1445 | 			1 => 0x1A1F,
1446 | 			2 => 52,
1447 | 		),
1448 | 		'Tai Tham' => array(
1449 | 			0 => 0x1A20,
1450 | 			1 => 0x1AAF,
1451 | 			2 => 53,
1452 | 		),
1453 | 		'Balinese' => array(
1454 | 			0 => 0x1B00,
1455 | 			1 => 0x1B7F,
1456 | 			2 => 54,
1457 | 		),
1458 | 		'Sundanese' => array(
1459 | 			0 => 0x1B80,
1460 | 			1 => 0x1BBF,
1461 | 			2 => 55,
1462 | 		),
1463 | 		'Batak' => array(
1464 | 			0 => 0x1BC0,
1465 | 			1 => 0x1BFF,
1466 | 			2 => 56,
1467 | 		),
1468 | 		'Lepcha' => array(
1469 | 			0 => 0x1C00,
1470 | 			1 => 0x1C4F,
1471 | 			2 => 57,
1472 | 		),
1473 | 		'Ol Chiki' => array(
1474 | 			0 => 0x1C50,
1475 | 			1 => 0x1C7F,
1476 | 			2 => 58,
1477 | 		),
1478 | 		'Vedic Extensions' => array(
1479 | 			0 => 0x1CD0,
1480 | 			1 => 0x1CFF,
1481 | 			2 => 59,
1482 | 		),
1483 | 		'Phonetic Extensions' => array(
1484 | 			0 => 0x1D00,
1485 | 			1 => 0x1D7F,
1486 | 			2 => 60,
1487 | 		),
1488 | 		'Phonetic Extensions Supplement' => array(
1489 | 			0 => 0x1D80,
1490 | 			1 => 0x1DBF,
1491 | 			2 => 61,
1492 | 		),
1493 | 		'Combining Diacritical Marks Supplement' => array(
1494 | 			0 => 0x1DC0,
1495 | 			1 => 0x1DFF,
1496 | 			2 => 62,
1497 | 		),
1498 | 		'Latin Extended Additional' => array(
1499 | 			0 => 0x1E00,
1500 | 			1 => 0x1EFF,
1501 | 			2 => 63,
1502 | 		),
1503 | 		'Greek Extended' => array(
1504 | 			0 => 0x1F00,
1505 | 			1 => 0x1FFF,
1506 | 			2 => 64,
1507 | 		),
1508 | 		'General Punctuation' => array(
1509 | 			0 => 0x2000,
1510 | 			1 => 0x206F,
1511 | 			2 => 65,
1512 | 		),
1513 | 		'Superscripts and Subscripts' => array(
1514 | 			0 => 0x2070,
1515 | 			1 => 0x209F,
1516 | 			2 => 66,
1517 | 		),
1518 | 		'Currency Symbols' => array(
1519 | 			0 => 0x20A0,
1520 | 			1 => 0x20CF,
1521 | 			2 => 67,
1522 | 		),
1523 | 		'Combining Diacritical Marks for Symbols' => array(
1524 | 			0 => 0x20D0,
1525 | 			1 => 0x20FF,
1526 | 			2 => 68,
1527 | 		),
1528 | 		'Letterlike Symbols' => array(
1529 | 			0 => 0x2100,
1530 | 			1 => 0x214F,
1531 | 			2 => 69,
1532 | 		),
1533 | 		'Number Forms' => array(
1534 | 			0 => 0x2150,
1535 | 			1 => 0x218F,
1536 | 			2 => 70,
1537 | 		),
1538 | 		'Arrows' => array(
1539 | 			0 => 0x2190,
1540 | 			1 => 0x21FF,
1541 | 			2 => 71,
1542 | 		),
1543 | 		'Mathematical Operators' => array(
1544 | 			0 => 0x2200,
1545 | 			1 => 0x22FF,
1546 | 			2 => 72,
1547 | 		),
1548 | 		'Miscellaneous Technical' => array(
1549 | 			0 => 0x2300,
1550 | 			1 => 0x23FF,
1551 | 			2 => 73,
1552 | 		),
1553 | 		'Control Pictures' => array(
1554 | 			0 => 0x2400,
1555 | 			1 => 0x243F,
1556 | 			2 => 74,
1557 | 		),
1558 | 		'Optical Character Recognition' => array(
1559 | 			0 => 0x2440,
1560 | 			1 => 0x245F,
1561 | 			2 => 75,
1562 | 		),
1563 | 		'Enclosed Alphanumerics' => array(
1564 | 			0 => 0x2460,
1565 | 			1 => 0x24FF,
1566 | 			2 => 76,
1567 | 		),
1568 | 		'Box Drawing' => array(
1569 | 			0 => 0x2500,
1570 | 			1 => 0x257F,
1571 | 			2 => 77,
1572 | 		),
1573 | 		'Block Elements' => array(
1574 | 			0 => 0x2580,
1575 | 			1 => 0x259F,
1576 | 			2 => 78,
1577 | 		),
1578 | 		'Geometric Shapes' => array(
1579 | 			0 => 0x25A0,
1580 | 			1 => 0x25FF,
1581 | 			2 => 79,
1582 | 		),
1583 | 		'Miscellaneous Symbols' => array(
1584 | 			0 => 0x2600,
1585 | 			1 => 0x26FF,
1586 | 			2 => 80,
1587 | 		),
1588 | 		'Dingbats' => array(
1589 | 			0 => 0x2700,
1590 | 			1 => 0x27BF,
1591 | 			2 => 81,
1592 | 		),
1593 | 		'Miscellaneous Mathematical Symbols-A' => array(
1594 | 			0 => 0x27C0,
1595 | 			1 => 0x27EF,
1596 | 			2 => 82,
1597 | 		),
1598 | 		'Supplemental Arrows-A' => array(
1599 | 			0 => 0x27F0,
1600 | 			1 => 0x27FF,
1601 | 			2 => 83,
1602 | 		),
1603 | 		'Braille Patterns' => array(
1604 | 			0 => 0x2800,
1605 | 			1 => 0x28FF,
1606 | 			2 => 84,
1607 | 		),
1608 | 		'Supplemental Arrows-B' => array(
1609 | 			0 => 0x2900,
1610 | 			1 => 0x297F,
1611 | 			2 => 85,
1612 | 		),
1613 | 		'Miscellaneous Mathematical Symbols-B' => array(
1614 | 			0 => 0x2980,
1615 | 			1 => 0x29FF,
1616 | 			2 => 86,
1617 | 		),
1618 | 		'Supplemental Mathematical Operators' => array(
1619 | 			0 => 0x2A00,
1620 | 			1 => 0x2AFF,
1621 | 			2 => 87,
1622 | 		),
1623 | 		'Miscellaneous Symbols and Arrows' => array(
1624 | 			0 => 0x2B00,
1625 | 			1 => 0x2BFF,
1626 | 			2 => 88,
1627 | 		),
1628 | 		'Glagolitic' => array(
1629 | 			0 => 0x2C00,
1630 | 			1 => 0x2C5F,
1631 | 			2 => 89,
1632 | 		),
1633 | 		'Latin Extended-C' => array(
1634 | 			0 => 0x2C60,
1635 | 			1 => 0x2C7F,
1636 | 			2 => 90,
1637 | 		),
1638 | 		'Coptic' => array(
1639 | 			0 => 0x2C80,
1640 | 			1 => 0x2CFF,
1641 | 			2 => 91,
1642 | 		),
1643 | 		'Georgian Supplement' => array(
1644 | 			0 => 0x2D00,
1645 | 			1 => 0x2D2F,
1646 | 			2 => 92,
1647 | 		),
1648 | 		'Tifinagh' => array(
1649 | 			0 => 0x2D30,
1650 | 			1 => 0x2D7F,
1651 | 			2 => 93,
1652 | 		),
1653 | 		'Ethiopic Extended' => array(
1654 | 			0 => 0x2D80,
1655 | 			1 => 0x2DDF,
1656 | 			2 => 94,
1657 | 		),
1658 | 		'Cyrillic Extended-A' => array(
1659 | 			0 => 0x2DE0,
1660 | 			1 => 0x2DFF,
1661 | 			2 => 95,
1662 | 		),
1663 | 		'Supplemental Punctuation' => array(
1664 | 			0 => 0x2E00,
1665 | 			1 => 0x2E7F,
1666 | 			2 => 96,
1667 | 		),
1668 | 		'CJK Radicals Supplement' => array(
1669 | 			0 => 0x2E80,
1670 | 			1 => 0x2EFF,
1671 | 			2 => 97,
1672 | 		),
1673 | 		'Kangxi Radicals' => array(
1674 | 			0 => 0x2F00,
1675 | 			1 => 0x2FDF,
1676 | 			2 => 98,
1677 | 		),
1678 | 		'Ideographic Description Characters' => array(
1679 | 			0 => 0x2FF0,
1680 | 			1 => 0x2FFF,
1681 | 			2 => 99,
1682 | 		),
1683 | 		'CJK Symbols and Punctuation' => array(
1684 | 			0 => 0x3000,
1685 | 			1 => 0x303F,
1686 | 			2 => 100,
1687 | 		),
1688 | 		'Hiragana' => array(
1689 | 			0 => 0x3040,
1690 | 			1 => 0x309F,
1691 | 			2 => 101,
1692 | 		),
1693 | 		'Katakana' => array(
1694 | 			0 => 0x30A0,
1695 | 			1 => 0x30FF,
1696 | 			2 => 102,
1697 | 		),
1698 | 		'Bopomofo' => array(
1699 | 			0 => 0x3100,
1700 | 			1 => 0x312F,
1701 | 			2 => 103,
1702 | 		),
1703 | 		'Hangul Compatibility Jamo' => array(
1704 | 			0 => 0x3130,
1705 | 			1 => 0x318F,
1706 | 			2 => 104,
1707 | 		),
1708 | 		'Kanbun' => array(
1709 | 			0 => 0x3190,
1710 | 			1 => 0x319F,
1711 | 			2 => 105,
1712 | 		),
1713 | 		'Bopomofo Extended' => array(
1714 | 			0 => 0x31A0,
1715 | 			1 => 0x31BF,
1716 | 			2 => 106,
1717 | 		),
1718 | 		'CJK Strokes' => array(
1719 | 			0 => 0x31C0,
1720 | 			1 => 0x31EF,
1721 | 			2 => 107,
1722 | 		),
1723 | 		'Katakana Phonetic Extensions' => array(
1724 | 			0 => 0x31F0,
1725 | 			1 => 0x31FF,
1726 | 			2 => 108,
1727 | 		),
1728 | 		'Enclosed CJK Letters and Months' => array(
1729 | 			0 => 0x3200,
1730 | 			1 => 0x32FF,
1731 | 			2 => 109,
1732 | 		),
1733 | 		'CJK Compatibility' => array(
1734 | 			0 => 0x3300,
1735 | 			1 => 0x33FF,
1736 | 			2 => 110,
1737 | 		),
1738 | 		'CJK Unified Ideographs Extension A' => array(
1739 | 			0 => 0x3400,
1740 | 			1 => 0x4DBF,
1741 | 			2 => 111,
1742 | 		),
1743 | 		'Yijing Hexagram Symbols' => array(
1744 | 			0 => 0x4DC0,
1745 | 			1 => 0x4DFF,
1746 | 			2 => 112,
1747 | 		),
1748 | 		'CJK Unified Ideographs' => array(
1749 | 			0 => 0x4E00,
1750 | 			1 => 0x9FFF,
1751 | 			2 => 113,
1752 | 		),
1753 | 		'Yi Syllables' => array(
1754 | 			0 => 0xA000,
1755 | 			1 => 0xA48F,
1756 | 			2 => 114,
1757 | 		),
1758 | 		'Yi Radicals' => array(
1759 | 			0 => 0xA490,
1760 | 			1 => 0xA4CF,
1761 | 			2 => 115,
1762 | 		),
1763 | 		'Lisu' => array(
1764 | 			0 => 0xA4D0,
1765 | 			1 => 0xA4FF,
1766 | 			2 => 116,
1767 | 		),
1768 | 		'Vai' => array(
1769 | 			0 => 0xA500,
1770 | 			1 => 0xA63F,
1771 | 			2 => 117,
1772 | 		),
1773 | 		'Cyrillic Extended-B' => array(
1774 | 			0 => 0xA640,
1775 | 			1 => 0xA69F,
1776 | 			2 => 118,
1777 | 		),
1778 | 		'Bamum' => array(
1779 | 			0 => 0xA6A0,
1780 | 			1 => 0xA6FF,
1781 | 			2 => 119,
1782 | 		),
1783 | 		'Modifier Tone Letters' => array(
1784 | 			0 => 0xA700,
1785 | 			1 => 0xA71F,
1786 | 			2 => 120,
1787 | 		),
1788 | 		'Latin Extended-D' => array(
1789 | 			0 => 0xA720,
1790 | 			1 => 0xA7FF,
1791 | 			2 => 121,
1792 | 		),
1793 | 		'Syloti Nagri' => array(
1794 | 			0 => 0xA800,
1795 | 			1 => 0xA82F,
1796 | 			2 => 122,
1797 | 		),
1798 | 		'Common Indic Number Forms' => array(
1799 | 			0 => 0xA830,
1800 | 			1 => 0xA83F,
1801 | 			2 => 123,
1802 | 		),
1803 | 		'Phags-pa' => array(
1804 | 			0 => 0xA840,
1805 | 			1 => 0xA87F,
1806 | 			2 => 124,
1807 | 		),
1808 | 		'Saurashtra' => array(
1809 | 			0 => 0xA880,
1810 | 			1 => 0xA8DF,
1811 | 			2 => 125,
1812 | 		),
1813 | 		'Devanagari Extended' => array(
1814 | 			0 => 0xA8E0,
1815 | 			1 => 0xA8FF,
1816 | 			2 => 126,
1817 | 		),
1818 | 		'Kayah Li' => array(
1819 | 			0 => 0xA900,
1820 | 			1 => 0xA92F,
1821 | 			2 => 127,
1822 | 		),
1823 | 		'Rejang' => array(
1824 | 			0 => 0xA930,
1825 | 			1 => 0xA95F,
1826 | 			2 => 128,
1827 | 		),
1828 | 		'Hangul Jamo Extended-A' => array(
1829 | 			0 => 0xA960,
1830 | 			1 => 0xA97F,
1831 | 			2 => 129,
1832 | 		),
1833 | 		'Javanese' => array(
1834 | 			0 => 0xA980,
1835 | 			1 => 0xA9DF,
1836 | 			2 => 130,
1837 | 		),
1838 | 		'Cham' => array(
1839 | 			0 => 0xAA00,
1840 | 			1 => 0xAA5F,
1841 | 			2 => 131,
1842 | 		),
1843 | 		'Myanmar Extended-A' => array(
1844 | 			0 => 0xAA60,
1845 | 			1 => 0xAA7F,
1846 | 			2 => 132,
1847 | 		),
1848 | 		'Tai Viet' => array(
1849 | 			0 => 0xAA80,
1850 | 			1 => 0xAADF,
1851 | 			2 => 133,
1852 | 		),
1853 | 		'Ethiopic Extended-A' => array(
1854 | 			0 => 0xAB00,
1855 | 			1 => 0xAB2F,
1856 | 			2 => 134,
1857 | 		),
1858 | 		'Meetei Mayek' => array(
1859 | 			0 => 0xABC0,
1860 | 			1 => 0xABFF,
1861 | 			2 => 135,
1862 | 		),
1863 | 		'Hangul Syllables' => array(
1864 | 			0 => 0xAC00,
1865 | 			1 => 0xD7AF,
1866 | 			2 => 136,
1867 | 		),
1868 | 		'Hangul Jamo Extended-B' => array(
1869 | 			0 => 0xD7B0,
1870 | 			1 => 0xD7FF,
1871 | 			2 => 137,
1872 | 		),
1873 | 		'High Surrogates' => array(
1874 | 			0 => 0xD800,
1875 | 			1 => 0xDB7F,
1876 | 			2 => 138,
1877 | 		),
1878 | 		'High Private Use Surrogates' => array(
1879 | 			0 => 0xDB80,
1880 | 			1 => 0xDBFF,
1881 | 			2 => 139,
1882 | 		),
1883 | 		'Low Surrogates' => array(
1884 | 			0 => 0xDC00,
1885 | 			1 => 0xDFFF,
1886 | 			2 => 140,
1887 | 		),
1888 | 		'Private Use Area' => array(
1889 | 			0 => 0xE000,
1890 | 			1 => 0xF8FF,
1891 | 			2 => 141,
1892 | 		),
1893 | 		'CJK Compatibility Ideographs' => array(
1894 | 			0 => 0xF900,
1895 | 			1 => 0xFAFF,
1896 | 			2 => 142,
1897 | 		),
1898 | 		'Alphabetic Presentation Forms' => array(
1899 | 			0 => 0xFB00,
1900 | 			1 => 0xFB4F,
1901 | 			2 => 143,
1902 | 		),
1903 | 		'Arabic Presentation Forms-A' => array(
1904 | 			0 => 0xFB50,
1905 | 			1 => 0xFDFF,
1906 | 			2 => 144,
1907 | 		),
1908 | 		'Variation Selectors' => array(
1909 | 			0 => 0xFE00,
1910 | 			1 => 0xFE0F,
1911 | 			2 => 145,
1912 | 		),
1913 | 		'Vertical Forms' => array(
1914 | 			0 => 0xFE10,
1915 | 			1 => 0xFE1F,
1916 | 			2 => 146,
1917 | 		),
1918 | 		'Combining Half Marks' => array(
1919 | 			0 => 0xFE20,
1920 | 			1 => 0xFE2F,
1921 | 			2 => 147,
1922 | 		),
1923 | 		'CJK Compatibility Forms' => array(
1924 | 			0 => 0xFE30,
1925 | 			1 => 0xFE4F,
1926 | 			2 => 148,
1927 | 		),
1928 | 		'Small Form Variants' => array(
1929 | 			0 => 0xFE50,
1930 | 			1 => 0xFE6F,
1931 | 			2 => 149,
1932 | 		),
1933 | 		'Arabic Presentation Forms-B' => array(
1934 | 			0 => 0xFE70,
1935 | 			1 => 0xFEFF,
1936 | 			2 => 150,
1937 | 		),
1938 | 		'Halfwidth and Fullwidth Forms' => array(
1939 | 			0 => 0xFF00,
1940 | 			1 => 0xFFEF,
1941 | 			2 => 151,
1942 | 		),
1943 | 		'Specials' => array(
1944 | 			0 => 0xFFF0,
1945 | 			1 => 0xFFFF,
1946 | 			2 => 152,
1947 | 		),
1948 | 		'Linear B Syllabary' => array(
1949 | 			0 => 0x10000,
1950 | 			1 => 0x1007F,
1951 | 			2 => 153,
1952 | 		),
1953 | 		'Linear B Ideograms' => array(
1954 | 			0 => 0x10080,
1955 | 			1 => 0x100FF,
1956 | 			2 => 154,
1957 | 		),
1958 | 		'Aegean Numbers' => array(
1959 | 			0 => 0x10100,
1960 | 			1 => 0x1013F,
1961 | 			2 => 155,
1962 | 		),
1963 | 		'Ancient Greek Numbers' => array(
1964 | 			0 => 0x10140,
1965 | 			1 => 0x1018F,
1966 | 			2 => 156,
1967 | 		),
1968 | 		'Ancient Symbols' => array(
1969 | 			0 => 0x10190,
1970 | 			1 => 0x101CF,
1971 | 			2 => 157,
1972 | 		),
1973 | 		'Phaistos Disc' => array(
1974 | 			0 => 0x101D0,
1975 | 			1 => 0x101FF,
1976 | 			2 => 158,
1977 | 		),
1978 | 		'Lycian' => array(
1979 | 			0 => 0x10280,
1980 | 			1 => 0x1029F,
1981 | 			2 => 159,
1982 | 		),
1983 | 		'Carian' => array(
1984 | 			0 => 0x102A0,
1985 | 			1 => 0x102DF,
1986 | 			2 => 160,
1987 | 		),
1988 | 		'Old Italic' => array(
1989 | 			0 => 0x10300,
1990 | 			1 => 0x1032F,
1991 | 			2 => 161,
1992 | 		),
1993 | 		'Gothic' => array(
1994 | 			0 => 0x10330,
1995 | 			1 => 0x1034F,
1996 | 			2 => 162,
1997 | 		),
1998 | 		'Ugaritic' => array(
1999 | 			0 => 0x10380,
2000 | 			1 => 0x1039F,
2001 | 			2 => 163,
2002 | 		),
2003 | 		'Old Persian' => array(
2004 | 			0 => 0x103A0,
2005 | 			1 => 0x103DF,
2006 | 			2 => 164,
2007 | 		),
2008 | 		'Deseret' => array(
2009 | 			0 => 0x10400,
2010 | 			1 => 0x1044F,
2011 | 			2 => 165,
2012 | 		),
2013 | 		'Shavian' => array(
2014 | 			0 => 0x10450,
2015 | 			1 => 0x1047F,
2016 | 			2 => 166,
2017 | 		),
2018 | 		'Osmanya' => array(
2019 | 			0 => 0x10480,
2020 | 			1 => 0x104AF,
2021 | 			2 => 167,
2022 | 		),
2023 | 		'Cypriot Syllabary' => array(
2024 | 			0 => 0x10800,
2025 | 			1 => 0x1083F,
2026 | 			2 => 168,
2027 | 		),
2028 | 		'Imperial Aramaic' => array(
2029 | 			0 => 0x10840,
2030 | 			1 => 0x1085F,
2031 | 			2 => 169,
2032 | 		),
2033 | 		'Phoenician' => array(
2034 | 			0 => 0x10900,
2035 | 			1 => 0x1091F,
2036 | 			2 => 170,
2037 | 		),
2038 | 		'Lydian' => array(
2039 | 			0 => 0x10920,
2040 | 			1 => 0x1093F,
2041 | 			2 => 171,
2042 | 		),
2043 | 		'Kharoshthi' => array(
2044 | 			0 => 0x10A00,
2045 | 			1 => 0x10A5F,
2046 | 			2 => 172,
2047 | 		),
2048 | 		'Old South Arabian' => array(
2049 | 			0 => 0x10A60,
2050 | 			1 => 0x10A7F,
2051 | 			2 => 173,
2052 | 		),
2053 | 		'Avestan' => array(
2054 | 			0 => 0x10B00,
2055 | 			1 => 0x10B3F,
2056 | 			2 => 174,
2057 | 		),
2058 | 		'Inscriptional Parthian' => array(
2059 | 			0 => 0x10B40,
2060 | 			1 => 0x10B5F,
2061 | 			2 => 175,
2062 | 		),
2063 | 		'Inscriptional Pahlavi' => array(
2064 | 			0 => 0x10B60,
2065 | 			1 => 0x10B7F,
2066 | 			2 => 176,
2067 | 		),
2068 | 		'Old Turkic' => array(
2069 | 			0 => 0x10C00,
2070 | 			1 => 0x10C4F,
2071 | 			2 => 177,
2072 | 		),
2073 | 		'Rumi Numeral Symbols' => array(
2074 | 			0 => 0x10E60,
2075 | 			1 => 0x10E7F,
2076 | 			2 => 178,
2077 | 		),
2078 | 		'Brahmi' => array(
2079 | 			0 => 0x11000,
2080 | 			1 => 0x1107F,
2081 | 			2 => 179,
2082 | 		),
2083 | 		'Kaithi' => array(
2084 | 			0 => 0x11080,
2085 | 			1 => 0x110CF,
2086 | 			2 => 180,
2087 | 		),
2088 | 		'Cuneiform' => array(
2089 | 			0 => 0x12000,
2090 | 			1 => 0x123FF,
2091 | 			2 => 181,
2092 | 		),
2093 | 		'Cuneiform Numbers and Punctuation' => array(
2094 | 			0 => 0x12400,
2095 | 			1 => 0x1247F,
2096 | 			2 => 182,
2097 | 		),
2098 | 		'Egyptian Hieroglyphs' => array(
2099 | 			0 => 0x13000,
2100 | 			1 => 0x1342F,
2101 | 			2 => 183,
2102 | 		),
2103 | 		'Bamum Supplement' => array(
2104 | 			0 => 0x16800,
2105 | 			1 => 0x16A3F,
2106 | 			2 => 184,
2107 | 		),
2108 | 		'Kana Supplement' => array(
2109 | 			0 => 0x1B000,
2110 | 			1 => 0x1B0FF,
2111 | 			2 => 185,
2112 | 		),
2113 | 		'Byzantine Musical Symbols' => array(
2114 | 			0 => 0x1D000,
2115 | 			1 => 0x1D0FF,
2116 | 			2 => 186,
2117 | 		),
2118 | 		'Musical Symbols' => array(
2119 | 			0 => 0x1D100,
2120 | 			1 => 0x1D1FF,
2121 | 			2 => 187,
2122 | 		),
2123 | 		'Ancient Greek Musical Notation' => array(
2124 | 			0 => 0x1D200,
2125 | 			1 => 0x1D24F,
2126 | 			2 => 188,
2127 | 		),
2128 | 		'Tai Xuan Jing Symbols' => array(
2129 | 			0 => 0x1D300,
2130 | 			1 => 0x1D35F,
2131 | 			2 => 189,
2132 | 		),
2133 | 		'Counting Rod Numerals' => array(
2134 | 			0 => 0x1D360,
2135 | 			1 => 0x1D37F,
2136 | 			2 => 190,
2137 | 		),
2138 | 		'Mathematical Alphanumeric Symbols' => array(
2139 | 			0 => 0x1D400,
2140 | 			1 => 0x1D7FF,
2141 | 			2 => 191,
2142 | 		),
2143 | 		'Mahjong Tiles' => array(
2144 | 			0 => 0x1F000,
2145 | 			1 => 0x1F02F,
2146 | 			2 => 192,
2147 | 		),
2148 | 		'Domino Tiles' => array(
2149 | 			0 => 0x1F030,
2150 | 			1 => 0x1F09F,
2151 | 			2 => 193,
2152 | 		),
2153 | 		'Playing Cards' => array(
2154 | 			0 => 0x1F0A0,
2155 | 			1 => 0x1F0FF,
2156 | 			2 => 194,
2157 | 		),
2158 | 		'Enclosed Alphanumeric Supplement' => array(
2159 | 			0 => 0x1F100,
2160 | 			1 => 0x1F1FF,
2161 | 			2 => 195,
2162 | 		),
2163 | 		'Enclosed Ideographic Supplement' => array(
2164 | 			0 => 0x1F200,
2165 | 			1 => 0x1F2FF,
2166 | 			2 => 196,
2167 | 		),
2168 | 		'Miscellaneous Symbols And Pictographs' => array(
2169 | 			0 => 0x1F300,
2170 | 			1 => 0x1F5FF,
2171 | 			2 => 197,
2172 | 		),
2173 | 		'Emoticons' => array(
2174 | 			0 => 0x1F600,
2175 | 			1 => 0x1F64F,
2176 | 			2 => 198,
2177 | 		),
2178 | 		'Transport And Map Symbols' => array(
2179 | 			0 => 0x1F680,
2180 | 			1 => 0x1F6FF,
2181 | 			2 => 199,
2182 | 		),
2183 | 		'Alchemical Symbols' => array(
2184 | 			0 => 0x1F700,
2185 | 			1 => 0x1F77F,
2186 | 			2 => 200,
2187 | 		),
2188 | 		'CJK Unified Ideographs Extension B' => array(
2189 | 			0 => 0x20000,
2190 | 			1 => 0x2A6DF,
2191 | 			2 => 201,
2192 | 		),
2193 | 		'CJK Unified Ideographs Extension C' => array(
2194 | 			0 => 0x2A700,
2195 | 			1 => 0x2B73F,
2196 | 			2 => 202,
2197 | 		),
2198 | 		'CJK Unified Ideographs Extension D' => array(
2199 | 			0 => 0x2B740,
2200 | 			1 => 0x2B81F,
2201 | 			2 => 203,
2202 | 		),
2203 | 		'CJK Compatibility Ideographs Supplement' => array(
2204 | 			0 => 0x2F800,
2205 | 			1 => 0x2FA1F,
2206 | 			2 => 204,
2207 | 		),
2208 | 		'Tags' => array(
2209 | 			0 => 0xE0000,
2210 | 			1 => 0xE007F,
2211 | 			2 => 205,
2212 | 		),
2213 | 		'Variation Selectors Supplement' => array(
2214 | 			0 => 0xE0100,
2215 | 			1 => 0xE01EF,
2216 | 			2 => 206,
2217 | 		),
2218 | 		'Supplementary Private Use Area-A' => array(
2219 | 			0 => 0xF0000,
2220 | 			1 => 0xFFFFF,
2221 | 			2 => 207,
2222 | 		),
2223 | 		'Supplementary Private Use Area-B' => array(
2224 | 			0 => 0x100000,
2225 | 			1 => 0x10FFFF,
2226 | 			2 => 208,
2227 | 		),
2228 | 	);
2229 | 
2230 | 	#calling the methods of this class only statically!
2231 | 	private function __construct() {}
2232 | 
2233 | 	/**
2234 | 	 * Remove combining diactrical marks, with possibility of the restore
2235 | 	 * Удаляет диакритические знаки в тексте, с возможностью восстановления (опция)
2236 | 	 *
2237 | 	 * @param   string|null       $s
2238 | 	 * @param   array|null        $additional_chars   for example: "\xc2\xad"  #soft hyphen = discretionary hyphen
2239 | 	 * @param   bool              $is_can_restored
2240 | 	 * @param   array|null        &$restore_table
2241 | 	 * @return  string|bool|null  Returns FALSE if error occurred
2242 | 	 */
2243 | 	public static function diactrical_remove($s, $additional_chars = null, $is_can_restored = false, &$restore_table = null)
2244 | 	{
2245 | 		if (! ReflectionTypeHint::isValid()) return false;
2246 | 		if (is_null($s)) return $s;
2247 | 
2248 | 		if ($additional_chars)
2249 | 		{
2250 | 			foreach ($additional_chars as $k => &$v) $v = preg_quote($v, '/');
2251 | 			$re = '/((?>' . self::$diactrical_re . '|' . implode('|', $additional_chars) . ')+)/sxSX';
2252 | 		}
2253 | 		else $re = '/((?>' . self::$diactrical_re . ')+)/sxSX';
2254 | 		if (! $is_can_restored) return preg_replace($re, '', $s);
2255 | 
2256 | 		$restore_table = array();
2257 | 		$a = preg_split($re, $s, -1, PREG_SPLIT_DELIM_CAPTURE);
2258 | 		$c = count($a);
2259 | 		if ($c === 1) return $s;
2260 | 		$pos = 0;
2261 | 		$s2 = '';
2262 | 		for ($i = 0; $i < $c - 1; $i += 2)
2263 | 		{
2264 | 			$s2 .= $a[$i];
2265 | 			#запоминаем символьные (не байтовые!) позиции
2266 | 			$pos += self::strlen($a[$i]);
2267 | 			$restore_table['offsets'][$pos] = $a[$i + 1];
2268 | 		}
2269 | 		$restore_table['length'] = $pos + self::strlen(end($a));
2270 | 		return $s2 . end($a);
2271 | 	}
2272 | 
2273 | 	/**
2274 | 	 * Restore combining diactrical marks, removed by self::diactrical_remove()
2275 | 	 * In Russian:
2276 | 	 * Восстанавливает диакритические знаки в тексте, при условии, что их символьные позиции и кол-во символов не изменились!
2277 | 	 *
2278 | 	 * @see     self::diactrical_remove()
2279 | 	 * @param   string|null       $s
2280 | 	 * @param   array             $restore_table
2281 | 	 * @return  string|bool|null  Returns FALSE if error occurred (broken $restore_table)
2282 | 	 */
2283 | 	public static function diactrical_restore($s, array $restore_table)
2284 | 	{
2285 | 		if (! ReflectionTypeHint::isValid()) return false;
2286 | 		if (is_null($s)) return $s;
2287 | 
2288 | 		if (! $restore_table) return $s;
2289 | 		if (! is_int(@$restore_table['length']) ||
2290 | 			! is_array(@$restore_table['offsets']) ||
2291 | 			$restore_table['length'] !== self::strlen($s)) return false;
2292 | 		$a = array();
2293 | 		$length = $offset = 0;
2294 | 		$s2 = '';
2295 | 		foreach ($restore_table['offsets'] as $pos => $diactricals)
2296 | 		{
2297 | 			$length = $pos - $offset;
2298 | 			$s2 .= self::substr($s, $offset, $length) . $diactricals;
2299 | 			$offset = $pos;
2300 | 		}
2301 | 		return $s2 . self::substr($s, $offset, strlen($s));
2302 | 	}
2303 | 
2304 | 	/**
2305 | 	 * Encodes data from another character encoding to UTF-8.
2306 | 	 *
2307 | 	 * @param   array|scalar|null  $data
2308 | 	 * @param   string             $charset
2309 | 	 * @return  array|scalar|null  Returns FALSE if error occurred
2310 | 	 */
2311 | 	public static function convert_from($data, $charset = 'cp1251')
2312 | 	{
2313 | 		if (! ReflectionTypeHint::isValid()) return false;
2314 | 		return self::_convert($data, $charset, 'UTF-8');
2315 | 	}
2316 | 
2317 | 	/**
2318 | 	 * Encodes data from UTF-8 to another character encoding.
2319 | 	 *
2320 | 	 * @param   array|scalar|null  $data
2321 | 	 * @param   string             $charset
2322 | 	 * @return  array|scalar|null  Returns FALSE if error occurred
2323 | 	 */
2324 | 	public static function convert_to($data, $charset = 'cp1251')
2325 | 	{
2326 | 		if (! ReflectionTypeHint::isValid()) return false;
2327 | 		return self::_convert($data, 'UTF-8', $charset);
2328 | 	}
2329 | 
2330 | 	/**
2331 | 	 * Recoding the data of any structure to/from UTF-8.
2332 | 	 * Arrays traversed recursively, recoded keys and values.
2333 | 	 *
2334 | 	 * @see mb_encoding_aliases()
2335 | 	 * @param   array|scalar|null  $data
2336 | 	 * @param   string             $charset_from
2337 | 	 * @param   string             $charset_to
2338 | 	 * @return  array|scalar|null  Returns FALSE if error occurred
2339 | 	 */
2340 | 	private static function _convert($data, $charset_from, $charset_to)
2341 | 	{
2342 | 		if (! ReflectionTypeHint::isValid()) return false;  #for recursive calls
2343 | 		if ($charset_from === $charset_to) return $data;
2344 | 		if (is_array($data))
2345 | 		{
2346 | 			$d = array();
2347 | 			foreach ($data as $k => &$v)
2348 | 			{
2349 | 				$k = self::_convert($k, $charset_from, $charset_to);
2350 | 				if ($k === false) return false;
2351 | 				$d[$k] = self::_convert($v, $charset_from, $charset_to);
2352 | 				if ($d[$k] === false && ! is_bool($v)) return false;
2353 | 			}
2354 | 			return $d;
2355 | 		}
2356 | 		if (is_string($data))
2357 | 		{
2358 | 			#smart behaviour for errors protected + speed improve
2359 | 			if ($charset_from === 'UTF-8' && ! self::is_utf8($data)) return $data;
2360 | 			if ($charset_to === 'UTF-8' && self::is_utf8($data)) return $data;
2361 | 
2362 | 			#since PHP-5.3.x iconv() faster then mb_convert_encoding()
2363 | 			if (function_exists('iconv')) return iconv($charset_from, $charset_to . '//IGNORE//TRANSLIT', $data);
2364 | 			if (function_exists('mb_convert_encoding')) return mb_convert_encoding($data, $charset_to, $charset_from);
2365 | 
2366 | 			#charset_from
2367 | 			if ($charset_from === 'UTF-16' || $charset_from === 'UCS-2')  return self::_convert_from_utf16($data);
2368 | 			if ($charset_from === 'cp1251' || $charset_from === 'cp1259') return strtr($data, self::$cp1259_table);
2369 | 			if ($charset_from === 'koi8-r' || $charset_from === 'KOI8-R') return strtr(convert_cyr_string($data, 'k', 'w'), self::$cp1259_table);
2370 | 			if ($charset_from === 'iso8859-5') return strtr(convert_cyr_string($data, 'i', 'w'), self::$cp1259_table);
2371 | 			if ($charset_from === 'cp866') return strtr(convert_cyr_string($data, 'a', 'w'), self::$cp1259_table);
2372 | 			if ($charset_from === 'mac-cyrillic') return strtr(convert_cyr_string($data, 'm', 'w'), self::$cp1259_table);
2373 | 
2374 | 			#charset_to
2375 | 			if ($charset_to === 'cp1251' || $charset_to === 'cp1259') return strtr($data, array_flip(self::$cp1259_table));
2376 | 
2377 | 			#last trying
2378 | 			if (function_exists('recode_string'))
2379 | 			{
2380 | 				$s = @recode_string($charset_from . '..' . $charset_to, $data);
2381 | 				if (is_string($s)) return $s;
2382 | 			}
2383 | 
2384 | 			trigger_error('Convert "' . $charset_from . '" --> "' . $charset_to . '" is not supported native, "iconv" or "mbstring" extension required', E_USER_WARNING);
2385 | 			return false;
2386 | 		}
2387 | 		return $data;
2388 | 	}
2389 | 
2390 | 	/**
2391 | 	 * Convert UTF-16 / UCS-2 encoding string to UTF-8.
2392 | 	 * Surrogates UTF-16 are supported!
2393 | 	 * 
2394 | 	 * In Russian:
2395 | 	 * Преобразует строку из кодировки UTF-16 / UCS-2 в UTF-8.
2396 | 	 * Суррогаты UTF-16 поддерживаются!
2397 | 	 *
2398 | 	 * @param    string        $s
2399 | 	 * @param    string        $type      'BE' -- big endian byte order
2400 | 	 *                                    'LE' -- little endian byte order
2401 | 	 * @param    bool          $to_array  returns array chars instead whole string?
2402 | 	 * @return   string|array|bool        UTF-8 string, array chars or FALSE if error occurred
2403 | 	 */
2404 | 	private static function _convert_from_utf16($s, $type = 'BE', $to_array = false)
2405 | 	{
2406 | 		static $types = array(
2407 | 			'BE' => 'n',  #unsigned short (always 16 bit, big endian byte order)
2408 | 			'LE' => 'v',  #unsigned short (always 16 bit, little endian byte order)
2409 | 		);
2410 | 		if (! array_key_exists($type, $types))
2411 | 		{
2412 | 			trigger_error('Unexpected value in 2-nd parameter, "' . $type . '" given!', E_USER_WARNING);
2413 | 			return false;
2414 | 		}
2415 | 		#the fastest way:
2416 | 		if (function_exists('iconv') || function_exists('mb_convert_encoding'))
2417 | 		{
2418 | 			if (function_exists('iconv'))                   $s = iconv('UTF-16' . $type, 'UTF-8', $s);
2419 | 			elseif (function_exists('mb_convert_encoding')) $s = mb_convert_encoding($s, 'UTF-8', 'UTF-16' . $type);
2420 | 			if (! $to_array) return $s;
2421 | 			return self::str_split($s);
2422 | 		}
2423 | 
2424 | 		/*
2425 |         http://en.wikipedia.org/wiki/UTF-16
2426 | 
2427 |         The improvement that UTF-16 made over UCS-2 is its ability to encode
2428 |         characters in planes 1-16, not just those in plane 0 (BMP).
2429 | 
2430 |         UTF-16 represents non-BMP characters (those from U+10000 through U+10FFFF)
2431 |         using a pair of 16-bit words, known as a surrogate pair.
2432 |         First 1000016 is subtracted from the code point to give a 20-bit value.
2433 |         This is then split into two separate 10-bit values each of which is represented
2434 |         as a surrogate with the most significant half placed in the first surrogate.
2435 |         To allow safe use of simple word-oriented string processing, separate ranges
2436 |         of values are used for the two surrogates: 0xD800-0xDBFF for the first, most
2437 |         significant surrogate and 0xDC00-0xDFFF for the second, least significant surrogate.
2438 | 
2439 |         For example, the character at code point U+10000 becomes the code unit sequence 0xD800 0xDC00,
2440 |         and the character at U+10FFFD, the upper limit of Unicode, becomes the sequence 0xDBFF 0xDFFD.
2441 |         Unicode and ISO/IEC 10646 do not, and will never, assign characters to any of the code points
2442 |         in the U+D800-U+DFFF range, so an individual code value from a surrogate pair does not ever
2443 |         represent a character.
2444 | 
2445 |         http://www.russellcottrell.com/greek/utilities/SurrogatePairCalculator.htm
2446 |         http://www.russellcottrell.com/greek/utilities/UnicodeRanges.htm
2447 | 
2448 |         Conversion of a Unicode scalar value S to a surrogate pair <H, L>:
2449 |           H = Math.floor((S - 0x10000) / 0x400) + 0xD800;
2450 |           L = ((S - 0x10000) % 0x400) + 0xDC00;
2451 |         The conversion of a surrogate pair <H, L> to a scalar value:
2452 |           N = ((H - 0xD800) * 0x400) + (L - 0xDC00) + 0x10000;
2453 | 		*/
2454 | 		$a = array();
2455 | 		$hi = false;
2456 | 		foreach (unpack($types[$type] . '*', $s) as $codepoint)
2457 | 		{
2458 | 			#surrogate process
2459 | 			if ($hi !== false)
2460 | 			{
2461 | 				$lo = $codepoint;
2462 | 				if ($lo < 0xDC00 || $lo > 0xDFFF) $a[] = "\xEF\xBF\xBD"; #U+FFFD REPLACEMENT CHARACTER (for broken char)
2463 | 				else
2464 | 				{
2465 | 					$codepoint = (($hi - 0xD800) * 0x400) + ($lo - 0xDC00) + 0x10000;
2466 | 					$a[] = self::chr($codepoint);
2467 | 				}
2468 | 				$hi = false;
2469 | 			}
2470 | 			elseif ($codepoint < 0xD800 || $codepoint > 0xDBFF) $a[] = self::chr($codepoint); #not surrogate
2471 | 			else $hi = $codepoint; #surrogate was found
2472 | 		}
2473 | 		return $to_array ? $a : implode('', $a);
2474 | 	}
2475 | 
2476 | 	/**
2477 | 	 * Strips out device control codes in the ASCII range.
2478 | 	 *
2479 | 	 * @param   string|null       String to clean
2480 | 	 * @return  string|bool|null  Returns FALSE if error occurred
2481 | 	 */
2482 | 	public static function strict($s)
2483 | 	{
2484 | 		if (! ReflectionTypeHint::isValid()) return false;
2485 | 		if (is_null($s)) return $s;
2486 | 		return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]+/sSX', '', $s);
2487 | 	}
2488 | 
2489 | 	/**
2490 | 	 * Check the data accessory to the class of characters ASCII.
2491 | 	 * For null, integer, float, boolean returns TRUE.
2492 | 	 *
2493 | 	 * Массивы обходятся рекурсивно, если в хотябы одном элементе массива
2494 | 	 * его значение не ASCII, возвращается FALSE.
2495 | 	 *
2496 | 	 * @param   array|scalar|null  $data
2497 | 	 * @return  bool
2498 | 	 */
2499 | 	public static function is_ascii($data)
2500 | 	{
2501 | 		if (! ReflectionTypeHint::isValid()) return false;
2502 | 		if (is_array($data))
2503 | 		{
2504 | 			foreach ($data as $k => &$v)
2505 | 			{
2506 | 				if (! self::is_ascii($k) || ! self::is_ascii($v)) return false;
2507 | 			}
2508 | 			return true;
2509 | 		}
2510 | 		#ltrim() little faster then preg_match()
2511 | 		#if (is_string($data)) return preg_match('/^[\x00-\x7f]*$/sSX', $data); #deprecated
2512 | 		if (is_string($data)) return ltrim($data, "\x00..\x7f") === '';
2513 | 		if (is_scalar($data) || is_null($data)) return true;  #~ null, integer, float, boolean
2514 | 		return false; #object or resource
2515 | 	}
2516 | 
2517 | 	/**
2518 | 	 * Returns true if data is valid UTF-8 and false otherwise.
2519 | 	 * For null, integer, float, boolean returns TRUE.
2520 | 	 *
2521 | 	 * The arrays are traversed recursively, if At least one element of the array
2522 | 	 * its value is not in UTF-8, returns FALSE.
2523 | 	 *
2524 | 	 * @link    http://www.w3.org/International/questions/qa-forms-utf-8.html
2525 | 	 * @link    http://ru3.php.net/mb_detect_encoding
2526 | 	 * @link    http://webtest.philigon.ru/articles/utf8/
2527 | 	 * @link    http://unicode.coeurlumiere.com/
2528 | 	 * @param   array|scalar|null  $data
2529 | 	 * @param   bool               $is_strict  strict the range of ASCII?
2530 | 	 * @return  bool
2531 | 	 */
2532 | 	public static function is_utf8($data, $is_strict = true)
2533 | 	{
2534 | 		if (! ReflectionTypeHint::isValid()) return false;
2535 | 		if (is_array($data))
2536 | 		{
2537 | 			foreach ($data as $k => &$v)
2538 | 			{
2539 | 				if (! self::is_utf8($k, $is_strict) || ! self::is_utf8($v, $is_strict)) return false;
2540 | 			}
2541 | 			return true;
2542 | 		}
2543 | 		if (is_string($data))
2544 | 		{
2545 | 			if (! preg_match('~~suSX', $data)) return false;
2546 | 			if (function_exists('preg_last_error') && preg_last_error() !== PREG_NO_ERROR) return false;
2547 | 			#preg_match('~~suSX') much faster (up to 4 times), then mb_check_encoding($data, 'UTF-8')!
2548 | 			#if (function_exists('mb_check_encoding') && ! mb_check_encoding($data, 'UTF-8')) return false; #DEPRECATED
2549 | 			if ($is_strict && preg_match('/[^\x09\x0A\x0D\x20-\xBF\xC2-\xF7]/sSX', $data)) return false;
2550 | 			return true;
2551 | 		}
2552 | 		if (is_scalar($data) || is_null($data)) return true;  #~ null, integer, float, boolean
2553 | 		return false; #object or resource
2554 | 	}
2555 | 
2556 | 	/**
2557 | 	 * Tries to detect if a string is in Unicode encoding
2558 | 	 *
2559 | 	 * @deprecated  Slowly, use self::is_utf8() instead
2560 | 	 * @see     self::is_utf8()
2561 | 	 * @param   string   $s          текст
2562 | 	 * @param   bool     $is_strict  строгая проверка диапазона ASCII?
2563 | 	 * @return  bool
2564 | 	 */
2565 | 	public static function check($s, $is_strict = true)
2566 | 	{
2567 | 		if (! ReflectionTypeHint::isValid()) return false;
2568 | 		for ($i = 0, $len = strlen($s); $i < $len; $i++)
2569 | 		{
2570 | 			$c = ord($s[$i]);
2571 | 			if ($c < 0x80) #1 byte  0bbbbbbb
2572 | 			{
2573 | 				if ($is_strict === false || ($c > 0x1F && $c < 0x7F) || $c == 0x09 || $c == 0x0A || $c == 0x0D) continue;
2574 | 			}
2575 | 			if (($c & 0xE0) == 0xC0) $n = 1; #2 bytes 110bbbbb 10bbbbbb
2576 | 			elseif (($c & 0xF0) == 0xE0) $n = 2; #3 bytes 1110bbbb 10bbbbbb 10bbbbbb
2577 | 			elseif (($c & 0xF8) == 0xF0) $n = 3; #4 bytes 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
2578 | 			elseif (($c & 0xFC) == 0xF8) $n = 4; #5 bytes 111110bb 10bbbbbb 10bbbbbb 10bbbbbb 10bbbbbb
2579 | 			elseif (($c & 0xFE) == 0xFC) $n = 5; #6 bytes 1111110b 10bbbbbb 10bbbbbb 10bbbbbb 10bbbbbb 10bbbbbb
2580 | 			else return false; #does not match any model
2581 | 			#n bytes matching 10bbbbbb follow ?
2582 | 			for ($j = 0; $j < $n; $j++)
2583 | 			{
2584 | 				$i++;
2585 | 				if ($i == $len || ((ord($s[$i]) & 0xC0) != 0x80) ) return false;
2586 | 			}
2587 | 		}
2588 | 		return true;
2589 | 	}
2590 | 
2591 | 	/**
2592 | 	 * Check the data in UTF-8 charset on given ranges of the standard UNICODE.
2593 | 	 * The suitable alternative to regular expressions.
2594 | 	 *
2595 | 	 * For null, integer, float, boolean returns TRUE.
2596 | 	 *
2597 | 	 * Arrays traversed recursively (keys and values).
2598 | 	 * At least if one array element value is not passed checking, it returns FALSE.
2599 | 	 *
2600 | 	 * @example
2601 | 	 *   #A simple check the standard named ranges:
2602 | 	 *   UTF8::blocks_check('поисковые системы Google и Yandex', array('Basic Latin', 'Cyrillic'));
2603 | 	 *   #You can check the named, direct ranges or codepoints together:
2604 | 	 *   UTF8::blocks_check('поисковые системы Google и Yandex', array(array(0x20, 0x7E),     #[\x20-\x7E]
2605 | 	 *                                                                 array(0x0410, 0x044F), #[A-Яa-я]
2606 | 	 *                                                                 0x0401, #russian yo (Ё)
2607 | 	 *                                                                 0x0451, #russian ye (ё)
2608 | 	 *                                                                 'Arrows',
2609 | 	 *                                                                ));
2610 | 	 *
2611 | 	 * @link    http://www.unicode.org/charts/
2612 | 	 * @param   array|scalar|null  $data
2613 | 	 * @param   array|string       $blocks
2614 | 	 * @return  bool               Возвращает TRUE, если все символы из текста принадлежат указанным диапазонам
2615 | 	 *                             и FALSE в противном случае или для разбитого UTF-8.
2616 | 	 */
2617 | 	public static function blocks_check($data, $blocks)
2618 | 	{
2619 | 		if (! ReflectionTypeHint::isValid()) return false;
2620 | 
2621 | 		if (is_array($data))
2622 | 		{
2623 | 			foreach ($data as $k => &$v)
2624 | 			{
2625 | 				if (! self::blocks_check($k, $blocks) || ! self::blocks_check($v, $blocks)) return false;
2626 | 			}
2627 | 			return true;
2628 | 		}
2629 | 
2630 | 		if (is_string($data))
2631 | 		{
2632 | 			$chars = self::str_split($data);
2633 | 			if ($chars === false) return false; #broken UTF-8
2634 | 			unset($data); #memory free
2635 | 			$skip = array(); #save to cache already checked symbols
2636 | 			foreach ($chars as $i => $char)
2637 | 			{
2638 | 				if (array_key_exists($char, $skip)) continue; #speed improve
2639 | 				$codepoint = self::ord($char);
2640 | 				if ($codepoint === false) return false; #broken UTF-8
2641 | 				$is_valid = false;
2642 | 				$blocks = (array)$blocks;
2643 | 				foreach ($blocks as $j => $block)
2644 | 				{
2645 | 					if (is_string($block))
2646 | 					{
2647 | 						if (! array_key_exists($block, self::$unicode_blocks))
2648 | 						{
2649 | 							trigger_error('Unknown block "' . $block . '"!', E_USER_WARNING);
2650 | 							return false;
2651 | 						}
2652 | 						list ($min, $max) = self::$unicode_blocks[$block];
2653 | 					}
2654 | 					elseif (is_array($block)) list ($min, $max) = $block;
2655 | 					elseif (is_int($block)) $min = $max = $block;
2656 | 					else trigger_error('A string/array/int type expected for block[' . $j . ']!', E_USER_ERROR);
2657 | 					if ($codepoint >= $min && $codepoint <= $max)
2658 | 					{
2659 | 						$is_valid = true;
2660 | 						break;
2661 | 					}
2662 | 				}#foreach
2663 | 				if (! $is_valid) return false;
2664 | 				$skip[$char] = null;
2665 | 			}#foreach
2666 | 			return true;
2667 | 		}
2668 | 		if (is_scalar($data) || is_null($data)) return true;  #~ null, integer, float, boolean
2669 | 		return false; #object or resource
2670 | 	}
2671 | 
2672 | 	/**
2673 | 	 * Recode $_GET, $_POST, $_COOKIE, $_REQUEST, $_FILES from $charset encoding to UTF-8, if necessary.
2674 | 	 * A side effect is a positive protection against XSS attacks with non-printable characters on the vulnerable PHP function.
2675 | 	 * Thus web forms can be sent to the server in 2-encoding: $charset and UTF-8.
2676 | 	 * For example: ?тест[тест]=тест
2677 | 	 *
2678 | 	 * Алгоритм работы:
2679 | 	 * 1) Функция проверяет массивы $_GET, $_POST, $_COOKIE, $_REQUEST, $_FILES
2680 | 	 *    на корректность значений элементов кодировке UTF-8.
2681 | 	 * 2) Значения не в UTF-8 принимаются как $charset и конвертируется в UTF-8,
2682 | 	 *    при этом байты от 0x00 до 0x7F (ASCII) сохраняются как есть.
2683 | 	 * 3) Сконвертированные значения снова проверяются.
2684 | 	 *    Если данные опять не в кодировке UTF-8, то они считаются разбитыми и функция возвращает FALSE.
2685 | 	 *
2686 | 	 * NOTICE
2687 | 	 *   Функция должна вызываться после self::unescape_request()!
2688 | 	 *
2689 | 	 * @see     self::unescape_request()
2690 | 	 * @param   bool    $is_hex2bin  Декодировать HEX-данные?
2691 | 	 *                               Пример: 0xd09ec2a0d0bad0bed0bcd0bfd0b0d0bdd0b8d0b8 => О компании
2692 | 	 *                               Параметры в URL адресах иногда бывает удобно кодировать не функцией rawurlencode(),
2693 | 	 *                               а использовать следующий механизм (к тому же кодирующий данные более компактно):
2694 | 	 *                               '0x' . bin2hex($string)
2695 | 	 * @param   string  $charset
2696 | 	 * @return  bool                 Возвращает TRUE, если все значения элементов массивов в кодировке UTF-8
2697 | 	 *                               и FALSE + E_USER_WARNING в противном случае.
2698 | 	 */
2699 | 	public static function autoconvert_request($is_hex2bin = false, $charset = 'cp1251')
2700 | 	{
2701 | 		if (! ReflectionTypeHint::isValid()) return false;
2702 | 		$is_converted = false;
2703 | 		$is_broken = false;
2704 | 		foreach (array('_GET', '_POST', '_COOKIE', '_FILES') as $k => $v)
2705 | 		{
2706 | 			if (! array_key_exists($v, $GLOBALS)) continue;
2707 | 			#использовать array_walk_recursive() не предоставляется возможным,
2708 | 			#т.к. его callback функция не поддерживает передачу ключа по ссылке
2709 | 			$GLOBALS[$v] = self::_autoconvert_request_recursive($GLOBALS[$v], $is_converted, $is_broken, $is_hex2bin, $charset);
2710 | 			if ($is_broken)
2711 | 			{
2712 | 				trigger_error('Array $' . $v . ' does not have keys/values in UTF-8 charset!', E_USER_WARNING);
2713 | 				return false;
2714 | 			}
2715 | 		}
2716 | 		if ($is_converted)
2717 | 		{
2718 | 			$_REQUEST =
2719 | 				(isset($_COOKIE) ? $_COOKIE : array()) +
2720 | 				(isset($_POST) ? $_POST : array()) +
2721 | 				(isset($_GET) ? $_GET : array());
2722 | 		}
2723 | 		return true;
2724 | 	}
2725 | 
2726 | 	private static function _autoconvert_request_recursive(&$data, &$is_converted, &$is_broken, $is_hex2bin, $charset)
2727 | 	{
2728 | 		if ($is_broken) return $data;  #speed improve
2729 | 		if (is_array($data))
2730 | 		{
2731 | 			$d = array();
2732 | 			foreach ($data as $k => &$v)
2733 | 			{
2734 | 				$k = self::_autoconvert_request($k, $is_converted, $is_broken, $is_hex2bin, $charset);
2735 | 				if ($is_broken) return $data;  #speed improve
2736 | 				$d[$k] = self::_autoconvert_request_recursive($v, $is_converted, $is_broken, $is_hex2bin, $charset);
2737 | 				if ($is_broken) return $data;  #speed improve
2738 | 			}
2739 | 			return $d;
2740 | 		}
2741 | 		return self::_autoconvert_request($data, $is_converted, $is_broken, $is_hex2bin, $charset);
2742 | 	}
2743 | 
2744 | 	private static function _autoconvert_request(&$s, &$is_converted, &$is_broken, $is_hex2bin, $charset)
2745 | 	{
2746 | 		#regexp speed improve by using strpos()
2747 | 		if ($is_hex2bin && strpos($s, '0x') === 0 && preg_match('/^0x((?:[\da-fA-F]{2})+)$/sSX', $s, $m))
2748 | 		{
2749 | 			$s = pack('H' . strlen($m[1]), $m[1]); #hex2bin()
2750 | 			$is_converted = true;
2751 | 		}
2752 | 		if (! self::is_utf8($s))
2753 | 		{
2754 | 			$s = self::convert_from($s, $charset);
2755 | 			if ($s === false) $is_broken = true;
2756 | 			elseif (! self::is_utf8($s))
2757 | 			{
2758 | 				trigger_error('String 0x ' . substr(bin2hex($s), 0, 100) . '... is not UTF-8!', E_USER_WARNING);
2759 | 				$is_broken = true;
2760 | 			}
2761 | 			else $is_converted = true;
2762 | 		}
2763 | 		return $s;
2764 | 	}
2765 | 
2766 | 	/**
2767 | 	 * Сравнение строк
2768 | 	 *
2769 | 	 * @param   string|null    $s1
2770 | 	 * @param   string|null    $s2
2771 | 	 * @param   string         $locale   For example, 'en_CA', 'ru_RU'
2772 | 	 * @return  int|bool|null  Returns FALSE if error occurred
2773 | 	 *                         Returns < 0 if $s1 is less than $s2;
2774 | 	 *                                 > 0 if $s1 is greater than $s2;
2775 | 	 *                                 0 if they are equal.
2776 | 	 */
2777 | 	public static function strcmp($s1, $s2, $locale = '')
2778 | 	{
2779 | 		if (! ReflectionTypeHint::isValid()) return false;
2780 | 		if (is_null($s1) || is_null($s2)) return null;
2781 | 		if (! function_exists('collator_create')) return strcmp($s1, $s2);
2782 | 		# PHP 5 >= 5.3.0, PECL intl >= 1.0.0
2783 | 		# If empty string ("") or "root" are passed, UCA rules will be used.
2784 | 		$c = new Collator($locale);
2785 | 		if (! $c)
2786 | 		{
2787 | 			# Returns an "empty" object on error. You can use intl_get_error_code() and/or intl_get_error_message() to know what happened.
2788 | 			trigger_error(intl_get_error_message(), E_USER_WARNING);
2789 | 			return false;
2790 | 		}
2791 | 		return $c->compare($s1, $s2);
2792 | 	}
2793 | 
2794 | 	/**
2795 | 	 * Сравнение строк для N первых символов
2796 | 	 *
2797 | 	 * @param   string|null    $s1
2798 | 	 * @param   string|null    $s2
2799 | 	 * @param   int            $length
2800 | 	 * @return  int|bool|null  Returns FALSE if error occurred
2801 | 	 *                         Returns < 0 if $s1 is less than $s2;
2802 | 	 *                                 > 0 if $s1 is greater than $s2;
2803 | 	 *                                 0 if they are equal.
2804 | 	 */
2805 | 	public static function strncmp($s1, $s2, $length)
2806 | 	{
2807 | 		if (! ReflectionTypeHint::isValid()) return false;
2808 | 		if (is_null($s1) || is_null($s2)) return null;
2809 | 		return self::strcmp(self::substr($s1, 0, $length), self::substr($s2, 0, $length));
2810 | 	}
2811 | 
2812 | 	/**
2813 | 	 * Implementation strcasecmp() function for UTF-8 encoding string.
2814 | 	 *
2815 | 	 * @param   string|null    $s1
2816 | 	 * @param   string|null    $s2
2817 | 	 * @return  int|bool|null  Returns FALSE if error occurred
2818 | 	 *                         Returns < 0 if $s1 is less than $s2;
2819 | 	 *                                 > 0 if $s1 is greater than $s2;
2820 | 	 *                                 0 if they are equal.
2821 | 	 */
2822 | 	public static function strcasecmp($s1, $s2)
2823 | 	{
2824 | 		if (! ReflectionTypeHint::isValid()) return false;
2825 | 		if (is_null($s1) || is_null($s2)) return null;
2826 | 		return self::strcmp(self::lowercase($s1), self::lowercase($s2));
2827 | 	}
2828 | 
2829 | 	/**
2830 | 	 * Converts a UTF-8 string to a UNICODE codepoints
2831 | 	 *
2832 | 	 * @param   string|null     $s  UTF-8 string
2833 | 	 * @return  array|bool|null     Unicode codepoints
2834 | 	 *                              Returns FALSE if $s broken (not UTF-8)
2835 | 	 */
2836 | 	public static function to_unicode($s)
2837 | 	{
2838 | 		if (! ReflectionTypeHint::isValid()) return false;
2839 | 		if (is_null($s)) return $s;
2840 | 
2841 | 		$s2 = null;
2842 | 		#since PHP-5.3.x iconv() little faster then mb_convert_encoding()
2843 | 		if (function_exists('iconv')) $s2 = @iconv('UTF-8', 'UCS-4BE', $s);
2844 | 		elseif (function_exists('mb_convert_encoding')) $s2 = @mb_convert_encoding($s, 'UCS-4BE', 'UTF-8');
2845 | 		if (is_string($s2)) return array_values(unpack('N*', $s2));
2846 | 		if ($s2 !== null) return false;
2847 | 
2848 | 		$a = self::str_split($s);
2849 | 		if ($a === false) return false;
2850 | 		return array_map(array(__CLASS__, 'ord'), $a);
2851 | 	}
2852 | 
2853 | 	/**
2854 | 	 * Converts a UNICODE codepoints to a UTF-8 string
2855 | 	 *
2856 | 	 * @param   array|null       $a  Unicode codepoints
2857 | 	 * @return  string|bool|null     UTF-8 string
2858 | 	 *                               Returns FALSE if error occurred
2859 | 	 */
2860 | 	public static function from_unicode($a)
2861 | 	{
2862 | 		if (! ReflectionTypeHint::isValid()) return false;
2863 | 		if (is_null($a)) return $a;
2864 | 
2865 | 		#since PHP-5.3.x iconv() little faster then mb_convert_encoding()
2866 | 		if (function_exists('iconv'))
2867 | 		{
2868 | 			array_walk($a, function(&$cp) { $cp = pack('N', $cp); });
2869 | 			$s = @iconv('UCS-4BE', 'UTF-8', implode('', $a));
2870 | 			if (! is_string($s)) return false;
2871 | 			return $s;
2872 | 		}
2873 | 		if (function_exists('mb_convert_encoding'))
2874 | 		{
2875 | 			array_walk($a, function(&$cp) { $cp = pack('N', $cp); });
2876 | 			$s = mb_convert_encoding(implode('', $a), 'UTF-8', 'UCS-4BE');
2877 | 			if (! is_string($s)) return false;
2878 | 			return $s;
2879 | 		}
2880 | 
2881 | 		return implode('', array_map(array(__CLASS__, 'chr'), $a));
2882 | 	}
2883 | 
2884 | 	/**
2885 | 	 * Converts a UTF-8 character to a UNICODE codepoint
2886 | 	 *
2887 | 	 * @param   string|null    $char  UTF-8 character
2888 | 	 * @return  int|bool|null         Unicode codepoint
2889 | 	 *                                Returns FALSE if $char broken (not UTF-8)
2890 | 	 */
2891 | 	public static function ord($char)
2892 | 	{
2893 | 		if (! ReflectionTypeHint::isValid()) return false;
2894 | 		if (is_null($char)) return $char;
2895 | 
2896 | 		static $cache = array();
2897 | 		if (array_key_exists($char, $cache)) return $cache[$char]; #speed improve
2898 | 
2899 | 		switch (strlen($char))
2900 | 		{
2901 | 			case 1 : return $cache[$char] = ord($char);
2902 | 			case 2 : return $cache[$char] = (ord($char{1}) & 63) |
2903 | 											((ord($char{0}) & 31) << 6);
2904 | 			case 3 : return $cache[$char] = (ord($char{2}) & 63) |
2905 | 											((ord($char{1}) & 63) << 6) |
2906 | 											((ord($char{0}) & 15) << 12);
2907 | 			case 4 : return $cache[$char] = (ord($char{3}) & 63) |
2908 | 											((ord($char{2}) & 63) << 6) |
2909 | 											((ord($char{1}) & 63) << 12) |
2910 | 											((ord($char{0}) & 7)  << 18);
2911 | 			default :
2912 | 				trigger_error('Character 0x' . bin2hex($char) . ' is not UTF-8!', E_USER_WARNING);
2913 | 				return false;
2914 | 		}
2915 | 	}
2916 | 
2917 | 	/**
2918 | 	 * Converts a UNICODE codepoint to a UTF-8 character
2919 | 	 *
2920 | 	 * @param   int|digit|null  $cp  Unicode codepoint
2921 | 	 * @return  string|bool|null     UTF-8 character
2922 | 	 *                               Returns FALSE if error occurred
2923 | 	 */
2924 | 	public static function chr($cp)
2925 | 	{
2926 | 		if (! ReflectionTypeHint::isValid()) return false;
2927 | 		if (is_null($cp)) return $cp;
2928 | 
2929 | 		static $cache = array();
2930 | 		if (array_key_exists($cp, $cache)) return $cache[$cp]; #speed improve
2931 | 
2932 | 		if ($cp <= 0x7f)     return $cache[$cp] = chr($cp);
2933 | 		if ($cp <= 0x7ff)    return $cache[$cp] = chr(0xc0 | ($cp >> 6))  .
2934 | 												  chr(0x80 | ($cp & 0x3f));
2935 | 		if ($cp <= 0xffff)   return $cache[$cp] = chr(0xe0 | ($cp >> 12)) .
2936 | 												  chr(0x80 | (($cp >> 6) & 0x3f)) .
2937 | 												  chr(0x80 | ($cp & 0x3f));
2938 | 		if ($cp <= 0x10ffff) return $cache[$cp] = chr(0xf0 | ($cp >> 18)) .
2939 | 												  chr(0x80 | (($cp >> 12) & 0x3f)) .
2940 | 												  chr(0x80 | (($cp >> 6) & 0x3f)) .
2941 | 												  chr(0x80 | ($cp & 0x3f));
2942 | 		#U+FFFD REPLACEMENT CHARACTER
2943 | 		return $cache[$cp] = "\xEF\xBF\xBD";
2944 | 	}
2945 | 
2946 | 	/**
2947 | 	 * Implementation chunk_split() function for UTF-8 encoding string.
2948 | 	 *
2949 | 	 * @param   string|null       $s
2950 | 	 * @param   int|digit|null    $length
2951 | 	 * @param   string|null       $glue
2952 | 	 * @return  string|bool|null  Returns FALSE if error occurred
2953 | 	 */
2954 | 	public static function chunk_split($s, $length = null, $glue = null)
2955 | 	{
2956 | 		if (! ReflectionTypeHint::isValid()) return false;
2957 | 		if (is_null($s)) return $s;
2958 | 
2959 | 		$length = intval($length);
2960 | 		$glue   = strval($glue);
2961 | 		if ($length < 1) $length = 76;
2962 | 		if ($glue === '') $glue = "\r\n";
2963 | 		if (! is_array($a = self::str_split($s, $length))) return false;
2964 | 		return implode($glue, $a);
2965 | 	}
2966 | 
2967 | 	/**
2968 | 	 * Changes all keys in an array
2969 | 	 *
2970 | 	 * @param   array|null       $a
2971 | 	 * @param   int              $mode  {CASE_LOWER|CASE_UPPER}
2972 | 	 * @return  array|bool|null  Returns FALSE if error occurred
2973 | 	 */
2974 | 	public static function array_change_key_case($a, $mode)
2975 | 	{
2976 | 		if (! ReflectionTypeHint::isValid()) return false;
2977 | 		if (! is_array($a)) return $a;
2978 | 		$a2 = array();
2979 | 		foreach ($a as $k => $v)
2980 | 		{
2981 | 			if (is_string($k))
2982 | 			{
2983 | 				$k = self::convert_case($k, $mode);
2984 | 				if ($k === false) return false;
2985 | 			}
2986 | 			$a2[$k] = $v;
2987 | 		}
2988 | 		return $a2;
2989 | 	}
2990 | 
2991 | 	/**
2992 | 	 * Конвертирует регистр букв в данных в кодировке UTF-8.
2993 | 	 * Массивы обходятся рекурсивно, при этом конвертируются только значения
2994 | 	 * в элементах массива, а ключи остаются без изменений.
2995 | 	 * Для конвертирования только ключей используйте метод self::array_change_key_case().
2996 | 	 *
2997 | 	 * @see     self::array_change_key_case()
2998 | 	 * @link    http://www.unicode.org/charts/PDF/U0400.pdf
2999 | 	 * @link    http://ru.wikipedia.org/wiki/ISO_639-1
3000 | 	 * @param   array|scalar|null $data  Данные произвольной структуры
3001 | 	 * @param   int               $mode  {CASE_LOWER|CASE_UPPER}
3002 | 	 * @param   bool              $is_ascii_optimization    for speed improve
3003 | 	 * @return  scalar|bool|null  Returns FALSE if error occurred
3004 | 	 */
3005 | 	public static function convert_case($data, $mode, $is_ascii_optimization = true)
3006 | 	{
3007 | 		if (! ReflectionTypeHint::isValid()) return false;
3008 | 
3009 | 		if (is_array($data))
3010 | 		{
3011 | 			foreach ($data as $k => &$v) $v = self::convert_case($v, $mode);
3012 | 			return $data;
3013 | 		}
3014 | 		if (! is_string($data) || ! $data) return $data;
3015 | 
3016 | 		if ($mode === CASE_UPPER)
3017 | 		{
3018 | 			if ($is_ascii_optimization && self::is_ascii($data)) return strtoupper($data); #speed improve!
3019 | 			#deprecated, since PHP-5.3.x strtr() 2-3 times faster then mb_strtolower()
3020 | 			#if (function_exists('mb_strtoupper')) return mb_strtoupper($data, 'utf-8');
3021 | 			return strtr($data, array_flip(self::$convert_case_table));
3022 | 		}
3023 | 		if ($mode === CASE_LOWER)
3024 | 		{
3025 | 			if ($is_ascii_optimization && self::is_ascii($data)) return strtolower($data); #speed improve!
3026 | 			#deprecated, since PHP-5.3.x strtr() 2-3 times faster then mb_strtolower()
3027 | 			#if (function_exists('mb_strtolower')) return mb_strtolower($data, 'utf-8');
3028 | 			return strtr($data, self::$convert_case_table);
3029 | 		}
3030 | 		trigger_error('Parameter 2 should be a constant of CASE_LOWER or CASE_UPPER!', E_USER_WARNING);
3031 | 		return $data;
3032 | 	}
3033 | 
3034 | 	/**
3035 | 	 * Convert a data to lower case
3036 | 	 *
3037 | 	 * @param   array|scalar|null  $data
3038 | 	 * @return  scalar|bool|null   Returns FALSE if error occurred	 */
3039 | 	public static function lowercase($data)
3040 | 	{
3041 | 		if (! ReflectionTypeHint::isValid()) return false;
3042 | 		return self::convert_case($data, CASE_LOWER);
3043 | 	}
3044 | 
3045 | 	/**
3046 | 	 * Convert a data to upper case
3047 | 	 *
3048 | 	 * @param   array|scalar|null  $data
3049 | 	 * @return  scalar|null        Returns FALSE if error occurred
3050 | 	 */
3051 | 	public static function uppercase($data)
3052 | 	{
3053 | 		if (! ReflectionTypeHint::isValid()) return false;
3054 | 		return self::convert_case($data, CASE_UPPER);
3055 | 	}
3056 | 
3057 | 	/**
3058 | 	 * Convert a data to lower case
3059 | 	 *
3060 | 	 * @param   array|scalar|null  $data
3061 | 	 * @return  scalar|bool|null   Returns FALSE if error occurred
3062 | 	 */
3063 | 	public static function strtolower($data)
3064 | 	{
3065 | 		if (! ReflectionTypeHint::isValid()) return false;
3066 | 		return self::convert_case($data, CASE_LOWER);
3067 | 	}
3068 | 
3069 | 	/**
3070 | 	 * Convert a data to upper case
3071 | 	 *
3072 | 	 * @param   array|scalar|null  $data
3073 | 	 * @return  scalar|null        Returns FALSE if error occurred
3074 | 	 */
3075 | 	public static function strtoupper($data)
3076 | 	{
3077 | 		if (! ReflectionTypeHint::isValid()) return false;
3078 | 		return self::convert_case($data, CASE_UPPER);
3079 | 	}
3080 | 
3081 | 
3082 | 	/**
3083 | 	 * Convert all HTML entities to native UTF-8 characters
3084 | 	 * Функция декодирует гораздо больше именованных сущностей, чем стандартная html_entity_decode()
3085 | 	 * Все dec и hex сущности так же переводятся в UTF-8.
3086 | 	 *
3087 | 	 * Example: '&quot;' or '&#34;' or '&#x22;' will be converted to '"'.
3088 | 	 *
3089 | 	 * @link    http://www.htmlhelp.com/reference/html40/entities/
3090 | 	 * @link    http://www.alanwood.net/demos/ent4_frame.html (HTML 4.01 Character Entity References)
3091 | 	 * @link    http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset1.asp?frame=true
3092 | 	 * @link    http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset2.asp?frame=true
3093 | 	 * @link    http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset3.asp?frame=true
3094 | 	 *
3095 | 	 * @param   scalar|null  $s
3096 | 	 * @param   bool         $is_special_chars   Дополнительно обрабатывать специальные html сущности? (&lt; &gt; &amp; &quot;)
3097 | 	 * @return  scalar|null  Returns FALSE if error occurred
3098 | 	 */
3099 | 	public static function html_entity_decode($s, $is_special_chars = false)
3100 | 	{
3101 | 		if (! ReflectionTypeHint::isValid()) return false;
3102 | 		if (! is_string($s)) return $s;
3103 | 
3104 | 		#speed improve
3105 | 		if (strlen($s) < 4  #по минимальной длине сущности - 4 байта: &#d; &xx;
3106 | 			|| ($pos = strpos($s, '&') === false) || strpos($s, ';', $pos) === false) return $s;
3107 | 
3108 | 		$table = self::$html_entity_table;
3109 | 		if ($is_special_chars) $table += self::$html_special_chars_table;
3110 | 
3111 | 		#replace named entities
3112 | 		$s = strtr($s, $table);
3113 | 		#block below deprecated, since PHP-5.3.x strtr() 1.5 times faster
3114 | 		if (0 && preg_match_all('/&[a-zA-Z]++\d*+;/sSX', $s, $m, null, $pos))
3115 | 		{
3116 | 			foreach (array_unique($m[0]) as $entity)
3117 | 			{
3118 | 				if (array_key_exists($entity, $table)) $s = str_replace($entity, $table[$entity], $s);
3119 | 			}
3120 | 		}
3121 | 
3122 | 		#заменяем числовые dec и hex сущности:
3123 | 		if (strpos($s, '&#') !== false)  #speed improve
3124 | 		{
3125 | 			$class = __CLASS__;
3126 | 			$html_special_chars_table_flipped = array_flip(self::$html_special_chars_table);
3127 | 			$s = preg_replace_callback('/&#((x)[\da-fA-F]{1,6}+|\d{1,7}+);/sSX',
3128 | 										function (array $m) use ($class, $html_special_chars_table_flipped, $is_special_chars)
3129 | 										{
3130 | 											$codepoint = isset($m[2]) && $m[2] === 'x' ? hexdec($m[1]) : $m[1];
3131 | 											if (! $is_special_chars)
3132 | 											{
3133 | 												$char = pack('C', $codepoint);
3134 | 												if (array_key_exists($char, $html_special_chars_table_flipped)) return $html_special_chars_table_flipped[$char];
3135 | 											}
3136 | 											return $class::chr($codepoint);
3137 | 										}, $s);
3138 | 		}
3139 | 		return $s;
3140 | 	}
3141 | 
3142 | 	/**
3143 | 	 * Convert special UTF-8 characters to HTML entities.
3144 | 	 * Функция кодирует гораздо больше именованных сущностей, чем стандартная htmlentities()
3145 | 	 *
3146 | 	 * @link  http://www.htmlhelp.com/reference/html40/entities/
3147 | 	 * @link  http://www.alanwood.net/demos/ent4_frame.html (HTML 4.01 Character Entity References)
3148 | 	 * @link  http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset1.asp?frame=true
3149 | 	 * @link  http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset2.asp?frame=true
3150 | 	 * @link  http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset3.asp?frame=true
3151 | 	 *
3152 | 	 * @param   scalar|null  $s
3153 | 	 * @param   bool         $is_special_chars_only          Обрабатывать только специальные html сущности? (&lt; &gt; &amp; &quot;)
3154 | 	 * @return  scalar|null  Returns FALSE if error occurred
3155 | 	 */
3156 | 	public static function html_entity_encode($s, $is_special_chars_only = false)
3157 | 	{
3158 | 		if (! ReflectionTypeHint::isValid()) return false;
3159 | 		if (! is_string($s)) return $s;
3160 | 
3161 | 		#if ($is_special_chars_only) return strtr($s, array_flip(self::$html_special_chars_table));
3162 | 		if ($is_special_chars_only) return htmlspecialchars($s);
3163 | 
3164 | 		#replace UTF-8 chars to named entities:
3165 | 		$s = strtr($s, array_flip(self::$html_entity_table));
3166 | 		#block below deprecated, since PHP-5.3.x strtr() 3 times faster
3167 | 		if (0 && preg_match_all('~(?>	[\xc2\xc3\xc5\xc6\xcb\xce\xcf][\x80-\xbf]  #2 bytes
3168 | 									|	\xe2[\x80-\x99][\x82-\xac]                 #3 bytes
3169 | 								  )
3170 | 								~sxSX', $s, $m))
3171 | 		{
3172 | 			$table = array_flip(self::$html_entity_table);
3173 | 			foreach (array_unique($m[0]) as $char)
3174 | 			{
3175 | 				if (array_key_exists($char, $table)) $s = str_replace($char, $table[$char], $s);
3176 | 			}
3177 | 		}
3178 | 
3179 | 		return $s;
3180 | 	}
3181 | 
3182 | 	/**
3183 | 	 * Make regular expression for case insensitive match
3184 | 	 * Example (non ASCII): "123_слово_test" => "123_(с|С)(л|Л)(о|О)(в|В)(о|О)_[tT][eE][sS][tT]"
3185 | 	 * Example (only ASCII): "123_test" => "(?i:123_test)"
3186 | 	 *
3187 | 	 * @param  string $s
3188 | 	 * @param  string|null $delimiter  If the optional delimiter is specified, it will also be escaped.
3189 | 	 *                                 This is useful for escaping the delimiter that is required by the PCRE functions.
3190 | 	 *                                 The / is the most commonly used delimiter.
3191 | 	 * @return string|bool|null        Returns FALSE if error occurred
3192 | 	 */
3193 | 	public static function preg_quote_case_insensitive($s, $delimiter = null)
3194 | 	{
3195 | 		if (! ReflectionTypeHint::isValid()) return false;
3196 | 		if (is_null($s)) return $s;
3197 | 
3198 | 		if (self::is_ascii($s)) return '(?i:' . preg_quote($s, $delimiter) . ')'; #speed improve
3199 | 
3200 | 		$s_re = '';
3201 | 		$s_lc = UTF8::lowercase($s); if ($s_lc === false) return false;
3202 | 		$s_uc = UTF8::uppercase($s); if ($s_uc === false) return false;
3203 | 
3204 | 		$chars_lc = UTF8::str_split($s_lc); if ($chars_lc === false) return false;
3205 | 		$chars_uc = UTF8::str_split($s_uc); if ($chars_uc === false) return false;
3206 | 
3207 | 		foreach ($chars_lc as $i => $char)
3208 | 		{
3209 | 			if ($chars_lc[$i] === $chars_uc[$i])
3210 | 				$s_re .= preg_quote($chars_lc[$i], $delimiter);
3211 | 			elseif (self::is_ascii($chars_lc[$i]))
3212 | 				$s_re .= '[' . preg_quote($chars_lc[$i] . $chars_uc[$i], $delimiter) . ']';
3213 | 			else
3214 | 				$s_re .= '(' . preg_quote($chars_lc[$i], $delimiter) . '|'
3215 | 							 . preg_quote($chars_uc[$i], $delimiter) . ')';
3216 | 		}
3217 | 		return $s_re;
3218 | 	}
3219 | 
3220 | 	/**
3221 | 	 * Call preg_match_all() and convert byte offsets into character offsets for PREG_OFFSET_CAPTURE flag.
3222 | 	 * This is regardless of whether you use /u modifier.
3223 | 	 *
3224 | 	 * @link  http://bolknote.ru/2010/09/08/~2704
3225 | 	 *
3226 | 	 * @param   string           $pattern
3227 | 	 * @param   string|null      $subject
3228 | 	 * @param   array            $matches
3229 | 	 * @param   int              $flags
3230 | 	 * @param   int              $char_offset
3231 | 	 * @return  array|bool|null  Returns FALSE if error occurred
3232 | 	 */
3233 | 	public static function preg_match_all($pattern, $subject, &$matches, $flags = PREG_PATTERN_ORDER, $char_offset = 0)
3234 | 	{
3235 | 		if (! ReflectionTypeHint::isValid()) return false;
3236 | 		if (is_null($subject)) return null;
3237 | 
3238 | 		$byte_offset = ($char_offset > 0) ? strlen(self::substr($subject, 0, $char_offset)) : $char_offset;
3239 | 
3240 | 		$return = preg_match_all($pattern, $subject, $matches, $flags, $byte_offset);
3241 | 		if ($return === false) return false;
3242 | 
3243 | 		if ($flags & PREG_OFFSET_CAPTURE)
3244 | 		{
3245 | 			foreach ($matches as &$match)
3246 | 			{
3247 | 				foreach ($match as &$a) $a[1] = self::strlen(substr($subject, 0, $a[1]));
3248 | 			}
3249 | 		}
3250 | 
3251 | 		return $return;
3252 | 	}
3253 | 
3254 | 	#alias for self::str_limit()
3255 | 	public static function truncate($s, $maxlength = null, $continue = "\xe2\x80\xa6", &$is_cutted = null, $tail_min_length = 20)
3256 | 	{
3257 | 		return self::str_limit($s, $maxlength, $continue, $is_cutted, $tail_min_length);
3258 | 	}
3259 | 
3260 | 	/**
3261 | 	 * Обрезает текст в кодировке UTF-8 до заданной длины,
3262 | 	 * причём последнее слово показывается целиком, а не обрывается на середине.
3263 | 	 * Html сущности корректно обрабатываются.
3264 | 	 *
3265 | 	 * @param   string|null     $s                Текст в кодировке UTF-8
3266 | 	 * @param   int|null|digit  $maxlength        Ограничение длины текста
3267 | 	 * @param   string          $continue         Завершающая строка, которая будет вставлена после текста, если он обрежется
3268 | 	 * @param   bool|null       &$is_cutted       Текст был обрезан?
3269 | 	 * @param   int|digit       $tail_min_length  Если длина "хвоста", оставшегося после обрезки текста, меньше $tail_min_length,
3270 | 	 *                                            то текст возвращается без изменений
3271 | 	 * @return  string|bool|null                  Returns FALSE if error occurred
3272 | 	 */
3273 | 	public static function str_limit($s, $maxlength = null, $continue = "\xe2\x80\xa6", &$is_cutted = null, $tail_min_length = 20) #"\xe2\x80\xa6" = "&hellip;"
3274 | 	{
3275 | 		if (! ReflectionTypeHint::isValid()) return false;
3276 | 		if (is_null($s)) return $s;
3277 | 
3278 | 		$is_cutted = false;
3279 | 		if ($continue === null) $continue = "\xe2\x80\xa6";
3280 | 		if (! $maxlength) $maxlength = 256;
3281 | 
3282 | 		#speed improve block
3283 | 		#{{{
3284 | 		if (strlen($s) <= $maxlength) return $s;
3285 | 		$s2 = str_replace("\r\n", '?', $s);
3286 | 		$s2 = preg_replace('/&(?> [a-zA-Z][a-zA-Z\d]+
3287 |                                 | \#(?> \d{1,4}
3288 |                                       | x[\da-fA-F]{2,4}
3289 |                                     )
3290 |                               );  # html сущности (&lt; &gt; &amp; &quot;)
3291 |                             /sxSX', '?', $s2);
3292 | 		if (strlen($s2) <= $maxlength || self::strlen($s2) <= $maxlength) return $s;
3293 | 		#}}}
3294 | 
3295 | 		$r = preg_match_all('/(?> \r\n   # переносы строк
3296 | 								   | &(?> [a-zA-Z][a-zA-Z\d]+
3297 | 										| \#(?> \d{1,4}
3298 | 											  | x[\da-fA-F]{2,4}
3299 | 											)
3300 | 									  );  # html сущности (&lt; &gt; &amp; &quot;)
3301 | 								   | .
3302 | 								 )
3303 | 								/sxuSX', $s, $m);
3304 | 		if ($r === false) return false;
3305 | 
3306 | 		#d($m);
3307 | 		if (count($m[0]) <= $maxlength) return $s;
3308 | 
3309 | 		$left = implode('', array_slice($m[0], 0, $maxlength));
3310 | 		#из диапазона ASCII исключаем буквы, цифры, открывающие парные символы [a-zA-Z\d\(\{\[] и некоторые др. символы
3311 | 		#нельзя вырезать в конце строки символ ";", т.к. он используются в сущностях &xxx;
3312 | 		$left2 = rtrim($left, "\x00..\x28\x2A..\x2F\x3A\x3C..\x3E\x40\x5B\x5C\x5E..\x60\x7B\x7C\x7E\x7F");
3313 | 		if (strlen($left) !== strlen($left2)) $return = $left2 . $continue;
3314 | 		else
3315 | 		{
3316 | 			#добавляем остаток к обрезанному слову
3317 | 			$right = implode('', array_slice($m[0], $maxlength));
3318 | 			preg_match('/^(?> [\d\)\]\}\-\.:]+  #цифры, закрывающие парные символы, дефис для составных слов, дата, время, IP-адреса, URL типа www.ya.ru:80!
3319 |                             | \p{L}+        #буквы
3320 |                             | \xe2\x80\x9d  #закрывающие кавычки
3321 |                             | \xe2\x80\x99  #закрывающие кавычки
3322 | 							| \xe2\x80\x9c  #закрывающие кавычки
3323 | 							| \xc2\xbb      #закрывающие кавычки
3324 |                           )+
3325 |                         /suxSX', $right, $m);
3326 | 			#d($m);
3327 | 			$right = isset($m[0]) ? rtrim($m[0], '.-') : '';
3328 | 			$return = $left . $right;
3329 | 			if (strlen($return) !== strlen($s)) $return .= $continue;
3330 | 		}
3331 | 		if (self::strlen($s) - self::strlen($return) < $tail_min_length) return $s;
3332 | 
3333 | 		$is_cutted = true;
3334 | 		return $return;
3335 | 	}
3336 | 
3337 | 	/**
3338 | 	 * Implementation str_split() function for UTF-8 encoding string.
3339 | 	 *
3340 | 	 * @param   string|null      $s
3341 | 	 * @param   int|null|digit   $length
3342 | 	 * @return  array|bool|null  Returns FALSE if error occurred
3343 | 	 */
3344 | 	public static function str_split($s, $length = null)
3345 | 	{
3346 | 		if (! ReflectionTypeHint::isValid()) return false;
3347 | 		if (is_null($s)) return $s;
3348 | 
3349 | 		$length = ($length === null) ? 1 : intval($length);
3350 | 		if ($length < 1) return false;
3351 | 		#there are limits in regexp for {min,max}!
3352 | 		if (preg_match_all('~.~suSX', $s, $m) === false) return false;
3353 | 		if (function_exists('preg_last_error') && preg_last_error() !== PREG_NO_ERROR) return false;
3354 | 		if ($length === 1) $a = $m[0];
3355 | 		else
3356 | 		{
3357 | 			$a = array();
3358 | 			for ($i = 0, $c = count($m[0]); $i < $c; $i += $length) $a[] = implode('', array_slice($m[0], $i, $length));
3359 | 		}
3360 | 		return $a;
3361 | 	}
3362 | 
3363 | 	/**
3364 | 	 * Implementation strlen() function for UTF-8 encoding string.
3365 | 	 *
3366 | 	 * @param   string|null    $s
3367 | 	 * @return  int|bool|null  Returns FALSE if error occurred
3368 | 	 */
3369 | 	public static function strlen($s)
3370 | 	{
3371 | 		if (! ReflectionTypeHint::isValid()) return false;
3372 | 		if (is_null($s)) return $s;
3373 | 
3374 | 		//since PHP-5.3.x mb_strlen() faster then strlen(utf8_decode())
3375 | 		if (function_exists('mb_strlen')) return mb_strlen($s, 'utf-8');
3376 | 
3377 | 		/*
3378 |           utf8_decode() converts characters that are not in ISO-8859-1 to '?', which, for the purpose of counting, is quite alright.
3379 |           It's much faster than iconv_strlen()
3380 |           Note: this function does not count bad UTF-8 bytes in the string - these are simply ignored
3381 | 		*/
3382 | 		return strlen(utf8_decode($s));
3383 | 
3384 | 		/*
3385 |         #slowly then strlen(utf8_decode())
3386 |         if (function_exists('iconv_strlen')) return iconv_strlen($s, 'utf-8');
3387 | 
3388 |         #Do not count UTF-8 continuation bytes
3389 |         #return strlen(preg_replace('/[\x80-\xBF]/sSX', '', $s));
3390 | 
3391 |         #slowly then strlen(utf8_decode())
3392 |         preg_match_all('~.~suSX', $str, $m);
3393 |         return count($m[0]);
3394 | 
3395 |         #slowly then preg_match_all() + count()
3396 |         $n = 0;
3397 |         for ($i = 0, $len = strlen($s); $i < $len; $i++)
3398 |         {
3399 |             $c = ord(substr($s, $i, 1));
3400 |             if ($c < 0x80) $n++;                 #single-byte (0xxxxxx)
3401 |             elseif (($c & 0xC0) == 0xC0) $n++;   #multi-byte starting byte (11xxxxxx)
3402 |         }
3403 |         return $n;
3404 | 		*/
3405 | 	}
3406 | 
3407 | 	/**
3408 | 	 * Implementation strpos() function for UTF-8 encoding string
3409 | 	 *
3410 | 	 * @param   string|null    $s       The entire string
3411 | 	 * @param   string|int     $needle  The searched substring
3412 | 	 * @param   int|null       $offset  The optional offset parameter specifies the position from which the search should be performed
3413 | 	 * @return  int|bool|null           Returns the numeric position of the first occurrence of needle in haystack.
3414 | 	 *                                  If needle is not found, will return FALSE.
3415 | 	 */
3416 | 	public static function strpos($s, $needle, $offset = null)
3417 | 	{
3418 | 		if (! ReflectionTypeHint::isValid()) return false;
3419 | 		if (is_null($s)) return $s;
3420 | 
3421 | 		if ($offset === null || $offset < 0) $offset = 0;
3422 | 		if (function_exists('mb_strpos')) return mb_strpos($s, $needle, $offset, 'utf-8');
3423 | 		#iconv_strpos() deprecated, because slowly than self::strlen(substr())
3424 | 		#if (function_exists('iconv_strpos')) return iconv_strpos($s, $needle, $offset, 'utf-8');
3425 | 		$byte_pos = $offset;
3426 | 		do if (($byte_pos = strpos($s, $needle, $byte_pos)) === false) return false;
3427 | 		while (($char_pos = self::strlen(substr($s, 0, $byte_pos++))) < $offset);
3428 | 		return $char_pos;
3429 | 	}
3430 | 
3431 | 	/**
3432 | 	 * Find position of first occurrence of a case-insensitive string.
3433 | 	 *
3434 | 	 * @param   string|null    $s       The entire string
3435 | 	 * @param   string|int     $needle  The searched substring
3436 | 	 * @param   int|null       $offset  The optional offset parameter specifies the position from which the search should be performed
3437 | 	 * @return  int|bool|null           Returns the numeric position of the first occurrence of needle in haystack.
3438 | 	 *                                  If needle is not found, will return FALSE.
3439 | 	 */
3440 | 	public static function stripos($s, $needle, $offset = null)
3441 | 	{
3442 | 		if (! ReflectionTypeHint::isValid()) return false;
3443 | 		if (is_null($s)) return $s;
3444 | 
3445 | 		if ($offset === null || $offset < 0) $offset = 0;
3446 | 		if (function_exists('mb_stripos')) return mb_stripos($s, $needle, $offset, 'utf-8');
3447 | 
3448 | 		#optimization block (speed improve)
3449 | 		#{{{
3450 | 		$ascii_int = intval(self::is_ascii($s)) + intval(self::is_ascii($needle));
3451 | 		if ($ascii_int === 1) return false;
3452 | 		if ($ascii_int === 2) return stripos($s, $needle, $offset);
3453 | 		#}}}
3454 | 
3455 | 		$s = self::convert_case($s, CASE_LOWER, false);
3456 | 		if ($s === false) return false;
3457 | 		$needle = self::convert_case($needle, CASE_LOWER, false);
3458 | 		if ($needle === false) return false;
3459 | 		return self::strpos($s, $needle, $offset);
3460 | 	}
3461 | 
3462 | 	/**
3463 | 	 * Implementation strrev() function for UTF-8 encoding string
3464 | 	 *
3465 | 	 * @param   string|null       $s
3466 | 	 * @return  string|bool|null  Returns FALSE if error occurred
3467 | 	 */
3468 | 	public static function strrev($s)
3469 | 	{
3470 | 		if (! ReflectionTypeHint::isValid()) return false;
3471 | 		if (is_null($s)) return $s;
3472 | 
3473 | 		if (0) #TODO test speed
3474 | 		{
3475 | 			$s = self::_convert($s, 'UTF-8', 'UTF-32');
3476 | 			if (! is_string($s)) return false;
3477 | 			$s = implode('', array_reverse(str_split($s, 4)));
3478 | 			return self::_convert($s, 'UTF-32', 'UTF-8');
3479 | 		}
3480 | 
3481 | 		if (! is_array($a = self::str_split($s))) return false;
3482 | 		return implode('', array_reverse($a));
3483 | 	}
3484 | 
3485 | 	/**
3486 | 	 * Implementation substr() function for UTF-8 encoding string.
3487 | 	 *
3488 | 	 * @link     http://www.w3.org/International/questions/qa-forms-utf-8.html
3489 | 	 * @param    string|null       $s
3490 | 	 * @param    int|digit         $offset
3491 | 	 * @param    int|null|digit    $length
3492 | 	 * @return   string|bool|null             Returns FALSE if error occurred
3493 | 	 */
3494 | 	public static function substr($s, $offset, $length = null)
3495 | 	{
3496 | 		if (! ReflectionTypeHint::isValid()) return false;
3497 | 		if (is_null($s)) return $s;
3498 | 
3499 | 		#since PHP-5.3.x mb_substr() faster then iconv_substr()
3500 | 		if (function_exists('mb_substr'))
3501 | 		{
3502 | 			if ($length === null) $length = self::strlen($s);
3503 | 			return mb_substr($s, $offset, $length, 'utf-8');
3504 | 		}
3505 | 		if (function_exists('iconv_substr'))
3506 | 		{
3507 | 			if ($length === null) $length = self::strlen($s);
3508 | 			return iconv_substr($s, $offset, $length, 'utf-8');
3509 | 		}
3510 | 
3511 | 		static $_s = null;
3512 | 		static $_a = null;
3513 | 
3514 | 		if ($_s !== $s) $_a = self::str_split($_s = $s);
3515 | 		if (! is_array($_a)) return false;
3516 | 		if ($length !== null) $a = array_slice($_a, $offset, $length);
3517 | 		else                  $a = array_slice($_a, $offset);
3518 | 		return implode('', $a);
3519 | 	}
3520 | 
3521 | 	/**
3522 | 	 * Implementation substr_replace() function for UTF-8 encoding string.
3523 | 	 *
3524 | 	 * @param   string|null       $s
3525 | 	 * @param   string|int        $replacement
3526 | 	 * @param   int|digit         $start
3527 | 	 * @param   int|null          $length
3528 | 	 * @return  string|bool|null  Returns FALSE if error occurred
3529 | 	 */
3530 | 	public static function substr_replace($s, $replacement, $start, $length = null)
3531 | 	{
3532 | 		if (! ReflectionTypeHint::isValid()) return false;
3533 | 		if (is_null($s)) return $s;
3534 | 
3535 | 		if (! is_array($a = self::str_split($s))) return false;
3536 | 		array_splice($a, $start, $length, $replacement);
3537 | 		return implode('', $a);
3538 | 	}
3539 | 
3540 | 	/**
3541 | 	 * Implementation ucfirst() function for UTF-8 encoding string.
3542 | 	 * Преобразует первый символ строки в кодировке UTF-8 в верхний регистр.
3543 | 	 *
3544 | 	 * @param   string|null       $s
3545 | 	 * @param   bool              $is_other_to_lowercase  остальные символы преобразуются в нижний регистр?
3546 | 	 * @return  string|bool|null  Returns FALSE if error occurred
3547 | 	 */
3548 | 	public static function ucfirst($s, $is_other_to_lowercase = true)
3549 | 	{
3550 | 		if (! ReflectionTypeHint::isValid()) return false;
3551 | 		if (is_null($s)) return $s;
3552 | 
3553 | 		if ($s === '' || ! is_string($s)) return $s;
3554 | 		if (! preg_match('/^(.)(.*)$/suSX', $s, $m)) return false;
3555 | 		return self::uppercase($m[1]) . ($is_other_to_lowercase ? self::lowercase($m[2]) : $m[2]);
3556 | 	}
3557 | 
3558 | 	/**
3559 | 	 * Implementation ucwords() function for UTF-8 encoding string.
3560 | 	 * Преобразует в верхний регистр первый символ каждого слова в строке в кодировке UTF-8,
3561 | 	 * остальные символы каждого слова преобразуются в нижний регистр.
3562 | 	 *
3563 | 	 * @param   string|null       $s
3564 | 	 * @param   bool              $is_other_to_lowercase  остальные символы преобразуются в нижний регистр?
3565 | 	 * @param   string            $spaces_re
3566 | 	 * @return  string|bool|null  Returns FALSE if error occurred
3567 | 	 */
3568 | 	public static function ucwords($s, $is_other_to_lowercase = true, $spaces_re = '~([\pZ\s]+)~suSX') #\pXps is POSIX space: property Z or tab, NL, VT, FF, CR
3569 | 	{
3570 | 		if (! ReflectionTypeHint::isValid()) return false;
3571 | 		if (is_null($s)) return $s;
3572 | 
3573 | 		$words = preg_split($spaces_re, $s, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
3574 | 		foreach ($words as $k => $word)
3575 | 		{
3576 | 			$words[$k] = self::ucfirst($word, $is_other_to_lowercase = true);
3577 | 			if ($words[$k] === false) return false;
3578 | 		}
3579 | 		return implode('', $words);
3580 | 	}
3581 | 
3582 | 	/**
3583 | 	 * Decodes a string in the format %uXXXX or %u{XXXXXX} in the UTF-8 string.
3584 | 	 *
3585 | 	 * Используется для декодирования данных типа "%u0442%u0435%u0441%u0442",
3586 | 	 * закодированных устаревшей функцией javascript://encode().
3587 | 	 * Рекомендуется использовать функцию javascript://encodeURIComponent().
3588 | 	 *
3589 | 	 * NOTICE
3590 | 	 * Устаревший формат %uXXXX позволяет использовать юникод только из диапазона UCS-2, т.е. от U+0 до U+FFFF
3591 | 	 *
3592 | 	 * @param   scalar|array|null  $data
3593 | 	 * @param   bool               $is_rawurlencode
3594 | 	 * @return  scalar|array|null  Returns FALSE if error occurred
3595 | 	 */
3596 | 	public static function unescape($data, $is_rawurlencode = false)
3597 | 	{
3598 | 		if (! ReflectionTypeHint::isValid()) return false;
3599 | 		if (is_array($data))
3600 | 		{
3601 | 			$d = array();
3602 | 			foreach ($data as $k => &$v)
3603 | 			{
3604 | 				$k = self::unescape($k, $is_rawurlencode);
3605 | 				if ($k === false) return false;
3606 | 				$d[$k] = self::unescape($v, $is_rawurlencode);
3607 | 				if ($d[$k] === false && ! is_bool($v)) return false;
3608 | 			}
3609 | 			return $d;
3610 | 		}
3611 | 		if (is_string($data))
3612 | 		{
3613 | 			if (strpos($data, '%u') === false) return $data; #use strpos() for speed improving
3614 | 			return preg_replace_callback('/%u(  [\da-fA-F]{4}+          #%uXXXX     only UCS-2
3615 |                                               | \{ [\da-fA-F]{1,6}+ \}  #%u{XXXXXX} extended form for all UNICODE charts
3616 |                                              )
3617 |                                           /sxSX',
3618 | 											function (array $m) use ($is_rawurlencode)
3619 | 											{
3620 | 												$codepoint = hexdec(trim($m[1], '{}'));
3621 | 												$char = self::chr($codepoint);
3622 | 												return $is_rawurlencode ? rawurlencode($char) : $char;
3623 | 											},
3624 | 											$data);
3625 | 		}
3626 | 		if (is_scalar($data) || is_null($data)) return $data;  #~ null, integer, float, boolean
3627 | 		return false; #object or resource
3628 | 	}
3629 | 
	/**
	 * 1) Corrects the global arrays $_GET, $_POST, $_COOKIE, $_REQUEST by decoding
	 *    values in the format %uXXXX and %u{XXXXXX}, encoded,
	 *    for example, through the outdated javascript function escape().
	 *    Standard PHP5 cannot do it.
	 * 2) If the HTTP_COOKIE contains parameters with the same name,
	 *    HTTP_COOKIE is parsed here with parse_str(), i.e. the same way as the QUERY_STRING
	 *    (per the original notes: the last value is taken, not the first).
	 * 3) Creates an array of $_POST for non-standard Content-Type, for example, "Content-Type: application/octet-stream".
	 *    Standard PHP5 creates an array for "Content-Type: application/x-www-form-urlencoded" and "Content-Type: multipart/form-data".
	 *
	 * Sessions, cookies and independent authorization on subdomains.
	 *
	 * EXAMPLE 1
	 * A production site http://domain.com got subdomains.
	 * For cross-domain authorization via the session mechanism the COOKIE host name was changed from "domain.com" to ".domain.com".
	 * As a result the authorization stopped working.
	 * Clearing the COOKIE helps, but forcing that on thousands of user computers is problematic.
	 * The problem: PHP handles (incorrectly?) the HTTP_COOKIE header when it contains parameters
	 * with the same name but different values.
	 * Example of an HTTP header sent by a client: "Cookie: sid=chpgs2fiak-330mzqza; sid=cmz5tnp5zz-xlbbgqp"
	 * In this case the server takes the first value, not the last one,
	 * although for the same situation in the QUERY_STRING the last parameter is always taken.
	 * Two parameters with the same name may appear in the HTTP_COOKIE if the following HTTP headers are sent to the client:
	 * "Set-Cookie: sid=chpgs2fiak-330mzqza; expires=Thu, 15 Oct 2009 14:23:42 GMT; path=/; domain=domain.com"  (domain.com only)
	 * "Set-Cookie: sid=cmz6uqorzv-1bn35110; expires=Thu, 15 Oct 2009 14:23:42 GMT; path=/; domain=.domain.com" (domain.com and all of its subdomains)
	 * Solution: change the name of the session.
	 *
	 * EXAMPLE 2
	 * There are production sites: http://domain.com (main), http://admin.domain.com (admin panel),
	 * http://sub1.domain.com (subproject 1), http://sub2.domain.com (subproject 2).
	 * There is also a development server http://dev.domain.com, which may have subdomains of its own.
	 * Independent cross-domain authorization is required for http://*.domain.com and http://*.dev.domain.com.
	 * The authorization state is kept in a session whose name and value are written to the COOKIE.
	 * Because the domains http://*.dev.domain.com overlap with the domains http://*.domain.com,
	 * different session names have to be used for independent authorization.
	 * Example of HTTP headers of the server response:
	 * "Set-Cookie: sid=chpgs2fiak-330mzqza; expires=Thu, 15 Oct 2009 14:23:42 GMT; path=/; domain=.domain.com" (.domain.com and all of its subdomains)
	 * "Set-Cookie: sid.dev=cmz6uqorzv-1bn35110; expires=Thu, 15 Oct 2009 14:23:42 GMT; path=/; domain=.dev.domain.com" (dev.domain.com and all of its subdomains)
	 *
	 * @link    http://tools.ietf.org/html/rfc2965  RFC 2965 - HTTP State Management Mechanism
	 * @return  void
	 */
	public static function unescape_request()
	{
		#becomes TRUE as soon as any of the global arrays has been (re)built, see the $_REQUEST rebuild below
		$fixed = false;
		#ATTENTION! HTTP_RAW_POST_DATA is only accessible when Content-Type of POST request is NOT default "application/x-www-form-urlencoded"!
		#raw body is read for POST requests only: from the global variable if it exists, otherwise from php://input
		$HTTP_RAW_POST_DATA = isset($_SERVER['REQUEST_METHOD']) && $_SERVER['REQUEST_METHOD'] === 'POST' ? (isset($GLOBALS['HTTP_RAW_POST_DATA']) ? $GLOBALS['HTTP_RAW_POST_DATA'] : @file_get_contents('php://input')) : null;
		if (ini_get('always_populate_raw_post_data')) $GLOBALS['HTTP_RAW_POST_DATA'] = $HTTP_RAW_POST_DATA;
		#name of the global array => raw source string it is parsed from
		foreach (array( '_GET'    => isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : null,
						'_POST'   => $HTTP_RAW_POST_DATA,
						'_COOKIE' => isset($_SERVER['HTTP_COOKIE']) ? $_SERVER['HTTP_COOKIE'] : null,
						) as $k => $v)
		{
			if (! is_string($v)) continue; #no raw data for this array
			if ($k === '_COOKIE')
			{
				#turn the "; " cookie separators into "&", so parse_str() can read the header like a query string
				$v = preg_replace('/; *+/sSX', '&', $v);
				unset($_COOKIE); #HTTP_COOKIE is parsed here by ourselves, to process it the same way as QUERY_STRING
			}
			if (strpos($v, '%u') !== false)
			{
				#decode %uXXXX sequences (re-encoded with rawurlencode) and rebuild the array from the result
				parse_str(self::unescape($v, $is_rawurlencode = true), $GLOBALS[$k]);
				$fixed = true;
				continue;
			}
			#nothing to decode: an array that already exists is left as is
			if (array_key_exists($k, $GLOBALS)) continue;
			parse_str($v, $GLOBALS[$k]);
			$fixed = true;
		}
		if ($fixed)
		{
			#array union keeps the left-hand value for duplicate keys: COOKIE wins over POST, POST wins over GET
			$_REQUEST =
				(isset($_COOKIE) ? $_COOKIE : array()) +
				(isset($_POST) ? $_POST : array()) +
				(isset($_GET) ? $_GET : array());
		}
	}
3709 | 
3710 | 	/**
3711 | 	 * Calculates the height of the edit text in <textarea> html tag by value and width.
3712 | 	 *
3713 | 	 * В большинстве случаев будет корректно работать для моноширинных шрифтов.
3714 | 	 * Т.к. браузер переносит последнее слово, которое не умещается на строке,
3715 | 	 * на следующую строку, высота м.б. меньше ожидаемой.
3716 | 	 * Этот алгоритм явл. простым (и быстрым) и не отслеживает переносы слов.
3717 | 	 *
3718 | 	 * @param   string|null     $s         Текст
3719 | 	 * @param   int|digit       $cols      Ширина области редактирования (колонок)
3720 | 	 * @param   int|digit       $min_rows  Минимальное кол-во строк
3721 | 	 * @param   int|digit       $max_rows  Максимальное кол-во строк
3722 | 	 * @return  int|bool|null              Number of rows (lines)
3723 | 	 */
3724 | 	public static function textarea_rows($s, $cols, $min_rows = 3, $max_rows = 32)
3725 | 	{
3726 | 		if (! ReflectionTypeHint::isValid()) return false;
3727 | 		if (is_null($s)) return $s;
3728 | 
3729 | 		if (strlen($s) == 0) return $min_rows;  #speed improve
3730 | 		$rows = 0;
3731 | 		#utf8_decode() converts characters that are not in ISO-8859-1 to '?'
3732 | 		foreach (preg_split('/\r\n|[\r\n]/sSX', utf8_decode($s)) as $line)
3733 | 		{
3734 | 			$rows += ceil((strlen($line) + 1) / $cols);
3735 | 			if ($rows > $max_rows) return $max_rows;
3736 | 		}
3737 | 		return ($rows < $min_rows) ? $min_rows : $rows;
3738 | 	}
3739 | 
3740 | 	/**
3741 | 	 * @param   string|null       $s
3742 | 	 * @param   string|null       $charlist
3743 | 	 * @return  string|bool|null
3744 | 	 */
3745 | 	public static function ltrim($s, $charlist = null)
3746 | 	{
3747 | 		if (! ReflectionTypeHint::isValid()) return false;
3748 | 		if (is_null($s)) return $s;
3749 | 		if ($charlist === null || self::is_ascii($charlist)) return ltrim($s);
3750 | 		return preg_replace('~^[' . self::_preg_quote_class($charlist, '~') . ']+~suSX', '', $s);
3751 | 	}
3752 | 
3753 | 	/**
3754 | 	 * @param   string|null       $s
3755 | 	 * @param   string|null       $charlist
3756 | 	 * @return  string|bool|null
3757 | 	 */
3758 | 	public static function rtrim($s, $charlist = null)
3759 | 	{
3760 | 		if (! ReflectionTypeHint::isValid()) return false;
3761 | 		if (is_null($s)) return $s;
3762 | 		if ($charlist === null || self::is_ascii($charlist)) return rtrim($s);
3763 | 		return preg_replace('~[' . self::_preg_quote_class($charlist, '~') . ']+$~suSX', '', $s);
3764 | 	}
3765 | 
3766 | 	/**
3767 | 	 * @param   scalar|null  $s
3768 | 	 * @param   string|null  $charlist
3769 | 	 * @return  scalar|null
3770 | 	 */
3771 | 	public static function trim($s, $charlist = null)
3772 | 	{
3773 | 		if (! ReflectionTypeHint::isValid()) return false;
3774 | 		if (is_null($s)) return $s;
3775 | 		if ($charlist === null || self::is_ascii($charlist)) return trim($s);
3776 | 		$charlist_re = self::_preg_quote_class($charlist, '~');
3777 | 		$s = preg_replace('~^[' . $charlist_re . ']+~suSX', '', $s);
3778 | 		return preg_replace('~[' . $charlist_re . ']+$~suSX', '', $s);
3779 | 	}
3780 | 
3781 | 	private static function _preg_quote_class($charlist, $delimiter = null)
3782 | 	{
3783 | 		#return preg_quote($charlist, $delimiter); #DEPRECATED
3784 | 		$quote_table = array(
3785 | 			'\\' => '\\\\',
3786 | 			'-'  => '\-',
3787 | 			']'  => '\]',
3788 | 		);
3789 | 		if (is_string($delimiter)) $quote_table[$delimiter] = '\\' . $delimiter;
3790 | 		return strtr($charlist, $quote_table);
3791 | 	}
3792 | 
3793 | 	/**
3794 | 	 * @param   string|null       $s
3795 | 	 * @param   int|digit         $length
3796 | 	 * @param   string            $pad_str
3797 | 	 * @param   int               $type     STR_PAD_LEFT, STR_PAD_RIGHT or STR_PAD_BOTH
3798 | 	 * @return  string|bool|null
3799 | 	 */
3800 | 	public static function str_pad($s, $length, $pad_str = ' ', $type = STR_PAD_RIGHT)
3801 | 	{
3802 | 		if (! ReflectionTypeHint::isValid()) return false;
3803 | 		if (is_null($s)) return $s;
3804 | 
3805 | 		$input_len = self::strlen($s);
3806 | 		if ($length <= $input_len) return $s;
3807 | 
3808 | 		$pad_str_len = self::strlen($pad_str);
3809 | 		$pad_len = $length - $input_len;
3810 | 
3811 | 		if ($type == STR_PAD_RIGHT)
3812 | 		{
3813 | 			$repeat_num = ceil($pad_len / $pad_str_len);
3814 | 			return self::substr($s . str_repeat($pad_str, $repeat_num), 0, $length);
3815 | 		}
3816 | 
3817 | 		if ($type == STR_PAD_LEFT)
3818 | 		{
3819 | 			$repeat_num = ceil($pad_len / $pad_str_len);
3820 | 			return self::substr(str_repeat($pad_str, $repeat_num), 0, intval(floor($pad_len))) . $s;
3821 | 		}
3822 | 
3823 | 		if ($type == STR_PAD_BOTH)
3824 | 		{
3825 | 			$pad_len /= 2;
3826 | 			$pad_amount_left  = intval(floor($pad_len));
3827 | 			$pad_amount_right = intval(ceil($pad_len));
3828 | 			$repeat_times_left  = ceil($pad_amount_left  / $pad_str_len);
3829 | 			$repeat_times_right = ceil($pad_amount_right / $pad_str_len);
3830 | 
3831 | 			$padding_left  = self::substr(str_repeat($pad_str, $repeat_times_left),  0, $pad_amount_left);
3832 | 			$padding_right = self::substr(str_repeat($pad_str, $repeat_times_right), 0, $pad_amount_right);
3833 | 			return $padding_left . $s . $padding_right;
3834 | 		}
3835 | 
3836 | 		trigger_error('Parameter 4 should be a constant of STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH!', E_USER_WARNING);
3837 | 		return false;
3838 | 	}
3839 | 
3840 | 	/**
3841 | 	 * @param   string    $str
3842 | 	 * @param   string    $mask
3843 | 	 * @param   int|null  $start
3844 | 	 * @param   int|null  $length
3845 | 	 * @return  int|bool
3846 | 	 */
3847 | 	public static function strspn($str, $mask, $start = null, $length = null)
3848 | 	{
3849 | 		if (! ReflectionTypeHint::isValid()) return false;
3850 | 		#if (self::is_ascii($str) && self::is_ascii($mask)) return strspn($str, $mask, $start, $length);
3851 | 		if ($start !== null || $length !== null) $str = self::substr($str, $start, $length);
3852 | 		if (preg_match('~^[' . preg_quote($mask, '~') . ']+~uSX', $str, $m)) self::strlen($m[0]);
3853 | 		return 0;
3854 | 	}
3855 | 
3856 | 	/**
3857 | 	 * Recode the text files in a specified folder in the UTF-8
3858 | 	 * In the processing skipped binary files, files encoded in UTF-8, files that could not convert.
3859 | 	 * So method works reliably enough.
3860 | 	 *
3861 | 	 *
3862 | 	 * @param   string       $dir             Директория для сканирования
3863 | 	 * @param   string|null  $files_re        Регул. выражение для шаблона имён файлов,
3864 | 	 *                                        например: '~\.(?:txt|sql|php|pl|py|sh|tpl|xml|xsl|html|xhtml|phtml|htm|js|json|css|conf|cfg|ini|htaccess)$~sSX'
3865 | 	 * @param   bool         $is_recursive    Обрабатывать вложенные папки и файлы?
3866 | 	 * @param   string       $charset         Исходная кодировка
3867 | 	 * @param   string|null  $dirs_ignore_re  Регул. выражение для исключения папок из обработки
3868 | 	 *                                        например: '~^(?:cache|images?|photos?|fonts?|img|ico|\.svn|\.hg|\.cvs)$~siSX'
3869 | 	 * @param   bool         $is_echo         Печать имён обработанных файлов и статус обработки в выходной поток?
3870 | 	 * @param   bool         $is_simulate     Сымитировать работу без реальной перезаписи файлов?
3871 | 	 * @return  int|bool                      Возвращает кол-во перекодированных файлов
3872 | 	 *                                        Returns FALSE if error occurred
3873 | 	 */
3874 | 	public static function convert_files_from(
3875 | 		$dir,
3876 | 		$files_re = null,
3877 | 		$is_recursive = true,
3878 | 		$charset = 'cp1251',
3879 | 		$dirs_ignore_re = null,
3880 | 		$is_echo = false,
3881 | 		$is_simulate = false)
3882 | 	{
3883 | 		if (! ReflectionTypeHint::isValid()) return false;
3884 | 
3885 | 		$dh = opendir($dir);
3886 | 		if (! is_resource($dh)) return false;
3887 | 		$counter = 0;
3888 | 		while (($name = readdir($dh)) !== false)
3889 | 		{
3890 | 			if ($name == '.' || $name == '..') continue;
3891 | 			$file = $dir . '/' . $name;
3892 | 			if (is_file($file))
3893 | 			{
3894 | 				if (is_string($files_re) && ! preg_match($files_re, $name)) continue;
3895 | 				if ($is_echo) echo $file;
3896 | 				$s = @file_get_contents($file);
3897 | 				if (! is_string($s))
3898 | 				{
3899 | 					if ($is_echo) echo '  Error to reading' . PHP_EOL;
3900 | 					return false;
3901 | 				}
3902 | 				if (self::is_utf8($s))
3903 | 				{
3904 | 					if ($is_echo) echo '  UTF-8' . PHP_EOL;
3905 | 					continue;
3906 | 				}
3907 | 				$s = self::_convert($s, $charset, 'UTF-8');
3908 | 				#игнорируем ошибки при попытке перекодировать бинарные файлы
3909 | 				if (! is_string($s) || ! self::is_utf8($s))
3910 | 				{
3911 | 					if ($is_echo) echo '  Binary' . PHP_EOL;
3912 | 					continue;
3913 | 				}
3914 | 
3915 | 				$ext = strtolower(pathinfo($name, PATHINFO_EXTENSION));
3916 | 				if ($ext === 'htm' || $ext === 'html' || $ext === 'xhtml' || $ext === 'phtml' || $ext === 'tpl')
3917 | 				{
3918 | 					$s = preg_replace('~(<meta .+? content="text/html; [\x00-\x20]+ charset=) #1
3919 | 											[-a-zA-Z\d]+
3920 | 											(" [^>]* >)  #2
3921 | 										~sixSX', '$1utf-8$2', $s);
3922 | 				}
3923 | 				if ($ext === 'xml' || $ext === 'xsl' || $ext === 'tpl')
3924 | 				{
3925 | 					$s = preg_replace('~(<\?xml .+? encoding=") #1
3926 | 											[-a-zA-Z\d]+
3927 | 											(" .*? \?>)         #2
3928 | 										~sixSX', '$1utf-8$2', $s);
3929 | 				}
3930 | 
3931 | 				if (! $is_simulate)
3932 | 				{
3933 | 					$bytes = @file_put_contents($file, $s);
3934 | 					if ($bytes === false)
3935 | 					{
3936 | 						if ($is_echo) echo '  Error to writing' . PHP_EOL;
3937 | 						return false;
3938 | 					}
3939 | 				}
3940 | 				if ($is_echo) echo '  ' . $charset . ' -> UTF-8' . PHP_EOL;
3941 | 				$counter++;
3942 | 			}
3943 | 			elseif ($is_recursive && is_dir($file))
3944 | 			{
3945 | 				if (! is_string($dirs_ignore_re) || ! preg_match($dirs_ignore_re, $name))
3946 | 				{
3947 | 					$c = self::convert_files_from($file, $files_re, $is_recursive, $charset, $dirs_ignore_re, $is_echo, $is_simulate);
3948 | 					if ($c === false) return false;
3949 | 					$counter += $c;
3950 | 				}
3951 | 			}
3952 | 		}
3953 | 		closedir($dh);
3954 | 		return $counter;
3955 | 	}
3956 | 
3957 | 	/**
3958 | 	 *
3959 | 	 * @param   int|string  $low
3960 | 	 * @param   int|string  $high
3961 | 	 * @param   int         $step
3962 | 	 * @return  array|bool         Returns FALSE if error occurred
3963 | 	 */
3964 | 	public static function range($low, $high, $step = 1)
3965 | 	{
3966 | 		if (! ReflectionTypeHint::isValid()) return false;
3967 | 		if (is_int($low) || is_int($high)) return range($low, $high, $step);  #speed improve
3968 | 		$low_cp  = self::ord($low);
3969 | 		$high_cp = self::ord($high);
3970 | 		if ($low_cp === false || $high_cp === false) return false;
3971 | 		$a = range($low_cp, $high_cp, $step);
3972 | 		return array_map(array('self', 'chr'), $a);
3973 | 	}
3974 | 
3975 | 	/**
3976 | 	 *
3977 | 	 * @param   string|null       $s
3978 | 	 * @param   string|array      $from
3979 | 	 * @param   string|null       $to
3980 | 	 * @return  string|bool|null         Returns FALSE if error occurred
3981 | 	 */
3982 | 	public static function strtr($s, $from, $to = null)
3983 | 	{
3984 | 		if (! ReflectionTypeHint::isValid()) return false;
3985 | 		if (is_null($s)) return $s;
3986 | 		if (is_array($from)) return strtr($s, $from); #speed improve
3987 | 		$keys   = self::str_split($from);
3988 | 		$values = self::str_split($to);
3989 | 		if ($keys === false || $values === false) return false;
3990 | 		$table = array_combine($keys, $values);
3991 | 		if (! is_array($table)) return false;
3992 | 		return strtr($s, $table);
3993 | 	}
3994 | 
3995 | 	public static function tests()
3996 | 	{
3997 | 		assert_options(ASSERT_ACTIVE,   true);
3998 | 		assert_options(ASSERT_BAIL,     true);
3999 | 		assert_options(ASSERT_WARNING,  true);
4000 | 		assert_options(ASSERT_QUIET_EVAL, false);
4001 | 		$a = array(
4002 | 			'self::html_entity_decode("&quot;&amp;&lt;&gt;", true) === "\"&<>"',
4003 | 			'self::html_entity_decode("&quot;&amp;&lt;&gt;", false) === "&quot;&amp;&lt;&gt;"',
4004 | 			'self::html_entity_decode("&amp;amp;", true) === "&amp;"',
4005 | 			'self::html_entity_decode("&amp;amp;", false) === "&amp;amp;"',
4006 | 			'self::html_entity_decode("&#034;", true) === "\""',
4007 | 			'self::html_entity_decode("&#034;", false) === "&quot;"',
4008 | 			'self::html_entity_decode("&#039;", true) === "\'"',
4009 | 			'self::html_entity_decode("&#039;", false) === "\'"',
4010 | 			'self::html_entity_decode("&#x22;", true) === "\""',
4011 | 			'self::html_entity_decode("&#x22;", false) === "&quot;"',
4012 | 
4013 | 			'self::array_change_key_case(array("АБВГД" => "АБВГД"), CASE_LOWER) === array("абвгд" => "АБВГД")',
4014 | 			'self::array_change_key_case(array("абвгд" => "абвгд"), CASE_UPPER) === array("АБВГД" => "абвгд")',
4015 | 
4016 | 			'self::blocks_check("Яндекс", "Cyrillic") === true',
4017 | 			'self::blocks_check("Google", "Basic Latin") === true',
4018 | 			'self::blocks_check("Google & Яндекс", array("Basic Latin", "Cyrillic")) === true',
4019 | 			'self::blocks_check("Ё-моё, Yandex!", array(array(0x20, 0x7E),    #[\x20-\x7E]
4020 | 														array(0x0410, 0x044F), #[A-Яa-я]
4021 | 														0x0401, #russian yo (Ё)
4022 | 														0x0451, #russian ye (ё)
4023 | 													)) === true',
4024 | 
4025 | 			'self::chunk_split("абвг", 2) === "аб\r\nвг"',
4026 | 			'self::chunk_split("абвг", 2, "|") === "аб|вг"',
4027 | 
4028 | 			'self::lowercase("1234-ABCD-АБВГ") === "1234-abcd-абвг"',
4029 | 			'self::lowercase(array("1234-ABCD-АБВГ" => "1234-ABCD-АБВГ")) === array("1234-ABCD-АБВГ" => "1234-abcd-абвг")',
4030 | 			'self::uppercase("1234-abcd-абвг") === "1234-ABCD-АБВГ"',
4031 | 			'self::uppercase(array("1234-abcd-абвг" => "1234-abcd-абвг")) === array("1234-abcd-абвг" => "1234-ABCD-АБВГ")',
4032 | 
4033 | 			'self::convert_from(self::convert_to("123-ABC-abc-АБВ-абв", $charset = "cp1251"), $charset = "cp1251") === "123-ABC-abc-АБВ-абв"',
4034 | 
4035 | 			'self::diactrical_remove("вдох\xc2\xadно\xc2\xadве\xcc\x81\xc2\xadние") === "вдох\xc2\xadно\xc2\xadве\xc2\xadние"',
4036 | 			'self::diactrical_remove("вдох\xc2\xadно\xc2\xadве\xcc\x81\xc2\xadние", array("\xc2\xad")) === "вдохновение"',
4037 | 			'self::diactrical_remove("вдох\xc2\xadно\xc2\xadве\xcc\x81\xc2\xadние", array("\xc2\xad"), true, $restore_table) === "вдохновение"',
4038 | 			'self::diactrical_restore("вдохновение", $restore_table) === "вдох\xc2\xadно\xc2\xadве\xcc\x81\xc2\xadние"',
4039 | 
4040 | 			'self::is_utf8(file_get_contents(' . var_export(__FILE__, true) . ', true)) === true',
4041 | 			'self::is_utf8(file_get_contents(' . var_export(__FILE__, true) . ', false)) === true',
4042 | 			'self::is_ascii(file_get_contents(' . var_export(__FILE__, true) . ')) === false',
4043 | 
4044 | 			#range() uses ord() and chr()
4045 | 			'self::range("A", "D") === array("A", "B", "C", "D")',
4046 | 			'self::range("а", "г") === array("а", "б", "в", "г")',
4047 | 			'self::range(1, 3) === array(1, 2, 3)',
4048 | 
4049 | 			'"↔" === self::chr(self::ord("↔"))',
4050 | 			'"123-ABC-abc-АБВ-абв" === self::from_unicode(self::to_unicode("123-ABC-abc-АБВ-абв"))',
4051 | 			'self::strpos("123-ABC-abc-абв-АБВ-где", "АБВ") === 16',
4052 | 			'self::stripos("123-ABC-abc-абд-АБВ-где", "абв") === 16',
4053 | 			'self::strpos("123-ABC-abc", "АБВ") === false',
4054 | 			'self::strpos("123-АБВ-абв", "abc") === false',
4055 | 
4056 | 			'self::preg_quote_case_insensitive("123_слово_test") === "123_(с|С)(л|Л)(о|О)(в|В)(о|О)_[tT][eE][sS][tT]"',
4057 | 			'self::preg_quote_case_insensitive("123_test") === "(?i:123_test)"',
4058 | 
4059 | 			//'self::strlen(file_get_contents(' . var_export(__FILE__, true) . ', true))'
4060 | 		);
4061 | 		foreach ($a as $k => $v) if (! assert($v)) return false;
4062 | 
4063 | 		//$start_time = microtime(true);
4064 | 		//$s = file_get_contents(__FILE__);
4065 | 		//for ($i = 0; $i < 10; $i++) $r = self::html_entity_encode($s);
4066 | 		//$time = microtime(true) - $start_time;
4067 | 		//d($time, $r);
4068 | 
4069 | 		return true;
4070 | 	}
4071 | 
4072 | }


--------------------------------------------------------------------------------