├── .gitignore ├── tests ├── bootstrap.php └── URLifyTest.php ├── .travis.yml ├── phpunit.xml ├── INSTALL ├── scripts ├── filter.php ├── downcode.php └── transliterate.php ├── composer.json ├── LICENSE ├── README.md └── URLify.php /.gitignore: -------------------------------------------------------------------------------- 1 | vendor 2 | composer.phar 3 | composer.lock 4 | .idea -------------------------------------------------------------------------------- /tests/bootstrap.php: -------------------------------------------------------------------------------- 1 | 2 | 3 | tests 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | To install URLify, you can add it as a dependency by downloading the composer.phar executable. 2 | 3 | `$ curl -s http://getcomposer.org/installer | php` 4 | 5 | and run install 6 | 7 | `$ php composer.phar install` 8 | 9 | For more details, see http://getcomposer.org. 10 | 11 | and install URLify library: 12 | 13 | `$ composer require jbroadway/urlify` 14 | 15 | -------------------------------------------------------------------------------- /scripts/filter.php: -------------------------------------------------------------------------------- 1 | 2) { 8 | die ("Usage (argument): php " . basename(__FILE__) . " \"\"\nUsage (pipe): | php " . basename(__FILE__) . "\n"); 9 | } 10 | 11 | //Process the provided argument 12 | if($argc === 2) { 13 | $s = $argv[1]; 14 | //Or read from stdin if the argument wasn't present 15 | } else { 16 | $piped = true; 17 | $s = file_get_contents("php://stdin"); 18 | } 19 | 20 | echo URLify::filter ($s) . ($piped ? "\n" : ""); 21 | -------------------------------------------------------------------------------- /scripts/downcode.php: -------------------------------------------------------------------------------- 1 | 2) { 8 | die ("Usage (argument): php " . 
basename(__FILE__) . " \"\"\nUsage (pipe): | php " . basename(__FILE__) . "\n"); 9 | } 10 | 11 | //Process the provided argument 12 | if($argc === 2) { 13 | $s = $argv[1]; 14 | //Or read from stdin if the argument wasn't present 15 | } else { 16 | $piped = true; 17 | $s = file_get_contents("php://stdin"); 18 | } 19 | 20 | echo URLify::downcode ($s) . ($piped ? "\n" : ""); 21 | -------------------------------------------------------------------------------- /scripts/transliterate.php: -------------------------------------------------------------------------------- 1 | 2) { 8 | die ("Usage (argument): php " . basename(__FILE__) . " \"\"\nUsage (pipe): | php " . basename(__FILE__) . "\n"); 9 | } 10 | 11 | //Process the provided argument 12 | if($argc === 2) { 13 | $s = $argv[1]; 14 | //Or read from stdin if the argument wasn't present 15 | } else { 16 | $piped = true; 17 | $s = file_get_contents("php://stdin"); 18 | } 19 | 20 | echo URLify::transliterate($s) . ($piped ? "\n" : ""); 21 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "jbroadway/urlify", 3 | "type": "library", 4 | "description": "PHP port of URLify.js from the Django project. 
Transliterates non-ascii characters for use in URLs.", 5 | "keywords": ["urlify","transliterate","translit","transliteration","url","encode","slug","link","iconv"], 6 | "homepage": "https://github.com/jbroadway/urlify", 7 | "license": "BSD", 8 | "authors": [ 9 | { 10 | "name": "Johnny Broadway", 11 | "email": "johnny@johnnybroadway.com", 12 | "homepage": "http://www.johnnybroadway.com/" 13 | } 14 | ], 15 | "require": { 16 | "php": ">=5.3.0" 17 | }, 18 | "autoload": { 19 | "psr-0": { "URLify": "" } 20 | }, 21 | "extra": { 22 | "branch-alias": { 23 | "dev-master": "1.0-dev" 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) Django Software Foundation and individual contributors. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of Django nor the names of its contributors may be used 15 | to endorse or promote products derived from this software without 16 | specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # URLify for PHP 2 | 3 | A PHP port of [URLify.js](https://github.com/django/django/blob/master/django/contrib/admin/static/admin/js/urlify.js) 4 | from the Django project. Handles symbols from Latin languages as well as Arabic, Azerbaijani, Czech, German, Greek, Kazakh, 5 | Latvian, Lithuanian, Persian, Polish, Romanian, Bulgarian, Russian, Serbian, Turkish, Ukrainian and Vietnamese. Symbols it cannot 6 | transliterate it will simply omit. 7 | 8 | ## Usage: 9 | 10 | To generate slugs for URLs: 11 | 12 | ```php 13 | 22 | ``` 23 | 24 | To generate slugs for file names: 25 | 26 | ```php 27 | 33 | ``` 34 | 35 | 36 | To simply transliterate characters: 37 | 38 | ```php 39 | 53 | ``` 54 | 55 | To extend the character list: 56 | 57 | ```php 58 | '?', '®' => '(r)', '¼' => '1/4', 62 | '½' => '1/2', '¾' => '3/4', '¶' => 'P' 63 | )); 64 | 65 | echo URLify::downcode ('¿ ® ¼ ½ ¾ ¶'); 66 | // "? (r) 1/4 1/2 3/4 P" 67 | 68 | ?> 69 | ``` 70 | 71 | To extend the list of words to remove: 72 | 73 | ```php 74 | 79 | ``` 80 | 81 | To prioritize a certain language map: 82 | 83 | ```php 84 | 93 | ``` 94 | Please note that the "ü" is transliterated to "ue" in the first case, whereas it results in a simple "u" in the latter. 
95 | -------------------------------------------------------------------------------- /tests/URLifyTest.php: -------------------------------------------------------------------------------- 1 | assertEquals (' J\'etudie le francais ', URLify::downcode (' J\'étudie le français ')); 5 | $this->assertEquals ('Lo siento, no hablo espanol.', URLify::downcode ('Lo siento, no hablo español.')); 6 | $this->assertEquals ('F3PWS', URLify::downcode ('ΦΞΠΏΣ')); 7 | $this->assertEquals ('foo-bar', URLify::filter ('_foo_bar_')); 8 | } 9 | 10 | function test_filter () { 11 | $this->assertEquals ('jetudie-le-francais', URLify::filter (' J\'étudie le français ')); 12 | $this->assertEquals ('lo-siento-no-hablo-espanol', URLify::filter ('Lo siento, no hablo español.')); 13 | $this->assertEquals ('f3pws', URLify::filter ('ΦΞΠΏΣ')); 14 | $this->assertEquals ('', URLify::filter('大般若經')); 15 | $this->assertEquals ('test-.txt', URLify::filter('test-大般若經.txt', 60, "", $file_name = true)); 16 | $this->assertEquals ('yakrhy-ltoytr', URLify::filter('ياكرهي لتويتر')); 17 | $this->assertEquals ('saaat-25', URLify::filter('ساعت ۲۵')); 18 | $this->assertEquals ('foto.jpg', URLify::filter ('фото.jpg', 60, "", $file_name = true)); 19 | // priorization of language-specific maps 20 | $this->assertEquals ('aouaou', URLify::filter ('ÄÖÜäöü',60,"tr")); 21 | $this->assertEquals ('aeoeueaeoeue', URLify::filter ('ÄÖÜäöü',60,"de")); 22 | 23 | $this->assertEquals ('bobby-mcferrin-dont-worry-be-happy', URLify::filter ("Bobby McFerrin — Don't worry be happy",600,"en")); 24 | // test stripping and conversion of UTF-8 spaces 25 | $this->assertEquals ('test-mahito-mukai', URLify::filter('向井 真人test (Mahito Mukai)')); 26 | // Treat underscore as space 27 | $this->assertEquals ('text_with_underscore', URLify::filter('text_with_underscore', 60, "en", true, true, true, false)); 28 | } 29 | 30 | function test_add_chars () { 31 | $this->assertEquals ('¿ ® ¼ ¼ ¾ ¶', URLify::downcode ('¿ ® ¼ ¼ ¾ ¶')); 32 | 
URLify::add_chars (array ( 33 | '¿' => '?', '®' => '(r)', '¼' => '1/4', 34 | '¼' => '1/2', '¾' => '3/4', '¶' => 'P' 35 | )); 36 | $this->assertEquals ('? (r) 1/2 1/2 3/4 P', URLify::downcode ('¿ ® ¼ ¼ ¾ ¶')); 37 | } 38 | 39 | function test_remove_words () { 40 | $this->assertEquals ('foo-bar', URLify::filter ('foo bar')); 41 | URLify::remove_words (array ('foo', 'bar')); 42 | $this->assertEquals ('', URLify::filter ('foo bar')); 43 | } 44 | 45 | function test_many_rounds_with_unknown_language_code () { 46 | for ($i = 0; $i < 1000; $i++) { 47 | URLify::downcode ('Lo siento, no hablo español.',-1); 48 | } 49 | } 50 | 51 | function test_remove_words_disable () { 52 | URLify::remove_words (array ('foo', 'bar')); 53 | $this->assertEquals ('foo-bar', URLify::filter ('foo bar', 60, '', false, false)); 54 | } 55 | } 56 | 57 | ?> 58 | -------------------------------------------------------------------------------- /URLify.php: -------------------------------------------------------------------------------- 1 | array ( /* German */ 23 | 'Ä' => 'Ae', 'Ö' => 'Oe', 'Ü' => 'Ue', 'ä' => 'ae', 'ö' => 'oe', 'ü' => 'ue', 'ß' => 'ss', 24 | 'ẞ' => 'SS' 25 | ), 26 | 'latin' => array ( 27 | 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A','Ă' => 'A', 'Æ' => 'AE', 'Ç' => 28 | 'C', 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 29 | 'Ï' => 'I', 'Ð' => 'D', 'Ñ' => 'N', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 30 | 'O', 'Ő' => 'O', 'Ø' => 'O', 'Œ' => 'OE' ,'Ș' => 'S','Ț' => 'T', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U', 'Ű' => 'U', 31 | 'Ý' => 'Y', 'Þ' => 'TH', 'ß' => 'ss', 'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 32 | 'a', 'å' => 'a', 'ă' => 'a', 'æ' => 'ae', 'ç' => 'c', 'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e', 33 | 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ð' => 'd', 'ñ' => 'n', 'ò' => 'o', 'ó' => 34 | 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ő' => 'o', 'ø' => 'o', 'œ' => 'oe', 'ș' 
=> 's', 'ț' => 't', 'ù' => 'u', 'ú' => 'u', 35 | 'û' => 'u', 'ü' => 'u', 'ű' => 'u', 'ý' => 'y', 'þ' => 'th', 'ÿ' => 'y' 36 | ), 37 | 'latin_symbols' => array ( 38 | '©' => '(c)' 39 | ), 40 | 'el' => array ( /* Greek */ 41 | 'α' => 'a', 'β' => 'b', 'γ' => 'g', 'δ' => 'd', 'ε' => 'e', 'ζ' => 'z', 'η' => 'h', 'θ' => '8', 42 | 'ι' => 'i', 'κ' => 'k', 'λ' => 'l', 'μ' => 'm', 'ν' => 'n', 'ξ' => '3', 'ο' => 'o', 'π' => 'p', 43 | 'ρ' => 'r', 'σ' => 's', 'τ' => 't', 'υ' => 'y', 'φ' => 'f', 'χ' => 'x', 'ψ' => 'ps', 'ω' => 'w', 44 | 'ά' => 'a', 'έ' => 'e', 'ί' => 'i', 'ό' => 'o', 'ύ' => 'y', 'ή' => 'h', 'ώ' => 'w', 'ς' => 's', 45 | 'ϊ' => 'i', 'ΰ' => 'y', 'ϋ' => 'y', 'ΐ' => 'i', 46 | 'Α' => 'A', 'Β' => 'B', 'Γ' => 'G', 'Δ' => 'D', 'Ε' => 'E', 'Ζ' => 'Z', 'Η' => 'H', 'Θ' => '8', 47 | 'Ι' => 'I', 'Κ' => 'K', 'Λ' => 'L', 'Μ' => 'M', 'Ν' => 'N', 'Ξ' => '3', 'Ο' => 'O', 'Π' => 'P', 48 | 'Ρ' => 'R', 'Σ' => 'S', 'Τ' => 'T', 'Υ' => 'Y', 'Φ' => 'F', 'Χ' => 'X', 'Ψ' => 'PS', 'Ω' => 'W', 49 | 'Ά' => 'A', 'Έ' => 'E', 'Ί' => 'I', 'Ό' => 'O', 'Ύ' => 'Y', 'Ή' => 'H', 'Ώ' => 'W', 'Ϊ' => 'I', 50 | 'Ϋ' => 'Y' 51 | ), 52 | 'tr' => array ( /* Turkish */ 53 | 'ş' => 's', 'Ş' => 'S', 'ı' => 'i', 'İ' => 'I', 'ç' => 'c', 'Ç' => 'C', 'ü' => 'u', 'Ü' => 'U', 54 | 'ö' => 'o', 'Ö' => 'O', 'ğ' => 'g', 'Ğ' => 'G' 55 | ), 56 | 'bg' => array( /* Bulgarian */ 57 | 'Щ' => 'Sht', 'Ш' => 'Sh', 'Ч' => 'Ch', 'Ц' => 'C', 'Ю' => 'Yu', 'Я' => 'Ya', 58 | 'Ж' => 'J', 'А' => 'A', 'Б' => 'B', 'В' => 'V', 'Г' => 'G', 'Д' => 'D', 59 | 'Е' => 'E', 'З' => 'Z', 'И' => 'I', 'Й' => 'Y', 'К' => 'K', 'Л' => 'L', 60 | 'М' => 'M', 'Н' => 'N', 'О' => 'O', 'П' => 'P', 'Р' => 'R', 'С' => 'S', 61 | 'Т' => 'T', 'У' => 'U', 'Ф' => 'F', 'Х' => 'H', 'Ь' => '', 'Ъ' => 'A', 62 | 'щ' => 'sht', 'ш' => 'sh', 'ч' => 'ch', 'ц' => 'c', 'ю' => 'yu', 'я' => 'ya', 63 | 'ж' => 'j', 'а' => 'a', 'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd', 64 | 'е' => 'e', 'з' => 'z', 'и' => 'i', 'й' => 'y', 'к' => 'k', 'л' => 'l', 65 | 'м' => 'm', 'н' => 'n', 'о' 
=> 'o', 'п' => 'p', 'р' => 'r', 'с' => 's', 66 | 'т' => 't', 'у' => 'u', 'ф' => 'f', 'х' => 'h', 'ь' => '', 'ъ' => 'a' 67 | ), 68 | 'ru' => array ( /* Russian */ 69 | 'а' => 'a', 'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd', 'е' => 'e', 'ё' => 'yo', 'ж' => 'zh', 70 | 'з' => 'z', 'и' => 'i', 'й' => 'i', 'к' => 'k', 'л' => 'l', 'м' => 'm', 'н' => 'n', 'о' => 'o', 71 | 'п' => 'p', 'р' => 'r', 'с' => 's', 'т' => 't', 'у' => 'u', 'ф' => 'f', 'х' => 'h', 'ц' => 'c', 72 | 'ч' => 'ch', 'ш' => 'sh', 'щ' => 'sh', 'ъ' => '', 'ы' => 'y', 'ь' => '', 'э' => 'e', 'ю' => 'yu', 73 | 'я' => 'ya', 74 | 'А' => 'A', 'Б' => 'B', 'В' => 'V', 'Г' => 'G', 'Д' => 'D', 'Е' => 'E', 'Ё' => 'Yo', 'Ж' => 'Zh', 75 | 'З' => 'Z', 'И' => 'I', 'Й' => 'I', 'К' => 'K', 'Л' => 'L', 'М' => 'M', 'Н' => 'N', 'О' => 'O', 76 | 'П' => 'P', 'Р' => 'R', 'С' => 'S', 'Т' => 'T', 'У' => 'U', 'Ф' => 'F', 'Х' => 'H', 'Ц' => 'C', 77 | 'Ч' => 'Ch', 'Ш' => 'Sh', 'Щ' => 'Sh', 'Ъ' => '', 'Ы' => 'Y', 'Ь' => '', 'Э' => 'E', 'Ю' => 'Yu', 78 | 'Я' => 'Ya', 79 | '№' => '' 80 | ), 81 | 'uk' => array ( /* Ukrainian */ 82 | 'Є' => 'Ye', 'І' => 'I', 'Ї' => 'Yi', 'Ґ' => 'G', 'є' => 'ye', 'і' => 'i', 'ї' => 'yi', 'ґ' => 'g' 83 | ), 84 | 'kk' => array ( /* Kazakh */ 85 | 'Ә' => 'A', 'Ғ' => 'G', 'Қ' => 'Q', 'Ң' => 'N', 'Ө' => 'O', 'Ұ' => 'U', 'Ү' => 'U', 'Һ' => 'H', 86 | 'ә' => 'a', 'ғ' => 'g', 'қ' => 'q', 'ң' => 'n', 'ө' => 'o', 'ұ' => 'u', 'ү' => 'u', 'һ' => 'h', 87 | ), 88 | 'cs' => array ( /* Czech */ 89 | 'č' => 'c', 'ď' => 'd', 'ě' => 'e', 'ň' => 'n', 'ř' => 'r', 'š' => 's', 'ť' => 't', 'ů' => 'u', 90 | 'ž' => 'z', 'Č' => 'C', 'Ď' => 'D', 'Ě' => 'E', 'Ň' => 'N', 'Ř' => 'R', 'Š' => 'S', 'Ť' => 'T', 91 | 'Ů' => 'U', 'Ž' => 'Z' 92 | ), 93 | 'pl' => array ( /* Polish */ 94 | 'ą' => 'a', 'ć' => 'c', 'ę' => 'e', 'ł' => 'l', 'ń' => 'n', 'ó' => 'o', 'ś' => 's', 'ź' => 'z', 95 | 'ż' => 'z', 'Ą' => 'A', 'Ć' => 'C', 'Ę' => 'e', 'Ł' => 'L', 'Ń' => 'N', 'Ó' => 'O', 'Ś' => 'S', 96 | 'Ź' => 'Z', 'Ż' => 'Z' 97 | ), 98 | 'ro' => array ( /* 
Romanian */ 99 | 'ă' => 'a', 'â' => 'a', 'î' => 'i', 'ș' => 's', 'ț' => 't', 'Ţ' => 'T', 'ţ' => 't' 100 | ), 101 | 'lv' => array ( /* Latvian */ 102 | 'ā' => 'a', 'č' => 'c', 'ē' => 'e', 'ģ' => 'g', 'ī' => 'i', 'ķ' => 'k', 'ļ' => 'l', 'ņ' => 'n', 103 | 'š' => 's', 'ū' => 'u', 'ž' => 'z', 'Ā' => 'A', 'Č' => 'C', 'Ē' => 'E', 'Ģ' => 'G', 'Ī' => 'i', 104 | 'Ķ' => 'k', 'Ļ' => 'L', 'Ņ' => 'N', 'Š' => 'S', 'Ū' => 'u', 'Ž' => 'Z' 105 | ), 106 | 'lt' => array ( /* Lithuanian */ 107 | 'ą' => 'a', 'č' => 'c', 'ę' => 'e', 'ė' => 'e', 'į' => 'i', 'š' => 's', 'ų' => 'u', 'ū' => 'u', 'ž' => 'z', 108 | 'Ą' => 'A', 'Č' => 'C', 'Ę' => 'E', 'Ė' => 'E', 'Į' => 'I', 'Š' => 'S', 'Ų' => 'U', 'Ū' => 'U', 'Ž' => 'Z' 109 | ), 110 | 'vn' => array ( /* Vietnamese */ 111 | 'Á' => 'A', 'À' => 'A', 'Ả' => 'A', 'Ã' => 'A', 'Ạ' => 'A', 'Ă' => 'A', 'Ắ' => 'A', 'Ằ' => 'A', 'Ẳ' => 'A', 'Ẵ' => 'A', 'Ặ' => 'A', 'Â' => 'A', 'Ấ' => 'A', 'Ầ' => 'A', 'Ẩ' => 'A', 'Ẫ' => 'A', 'Ậ' => 'A', 112 | 'á' => 'a', 'à' => 'a', 'ả' => 'a', 'ã' => 'a', 'ạ' => 'a', 'ă' => 'a', 'ắ' => 'a', 'ằ' => 'a', 'ẳ' => 'a', 'ẵ' => 'a', 'ặ' => 'a', 'â' => 'a', 'ấ' => 'a', 'ầ' => 'a', 'ẩ' => 'a', 'ẫ' => 'a', 'ậ' => 'a', 113 | 'É' => 'E', 'È' => 'E', 'Ẻ' => 'E', 'Ẽ' => 'E', 'Ẹ' => 'E', 'Ê' => 'E', 'Ế' => 'E', 'Ề' => 'E', 'Ể' => 'E', 'Ễ' => 'E', 'Ệ' => 'E', 114 | 'é' => 'e', 'è' => 'e', 'ẻ' => 'e', 'ẽ' => 'e', 'ẹ' => 'e', 'ê' => 'e', 'ế' => 'e', 'ề' => 'e', 'ể' => 'e', 'ễ' => 'e', 'ệ' => 'e', 115 | 'Í' => 'I', 'Ì' => 'I', 'Ỉ' => 'I', 'Ĩ' => 'I', 'Ị' => 'I', 'í' => 'i', 'ì' => 'i', 'ỉ' => 'i', 'ĩ' => 'i', 'ị' => 'i', 116 | 'Ó' => 'O', 'Ò' => 'O', 'Ỏ' => 'O', 'Õ' => 'O', 'Ọ' => 'O', 'Ô' => 'O', 'Ố' => 'O', 'Ồ' => 'O', 'Ổ' => 'O', 'Ỗ' => 'O', 'Ộ' => 'O', 'Ơ' => 'O', 'Ớ' => 'O', 'Ờ' => 'O', 'Ở' => 'O', 'Ỡ' => 'O', 'Ợ' => 'O', 117 | 'ó' => 'o', 'ò' => 'o', 'ỏ' => 'o', 'õ' => 'o', 'ọ' => 'o', 'ô' => 'o', 'ố' => 'o', 'ồ' => 'o', 'ổ' => 'o', 'ỗ' => 'o', 'ộ' => 'o', 'ơ' => 'o', 'ớ' => 'o', 'ờ' => 'o', 'ở' => 'o', 'ỡ' => 'o', 'ợ' => 'o', 118 | 
'Ú' => 'U', 'Ù' => 'U', 'Ủ' => 'U', 'Ũ' => 'U', 'Ụ' => 'U', 'Ư' => 'U', 'Ứ' => 'U', 'Ừ' => 'U', 'Ử' => 'U', 'Ữ' => 'U', 'Ự' => 'U', 119 | 'ú' => 'u', 'ù' => 'u', 'ủ' => 'u', 'ũ' => 'u', 'ụ' => 'u', 'ư' => 'u', 'ứ' => 'u', 'ừ' => 'u', 'ử' => 'u', 'ữ' => 'u', 'ự' => 'u', 120 | 'Ý' => 'Y', 'Ỳ' => 'Y', 'Ỷ' => 'Y', 'Ỹ' => 'Y', 'Ỵ' => 'Y', 'ý' => 'y', 'ỳ' => 'y', 'ỷ' => 'y', 'ỹ' => 'y', 'ỵ' => 'y', 121 | 'Đ' => 'D', 'đ' => 'd' 122 | ), 123 | 'ar' => array ( /* Arabic */ 124 | 'أ' => 'a', 'ب' => 'b', 'ت' => 't', 'ث' => 'th', 'ج' => 'g', 'ح' => 'h', 'خ' => 'kh', 'د' => 'd', 125 | 'ذ' => 'th', 'ر' => 'r', 'ز' => 'z', 'س' => 's', 'ش' => 'sh', 'ص' => 's', 'ض' => 'd', 'ط' => 't', 126 | 'ظ' => 'th', 'ع' => 'aa', 'غ' => 'gh', 'ف' => 'f', 'ق' => 'k', 'ك' => 'k', 'ل' => 'l', 'م' => 'm', 127 | 'ن' => 'n', 'ه' => 'h', 'و' => 'o', 'ي' => 'y', 128 | 'ا' => 'a', 'إ' => 'a', 'آ' => 'a', 'ؤ' => 'o', 'ئ' => 'y', 'ء' => 'aa', 129 | '٠' => '0', '١' => '1', '٢' => '2', '٣' => '3', '٤' => '4', '٥' => '5', '٦' => '6', '٧' => '7', '٨' => '8', '٩' => '9', 130 | ), 131 | 'fa' => array ( /* Persian */ 132 | 'گ' => 'g', 'ژ' => 'j', 'پ' => 'p', 'چ' => 'ch', 'ی' => 'y', 'ک' => 'k', 133 | '۰' => '0', '۱' => '1', '۲' => '2', '۳' => '3', '۴' => '4', '۵' => '5', '۶' => '6', '۷' => '7', '۸' => '8', '۹' => '9', 134 | ), 135 | 'sr' => array ( /* Serbian */ 136 | 'ђ' => 'dj', 'ј' => 'j', 'љ' => 'lj', 'њ' => 'nj', 'ћ' => 'c', 'џ' => 'dz', 'đ' => 'dj', 137 | 'Ђ' => 'Dj', 'Ј' => 'j', 'Љ' => 'Lj', 'Њ' => 'Nj', 'Ћ' => 'C', 'Џ' => 'Dz', 'Đ' => 'Dj' 138 | ), 139 | 'az' => array ( /* Azerbaijani */ 140 | 'ç' => 'c', 'ə' => 'e', 'ğ' => 'g', 'ı' => 'i', 'ö' => 'o', 'ş' => 's', 'ü' => 'u', 141 | 'Ç' => 'C', 'Ə' => 'E', 'Ğ' => 'G', 'İ' => 'I', 'Ö' => 'O', 'Ş' => 'S', 'Ü' => 'U' 142 | ) 143 | ); 144 | 145 | /** 146 | * List of words to remove from URLs. 
*/
    public static $remove_list = array (
        'a', 'an', 'as', 'at', 'before', 'but', 'by', 'for', 'from',
        'is', 'in', 'into', 'like', 'of', 'off', 'on', 'onto', 'per',
        'since', 'than', 'the', 'this', 'that', 'to', 'up', 'via',
        'with'
    );

    /**
     * The merged character map (original character => replacement),
     * rebuilt by init() from every entry of self::$maps.
     */
    private static $map = array ();

    /**
     * All keys of the merged character map concatenated into one string.
     */
    private static $chars = '';

    /**
     * Character-class regular expression built from self::$chars.
     */
    private static $regex = '';

    /**
     * The language code that was passed to the last (re)initialization.
     */
    private static $language = '';

    /**
     * Initializes the character map.
     *
     * Does nothing when the map is already built and either no language is
     * requested or the requested language is the one already active.
     * Otherwise self::$map, self::$chars and self::$regex are rebuilt from
     * self::$maps; when a map exists for $language it is moved to the end of
     * self::$maps first so that its entries override those of earlier maps.
     *
     * @param string $language
     */
    private static function init ($language = "")
    {
        // Normalize once so the comparisons below are strict and behave the
        // same on every PHP version (loose int/string comparison changed in PHP 8).
        $language = (string) $language;

        if (count (self::$map) > 0 && ($language === '' || $language === self::$language)) {
            return;
        }

        /* Is a specific map associated with $language ? */
        if (isset (self::$maps[$language]) && is_array (self::$maps[$language])) {
            /* Move this map to end. This means it will have priority over others */
            $m = self::$maps[$language];
            unset (self::$maps[$language]);
            self::$maps[$language] = $m;
        }

        /* Reset static vars */
        self::$language = $language;
        self::$map = array ();
        self::$chars = '';

        foreach (self::$maps as $map) {
            foreach ($map as $orig => $conv) {
                // Numeric keys arrive as integers; an empty key can never match
                // and would make strtr() fail on older PHP versions.
                $orig = (string) $orig;
                if ($orig === '') {
                    continue;
                }
                self::$map[$orig] = $conv;
                self::$chars .= $orig;
            }
        }

        // Escape the characters: a user-supplied key such as "]", "\", "-" or
        // "/" would otherwise corrupt the character class or end the pattern.
        self::$regex = '/[' . preg_quote (self::$chars, '/') . ']/u';
    }

    /**
     * Add new characters to the list. `$map` should be a hash of
     * original character => replacement.
     *
     * The merged map is cleared so that the next call to init() rebuilds it.
     *
     * @param array $map
     * @throws LogicException if $map is not an array
     */
    public static function add_chars ($map)
    {
        if (! is_array ($map)) {
            throw new LogicException ('$map must be an associative array.');
        }
        self::$maps[] = $map;
        self::$map = array ();
        self::$chars = '';
    }

    /**
     * Append words to the remove list. Accepts either single words
     * or an array of words. Words already on the list are not added twice.
     *
     * @param mixed $words
     */
    public static function remove_words ($words)
    {
        $words = is_array ($words) ? $words : array ($words);
        // De-duplicate so repeated calls do not keep growing the list (and
        // with it the regular expression that filter() builds from it).
        self::$remove_list = array_values (array_unique (array_merge (self::$remove_list, $words)));
    }

    /**
     * Transliterates characters to their ASCII equivalents.
     * $language specifies a priority for a specific language.
     * The latter is useful if languages have different rules for the same character.
     *
     * The text is returned unchanged when the character regex reports no
     * match or fails (preg_match() returning 0 or false).
     *
     * @param string $text
     * @param string $language
     * @return string
     */
    public static function downcode ($text, $language = "")
    {
        self::init ($language);

        // Pre-check with the regex: nothing to do when no mapped character is present.
        if (! preg_match (self::$regex, $text)) {
            return $text;
        }

        // One pass over the text; strtr() never re-scans its own output, so a
        // replacement can not be replaced again by a later map entry.
        return strtr ((string) $text, self::$map);
    }

    /**
     * Filters a string, e.g., "Petty theft" to "petty-theft"
     *
     * @param string $text The text to return filtered
     * @param int $length The length (after filtering) of the string to be returned
     * @param string $language The transliteration language, passed down to downcode()
     * @param bool $file_name Whether there should be an additional filter considering this is a filename
     * @param bool $use_remove_list Whether you want to remove specific elements previously set in self::$remove_list
     * @param bool $lower_case Whether you want the filter to maintain casing or lowercase everything (default)
     * @param bool $treat_underscore_as_space Treat underscore as space, so it will be replaced with "-"
     * @return string
     */
    public static function filter ($text, $length = 60, $language = "", $file_name = false, $use_remove_list = true, $lower_case = true, $treat_underscore_as_space = true)
    {
        $text = self::downcode ($text, $language);

        if ($use_remove_list) {
            // Words are matched literally: quote them so a word containing a
            // regex metacharacter (or the "/" delimiter) cannot break the pattern.
            $words = array ();
            foreach (self::$remove_list as $word) {
                $word = (string) $word;
                if ($word !== '') {
                    $words[] = preg_quote ($word, '/');
                }
            }
            if (count ($words) > 0) {
                // remove all these words from the string before urlifying
                $text = (string) preg_replace ('/\b(' . join ('|', $words) . ')\b/i', '', $text);
            }
        }

        // if downcode doesn't hit, the char will be stripped here
        // (file names additionally keep the dot)
        $remove_pattern = ($file_name) ? '/[^_\-.a-zA-Z0-9\s]/u' : '/[^\s_\-a-zA-Z0-9]/u';
        // preg_replace() returns null on failure; the casts keep $text a string.
        $text = (string) preg_replace ($remove_pattern, '', $text); // remove unneeded chars
        if ($treat_underscore_as_space) {
            $text = str_replace ('_', ' ', $text); // treat underscores as spaces
        }
        $text = (string) preg_replace ('/^\s+|\s+$/u', '', $text); // trim leading/trailing spaces
        $text = (string) preg_replace ('/[-\s]+/u', '-', $text); // convert spaces to hyphens
        if ($lower_case) {
            $text = strtolower ($text); // convert to lowercase
        }

        return trim (substr ($text, 0, $length), '-'); // trim to first $length chars
    }

    /**
     * Alias of `URLify::downcode()`.
     *
     * @param string $text
     * @param string $language Optional language priority, passed down to downcode()
     * @return string
     */
    public static function transliterate ($text, $language = "")
    {
        return self::downcode ($text, $language);
    }
}
--------------------------------------------------------------------------------