├── .gitignore
├── tests
├── bootstrap.php
└── URLifyTest.php
├── .travis.yml
├── phpunit.xml
├── INSTALL
├── scripts
├── filter.php
├── downcode.php
└── transliterate.php
├── composer.json
├── LICENSE
├── README.md
└── URLify.php
/.gitignore:
--------------------------------------------------------------------------------
1 | vendor
2 | composer.phar
3 | composer.lock
4 | .idea
--------------------------------------------------------------------------------
/tests/bootstrap.php:
--------------------------------------------------------------------------------
1 |
2 |
3 | tests
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/INSTALL:
--------------------------------------------------------------------------------
1 | To install URLify, you can add it as a dependency by downloading the composer.phar executable.
2 |
3 | `$ curl -s http://getcomposer.org/installer | php`
4 |
5 | and run install
6 |
7 | `$ php composer.phar install`
8 |
9 | For more details, see http://getcomposer.org.
10 |
11 | and install URLify library:
12 |
13 | `$ composer require jbroadway/urlify`
14 |
15 |
--------------------------------------------------------------------------------
/scripts/filter.php:
--------------------------------------------------------------------------------
1 | 2) {
8 | die ("Usage (argument): php " . basename(__FILE__) . " \"\"\nUsage (pipe): | php " . basename(__FILE__) . "\n");
9 | }
10 |
11 | //Process the provided argument
12 | if($argc === 2) {
13 | $s = $argv[1];
14 | //Or read from stdin if the argument wasn't present
15 | } else {
16 | $piped = true;
17 | $s = file_get_contents("php://stdin");
18 | }
19 |
20 | echo URLify::filter ($s) . ($piped ? "\n" : "");
21 |
--------------------------------------------------------------------------------
/scripts/downcode.php:
--------------------------------------------------------------------------------
1 | 2) {
8 | die ("Usage (argument): php " . basename(__FILE__) . " \"\"\nUsage (pipe): | php " . basename(__FILE__) . "\n");
9 | }
10 |
11 | //Process the provided argument
12 | if($argc === 2) {
13 | $s = $argv[1];
14 | //Or read from stdin if the argument wasn't present
15 | } else {
16 | $piped = true;
17 | $s = file_get_contents("php://stdin");
18 | }
19 |
20 | echo URLify::downcode ($s) . ($piped ? "\n" : "");
21 |
--------------------------------------------------------------------------------
/scripts/transliterate.php:
--------------------------------------------------------------------------------
1 | 2) {
8 | die ("Usage (argument): php " . basename(__FILE__) . " \"\"\nUsage (pipe): | php " . basename(__FILE__) . "\n");
9 | }
10 |
11 | //Process the provided argument
12 | if($argc === 2) {
13 | $s = $argv[1];
14 | //Or read from stdin if the argument wasn't present
15 | } else {
16 | $piped = true;
17 | $s = file_get_contents("php://stdin");
18 | }
19 |
20 | echo URLify::transliterate($s) . ($piped ? "\n" : "");
21 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "jbroadway/urlify",
3 | "type": "library",
4 | "description": "PHP port of URLify.js from the Django project. Transliterates non-ascii characters for use in URLs.",
5 | "keywords": ["urlify","transliterate","translit","transliteration","url","encode","slug","link","iconv"],
6 | "homepage": "https://github.com/jbroadway/urlify",
7 | "license": "BSD",
8 | "authors": [
9 | {
10 | "name": "Johnny Broadway",
11 | "email": "johnny@johnnybroadway.com",
12 | "homepage": "http://www.johnnybroadway.com/"
13 | }
14 | ],
15 | "require": {
16 | "php": ">=5.3.0"
17 | },
18 | "autoload": {
19 | "psr-0": { "URLify": "" }
20 | },
21 | "extra": {
22 | "branch-alias": {
23 | "dev-master": "1.0-dev"
24 | }
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) Django Software Foundation and individual contributors.
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without modification,
5 | are permitted provided that the following conditions are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright notice,
8 | this list of conditions and the following disclaimer.
9 |
10 | 2. Redistributions in binary form must reproduce the above copyright
11 | notice, this list of conditions and the following disclaimer in the
12 | documentation and/or other materials provided with the distribution.
13 |
14 | 3. Neither the name of Django nor the names of its contributors may be used
15 | to endorse or promote products derived from this software without
16 | specific prior written permission.
17 |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # URLify for PHP
2 |
3 | A PHP port of [URLify.js](https://github.com/django/django/blob/master/django/contrib/admin/static/admin/js/urlify.js)
4 | from the Django project. Handles symbols from Latin languages as well as Arabic, Azerbaijani, Czech, German, Greek, Kazakh,
5 | Latvian, Lithuanian, Persian, Polish, Romanian, Bulgarian, Russian, Serbian, Turkish, Ukrainian and Vietnamese. Symbols that cannot
6 | be transliterated are simply omitted.
7 |
8 | ## Usage:
9 |
10 | To generate slugs for URLs:
11 |
12 | ```php
13 |
22 | ```
23 |
24 | To generate slugs for file names:
25 |
26 | ```php
27 |
33 | ```
34 |
35 |
36 | To simply transliterate characters:
37 |
38 | ```php
39 |
53 | ```
54 |
55 | To extend the character list:
56 |
57 | ```php
58 | '?', '®' => '(r)', '¼' => '1/4',
62 | '½' => '1/2', '¾' => '3/4', '¶' => 'P'
63 | ));
64 |
65 | echo URLify::downcode ('¿ ® ¼ ½ ¾ ¶');
66 | // "? (r) 1/4 1/2 3/4 P"
67 |
68 | ?>
69 | ```
70 |
71 | To extend the list of words to remove:
72 |
73 | ```php
74 |
79 | ```
80 |
81 | To prioritize a certain language map:
82 |
83 | ```php
84 |
93 | ```
94 | Please note that the "ü" is transliterated to "ue" in the first case, whereas it results in a simple "u" in the latter.
95 |
--------------------------------------------------------------------------------
/tests/URLifyTest.php:
--------------------------------------------------------------------------------
1 | assertEquals (' J\'etudie le francais ', URLify::downcode (' J\'étudie le français '));
5 | $this->assertEquals ('Lo siento, no hablo espanol.', URLify::downcode ('Lo siento, no hablo español.'));
6 | $this->assertEquals ('F3PWS', URLify::downcode ('ΦΞΠΏΣ'));
7 | $this->assertEquals ('foo-bar', URLify::filter ('_foo_bar_'));
8 | }
9 |
/**
 * Runs URLify::filter() over a table of inputs and checks each resulting slug.
 *
 * Every row holds the expected slug followed by the argument list passed to
 * URLify::filter(). Rows are evaluated in the order they are listed.
 */
function test_filter () {
	$cases = array (
		array ('jetudie-le-francais', array (' J\'étudie le français ')),
		array ('lo-siento-no-hablo-espanol', array ('Lo siento, no hablo español.')),
		array ('f3pws', array ('ΦΞΠΏΣ')),
		array ('', array ('大般若經')),
		array ('test-.txt', array ('test-大般若經.txt', 60, "", true)),
		array ('yakrhy-ltoytr', array ('ياكرهي لتويتر')),
		array ('saaat-25', array ('ساعت ۲۵')),
		array ('foto.jpg', array ('фото.jpg', 60, "", true)),
		// prioritization of language-specific maps
		array ('aouaou', array ('ÄÖÜäöü', 60, "tr")),
		array ('aeoeueaeoeue', array ('ÄÖÜäöü', 60, "de")),
		array ('bobby-mcferrin-dont-worry-be-happy', array ("Bobby McFerrin — Don't worry be happy", 600, "en")),
		// stripping and conversion of UTF-8 spaces
		array ('test-mahito-mukai', array ('向井 真人test (Mahito Mukai)')),
		// underscores are kept when $treat_underscore_as_space is false
		array ('text_with_underscore', array ('text_with_underscore', 60, "en", true, true, true, false)),
	);

	foreach ($cases as $case) {
		list ($expected, $arguments) = $case;
		$actual = call_user_func_array (array ('URLify', 'filter'), $arguments);
		$this->assertEquals ($expected, $actual);
	}
}
29 |
/**
 * Checks that characters registered through URLify::add_chars() are
 * transliterated by URLify::downcode().
 *
 * The first assertion pins the starting point: none of the symbols is
 * replaced before the custom map is registered. Each symbol appears exactly
 * once as an array key; a repeated key would silently discard the earlier
 * replacement, so the quarter and the half sign are kept distinct.
 */
function test_add_chars () {
	$this->assertEquals ('¿ ® ¼ ½ ¾ ¶', URLify::downcode ('¿ ® ¼ ½ ¾ ¶'));
	URLify::add_chars (array (
		'¿' => '?', '®' => '(r)', '¼' => '1/4',
		'½' => '1/2', '¾' => '3/4', '¶' => 'P'
	));
	$this->assertEquals ('? (r) 1/4 1/2 3/4 P', URLify::downcode ('¿ ® ¼ ½ ¾ ¶'));
}
38 |
/**
 * Checks that words registered through URLify::remove_words() are stripped
 * by URLify::filter().
 */
function test_remove_words () {
	$phrase = 'foo bar';

	// Before registration both words are kept in the slug.
	$before = URLify::filter ($phrase);
	$this->assertEquals ('foo-bar', $before);

	URLify::remove_words (array ('foo', 'bar'));

	// After registration nothing is left of the phrase.
	$after = URLify::filter ($phrase);
	$this->assertEquals ('', $after);
}
44 |
/**
 * Calls URLify::downcode() 1000 times with the language code -1.
 *
 * The test makes no assertions; it only exercises the repeated calls.
 */
function test_many_rounds_with_unknown_language_code () {
	$remaining = 1000;
	while ($remaining > 0) {
		URLify::downcode ('Lo siento, no hablo español.',-1);
		$remaining--;
	}
}
50 |
/**
 * Checks that URLify::filter() keeps registered words when its
 * $use_remove_list argument is false.
 */
function test_remove_words_disable () {
	URLify::remove_words (array ('foo', 'bar'));

	// Fifth argument (false) switches the remove list off.
	$slug = URLify::filter ('foo bar', 60, '', false, false);
	$this->assertEquals ('foo-bar', $slug);
}
55 | }
56 |
57 | ?>
58 |
--------------------------------------------------------------------------------
/URLify.php:
--------------------------------------------------------------------------------
1 | array ( /* German */
23 | 'Ä' => 'Ae', 'Ö' => 'Oe', 'Ü' => 'Ue', 'ä' => 'ae', 'ö' => 'oe', 'ü' => 'ue', 'ß' => 'ss',
24 | 'ẞ' => 'SS'
25 | ),
26 | 'latin' => array (
27 | 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A','Ă' => 'A', 'Æ' => 'AE', 'Ç' =>
28 | 'C', 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I', 'Í' => 'I', 'Î' => 'I',
29 | 'Ï' => 'I', 'Ð' => 'D', 'Ñ' => 'N', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' =>
30 | 'O', 'Ő' => 'O', 'Ø' => 'O', 'Œ' => 'OE' ,'Ș' => 'S','Ț' => 'T', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U', 'Ű' => 'U',
31 | 'Ý' => 'Y', 'Þ' => 'TH', 'ß' => 'ss', 'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' =>
32 | 'a', 'å' => 'a', 'ă' => 'a', 'æ' => 'ae', 'ç' => 'c', 'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e',
33 | 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ð' => 'd', 'ñ' => 'n', 'ò' => 'o', 'ó' =>
34 | 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ő' => 'o', 'ø' => 'o', 'œ' => 'oe', 'ș' => 's', 'ț' => 't', 'ù' => 'u', 'ú' => 'u',
35 | 'û' => 'u', 'ü' => 'u', 'ű' => 'u', 'ý' => 'y', 'þ' => 'th', 'ÿ' => 'y'
36 | ),
37 | 'latin_symbols' => array (
38 | '©' => '(c)'
39 | ),
40 | 'el' => array ( /* Greek */
41 | 'α' => 'a', 'β' => 'b', 'γ' => 'g', 'δ' => 'd', 'ε' => 'e', 'ζ' => 'z', 'η' => 'h', 'θ' => '8',
42 | 'ι' => 'i', 'κ' => 'k', 'λ' => 'l', 'μ' => 'm', 'ν' => 'n', 'ξ' => '3', 'ο' => 'o', 'π' => 'p',
43 | 'ρ' => 'r', 'σ' => 's', 'τ' => 't', 'υ' => 'y', 'φ' => 'f', 'χ' => 'x', 'ψ' => 'ps', 'ω' => 'w',
44 | 'ά' => 'a', 'έ' => 'e', 'ί' => 'i', 'ό' => 'o', 'ύ' => 'y', 'ή' => 'h', 'ώ' => 'w', 'ς' => 's',
45 | 'ϊ' => 'i', 'ΰ' => 'y', 'ϋ' => 'y', 'ΐ' => 'i',
46 | 'Α' => 'A', 'Β' => 'B', 'Γ' => 'G', 'Δ' => 'D', 'Ε' => 'E', 'Ζ' => 'Z', 'Η' => 'H', 'Θ' => '8',
47 | 'Ι' => 'I', 'Κ' => 'K', 'Λ' => 'L', 'Μ' => 'M', 'Ν' => 'N', 'Ξ' => '3', 'Ο' => 'O', 'Π' => 'P',
48 | 'Ρ' => 'R', 'Σ' => 'S', 'Τ' => 'T', 'Υ' => 'Y', 'Φ' => 'F', 'Χ' => 'X', 'Ψ' => 'PS', 'Ω' => 'W',
49 | 'Ά' => 'A', 'Έ' => 'E', 'Ί' => 'I', 'Ό' => 'O', 'Ύ' => 'Y', 'Ή' => 'H', 'Ώ' => 'W', 'Ϊ' => 'I',
50 | 'Ϋ' => 'Y'
51 | ),
52 | 'tr' => array ( /* Turkish */
53 | 'ş' => 's', 'Ş' => 'S', 'ı' => 'i', 'İ' => 'I', 'ç' => 'c', 'Ç' => 'C', 'ü' => 'u', 'Ü' => 'U',
54 | 'ö' => 'o', 'Ö' => 'O', 'ğ' => 'g', 'Ğ' => 'G'
55 | ),
56 | 'bg' => array( /* Bulgarian */
57 | 'Щ' => 'Sht', 'Ш' => 'Sh', 'Ч' => 'Ch', 'Ц' => 'C', 'Ю' => 'Yu', 'Я' => 'Ya',
58 | 'Ж' => 'J', 'А' => 'A', 'Б' => 'B', 'В' => 'V', 'Г' => 'G', 'Д' => 'D',
59 | 'Е' => 'E', 'З' => 'Z', 'И' => 'I', 'Й' => 'Y', 'К' => 'K', 'Л' => 'L',
60 | 'М' => 'M', 'Н' => 'N', 'О' => 'O', 'П' => 'P', 'Р' => 'R', 'С' => 'S',
61 | 'Т' => 'T', 'У' => 'U', 'Ф' => 'F', 'Х' => 'H', 'Ь' => '', 'Ъ' => 'A',
62 | 'щ' => 'sht', 'ш' => 'sh', 'ч' => 'ch', 'ц' => 'c', 'ю' => 'yu', 'я' => 'ya',
63 | 'ж' => 'j', 'а' => 'a', 'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd',
64 | 'е' => 'e', 'з' => 'z', 'и' => 'i', 'й' => 'y', 'к' => 'k', 'л' => 'l',
65 | 'м' => 'm', 'н' => 'n', 'о' => 'o', 'п' => 'p', 'р' => 'r', 'с' => 's',
66 | 'т' => 't', 'у' => 'u', 'ф' => 'f', 'х' => 'h', 'ь' => '', 'ъ' => 'a'
67 | ),
68 | 'ru' => array ( /* Russian */
69 | 'а' => 'a', 'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd', 'е' => 'e', 'ё' => 'yo', 'ж' => 'zh',
70 | 'з' => 'z', 'и' => 'i', 'й' => 'i', 'к' => 'k', 'л' => 'l', 'м' => 'm', 'н' => 'n', 'о' => 'o',
71 | 'п' => 'p', 'р' => 'r', 'с' => 's', 'т' => 't', 'у' => 'u', 'ф' => 'f', 'х' => 'h', 'ц' => 'c',
72 | 'ч' => 'ch', 'ш' => 'sh', 'щ' => 'sh', 'ъ' => '', 'ы' => 'y', 'ь' => '', 'э' => 'e', 'ю' => 'yu',
73 | 'я' => 'ya',
74 | 'А' => 'A', 'Б' => 'B', 'В' => 'V', 'Г' => 'G', 'Д' => 'D', 'Е' => 'E', 'Ё' => 'Yo', 'Ж' => 'Zh',
75 | 'З' => 'Z', 'И' => 'I', 'Й' => 'I', 'К' => 'K', 'Л' => 'L', 'М' => 'M', 'Н' => 'N', 'О' => 'O',
76 | 'П' => 'P', 'Р' => 'R', 'С' => 'S', 'Т' => 'T', 'У' => 'U', 'Ф' => 'F', 'Х' => 'H', 'Ц' => 'C',
77 | 'Ч' => 'Ch', 'Ш' => 'Sh', 'Щ' => 'Sh', 'Ъ' => '', 'Ы' => 'Y', 'Ь' => '', 'Э' => 'E', 'Ю' => 'Yu',
78 | 'Я' => 'Ya',
79 | '№' => ''
80 | ),
81 | 'uk' => array ( /* Ukrainian */
82 | 'Є' => 'Ye', 'І' => 'I', 'Ї' => 'Yi', 'Ґ' => 'G', 'є' => 'ye', 'і' => 'i', 'ї' => 'yi', 'ґ' => 'g'
83 | ),
84 | 'kk' => array ( /* Kazakh */
85 | 'Ә' => 'A', 'Ғ' => 'G', 'Қ' => 'Q', 'Ң' => 'N', 'Ө' => 'O', 'Ұ' => 'U', 'Ү' => 'U', 'Һ' => 'H',
86 | 'ә' => 'a', 'ғ' => 'g', 'қ' => 'q', 'ң' => 'n', 'ө' => 'o', 'ұ' => 'u', 'ү' => 'u', 'һ' => 'h',
87 | ),
88 | 'cs' => array ( /* Czech */
89 | 'č' => 'c', 'ď' => 'd', 'ě' => 'e', 'ň' => 'n', 'ř' => 'r', 'š' => 's', 'ť' => 't', 'ů' => 'u',
90 | 'ž' => 'z', 'Č' => 'C', 'Ď' => 'D', 'Ě' => 'E', 'Ň' => 'N', 'Ř' => 'R', 'Š' => 'S', 'Ť' => 'T',
91 | 'Ů' => 'U', 'Ž' => 'Z'
92 | ),
93 | 'pl' => array ( /* Polish */
94 | 'ą' => 'a', 'ć' => 'c', 'ę' => 'e', 'ł' => 'l', 'ń' => 'n', 'ó' => 'o', 'ś' => 's', 'ź' => 'z',
95 | 'ż' => 'z', 'Ą' => 'A', 'Ć' => 'C', 'Ę' => 'e', 'Ł' => 'L', 'Ń' => 'N', 'Ó' => 'O', 'Ś' => 'S',
96 | 'Ź' => 'Z', 'Ż' => 'Z'
97 | ),
98 | 'ro' => array ( /* Romanian */
99 | 'ă' => 'a', 'â' => 'a', 'î' => 'i', 'ș' => 's', 'ț' => 't', 'Ţ' => 'T', 'ţ' => 't'
100 | ),
101 | 'lv' => array ( /* Latvian */
102 | 'ā' => 'a', 'č' => 'c', 'ē' => 'e', 'ģ' => 'g', 'ī' => 'i', 'ķ' => 'k', 'ļ' => 'l', 'ņ' => 'n',
103 | 'š' => 's', 'ū' => 'u', 'ž' => 'z', 'Ā' => 'A', 'Č' => 'C', 'Ē' => 'E', 'Ģ' => 'G', 'Ī' => 'i',
104 | 'Ķ' => 'k', 'Ļ' => 'L', 'Ņ' => 'N', 'Š' => 'S', 'Ū' => 'u', 'Ž' => 'Z'
105 | ),
106 | 'lt' => array ( /* Lithuanian */
107 | 'ą' => 'a', 'č' => 'c', 'ę' => 'e', 'ė' => 'e', 'į' => 'i', 'š' => 's', 'ų' => 'u', 'ū' => 'u', 'ž' => 'z',
108 | 'Ą' => 'A', 'Č' => 'C', 'Ę' => 'E', 'Ė' => 'E', 'Į' => 'I', 'Š' => 'S', 'Ų' => 'U', 'Ū' => 'U', 'Ž' => 'Z'
109 | ),
110 | 'vn' => array ( /* Vietnamese */
111 | 'Á' => 'A', 'À' => 'A', 'Ả' => 'A', 'Ã' => 'A', 'Ạ' => 'A', 'Ă' => 'A', 'Ắ' => 'A', 'Ằ' => 'A', 'Ẳ' => 'A', 'Ẵ' => 'A', 'Ặ' => 'A', 'Â' => 'A', 'Ấ' => 'A', 'Ầ' => 'A', 'Ẩ' => 'A', 'Ẫ' => 'A', 'Ậ' => 'A',
112 | 'á' => 'a', 'à' => 'a', 'ả' => 'a', 'ã' => 'a', 'ạ' => 'a', 'ă' => 'a', 'ắ' => 'a', 'ằ' => 'a', 'ẳ' => 'a', 'ẵ' => 'a', 'ặ' => 'a', 'â' => 'a', 'ấ' => 'a', 'ầ' => 'a', 'ẩ' => 'a', 'ẫ' => 'a', 'ậ' => 'a',
113 | 'É' => 'E', 'È' => 'E', 'Ẻ' => 'E', 'Ẽ' => 'E', 'Ẹ' => 'E', 'Ê' => 'E', 'Ế' => 'E', 'Ề' => 'E', 'Ể' => 'E', 'Ễ' => 'E', 'Ệ' => 'E',
114 | 'é' => 'e', 'è' => 'e', 'ẻ' => 'e', 'ẽ' => 'e', 'ẹ' => 'e', 'ê' => 'e', 'ế' => 'e', 'ề' => 'e', 'ể' => 'e', 'ễ' => 'e', 'ệ' => 'e',
115 | 'Í' => 'I', 'Ì' => 'I', 'Ỉ' => 'I', 'Ĩ' => 'I', 'Ị' => 'I', 'í' => 'i', 'ì' => 'i', 'ỉ' => 'i', 'ĩ' => 'i', 'ị' => 'i',
116 | 'Ó' => 'O', 'Ò' => 'O', 'Ỏ' => 'O', 'Õ' => 'O', 'Ọ' => 'O', 'Ô' => 'O', 'Ố' => 'O', 'Ồ' => 'O', 'Ổ' => 'O', 'Ỗ' => 'O', 'Ộ' => 'O', 'Ơ' => 'O', 'Ớ' => 'O', 'Ờ' => 'O', 'Ở' => 'O', 'Ỡ' => 'O', 'Ợ' => 'O',
117 | 'ó' => 'o', 'ò' => 'o', 'ỏ' => 'o', 'õ' => 'o', 'ọ' => 'o', 'ô' => 'o', 'ố' => 'o', 'ồ' => 'o', 'ổ' => 'o', 'ỗ' => 'o', 'ộ' => 'o', 'ơ' => 'o', 'ớ' => 'o', 'ờ' => 'o', 'ở' => 'o', 'ỡ' => 'o', 'ợ' => 'o',
118 | 'Ú' => 'U', 'Ù' => 'U', 'Ủ' => 'U', 'Ũ' => 'U', 'Ụ' => 'U', 'Ư' => 'U', 'Ứ' => 'U', 'Ừ' => 'U', 'Ử' => 'U', 'Ữ' => 'U', 'Ự' => 'U',
119 | 'ú' => 'u', 'ù' => 'u', 'ủ' => 'u', 'ũ' => 'u', 'ụ' => 'u', 'ư' => 'u', 'ứ' => 'u', 'ừ' => 'u', 'ử' => 'u', 'ữ' => 'u', 'ự' => 'u',
120 | 'Ý' => 'Y', 'Ỳ' => 'Y', 'Ỷ' => 'Y', 'Ỹ' => 'Y', 'Ỵ' => 'Y', 'ý' => 'y', 'ỳ' => 'y', 'ỷ' => 'y', 'ỹ' => 'y', 'ỵ' => 'y',
121 | 'Đ' => 'D', 'đ' => 'd'
122 | ),
123 | 'ar' => array ( /* Arabic */
124 | 'أ' => 'a', 'ب' => 'b', 'ت' => 't', 'ث' => 'th', 'ج' => 'g', 'ح' => 'h', 'خ' => 'kh', 'د' => 'd',
125 | 'ذ' => 'th', 'ر' => 'r', 'ز' => 'z', 'س' => 's', 'ش' => 'sh', 'ص' => 's', 'ض' => 'd', 'ط' => 't',
126 | 'ظ' => 'th', 'ع' => 'aa', 'غ' => 'gh', 'ف' => 'f', 'ق' => 'k', 'ك' => 'k', 'ل' => 'l', 'م' => 'm',
127 | 'ن' => 'n', 'ه' => 'h', 'و' => 'o', 'ي' => 'y',
128 | 'ا' => 'a', 'إ' => 'a', 'آ' => 'a', 'ؤ' => 'o', 'ئ' => 'y', 'ء' => 'aa',
129 | '٠' => '0', '١' => '1', '٢' => '2', '٣' => '3', '٤' => '4', '٥' => '5', '٦' => '6', '٧' => '7', '٨' => '8', '٩' => '9',
130 | ),
131 | 'fa' => array ( /* Persian */
132 | 'گ' => 'g', 'ژ' => 'j', 'پ' => 'p', 'چ' => 'ch', 'ی' => 'y', 'ک' => 'k',
133 | '۰' => '0', '۱' => '1', '۲' => '2', '۳' => '3', '۴' => '4', '۵' => '5', '۶' => '6', '۷' => '7', '۸' => '8', '۹' => '9',
134 | ),
135 | 'sr' => array ( /* Serbian */
136 | 'ђ' => 'dj', 'ј' => 'j', 'љ' => 'lj', 'њ' => 'nj', 'ћ' => 'c', 'џ' => 'dz', 'đ' => 'dj',
137 | 'Ђ' => 'Dj', 'Ј' => 'j', 'Љ' => 'Lj', 'Њ' => 'Nj', 'Ћ' => 'C', 'Џ' => 'Dz', 'Đ' => 'Dj'
138 | ),
139 | 'az' => array ( /* Azerbaijani */
140 | 'ç' => 'c', 'ə' => 'e', 'ğ' => 'g', 'ı' => 'i', 'ö' => 'o', 'ş' => 's', 'ü' => 'u',
141 | 'Ç' => 'C', 'Ə' => 'E', 'Ğ' => 'G', 'İ' => 'I', 'Ö' => 'O', 'Ş' => 'S', 'Ü' => 'U'
142 | )
143 | );
144 |
145 | /**
146 | * List of words to remove from URLs.
147 | */
148 | public static $remove_list = array (
149 | 'a', 'an', 'as', 'at', 'before', 'but', 'by', 'for', 'from',
150 | 'is', 'in', 'into', 'like', 'of', 'off', 'on', 'onto', 'per',
151 | 'since', 'than', 'the', 'this', 'that', 'to', 'up', 'via',
152 | 'with'
153 | );
154 |
155 | /**
156 | * The character map.
157 | */
158 | private static $map = array ();
159 |
160 | /**
161 | * The character list as a string.
162 | */
163 | private static $chars = '';
164 |
165 | /**
166 | * The character list as a regular expression.
167 | */
168 | private static $regex = '';
169 |
170 | /**
171 | * The current language
172 | */
173 | private static $language = '';
174 |
/**
 * Builds the flat character map and the regular expression matching its keys.
 *
 * Nothing is done when a map has already been built and either no language
 * was requested or the requested language is the one currently active.
 * When `$language` is a key of self::$maps, that map is moved to the end of
 * the list, so its replacements overwrite those of every earlier map that
 * defines the same character.
 *
 * @param string $language Key of the map in self::$maps to prioritize
 */
private static function init ($language = "")
{
	if (count (self::$map) > 0 && (($language == "") || ($language == self::$language))) {
		return;
	}

	/* Is a specific map associated with $language ? */
	if (isset (self::$maps[$language]) && is_array (self::$maps[$language])) {
		/* Move this map to the end. This means it will have priority over others */
		$m = self::$maps[$language];
		unset (self::$maps[$language]);
		self::$maps[$language] = $m;
	}

	/* Reset static vars */
	self::$language = $language;
	self::$map = array ();
	self::$chars = '';

	/* Escaped copy of the characters, used only inside the character class */
	$class = '';

	foreach (self::$maps as $map) {
		foreach ($map as $orig => $conv) {
			self::$map[$orig] = $conv;
			self::$chars .= $orig;
			/*
			 * Keys supplied through add_chars() may be regex metacharacters
			 * ("]", "\", "^", "-") or the "/" delimiter; quoting keeps the
			 * pattern valid. The cast covers numeric keys, which PHP stores
			 * as integers.
			 */
			$class .= preg_quote ((string) $orig, '/');
		}
	}

	self::$regex = '/[' . $class . ']/u';
}
206 |
/**
 * Registers an additional character map.
 *
 * The map is appended to the list of known maps, and the cached lookup
 * table and character list are emptied.
 *
 * @param array $map Hash of original character => replacement
 * @throws LogicException When `$map` is not an array
 */
public static function add_chars ($map)
{
	if (is_array ($map) === false) {
		throw new LogicException ('$map must be an associative array.');
	}

	// Clear the cached lookup data before storing the new map.
	self::$chars = '';
	self::$map = array ();

	self::$maps[] = $map;
}
220 |
/**
 * Adds entries to the list of words removed from URLs.
 *
 * A value that is not an array is wrapped in a one-element array before
 * being merged into self::$remove_list.
 *
 * @param mixed $words A single word or an array of words
 */
public static function remove_words ($words)
{
	$additions = $words;
	if (! is_array ($additions)) {
		$additions = array ($additions);
	}

	self::$remove_list = array_merge (self::$remove_list, $additions);
}
231 |
/**
 * Transliterates characters to their ASCII equivalents.
 *
 * `$language` gives priority to the map of a specific language, which is
 * useful when languages have different rules for the same character.
 * Characters without an entry in the character map are left untouched.
 *
 * @param string $text     The text to transliterate
 * @param string $language Key of the language map to prioritize
 * @return string
 */
public static function downcode ($text, $language = "")
{
	self::init ($language);

	$pairs = self::$map;
	// An empty-string key can never match anything; drop it so strtr()
	// does not reject the whole table on older PHP versions.
	if (array_key_exists ('', $pairs)) {
		unset ($pairs['']);
	}

	// strtr() replaces every mapped character in a single pass over the
	// text. This avoids one whole-string str_replace() per matched
	// character (duplicates included) and guarantees that text produced
	// by one replacement is never transliterated a second time.
	return strtr ((string) $text, $pairs);
}
254 |
/**
 * Filters a string into a slug, e.g., "Petty theft" to "petty-theft".
 *
 * @param string $text The text to return filtered
 * @param int $length The maximum length (after filtering) of the string to be returned
 * @param string $language The transliteration language, passed down to downcode()
 * @param bool $file_name Whether the text is a file name, in which case dots are kept
 * @param bool $use_remove_list Whether to remove the words listed in self::$remove_list
 * @param bool $lower_case Whether to lowercase the result (default) or keep the casing
 * @param bool $treat_underscore_as_space Treat underscores as spaces, so they are replaced with "-"
 * @return string
 */
public static function filter ($text, $length = 60, $language = "", $file_name = false, $use_remove_list = true, $lower_case = true, $treat_underscore_as_space = true)
{
	$text = self::downcode ($text, $language);

	if ($use_remove_list && count (self::$remove_list) > 0) {
		// Quote every word so that entries containing regex metacharacters
		// or the "/" delimiter are matched literally instead of breaking
		// (or silently changing) the pattern.
		$words = array ();
		foreach (self::$remove_list as $word) {
			$words[] = preg_quote ($word, '/');
		}

		// remove all these words from the string before urlifying
		$text = preg_replace ('/\b(' . join ('|', $words) . ')\b/i', '', $text);
	}

	// if downcode doesn't hit, the char will be stripped here
	$remove_pattern = ($file_name) ? '/[^_\-.\-a-zA-Z0-9\s]/u' : '/[^\s_\-a-zA-Z0-9]/u';
	$text = preg_replace ($remove_pattern, '', $text); // remove unneeded chars
	if ($treat_underscore_as_space) {
		$text = str_replace ('_', ' ', $text); // treat underscores as spaces
	}
	$text = preg_replace ('/^\s+|\s+$/u', '', $text); // trim leading/trailing spaces
	$text = preg_replace ('/[-\s]+/u', '-', $text); // convert spaces to hyphens
	if ($lower_case) {
		$text = strtolower ($text); // convert to lowercase
	}

	// cut to the first $length chars, then drop hyphens left at either end
	return trim (substr ($text, 0, $length), '-');
}
289 |
/**
 * Alias of `URLify::downcode()`.
 *
 * @param string $text     The text to transliterate
 * @param string $language Key of the language map to prioritize, passed to downcode()
 * @return string
 */
public static function transliterate ($text, $language = "")
{
	return self::downcode ($text, $language);
}
297 | }
298 |
--------------------------------------------------------------------------------