├── requirements.txt ├── tox.ini ├── .gitignore ├── .travis.yml ├── slugify ├── alt_translates.py ├── __init__.py ├── main.py └── tests.py ├── setup.py └── README.rst /requirements.txt: -------------------------------------------------------------------------------- 1 | regex 2 | Unidecode>=0.04.14,<0.05 3 | nose 4 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27,py33,py34,py35,pypy 3 | 4 | [testenv] 5 | commands= 6 | python slugify/tests.py 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .tox/ 2 | .idea/ 3 | 4 | .hgignore 5 | 6 | *.pyc 7 | *.pyo 8 | *.orig 9 | *.egg-info 10 | *~ 11 | 12 | build/ 13 | dist/ 14 | 15 | # Common virtualenv names 16 | virtualenv 17 | venv 18 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - '2.7' 5 | - '3.3' 6 | - '3.4' 7 | - '3.5' 8 | - 'pypy' 9 | 10 | install: 11 | - python setup.py install 12 | 13 | script: 14 | - python slugify/tests.py 15 | 16 | -------------------------------------------------------------------------------- /slugify/alt_translates.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | 3 | CYRILLIC = { # instead of: 4 | u'ё': u'e', # io / yo 5 | u'у': u'y', # u 6 | u'х': u'h', # kh 7 | u'щ': u'sch', # shch 8 | u'ю': u'u', # iu / yu 9 | u'я': u'ya', # ia 10 | } 11 | 12 | GERMAN = { # instead of: 13 | u'ä': u'ae', # a 14 | u'ö': u'oe', # o 15 | u'ü': u'ue', # u 16 | } 17 | 18 | GREEK = { # instead of: 19 | u'Ξ': u'X', # Ks 20 | u'χ': u'ch', # kh 21 | 22 | u'ϒ': u'Y', # U 23 | u'υ': u'y', # u 24 | u'ύ': u'y', # ... 
25 | u'ϋ': u'y', 26 | u'ΰ': u'y', 27 | } 28 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | 3 | from setuptools import setup, find_packages 4 | 5 | 6 | setup( 7 | name='awesome-slugify', 8 | version='1.6.5', 9 | 10 | author='Dmitry Voronin', 11 | author_email='dimka665@gmail.com', 12 | 13 | url='https://github.com/dimka665/awesome-slugify', 14 | description='Python flexible slugify function', 15 | 16 | packages=find_packages(), 17 | install_requires=[ 18 | 'regex', 19 | 'Unidecode>=0.04.14,<0.05', 20 | ], 21 | 22 | license='GNU GPLv3', 23 | classifiers=[ 24 | 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 25 | 'Programming Language :: Python', 26 | 'Programming Language :: Python :: 2', 27 | 'Programming Language :: Python :: 2.6', 28 | 'Programming Language :: Python :: 2.7', 29 | 'Programming Language :: Python :: 3', 30 | 'Programming Language :: Python :: 3.3', 31 | 'Programming Language :: Python :: 3.4', 32 | 'Programming Language :: Python :: 3.5', 33 | ], 34 | keywords='slugify slug transliteration russian german unicode translation flexible', 35 | ) 36 | -------------------------------------------------------------------------------- /slugify/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .main import Slugify, UniqueSlugify 3 | from .alt_translates import * 4 | 5 | 6 | slugify = Slugify() 7 | unique_slugify = UniqueSlugify() 8 | slugify_unicode = Slugify(translate=None) 9 | 10 | slugify_url = Slugify() 11 | slugify_url.to_lower = True 12 | slugify_url.stop_words = ('a', 'an', 'the') 13 | slugify_url.max_length = 200 14 | 15 | slugify_filename = Slugify() 16 | slugify_filename.separator = '_' 17 | slugify_filename.safe_chars = '-.' 
# Legacy code
def deprecate_init(Klass):
    """
    Build a subclass of ``Klass`` whose constructor warns about deprecation.

    Every instantiation of the returned class emits a ``DeprecationWarning``
    pointing at the caller (``stacklevel=2``) and then defers to
    ``Klass.__init__`` with the same arguments.
    """
    import warnings

    message = "'slugify.get_slugify' is deprecated; use 'slugify.Slugify' instead."

    class NewKlass(Klass):
        def __init__(self, *args, **kwargs):
            # A 'once' filter is installed right before warning, so the
            # DeprecationWarning is not hidden by filters registered earlier.
            warnings.simplefilter('once')
            warnings.warn(message, DeprecationWarning, stacklevel=2)
            super(NewKlass, self).__init__(*args, **kwargs)

    return NewKlass
code-block:: python 32 | 33 | from slugify import slugify, Slugify, UniqueSlugify 34 | 35 | slugify('Any text', to_lower=True) # 'any-text' 36 | 37 | custom_slugify = Slugify(to_lower=True) 38 | custom_slugify('Any text') # 'any-text' 39 | 40 | custom_slugify.separator = '_' 41 | custom_slugify('Any text') # 'any_text' 42 | 43 | custom_slugify = UniqueSlugify() 44 | custom_slugify('Any text') # 'any-text' 45 | custom_slugify('Any text') # 'any-text-1' 46 | 47 | slugify function optional args 48 | ------------------------------ 49 | 50 | .. code-block:: python 51 | 52 | to_lower # if True convert text to lowercase 53 | max_length # output string max length 54 | separator # separator string 55 | capitalize # if True upper first letter 56 | 57 | 58 | Slugify class args 59 | ------------------ 60 | 61 | .. code-block:: python 62 | 63 | pretranslate = None # function or dict for replace before translation 64 | translate = unidecode.unidecode # function for slugifying or None 65 | safe_chars = '' # additional safe chars 66 | stop_words = () # remove these words from slug 67 | 68 | to_lower = False # default to_lower value 69 | max_length = None # default max_length value 70 | separator = '-' # default separator value 71 | capitalize = False # default capitalize value 72 | 73 | UniqueSlugify class args 74 | ------------------------ 75 | 76 | .. code-block:: python 77 | 78 | # all Slugify class args + 79 | uids = [] # initial unique ids 80 | 81 | Predefined slugify functions 82 | ============================ 83 | 84 | Some slugify functions are predefined this way: 85 | 86 | .. 
code-block:: python 87 | 88 | from slugify import Slugify, CYRILLIC, GERMAN, GREEK 89 | 90 | slugify = Slugify() 91 | slugify_unicode = Slugify(translate=None) 92 | 93 | slugify_url = Slugify() 94 | slugify_url.to_lower = True 95 | slugify_url.stop_words = ('a', 'an', 'the') 96 | slugify_url.max_length = 200 97 | 98 | slugify_filename = Slugify() 99 | slugify_filename.separator = '_' 100 | slugify_filename.safe_chars = '-.' 101 | slugify_filename.max_length = 255 102 | 103 | slugify_ru = Slugify(pretranslate=CYRILLIC) 104 | slugify_de = Slugify(pretranslate=GERMAN) 105 | slugify_el = Slugify(pretranslate=GREEK) 106 | 107 | Examples 108 | ======== 109 | 110 | .. code-block:: python 111 | 112 | from slugify import Slugify, UniqueSlugify, slugify, slugify_unicode 113 | from slugify import slugify_url, slugify_filename 114 | from slugify import slugify_ru, slugify_de 115 | 116 | slugify('one kožušček') # one-kozuscek 117 | slugify('one two three', separator='.') # one.two.three 118 | slugify('one two three four', max_length=12) # one-two-four (12 chars) 119 | slugify('one TWO', to_lower=True) # one-two 120 | slugify('one TWO', capitalize=True) # One-TWO 121 | 122 | slugify_filename(u'Дrаft №2.txt') # Draft_2.txt 123 | slugify_url(u'Дrаft №2.txt') # draft-2-txt 124 | 125 | my_slugify = Slugify() 126 | my_slugify.separator = '.' 
127 | my_slugify.pretranslate = {'я': 'i', '♥': 'love'} 128 | my_slugify('Я ♥ борщ') # I.love.borshch (custom translate) 129 | 130 | slugify('Я ♥ борщ') # Ia-borshch (standard translation) 131 | slugify_ru('Я ♥ борщ') # Ya-borsch (alternative russian translation) 132 | slugify_unicode('Я ♥ борщ') # Я-борщ (sanitize only) 133 | 134 | slugify_de('ÜBER Über slugify') # UEBER-Ueber-slugify 135 | 136 | slugify_unique = UniqueSlugify(separator='_') 137 | slugify_unique('one TWO') # One_TWO 138 | slugify_unique('one TWO') # One_TWO_1 139 | 140 | slugify_unique = UniqueSlugify(uids=['cellar-door']) 141 | slugify_unique('cellar door') # cellar-door-1 142 | 143 | 144 | Custom Unique Slugify Checker 145 | ============================= 146 | 147 | .. code-block:: python 148 | 149 | from slugify import UniqueSlugify 150 | 151 | def my_unique_check(text, uids): 152 | if text in uids: 153 | return False 154 | return not SomeDBClass.objects.filter(slug_field=text).exists() 155 | 156 | custom_slugify_unique = UniqueSlugify(unique_check=my_unique_check) 157 | 158 | # Checks the database for a matching document 159 | custom_slugify_unique('te occidere possunt') 160 | 161 | 162 | Running UnitTests 163 | ================= 164 | 165 | .. code-block:: bash 166 | 167 | $ virtualenv venv 168 | $ venv/bin/pip install -r requirements.txt 169 | $ venv/bin/nosetests slugify 170 | -------------------------------------------------------------------------------- /slugify/main.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | 3 | import sys 4 | 5 | from unidecode import unidecode 6 | import regex as re 7 | 8 | 9 | # Don't set regex.DEFAULT_VERSION to regex.VERSION1 because 10 | # this option would also affect 3rd-party libs, e.g. `mailgun` and `flanker`. 11 | # Pass the regex.VERSION1 flag to each pattern instead. 
def join_words(words, separator, max_length=None):
    """
    Join words with separator, optionally capping the result length.

    words - iterator or list

    Without a (truthy) max_length all words are joined as-is. Otherwise
    the first word is always taken, and each following word is appended
    only if the result still fits into max_length; a word that does not
    fit is skipped, but later (shorter) words may still be added.
    The result is finally cut to max_length, which only matters when the
    first word alone is too long.
    """
    if not max_length:
        return separator.join(words)

    pieces = iter(words)  # accept lists and one-shot iterators alike
    missing = object()
    joined = next(pieces, missing)
    if joined is missing:
        # no words at all
        return u''

    for piece in pieces:
        candidate = joined + separator + piece
        if len(candidate) <= max_length:
            joined = candidate

    return joined[:max_length]
60 | \p{Lowercase_Letter} # not lowercase letter 61 | | 62 | […\p{Term}--,،﹐,]\p{Uppercase_Letter} # and not dot (.?…!:;) with uppercase letter 63 | ) 64 | ) 65 | ''' 66 | 67 | 68 | class Slugify(object): 69 | 70 | upper_to_upper_letters_re = re.compile(UPPER_TO_UPPER_LETTERS_RE, re.VERBOSE | re.VERSION1) 71 | _safe_chars = '' 72 | _stop_words = () 73 | 74 | def __init__(self, pretranslate=None, translate=unidecode, safe_chars='', stop_words=(), 75 | to_lower=False, max_length=None, separator=u'-', capitalize=False, 76 | fold_abbrs=False): 77 | 78 | self.pretranslate = pretranslate 79 | self.translate = translate 80 | self.safe_chars = safe_chars 81 | self.stop_words = stop_words 82 | 83 | self.to_lower = to_lower 84 | self.max_length = max_length 85 | self.separator = separator 86 | self.capitalize = capitalize 87 | self.fold_abbrs = fold_abbrs 88 | 89 | def pretranslate_dict_to_function(self, convert_dict): 90 | 91 | # add uppercase letters 92 | for letter, translation in list(convert_dict.items()): 93 | letter_upper = letter.upper() 94 | if letter_upper != letter and letter_upper not in convert_dict: 95 | convert_dict[letter_upper] = translation.capitalize() 96 | 97 | self.convert_dict = convert_dict 98 | PRETRANSLATE = re.compile(u'(\L)', options=convert_dict) 99 | 100 | # translate some letters before translating 101 | return lambda text: PRETRANSLATE.sub(lambda m: convert_dict[m.group(1)], text) 102 | 103 | def set_pretranslate(self, pretranslate): 104 | if isinstance(pretranslate, dict): 105 | pretranslate = self.pretranslate_dict_to_function(pretranslate) 106 | 107 | elif pretranslate is None: 108 | pretranslate = lambda text: text 109 | 110 | elif not callable(pretranslate): 111 | error_message = u"Keyword argument 'pretranslate' must be dict, None or callable. 
Not {0.__class__.__name__}".format(pretranslate) 112 | raise ValueError(error_message) 113 | 114 | self._pretranslate = pretranslate 115 | 116 | pretranslate = property(fset=set_pretranslate) 117 | 118 | def set_translate(self, func): 119 | if func: 120 | self._translate = func 121 | else: 122 | self._translate = lambda text: text 123 | 124 | translate = property(fset=set_translate) 125 | 126 | def set_safe_chars(self, safe_chars): 127 | self._safe_chars = safe_chars 128 | self.apostrophe_is_not_safe = "'" not in safe_chars 129 | self.calc_unwanted_chars_re() 130 | 131 | safe_chars = property(fset=set_safe_chars) 132 | 133 | def set_stop_words(self, stop_words): 134 | self._stop_words = tuple(stop_words) 135 | self.calc_unwanted_chars_re() 136 | 137 | stop_words = property(fset=set_stop_words) 138 | 139 | def calc_unwanted_chars_re(self): 140 | unwanted_chars_re = u'[^\p{{AlNum}}{safe_chars}]+'.format(safe_chars=re.escape(self._safe_chars or '')) 141 | self.unwanted_chars_re = re.compile(unwanted_chars_re, re.IGNORECASE) 142 | 143 | if self._stop_words: 144 | unwanted_chars_and_words_re = unwanted_chars_re + u'|(?)(?!\p{AlNum})' 145 | self.unwanted_chars_and_words_re = re.compile(unwanted_chars_and_words_re, re.IGNORECASE, stop_words=self._stop_words) 146 | else: 147 | self.unwanted_chars_and_words_re = None 148 | 149 | def sanitize(self, text): 150 | if self.apostrophe_is_not_safe: 151 | text = text.replace("'", '').strip() # remove ' 152 | 153 | if self.unwanted_chars_and_words_re: 154 | words = [word for word in self.unwanted_chars_and_words_re.split(text) if word] 155 | if words: 156 | return words 157 | 158 | words = filter(None, self.unwanted_chars_re.split(text)) 159 | return words 160 | 161 | def __call__(self, text, **kwargs): 162 | 163 | max_length = kwargs.get('max_length', self.max_length) 164 | separator = kwargs.get('separator', self.separator) 165 | 166 | if not isinstance(text, str_type): 167 | text = text.decode('utf8', 'ignore') 168 | 169 | if 
class DeprecationTestCase(unittest.TestCase):
    """Checks that the legacy ``get_slugify`` entry point still works and warns."""

    def test_deprecated_get_slugify(self):
        """Calling ``get_slugify()`` yields a working slugifier and a deprecation message."""
        import warnings

        # record=True collects the emitted warnings in the `warning` list
        with warnings.catch_warnings(record=True) as warning:
            warnings.simplefilter('once')

            slugify = get_slugify()
            self.assertEqual(slugify('This % is a test ---'), 'This-is-a-test')
            # the most recently recorded warning must be the deprecation notice
            self.assertTrue("'slugify.get_slugify' is deprecated" in str(warning[-1].message))