├── requirements.txt
├── tox.ini
├── .gitignore
├── .travis.yml
├── slugify
    ├── alt_translates.py
    ├── __init__.py
    ├── main.py
    └── tests.py
├── setup.py
└── README.rst


/requirements.txt:
--------------------------------------------------------------------------------
1 | regex
2 | Unidecode>=0.04.14,<0.05
3 | nose
4 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = py27,py33,py34,py35,pypy
3 | 
4 | [testenv]
5 | commands=
6 |     python slugify/tests.py
7 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .tox/
 2 | .idea/
 3 | 
 4 | .hgignore
 5 | 
 6 | *.pyc
 7 | *.pyo
 8 | *.orig
 9 | *.egg-info
10 | *~
11 | 
12 | build/
13 | dist/
14 | 
15 | # Common virtualenv names
16 | virtualenv
17 | venv
18 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | 
 3 | python:
 4 |   - '2.7'
 5 |   - '3.3'
 6 |   - '3.4'
 7 |   - '3.5'
 8 |   - 'pypy'
 9 | 
10 | install:
11 |   - python setup.py install
12 | 
13 | script:
14 |   - python slugify/tests.py
15 | 
16 | 


--------------------------------------------------------------------------------
/slugify/alt_translates.py:
--------------------------------------------------------------------------------
 1 | # coding=utf8
 2 | 
 3 | CYRILLIC = {       # instead of:
 4 |     u'ё': u'e',    # io / yo
 5 |     u'у': u'y',    # u
 6 |     u'х': u'h',    # kh
 7 |     u'щ': u'sch',  # shch
 8 |     u'ю': u'u',    # iu / yu
 9 |     u'я': u'ya',   # ia
10 | }
11 | 
12 | GERMAN = {         # instead of:
13 |     u'ä': u'ae',   # a
14 |     u'ö': u'oe',   # o
15 |     u'ü': u'ue',   # u
16 | }
17 | 
18 | GREEK = {          # instead of:
19 |     u'Ξ': u'X',    # Ks
20 |     u'χ': u'ch',   # kh
21 | 
22 |     u'ϒ': u'Y',    # U
23 |     u'υ': u'y',    # u
24 |     u'ύ': u'y',    # ...
25 |     u'ϋ': u'y',
26 |     u'ΰ': u'y',
27 | }
28 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # coding=utf8
 2 | 
 3 | from setuptools import setup, find_packages
 4 | 
 5 | 
 6 | setup(
 7 |     name='awesome-slugify',
 8 |     version='1.6.5',
 9 | 
10 |     author='Dmitry Voronin',
11 |     author_email='dimka665@gmail.com',
12 | 
13 |     url='https://github.com/dimka665/awesome-slugify',
14 |     description='Python flexible slugify function',
15 | 
16 |     packages=find_packages(),
17 |     install_requires=[
18 |         'regex',
19 |         'Unidecode>=0.04.14,<0.05',
20 |     ],
21 | 
22 |     license='GNU GPLv3',
23 |     classifiers=[
24 |         'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
25 |         'Programming Language :: Python',
26 |         'Programming Language :: Python :: 2',
27 |         'Programming Language :: Python :: 2.6',
28 |         'Programming Language :: Python :: 2.7',
29 |         'Programming Language :: Python :: 3',
30 |         'Programming Language :: Python :: 3.3',
31 |         'Programming Language :: Python :: 3.4',
32 |         'Programming Language :: Python :: 3.5',
33 |     ],
34 |     keywords='slugify slug transliteration russian german unicode translation flexible',
35 | )
36 | 


--------------------------------------------------------------------------------
/slugify/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from .main import Slugify, UniqueSlugify
 3 | from .alt_translates import *
 4 | 
 5 | 
 6 | slugify = Slugify()
 7 | unique_slugify = UniqueSlugify()
 8 | slugify_unicode = Slugify(translate=None)
 9 | 
10 | slugify_url = Slugify()
11 | slugify_url.to_lower = True
12 | slugify_url.stop_words = ('a', 'an', 'the')
13 | slugify_url.max_length = 200
14 | 
15 | slugify_filename = Slugify()
16 | slugify_filename.separator = '_'
17 | slugify_filename.safe_chars = '-.'
18 | slugify_filename.max_length = 255
19 | 
20 | slugify_ru = Slugify(pretranslate=CYRILLIC)
21 | slugify_de = Slugify(pretranslate=GERMAN)
22 | slugify_el = Slugify(pretranslate=GREEK)
23 | 
24 | 
25 | # Legacy code
def deprecate_init(Klass):
    """
    Return a subclass of ``Klass`` that emits a DeprecationWarning on instantiation.

    Klass - class to wrap; constructor arguments are forwarded to it unchanged.

    The 'once' filter that makes the warning visible is installed only for the
    duration of the ``warnings.warn`` call. Previously it was installed globally
    on every instantiation, which silently replaced the application's own
    warning configuration for all warning categories.
    """
    import warnings

    class NewKlass(Klass):
        def __init__(self, *args, **kwargs):
            # catch_warnings() restores the previous filter list on exit, so the
            # temporary 'once' filter does not leak into the rest of the process.
            with warnings.catch_warnings():
                warnings.simplefilter('once')
                # stacklevel=2 attributes the warning to the code instantiating the class
                warnings.warn("'slugify.get_slugify' is deprecated; use 'slugify.Slugify' instead.",
                              DeprecationWarning, stacklevel=2)
            super(NewKlass, self).__init__(*args, **kwargs)
    return NewKlass
35 | 
36 | # get_slugify has been deprecated since 31 March 2014
37 | get_slugify = deprecate_init(Slugify)
38 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | ===============
  2 | awesome-slugify
  3 | ===============
  4 | .. image:: https://travis-ci.org/dimka665/awesome-slugify.svg?branch=master
  5 |     :target: https://travis-ci.org/dimka665/awesome-slugify
  6 | 
  7 | **Python flexible slugify function**
  8 | 
  9 | | PyPi: https://pypi.python.org/pypi/awesome-slugify
 10 | | Github: https://github.com/dimka665/awesome-slugify
 11 | 
 12 | 
 13 | Install
 14 | =======
 15 | .. code-block:: bash
 16 | 
 17 |     pip install awesome-slugify
 18 | 
 19 | Usage
 20 | =====
 21 | 
 22 | .. code-block:: python
 23 | 
 24 |     from slugify import slugify
 25 | 
 26 |     slugify('Any text')  # 'Any-text'
 27 | 
 28 | Custom slugify
 29 | ==============
 30 | 
 31 | .. code-block:: python
 32 | 
 33 |     from slugify import slugify, Slugify, UniqueSlugify
 34 | 
 35 |     slugify('Any text', to_lower=True)  # 'any-text'
 36 | 
 37 |     custom_slugify = Slugify(to_lower=True)
 38 |     custom_slugify('Any text')          # 'any-text'
 39 | 
 40 |     custom_slugify.separator = '_'
 41 |     custom_slugify('Any text')          # 'any_text'
 42 | 
 43 |     custom_slugify = UniqueSlugify()
 44 |     custom_slugify('Any text')          # 'Any-text'
 45 |     custom_slugify('Any text')          # 'Any-text-1'
 46 | 
 47 | slugify function optional args
 48 | ------------------------------
 49 | 
 50 | .. code-block:: python
 51 | 
 52 |     to_lower              # if True convert text to lowercase
 53 |     max_length            # output string max length
 54 |     separator             # separator string
 55 |     capitalize            # if True upper first letter
 56 | 
 57 | 
 58 | Slugify class args
 59 | ------------------
 60 | 
 61 | .. code-block:: python
 62 | 
 63 |     pretranslate = None               # function or dict for replace before translation
 64 |     translate = unidecode.unidecode   # function for slugifying or None
 65 |     safe_chars = ''                   # additional safe chars
 66 |     stop_words = ()                   # remove these words from slug
 67 | 
 68 |     to_lower = False                  # default to_lower value
 69 |     max_length = None                 # default max_length value
 70 |     separator = '-'                   # default separator value
 71 |     capitalize = False                # default capitalize value
 72 | 
 73 | UniqueSlugify class args
 74 | ------------------------
 75 | 
 76 | .. code-block:: python
 77 | 
 78 |     # all Slugify class args +
 79 |     uids = []                         # initial unique ids
 80 | 
 81 | Predefined slugify functions
 82 | ============================
 83 | 
 84 | Some slugify functions are predefined this way:
 85 | 
 86 | .. code-block:: python
 87 | 
 88 |     from slugify import Slugify, CYRILLIC, GERMAN, GREEK
 89 | 
 90 |     slugify = Slugify()
 91 |     slugify_unicode = Slugify(translate=None)
 92 | 
 93 |     slugify_url = Slugify()
 94 |     slugify_url.to_lower = True
 95 |     slugify_url.stop_words = ('a', 'an', 'the')
 96 |     slugify_url.max_length = 200
 97 | 
 98 |     slugify_filename = Slugify()
 99 |     slugify_filename.separator = '_'
100 |     slugify_filename.safe_chars = '-.'
101 |     slugify_filename.max_length = 255
102 | 
103 |     slugify_ru = Slugify(pretranslate=CYRILLIC)
104 |     slugify_de = Slugify(pretranslate=GERMAN)
105 |     slugify_el = Slugify(pretranslate=GREEK)
106 | 
107 | Examples
108 | ========
109 | 
110 | .. code-block:: python
111 | 
112 |     from slugify import Slugify, UniqueSlugify, slugify, slugify_unicode
113 |     from slugify import slugify_url, slugify_filename
114 |     from slugify import slugify_ru, slugify_de
115 | 
116 |     slugify('one kožušček')                       # one-kozuscek
117 |     slugify('one two three', separator='.')       # one.two.three
118 |     slugify('one two three four', max_length=12)  # one-two-four   (12 chars)
119 |     slugify('one TWO', to_lower=True)             # one-two
120 |     slugify('one TWO', capitalize=True)           # One-TWO
121 | 
122 |     slugify_filename(u'Дrаft №2.txt')             # Draft_2.txt
123 |     slugify_url(u'Дrаft №2.txt')                  # draft-2-txt
124 | 
125 |     my_slugify = Slugify()
126 |     my_slugify.separator = '.'
127 |     my_slugify.pretranslate = {'я': 'i', '♥': 'love'}
128 |     my_slugify('Я ♥ борщ')                        # I.love.borshch  (custom translate)
129 | 
130 |     slugify('Я ♥ борщ')                           # Ia-borshch  (standard translation)
131 |     slugify_ru('Я ♥ борщ')                        # Ya-borsch   (alternative russian translation)
132 |     slugify_unicode('Я ♥ борщ')                   # Я-борщ      (sanitize only)
133 | 
134 |     slugify_de('ÜBER Über slugify')               # UEBER-Ueber-slugify
135 | 
136 |     slugify_unique = UniqueSlugify(separator='_')
137 |     slugify_unique('one TWO')                     # one_TWO
138 |     slugify_unique('one TWO')                     # one_TWO_1
139 | 
140 |     slugify_unique = UniqueSlugify(uids=['cellar-door'])
141 |     slugify_unique('cellar door')                 # cellar-door-1
142 | 
143 | 
144 | Custom Unique Slugify Checker
145 | =============================
146 | 
147 | .. code-block:: python
148 | 
149 |     from slugify import UniqueSlugify
150 | 
151 |     def my_unique_check(text, uids):
152 |         if text in uids:
153 |             return False
154 |         return not SomeDBClass.objects.filter(slug_field=text).exists()
155 | 
156 |     custom_slugify_unique = UniqueSlugify(unique_check=my_unique_check)
157 | 
158 |     # Checks the database for a matching document
159 |     custom_slugify_unique('te occidere possunt')
160 | 
161 | 
162 | Running UnitTests
163 | =================
164 | 
165 | .. code-block:: bash
166 | 
167 |     $ virtualenv venv
168 |     $ venv/bin/pip install -r requirements.txt
169 |     $ venv/bin/nosetests slugify
170 | 


--------------------------------------------------------------------------------
/slugify/main.py:
--------------------------------------------------------------------------------
  1 | # coding=utf8
  2 | 
  3 | import sys
  4 | 
  5 | from unidecode import unidecode
  6 | import regex as re
  7 | 
  8 | 
  9 | # Don't set regex.DEFAULT_VERSION to regex.VERSION1, because that global
 10 | # option would also affect third-party libraries, e.g. `mailgun` and `flanker`.
 11 | # Pass the regex.VERSION1 flag to individual patterns instead.
 12 | 
 13 | # re.VERSION1 - New enhanced behaviour with nested sets and set operations
 14 | 
 15 | 
 16 | if sys.version_info[0] == 2:
 17 |     str_type = unicode  # Python 2
 18 | else:
 19 |     str_type = str  # Python 3
 20 | 
 21 | 
 22 | def join_words(words, separator, max_length=None):
 23 |     """
 24 |     words - iterator or list
 25 |     """
 26 | 
 27 |     if not max_length:
 28 |         return separator.join(words)
 29 | 
 30 |     words = iter(words)   # List to Generator
 31 |     try:
 32 |         text = next(words)
 33 |     except StopIteration:
 34 |         return u''
 35 | 
 36 |     for word in words:
 37 |         if len(text + separator + word) <= max_length:
 38 |             text += separator + word
 39 | 
 40 |     return text[:max_length]
 41 | 
 42 | # uppercase letters to translate to uppercase letters, NOT camelcase
 43 | UPPER_TO_UPPER_LETTERS_RE = \
 44 |     u'''
 45 |     (
 46 |             \p{Uppercase_Letter} {2,}                          # 2 or more adjacent letters - UP always
 47 |         |
 48 |             \p{Uppercase_Letter}                               # target one uppercase letter, then
 49 |                 (?=
 50 |                     [^\p{Lowercase_Letter}…\p{Term}--,،﹐，]+    # not chars breaks possible UP (…abc.?!:;)
 51 |                     \p{Uppercase_Letter} {2}                   # and 2 uppercase letters
 52 |                 )
 53 |         |
 54 |             (?<=
 55 |                 \p{Uppercase_Letter} {2}                       # 2 uppercase letters
 56 |                 [^\p{Lowercase_Letter}…\p{Term}--,،﹐，]+       # not chars breaks possible UP (…abc.?!:;), then
 57 |             )
 58 |             \p{Uppercase_Letter}                               # target one uppercase letter, then
 59 |             (?!
 60 |                     \p{Lowercase_Letter}                       # not lowercase letter
 61 |                 |
 62 |                     […\p{Term}--,،﹐，]\p{Uppercase_Letter}      # and not dot (.?…!:;) with uppercase letter
 63 |             )
 64 |     )
 65 |     '''
 66 | 
 67 | 
 68 | class Slugify(object):
 69 | 
 70 |     upper_to_upper_letters_re = re.compile(UPPER_TO_UPPER_LETTERS_RE, re.VERBOSE | re.VERSION1)
 71 |     _safe_chars = ''
 72 |     _stop_words = ()
 73 | 
 74 |     def __init__(self, pretranslate=None, translate=unidecode, safe_chars='', stop_words=(),
 75 |                  to_lower=False, max_length=None, separator=u'-', capitalize=False,
 76 |                  fold_abbrs=False):
 77 | 
 78 |         self.pretranslate = pretranslate
 79 |         self.translate = translate
 80 |         self.safe_chars = safe_chars
 81 |         self.stop_words = stop_words
 82 | 
 83 |         self.to_lower = to_lower
 84 |         self.max_length = max_length
 85 |         self.separator = separator
 86 |         self.capitalize = capitalize
 87 |         self.fold_abbrs = fold_abbrs
 88 | 
 89 |     def pretranslate_dict_to_function(self, convert_dict):
 90 | 
 91 |         # add uppercase letters
 92 |         for letter, translation in list(convert_dict.items()):
 93 |             letter_upper = letter.upper()
 94 |             if letter_upper != letter and letter_upper not in convert_dict:
 95 |                 convert_dict[letter_upper] = translation.capitalize()
 96 | 
 97 |         self.convert_dict = convert_dict
 98 |         PRETRANSLATE = re.compile(u'(\L<options>)', options=convert_dict)
 99 | 
100 |         # translate some letters before translating
101 |         return lambda text: PRETRANSLATE.sub(lambda m: convert_dict[m.group(1)], text)
102 | 
103 |     def set_pretranslate(self, pretranslate):
104 |         if isinstance(pretranslate, dict):
105 |             pretranslate = self.pretranslate_dict_to_function(pretranslate)
106 | 
107 |         elif pretranslate is None:
108 |             pretranslate = lambda text: text
109 | 
110 |         elif not callable(pretranslate):
111 |             error_message = u"Keyword argument 'pretranslate' must be dict, None or callable. Not {0.__class__.__name__}".format(pretranslate)
112 |             raise ValueError(error_message)
113 | 
114 |         self._pretranslate = pretranslate
115 | 
116 |     pretranslate = property(fset=set_pretranslate)
117 | 
118 |     def set_translate(self, func):
119 |         if func:
120 |             self._translate = func
121 |         else:
122 |             self._translate = lambda text: text
123 | 
124 |     translate = property(fset=set_translate)
125 | 
126 |     def set_safe_chars(self, safe_chars):
127 |         self._safe_chars = safe_chars
128 |         self.apostrophe_is_not_safe = "'" not in safe_chars
129 |         self.calc_unwanted_chars_re()
130 | 
131 |     safe_chars = property(fset=set_safe_chars)
132 | 
133 |     def set_stop_words(self, stop_words):
134 |         self._stop_words = tuple(stop_words)
135 |         self.calc_unwanted_chars_re()
136 | 
137 |     stop_words = property(fset=set_stop_words)
138 | 
139 |     def calc_unwanted_chars_re(self):
140 |         unwanted_chars_re = u'[^\p{{AlNum}}{safe_chars}]+'.format(safe_chars=re.escape(self._safe_chars or ''))
141 |         self.unwanted_chars_re = re.compile(unwanted_chars_re, re.IGNORECASE)
142 | 
143 |         if self._stop_words:
144 |             unwanted_chars_and_words_re = unwanted_chars_re + u'|(?<!\p{AlNum})(?:\L<stop_words>)(?!\p{AlNum})'
145 |             self.unwanted_chars_and_words_re = re.compile(unwanted_chars_and_words_re, re.IGNORECASE, stop_words=self._stop_words)
146 |         else:
147 |             self.unwanted_chars_and_words_re = None
148 | 
149 |     def sanitize(self, text):
150 |         if self.apostrophe_is_not_safe:
151 |             text = text.replace("'", '').strip()  # remove '
152 | 
153 |         if self.unwanted_chars_and_words_re:
154 |             words = [word for word in self.unwanted_chars_and_words_re.split(text) if word]
155 |             if words:
156 |                 return words
157 | 
158 |         words = filter(None, self.unwanted_chars_re.split(text))
159 |         return words
160 | 
161 |     def __call__(self, text, **kwargs):
162 | 
163 |         max_length = kwargs.get('max_length', self.max_length)
164 |         separator = kwargs.get('separator', self.separator)
165 | 
166 |         if not isinstance(text, str_type):
167 |             text = text.decode('utf8', 'ignore')
168 | 
169 |         if kwargs.get('fold_abbrs', self.fold_abbrs):
170 |             text = re.sub(r'(?<![\p{Letter}.])((?:\p{Letter}\.){2,})', lambda x: x.group(0).replace('.', ''), text)
171 | 
172 |         if kwargs.get('to_lower', self.to_lower):
173 |             text = self._pretranslate(text)
174 |             text = self._translate(text)
175 |             text = text.lower()
176 |         else:
177 |             text_parts = self.upper_to_upper_letters_re.split(text)
178 | 
179 |             for position, text_part in enumerate(text_parts):
180 |                 text_part = self._pretranslate(text_part)
181 |                 text_part = self._translate(text_part)
182 |                 if position % 2:
183 |                     text_part = text_part.upper()
184 | 
185 |                 text_parts[position] = text_part
186 | 
187 |             text = u''.join(text_parts)
188 | 
189 |         words = self.sanitize(text)
190 |         text = join_words(words, separator, max_length)
191 | 
192 |         if text and kwargs.get('capitalize', self.capitalize):
193 |             text = text[0].upper() + text[1:]
194 | 
195 |         return text
196 | 
197 | 
class UniqueSlugify(Slugify):
    """
    Manage unique slugified ids

    Extra keyword arguments (on top of the Slugify ones):
        uids         - initial collection of already used slugs. An object with
                       an `add` method (e.g. a set) is used as is; any other
                       iterable (list, tuple, ...) is copied into a new set;
                       None or omitted means an empty set.
        unique_check - callable(text, uids) returning True when text is free;
                       defaults to a membership test against `uids`.
    """

    def __init__(self, *args, **kwargs):
        # don't declare uids in args to avoid problem if someone uses positional arguments on initialization
        uids = kwargs.pop('uids', None)
        if uids is None:
            uids = set()
        elif not hasattr(uids, 'add'):
            # __call__ needs .add(); lists, tuples, frozensets etc. are converted once here
            uids = set(uids)
        self.uids = uids
        self.unique_check = kwargs.pop(
            "unique_check",
            lambda text, uids: self.default_unique_check(text, uids)
        )
        super(UniqueSlugify, self).__init__(*args, **kwargs)

    def __call__(self, text, **kwargs):
        """
        Return a slug for text that passes `unique_check` and remember it.

        When the plain slug is taken, the separator and an increasing counter
        (1, 2, ...) are appended until the check passes.
        NOTE: the counter suffix is added after truncation, so the result may
        be longer than `max_length`.
        """
        # get slugified text
        text = super(UniqueSlugify, self).__call__(text, **kwargs)
        count = 0
        newtext = text
        separator = kwargs.get('separator', self.separator)
        while not self.unique_check(newtext, self.uids):
            count += 1
            newtext = "%s%s%d" % (text, separator, count)
        self.uids.add(newtext)
        return newtext

    def default_unique_check(self, text, uids):
        """Return True when text is not among the known uids."""
        return text not in uids
228 | 
229 | # \p{SB=AT} = '.․﹒．'
230 | # \p{SB=ST} = '!?՜՞։؟۔܀܁܂߹।॥၊။።፧፨᙮᜵᜶‼‽⁇⁈⁉⸮。꓿꘎꘏꤯﹖﹗！？｡'
231 | # \p{Term}  = '!,.:;?;·։׃،؛؟۔܀܁܂܃܄܅܆܇܈܉܊܌߸߹।॥๚๛༈།༎༏༐༑༒၊။፡።፣፤፥፦፧፨᙭᙮᛫᛬᛭។៕៖៚‼‽⁇⁈⁉⸮、。꓾꓿꘍꘎꘏꤯﹐﹑﹒﹔﹕﹖﹗！，．：；？｡､'
232 | # \p{Sterm} = '! .  ?՜՞։؟܀   ܁     ܂߹।॥၊။               ።፧፨  ᙮᜵᜶        ‼‽⁇⁈⁉⸮ 。 ꓿ ꘎꘏꤯﹒     ﹖﹗！．    ？｡'
233 | 
234 | # \p{SB=AT} = .
235 | # \p{SB=ST} =   ! ?
236 | # \p{Term}  = . ! ? , : ;
237 | # \p{Sterm} = . ! ?
238 | 
239 | # \u002c - Latin comma
240 | # \u060c - Arabic comma
241 | # \ufe50 - Small comma
242 | # \uff0c - Fullwidth comma
243 | 
244 | # […\p{Term}--,،﹐，] - ellipsis + Terms - commas
245 | 


--------------------------------------------------------------------------------
/slugify/tests.py:
--------------------------------------------------------------------------------
  1 | # coding=utf8
  2 | 
  3 | import unittest
  4 | 
  5 | from slugify import Slugify, UniqueSlugify
  6 | from slugify import slugify, slugify_unicode, unique_slugify
  7 | from slugify import slugify_url, slugify_filename
  8 | from slugify import slugify_ru, slugify_de, slugify_el
  9 | 
 10 | from slugify import get_slugify
 11 | 
 12 | 
 13 | class SlugifyTestCase(unittest.TestCase):
 14 | 
 15 |     def test_slugify_english(self):
 16 |         self.assertEqual(slugify('This % is a test ---'), 'This-is-a-test')
 17 |         self.assertEqual(slugify('_this_is_a__test___'), 'this-is-a-test')
 18 |         self.assertEqual(slugify('- - -This -- is a ## test ---'), 'This-is-a-test')
 19 | 
 20 |     def test_slugify_umlaut(self):
 21 |         self.assertEqual(slugify('kožušček'), 'kozuscek',)
 22 |         self.assertEqual(slugify('C\'est déjà l\'été.'), 'Cest-deja-lete')
 23 |         self.assertEqual(slugify('jaja---lol-méméméoo--a'), 'jaja-lol-mememeoo-a')
 24 |         self.assertEqual(slugify('Nín hǎo. Wǒ shì zhōng guó rén'), 'Nin-hao-Wo-shi-zhong-guo-ren')
 25 |         self.assertEqual(slugify('Programmes de publicité - Solutions d\'entreprise'),
 26 |                          'Programmes-de-publicite-Solutions-dentreprise')
 27 | 
 28 |     def test_slugify_chinese(self):
 29 |         self.assertEqual(slugify('北亰'), 'Bei-Jing')
 30 | 
 31 |     def test_slugify_russian(self):
 32 |         self.assertEqual(slugify('Компьютер'), 'Kompiuter')
 33 |         self.assertEqual(slugify('Транслитерирует и русский'), 'Transliteriruet-i-russkii')
 34 |         self.assertEqual(slugify('ёжик из щуки сварил уху'), 'iozhik-iz-shchuki-svaril-ukhu')
 35 |         self.assertEqual(slugify('Ах, Юля-Юля'), 'Akh-Iulia-Iulia')
 36 | 
 37 |     def test_slugify_ru(self):
 38 |         self.assertEqual(slugify_ru('Компьютер'), 'Komputer')
 39 |         self.assertEqual(slugify_ru('Транслитерирует и русский'), 'Transliteriryet-i-rysskii')
 40 |         self.assertEqual(slugify_ru('ёжик из щуки сварил уху'), 'ezhik-iz-schyki-svaril-yhy')
 41 |         self.assertEqual(slugify_ru('Ах, Юля-Юля'), 'Ah-Ulya-Ulya')
 42 | 
 43 |     def test_slugify_de(self):
 44 |         self.assertEqual(slugify_de('Öl und SÜD'), 'Oel-und-SUED')
 45 | 
 46 |     def test_greek(self):
 47 |         self.assertEqual(slugify_el('ϒ Ϋ υ ϋ ΰ'), 'Y-Y-y-y-y')
 48 | 
 49 |     def test_slugify_unicode(self):
 50 |         self.assertEqual(slugify_unicode('-=Слово по-русски=-'), u'Слово-по-русски')
 51 |         self.assertEqual(slugify_unicode('слово_по_русски'), u'слово-по-русски')
 52 | 
 53 | 
 54 | class NumericTestCase(unittest.TestCase):
 55 | 
 56 |     def test_mixed_alphanumeric(self):
 57 |         self.assertEqual(slugify('5 neat tricks'), '5-neat-tricks')
 58 |         self.assertEqual(slugify('these 20 heroes'), 'these-20-heroes')
 59 |         self.assertEqual(slugify('building 42'), 'building-42')
 60 | 
 61 |     def test_numeric(self):
 62 |         self.assertEqual(slugify('404'), '404')
 63 |         self.assertEqual(slugify('1'), '1')
 64 | 
 65 | 
 66 | class PredefinedSlugifyTestCase(unittest.TestCase):
 67 | 
 68 |     def test_slugify_url(self):
 69 |         self.assertEqual(slugify_url('The Über article'), 'uber-article')
 70 | 
 71 |     def test_slugify_filename(self):
 72 |         self.assertEqual(slugify_filename(u'Дrаft №2.txt'), u'Draft_2.txt')
 73 | 
 74 | 
 75 | class ToLowerTestCase(unittest.TestCase):
 76 | 
 77 |     def test_to_lower(self):
 78 |         self.assertEqual(slugify('Test TO lower', to_lower=True), 'test-to-lower')
 79 | 
 80 |     def test_to_lower_arg(self):
 81 |         slugify = Slugify()
 82 |         slugify.to_lower = True
 83 | 
 84 |         self.assertEqual(slugify('Test TO lower'), 'test-to-lower')
 85 |         self.assertEqual(slugify('Test TO lower', to_lower=False), 'Test-TO-lower')
 86 | 
 87 |     def test_to_lower_with_capitalize(self):
 88 |         self.assertEqual(slugify('Test TO lower', to_lower=True, capitalize=True), 'Test-to-lower')
 89 | 
 90 |     def test_to_lower_with_unicode(self):
 91 |         self.assertEqual(slugify('自転車', to_lower=True), 'zi-zhuan-che')
 92 | 
 93 | 
 94 | class UpperTestCase(unittest.TestCase):
 95 |     def test_full_upper(self):
 96 |         self.assertEqual(slugify_ru('ЯНДЕКС'), 'YANDEKS')
 97 | 
 98 |     def test_camel_word(self):
 99 |         self.assertEqual(slugify_ru('Яндекс'), 'Yandeks')
100 |         self.assertEqual(slugify_ru('UP Яндекс'), 'UP-Yandeks')
101 |         self.assertEqual(slugify_ru('Яндекс UP'), 'Yandeks-UP')
102 | 
103 |     def test_part_of_word(self):
104 |         self.assertEqual(slugify_de('ÜBERslugify'), 'UEBERslugify')
105 |         self.assertEqual(slugify_de('ÜBERslugifÜ AUF'), 'UEBERslugifUE-AUF')
106 | 
107 |     def test_at_start_of_sentence(self):
108 |         self.assertEqual(slugify_ru('Я пошёл'), 'Ya-poshel')
109 |         self.assertEqual(slugify_ru('Я Пошёл'), 'Ya-Poshel')
110 |         self.assertEqual(slugify_ru('Я ПОШёл'), 'YA-POSHel')
111 |         self.assertEqual(slugify_ru('Я ПОШЁЛ. Я Пошел'), 'YA-POSHEL-Ya-Poshel')
112 | 
113 |     def test_at_end_of_sentence(self):
114 |         self.assertEqual(slugify_ru('пошЁЛ Я'), 'poshEL-YA')
115 |         self.assertEqual(slugify_ru('пошЁЛ Я.'), 'poshEL-YA')
116 |         self.assertEqual(slugify_ru('пошёл Я. ПОШЁЛ'), 'poshel-Ya-POSHEL')
117 | 
118 |     def test_one_letter_words(self):
119 |         self.assertEqual(slugify_ru('Э Я Г Д Е ?'), 'E-Ya-G-D-E')
120 |         self.assertEqual(slugify_ru('UP Э Я Г Д Е ?'), 'UP-E-YA-G-D-E')
121 | 
122 |     def test_abbreviation(self):
123 |         self.assertEqual(slugify_ru('UP Я.Б.Ч'), 'UP-Ya-B-Ch')
124 | 
125 | 
class PretranslateTestCase(unittest.TestCase):
    """The `pretranslate` option given as dict, callable or an invalid type."""

    def test_pretranslate(self):
        EMOJI_TRANSLATION = {
            u'ʘ‿ʘ': u'smiling',
            u'ಠ_ಠ': u'disapproval',
            u'♥‿♥': u'enamored',
            u'♥': u'love',

            u'(c)': u'copyright',
            u'©': u'copyright',
        }
        slugify_emoji = Slugify(pretranslate=EMOJI_TRANSLATION)

        checked = (
            (u'ʘ‿ʘ', u'smiling'),
            (u'ಠ_ಠ', u'disapproval'),
            (u'(c)', u'copyright'),
            (u'©', u'copyright'),
        )
        for source, expected in checked:
            self.assertEqual(slugify_emoji(source), expected)

    def test_pretranslate_lambda(self):
        def reverse(value):
            return value[::-1]

        slugify_reverse = Slugify(pretranslate=reverse)
        self.assertEqual(slugify_reverse('slug'), 'guls')

    def test_wrong_argument_type(self):
        # a set is neither dict, None nor callable
        self.assertRaises(ValueError, Slugify, pretranslate=set([1, 2]))
150 | 
151 | 
class SanitizeTestCase(unittest.TestCase):
    """Unwanted characters become separators unless declared safe."""

    def test_sanitize(self):
        result = slugify('test_sanitize')
        self.assertEqual(result, 'test-sanitize')

    def test_safe_chars(self):
        slugify = Slugify()

        # safe_chars is reassigned between the checks, so the order matters
        steps = (
            ('_', 'test_sanitize', 'test_sanitize'),
            ("'", 'Конь-Огонь', "Kon'-Ogon'"),
        )
        for safe_chars, source, expected in steps:
            slugify.safe_chars = safe_chars
            self.assertEqual(slugify(source), expected)
164 | 
165 | 
class StopWordsTestCase(unittest.TestCase):
    """Removal of stop words, alone and combined with safe characters."""

    def test_stop_words(self):
        slugify = Slugify(stop_words=['a', 'the'])

        plain_cases = (
            ('A red apple', 'red-apple'),
            ('The4 red apple', 'The4-red-apple'),   # glued to a digit: not a stop word
            ('_The_red_the-apple', 'red-apple'),
            ('The__red_apple', 'red-apple'),
        )
        for source, expected in plain_cases:
            self.assertEqual(slugify(source), expected)

        slugify.safe_chars = '*'
        safe_char_cases = (
            ('*The*red*apple', '*-*red*apple'),
            ('The**red*apple', '**red*apple'),
        )
        for source, expected in safe_char_cases:
            self.assertEqual(slugify(source), expected)

        slugify.stop_words = ['x', 'y']
        self.assertEqual(slugify('x y n'), 'n')

    def test_only_stop_words_text(self):
        slugify = Slugify(stop_words=['a', 'the'])

        # nothing would remain, so the stop words are kept
        result = slugify('The A')
        self.assertEqual(result, 'The-A')
187 | 
188 | 
class TruncateTestCase(unittest.TestCase):
    """The `max_length` option: words that do not fit are skipped."""

    def test_truncate(self):
        source = 'one two three four'
        by_limit = (
            (7, 'one-two'),
            (8, 'one-two'),
            (12, 'one-two-four'),
            (13, 'one-two-three'),
            (14, 'one-two-three'),
        )
        for limit, expected in by_limit:
            self.assertEqual(slugify(source, max_length=limit), expected)

    def test_truncate_on_empty(self):
        result = slugify('', max_length=10)
        self.assertEqual(result, '')

    def test_truncate_short(self):
        # a first word longer than the limit is cut
        cases = (
            ('dlinnoeslovo', 7, 'dlinnoe'),
            ('dlinnoeslovo и ещё слово', 11, 'dlinnoeslov'),
        )
        for source, limit, expected in cases:
            self.assertEqual(slugify(source, max_length=limit), expected)

    def test_truncate_long(self):
        source = 'шшш щщщ слово'
        by_limit = (
            (11, 'shshsh'),
            (12, 'shshsh-slovo'),
            (18, 'shshsh-slovo'),
            (19, 'shshsh-shchshchshch'),
            (24, 'shshsh-shchshchshch'),
            (25, 'shshsh-shchshchshch-slovo'),
        )
        for limit, expected in by_limit:
            self.assertEqual(slugify(source, max_length=limit), expected)

    def test_truncate_unwanted(self):
        result = slugify('...one...two...three...four...', max_length=12)
        self.assertEqual(result, 'one-two-four')

    def test_truncate_long_separator(self):
        result = slugify('one two three four', max_length=14, separator='...')
        self.assertEqual(result, 'one...two')
218 | 
219 | 
class FoldAbbreviationTestCase(unittest.TestCase):
    """The `fold_abbrs` option: dotted abbreviations collapse into one word."""

    def test_not_fold_abbr(self):
        slugify = Slugify(fold_abbrs=False)
        result = slugify('Back in U.S.S.R.')
        self.assertEqual('Back-in-U-S-S-R', result)

    def test_fold_abbr(self):
        slugify = Slugify(fold_abbrs=True)
        # with and without the trailing dot
        for source in ('Back in U.S.S.R.', 'Back in U.S.S.R'):
            self.assertEqual('Back-in-USSR', slugify(source))

    def test_fold_abbr_1(self):
        # option passed per call to the module-level slugify
        result = slugify('Back in U.S.S.R. () Text', fold_abbrs=True)
        self.assertEqual('Back-in-USSR-Text', result)

    def test_fold_abbr_2(self):
        slugify = Slugify(fold_abbrs=True)
        result = slugify('Back in U.S.S.R. () Text')
        self.assertEqual('Back-in-USSR-Text', result)

    def test_fold_abbr_3(self):
        slugify = Slugify(fold_abbrs=True)
        result = slugify('Back in U.S.S.R. () Text')
        self.assertEqual('Back-in-USSR-Text', result)

    def test_fold_abbr_4(self):
        slugify = Slugify(fold_abbrs=True)
        cases = (
            ('mind-in-a-box', 'mind.in.a.box'),
            ('mind-in-a-b-c-box', 'mind.in.a.b.c.box'),
            ('a-b-c-box', '.a.b.c.box'),
            ('abcbox', 'a.b.c.box'),
            ('abcb-ox', 'a.b.c.b ox'),
        )
        for expected, source in cases:
            self.assertEqual(expected, slugify(source))
248 | 
class OtherTestCase(unittest.TestCase):
    """Miscellaneous checks: pretranslation and the ``capitalize`` option."""

    def test_prevent_double_pretranslation(self):
        # The replacement 'ss' contains the key 's' itself; the expected
        # value shows the letter is expanded exactly once, not repeatedly.
        doubling = Slugify(pretranslate={'s': 'ss'})
        self.assertEqual(doubling('BOOST'), 'BOOSST')

    def test_capitalize(self):
        # Only the first character of the slug is upper-cased; the case of
        # every other character is left as given.
        result = slugify('this Is A test', capitalize=True)
        self.assertEqual(result, 'This-Is-A-test')

    def test_capitalize_on_empty(self):
        # Capitalizing an empty input must yield an empty slug.
        result = slugify('', capitalize=True)
        self.assertEqual(result, '')
260 | 
261 | 
class UniqueTestCase(unittest.TestCase):
    """Tests for ``unique_slugify`` and ``UniqueSlugify``.

    A slug that was already produced (or seeded through ``uids``) comes back
    with a numeric suffix appended after the separator.
    """

    def test_unique_slugify(self):
        # Module-level helper. The second input yields the same slug as the
        # first and gets '-1'; the third differs in letter case and so is
        # treated as a new slug.
        cases = (
            ('This % is a test ---', 'This-is-a-test'),
            ('- - -This -- is a ## test ---', 'This-is-a-test-1'),
            ('_this_is_a__test___', 'this-is-a-test'),
        )
        for text, expected in cases:
            self.assertEqual(unique_slugify(text), expected)

    def test_unique(self):
        # A fresh instance starts without known slugs.
        unique = UniqueSlugify()
        cases = (
            ('This % is another test ---', 'This-is-another-test'),
            ('- - -This -- is another ## test ---', 'This-is-another-test-1'),
        )
        for text, expected in cases:
            self.assertEqual(unique(text), expected)

    def test_init_uids(self):
        # Slugs seeded as a list count as taken and are exposed as a set.
        seeded = UniqueSlugify(uids=['This-is-my-test', 'This-is-another-test'])
        self.assertEqual(seeded('This % is a test ---'), 'This-is-a-test')
        self.assertEqual(seeded('This % is my test ---'), 'This-is-my-test-1')
        self.assertIsInstance(seeded.uids, set)

        # Seeding with a set: the suffix keeps counting up on repeated input.
        taken = {"let-me-not", "to-the-marriage", "of-true-minds"}
        seeded = UniqueSlugify(uids=taken)
        for expected in ("of-true-minds-1", "of-true-minds-2"):
            self.assertEqual(seeded("of-true-minds"), expected)

    def test_init_other(self):
        # A separator given to the constructor is also used before the suffix.
        underscored = UniqueSlugify(separator=u'_')
        cases = (
            ('This % is another test ---', 'This_is_another_test'),
            ('- - -This -- is another ## test ---', 'This_is_another_test_1'),
        )
        for text, expected in cases:
            self.assertEqual(underscored(text), expected)

    def test_unique_other(self):
        # Same as above, but the separator is supplied on each call instead.
        unique = UniqueSlugify()
        cases = (
            ('This % is another test ---', 'This_is_another_test'),
            ('- - -This -- is another ## test ---', 'This_is_another_test_1'),
        )
        for text, expected in cases:
            self.assertEqual(unique(text, separator='_'), expected)

    def test_is_unique_override(self):
        def longer_than_three_and_unused(text, uids):
            # Custom rule: slugs of three characters or fewer are never unique.
            if len(text) <= 3:
                return False
            return text not in uids

        custom = UniqueSlugify(unique_check=longer_than_three_and_unused)

        # A long slug is accepted once, then suffixed on repetition.
        self.assertEqual(custom('te occidere possunt'), 'te-occidere-possunt')
        self.assertEqual(custom('te occidere possunt'), 'te-occidere-possunt-1')
        # 'boo' fails the length rule, so even its first use gets a suffix.
        self.assertEqual(custom('boo'), 'boo-1')
        self.assertEqual(custom('boo'), 'boo-2')
304 | 
305 | 
class DeprecationTestCase(unittest.TestCase):
    """Checks that deprecated entry points keep working and warn the caller."""

    def test_deprecated_get_slugify(self):
        # ``get_slugify`` must still return a working slugifier and must emit
        # a warning whose message names it as deprecated.
        import warnings

        with warnings.catch_warnings(record=True) as caught:
            # 'always' rather than 'once': the once-registry is global to the
            # process, so a warning already emitted elsewhere would otherwise
            # be suppressed here and the test would depend on execution order.
            warnings.simplefilter('always')

            slugify = get_slugify()
            self.assertEqual(slugify('This % is a test ---'), 'This-is-a-test')

        # Look at every recorded warning instead of indexing the last one, so
        # a missing warning is reported as a failure (not an IndexError) and
        # an unrelated later warning cannot mask the deprecation message.
        messages = [str(entry.message) for entry in caught]
        self.assertTrue(
            any("'slugify.get_slugify' is deprecated" in text for text in messages),
            'expected a deprecation warning for get_slugify, got: %r' % (messages,)
        )
317 | 
318 | 
# Run the suite when this file is executed directly, which is how tox.ini and
# .travis.yml invoke it (``python slugify/tests.py``).
if __name__ == '__main__':
    unittest.main()
321 | 


--------------------------------------------------------------------------------