├── .gitignore
├── README.md
├── setup.py
├── tests.py
└── unicode_tr
├── __init__.py
└── extras.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.py[cod]
2 |
3 | # C extensions
4 | *.so
5 |
6 | # Packages
7 | *.egg
8 | *.egg-info
9 | dist
10 | build
11 | eggs
12 | parts
13 | bin
14 | var
15 | sdist
16 | develop-eggs
17 | .installed.cfg
18 | lib
19 | lib64
20 |
21 | # Installer logs
22 | pip-log.txt
23 |
24 | # Unit test / coverage reports
25 | .coverage
26 | .tox
27 | nosetests.xml
28 |
29 | # Translations
30 | *.mo
31 |
32 | # Mr Developer
33 | .mr.developer.cfg
34 | .project
35 | .pydevproject
36 | .idea
37 | .idea/*
38 |
39 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | unicode_tr
2 | ==========
3 |
4 | [Build status](https://drone.io/github.com/emre/unicode_tr/latest)
5 |
6 |
7 | A Python module that makes unicode strings work as expected for Turkish characters. It solves the Turkish "İ" problem.
8 |
9 | lower(), upper(), capitalize() and title() methods are patched.
10 |
11 | installation
12 | ==========
13 |
14 | ```
15 | pip install unicode_tr
16 | ```
17 | or if you like 90s:
18 |
19 | ```
20 | easy_install unicode_tr
21 | ```
22 |
23 | or add the unicode_tr directory to your path.
24 |
25 | usage
26 | ============
27 |
28 | ```python
29 | # -*- coding: utf-8 -*-
30 | from unicode_tr import unicode_tr
31 |
32 | text_true = unicode_tr(u"istanbul")
33 | text_wrong = unicode(u"istanbul")
34 |
35 | # string.upper
36 | print text_true.upper(), text_wrong.upper()
37 | # output -> İSTANBUL ISTANBUL
38 |
39 | # string.capitalize
40 | print text_true.capitalize(), text_wrong.capitalize()
41 | # output -> İstanbul Istanbul
42 |
43 | # string.lower
44 | text_true = unicode_tr(u"ÇINAR")
45 | text_false = unicode(u"ÇINAR")
46 |
47 | print text_true.lower(), text_false.lower()
48 | # output -> çınar çinar
49 |
50 | # string.title
51 | text_true = unicode_tr(u"izmir istanbul")
52 | text_false = unicode(u"izmir istanbul")
53 |
54 | print text_true.title(), text_false.title()
55 | # output -> İzmir İstanbul Izmir Istanbul
56 |
57 |
58 | ```
59 |
60 | extras
61 | ============
62 | *extras.slugify*
63 |
64 | A slugify function with Turkish language support.
65 |
66 | > Converts to lowercase, removes non-word characters (anything other than
67 | > alphanumerics, underscores and hyphens) and converts spaces to hyphens.
68 | > Also strips leading and trailing whitespace.
69 |
70 | ```
71 | In [1]: from unicode_tr.extras import slugify
72 |
73 | In [2]: slugify("türkçe")
74 | Out[2]: u'turkce'
75 |
76 | In [3]: slugify("diyarbakır")
77 | Out[3]: u'diyarbakir'
78 |
79 | ```
80 |
81 | Note: If you want to deasciify your text, see @emres/turkish-deasciifier.
82 |
83 |
84 |
85 |
86 |
87 |
88 | [Bitdeli Badge](https://bitdeli.com/free "Bitdeli Badge")
89 |
90 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf8 -*-
2 |
# Prefer setuptools: distutils was deprecated by PEP 632 and removed from the
# standard library in Python 3.12, so importing it unconditionally breaks
# installation on current interpreters. Fall back to distutils only on old
# interpreters that ship without setuptools.
try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup

# Package metadata; the single package directory is ``unicode_tr``.
setup(
    name='unicode_tr',
    version='0.6.1',
    packages=['unicode_tr'],
    url='http://github.com/emre/unicode_tr',
    license='',
    author='Emre Yilmaz',
    author_email='mail@emreyilmaz.me',
    description='a python module to make unicode strings work as expected for turkish chars. solves the turkish "İ" problem.'
)
15 |
--------------------------------------------------------------------------------
/tests.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from unicode_tr import unicode_tr
4 | from unicode_tr.extras import slugify
5 |
6 | import unittest
7 |
8 |
class TestTurkishWords(unittest.TestCase):
    """Table-driven checks for unicode_tr's case methods and extras.slugify.

    Each ``*_CASES`` table holds dicts pairing an input with the value the
    corresponding method is expected to produce for it.
    """

    UPPER_CASES = [
        {"word": u"ığdır", "upper": u"IĞDIR"},
        {"word": u"ırmak", "upper": u"IRMAK"},
        {"word": u"timu", "upper": u"TİMU",}
    ]

    LOWER_CASES = [
        {"word": u"İstanbul", "lower": u"istanbul"},
        {"word": u"Irmak", "lower": u"ırmak"},
        {"word": u"ÇESİL", "lower": u"çesil"},
        {"word": u"Ğaaaa", "lower": u"ğaaaa"},
    ]

    CAPITALIZE_CASES = [
        {"word": u"KADIKÖY", "capitalize": u"Kadıköy"},
        {"word": u"çınar", "capitalize": u"Çınar"},
        {"word": u"şansal", "capitalize": u"Şansal"},
        {"word": u"istanbul", "capitalize": u"İstanbul",}
    ]

    TITLE_CASES = [
        {"phrase": u"ısparta", "title": u"Isparta"},
        {"phrase": u"ısparta istanbul", "title": u"Isparta İstanbul"},
        {"phrase": u"İstanbul", "title": u"İstanbul"},
        {"phrase": u"çarşı timu", "title": u"Çarşı Timu"},
        {"phrase": u"Ğaaa ÇEŞİL KADIKÖY", "title": u"Ğaaa Çeşil Kadıköy"},
        {"phrase": u"ŞamaTa ısparta istanbul", "title": u"Şamata Isparta İstanbul"},
    ]

    SLUG_CASES = [
        {"phrase": "Türkçe", "slug": "turkce"},
        {"phrase": "Diyarbakır", "slug": "diyarbakir"},
        {"phrase": "Yeni başlayanlar için yalnızlık", "slug": "yeni-baslayanlar-icin-yalnizlik"},
    ]

    # NOTE: assertEqual is used throughout; the assertEquals alias was
    # deprecated and no longer exists on Python 3.12+.

    def test_upper(self):
        """upper() maps dotless and dotted i to their Turkish capitals."""
        for case in self.UPPER_CASES:
            word = unicode_tr(case["word"])
            self.assertEqual(word.upper(), case["upper"])

    def test_lower(self):
        """lower() maps I and İ to their Turkish lowercase forms."""
        for case in self.LOWER_CASES:
            word = unicode_tr(case["word"])
            self.assertEqual(word.lower(), case["lower"])

    def test_capitalize(self):
        """capitalize() upper-cases the first character and lower-cases the rest."""
        for case in self.CAPITALIZE_CASES:
            word = unicode_tr(case["word"])
            self.assertEqual(word.capitalize(), case["capitalize"])

    def test_title(self):
        """title() capitalizes every whitespace-separated word."""
        for case in self.TITLE_CASES:
            phrase = unicode_tr(case["phrase"])
            self.assertEqual(phrase.title(), case["title"])

    def test_slugify(self):
        """slugify() produces lowercase ASCII slugs joined by hyphens."""
        for case in self.SLUG_CASES:
            self.assertEqual(slugify(case["phrase"]), case["slug"])


if __name__ == '__main__':
    unittest.main()
73 |
--------------------------------------------------------------------------------
/unicode_tr/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf8 -*-
2 |
try:
    # Python 2: the text type is ``unicode``.
    __instance = unicode
except NameError:
    # Python 3: ``unicode`` does not exist and ``str`` is the text type.
    __instance = str

# Alias without the double-underscore prefix. Inside the class body below the
# identifier ``__instance`` would be name-mangled to ``_unicode_tr__instance``
# and fail to resolve, so the methods use this alias instead.
_text_type = __instance


class unicode_tr(__instance):
    """Text type whose case conversions follow the Turkish i rules.

    Before delegating to the base type's ``lower()``/``upper()``, the
    characters listed in ``CHARMAP`` are swapped so that dotless ``ı`` pairs
    with ``I`` and dotted ``i`` pairs with ``İ``.

    ``lower()``, ``upper()``, ``capitalize()`` and ``title()`` all return
    ``unicode_tr`` instances, so chained calls keep applying these rules.
    """

    # Character swaps applied before the base type's case conversion.
    CHARMAP = {
        "to_upper": {
            u"ı": u"I",
            u"i": u"İ",
        },
        "to_lower": {
            u"I": u"ı",
            u"İ": u"i",
        }
    }

    def _remap(self, direction):
        """Return a plain base-type copy with ``CHARMAP[direction]`` applied.

        ``direction`` is a ``CHARMAP`` key (``"to_upper"`` or ``"to_lower"``).
        """
        # Convert to the exact base type first so that the case method called
        # on the result is the base implementation, never this class's
        # override (which would recurse).
        text = _text_type(self)
        for source, target in self.CHARMAP[direction].items():
            text = text.replace(source, target)
        return text

    def lower(self):
        """Return a lower-cased copy using the Turkish mapping for I and İ."""
        return unicode_tr(self._remap("to_lower").lower())

    def upper(self):
        """Return an upper-cased copy using the Turkish mapping for ı and i."""
        return unicode_tr(self._remap("to_upper").upper())

    def capitalize(self):
        """Return a copy with the first character upper-cased, the rest lower-cased.

        An empty string yields an empty result instead of raising IndexError.
        """
        if not self:
            return unicode_tr(self)
        first, rest = self[0], self[1:]
        return unicode_tr(unicode_tr(first).upper() + unicode_tr(rest).lower())

    def title(self):
        """Return a copy with every whitespace-separated word capitalized.

        Words are obtained with ``split()`` and re-joined with single spaces,
        so leading/trailing whitespace is dropped and runs of whitespace
        collapse to one space.
        """
        words = (unicode_tr(word).capitalize() for word in self.split())
        return unicode_tr(" ".join(words))
38 |
--------------------------------------------------------------------------------
/unicode_tr/extras.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf8 -*-
2 |
3 | import unicodedata
4 | import re
5 |
6 | from . import __instance
7 |
def slugify(value):
    """
    Turn ``value`` into a lowercase ASCII slug.

    Based on django.utils.text.slugify, patched for the Turkish dotless i.
    The value is reduced to ASCII, characters other than word characters,
    whitespace and hyphens are removed, the result is stripped and
    lower-cased, and each run of whitespace/hyphens becomes a single hyphen.

    ``value`` may be text or a UTF-8 encoded byte string; anything that is
    not already the text type is decoded as UTF-8.
    """
    if not isinstance(value, __instance):
        value = __instance(value, 'utf8')

    # The dotless i (U+0131) is mapped to "i" by hand; it would otherwise be
    # dropped by the ASCII encode below instead of being transliterated.
    value = value.replace(u'\u0131', u'i')
    # Decompose accented characters, then discard everything non-ASCII
    # (for example the combining marks left over from the decomposition).
    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
    # Raw strings keep \w and \s as regex escapes rather than (invalid)
    # string escapes.
    value = re.sub(r'[^\w\s-]', '', value).strip().lower()

    return re.sub(r'[-\s]+', '-', value)
21 |
22 |
--------------------------------------------------------------------------------