├── .gitignore ├── README.md ├── setup.py ├── tests.py └── unicode_tr ├── __init__.py └── extras.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | 21 | # Installer logs 22 | pip-log.txt 23 | 24 | # Unit test / coverage reports 25 | .coverage 26 | .tox 27 | nosetests.xml 28 | 29 | # Translations 30 | *.mo 31 | 32 | # Mr Developer 33 | .mr.developer.cfg 34 | .project 35 | .pydevproject 36 | .idea 37 | .idea/* 38 | 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | unicode_tr 2 | ========== 3 | 4 | [![Build Status](https://drone.io/github.com/emre/unicode_tr/status.png)](https://drone.io/github.com/emre/unicode_tr/latest)     5 | 6 | 7 | a python module to make unicode strings work as expected for turkish chars. solves the turkish "İ" problem. 8 | 9 | lower(), upper(), capitalize() and title() methods are patched. 10 | 11 | installation 12 | ========== 13 | 14 | ``` 15 | pip install unicode_tr 16 | ``` 17 | or if you like 90s: 18 | 19 | ``` 20 | easy_install unicode_tr 21 | ``` 22 | 23 | or add unicode_tr directory to the your path. 24 | 25 | usage 26 | ============ 27 | 28 | ```python 29 | # -*- coding: utf-8 -*- 30 | from unicode_tr import unicode_tr 31 | 32 | text_true = unicode_tr(u"istanbul") 33 | text_wrong = unicode(u"istanbul") 34 | 35 | # string.upper 36 | print text_true.upper(), text_wrong.upper() 37 | # output -> İSTANBUL ISTANBUL 38 | 39 | # string.capitalize 40 | print text_true.capitalize(), text_wrong.capitalize() 41 | # output -> İstanbul Istanbul 42 | 43 | # string.lower 44 | text_true = unicode_tr(u"ÇINAR") 45 | text_false = unicode(u"ÇINAR") 46 | 47 | print text_true.lower(), text_false.lower() 48 | # output -> çınar çinar 49 | 50 | # string.title 51 | text_true = unicode_tr(u"izmir istanbul") 52 | text_false = unicode(u"izmir istanbul") 53 | 54 | print text_true.title(), text_false.title() 55 | # output -> İzmir İstanbul Izmir Istanbul 56 | 57 | 58 | ``` 59 | 60 | extras 61 | ============ 62 | *extras.slugify* 63 | 64 | Turkish language supported slugify function. 65 | 66 | > Converts to lowercase, removes non-word characters (alphanumerics and 67 | > underscores) and converts spaces to hyphens. Also strips leading and 68 | > trailing whitespace." 69 | 70 | ``` 71 | In [1]: from unicode_tr.extras import slugify 72 | 73 | In [2]: slugify("türkçe") 74 | Out[2]: u'turkce' 75 | 76 | In [3]: slugify("diyarbakır") 77 | Out[3]: u'diyarbakir' 78 | 79 | ``` 80 | 81 | Note: If you want to deasciify your text: @emres/turkish-deasciifier 82 | 83 | 84 | 85 | 86 | 87 | 88 | [![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/emre/unicode_tr/trend.png)](https://bitdeli.com/free "Bitdeli Badge") 89 | 90 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | 3 | from distutils.core import setup 4 | 5 | setup( 6 | name='unicode_tr', 7 | version='0.6.1', 8 | packages=['unicode_tr'], 9 | url='http://github.com/emre/unicode_tr', 10 | license='', 11 | author='Emre Yilmaz', 12 | author_email='mail@emreyilmaz.me', 13 | description='a python module to make unicode strings work as expected for turkish chars. solves the turkish "İ" problem.' 14 | ) 15 | -------------------------------------------------------------------------------- /tests.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from unicode_tr import unicode_tr 4 | from unicode_tr.extras import slugify 5 | 6 | import unittest 7 | 8 | 9 | class TestTurkishWords(unittest.TestCase): 10 | 11 | UPPER_CASES = [ 12 | {"word": u"ığdır", "upper": u"IĞDIR"}, 13 | {"word": u"ırmak", "upper": u"IRMAK"}, 14 | {"word": u"timu", "upper": u"TİMU",} 15 | ] 16 | 17 | LOWER_CASES = [ 18 | {"word": u"İstanbul", "lower": u"istanbul"}, 19 | {"word": u"Irmak", "lower": u"ırmak"}, 20 | {"word": u"ÇESİL", "lower": u"çesil"}, 21 | {"word": u"Ğaaaa", "lower": u"ğaaaa"}, 22 | ] 23 | 24 | CAPITALIZE_CASES = [ 25 | {"word": u"KADIKÖY", "capitalize": u"Kadıköy"}, 26 | {"word": u"çınar", "capitalize": u"Çınar"}, 27 | {"word": u"şansal", "capitalize": u"Şansal"}, 28 | {"word": u"istanbul", "capitalize": u"İstanbul",} 29 | ] 30 | 31 | TITLE_CASES = [ 32 | {"phrase": u"ısparta", "title": u"Isparta"}, 33 | {"phrase": u"ısparta istanbul", "title": u"Isparta İstanbul"}, 34 | {"phrase": u"İstanbul", "title": u"İstanbul"}, 35 | {"phrase": u"çarşı timu", "title": u"Çarşı Timu"}, 36 | {"phrase": u"Ğaaa ÇEŞİL KADIKÖY", "title": u"Ğaaa Çeşil Kadıköy"}, 37 | {"phrase": u"ŞamaTa ısparta istanbul", "title": u"Şamata Isparta İstanbul"}, 38 | ] 39 | 40 | SLUG_CASES = [ 41 | {"phrase": "Türkçe", "slug": "turkce"}, 42 | {"phrase": "Diyarbakır", "slug": "diyarbakir"}, 43 | {"phrase": "Yeni başlayanlar için yalnızlık", "slug": "yeni-baslayanlar-icin-yalnizlik"}, 44 | ] 45 | 46 | def test_upper(self): 47 | for case in self.UPPER_CASES: 48 | word = unicode_tr(case.get("word")) 49 | self.assertEquals(word.upper(), case.get("upper")) 50 | 51 | def test_lower(self): 52 | for case in self.LOWER_CASES: 53 | word = unicode_tr(case.get("word")) 54 | self.assertEquals(word.lower(), case.get("lower")) 55 | 56 | def test_capitalize(self): 57 | for case in self.CAPITALIZE_CASES: 58 | word = unicode_tr(case.get("word")) 59 | self.assertEquals(word.capitalize(), case.get("capitalize")) 60 | 61 | def test_title(self): 62 | for case in self.TITLE_CASES: 63 | phrase = unicode_tr(case.get("phrase")) 64 | self.assertEquals(phrase.title(), case.get("title")) 65 | 66 | def test_slugify(self): 67 | for case in self.SLUG_CASES: 68 | self.assertEquals(slugify(case.get("phrase")), case.get("slug")) 69 | 70 | 71 | if __name__ == '__main__': 72 | unittest.main() 73 | -------------------------------------------------------------------------------- /unicode_tr/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | 3 | try: 4 | __instance = unicode 5 | except: 6 | __instance = str 7 | 8 | class unicode_tr(__instance): 9 | CHARMAP = { 10 | "to_upper": { 11 | u"ı": u"I", 12 | u"i": u"İ", 13 | }, 14 | "to_lower": { 15 | u"I": u"ı", 16 | u"İ": u"i", 17 | } 18 | } 19 | 20 | def lower(self): 21 | for key, value in self.CHARMAP.get("to_lower").items(): 22 | self = self.replace(key, value) 23 | 24 | return self.lower() 25 | 26 | def upper(self): 27 | for key, value in self.CHARMAP.get("to_upper").items(): 28 | self = self.replace(key, value) 29 | 30 | return self.upper() 31 | 32 | def capitalize(self): 33 | first, rest = self[0], self[1:] 34 | return unicode_tr(first).upper() + unicode_tr(rest).lower() 35 | 36 | def title(self): 37 | return " ".join(map(lambda x: unicode_tr(x).capitalize(), self.split())) 38 | -------------------------------------------------------------------------------- /unicode_tr/extras.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | 3 | import unicodedata 4 | import re 5 | 6 | from . import __instance 7 | 8 | def slugify(value): 9 | """ 10 | django.utils.text.slugify 11 | patched for ı and İ chars. 12 | """ 13 | if not isinstance(value, __instance): 14 | value = __instance(value, 'utf8') 15 | 16 | value = value.replace(u'\u0131', 'i') 17 | value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii') 18 | value = re.sub('[^\w\s-]', '', value).strip().lower() 19 | 20 | return re.sub('[-\s]+', '-', value) 21 | 22 | --------------------------------------------------------------------------------