├── bidi
    ├── __init__.py
    ├── mirror.pyc
    ├── algorithm.pyc
    ├── tests.py
    ├── arabic_reshaper.py
    ├── mirror.py
    └── algorithm.py
├── Default (Linux).sublime-keymap
├── Default (OSX).sublime-keymap
├── Context.sublime-menu
├── Default (Windows).sublime-keymap
├── Default.sublime-commands
├── Main.sublime-menu
├── rtl.py
└── README.md


/bidi/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Default (Linux).sublime-keymap:
--------------------------------------------------------------------------------
1 | [
2 | 	{
3 | 		"keys": ["ctrl+b"],
4 | 		"command": "bidi"
5 | 	}
6 | ]


--------------------------------------------------------------------------------
/Default (OSX).sublime-keymap:
--------------------------------------------------------------------------------
1 | [
2 | 	{
3 | 		"keys": ["ctrl+b"],
4 | 		"command": "bidi"
5 | 	}
6 | ]


--------------------------------------------------------------------------------
/bidi/mirror.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HosseinRashno/Sublime-Text-2-BIDI/HEAD/bidi/mirror.pyc


--------------------------------------------------------------------------------
/bidi/algorithm.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HosseinRashno/Sublime-Text-2-BIDI/HEAD/bidi/algorithm.pyc


--------------------------------------------------------------------------------
/Context.sublime-menu:
--------------------------------------------------------------------------------
1 | [
2 |     { "command": "bidi", "caption":"Bidirectional text" },
3 |     { "command": "bidiselection", "caption":"Bidirectional Selection" }
4 | ]
5 | 


--------------------------------------------------------------------------------
/Default (Windows).sublime-keymap:
--------------------------------------------------------------------------------
 1 | [
 2 | 	{
 3 | 		"keys": ["ctrl+b"],
 4 | 		"command": "bidi"
 5 | 	},
 6 | 	{
 7 | 		"keys": ["ctrl+u"],
 8 | 		"command": "bidiselection"
 9 | 	}
10 | ]


--------------------------------------------------------------------------------
/Default.sublime-commands:
--------------------------------------------------------------------------------
 1 | [
 2 | 	{
 3 | 		"caption": "Bidirectional text",
 4 | 		"command": "bidi"
 5 | 	},
 6 | 	{
 7 | 		"caption": "Bidirectional selection",
 8 | 		"command": "bidiselection"
 9 | 	}
10 | ]


--------------------------------------------------------------------------------
/Main.sublime-menu:
--------------------------------------------------------------------------------
 1 | [
 2 |     {
 3 |         "caption": "Tools",
 4 |         "mnemonic": "t",
 5 |         "id": "tools",
 6 |         "children":
 7 |         [
 8 |             {"caption":"-"},
 9 |             {
10 |                 "caption": "Bidirectional text",
11 |                 "mnemonic": "B",
12 |                 "command": "bidi"
13 |             },
14 |             {
15 |                 "caption": "Bidirectional selection",
16 |                 "mnemonic": "S",
17 |                 "command": "bidiselection"
18 |             }
19 |         ]
20 |     }
21 | ]


--------------------------------------------------------------------------------
/rtl.py:
--------------------------------------------------------------------------------
 1 | import sublime, sublime_plugin, sys
 2 | 
 3 | sys.path.append( 'bidi' )
 4 | try:
 5 | 
 6 |     # Python 3
 7 | 
 8 |     from .bidi.arabic_reshaper import reshape
 9 |     from .bidi.algorithm import get_display
10 | except ValueError:
11 | 
12 |     # Python 2
13 | 
14 |     from bidi.arabic_reshaper import reshape
15 |     from bidi.algorithm import get_display
16 | 
class bidiCommand(sublime_plugin.TextCommand):
	"""Text command that runs the reshape + bidi pass over the entire buffer."""

	def run(self, edit):
		# The region spans offset 0 through the size reported by the view.
		view = self.view
		whole_buffer = sublime.Region(0, view.size())
		bidiRegion(whole_buffer, view, edit)
21 | 
class bidiselectionCommand(sublime_plugin.TextCommand):
	"""Text command that runs the reshape + bidi pass over every selection."""

	def run(self, edit):
		# The replacement text may differ in length from the selected text
		# (reshape() merges lam + alef pairs and rewrites line endings), so
		# the regions are snapshotted and processed back to front: an edit
		# never moves the offsets of a region that is still waiting.
		pending_regions = list(self.view.sel())
		for selectionRegion in reversed(pending_regions):
			bidiRegion(selectionRegion, self.view, edit)
27 | 
def bidiRegion(region, view, edit):
	"""Replace the text of `region` in `view` with its reshaped, display-ordered form.

	The text is read with view.substr(), passed through reshape() and then
	get_display(), and written back over the same region using `edit`.
	"""
	display_text = get_display(reshape(view.substr(region)))
	view.replace(edit, region, display_text)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Bidirectional text support for Sublime Text 3
 2 | ===================
 3 | 
 4 | Sublime Text 3 does not currently support bidirectional languages such as Arabic and Hebrew. This plugin lets you view bidirectional text.
 5 | 
 6 | Please note that I don't know Arabic or Hebrew; I have checked the results by pattern matching. It's a starting point.
 7 | 
 8 | <img alt="" src="https://lh6.googleusercontent.com/-GAIfpn6Oeyg/UHHVmqjSG7I/AAAAAAAAEng/wWPjpxu5e5I/s799/sublime-text2-arabic.jpg" title="Sublime text 2 BiDi - RTL Support plugin" class="alignnone" width="799" height="268">
 9 | 
10 | Install
11 | -----------------
12 | Clone it into the Sublime Text Packages directory.
13 | 
14 | <img alt="" src="https://lh6.googleusercontent.com/-zm8xnRDkluI/UG9mLrQYgPI/AAAAAAAAEm0/8qOUUMngOlw/s800/rtl-sublime-text.jpg" title="Sublime Text 2 - BiDi Plugin" class="alignnone" width="800" height="569">
15 | 
16 | Set Font face to any Arabic supporting font (Arial) in user settings. <br>
17 | <img alt="" src="https://lh3.googleusercontent.com/-l_CN_p6kJKM/UHHVlhvTU5I/AAAAAAAAEnY/8fLi3mbYoUU/s412/sublime-text2-user-pref-menu.jpg" title="Sublime text 2 user settings" class="alignnone" width="412" height="321"> <br>
18 | 
19 | <img alt="" src="https://lh6.googleusercontent.com/-VM_A9JCJhT0/UHHVlFSsMNI/AAAAAAAAEnU/CXvpyMjdv2U/s516/sublime-text2-user-pref.jpg" title="Sublime text 2 user settings" class="alignnone" width="516" height="153">
20 | 
21 | Usage
22 | ----------------------
23 | Open file.
24 | Enter text
25 | Tools > Bidirectional text (ctrl+b)
26 | 
27 | 
28 | <img alt="" src="https://lh6.googleusercontent.com/-o8kkAWZDmcw/UG9lAk9omKI/AAAAAAAAEmk/u__PYos0-IY/s800/bidi-sublime-text2.jpg" title="Sublime text 2 Bidirectional text" class="alignnone" width="800" height="569">
29 | 
30 | 
31 | Command Accessibility
32 | -------------------
33 | Tools > Bidirectional text
34 | Ctrl + B
35 | Right click > Bidirectional text
36 | 
37 | Bug tracker
38 | ----------
39 | Post an issue here on Github. 
40 | https://github.com/praveenvijayan/Sublime-Text-2-BIDI/issues
41 | 
42 | Resources
43 | ----------
44 | http://www.decodize.com/html/sublime-text-2-bidirectional-language-support-plugin/
45 | 
46 | Twitter 
47 | ------------------
48 | Follow for updates :  <a href="http://twitter.com/praveen_vijaya">@praveen_vijaya</a>
49 | 
50 | Thanks
51 | ----
52 | https://github.com/MeirKriheli/python-bidi <br>
53 | https://github.com/mpcabd/python-arabic-reshaper
54 | 
55 | 
56 | 
57 | 
58 | 
59 | 
60 | 
61 | 
62 | 


--------------------------------------------------------------------------------
/bidi/tests.py:
--------------------------------------------------------------------------------
  1 | # This file is part of python-bidi
  2 | #
  3 | # python-bidi is free software: you can redistribute it and/or modify
  4 | # it under the terms of the GNU Lesser General Public License as published by
  5 | # the Free Software Foundation, either version 3 of the License, or
  6 | # (at your option) any later version.
  7 | #
  8 | # This program is distributed in the hope that it will be useful,
  9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 11 | # GNU Lesser General Public License for more details.
 12 | #
 13 | # You should have received a copy of the GNU Lesser General Public License
 14 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 15 | 
 16 | # Copyright (C) 2008-2010 Yaacov Zamir <kzamir_a_walla.co.il>,
 17 | # Meir kriheli <meir@mksoft.co.il>
 18 | """BiDi algorithm unit tests"""
 19 | 
 20 | import unittest
 21 | from bidi.algorithm import get_display, get_empty_storage, get_embedding_levels
 22 | 
 23 | class TestBidiAlgorithm(unittest.TestCase):
 24 |     "Tests the bidi algorithm (based on GNU fribidi ones)"
 25 | 
 26 |     def test_surrogate(self):
 27 |         """Test for storage and base levels in case of surrogate pairs"""
 28 | 
 29 |         storage = get_empty_storage()
 30 | 
 31 |         text = u'HELLO \U0001d7f612'
 32 |         get_embedding_levels(text, storage, upper_is_rtl=True)
 33 | 
 34 |         # should return 9, not 10 even in --with-unicode=ucs2
 35 |         self.assertEqual(len(storage['chars']), 9)
 36 | 
 37 |         # Is the expected result ? should be EN
 38 |         _ch = storage['chars'][6]
 39 |         self.assertEqual(_ch['ch'], u'\U0001d7f6')
 40 |         self.assertEqual(_ch['type'], 'EN')
 41 | 
 42 |         display = get_display(text, upper_is_rtl=True)
 43 |         self.assertEqual(display, u'\U0001d7f612 OLLEH')
 44 | 
 45 |     def test_implict_with_upper_is_rtl(self):
 46 |         '''Implicit tests'''
 47 | 
 48 |         tests = (
 49 |             (u'car is THE CAR in arabic', u'car is RAC EHT in arabic'),
 50 |             (u'CAR IS the car IN ENGLISH', u'HSILGNE NI the car SI RAC'),
 51 |             (u'he said "IT IS 123, 456, OK"', u'he said "KO ,456 ,123 SI TI"'),
 52 |             (u'he said "IT IS (123, 456), OK"', u'he said "KO ,(456 ,123) SI TI"'),
 53 |             (u'he said "IT IS 123,456, OK"', u'he said "KO ,123,456 SI TI"'),
 54 |             (u'he said "IT IS (123,456), OK"', u'he said "KO ,(123,456) SI TI"'),
 55 |             (u'HE SAID "it is 123, 456, ok"', u'"it is 123, 456, ok" DIAS EH'),
 56 |             (u'<H123>shalom</H123>', u'<123H/>shalom<123H>'),
 57 |             (u'<h123>SAALAM</h123>', u'<h123>MALAAS</h123>'),
 58 |             (u'HE SAID "it is a car!" AND RAN', u'NAR DNA "!it is a car" DIAS EH'),
 59 |             (u'HE SAID "it is a car!x" AND RAN', u'NAR DNA "it is a car!x" DIAS EH'),
 60 |             (u'SOLVE 1*5 1-5 1/5 1+5', u'1+5 1/5 1-5 5*1 EVLOS'),
 61 |             (u'THE RANGE IS 2.5..5', u'5..2.5 SI EGNAR EHT'),
 62 |             (u'-2 CELSIUS IS COLD', u'DLOC SI SUISLEC 2-'),
 63 |         )
 64 | 
 65 |         for storage, display in tests:
 66 |             self.assertEqual(get_display(storage, upper_is_rtl=True), display)
 67 | 
 68 |     def test_override_base_dir(self):
 69 |         """Tests overriding the base paragraph direction"""
 70 | 
 71 |         # normaly the display should be :MOLAHS be since we're overriding the
 72 |         # base dir the colon should be at the end of the display
 73 |         storage = u'SHALOM:'
 74 |         display = u'MOLAHS:'
 75 | 
 76 |         self.assertEqual(get_display(storage, upper_is_rtl=True, base_dir='L'), display)
 77 | 
 78 | 
 79 | 
 80 |     def test_output_encoding(self):
 81 |         """Make sure the display is in the same encdoing as the incoming text"""
 82 | 
 83 |         storage = '\xf9\xec\xe5\xed'        # Hebrew word shalom in cp1255
 84 |         display = '\xed\xe5\xec\xf9'
 85 | 
 86 |         self.assertEqual(get_display(storage, encoding='cp1255'), display)
 87 | 
 88 | 
 89 |     def test_explicit_with_upper_is_rtl(self):
 90 |         """Explicit tests"""
 91 |         tests = (
 92 |             (u'this is _LJUST_o', u'this is JUST'),
 93 |             (u'a _lsimple _RteST_o th_oat', u'a simple TSet that'),
 94 |             (u'HAS A _LPDF missing', u'PDF missing A SAH'),
 95 |             (u'AnD hOw_L AbOuT, 123,987 tHiS_o', u'w AbOuT, 123,987 tHiSOh DnA'),
 96 |             (u'a GOOD - _L_oTEST.', u'a TSET - DOOG.'),
 97 |             (u'here_L is_o_o_o _R a good one_o', u'here is eno doog a'),
 98 |             (u'THE _rbest _lONE and', u'best ENO and EHT'),
 99 |             (u'A REAL BIG_l_o BUG!', u'!GUB GIB LAER A'),
100 |             (u'a _L_L_L_L_L_L_L_L_L_L_L_L_L_L_L_L_L_L_L_L_L_L_L_L_L_L_L_L_L_L_L_Rbug', u'a gub'),
101 |             (u'AN ARABIC _l_o 123-456 NICE ONE!', u'!ENO ECIN 456-123  CIBARA NA'),
102 |             (u'AN ARABIC _l _o 123-456 PAIR', u'RIAP   123-456 CIBARA NA'),
103 |             (u'this bug 67_r_o89 catched!', u'this bug 6789 catched!'),
104 |         )
105 | 
106 |         # adopt fribidi's CapRtl encoding
107 |         mappings = {
108 |             u'_>': u"\u200E",
109 |             u'_<': u"\u200F",
110 |             u'_l': u"\u202A",
111 |             u'_r': u"\u202B",
112 |             u'_o': u"\u202C",
113 |             u'_L': u"\u202D",
114 |             u'_R': u"\u202E",
115 |             u'__': '_',
116 |         }
117 | 
118 |         for storage, display in tests:
119 |             for key, val in mappings.items():
120 |                 storage = storage.replace(key, val)
121 |             self.assertEqual(get_display(storage, upper_is_rtl=True), display)
122 | 
123 | 
# Run the test cases when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
126 | 


--------------------------------------------------------------------------------
/bidi/arabic_reshaper.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # This work is licensed under the GNU Public License (GPL).
  4 | # To view a copy of this license, visit http://www.gnu.org/copyleft/gpl.html
  5 | 
  6 | # Written by Abd Allah Diab (mpcabd)
  7 | # Email: mpcabd ^at^ gmail ^dot^ com
  8 | # Website: http://mpcabd.igeex.biz
  9 | 
 10 | # Ported and tweaked from Java to Python, from Better Arabic Reshaper [https://github.com/agawish/Better-Arabic-Reshaper/]
 11 | 
 12 | import re
 13 | 
# Code points that get_lam_alef() compares against when looking for a
# lam + alef pair.
# U+0622 ARABIC LETTER ALEF WITH MADDA ABOVE
DEFINED_CHARACTERS_ORGINAL_ALF_UPPER_MDD 		= u'\u0622'
# U+0623 ARABIC LETTER ALEF WITH HAMZA ABOVE
DEFINED_CHARACTERS_ORGINAL_ALF_UPPER_HAMAZA		= u'\u0623'
# U+0625 ARABIC LETTER ALEF WITH HAMZA BELOW
DEFINED_CHARACTERS_ORGINAL_ALF_LOWER_HAMAZA 		= u'\u0625'
# U+0627 ARABIC LETTER ALEF
DEFINED_CHARACTERS_ORGINAL_ALF 				= u'\u0627'
# U+0644 ARABIC LETTER LAM
DEFINED_CHARACTERS_ORGINAL_LAM				= u'\u0644'
 19 | 
# Lam + alef ligature table read by get_lam_alef().
# Rows, in order: alef with madda, alef with hamza above, plain alef,
# alef with hamza below.
# get_lam_alef() reads column 1 when is_end_of_word is false and column 2
# when it is true; it never reads column 0.
LAM_ALEF_GLYPHS = [
	[u'\u3BA6', u'\uFEF6', u'\uFEF5'],
	[u'\u3BA7', u'\uFEF8', u'\uFEF7'],
	[u'\u0627', u'\uFEFC', u'\uFEFB'],
	[u'\u0625', u'\uFEFA', u'\uFEF9']
]
 26 | 
# Characters that is_haraka() reports as harakat. DecomposedWord strips
# them before shaping and puts them back afterwards, and
# replace_lam_alef() skips over them when looking for the letter after a lam.
# NOTE(review): some entries are listed twice (u'\u06D6' through u'\u06DC'
# and u'\u06DF'); this has no effect on membership tests.
HARAKAT = [
	u'\u0600', u'\u0601', u'\u0602', u'\u0603', u'\u0606', u'\u0607', u'\u0608', u'\u0609',
	u'\u060A', u'\u060B', u'\u060D', u'\u060E', u'\u0610', u'\u0611', u'\u0612', u'\u0613',
	u'\u0614', u'\u0615', u'\u0616', u'\u0617', u'\u0618', u'\u0619', u'\u061A', u'\u061B',
	u'\u061E', u'\u061F', u'\u0621', u'\u063B', u'\u063C', u'\u063D', u'\u063E', u'\u063F',
	u'\u0640', u'\u064B', u'\u064C', u'\u064D', u'\u064E', u'\u064F', u'\u0650', u'\u0651',
	u'\u0652', u'\u0653', u'\u0654', u'\u0655', u'\u0656', u'\u0657', u'\u0658', u'\u0659',
	u'\u065A', u'\u065B', u'\u065C', u'\u065D', u'\u065E', u'\u0660', u'\u066A', u'\u066B',
	u'\u066C', u'\u066F', u'\u0670', u'\u0672', u'\u06D4', u'\u06D5', u'\u06D6', u'\u06D7',
	u'\u06D8', u'\u06D9', u'\u06DA', u'\u06DB', u'\u06DC', u'\u06DF', u'\u06E0', u'\u06E1',
	u'\u06E2', u'\u06E3', u'\u06E4', u'\u06E5', u'\u06E6', u'\u06E7', u'\u06E8', u'\u06E9',
	u'\u06EA', u'\u06EB', u'\u06EC', u'\u06ED', u'\u06EE', u'\u06EF', u'\u06D6', u'\u06D7',
	u'\u06D8', u'\u06D9', u'\u06DA', u'\u06DB', u'\u06DC', u'\u06DD', u'\u06DE', u'\u06DF',
	u'\u06F0', u'\u06FD', u'\uFE70', u'\uFE71', u'\uFE72', u'\uFE73', u'\uFE74', u'\uFE75',
	u'\uFE76', u'\uFE77', u'\uFE78', u'\uFE79', u'\uFE7A', u'\uFE7B', u'\uFE7C', u'\uFE7D',
	u'\uFE7E', u'\uFE7F', u'\uFC5E', u'\uFC5F', u'\uFC60', u'\uFC61', u'\uFC62', u'\uFC63'
]
 44 | 
# Shaping table keyed by the unshaped letter.
# Each value is [letter, form 1, form 2, form 3, form 4, type]:
#   index 1 - used by reshape_it() when the letter joins neither neighbour
#             (presumably the isolated form)
#   index 2 - used when it joins only the following letter (presumably initial)
#   index 3 - used when it joins both neighbours (presumably medial)
#   index 4 - used when it joins only the preceding letter (presumably final)
#   index 5 - returned by get_glyph_type(); reshape_it() treats 4 as "joins
#             the following letter", and every row here holds 2 or 4
ARABIC_GLYPHS = {
	u'\u0622' : [u'\u0622', u'\uFE81', u'\uFE81', u'\uFE82', u'\uFE82', 2],
	u'\u0623' : [u'\u0623', u'\uFE83', u'\uFE83', u'\uFE84', u'\uFE84', 2],
	u'\u0624' : [u'\u0624', u'\uFE85', u'\uFE85', u'\uFE86', u'\uFE86', 2],
	u'\u0625' : [u'\u0625', u'\uFE87', u'\uFE87', u'\uFE88', u'\uFE88', 2],
	u'\u0626' : [u'\u0626', u'\uFE89', u'\uFE8B', u'\uFE8C', u'\uFE8A', 4],
	u'\u0627' : [u'\u0627', u'\u0627', u'\u0627', u'\uFE8E', u'\uFE8E', 2],
	u'\u0628' : [u'\u0628', u'\uFE8F', u'\uFE91', u'\uFE92', u'\uFE90', 4],
	u'\u0629' : [u'\u0629', u'\uFE93', u'\uFE93', u'\uFE94', u'\uFE94', 2],
	u'\u062A' : [u'\u062A', u'\uFE95', u'\uFE97', u'\uFE98', u'\uFE96', 4],
	u'\u062B' : [u'\u062B', u'\uFE99', u'\uFE9B', u'\uFE9C', u'\uFE9A', 4],
	u'\u062C' : [u'\u062C', u'\uFE9D', u'\uFE9F', u'\uFEA0', u'\uFE9E', 4],
	u'\u062D' : [u'\u062D', u'\uFEA1', u'\uFEA3', u'\uFEA4', u'\uFEA2', 4],
	u'\u062E' : [u'\u062E', u'\uFEA5', u'\uFEA7', u'\uFEA8', u'\uFEA6', 4],
	u'\u062F' : [u'\u062F', u'\uFEA9', u'\uFEA9', u'\uFEAA', u'\uFEAA', 2],
	u'\u0630' : [u'\u0630', u'\uFEAB', u'\uFEAB', u'\uFEAC', u'\uFEAC', 2],
	u'\u0631' : [u'\u0631', u'\uFEAD', u'\uFEAD', u'\uFEAE', u'\uFEAE', 2],
	u'\u0632' : [u'\u0632', u'\uFEAF', u'\uFEAF', u'\uFEB0', u'\uFEB0', 2],
	u'\u0633' : [u'\u0633', u'\uFEB1', u'\uFEB3', u'\uFEB4', u'\uFEB2', 4],
	u'\u0634' : [u'\u0634', u'\uFEB5', u'\uFEB7', u'\uFEB8', u'\uFEB6', 4],
	u'\u0635' : [u'\u0635', u'\uFEB9', u'\uFEBB', u'\uFEBC', u'\uFEBA', 4],
	u'\u0636' : [u'\u0636', u'\uFEBD', u'\uFEBF', u'\uFEC0', u'\uFEBE', 4],
	u'\u0637' : [u'\u0637', u'\uFEC1', u'\uFEC3', u'\uFEC4', u'\uFEC2', 4],
	u'\u0638' : [u'\u0638', u'\uFEC5', u'\uFEC7', u'\uFEC8', u'\uFEC6', 4],
	u'\u0639' : [u'\u0639', u'\uFEC9', u'\uFECB', u'\uFECC', u'\uFECA', 4],
	u'\u063A' : [u'\u063A', u'\uFECD', u'\uFECF', u'\uFED0', u'\uFECE', 4],
	u'\u0641' : [u'\u0641', u'\uFED1', u'\uFED3', u'\uFED4', u'\uFED2', 4],
	u'\u0642' : [u'\u0642', u'\uFED5', u'\uFED7', u'\uFED8', u'\uFED6', 4],
	u'\u0643' : [u'\u0643', u'\uFED9', u'\uFEDB', u'\uFEDC', u'\uFEDA', 4],
	u'\u0644' : [u'\u0644', u'\uFEDD', u'\uFEDF', u'\uFEE0', u'\uFEDE', 4],
	u'\u0645' : [u'\u0645', u'\uFEE1', u'\uFEE3', u'\uFEE4', u'\uFEE2', 4],
	u'\u0646' : [u'\u0646', u'\uFEE5', u'\uFEE7', u'\uFEE8', u'\uFEE6', 4],
	u'\u0647' : [u'\u0647', u'\uFEE9', u'\uFEEB', u'\uFEEC', u'\uFEEA', 4],
	u'\u0648' : [u'\u0648', u'\uFEED', u'\uFEED', u'\uFEEE', u'\uFEEE', 2],
	u'\u0649' : [u'\u0649', u'\uFEEF', u'\uFEEF', u'\uFEF0', u'\uFEF0', 2],
	u'\u0671' : [u'\u0671', u'\u0671', u'\u0671', u'\uFB51', u'\uFB51', 2],
	u'\u064A' : [u'\u064A', u'\uFEF1', u'\uFEF3', u'\uFEF4', u'\uFEF2', 4],
	u'\u066E' : [u'\u066E', u'\uFBE4', u'\uFBE8', u'\uFBE9', u'\uFBE5', 4],
	u'\u06AA' : [u'\u06AA', u'\uFB8E', u'\uFB90', u'\uFB91', u'\uFB8F', 4],
	u'\u06C1' : [u'\u06C1', u'\uFBA6', u'\uFBA8', u'\uFBA9', u'\uFBA7', 4],
	u'\u06E4' : [u'\u06E4', u'\u06E4', u'\u06E4', u'\u06E4', u'\uFEEE', 2],
	u'\u067E' : [u'\u067E', u'\uFB56', u'\uFB58', u'\uFB59', u'\uFB57', 4],
	u'\u0698' : [u'\u0698', u'\uFB8A', u'\uFB8A', u'\uFB8B', u'\uFB8B', 2],
	u'\u06A9' : [u'\u06A9', u'\uFB8E', u'\uFB90', u'\uFB91', u'\uFB8F', 4],
	u'\u06AF' : [u'\u06AF', u'\uFB92', u'\uFB94', u'\uFB95', u'\uFB93', 4],
	u'\u06CC' : [u'\u06CC', u'\uFBFC', u'\uFBFE', u'\uFBFF', u'\uFBFD', 4],
	u'\u0686' : [u'\u0686', u'\uFB7A', u'\uFB7C', u'\uFB7D', u'\uFB7B', 4]
}
 93 | 
# List form of the shaping table: each row uses the same
# [letter, form 1, form 2, form 3, form 4, type] layout as the values of
# ARABIC_GLYPHS.
# NOTE(review): no function in this module reads this list, and it has no
# row for u'\u06E4', which ARABIC_GLYPHS does contain - confirm whether it
# is still needed.
ARABIC_GLYPHS_LIST = [
	[u'\u0622', u'\uFE81', u'\uFE81', u'\uFE82', u'\uFE82', 2],
	[u'\u0623', u'\uFE83', u'\uFE83', u'\uFE84', u'\uFE84', 2],
	[u'\u0624', u'\uFE85', u'\uFE85', u'\uFE86', u'\uFE86', 2],
	[u'\u0625', u'\uFE87', u'\uFE87', u'\uFE88', u'\uFE88', 2],
	[u'\u0626', u'\uFE89', u'\uFE8B', u'\uFE8C', u'\uFE8A', 4],
	[u'\u0627', u'\u0627', u'\u0627', u'\uFE8E', u'\uFE8E', 2],
	[u'\u0628', u'\uFE8F', u'\uFE91', u'\uFE92', u'\uFE90', 4],
	[u'\u0629', u'\uFE93', u'\uFE93', u'\uFE94', u'\uFE94', 2],
	[u'\u062A', u'\uFE95', u'\uFE97', u'\uFE98', u'\uFE96', 4],
	[u'\u062B', u'\uFE99', u'\uFE9B', u'\uFE9C', u'\uFE9A', 4],
	[u'\u062C', u'\uFE9D', u'\uFE9F', u'\uFEA0', u'\uFE9E', 4],
	[u'\u062D', u'\uFEA1', u'\uFEA3', u'\uFEA4', u'\uFEA2', 4],
	[u'\u062E', u'\uFEA5', u'\uFEA7', u'\uFEA8', u'\uFEA6', 4],
	[u'\u062F', u'\uFEA9', u'\uFEA9', u'\uFEAA', u'\uFEAA', 2],
	[u'\u0630', u'\uFEAB', u'\uFEAB', u'\uFEAC', u'\uFEAC', 2],
	[u'\u0631', u'\uFEAD', u'\uFEAD', u'\uFEAE', u'\uFEAE', 2],
	[u'\u0632', u'\uFEAF', u'\uFEAF', u'\uFEB0', u'\uFEB0', 2],
	[u'\u0633', u'\uFEB1', u'\uFEB3', u'\uFEB4', u'\uFEB2', 4],
	[u'\u0634', u'\uFEB5', u'\uFEB7', u'\uFEB8', u'\uFEB6', 4],
	[u'\u0635', u'\uFEB9', u'\uFEBB', u'\uFEBC', u'\uFEBA', 4],
	[u'\u0636', u'\uFEBD', u'\uFEBF', u'\uFEC0', u'\uFEBE', 4],
	[u'\u0637', u'\uFEC1', u'\uFEC3', u'\uFEC4', u'\uFEC2', 4],
	[u'\u0638', u'\uFEC5', u'\uFEC7', u'\uFEC8', u'\uFEC6', 4],
	[u'\u0639', u'\uFEC9', u'\uFECB', u'\uFECC', u'\uFECA', 4],
	[u'\u063A', u'\uFECD', u'\uFECF', u'\uFED0', u'\uFECE', 4],
	[u'\u0641', u'\uFED1', u'\uFED3', u'\uFED4', u'\uFED2', 4],
	[u'\u0642', u'\uFED5', u'\uFED7', u'\uFED8', u'\uFED6', 4],
	[u'\u0643', u'\uFED9', u'\uFEDB', u'\uFEDC', u'\uFEDA', 4],
	[u'\u0644', u'\uFEDD', u'\uFEDF', u'\uFEE0', u'\uFEDE', 4],
	[u'\u0645', u'\uFEE1', u'\uFEE3', u'\uFEE4', u'\uFEE2', 4],
	[u'\u0646', u'\uFEE5', u'\uFEE7', u'\uFEE8', u'\uFEE6', 4],
	[u'\u0647', u'\uFEE9', u'\uFEEB', u'\uFEEC', u'\uFEEA', 4],
	[u'\u0648', u'\uFEED', u'\uFEED', u'\uFEEE', u'\uFEEE', 2],
	[u'\u0649', u'\uFEEF', u'\uFEEF', u'\uFEF0', u'\uFEF0', 2],
	[u'\u0671', u'\u0671', u'\u0671', u'\uFB51', u'\uFB51', 2],
	[u'\u064A', u'\uFEF1', u'\uFEF3', u'\uFEF4', u'\uFEF2', 4],
	[u'\u066E', u'\uFBE4', u'\uFBE8', u'\uFBE9', u'\uFBE5', 4],
	[u'\u06AA', u'\uFB8E', u'\uFB90', u'\uFB91', u'\uFB8F', 4],
	[u'\u06C1', u'\uFBA6', u'\uFBA8', u'\uFBA9', u'\uFBA7', 4],
	[u'\u067E', u'\uFB56', u'\uFB58', u'\uFB59', u'\uFB57', 4],
	[u'\u0698', u'\uFB8A', u'\uFB8A', u'\uFB8B', u'\uFB8B', 2],
	[u'\u06A9', u'\uFB8E', u'\uFB90', u'\uFB91', u'\uFB8F', 4],
	[u'\u06AF', u'\uFB92', u'\uFB94', u'\uFB95', u'\uFB93', 4],
	[u'\u06CC', u'\uFBFC', u'\uFBFE', u'\uFBFF', u'\uFBFD', 4],
	[u'\u0686', u'\uFB7A', u'\uFB7C', u'\uFB7D', u'\uFB7B', 4],
]
141 | 
def get_reshaped_glyph(target, location):
	"""Return the entry at index `location` of the ARABIC_GLYPHS row for `target`.

	A character that has no row in ARABIC_GLYPHS is returned unchanged.
	"""
	if target not in ARABIC_GLYPHS:
		return target
	forms = ARABIC_GLYPHS[target]
	return forms[location]
147 | 		
def get_glyph_type(target):
	"""Return the type stored at index 5 of the ARABIC_GLYPHS row for `target`.

	Characters without a row are reported as type 2.
	"""
	row = ARABIC_GLYPHS.get(target)
	return 2 if row is None else row[5]
153 | 		
def is_haraka(target):
	"""Tell whether `target` is one of the characters listed in HARAKAT."""
	listed = target in HARAKAT
	return listed
156 | 		
def replace_lam_alef(unshaped_word):
	"""Collapse every lam + alef pair of `unshaped_word` into one ligature glyph.

	For each lam, any harakat that follow it are skipped; if the next
	character is one of the alef variants known to get_lam_alef(), the lam is
	replaced by the ligature and the alef is dropped. Harakat between the two
	stay in place. The variant of the ligature depends on whether the last
	non-haraka, non-lam character seen before the lam has a glyph type
	greater than 2.

	The consumed alef is removed by blanking its slot with an empty string.
	A space placeholder followed by stripping all spaces would also delete
	genuine spaces from the input.

	Returns the word as a new string.
	"""
	list_word = list(unshaped_word)
	word_length = len(unshaped_word)
	letter_before = u''
	for i, current in enumerate(unshaped_word):
		if current != DEFINED_CHARACTERS_ORGINAL_LAM:
			# Remember the most recent real letter; harakat do not count.
			if not is_haraka(current):
				letter_before = current
			continue

		# Find the first non-haraka character after the lam.
		alef_position = i + 1
		while alef_position < word_length and is_haraka(unshaped_word[alef_position]):
			alef_position += 1
		if alef_position >= word_length:
			continue

		joined_to_previous = i > 0 and get_glyph_type(letter_before) > 2
		lam_alef = get_lam_alef(list_word[alef_position], current, not joined_to_previous)
		if lam_alef != '':
			list_word[i] = lam_alef
			# Empty string: the slot vanishes on join, other characters stay.
			list_word[alef_position] = u''

	return u''.join(list_word)
182 | 		
def get_lam_alef(candidate_alef, candidate_lam, is_end_of_word):
	"""Look up the ligature glyph for a lam followed by an alef variant.

	Returns an empty string when `candidate_lam` is not the lam character or
	`candidate_alef` is not one of the four alef variants. Otherwise the
	glyph comes from LAM_ALEF_GLYPHS: column 2 when `is_end_of_word` is true,
	column 1 otherwise.
	"""
	if candidate_lam != DEFINED_CHARACTERS_ORGINAL_LAM:
		return u''

	column = 2 if is_end_of_word else 1
	# Listed in the same order as the rows of LAM_ALEF_GLYPHS.
	alef_variants = (
		DEFINED_CHARACTERS_ORGINAL_ALF_UPPER_MDD,
		DEFINED_CHARACTERS_ORGINAL_ALF_UPPER_HAMAZA,
		DEFINED_CHARACTERS_ORGINAL_ALF,
		DEFINED_CHARACTERS_ORGINAL_ALF_LOWER_HAMAZA,
	)
	ligature = u''
	for row, variant in zip(LAM_ALEF_GLYPHS, alef_variants):
		if candidate_alef == variant:
			ligature = row[column]
	return ligature
203 | 
class DecomposedWord(object):
	"""A word split into its harakat and its remaining letters.

	Each group keeps the characters together with the index they had in the
	original word, so the word can be rebuilt after the letters are reshaped.
	"""

	def __init__(self, word):
		self.stripped_harakat = []
		self.harakat_positions = []
		self.stripped_regular_letters = []
		self.letters_position = []

		for index, char in enumerate(word):
			if is_haraka(char):
				positions, chars = self.harakat_positions, self.stripped_harakat
			else:
				positions, chars = self.letters_position, self.stripped_regular_letters
			positions.append(index)
			chars.append(char)

	def reconstruct_word(self, reshaped_word):
		"""Rebuild the word from `reshaped_word` and the stored harakat.

		The n-th character of `reshaped_word` goes to the n-th recorded
		letter position; the harakat return to their recorded positions.
		"""
		total = len(self.stripped_harakat) + len(reshaped_word)
		slots = [u'\0'] * total
		for order, position in enumerate(self.letters_position):
			slots[position] = reshaped_word[order]
		for order, position in enumerate(self.harakat_positions):
			slots[position] = self.stripped_harakat[order]
		return u''.join(slots)
227 | 
def get_reshaped_word(unshaped_word):
	"""Reshape one word: merge lam + alef pairs, shape the letters, restore harakat."""
	decomposed = DecomposedWord(replace_lam_alef(unshaped_word))
	letters = decomposed.stripped_regular_letters
	# Nothing to shape when the word holds harakat only (or is empty).
	shaped_letters = reshape_it(u''.join(letters)) if letters else u''
	return decomposed.reconstruct_word(shaped_letters)
235 | 
def reshape_it(unshaped_word):
	"""Pick a glyph form for every letter of a harakat-free word.

	A letter "joins next" when it is not the last letter and its own glyph
	type is 4. It "joins previous" when it is not the first letter and the
	glyph type of the letter before it is 4. The pair of flags selects the
	column of ARABIC_GLYPHS handed to get_reshaped_glyph(). An empty input
	yields an empty string and a single character always uses column 1.
	"""
	if not unshaped_word:
		return u''
	if len(unshaped_word) == 1:
		return get_reshaped_glyph(unshaped_word[0], 1)

	# (joins next, joins previous) -> column in the ARABIC_GLYPHS row
	column_for = {
		(True, True): 3,
		(True, False): 2,
		(False, True): 4,
		(False, False): 1,
	}
	final_index = len(unshaped_word) - 1
	shaped = []
	for index, letter in enumerate(unshaped_word):
		joins_next = index != final_index and get_glyph_type(letter) == 4
		joins_previous = index != 0 and get_glyph_type(unshaped_word[index - 1]) == 4
		column = column_for[(joins_next, joins_previous)]
		shaped.append(get_reshaped_glyph(letter, column))

	return u''.join(shaped)
262 | 
263 | 
def is_arabic_character(target):
	"""Tell whether `target` has a row in ARABIC_GLYPHS or is listed in HARAKAT."""
	if target in ARABIC_GLYPHS:
		return True
	return target in HARAKAT
266 | 	
def get_words(sentence):
	"""Split `sentence` at every single whitespace character.

	A falsy sentence gives an empty list. Consecutive whitespace characters
	produce empty strings in the result.
	"""
	if not sentence:
		return []
	pieces = re.split('\\s', sentence)
	return pieces
271 | 	
def has_arabic_letters(word):
	"""Tell whether at least one character of `word` passes is_arabic_character()."""
	return any(is_arabic_character(char) for char in word)
277 | 
def is_arabic_word(word):
	"""Tell whether every character of `word` passes is_arabic_character().

	An empty word counts as Arabic.
	"""
	return all(is_arabic_character(char) for char in word)
283 | 	
def get_words_from_mixed_word(word):
	"""Split `word` into maximal runs of Arabic and non-Arabic characters.

	Characters are classified with is_arabic_character(); a new run starts
	whenever the classification changes. Runs are returned in their original
	order, and an empty word gives an empty list.

	The class of the current run is tracked in a flag and each run is built
	with a single join. Each character is therefore classified once, rather
	than the whole accumulated run being rescanned for every character.
	"""
	words = []
	run = []
	run_is_arabic = None
	for c in word:
		c_is_arabic = is_arabic_character(c)
		if run and c_is_arabic != run_is_arabic:
			# Classification flipped: close the current run.
			words.append(u''.join(run))
			run = []
		run.append(c)
		run_is_arabic = c_is_arabic
	if run:
		words.append(u''.join(run))
	return words
303 | 	
def reshape(text):
	"""Reshape `text` line by line and return the lines joined with '\\n'.

	Lines are split at '\\n' or '\\r\\n' and each one is passed through
	reshape_sentence(). Falsy input gives an empty string.
	"""
	if not text:
		return u''
	reshaped_lines = [reshape_sentence(line) for line in re.split('\\r?\\n', text)]
	return u'\n'.join(reshaped_lines)
311 | 	
def reshape_sentence(sentence):
	"""Reshape every word of `sentence` that contains Arabic characters.

	Words made only of Arabic characters go through get_reshaped_word()
	directly. Mixed words are first cut into Arabic and non-Arabic runs with
	get_words_from_mixed_word(), each run is reshaped, and the runs are
	concatenated again. Words without Arabic characters are left untouched.

	The whitespace between words is kept exactly as it was. Splitting on any
	whitespace and rejoining with a single space would turn tabs and other
	whitespace characters into spaces. Falsy input gives an empty string.
	"""
	if not sentence:
		return u''

	# The capturing group makes re.split() keep every separator: words sit
	# at the even indices, the single whitespace characters at the odd ones.
	tokens = re.split('(\\s)', sentence)
	for i in range(0, len(tokens), 2):
		word = tokens[i]
		if not word or not has_arabic_letters(word):
			continue
		if is_arabic_word(word):
			tokens[i] = get_reshaped_word(word)
		else:
			runs = get_words_from_mixed_word(word)
			tokens[i] = u''.join(get_reshaped_word(run) for run in runs)
	return u''.join(tokens)
325 | 


--------------------------------------------------------------------------------
/bidi/mirror.py:
--------------------------------------------------------------------------------
  1 | # This file is part of python-bidi
  2 | #
  3 | # python-bidi is free software: you can redistribute it and/or modify
  4 | # it under the terms of the GNU Lesser General Public License as published by
  5 | # the Free Software Foundation, either version 3 of the License, or
  6 | # (at your option) any later version.
  7 | #
  8 | # This program is distributed in the hope that it will be useful,
  9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 11 | # GNU Lesser General Public License for more details.
 12 | #
 13 | # You should have received a copy of the GNU Lesser General Public License
 14 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 15 | 
 16 | # Copyright (C) 2008-2010 Yaacov Zamir <kzamir_a_walla.co.il>,
 17 | # Meir kriheli <meir@mksoft.co.il>
 18 | """Mirrored chars"""
 19 | 
 20 | # Can't seem to get this data from python's unicode data, so this is imported
 21 | # from http://www.unicode.org/Public/UNIDATA/BidiMirroring.txt
 22 | MIRRORED = {
 23 | u'\u0028': u'\u0029', # LEFT PARENTHESIS
 24 | u'\u0029': u'\u0028', # RIGHT PARENTHESIS
 25 | u'\u003C': u'\u003E', # LESS-THAN SIGN
 26 | u'\u003E': u'\u003C', # GREATER-THAN SIGN
 27 | u'\u005B': u'\u005D', # LEFT SQUARE BRACKET
 28 | u'\u005D': u'\u005B', # RIGHT SQUARE BRACKET
 29 | u'\u007B': u'\u007D', # LEFT CURLY BRACKET
 30 | u'\u007D': u'\u007B', # RIGHT CURLY BRACKET
 31 | u'\u00AB': u'\u00BB', # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 32 | u'\u00BB': u'\u00AB', # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 33 | u'\u0F3A': u'\u0F3B', # TIBETAN MARK GUG RTAGS GYON
 34 | u'\u0F3B': u'\u0F3A', # TIBETAN MARK GUG RTAGS GYAS
 35 | u'\u0F3C': u'\u0F3D', # TIBETAN MARK ANG KHANG GYON
 36 | u'\u0F3D': u'\u0F3C', # TIBETAN MARK ANG KHANG GYAS
 37 | u'\u169B': u'\u169C', # OGHAM FEATHER MARK
 38 | u'\u169C': u'\u169B', # OGHAM REVERSED FEATHER MARK
 39 | u'\u2039': u'\u203A', # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
 40 | u'\u203A': u'\u2039', # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
 41 | u'\u2045': u'\u2046', # LEFT SQUARE BRACKET WITH QUILL
 42 | u'\u2046': u'\u2045', # RIGHT SQUARE BRACKET WITH QUILL
 43 | u'\u207D': u'\u207E', # SUPERSCRIPT LEFT PARENTHESIS
 44 | u'\u207E': u'\u207D', # SUPERSCRIPT RIGHT PARENTHESIS
 45 | u'\u208D': u'\u208E', # SUBSCRIPT LEFT PARENTHESIS
 46 | u'\u208E': u'\u208D', # SUBSCRIPT RIGHT PARENTHESIS
 47 | u'\u2208': u'\u220B', # ELEMENT OF
 48 | u'\u2209': u'\u220C', # NOT AN ELEMENT OF
 49 | u'\u220A': u'\u220D', # SMALL ELEMENT OF
 50 | u'\u220B': u'\u2208', # CONTAINS AS MEMBER
 51 | u'\u220C': u'\u2209', # DOES NOT CONTAIN AS MEMBER
 52 | u'\u220D': u'\u220A', # SMALL CONTAINS AS MEMBER
 53 | u'\u2215': u'\u29F5', # DIVISION SLASH
 54 | u'\u223C': u'\u223D', # TILDE OPERATOR
 55 | u'\u223D': u'\u223C', # REVERSED TILDE
 56 | u'\u2243': u'\u22CD', # ASYMPTOTICALLY EQUAL TO
 57 | u'\u2252': u'\u2253', # APPROXIMATELY EQUAL TO OR THE IMAGE OF
 58 | u'\u2253': u'\u2252', # IMAGE OF OR APPROXIMATELY EQUAL TO
 59 | u'\u2254': u'\u2255', # COLON EQUALS
 60 | u'\u2255': u'\u2254', # EQUALS COLON
 61 | u'\u2264': u'\u2265', # LESS-THAN OR EQUAL TO
 62 | u'\u2265': u'\u2264', # GREATER-THAN OR EQUAL TO
 63 | u'\u2266': u'\u2267', # LESS-THAN OVER EQUAL TO
 64 | u'\u2267': u'\u2266', # GREATER-THAN OVER EQUAL TO
 65 | u'\u2268': u'\u2269', # [BEST FIT] LESS-THAN BUT NOT EQUAL TO
 66 | u'\u2269': u'\u2268', # [BEST FIT] GREATER-THAN BUT NOT EQUAL TO
 67 | u'\u226A': u'\u226B', # MUCH LESS-THAN
 68 | u'\u226B': u'\u226A', # MUCH GREATER-THAN
 69 | u'\u226E': u'\u226F', # [BEST FIT] NOT LESS-THAN
 70 | u'\u226F': u'\u226E', # [BEST FIT] NOT GREATER-THAN
 71 | u'\u2270': u'\u2271', # [BEST FIT] NEITHER LESS-THAN NOR EQUAL TO
 72 | u'\u2271': u'\u2270', # [BEST FIT] NEITHER GREATER-THAN NOR EQUAL TO
 73 | u'\u2272': u'\u2273', # [BEST FIT] LESS-THAN OR EQUIVALENT TO
 74 | u'\u2273': u'\u2272', # [BEST FIT] GREATER-THAN OR EQUIVALENT TO
 75 | u'\u2274': u'\u2275', # [BEST FIT] NEITHER LESS-THAN NOR EQUIVALENT TO
 76 | u'\u2275': u'\u2274', # [BEST FIT] NEITHER GREATER-THAN NOR EQUIVALENT TO
 77 | u'\u2276': u'\u2277', # LESS-THAN OR GREATER-THAN
 78 | u'\u2277': u'\u2276', # GREATER-THAN OR LESS-THAN
 79 | u'\u2278': u'\u2279', # [BEST FIT] NEITHER LESS-THAN NOR GREATER-THAN
 80 | u'\u2279': u'\u2278', # [BEST FIT] NEITHER GREATER-THAN NOR LESS-THAN
 81 | u'\u227A': u'\u227B', # PRECEDES
 82 | u'\u227B': u'\u227A', # SUCCEEDS
 83 | u'\u227C': u'\u227D', # PRECEDES OR EQUAL TO
 84 | u'\u227D': u'\u227C', # SUCCEEDS OR EQUAL TO
 85 | u'\u227E': u'\u227F', # [BEST FIT] PRECEDES OR EQUIVALENT TO
 86 | u'\u227F': u'\u227E', # [BEST FIT] SUCCEEDS OR EQUIVALENT TO
 87 | u'\u2280': u'\u2281', # [BEST FIT] DOES NOT PRECEDE
 88 | u'\u2281': u'\u2280', # [BEST FIT] DOES NOT SUCCEED
 89 | u'\u2282': u'\u2283', # SUBSET OF
 90 | u'\u2283': u'\u2282', # SUPERSET OF
 91 | u'\u2284': u'\u2285', # [BEST FIT] NOT A SUBSET OF
 92 | u'\u2285': u'\u2284', # [BEST FIT] NOT A SUPERSET OF
 93 | u'\u2286': u'\u2287', # SUBSET OF OR EQUAL TO
 94 | u'\u2287': u'\u2286', # SUPERSET OF OR EQUAL TO
 95 | u'\u2288': u'\u2289', # [BEST FIT] NEITHER A SUBSET OF NOR EQUAL TO
 96 | u'\u2289': u'\u2288', # [BEST FIT] NEITHER A SUPERSET OF NOR EQUAL TO
 97 | u'\u228A': u'\u228B', # [BEST FIT] SUBSET OF WITH NOT EQUAL TO
 98 | u'\u228B': u'\u228A', # [BEST FIT] SUPERSET OF WITH NOT EQUAL TO
 99 | u'\u228F': u'\u2290', # SQUARE IMAGE OF
100 | u'\u2290': u'\u228F', # SQUARE ORIGINAL OF
101 | u'\u2291': u'\u2292', # SQUARE IMAGE OF OR EQUAL TO
102 | u'\u2292': u'\u2291', # SQUARE ORIGINAL OF OR EQUAL TO
103 | u'\u2298': u'\u29B8', # CIRCLED DIVISION SLASH
104 | u'\u22A2': u'\u22A3', # RIGHT TACK
105 | u'\u22A3': u'\u22A2', # LEFT TACK
106 | u'\u22A6': u'\u2ADE', # ASSERTION
107 | u'\u22A8': u'\u2AE4', # TRUE
108 | u'\u22A9': u'\u2AE3', # FORCES
109 | u'\u22AB': u'\u2AE5', # DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
110 | u'\u22B0': u'\u22B1', # PRECEDES UNDER RELATION
111 | u'\u22B1': u'\u22B0', # SUCCEEDS UNDER RELATION
112 | u'\u22B2': u'\u22B3', # NORMAL SUBGROUP OF
113 | u'\u22B3': u'\u22B2', # CONTAINS AS NORMAL SUBGROUP
114 | u'\u22B4': u'\u22B5', # NORMAL SUBGROUP OF OR EQUAL TO
115 | u'\u22B5': u'\u22B4', # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO
116 | u'\u22B6': u'\u22B7', # ORIGINAL OF
117 | u'\u22B7': u'\u22B6', # IMAGE OF
118 | u'\u22C9': u'\u22CA', # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT
119 | u'\u22CA': u'\u22C9', # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT
120 | u'\u22CB': u'\u22CC', # LEFT SEMIDIRECT PRODUCT
121 | u'\u22CC': u'\u22CB', # RIGHT SEMIDIRECT PRODUCT
122 | u'\u22CD': u'\u2243', # REVERSED TILDE EQUALS
123 | u'\u22D0': u'\u22D1', # DOUBLE SUBSET
124 | u'\u22D1': u'\u22D0', # DOUBLE SUPERSET
125 | u'\u22D6': u'\u22D7', # LESS-THAN WITH DOT
126 | u'\u22D7': u'\u22D6', # GREATER-THAN WITH DOT
127 | u'\u22D8': u'\u22D9', # VERY MUCH LESS-THAN
128 | u'\u22D9': u'\u22D8', # VERY MUCH GREATER-THAN
129 | u'\u22DA': u'\u22DB', # LESS-THAN EQUAL TO OR GREATER-THAN
130 | u'\u22DB': u'\u22DA', # GREATER-THAN EQUAL TO OR LESS-THAN
131 | u'\u22DC': u'\u22DD', # EQUAL TO OR LESS-THAN
132 | u'\u22DD': u'\u22DC', # EQUAL TO OR GREATER-THAN
133 | u'\u22DE': u'\u22DF', # EQUAL TO OR PRECEDES
134 | u'\u22DF': u'\u22DE', # EQUAL TO OR SUCCEEDS
135 | u'\u22E0': u'\u22E1', # [BEST FIT] DOES NOT PRECEDE OR EQUAL
136 | u'\u22E1': u'\u22E0', # [BEST FIT] DOES NOT SUCCEED OR EQUAL
137 | u'\u22E2': u'\u22E3', # [BEST FIT] NOT SQUARE IMAGE OF OR EQUAL TO
138 | u'\u22E3': u'\u22E2', # [BEST FIT] NOT SQUARE ORIGINAL OF OR EQUAL TO
139 | u'\u22E4': u'\u22E5', # [BEST FIT] SQUARE IMAGE OF OR NOT EQUAL TO
140 | u'\u22E5': u'\u22E4', # [BEST FIT] SQUARE ORIGINAL OF OR NOT EQUAL TO
141 | u'\u22E6': u'\u22E7', # [BEST FIT] LESS-THAN BUT NOT EQUIVALENT TO
142 | u'\u22E7': u'\u22E6', # [BEST FIT] GREATER-THAN BUT NOT EQUIVALENT TO
143 | u'\u22E8': u'\u22E9', # [BEST FIT] PRECEDES BUT NOT EQUIVALENT TO
144 | u'\u22E9': u'\u22E8', # [BEST FIT] SUCCEEDS BUT NOT EQUIVALENT TO
145 | u'\u22EA': u'\u22EB', # [BEST FIT] NOT NORMAL SUBGROUP OF
146 | u'\u22EB': u'\u22EA', # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP
147 | u'\u22EC': u'\u22ED', # [BEST FIT] NOT NORMAL SUBGROUP OF OR EQUAL TO
148 | u'\u22ED': u'\u22EC', # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL
149 | u'\u22F0': u'\u22F1', # UP RIGHT DIAGONAL ELLIPSIS
150 | u'\u22F1': u'\u22F0', # DOWN RIGHT DIAGONAL ELLIPSIS
151 | u'\u22F2': u'\u22FA', # ELEMENT OF WITH LONG HORIZONTAL STROKE
152 | u'\u22F3': u'\u22FB', # ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
153 | u'\u22F4': u'\u22FC', # SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
154 | u'\u22F6': u'\u22FD', # ELEMENT OF WITH OVERBAR
155 | u'\u22F7': u'\u22FE', # SMALL ELEMENT OF WITH OVERBAR
156 | u'\u22FA': u'\u22F2', # CONTAINS WITH LONG HORIZONTAL STROKE
157 | u'\u22FB': u'\u22F3', # CONTAINS WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
158 | u'\u22FC': u'\u22F4', # SMALL CONTAINS WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
159 | u'\u22FD': u'\u22F6', # CONTAINS WITH OVERBAR
160 | u'\u22FE': u'\u22F7', # SMALL CONTAINS WITH OVERBAR
161 | u'\u2308': u'\u2309', # LEFT CEILING
162 | u'\u2309': u'\u2308', # RIGHT CEILING
163 | u'\u230A': u'\u230B', # LEFT FLOOR
164 | u'\u230B': u'\u230A', # RIGHT FLOOR
165 | u'\u2329': u'\u232A', # LEFT-POINTING ANGLE BRACKET
166 | u'\u232A': u'\u2329', # RIGHT-POINTING ANGLE BRACKET
167 | u'\u2768': u'\u2769', # MEDIUM LEFT PARENTHESIS ORNAMENT
168 | u'\u2769': u'\u2768', # MEDIUM RIGHT PARENTHESIS ORNAMENT
169 | u'\u276A': u'\u276B', # MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
170 | u'\u276B': u'\u276A', # MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
171 | u'\u276C': u'\u276D', # MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
172 | u'\u276D': u'\u276C', # MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
173 | u'\u276E': u'\u276F', # HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
174 | u'\u276F': u'\u276E', # HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
175 | u'\u2770': u'\u2771', # HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
176 | u'\u2771': u'\u2770', # HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
177 | u'\u2772': u'\u2773', # LIGHT LEFT TORTOISE SHELL BRACKET
178 | u'\u2773': u'\u2772', # LIGHT RIGHT TORTOISE SHELL BRACKET
179 | u'\u2774': u'\u2775', # MEDIUM LEFT CURLY BRACKET ORNAMENT
180 | u'\u2775': u'\u2774', # MEDIUM RIGHT CURLY BRACKET ORNAMENT
181 | u'\u27C3': u'\u27C4', # OPEN SUBSET
182 | u'\u27C4': u'\u27C3', # OPEN SUPERSET
183 | u'\u27C5': u'\u27C6', # LEFT S-SHAPED BAG DELIMITER
184 | u'\u27C6': u'\u27C5', # RIGHT S-SHAPED BAG DELIMITER
185 | u'\u27C8': u'\u27C9', # REVERSE SOLIDUS PRECEDING SUBSET
186 | u'\u27C9': u'\u27C8', # SUPERSET PRECEDING SOLIDUS
187 | u'\u27D5': u'\u27D6', # LEFT OUTER JOIN
188 | u'\u27D6': u'\u27D5', # RIGHT OUTER JOIN
189 | u'\u27DD': u'\u27DE', # LONG RIGHT TACK
190 | u'\u27DE': u'\u27DD', # LONG LEFT TACK
191 | u'\u27E2': u'\u27E3', # WHITE CONCAVE-SIDED DIAMOND WITH LEFTWARDS TICK
192 | u'\u27E3': u'\u27E2', # WHITE CONCAVE-SIDED DIAMOND WITH RIGHTWARDS TICK
193 | u'\u27E4': u'\u27E5', # WHITE SQUARE WITH LEFTWARDS TICK
194 | u'\u27E5': u'\u27E4', # WHITE SQUARE WITH RIGHTWARDS TICK
195 | u'\u27E6': u'\u27E7', # MATHEMATICAL LEFT WHITE SQUARE BRACKET
196 | u'\u27E7': u'\u27E6', # MATHEMATICAL RIGHT WHITE SQUARE BRACKET
197 | u'\u27E8': u'\u27E9', # MATHEMATICAL LEFT ANGLE BRACKET
198 | u'\u27E9': u'\u27E8', # MATHEMATICAL RIGHT ANGLE BRACKET
199 | u'\u27EA': u'\u27EB', # MATHEMATICAL LEFT DOUBLE ANGLE BRACKET
200 | u'\u27EB': u'\u27EA', # MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET
201 | u'\u27EC': u'\u27ED', # MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET
202 | u'\u27ED': u'\u27EC', # MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET
203 | u'\u27EE': u'\u27EF', # MATHEMATICAL LEFT FLATTENED PARENTHESIS
204 | u'\u27EF': u'\u27EE', # MATHEMATICAL RIGHT FLATTENED PARENTHESIS
205 | u'\u2983': u'\u2984', # LEFT WHITE CURLY BRACKET
206 | u'\u2984': u'\u2983', # RIGHT WHITE CURLY BRACKET
207 | u'\u2985': u'\u2986', # LEFT WHITE PARENTHESIS
208 | u'\u2986': u'\u2985', # RIGHT WHITE PARENTHESIS
209 | u'\u2987': u'\u2988', # Z NOTATION LEFT IMAGE BRACKET
210 | u'\u2988': u'\u2987', # Z NOTATION RIGHT IMAGE BRACKET
211 | u'\u2989': u'\u298A', # Z NOTATION LEFT BINDING BRACKET
212 | u'\u298A': u'\u2989', # Z NOTATION RIGHT BINDING BRACKET
213 | u'\u298B': u'\u298C', # LEFT SQUARE BRACKET WITH UNDERBAR
214 | u'\u298C': u'\u298B', # RIGHT SQUARE BRACKET WITH UNDERBAR
215 | u'\u298D': u'\u2990', # LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
216 | u'\u298E': u'\u298F', # RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
217 | u'\u298F': u'\u298E', # LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
218 | u'\u2990': u'\u298D', # RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER
219 | u'\u2991': u'\u2992', # LEFT ANGLE BRACKET WITH DOT
220 | u'\u2992': u'\u2991', # RIGHT ANGLE BRACKET WITH DOT
221 | u'\u2993': u'\u2994', # LEFT ARC LESS-THAN BRACKET
222 | u'\u2994': u'\u2993', # RIGHT ARC GREATER-THAN BRACKET
223 | u'\u2995': u'\u2996', # DOUBLE LEFT ARC GREATER-THAN BRACKET
224 | u'\u2996': u'\u2995', # DOUBLE RIGHT ARC LESS-THAN BRACKET
225 | u'\u2997': u'\u2998', # LEFT BLACK TORTOISE SHELL BRACKET
226 | u'\u2998': u'\u2997', # RIGHT BLACK TORTOISE SHELL BRACKET
227 | u'\u29B8': u'\u2298', # CIRCLED REVERSE SOLIDUS
228 | u'\u29C0': u'\u29C1', # CIRCLED LESS-THAN
229 | u'\u29C1': u'\u29C0', # CIRCLED GREATER-THAN
230 | u'\u29C4': u'\u29C5', # SQUARED RISING DIAGONAL SLASH
231 | u'\u29C5': u'\u29C4', # SQUARED FALLING DIAGONAL SLASH
232 | u'\u29CF': u'\u29D0', # LEFT TRIANGLE BESIDE VERTICAL BAR
233 | u'\u29D0': u'\u29CF', # VERTICAL BAR BESIDE RIGHT TRIANGLE
234 | u'\u29D1': u'\u29D2', # BOWTIE WITH LEFT HALF BLACK
235 | u'\u29D2': u'\u29D1', # BOWTIE WITH RIGHT HALF BLACK
236 | u'\u29D4': u'\u29D5', # TIMES WITH LEFT HALF BLACK
237 | u'\u29D5': u'\u29D4', # TIMES WITH RIGHT HALF BLACK
238 | u'\u29D8': u'\u29D9', # LEFT WIGGLY FENCE
239 | u'\u29D9': u'\u29D8', # RIGHT WIGGLY FENCE
240 | u'\u29DA': u'\u29DB', # LEFT DOUBLE WIGGLY FENCE
241 | u'\u29DB': u'\u29DA', # RIGHT DOUBLE WIGGLY FENCE
242 | u'\u29F5': u'\u2215', # REVERSE SOLIDUS OPERATOR
243 | u'\u29F8': u'\u29F9', # BIG SOLIDUS
244 | u'\u29F9': u'\u29F8', # BIG REVERSE SOLIDUS
245 | u'\u29FC': u'\u29FD', # LEFT-POINTING CURVED ANGLE BRACKET
246 | u'\u29FD': u'\u29FC', # RIGHT-POINTING CURVED ANGLE BRACKET
247 | u'\u2A2B': u'\u2A2C', # MINUS SIGN WITH FALLING DOTS
248 | u'\u2A2C': u'\u2A2B', # MINUS SIGN WITH RISING DOTS
249 | u'\u2A2D': u'\u2A2E', # PLUS SIGN IN LEFT HALF CIRCLE
250 | u'\u2A2E': u'\u2A2D', # PLUS SIGN IN RIGHT HALF CIRCLE
251 | u'\u2A34': u'\u2A35', # MULTIPLICATION SIGN IN LEFT HALF CIRCLE
252 | u'\u2A35': u'\u2A34', # MULTIPLICATION SIGN IN RIGHT HALF CIRCLE
253 | u'\u2A3C': u'\u2A3D', # INTERIOR PRODUCT
254 | u'\u2A3D': u'\u2A3C', # RIGHTHAND INTERIOR PRODUCT
255 | u'\u2A64': u'\u2A65', # Z NOTATION DOMAIN ANTIRESTRICTION
256 | u'\u2A65': u'\u2A64', # Z NOTATION RANGE ANTIRESTRICTION
257 | u'\u2A79': u'\u2A7A', # LESS-THAN WITH CIRCLE INSIDE
258 | u'\u2A7A': u'\u2A79', # GREATER-THAN WITH CIRCLE INSIDE
259 | u'\u2A7D': u'\u2A7E', # LESS-THAN OR SLANTED EQUAL TO
260 | u'\u2A7E': u'\u2A7D', # GREATER-THAN OR SLANTED EQUAL TO
261 | u'\u2A7F': u'\u2A80', # LESS-THAN OR SLANTED EQUAL TO WITH DOT INSIDE
262 | u'\u2A80': u'\u2A7F', # GREATER-THAN OR SLANTED EQUAL TO WITH DOT INSIDE
263 | u'\u2A81': u'\u2A82', # LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE
264 | u'\u2A82': u'\u2A81', # GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE
265 | u'\u2A83': u'\u2A84', # LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE RIGHT
266 | u'\u2A84': u'\u2A83', # GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE LEFT
267 | u'\u2A8B': u'\u2A8C', # LESS-THAN ABOVE DOUBLE-LINE EQUAL ABOVE GREATER-THAN
268 | u'\u2A8C': u'\u2A8B', # GREATER-THAN ABOVE DOUBLE-LINE EQUAL ABOVE LESS-THAN
269 | u'\u2A91': u'\u2A92', # LESS-THAN ABOVE GREATER-THAN ABOVE DOUBLE-LINE EQUAL
270 | u'\u2A92': u'\u2A91', # GREATER-THAN ABOVE LESS-THAN ABOVE DOUBLE-LINE EQUAL
271 | u'\u2A93': u'\u2A94', # LESS-THAN ABOVE SLANTED EQUAL ABOVE GREATER-THAN ABOVE SLANTED EQUAL
272 | u'\u2A94': u'\u2A93', # GREATER-THAN ABOVE SLANTED EQUAL ABOVE LESS-THAN ABOVE SLANTED EQUAL
273 | u'\u2A95': u'\u2A96', # SLANTED EQUAL TO OR LESS-THAN
274 | u'\u2A96': u'\u2A95', # SLANTED EQUAL TO OR GREATER-THAN
275 | u'\u2A97': u'\u2A98', # SLANTED EQUAL TO OR LESS-THAN WITH DOT INSIDE
276 | u'\u2A98': u'\u2A97', # SLANTED EQUAL TO OR GREATER-THAN WITH DOT INSIDE
277 | u'\u2A99': u'\u2A9A', # DOUBLE-LINE EQUAL TO OR LESS-THAN
278 | u'\u2A9A': u'\u2A99', # DOUBLE-LINE EQUAL TO OR GREATER-THAN
279 | u'\u2A9B': u'\u2A9C', # DOUBLE-LINE SLANTED EQUAL TO OR LESS-THAN
280 | u'\u2A9C': u'\u2A9B', # DOUBLE-LINE SLANTED EQUAL TO OR GREATER-THAN
281 | u'\u2AA1': u'\u2AA2', # DOUBLE NESTED LESS-THAN
282 | u'\u2AA2': u'\u2AA1', # DOUBLE NESTED GREATER-THAN
283 | u'\u2AA6': u'\u2AA7', # LESS-THAN CLOSED BY CURVE
284 | u'\u2AA7': u'\u2AA6', # GREATER-THAN CLOSED BY CURVE
285 | u'\u2AA8': u'\u2AA9', # LESS-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL
286 | u'\u2AA9': u'\u2AA8', # GREATER-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL
287 | u'\u2AAA': u'\u2AAB', # SMALLER THAN
288 | u'\u2AAB': u'\u2AAA', # LARGER THAN
289 | u'\u2AAC': u'\u2AAD', # SMALLER THAN OR EQUAL TO
290 | u'\u2AAD': u'\u2AAC', # LARGER THAN OR EQUAL TO
291 | u'\u2AAF': u'\u2AB0', # PRECEDES ABOVE SINGLE-LINE EQUALS SIGN
292 | u'\u2AB0': u'\u2AAF', # SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN
293 | u'\u2AB3': u'\u2AB4', # PRECEDES ABOVE EQUALS SIGN
294 | u'\u2AB4': u'\u2AB3', # SUCCEEDS ABOVE EQUALS SIGN
295 | u'\u2ABB': u'\u2ABC', # DOUBLE PRECEDES
296 | u'\u2ABC': u'\u2ABB', # DOUBLE SUCCEEDS
297 | u'\u2ABD': u'\u2ABE', # SUBSET WITH DOT
298 | u'\u2ABE': u'\u2ABD', # SUPERSET WITH DOT
299 | u'\u2ABF': u'\u2AC0', # SUBSET WITH PLUS SIGN BELOW
300 | u'\u2AC0': u'\u2ABF', # SUPERSET WITH PLUS SIGN BELOW
301 | u'\u2AC1': u'\u2AC2', # SUBSET WITH MULTIPLICATION SIGN BELOW
302 | u'\u2AC2': u'\u2AC1', # SUPERSET WITH MULTIPLICATION SIGN BELOW
303 | u'\u2AC3': u'\u2AC4', # SUBSET OF OR EQUAL TO WITH DOT ABOVE
304 | u'\u2AC4': u'\u2AC3', # SUPERSET OF OR EQUAL TO WITH DOT ABOVE
305 | u'\u2AC5': u'\u2AC6', # SUBSET OF ABOVE EQUALS SIGN
306 | u'\u2AC6': u'\u2AC5', # SUPERSET OF ABOVE EQUALS SIGN
307 | u'\u2ACD': u'\u2ACE', # SQUARE LEFT OPEN BOX OPERATOR
308 | u'\u2ACE': u'\u2ACD', # SQUARE RIGHT OPEN BOX OPERATOR
309 | u'\u2ACF': u'\u2AD0', # CLOSED SUBSET
310 | u'\u2AD0': u'\u2ACF', # CLOSED SUPERSET
311 | u'\u2AD1': u'\u2AD2', # CLOSED SUBSET OR EQUAL TO
312 | u'\u2AD2': u'\u2AD1', # CLOSED SUPERSET OR EQUAL TO
313 | u'\u2AD3': u'\u2AD4', # SUBSET ABOVE SUPERSET
314 | u'\u2AD4': u'\u2AD3', # SUPERSET ABOVE SUBSET
315 | u'\u2AD5': u'\u2AD6', # SUBSET ABOVE SUBSET
316 | u'\u2AD6': u'\u2AD5', # SUPERSET ABOVE SUPERSET
317 | u'\u2ADE': u'\u22A6', # SHORT LEFT TACK
318 | u'\u2AE3': u'\u22A9', # DOUBLE VERTICAL BAR LEFT TURNSTILE
319 | u'\u2AE4': u'\u22A8', # VERTICAL BAR DOUBLE LEFT TURNSTILE
320 | u'\u2AE5': u'\u22AB', # DOUBLE VERTICAL BAR DOUBLE LEFT TURNSTILE
321 | u'\u2AEC': u'\u2AED', # DOUBLE STROKE NOT SIGN
322 | u'\u2AED': u'\u2AEC', # REVERSED DOUBLE STROKE NOT SIGN
323 | u'\u2AF7': u'\u2AF8', # TRIPLE NESTED LESS-THAN
324 | u'\u2AF8': u'\u2AF7', # TRIPLE NESTED GREATER-THAN
325 | u'\u2AF9': u'\u2AFA', # DOUBLE-LINE SLANTED LESS-THAN OR EQUAL TO
326 | u'\u2AFA': u'\u2AF9', # DOUBLE-LINE SLANTED GREATER-THAN OR EQUAL TO
327 | u'\u2E02': u'\u2E03', # LEFT SUBSTITUTION BRACKET
328 | u'\u2E03': u'\u2E02', # RIGHT SUBSTITUTION BRACKET
329 | u'\u2E04': u'\u2E05', # LEFT DOTTED SUBSTITUTION BRACKET
330 | u'\u2E05': u'\u2E04', # RIGHT DOTTED SUBSTITUTION BRACKET
331 | u'\u2E09': u'\u2E0A', # LEFT TRANSPOSITION BRACKET
332 | u'\u2E0A': u'\u2E09', # RIGHT TRANSPOSITION BRACKET
333 | u'\u2E0C': u'\u2E0D', # LEFT RAISED OMISSION BRACKET
334 | u'\u2E0D': u'\u2E0C', # RIGHT RAISED OMISSION BRACKET
335 | u'\u2E1C': u'\u2E1D', # LEFT LOW PARAPHRASE BRACKET
336 | u'\u2E1D': u'\u2E1C', # RIGHT LOW PARAPHRASE BRACKET
337 | u'\u2E20': u'\u2E21', # LEFT VERTICAL BAR WITH QUILL
338 | u'\u2E21': u'\u2E20', # RIGHT VERTICAL BAR WITH QUILL
339 | u'\u2E22': u'\u2E23', # TOP LEFT HALF BRACKET
340 | u'\u2E23': u'\u2E22', # TOP RIGHT HALF BRACKET
341 | u'\u2E24': u'\u2E25', # BOTTOM LEFT HALF BRACKET
342 | u'\u2E25': u'\u2E24', # BOTTOM RIGHT HALF BRACKET
343 | u'\u2E26': u'\u2E27', # LEFT SIDEWAYS U BRACKET
344 | u'\u2E27': u'\u2E26', # RIGHT SIDEWAYS U BRACKET
345 | u'\u2E28': u'\u2E29', # LEFT DOUBLE PARENTHESIS
346 | u'\u2E29': u'\u2E28', # RIGHT DOUBLE PARENTHESIS
347 | u'\u3008': u'\u3009', # LEFT ANGLE BRACKET
348 | u'\u3009': u'\u3008', # RIGHT ANGLE BRACKET
349 | u'\u300A': u'\u300B', # LEFT DOUBLE ANGLE BRACKET
350 | u'\u300B': u'\u300A', # RIGHT DOUBLE ANGLE BRACKET
351 | u'\u300C': u'\u300D', # [BEST FIT] LEFT CORNER BRACKET
352 | u'\u300D': u'\u300C', # [BEST FIT] RIGHT CORNER BRACKET
353 | u'\u300E': u'\u300F', # [BEST FIT] LEFT WHITE CORNER BRACKET
354 | u'\u300F': u'\u300E', # [BEST FIT] RIGHT WHITE CORNER BRACKET
355 | u'\u3010': u'\u3011', # LEFT BLACK LENTICULAR BRACKET
356 | u'\u3011': u'\u3010', # RIGHT BLACK LENTICULAR BRACKET
357 | u'\u3014': u'\u3015', # LEFT TORTOISE SHELL BRACKET
358 | u'\u3015': u'\u3014', # RIGHT TORTOISE SHELL BRACKET
359 | u'\u3016': u'\u3017', # LEFT WHITE LENTICULAR BRACKET
360 | u'\u3017': u'\u3016', # RIGHT WHITE LENTICULAR BRACKET
361 | u'\u3018': u'\u3019', # LEFT WHITE TORTOISE SHELL BRACKET
362 | u'\u3019': u'\u3018', # RIGHT WHITE TORTOISE SHELL BRACKET
363 | u'\u301A': u'\u301B', # LEFT WHITE SQUARE BRACKET
364 | u'\u301B': u'\u301A', # RIGHT WHITE SQUARE BRACKET
365 | u'\uFE59': u'\uFE5A', # SMALL LEFT PARENTHESIS
366 | u'\uFE5A': u'\uFE59', # SMALL RIGHT PARENTHESIS
367 | u'\uFE5B': u'\uFE5C', # SMALL LEFT CURLY BRACKET
368 | u'\uFE5C': u'\uFE5B', # SMALL RIGHT CURLY BRACKET
369 | u'\uFE5D': u'\uFE5E', # SMALL LEFT TORTOISE SHELL BRACKET
370 | u'\uFE5E': u'\uFE5D', # SMALL RIGHT TORTOISE SHELL BRACKET
371 | u'\uFE64': u'\uFE65', # SMALL LESS-THAN SIGN
372 | u'\uFE65': u'\uFE64', # SMALL GREATER-THAN SIGN
373 | u'\uFF08': u'\uFF09', # FULLWIDTH LEFT PARENTHESIS
374 | u'\uFF09': u'\uFF08', # FULLWIDTH RIGHT PARENTHESIS
375 | u'\uFF1C': u'\uFF1E', # FULLWIDTH LESS-THAN SIGN
376 | u'\uFF1E': u'\uFF1C', # FULLWIDTH GREATER-THAN SIGN
377 | u'\uFF3B': u'\uFF3D', # FULLWIDTH LEFT SQUARE BRACKET
378 | u'\uFF3D': u'\uFF3B', # FULLWIDTH RIGHT SQUARE BRACKET
379 | u'\uFF5B': u'\uFF5D', # FULLWIDTH LEFT CURLY BRACKET
380 | u'\uFF5D': u'\uFF5B', # FULLWIDTH RIGHT CURLY BRACKET
381 | u'\uFF5F': u'\uFF60', # FULLWIDTH LEFT WHITE PARENTHESIS
382 | u'\uFF60': u'\uFF5F', # FULLWIDTH RIGHT WHITE PARENTHESIS
383 | u'\uFF62': u'\uFF63', # [BEST FIT] HALFWIDTH LEFT CORNER BRACKET
384 | u'\uFF63': u'\uFF62', # [BEST FIT] HALFWIDTH RIGHT CORNER BRACKET
385 | }
386 | 


--------------------------------------------------------------------------------
/bidi/algorithm.py:
--------------------------------------------------------------------------------
  1 | # This file is part of python-bidi
  2 | #
  3 | # python-bidi is free software: you can redistribute it and/or modify
  4 | # it under the terms of the GNU Lesser General Public License as published by
  5 | # the Free Software Foundation, either version 3 of the License, or
  6 | # (at your option) any later version.
  7 | #
  8 | # This program is distributed in the hope that it will be useful,
  9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 11 | # GNU Lesser General Public License for more details.
 12 | #
 13 | # You should have received a copy of the GNU Lesser General Public License
 14 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 15 | 
 16 | # Copyright (C) 2008-2010 Yaacov Zamir <kzamir_a_walla.co.il>,
 17 | # Meir kriheli <meir@mksoft.co.il>
 18 | "bidirectional algorithm implementation"
 19 | 
 20 | from unicodedata import bidirectional, mirrored
 21 | import inspect
 22 | import sys
 23 | from collections import deque
 24 | 
 25 | try:
 26 | 
 27 |     # Python 3
 28 | 
 29 |     from .mirror import MIRRORED
 30 | except ValueError:
 31 | 
 32 |     # Python 2
 33 |     from bidi.mirror import MIRRORED
 34 | 
 35 | # Some definitions
 36 | PARAGRAPH_LEVELS = { 'L':0, 'AL':1, 'R': 1 }
 37 | EXPLICIT_LEVEL_LIMIT = 62
 38 | 
 39 | _LEAST_GREATER_ODD = lambda x: (x + 1) | 1
 40 | _LEAST_GREATER_EVEN = lambda x: (x + 2) & ~1
 41 | 
 42 | X2_X5_MAPPINGS = {
 43 |     'RLE': (_LEAST_GREATER_ODD, 'N'),
 44 |     'LRE': (_LEAST_GREATER_EVEN, 'N'),
 45 |     'RLO': (_LEAST_GREATER_ODD, 'R'),
 46 |     'LRO': (_LEAST_GREATER_EVEN, 'L'),
 47 | }
 48 | 
 49 | # Added 'B' so X6 won't execute in that case and X8 will run it's course
 50 | X6_IGNORED = list(X2_X5_MAPPINGS.keys()) + ['BN', 'PDF', 'B']
 51 | X9_REMOVED = list(X2_X5_MAPPINGS.keys()) + ['BN', 'PDF']
 52 | 
 53 | _embedding_direction = lambda x:('L', 'R')[x % 2]
 54 | 
 55 | _IS_UCS2 = sys.maxunicode == 65535
 56 | _SURROGATE_MIN, _SURROGATE_MAX = 55296, 56319 # D800, DBFF
 57 | 
 58 | def debug_storage(storage, base_info=False, chars=True, runs=False):
 59 |     "Display debug information for the storage"
 60 | 
 61 |     import codecs
 62 |     import locale
 63 |     import sys
 64 | 
 65 |     stderr = codecs.getwriter(locale.getpreferredencoding())(sys.stderr)
 66 | 
 67 |     caller = inspect.stack()[1][3]
 68 |     stderr.write('in %s\n' % caller)
 69 | 
 70 |     if base_info:
 71 |         stderr.write(u'  base level  : %d\n' % storage['base_level'])
 72 |         stderr.write(u'  base dir    : %s\n' % storage['base_dir'])
 73 | 
 74 |     if runs:
 75 |         stderr.write(u'  runs        : %s\n' % list(storage['runs']))
 76 | 
 77 |     if chars:
 78 |         output = u'  Chars       : '
 79 |         for _ch in storage['chars']:
 80 |             if _ch != '\n':
 81 |                 output += _ch['ch']
 82 |             else:
 83 |                 output += 'C'
 84 |         stderr.write(output + u'\n')
 85 | 
 86 |         output = u'  Res. levels : %s\n' % u''.join(
 87 |             [unicode(_ch['level']) for _ch in storage['chars']])
 88 |         stderr.write(output)
 89 | 
 90 |         _types = [_ch['type'].ljust(3) for _ch in storage['chars']]
 91 | 
 92 |         for i in range(3):
 93 |             if i:
 94 |                 output = u'                %s\n'
 95 |             else:
 96 |                 output = u'  Res. types  : %s\n'
 97 |             stderr.write(output % u''.join([_t[i] for _t in _types]))
 98 | 
 99 | 
def get_base_level(text, upper_is_rtl=False):
    """Return the paragraph base embedding level: 0 for LTR, 1 for RTL.

    `text` a unicode object.

    The level is decided by the first strong character found in `text`
    (rule P2). When there is none, 0 is returned (rule P3).

    Set `upper_is_rtl` to True to treat upper case chars as strong 'R'
    for debugging (default: False).

    """

    pending_surrogate = False

    # P2
    for char in text:
        # on ucs2 builds hold the first surrogate and join it with the
        # following char before examining the pair
        if _IS_UCS2 and (_SURROGATE_MIN <= ord(char) <= _SURROGATE_MAX):
            pending_surrogate = char
            continue

        if pending_surrogate:
            char = pending_surrogate + char
            pending_surrogate = False

        # upper case counts as RTL in debug mode
        if upper_is_rtl and char.isupper():
            return 1

        strong_type = bidirectional(char)

        if strong_type in ('AL', 'R'):
            return 1

        if strong_type == 'L':
            return 0

    # P3
    return 0
144 | 
def get_embedding_levels(text, storage, upper_is_rtl=False, debug=False):
    """Fill ``storage['chars']`` with one record for every char of `text`.

    Each record is a dict holding the char (``ch``), its level, preset to
    ``storage['base_level']``, and its bidi type, stored under both ``type``
    and ``orig``.

    Set `upper_is_rtl` to True to give upper case chars the type 'R'.
    Set `debug` to True to dump the storage once it is filled.

    """

    level = storage['base_level']
    pending_surrogate = False

    for char in text:
        # on ucs2 builds hold the first surrogate and join it with the
        # following char, so the pair is stored as one record
        if _IS_UCS2 and (_SURROGATE_MIN <= ord(char) <= _SURROGATE_MAX):
            pending_surrogate = char
            continue

        if pending_surrogate:
            char = pending_surrogate + char
            pending_surrogate = False

        kind = 'R' if upper_is_rtl and char.isupper() else bidirectional(char)

        storage['chars'].append(
            dict(ch=char, level=level, type=kind, orig=kind))

    if debug:
        debug_storage(storage, base_info=True)
169 | 
def explicit_embed_and_overrides(storage, debug=False):
    """Resolve explicit embeddings and overrides (rules X1 to X9).

    Walks ``storage['chars']``, assigning every char its embedding level and,
    while an override is in effect, the override's type. The explicit codes
    are then dropped from ``storage['chars']`` and the level runs are
    recalculated.

    See http://unicode.org/reports/tr9/#Explicit_Levels_and_Directions

    """
    # stack of (level, override) pairs to return to on PDF
    stack = deque()
    overflow = near_overflow = 0

    # X1
    level, override = storage['base_level'], 'N'

    for entry in storage['chars']:
        kind = entry['type']
        rule = X2_X5_MAPPINGS.get(kind)

        if rule is not None:
            # X2 to X5. Once past EXPLICIT_LEVEL_LIMIT only count the code
            if overflow:
                overflow += 1
                continue

            next_level_of, forced_type = rule
            candidate = next_level_of(level)

            if candidate < EXPLICIT_LEVEL_LIMIT:
                stack.append((level, override))
                level, override = candidate, forced_type
            elif level == EXPLICIT_LEVEL_LIMIT - 2:
                # The candidate is invalid, but a valid level can still be
                # reached from level 60 by a later RLE or RLO, so only note
                # that we 'almost' overflowed.
                near_overflow += 1
            else:
                overflow += 1
            continue

        if kind not in X6_IGNORED:
            # X6
            entry['level'] = level
            if override != 'N':
                entry['type'] = override

        elif kind == 'PDF':
            # X7
            if overflow:
                overflow -= 1
            elif near_overflow and level != EXPLICIT_LEVEL_LIMIT - 1:
                near_overflow -= 1
            elif stack:
                level, override = stack.pop()

        elif kind == 'B':
            # X8
            stack.clear()
            overflow = near_overflow = 0
            level = entry['level'] = storage['base_level']
            override = 'N'

    # X9. Drop the explicit codes RLE, LRE, RLO, LRO, PDF and BN.
    # See http://unicode.org/reports/tr9/#X9
    kept = []
    for entry in storage['chars']:
        if entry['type'] not in X9_REMOVED:
            kept.append(entry)
    storage['chars'] = kept

    calc_level_runs(storage)

    if debug:
        debug_storage(storage, runs=True)
245 | 
def calc_level_runs(storage):
    """Rebuild ``storage['runs']`` from the levels of ``storage['chars']``.

    A run is a maximal sequence of adjacent chars sharing the same level.
    Every run is stored as a dict with its start index (``start``), its
    ``length``, the type of its last char (``type``) and the directions at
    its two boundaries (``sor`` and ``eor``).

    Applies X10. See http://unicode.org/reports/tr9/#X10
    """
    runs = storage['runs']
    runs.clear()
    chars = storage['chars']

    # nothing to split for an empty string
    if not chars:
        return

    def boundary_direction(level_a, level_b):
        # The direction at a boundary depends on the higher of the levels on
        # either side of it: R if that level is odd, otherwise L
        return ['L', 'R'][max(level_a, level_b) % 2]

    prev_level, prev_type = chars[0]['level'], chars[0]['type']

    # the paragraph's base level stands on the outer side of the first run
    sor = boundary_direction(storage['base_level'], prev_level)
    start = 0

    for pos, entry in enumerate(chars):
        level, kind = entry['level'], entry['type']

        if level != prev_level:
            # level changed: close the run ending just before this char
            eor = boundary_direction(prev_level, level)
            runs.append({'sor': sor, 'eor': eor, 'start': start,
                         'type': prev_type, 'length': pos - start})
            # the closing boundary is also the start of the next run
            sor, start = eor, pos

        prev_level, prev_type = level, kind

    # close the last run against the paragraph's base level
    eor = boundary_direction(prev_level, storage['base_level'])
    runs.append({'sor': sor, 'eor': eor, 'start': start,
                 'type': prev_type, 'length': len(chars) - start})
292 | 
293 | def resolve_weak_types(storage, debug=False):
294 |     """Resolve weak type rules W1 - W3.
295 | 
296 |     See: http://unicode.org/reports/tr9/#Resolving_Weak_Types
297 | 
298 |     """
299 | 
300 |     for run in storage['runs']:
301 |         prev_strong = prev_type = run['sor']
302 |         start, length = run['start'], run['length']
303 |         chars = storage['chars'][start:start+length]
304 |         for _ch in chars:
305 |             # W1. Examine each nonspacing mark (NSM) in the level run, and
306 |             # change the type of the NSM to the type of the previous character.
307 |             # If the NSM is at the start of the level run, it will get the type
308 |             # of sor.
309 |             bidi_type = _ch['type']
310 | 
311 |             if bidi_type == 'NSM':
312 |                 _ch['type'] = bidi_type = prev_type
313 | 
314 |             # W2. Search backward from each instance of a European number until
315 |             # the first strong type (R, L, AL, or sor) is found. If an AL is
316 |             # found, change the type of the European number to Arabic number.
317 |             if bidi_type == 'EN' and prev_strong == 'AL':
318 |                 _ch['type'] = 'AN'
319 | 
320 |             # update prev_strong if needed
321 |             if bidi_type in ('R', 'L', 'AL'):
322 |                 prev_strong = bidi_type
323 | 
324 |             prev_type = _ch['type']
325 | 
326 |         # W3. Change all ALs to R
327 |         for _ch in chars:
328 |             if _ch['type'] == 'AL':
329 |                 _ch['type'] = 'R'
330 | 
331 |         # W4. A single European separator between two European numbers changes
332 |         # to a European number. A single common separator between two numbers of
333 |         # the same type changes to that type.
334 |         for idx in range(1, len(chars) -1 ):
335 |             bidi_type = chars[idx]['type']
336 |             prev_type = chars[idx-1]['type']
337 |             next_type = chars[idx+1]['type']
338 | 
339 |             if bidi_type == 'ES' and (prev_type == next_type == 'EN'):
340 |                 chars[idx]['type'] = 'EN'
341 | 
342 |             if bidi_type == 'CS' and prev_type == next_type and \
343 |                        prev_type in ('AN', 'EN'):
344 |                 chars[idx]['type'] = prev_type
345 | 
346 | 
347 |         # W5. A sequence of European terminators adjacent to European numbers
348 |         # changes to all European numbers.
349 |         for idx in range(len(chars)):
350 |             if chars[idx]['type'] == 'EN':
351 |                 for et_idx in range(idx-1, -1, -1):
352 |                     if chars[et_idx]['type'] == 'ET':
353 |                         chars[et_idx]['type'] = 'EN'
354 |                     else:
355 |                         break
356 |                 for et_idx in range(idx+1, len(chars)):
357 |                     if chars[et_idx]['type'] == 'ET':
358 |                         chars[et_idx]['type'] = 'EN'
359 |                     else:
360 |                         break
361 | 
362 |         # W6. Otherwise, separators and terminators change to Other Neutral.
363 |         for _ch in chars:
364 |             if _ch['type'] in ('ET', 'ES', 'CS'):
365 |                 _ch['type'] = 'ON'
366 | 
367 |         # W7. Search backward from each instance of a European number until the
368 |         # first strong type (R, L, or sor) is found. If an L is found, then
369 |         # change the type of the European number to L.
370 |         prev_strong = run['sor']
371 |         for _ch in chars:
372 |             if _ch['type'] == 'EN' and prev_strong == 'L':
373 |                 _ch['type'] = 'L'
374 | 
375 |             if _ch['type'] in ('L', 'R'):
376 |                 prev_strong = _ch['type']
377 | 
378 |     if debug:
379 |         debug_storage(storage, runs=True)
380 | 
def resolve_neutral_types(storage, debug):
    """Resolve neutral types (rules N1 and N2), one level run at a time.

    Each maximal sequence of neutral characters (B, S, WS, ON) inside a
    run listed in ``storage['runs']`` gets a new ``type`` written in
    place. The run's ``sor`` and ``eor`` values stand in for the
    characters just outside the run.

    See: http://unicode.org/reports/tr9/#Resolving_Neutral_Types

    """
    neutral_types = ('B', 'S', 'WS', 'ON')
    number_types = ('AN', 'EN')

    for level_run in storage['runs']:
        first = level_run['start']
        last = first + level_run['length']

        # Pad the run with pseudo characters carrying sor / eor so that a
        # neutral sequence touching a run boundary still has two neighbours.
        padded = [{'type': level_run['sor']}]
        padded.extend(storage['chars'][first:last])
        padded.append({'type': level_run['eor']})

        pending = None  # index where the currently open neutral sequence began
        for pos, item in enumerate(padded):
            if item['type'] in neutral_types:
                if pending is None:
                    pending = pos
                    leading = padded[pos - 1]['type']
                continue

            if pending is None:
                continue

            # A non-neutral character closes the open sequence.
            trailing = item['type']

            # European and Arabic numbers influence neutrals as if they
            # were R.
            if leading in number_types:
                leading = 'R'
            if trailing in number_types:
                trailing = 'R'

            sequence = padded[pending:pos]
            if leading == trailing:
                # N1. Both sides agree: the neutrals take that direction.
                for neutral in sequence:
                    neutral['type'] = leading
            else:
                # N2. Otherwise each neutral takes the embedding direction
                # derived from its own level.
                for neutral in sequence:
                    neutral['type'] = _embedding_direction(neutral['level'])

            pending = None

    if debug:
        debug_storage(storage)
434 | 
def resolve_implicit_levels(storage, debug):
    """Resolve implicit levels (rules I1 and I2).

    Raises the ``level`` of the characters of every run in
    ``storage['runs']`` in place, depending on the character's resolved
    type and the direction of its current embedding level.

    See: http://unicode.org/reports/tr9/#Resolving_Implicit_Levels

    """
    allowed_types = ('L', 'R', 'EN', 'AN')

    for level_run in storage['runs']:
        first = level_run['start']
        run_chars = storage['chars'][first:first + level_run['length']]

        for item in run_chars:
            resolved = item['type']

            # weak and neutral types must all be resolved by now
            assert resolved in allowed_types, \
                    '%s not allowed here' % resolved

            if _embedding_direction(item['level']) == 'L':
                # I1. Even (left-to-right) level: R goes up one level,
                # AN and EN go up two, L stays where it is.
                if resolved == 'R':
                    bump = 1
                elif resolved == 'L':
                    bump = 0
                else:
                    bump = 2
            else:
                # I2. Odd (right-to-left) level: L, EN and AN go up one
                # level, R stays where it is.
                bump = 0 if resolved == 'R' else 1

            if bump:
                item['level'] += bump

    if debug:
        debug_storage(storage, runs=True)
466 | 
def reverse_contiguous_sequence(chars, line_start, line_end, highest_level,
                                lowest_odd_level):
    """L2. From the highest level found in the text to the lowest odd
    level on each line, including intermediate levels not actually
    present in the text, reverse any contiguous sequence of characters
    that are at that level or higher.

    `chars` is a list of dicts that each carry a 'level' entry; it is
    reordered in place and nothing is returned.

    `line_start` and `line_end` are the inclusive index bounds of the line
    inside `chars`. An empty line (`line_end` < `line_start`) is left
    untouched.

    `highest_level` and `lowest_odd_level` bound the levels that are
    processed, from the former down to the latter.

    """
    for level in range(highest_level, lowest_odd_level - 1, -1):
        # Index of the first character of the sequence currently open at
        # this level, or None when no sequence is open.
        seq_start = None

        for idx in range(line_start, line_end + 1):
            if chars[idx]['level'] >= level:
                if seq_start is None:
                    seq_start = idx
            elif seq_start is not None:
                # Must compare against None rather than test truthiness:
                # index 0 is a valid position. Treating it as "nothing
                # open" would leave the sequence unclosed and let it
                # swallow the lower-level characters that follow.
                chars[seq_start:idx] = chars[seq_start:idx][::-1]
                seq_start = None

        # a sequence still open here runs up to the end of the line
        if seq_start is not None:
            chars[seq_start:line_end + 1] = \
                chars[seq_start:line_end + 1][::-1]
496 | 
497 | 
def reorder_resolved_levels(storage, debug):
    """Apply rules L1 and L2 to ``storage['chars']`` in place.

    L1 resets the level of separators and of the whitespace that precedes
    them or ends the text to ``storage['base_level']``. L2 then reverses
    the characters line by line through reverse_contiguous_sequence().

    """
    chars = storage['chars']

    # L1. Walk the text backwards so that "whitespace preceding a separator
    # or ending the line" is simply whitespace met while the flag is set.
    resetting = True
    for item in reversed(chars):
        original_type = item['orig']

        if original_type in ('B', 'S'):
            # 1. Segment separators,
            # 2. Paragraph separators,
            item['level'] = storage['base_level']
            resetting = True
        elif resetting and original_type in ('BN', 'WS'):
            # 3. Any sequence of whitespace characters preceding a segment
            # separator or paragraph separator
            # 4. Any sequence of white space characters at the end of the
            # line.
            item['level'] = storage['base_level']
        else:
            resetting = False

    # L2 works per line; the highest level and the lowest odd level of the
    # current line are tracked while scanning.
    last_index = len(chars) - 1
    line_start = 0
    highest_level = 0
    lowest_odd_level = EXPLICIT_LEVEL_LIMIT

    for idx, item in enumerate(chars):
        level = item['level']
        highest_level = max(highest_level, level)

        if level % 2 and level < lowest_odd_level:
            lowest_odd_level = level

        ends_paragraph = item['orig'] == 'B'
        if not (ends_paragraph or idx == last_index):
            continue

        # the paragraph separator itself is left out of the reversal
        line_end = idx - 1 if ends_paragraph else idx

        reverse_contiguous_sequence(chars, line_start, line_end,
                                    highest_level, lowest_odd_level)

        # start over for the next line
        line_start = idx + 1
        highest_level = 0
        lowest_odd_level = EXPLICIT_LEVEL_LIMIT

    if debug:
        debug_storage(storage)
559 | 
560 | 
def apply_mirroring(storage, debug):
    """Apply rule L4 (mirroring) to ``storage['chars']`` in place.

    L4. A character is depicted by a mirrored glyph if and only if (a) the
    resolved directionality of that character is R, and (b) the
    Bidi_Mirrored property value of that character is true.

    See: http://unicode.org/reports/tr9/#L4

    """
    for item in storage['chars']:
        glyph = item['ch']

        if not mirrored(glyph):
            continue

        if _embedding_direction(item['level']) != 'R':
            continue

        # characters without an entry in MIRRORED are kept as they are
        item['ch'] = MIRRORED.get(glyph, glyph)

    if debug:
        debug_storage(storage)
578 | 
def get_empty_storage():
    """Build a fresh, empty storage skeleton (also handy for testing).

    Every call returns new ``chars`` and ``runs`` containers; the base
    level and base direction start out as None.

    """
    storage = dict(base_level=None, base_dir=None)
    storage['chars'] = []
    storage['runs'] = deque()
    return storage
587 | 
588 | 
def get_display(unicode_or_str, encoding='utf-8', upper_is_rtl=False,
                base_dir=None, debug=False):
    """Return the display (visual) layout of the given text.

    A `str` argument is processed as is and a `str` is returned. Any other
    argument is first decoded with `encoding` (default: "utf-8") and the
    result is encoded back with the same `encoding` before it is returned.

    Set `upper_is_rtl` to True to treat upper case chars as strong 'R'
    for debugging (default: False).

    Set `base_dir` to 'L' or 'R' to override the calculated base_level.

    Set `debug` to True to display (using sys.stderr) the steps taken with the
    algorithm.

    """
    storage = get_empty_storage()

    # the algorithm works on text, so anything that is not str gets decoded
    decoded = not isinstance(unicode_or_str, str)
    text = unicode_or_str.decode(encoding) if decoded else unicode_or_str

    if base_dir is not None:
        base_level = PARAGRAPH_LEVELS[base_dir]
    else:
        base_level = get_base_level(text, upper_is_rtl)

    storage['base_level'] = base_level
    storage['base_dir'] = ('L', 'R')[base_level]

    get_embedding_levels(text, storage, upper_is_rtl, debug)

    # the remaining stages all share the same (storage, debug) signature
    stages = (
        explicit_embed_and_overrides,
        resolve_weak_types,
        resolve_neutral_types,
        resolve_implicit_levels,
        reorder_resolved_levels,
        apply_mirroring,
    )
    for stage in stages:
        stage(storage, debug)

    display = u''.join(item['ch'] for item in storage['chars'])

    return display.encode(encoding) if decoded else display
639 | 


--------------------------------------------------------------------------------