├── .gitignore ├── Lib └── compositor │ ├── __init__.py │ ├── caseConversionMaps.py │ ├── classDefinitionTables.py │ ├── cmap.py │ ├── error.py │ ├── featureList.py │ ├── font.py │ ├── glyphRecord.py │ ├── layoutEngine.py │ ├── logger.py │ ├── lookupList.py │ ├── scriptList.py │ ├── subTablesBase.py │ ├── subTablesGPOS.py │ ├── subTablesGSUB.py │ ├── tables.py │ ├── textUtilities.py │ └── wordBreakProperties.py ├── License.txt ├── MANIFEST.in ├── README.md ├── demo.py ├── pyproject.toml ├── setup.py ├── todo.txt └── tools ├── PropList.txt ├── SpecialCasing.txt ├── UnicodeData.txt ├── UnicodeReferenceGenerator.py └── WordBreakProperty.txt /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.egg-info/ 3 | __pycache__/ 4 | *.pyc -------------------------------------------------------------------------------- /Lib/compositor/__init__.py: -------------------------------------------------------------------------------- 1 | from compositor.error import CompositorError 2 | from compositor.layoutEngine import LayoutEngine 3 | from compositor.font import Font, Info, Glyph 4 | 5 | version = "0.3b" 6 | -------------------------------------------------------------------------------- /Lib/compositor/classDefinitionTables.py: -------------------------------------------------------------------------------- 1 | class ClassDef(object): 2 | 3 | """ 4 | Deviation from spec: 5 | - StartGlyph attribute is not implemented. 6 | - GlyphCount attribute is not implemented. 7 | - ClassValueArray attribute is not implemented. 8 | 9 | The structure of this object does not closely 10 | follow the specification. Instead, the basic 11 | functionality is implemented through standard 12 | dict methods. 13 | 14 | To determine if a glyph is in the class: 15 | >>> "x" in aClass 16 | True 17 | 18 | To get the class value of a particular glyph: 19 | >>> aClass["x"] 20 | 330 21 | """ 22 | 23 | __slots__ = ["_map"] 24 | 25 | def __init__(self): 26 | self._map = None 27 | 28 | def loadFromFontTools(self, classDef): 29 | self._map = dict(classDef.classDefs) 30 | return self 31 | 32 | def __getitem__(self, glyphName): 33 | return self._map.get(glyphName, 0) 34 | 35 | def _get_Glyphs(self): 36 | return self._map 37 | 38 | Glyphs = property(_get_Glyphs, doc="This is for reference only. Not for use in processing.") 39 | 40 | 41 | class GlyphClassDef(ClassDef): 42 | 43 | """ 44 | This is a subclass of ClassDefFormat1. 45 | 46 | Retrieving the class for a glyph from this 47 | object will always return a value. If the 48 | glyph is not in the class definitions, 49 | zero will be returned. 50 | """ 51 | 52 | 53 | class MarkAttachClassDef(ClassDef): 54 | 55 | """ 56 | This is a subclass of ClassDefFormat1. 57 | 58 | Retrieving the class for a glyph from this 59 | object will always return a value. If the 60 | glyph is not in the class definitions, 61 | zero will be returned. 62 | """ 63 | -------------------------------------------------------------------------------- /Lib/compositor/cmap.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities for handling the cmap table 3 | and character mapping in general. 4 | """ 5 | 6 | def extractCMAP(ttFont): 7 | for platformID, encodingID in [(3, 10), (0, 3), (3, 1)]: 8 | cmapSubtable = ttFont["cmap"].getcmap(platformID, encodingID) 9 | if cmapSubtable is not None: 10 | return cmapSubtable.cmap 11 | from compositor.error import CompositorError 12 | raise CompositorError("Found neither CMAP (3, 10), (0, 3), nor (3, 1) in font.") 13 | 14 | def reverseCMAP(cmap): 15 | reversed = {} 16 | for value, name in cmap.items(): 17 | if name not in reversed: 18 | reversed[name] = [] 19 | reversed[name].append(value) 20 | return reversed 21 | -------------------------------------------------------------------------------- /Lib/compositor/error.py: -------------------------------------------------------------------------------- 1 | class CompositorError(Exception): pass -------------------------------------------------------------------------------- /Lib/compositor/featureList.py: -------------------------------------------------------------------------------- 1 | """ 2 | FeatureList object (and friends). 3 | """ 4 | 5 | 6 | __all__ = ["FeatureList", "FeatureRecord"] 7 | 8 | 9 | class FeatureList(object): 10 | 11 | __slots__ = ["FeatureCount", "FeatureRecord"] 12 | 13 | def __init__(self): 14 | self.FeatureCount = 0 15 | self.FeatureRecord = [] 16 | 17 | def loadFromFontTools(self, featureList): 18 | self.FeatureCount = featureList.FeatureCount 19 | self.FeatureRecord = [] 20 | self.FeatureRecord = [FeatureRecord().loadFromFontTools(record) for record in featureList.FeatureRecord] 21 | return self 22 | 23 | 24 | class FeatureRecord(object): 25 | 26 | __slots__ = ["FeatureTag", "Feature"] 27 | 28 | def __init__(self): 29 | self.FeatureTag = None 30 | self.Feature = None 31 | 32 | def loadFromFontTools(self, featureRecord): 33 | self.FeatureTag = featureRecord.FeatureTag 34 | self.Feature = Feature().loadFromFontTools(featureRecord.Feature) 35 | return self 36 | 37 | 38 | class Feature(object): 39 | 40 | __slots__ = ["FeatureParams", "LookupCount", "LookupListIndex"] 41 | 42 | def __init__(self): 43 | self.FeatureParams = None 44 | self.LookupCount = 0 45 | self.LookupListIndex = [] 46 | 47 | def loadFromFontTools(self, feature): 48 | self.FeatureParams = feature.FeatureParams # XXX? 49 | self.LookupCount = feature.LookupCount 50 | self.LookupListIndex = list(feature.LookupListIndex) 51 | return self 52 | 53 | -------------------------------------------------------------------------------- /Lib/compositor/font.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | import weakref 3 | from fontTools.ttLib import TTFont 4 | from fontTools.pens.basePen import AbstractPen 5 | from fontTools.misc.textTools import tostr 6 | from compositor.layoutEngine import LayoutEngine 7 | from compositor.glyphRecord import GlyphRecord 8 | from compositor.cmap import extractCMAP 9 | from compositor.error import CompositorError 10 | 11 | 12 | class Font(LayoutEngine): 13 | 14 | def __init__(self, path, glyphClass=None): 15 | super(Font, self).__init__() 16 | self.path = path 17 | self._glyphs = {} 18 | if isinstance(path, TTFont): 19 | self.source = path 20 | else: 21 | self.source = TTFont(path) 22 | self.loadGlyphSet() 23 | self.loadCMAP() 24 | self.loadFeatures() 25 | self.loadInfo() 26 | if glyphClass is None: 27 | glyphClass = Glyph 28 | self.glyphClass = glyphClass 29 | 30 | def __del__(self): 31 | del self._glyphs 32 | self.source.close() 33 | del self.source 34 | 35 | # -------------- 36 | # initialization 37 | # -------------- 38 | 39 | def loadCMAP(self): 40 | cmap = extractCMAP(self.source) 41 | self.setCMAP(cmap) 42 | 43 | def loadGlyphSet(self): 44 | self.glyphSet = self.source.getGlyphSet() 45 | # the glyph order will be needed later 46 | # to assign the proper glyph index to 47 | # glyph objects. 48 | order = self.source.getGlyphOrder() 49 | self._glyphOrder = {} 50 | for index, glyphName in enumerate(order): 51 | self._glyphOrder[glyphName] = index 52 | 53 | def loadInfo(self): 54 | self.info = info = Info() 55 | head = self.source["head"] 56 | hhea = self.source["hhea"] 57 | os2 = self.source["OS/2"] 58 | info.unitsPerEm = head.unitsPerEm 59 | info.ascender = hhea.ascent 60 | info.descender = hhea.descent 61 | info.xHeight = os2.sxHeight 62 | info.capHeight = os2.sCapHeight 63 | # names 64 | nameIDs = {} 65 | for nameRecord in self.source["name"].names: 66 | nameID = nameRecord.nameID 67 | platformID = nameRecord.platformID 68 | platEncID = nameRecord.platEncID 69 | langID = nameRecord.langID 70 | nameIDs[nameID, platformID, platEncID, langID] = nameRecord.toUnicode() 71 | # to retrieve the family and style names, first start 72 | # with the preferred name entries and progress to less 73 | # specific entries until something is found. 74 | familyPriority = [(16, 1, 0, 0), (16, 1, None, None), (16, None, None, None), 75 | (1, 1, 0, 0), (1, 1, None, None), (1, None, None, None)] 76 | familyName = self._skimNameIDs(nameIDs, familyPriority) 77 | stylePriority = [(17, 1, 0, 0), (17, 1, None, None), (17, None, None, None), 78 | (2, 1, 0, 0), (2, 1, None, None), (2, None, None, None)] 79 | styleName = self._skimNameIDs(nameIDs, stylePriority) 80 | if familyName is None or styleName is None: 81 | raise CompositorError("Could not extract name data from name table.") 82 | self.info.familyName = familyName 83 | self.info.styleName = styleName 84 | # stylistic set names 85 | self.stylisticSetNames = {} 86 | if self.gsub: 87 | for featureRecord in self.gsub.FeatureList.FeatureRecord: 88 | params = featureRecord.Feature.FeatureParams 89 | if hasattr(params, "UINameID"): 90 | ssNameID = params.UINameID 91 | namePriority = [(ssNameID, 1, 0, 0), (ssNameID, 1, None, None), (ssNameID, 3, 1, 1033), (ssNameID, 3, None, None)] 92 | ssName = self._skimNameIDs(nameIDs, namePriority) 93 | if ssName: 94 | self.stylisticSetNames[featureRecord.FeatureTag] = ssName 95 | 96 | def _skimNameIDs(self, nameIDs, priority): 97 | for (nameID, platformID, platEncID, langID) in priority: 98 | for (nID, pID, pEID, lID), text in nameIDs.items(): 99 | if nID != nameID: 100 | continue 101 | if pID != platformID and platformID is not None: 102 | continue 103 | if pEID != platEncID and platEncID is not None: 104 | continue 105 | if lID != langID and langID is not None: 106 | continue 107 | return text 108 | 109 | def loadFeatures(self): 110 | gdef = None 111 | if "GDEF" in self.source: 112 | gdef = self.source["GDEF"] 113 | gsub = None 114 | if "GSUB" in self.source: 115 | gsub = self.source["GSUB"] 116 | gpos = None 117 | if "GPOS" in self.source: 118 | gpos = self.source["GPOS"] 119 | self.setFeatureTables(gdef, gsub, gpos) 120 | 121 | # ------------- 122 | # dict behavior 123 | # ------------- 124 | 125 | def keys(self): 126 | return self.glyphSet.keys() 127 | 128 | def __contains__(self, name): 129 | return name in self.glyphSet 130 | 131 | def __getitem__(self, name): 132 | if name not in self._glyphs: 133 | if name not in self.glyphSet: 134 | name = self.fallbackGlyph 135 | glyph = self.glyphSet[name] 136 | index = self._glyphOrder[name] 137 | glyph = self.glyphClass(name, index, glyph, self) 138 | self._glyphs[name] = glyph 139 | return self._glyphs[name] 140 | 141 | # ----------------- 142 | # string processing 143 | # ----------------- 144 | 145 | def stringToGlyphNames(self, string): 146 | glyphNames = [] 147 | for c in string: 148 | c = tostr(c) 149 | v = ord(c) 150 | if v in self.cmap: 151 | glyphNames.append(self.cmap[v]) 152 | elif self.fallbackGlyph is not None: 153 | glyphNames.append(self.fallbackGlyph) 154 | return glyphNames 155 | 156 | def stringToGlyphRecords(self, string): 157 | return [GlyphRecord(glyphName) for glyphName in self.stringToGlyphNames(string)] 158 | 159 | def didProcessingGSUB(self, glyphRecords): 160 | for glyphRecord in glyphRecords: 161 | glyphRecord.advanceWidth += self[glyphRecord.glyphName].width 162 | 163 | # ------------- 164 | # Miscellaneous 165 | # ------------- 166 | 167 | def getGlyphOrder(self): 168 | return self.source.getGlyphOrder() 169 | 170 | 171 | class Info(object): pass 172 | 173 | 174 | class Glyph(object): 175 | 176 | def __init__(self, name, index, source, font): 177 | # the char string must be loaded by drawing it 178 | if not hasattr(source, "width"): 179 | source.draw(_GlyphLoadPen()) 180 | self.name = name 181 | self.source = source 182 | self.width = source.width 183 | self.font = weakref.ref(font) 184 | self.index = index 185 | 186 | def draw(self, pen): 187 | self.source.draw(pen) 188 | 189 | def _get_bounds(self): 190 | from fontTools.pens.boundsPen import BoundsPen 191 | pen = BoundsPen(self.font()) 192 | self.draw(pen) 193 | return pen.bounds 194 | 195 | bounds = property(_get_bounds) 196 | 197 | 198 | class _GlyphLoadPen(AbstractPen): 199 | 200 | def __init__(self): 201 | pass 202 | 203 | def moveTo(self, pt): 204 | pass 205 | 206 | def lineTo(self, pt): 207 | pass 208 | 209 | def curveTo(self, *points): 210 | pass 211 | 212 | def qCurveTo(self, *points): 213 | pass 214 | 215 | def addComponent(self, glyphName, transformation): 216 | pass 217 | 218 | def closePath(self): 219 | pass 220 | 221 | def endPath(self): 222 | pass 223 | -------------------------------------------------------------------------------- /Lib/compositor/glyphRecord.py: -------------------------------------------------------------------------------- 1 | class GlyphRecord(object): 2 | 3 | """ 4 | GlyphRecord object. 5 | 6 | This is the object type which will be contained in the list 7 | returned by font.process("A String"). 8 | 9 | This object should NOT be constructed outside of a 10 | Compositor context. 11 | 12 | This object contains the following attributes: 13 | - glyphName 14 | The glyph name. 15 | - xPlacement 16 | - yPlacement 17 | - xAdvance 18 | - yAdvance 19 | The numerical values that control the placement 20 | and advance of the glyph. For more information 21 | on these, check the ValueRecord specification 22 | here (scroll way down the page): 23 | http://www.microsoft.com/typography/otspec/gpos.htm 24 | - alternates 25 | This is a list containing alternates for the glyph 26 | referenced by this glyph record. During processing 27 | by the tables in the engine, this list of will be 28 | mutated and obliterated n number of times based on 29 | the features and lookups being processed. There is no 30 | guarantee that the alternates listed here will 31 | reference the final glyph contained in the record. 32 | Therefore, this validation is up to the caller. 33 | Also, the internal processing will populate this 34 | list with glyph names. 35 | Note: You do not need to worry about any of the 36 | validation or population issues discussed here 37 | if you are using the Font object. That 38 | object handles all of the necessary cleanup in 39 | the process method. 40 | - ligatureComponents 41 | This is a list of glyph names that are the 42 | components of a ligature. 43 | 44 | This object contains three methods for making educated 45 | guesses about Unicode values. This is necessary when 46 | word breaks are determined. 47 | - saveState 48 | This method saves the glyph name provided, which 49 | can either be a glyph name or a list of glyph names 50 | in the case of lgatures. This will add the glyph name 51 | to the record's substitution history. This should be 52 | done before a substitution is made. 53 | - getSide1GlyphNameWithUnicodeValue 54 | - getSide2GlyphNameWithUnicodeValue 55 | These two methods find the most recent glyph name 56 | for each side that has a Unicode value. When called, 57 | they work backwards through the glyph names saved with 58 | the saveState method until a glyph name with a Unicode 59 | value is found. 60 | """ 61 | 62 | __slots__ = ["glyph", "glyphName", "xPlacement", "yPlacement", 63 | "xAdvance", "yAdvance", "advanceWidth", "advanceHeight", 64 | "alternates", "_alternatesReference", 65 | "_ligatureComponents", "_ligatureComponentsReference", 66 | "_substitutionHistory"] 67 | 68 | def __init__(self, glyphName): 69 | self.glyph = None 70 | self.glyphName = glyphName 71 | self.xPlacement = 0 72 | self.yPlacement = 0 73 | self.xAdvance = 0 74 | self.yAdvance = 0 75 | self.advanceWidth = 0 76 | self.advanceHeight = 0 77 | self.alternates = [] 78 | self._alternatesReference = None 79 | self._ligatureComponents = [] 80 | self._substitutionHistory = [] 81 | 82 | def __repr__(self): 83 | name = str(self.glyphName) 84 | xP = str(self.xPlacement) 85 | yP = str(self.yPlacement) 86 | xA = str(self.xAdvance) 87 | yA = str(self.yAdvance) 88 | s = "" % (name, xP, yP, xA, yA) 89 | return s 90 | 91 | def __add__(self, valueRecord): 92 | self.xPlacement += valueRecord.XPlacement 93 | self.yPlacement += valueRecord.YPlacement 94 | self.xAdvance += valueRecord.XAdvance 95 | self.yAdvance += valueRecord.YAdvance 96 | return self 97 | 98 | def _get_ligatureComponents(self): 99 | return list(self._ligatureComponents) 100 | 101 | def _set_ligatureComponents(self, components): 102 | self._ligatureComponents = list(components) 103 | 104 | ligatureComponents = property(_get_ligatureComponents, _set_ligatureComponents) 105 | 106 | def saveState(self, glyphName): 107 | if isinstance(glyphName, list): 108 | glyphName = list(glyphName) 109 | self._substitutionHistory.append(glyphName) 110 | 111 | def getSide1GlyphNameWithUnicodeValue(self, reversedCMAP): 112 | if self.glyphName in reversedCMAP: 113 | return self.glyphName 114 | for glyphName in reversed(self._substitutionHistory): 115 | if isinstance(glyphName, list): 116 | glyphName = glyphName[0] 117 | if glyphName in reversedCMAP: 118 | return glyphName 119 | return None 120 | 121 | def getSide2GlyphNameWithUnicodeValue(self, reversedCMAP): 122 | if self.glyphName in reversedCMAP: 123 | return self.glyphName 124 | for glyphName in reversed(self._substitutionHistory): 125 | if isinstance(glyphName, list): 126 | glyphName = glyphName[-1] 127 | if glyphName in reversedCMAP: 128 | return glyphName 129 | return None 130 | 131 | 132 | def glyphNamesToGlyphRecords(glyphList): 133 | """ 134 | >>> glyphList = ["a", "b"] 135 | >>> glyphNamesToGlyphRecords(glyphList) 136 | [, ] 137 | """ 138 | return [GlyphRecord(glyphName) for glyphName in glyphList] 139 | 140 | def glyphRecordsToTuples(glyphRecords): 141 | """ 142 | >>> vr = GlyphRecord("foo") 143 | >>> vr.xPlacement = 1 144 | >>> vr.yPlacement = 2 145 | >>> vr.xAdvance = 3 146 | >>> vr.yAdvance = 4 147 | >>> glyphRecordsToTuples([vr]) 148 | [('foo', 1, 2, 3, 4)] 149 | """ 150 | tuples = [] 151 | for record in glyphRecords: 152 | xP = record.xPlacement 153 | yP = record.yPlacement 154 | xA = record.xAdvance 155 | yA = record.yAdvance 156 | gN = record.glyphName 157 | tuples.append((gN, xP, yP, xA, yA)) 158 | return tuples 159 | 160 | def glyphRecordsToGlyphNames(glyphRecords): 161 | """ 162 | >>> glyphList = ["a", "b"] 163 | >>> glyphRecords = glyphNamesToGlyphRecords(glyphList) 164 | >>> glyphRecordsToGlyphNames(glyphRecords) 165 | ['a', 'b'] 166 | """ 167 | return [record.glyphName for record in glyphRecords] 168 | 169 | def _testMath(): 170 | """ 171 | >>> from subTablesGPOS import ValueRecord 172 | >>> vr = ValueRecord() 173 | >>> vr.XPlacement = 1 174 | >>> vr.YPlacement = 2 175 | >>> vr.XAdvance = 3 176 | >>> vr.YAdvance = 4 177 | >>> gr = GlyphRecord("foo") 178 | >>> gr.xPlacement = 1 179 | >>> gr.yPlacement = 2 180 | >>> gr.xAdvance = 3 181 | >>> gr.yAdvance = 4 182 | >>> gr + vr 183 | 184 | """ 185 | 186 | def _testUnicodeGuessing(): 187 | """ 188 | >>> cmap = { 189 | ... "a" : 97, 190 | ... "b" : 98, 191 | ... } 192 | >>> r = GlyphRecord("a") 193 | >>> r.saveState("a") 194 | >>> r.glyphName = "a.alt1" 195 | >>> r.saveState("a.alt1") 196 | >>> r.glyphName = "a.alt2" 197 | >>> r.getSide1GlyphNameWithUnicodeValue(cmap) 198 | 'a' 199 | >>> r.glyphName = "b" 200 | >>> r.getSide1GlyphNameWithUnicodeValue(cmap) 201 | 'b' 202 | >>> r = GlyphRecord("a") 203 | >>> r.saveState(["a", "b"]) 204 | >>> r.glyphName = "a_b" 205 | >>> r.getSide1GlyphNameWithUnicodeValue(cmap) 206 | 'a' 207 | >>> r.getSide2GlyphNameWithUnicodeValue(cmap) 208 | 'b' 209 | """ 210 | 211 | if __name__ == "__main__": 212 | import doctest 213 | doctest.testmod() 214 | -------------------------------------------------------------------------------- /Lib/compositor/layoutEngine.py: -------------------------------------------------------------------------------- 1 | from compositor.tables import GSUB, GPOS, GDEF 2 | from compositor.glyphRecord import GlyphRecord 3 | from compositor.cmap import reverseCMAP 4 | from compositor.textUtilities import convertCase 5 | from compositor.error import CompositorError 6 | from fontTools.misc.textTools import tostr 7 | 8 | 9 | class LayoutEngine(object): 10 | 11 | def __init__(self): 12 | self.cmap = {} 13 | self.reversedCMAP = {} 14 | self.gdef = None 15 | self.gsub = None 16 | self.gpos = None 17 | self.fallbackGlyph = ".notdef" 18 | 19 | # ------------ 20 | # data setting 21 | # ------------ 22 | 23 | def setCMAP(self, cmap): 24 | self.cmap = cmap 25 | self.reversedCMAP = reverseCMAP(cmap) 26 | if self.gsub is not None: 27 | self.gsub.setCMAP(self.reversedCMAP) 28 | if self.gpos is not None: 29 | self.gpos.setCMAP(self.reversedCMAP) 30 | 31 | def setFeatureTables(self, gdef=None, gsub=None, gpos=None): 32 | self.gdef = None 33 | if gdef is not None: 34 | self.gdef = GDEF().loadFromFontTools(gdef) 35 | self.gsub = None 36 | if gsub is not None: 37 | self.gsub = GSUB().loadFromFontTools(gsub, self.reversedCMAP, self.gdef) 38 | self.gpos = None 39 | if gpos is not None: 40 | self.gpos = GPOS().loadFromFontTools(gpos, self.reversedCMAP, self.gdef) 41 | 42 | # ----------------- 43 | # string processing 44 | # ----------------- 45 | 46 | def stringToGlyphNames(self, string): 47 | glyphNames = [] 48 | for c in string: 49 | c = tostr(c) 50 | v = ord(c) 51 | if v in self.cmap: 52 | glyphNames.append(self.cmap[v]) 53 | elif self.fallbackGlyph is not None: 54 | glyphNames.append(self.fallbackGlyph) 55 | return glyphNames 56 | 57 | def stringToGlyphRecords(self, string): 58 | return [GlyphRecord(glyphName) for glyphName in self.stringToGlyphNames(string)] 59 | 60 | def glyphListToGlyphRecords(self, glyphList): 61 | glyphRecords = [] 62 | for glyphName in glyphList: 63 | record = GlyphRecord(glyphName) 64 | glyphRecords.append(record) 65 | return glyphRecords 66 | 67 | def process(self, stringOrGlyphList, script="latn", langSys=None, rightToLeft=False, case="unchanged", logger=None): 68 | if isinstance(stringOrGlyphList, str): 69 | stringOrGlyphList = self.stringToGlyphNames(stringOrGlyphList) 70 | if case != "unchanged": 71 | l = langSys 72 | if l is not None: 73 | l = l.strip() 74 | stringOrGlyphList = convertCase(case, stringOrGlyphList, self.cmap, self.reversedCMAP, l, self.fallbackGlyph) 75 | glyphRecords = self.glyphListToGlyphRecords(stringOrGlyphList) 76 | if rightToLeft: 77 | glyphRecords.reverse() 78 | if logger: 79 | logger.logStart() 80 | glyphNames = [r.glyphName for r in glyphRecords] 81 | logger.logMainSettings(glyphNames, script, langSys) 82 | self.willBeginProcessingGSUB(glyphRecords) 83 | if self.gsub is not None: 84 | 85 | if logger: 86 | logger.logTableStart(self.gsub) 87 | glyphRecords = self.gsub.process(glyphRecords, script=script, langSys=langSys, logger=logger) 88 | if logger: 89 | logger.logResults(glyphRecords) 90 | logger.logTableEnd() 91 | 92 | self.didProcessingGSUB(glyphRecords) 93 | self.willBeginProcessingGPOS(glyphRecords) 94 | if self.gpos is not None: 95 | 96 | if logger: 97 | logger.logTableStart(self.gpos) 98 | glyphRecords = self.gpos.process(glyphRecords, script=script, langSys=langSys, logger=logger) 99 | if logger: 100 | logger.logResults(glyphRecords) 101 | logger.logTableEnd() 102 | self.didProcessingGPOS(glyphRecords) 103 | if logger: 104 | logger.logEnd() 105 | return glyphRecords 106 | 107 | def willBeginProcessingGSUB(self, glyphRecords): 108 | pass 109 | 110 | def didProcessingGSUB(self, glyphRecords): 111 | pass 112 | 113 | def willBeginProcessingGPOS(self, glyphRecords): 114 | pass 115 | 116 | def didProcessingGPOS(self, glyphRecords): 117 | pass 118 | 119 | # ------------------ 120 | # feature management 121 | # ------------------ 122 | 123 | def getScriptList(self): 124 | gsub = [] 125 | gpos = [] 126 | if self.gsub is not None: 127 | gsub = self.gsub.getScriptList() 128 | if self.gpos is not None: 129 | gpos = self.gpos.getScriptList() 130 | return sorted(set(gsub + gpos)) 131 | 132 | def getLanguageList(self): 133 | gsub = [] 134 | gpos = [] 135 | if self.gsub is not None: 136 | gsub = self.gsub.getLanguageList() 137 | if self.gpos is not None: 138 | gpos = self.gpos.getLanguageList() 139 | return sorted(set(gsub + gpos)) 140 | 141 | def getFeatureList(self): 142 | gsub = [] 143 | gpos = [] 144 | if self.gsub is not None: 145 | gsub = self.gsub.getFeatureList() 146 | if self.gpos is not None: 147 | gpos = self.gpos.getFeatureList() 148 | return sorted(set(gsub + gpos)) 149 | 150 | def getFeatureState(self, featureTag): 151 | gsubState = None 152 | gposState = None 153 | if self.gsub is not None: 154 | if featureTag in self.gsub: 155 | gsubState = self.gsub.getFeatureState(featureTag) 156 | if self.gpos is not None: 157 | if featureTag in self.gpos: 158 | gposState = self.gpos.getFeatureState(featureTag) 159 | if gsubState is not None and gposState is not None: 160 | if gsubState != gposState: 161 | raise CompositorError("Inconsistently applied feature: %s" % featureTag) 162 | if gsubState is not None: 163 | return gsubState 164 | if gposState is not None: 165 | return gposState 166 | raise CompositorError("Feature %s is is not contained in GSUB or GPOS" % featureTag) 167 | 168 | def setFeatureState(self, featureTag, state): 169 | if self.gsub is not None: 170 | if featureTag in self.gsub: 171 | self.gsub.setFeatureState(featureTag, state) 172 | if self.gpos is not None: 173 | if featureTag in self.gpos: 174 | self.gpos.setFeatureState(featureTag, state) 175 | -------------------------------------------------------------------------------- /Lib/compositor/logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | A simple logging object. It reports, with 3 | the help of the Compositor objects, a 4 | wide range of data about the processing 5 | of a string of text for a font. 6 | 7 | Usage: 8 | 9 | logger = Logger() 10 | logger.logStart() 11 | font = Font("/path/to/a/font.otf") 12 | font.process("Hello World!", logger=logger) 13 | logger.logEnd() 14 | report = logger.getText() 15 | 16 | The returned log is in XML format. 17 | """ 18 | from io import StringIO 19 | from fontTools.misc.xmlWriter import XMLWriter 20 | 21 | 22 | class Logger(object): 23 | 24 | def __init__(self): 25 | self._file = StringIO() 26 | self._writer = XMLWriter(self._file, encoding="utf-8") 27 | 28 | def __del__(self): 29 | self._writer = None 30 | self._file.close() 31 | 32 | def logStart(self): 33 | self._writer.begintag("xml") 34 | 35 | def logEnd(self): 36 | self._writer.endtag("xml") 37 | 38 | def logMainSettings(self, glyphNames, script, langSys): 39 | self._writer.begintag("initialSettings") 40 | self._writer.newline() 41 | self._writer.simpletag("string", value=" ".join(glyphNames)) 42 | self._writer.newline() 43 | self._writer.simpletag("script", value=script) 44 | self._writer.newline() 45 | self._writer.simpletag("langSys", value=langSys) 46 | self._writer.newline() 47 | self._writer.endtag("initialSettings") 48 | self._writer.newline() 49 | 50 | def logTableStart(self, table): 51 | name = table.__class__.__name__ 52 | self._writer.begintag("table", name=name) 53 | self._writer.newline() 54 | self.logTableFeatureStates(table) 55 | 56 | def logTableEnd(self): 57 | self._writer.endtag("table") 58 | 59 | def logTableFeatureStates(self, table): 60 | self._writer.begintag("featureStates") 61 | self._writer.newline() 62 | for tag in sorted(table.getFeatureList()): 63 | state = table.getFeatureState(tag) 64 | self._writer.simpletag("feature", name=tag, state=int(state)) 65 | self._writer.newline() 66 | self._writer.endtag("featureStates") 67 | self._writer.newline() 68 | 69 | def logApplicableLookups(self, table, lookups): 70 | self._writer.begintag("applicableLookups") 71 | self._writer.newline() 72 | if lookups: 73 | order = [] 74 | last = None 75 | for tag, lookup in lookups: 76 | if tag != last: 77 | if order: 78 | self._logLookupList(last, order) 79 | order = [] 80 | last = tag 81 | index = table.LookupList.Lookup.index(lookup) 82 | order.append(index) 83 | self._logLookupList(last, order) 84 | self._writer.endtag("applicableLookups") 85 | self._writer.newline() 86 | 87 | def _logLookupList(self, tag, lookups): 88 | lookups = " ".join([str(i) for i in lookups]) 89 | self._writer.simpletag("lookups", feature=tag, indices=lookups) 90 | self._writer.newline() 91 | 92 | def logProcessingStart(self): 93 | self._writer.begintag("processing") 94 | self._writer.newline() 95 | 96 | def logProcessingEnd(self): 97 | self._writer.endtag("processing") 98 | self._writer.newline() 99 | 100 | def logLookupStart(self, table, tag, lookup): 101 | index = table.LookupList.Lookup.index(lookup) 102 | self._writer.begintag("lookup", feature=tag, index=index) 103 | self._writer.newline() 104 | 105 | def logLookupEnd(self): 106 | self._writer.endtag("lookup") 107 | self._writer.newline() 108 | 109 | def logSubTableStart(self, lookup, subtable): 110 | index = lookup.SubTable.index(subtable) 111 | lookupType = subtable.__class__.__name__ 112 | self._writer.begintag("subTable", index=index, type=lookupType) 113 | self._writer.newline() 114 | 115 | def logSubTableEnd(self): 116 | self._writer.endtag("subTable") 117 | self._writer.newline() 118 | 119 | def logGlyphRecords(self, glyphRecords): 120 | for r in glyphRecords: 121 | self._writer.simpletag("glyphRecord", name=r.glyphName, 122 | xPlacement=r.xPlacement, yPlacement=r.yPlacement, 123 | xAdvance=r.xAdvance, yAdvance=r.yAdvance) 124 | self._writer.newline() 125 | 126 | def logInput(self, processed, unprocessed): 127 | self._writer.begintag("input") 128 | self._writer.newline() 129 | self._writer.begintag("processed") 130 | self._writer.newline() 131 | self.logGlyphRecords(processed) 132 | self._writer.endtag("processed") 133 | self._writer.newline() 134 | self._writer.begintag("unprocessed") 135 | self._writer.newline() 136 | self.logGlyphRecords(unprocessed) 137 | self._writer.endtag("unprocessed") 138 | self._writer.newline() 139 | self._writer.endtag("input") 140 | self._writer.newline() 141 | 142 | def logOutput(self, processed, unprocessed): 143 | self._writer.begintag("output") 144 | self._writer.newline() 145 | self._writer.begintag("processed") 146 | self._writer.newline() 147 | self.logGlyphRecords(processed) 148 | self._writer.endtag("processed") 149 | self._writer.newline() 150 | self._writer.begintag("unprocessed") 151 | self._writer.newline() 152 | self.logGlyphRecords(unprocessed) 153 | self._writer.endtag("unprocessed") 154 | self._writer.newline() 155 | self._writer.endtag("output") 156 | self._writer.newline() 157 | 158 | def logResults(self, processed): 159 | self._writer.begintag("results") 160 | self._writer.newline() 161 | self.logGlyphRecords(processed) 162 | self._writer.endtag("results") 163 | self._writer.newline() 164 | 165 | def getText(self): 166 | return self._file.getvalue() 167 | -------------------------------------------------------------------------------- /Lib/compositor/lookupList.py: -------------------------------------------------------------------------------- 1 | """ 2 | GSUB and GPOS LookupList objects (and friends). 3 | """ 4 | 5 | 6 | import weakref 7 | from compositor.subTablesGSUB import * 8 | from compositor.subTablesGPOS import * 9 | 10 | 11 | # ------------ 12 | # Base Classes 13 | # ------------ 14 | 15 | 16 | class BaseLookupList(object): 17 | 18 | __slots__ = ["LookupCount", "Lookup", "__weakref__"] 19 | _LookupClass = None 20 | 21 | def __init__(self): 22 | self.LookupCount = 0 23 | self.Lookup = [] 24 | 25 | def loadFromFontTools(self, lookupList, gdef): 26 | self.LookupCount = lookupList.LookupCount 27 | self.Lookup = [self._LookupClass().loadFromFontTools(lookup, self, gdef) for lookup in lookupList.Lookup] 28 | return self 29 | 30 | 31 | class BaseLookup(object): 32 | 33 | __slots__ = ["LookupType", "LookupFlag", "SubTableCount", "SubTable", 34 | "_lookupList", "_gdefReference", "__weakref__"] 35 | 36 | def __init__(self): 37 | self._lookupList = None 38 | self._gdefReference = None 39 | self.LookupType = None 40 | self.LookupFlag = None 41 | self.SubTableCount = 0 42 | self.SubTable = [] 43 | 44 | def loadFromFontTools(self, lookup, lookupList, gdef): 45 | self._lookupList = weakref.ref(lookupList) 46 | if gdef is not None: 47 | gdef = weakref.ref(gdef) 48 | self._gdefReference = gdef 49 | self.LookupType = lookup.LookupType 50 | self.LookupFlag = LookupFlag().loadFromFontTools(lookup.LookupFlag, gdef) 51 | self.SubTableCount = lookup.SubTableCount 52 | self.SubTable = [] 53 | for subtable in lookup.SubTable: 54 | format = None 55 | if hasattr(subtable, "Format"): 56 | format = subtable.Format 57 | cls = self._lookupSubTableClass(format) 58 | obj = cls().loadFromFontTools(subtable, self) 59 | self.SubTable.append(obj) 60 | return self 61 | 62 | def _get_gdef(self): 63 | if self._gdefReference is not None: 64 | return self._gdefReference() 65 | return None 66 | 67 | _gdef = property(_get_gdef) 68 | 69 | 70 | class LookupFlag(object): 71 | 72 | __slots__ = ["_gdef", "_flag"] 73 | 74 | def __init__(self): 75 | self._gdef = None 76 | self._flag = None 77 | 78 | def loadFromFontTools(self, lookupFlag, gdef): 79 | self._gdef = gdef 80 | self._flag = lookupFlag 81 | return self 82 | 83 | def _get_haveIgnore(self): 84 | return bool(self._flag & 0x0E) 85 | 86 | _haveIgnore = property(_get_haveIgnore) 87 | 88 | def _get_RightToLeft(self): 89 | return bool(self._flag & 0x0001) 90 | 91 | RightToLeft = property(_get_RightToLeft) 92 | 93 | def _get_IgnoreBaseGlyphs(self): 94 | return bool(self._flag & 0x0002) 95 | 96 | IgnoreBaseGlyphs = property(_get_IgnoreBaseGlyphs) 97 | 98 | def _get_IgnoreLigatures(self): 99 | return bool(self._flag & 0x0004) 100 | 101 | IgnoreLigatures = property(_get_IgnoreLigatures) 102 | 103 | def _get_IgnoreMarks(self): 104 | return bool(self._flag & 0x0008) 105 | 106 | IgnoreMarks = property(_get_IgnoreMarks) 107 | 108 | def _get_MarkAttachmentType(self): 109 | return bool(self._flag & 0xFF00) 110 | 111 | MarkAttachmentType = property(_get_MarkAttachmentType) 112 | 113 | def coversGlyph(self, glyphName): 114 | gdef = self._gdef 115 | if gdef is None: 116 | return False 117 | gdef = gdef() 118 | cls = gdef.GlyphClassDef[glyphName] 119 | if cls == 0: 120 | return False 121 | if self._haveIgnore: 122 | if cls == 1 and self.IgnoreBaseGlyphs: #IgnoreBaseGlyphs 123 | return True 124 | if cls == 2 and self.IgnoreLigatures: #IgnoreLigatures 125 | return True 126 | if cls == 3 and self.IgnoreMarks: #IgnoreMarks 127 | return True 128 | if self.MarkAttachmentType and cls == 3: 129 | if gdef.MarkAttachClassDef is None: 130 | return False 131 | markClass = gdef.MarkAttachClassDef[glyphName] 132 | if (self._flag & 0xff00) >> 8 != markClass: 133 | return True 134 | return False 135 | 136 | # ---- 137 | # GSUB 138 | # ---- 139 | 140 | 141 | class GSUBLookup(BaseLookup): 142 | 143 | __slots__ = [] 144 | 145 | def _lookupSubTableClass(self, subtableFormat): 146 | lookupType = self.LookupType 147 | if lookupType == 1: 148 | cls = GSUBLookupType1Format2 149 | elif lookupType == 2: 150 | cls = GSUBLookupType2 151 | elif lookupType == 3: 152 | cls = GSUBLookupType3 153 | elif lookupType == 4: 154 | cls = GSUBLookupType4 155 | elif lookupType == 5: 156 | cls = (GSUBLookupType5Format1, GSUBLookupType5Format2, GSUBLookupType5Format3)[subtableFormat-1] 157 | elif lookupType == 6: 158 | cls = (GSUBLookupType6Format1, GSUBLookupType6Format2, GSUBLookupType6Format3)[subtableFormat-1] 159 | elif lookupType == 7: 160 | cls = GSUBLookupType7 161 | elif lookupType == 8: 162 | cls = GSUBLookupType8 163 | return cls 164 | 165 | 166 | class GSUBLookupList(BaseLookupList): 167 | 168 | __slots__ = [] 169 | _LookupClass = GSUBLookup 170 | 171 | 172 | # ---- 173 | # GPOS 174 | # ---- 175 | 176 | 177 | class GPOSLookup(BaseLookup): 178 | 179 | __slots__ = [] 180 | 181 | def _lookupSubTableClass(self, subtableFormat): 182 | lookupType = self.LookupType 183 | if lookupType == 1: 184 | cls = (GPOSLookupType1Format1, GPOSLookupType1Format2)[subtableFormat-1] 185 | elif lookupType == 2: 186 | cls = (GPOSLookupType2Format1, GPOSLookupType2Format2)[subtableFormat-1] 187 | elif lookupType == 3: 188 | cls = GPOSLookupType3 189 | elif lookupType == 4: 190 | cls = GPOSLookupType4 191 | elif lookupType == 5: 192 | cls = GPOSLookupType5 193 | elif lookupType == 6: 194 | cls = GPOSLookupType6 195 | elif lookupType == 7: 196 | cls = (GPOSLookupType7Format1, GPOSLookupType7Format2, GPOSLookupType7Format3)[subtableFormat-1] 197 | elif lookupType == 8: 198 | cls = (GPOSLookupType8Format1, GPOSLookupType8Format2, GPOSLookupType8Format3)[subtableFormat-1] 199 | elif lookupType == 9: 200 | cls = GPOSLookupType9 201 | return cls 202 | 203 | 204 | class GPOSLookupList(BaseLookupList): 205 | 206 | __slots__ = [] 207 | _LookupClass = GPOSLookup 208 | -------------------------------------------------------------------------------- /Lib/compositor/scriptList.py: -------------------------------------------------------------------------------- 1 | """ 2 | ScriptList object (and friends). 3 | """ 4 | 5 | __all__ = ["ScriptList", "ScriptRecord", "LangSysRecord"] 6 | 7 | class ScriptList(object): 8 | 9 | __slots__ = ["ScriptCount", "ScriptRecord"] 10 | 11 | def __init__(self): 12 | self.ScriptCount = 0 13 | self.ScriptRecord = None 14 | 15 | def loadFromFontTools(self, scriptList): 16 | self.ScriptCount = scriptList.ScriptCount 17 | self.ScriptRecord = [ScriptRecord().loadFromFontTools(record) for record in scriptList.ScriptRecord] 18 | return self 19 | 20 | 21 | class ScriptRecord(object): 22 | 23 | __slots__ = ["ScriptTag", "Script"] 24 | 25 | def __init__(self): 26 | self.ScriptTag = None 27 | self.Script = None 28 | 29 | def loadFromFontTools(self, scriptRecord): 30 | self.ScriptTag = scriptRecord.ScriptTag 31 | self.Script = Script().loadFromFontTools(scriptRecord.Script) 32 | return self 33 | 34 | 35 | class Script(object): 36 | 37 | __slots__ = ["DefaultLangSys", "LangSysCount", "LangSysRecord"] 38 | 39 | def __init__(self): 40 | self.DefaultLangSys = None 41 | self.LangSysCount = 0 42 | self.LangSysRecord = [] 43 | 44 | def loadFromFontTools(self, script): 45 | self.DefaultLangSys = None 46 | if script.DefaultLangSys is not None: 47 | self.DefaultLangSys = LangSys().loadFromFontTools(script.DefaultLangSys) 48 | self.LangSysCount = script.LangSysCount 49 | self.LangSysRecord = [LangSysRecord().loadFromFontTools(record) for record in script.LangSysRecord] 50 | return self 51 | 52 | 53 | class LangSysRecord(object): 54 | 55 | __slots__ = ["LangSysTag", "LangSys"] 56 | 57 | def __init__(self): 58 | self.LangSysTag = None 59 | self.LangSys = None 60 | 61 | def loadFromFontTools(self, langSysRecord): 62 | self.LangSysTag = langSysRecord.LangSysTag 63 | self.LangSys = LangSys().loadFromFontTools(langSysRecord.LangSys) 64 | return self 65 | 66 | 67 | class LangSys(object): 68 | 69 | __slots__ = ["LookupOrder", "ReqFeatureIndex", "FeatureCount", "FeatureIndex"] 70 | 71 | def __init__(self): 72 | self.LookupOrder = None 73 | self.ReqFeatureIndex = None 74 | self.FeatureCount = 0 75 | self.FeatureIndex = [] 76 | 77 | def loadFromFontTools(self, langSys): 78 | self.LookupOrder = langSys.LookupOrder # XXX? 79 | self.ReqFeatureIndex = langSys.ReqFeatureIndex 80 | self.FeatureCount = langSys.FeatureCount 81 | self.FeatureIndex = list(langSys.FeatureIndex) 82 | return self 83 | -------------------------------------------------------------------------------- /Lib/compositor/subTablesBase.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import weakref 3 | 4 | # ------------ 5 | # Base Classes 6 | # ------------ 7 | 8 | 9 | class BaseSubTable(object): 10 | 11 | """ 12 | This object implents the base level subtable behavior 13 | for GSUB and GPOS subtables. It establishes one private 14 | attribute, _lookup, which is a weak reference to the 15 | lookup that contains the subtable. 16 | """ 17 | 18 | __slots__ = ["_lookup"] 19 | 20 | def __init__(self): 21 | self._lookup = None 22 | 23 | def loadFromFontTools(self, subtable, lookup): 24 | self._lookup = weakref.ref(lookup) 25 | return self 26 | 27 | def process(self, processed, glyphRecords, featureTag): 28 | if self._lookup is not None and hasattr(self._lookup(), "LookupType"): 29 | lookupType = self._lookup().LookupType 30 | else: 31 | lookupType = "Unknown" 32 | if hasattr(self, "SubstFormat"): 33 | format = str(self.SubstFormat) 34 | elif hasattr(self, "PosFormat"): 35 | format = str(self.PosFormat) 36 | else: 37 | format = "Unknown" 38 | className = self.__class__.__name__ 39 | print("[Compositor] %s skipping Lookup Type %s Format %s" % (className, lookupType, format)) 40 | return processed, glyphRecords, False 41 | 42 | def _lookupFlagCoversGlyph(self, glyphName): 43 | return self._lookup().LookupFlag.coversGlyph(glyphName) 44 | 45 | def _nextRecord(self, glyphRecords): 46 | nextRecord = None 47 | nextRecordIndex = 0 48 | while nextRecord is None: 49 | for _nextRecord in glyphRecords: 50 | _nextGlyph = _nextRecord.glyphName 51 | if not self._lookupFlagCoversGlyph(_nextGlyph): 52 | nextRecord = _nextRecord 53 | break 54 | nextRecordIndex += 1 55 | break 56 | return nextRecord, nextRecordIndex 57 | 58 | 59 | class BaseContextSubTable(BaseSubTable): 60 | 61 | __slots__ = [] 62 | 63 | def _processMatch(self, rule, processed, glyphRecords, inputGlyphCount, matchedIndexes, featureTag): 64 | performedAction = False 65 | if not rule._ActionCount: 66 | performedAction = True 67 | processed.extend(glyphRecords[:inputGlyphCount]) 68 | glyphRecords = glyphRecords[inputGlyphCount:] 69 | else: 70 | eligibleRecords = glyphRecords[:inputGlyphCount] 71 | ineligibleRecords = glyphRecords[inputGlyphCount:] 72 | for record in rule._ActionLookupRecord: 73 | sequenceIndex = record.SequenceIndex 74 | matchIndex = matchedIndexes[sequenceIndex] 75 | 76 | backRecords = eligibleRecords[:matchIndex] 77 | inputRecords = eligibleRecords[matchIndex:] 78 | 79 | lookupListIndex = record.LookupListIndex 80 | lookup = self._lookup()._lookupList().Lookup[lookupListIndex] 81 | 82 | for subtable in lookup.SubTable: 83 | backRecords, inputRecords, performedAction = subtable.process(backRecords, inputRecords, featureTag) 84 | if performedAction: 85 | break 86 | if performedAction: 87 | eligibleRecords = backRecords + inputRecords 88 | processed.extend(eligibleRecords) 89 | glyphRecords = ineligibleRecords 90 | return processed, glyphRecords, performedAction 91 | 92 | 93 | class BaseChainingContextSubTable(BaseContextSubTable): 94 | 95 | __slots__ = [] 96 | 97 | def _testContext(self, testSource, testAgainst, matchCount, additionObjects=None): 98 | # this procedure is common across all formats 99 | # with the exception of evaluating if a particular 100 | # glyph matches a position in the context. 101 | # to handle this, the comparison is evaluated 102 | # by a _evaluateContextItem method in each 103 | # subclass. the speed penalty for this is negligible. 104 | # the aditionalObjects arg will be ignored by 105 | # all formats except format 2 which needs a ClassDef 106 | # to perform the comparison. 107 | completeRun = [] 108 | matchedIndexes = [] 109 | matched = 0 110 | while matched < matchCount: 111 | for recordIndex, glyphRecord in enumerate(testSource): 112 | completeRun.append(glyphRecord) 113 | glyphName = glyphRecord.glyphName 114 | if not self._lookupFlagCoversGlyph(glyphName): 115 | if not self._evaluateContextItem(glyphName, testAgainst[matched], additionObjects): 116 | break 117 | matched += 1 118 | matchedIndexes.append(recordIndex) 119 | if matched == matchCount: 120 | break 121 | break 122 | return matched == matchCount, completeRun, matchedIndexes 123 | 124 | 125 | class BaseContextFormat1SubTable(BaseContextSubTable): 126 | 127 | __slots__ = [] 128 | 129 | def process(self, processed, glyphRecords, featureTag): 130 | performedAction = False 131 | currentRecord = glyphRecords[0] 132 | currentGlyph = currentRecord.glyphName 133 | if currentGlyph in self.Coverage: 134 | if not self._lookupFlagCoversGlyph(currentGlyph): 135 | coverageIndex = self.Coverage.index(currentGlyph) 136 | ruleSet = self._RuleSet[coverageIndex] 137 | for rule in ruleSet._Rule: 138 | matchedIndexes = [0] 139 | currentGlyphIndex = 1 140 | for input in rule.Input: 141 | glyphRecord, relativeIndex = self._nextRecord(glyphRecords[currentGlyphIndex:]) 142 | currentGlyphIndex += relativeIndex 143 | if glyphRecord is not None: 144 | glyphName = glyphRecord.glyphName 145 | if glyphName != input: 146 | break 147 | else: 148 | matchedIndexes.append(currentGlyphIndex) 149 | currentGlyphIndex += 1 150 | if len(matchedIndexes) == rule.GlyphCount: 151 | inputGlyphCount = matchedIndexes[-1] + 1 152 | processed, glyphRecords, performedAction = self._processMatch(rule, processed, glyphRecords, inputGlyphCount, matchedIndexes, featureTag) 153 | if performedAction: 154 | break 155 | return processed, glyphRecords, performedAction 156 | 157 | 158 | class BaseContextFormat2SubTable(BaseContextSubTable): 159 | 160 | __slots__ = [] 161 | 162 | def process(self, processed, glyphRecords, featureTag): 163 | performedAction = False 164 | currentRecord = glyphRecords[0] 165 | currentGlyph = currentRecord.glyphName 166 | if currentGlyph in self.Coverage: 167 | if not self._lookupFlagCoversGlyph(currentGlyph): 168 | classIndex = self.ClassDef[currentGlyph] 169 | classSet = self._ClassSet[classIndex] 170 | if classSet is not None: 171 | matchedIndexes = [0] 172 | currentGlyphIndex = 1 173 | for classRule in classSet._ClassRule: 174 | for inputClass in classRule.Class: 175 | glyphRecord, relativeIndex = self._nextRecord(glyphRecords[currentGlyphIndex:]) 176 | currentGlyphIndex += relativeIndex 177 | if glyphRecord is not None: 178 | glyphName = glyphRecord.glyphName 179 | glyphClass = self.ClassDef[glyphName] 180 | if glyphClass != inputClass: 181 | break 182 | else: 183 | matchedIndexes.append(currentGlyphIndex) 184 | currentGlyphIndex += 1 185 | if len(matchedIndexes) == classRule.GlyphCount: 186 | inputGlyphCount = matchedIndexes[-1] + 1 187 | processed, glyphRecords, performedAction = self._processMatch(classRule, processed, glyphRecords, inputGlyphCount, matchedIndexes, featureTag) 188 | return processed, glyphRecords, performedAction 189 | 190 | 191 | class BaseContextFormat3SubTable(BaseContextSubTable): 192 | 193 | __slots__ = [] 194 | 195 | def process(self, processed, glyphRecords, featureTag): 196 | performedAction = False 197 | matchedIndexes = [] 198 | currentGlyphIndex = 0 199 | for coverage in self.Coverage: 200 | glyphRecord, relativeIndex = self._nextRecord(glyphRecords[currentGlyphIndex:]) 201 | currentGlyphIndex += relativeIndex 202 | currentGlyph = glyphRecord.glyphName 203 | if currentGlyph not in coverage: 204 | break 205 | matchedIndexes.append(currentGlyphIndex) 206 | currentGlyphIndex += 1 207 | if len(matchedIndexes) == self.GlyphCount: 208 | inputGlyphCount = matchedIndexes[-1] + 1 209 | processed, glyphRecords, performedAction = self._processMatch(self, processed, glyphRecords, inputGlyphCount, matchedIndexes, featureTag) 210 | return processed, glyphRecords, performedAction 211 | 212 | 213 | class BaseChainingContextFormat1SubTable(BaseChainingContextSubTable): 214 | 215 | __slots__ = [] 216 | 217 | def process(self, processed, glyphRecords, featureTag): 218 | performedAction = False 219 | currentRecord = glyphRecords[0] 220 | currentGlyph = currentRecord.glyphName 221 | if currentGlyph in self.Coverage: 222 | for chainRuleSet in self._ChainRuleSet: 223 | for chainRule in chainRuleSet._ChainRule: 224 | # backtrack testing 225 | backtrackCount = chainRule.BacktrackGlyphCount 226 | if not backtrackCount: 227 | backtrackMatch = True 228 | else: 229 | backtrackMatch, backtrack, backtrackMatchIndexes = self._testContext(reversed(processed), chainRule.Backtrack, backtrackCount) 230 | if not backtrackMatch: 231 | continue 232 | # input testing 233 | inputCount = chainRule.InputGlyphCount 234 | if not inputCount: 235 | inputMatch = True 236 | else: 237 | inputMatch, input, inputMatchIndexes = self._testContext(glyphRecords[1:], chainRule.Input, inputCount-1) 238 | if not inputMatch: 239 | continue 240 | input = [currentRecord] + input 241 | inputMatchIndexes = [0] + [i + 1 for i in inputMatchIndexes] 242 | # look ahead testing 243 | lookAheadCount = chainRule.LookAheadGlyphCount 244 | if not lookAheadCount: 245 | lookAheadMatch = True 246 | else: 247 | lookAheadMatch, lookAhead, lookAheadMatchIndexes = self._testContext(glyphRecords[len(input):], chainRule.LookAhead, lookAheadCount) 248 | if not lookAheadMatch: 249 | continue 250 | # match. process. 251 | if backtrackMatch and inputMatch and lookAheadMatch: 252 | processed, glyphRecords, performedAction = self._processMatch(chainRule, processed, glyphRecords, len(input), inputMatchIndexes, featureTag) 253 | if performedAction: 254 | # break the chainRule loop 255 | break 256 | 257 | if performedAction: 258 | # break the chainRuleSet loop 259 | break 260 | return processed, glyphRecords, performedAction 261 | 262 | def _evaluateContextItem(self, glyphName, contextTest, additionalObject): 263 | return glyphName == contextTest 264 | 265 | 266 | class BaseChainingContextFormat2SubTable(BaseChainingContextSubTable): 267 | 268 | __slots__ = [] 269 | 270 | def process(self, processed, glyphRecords, featureTag): 271 | performedAction = False 272 | currentRecord = glyphRecords[0] 273 | currentGlyph = currentRecord.glyphName 274 | if currentGlyph in self.Coverage: 275 | if not self._lookupFlagCoversGlyph(currentGlyph): 276 | classIndex = self.InputClassDef[currentGlyph] 277 | chainClassSet = self._ChainClassSet[classIndex] 278 | if chainClassSet is not None: 279 | for chainClassRule in chainClassSet._ChainClassRule: 280 | # backtrack testing 281 | backtrackCount = chainClassRule.BacktrackGlyphCount 282 | if not backtrackCount: 283 | backtrackMatch = True 284 | else: 285 | backtrackMatch, backtrack, backtrackMatchIndexes = self._testContext(reversed(processed), chainClassRule.Backtrack, backtrackCount, self.BacktrackClassDef) 286 | if not backtrackMatch: 287 | continue 288 | # input testing 289 | inputCount = chainClassRule.InputGlyphCount 290 | if not inputCount: 291 | inputMatch = True 292 | else: 293 | inputMatch, input, inputMatchIndexes = self._testContext(glyphRecords[1:], chainClassRule.Input, inputCount-1, self.InputClassDef) 294 | if not inputMatch: 295 | continue 296 | input = [currentRecord] + input 297 | inputMatchIndexes = [0] + [i + 1 for i in inputMatchIndexes] 298 | # look ahead testing 299 | lookAheadCount = chainClassRule.LookAheadGlyphCount 300 | if not lookAheadCount: 301 | lookAheadMatch = True 302 | else: 303 | lookAheadMatch, lookAhead, lookAheadMatchIndexes = self._testContext(glyphRecords[len(input):], chainClassRule.LookAhead, lookAheadCount, self.LookAheadClassDef) 304 | if not lookAheadMatch: 305 | continue 306 | # match. process. 307 | if backtrackMatch and inputMatch and lookAheadMatch: 308 | processed, glyphRecords, performedAction = self._processMatch(chainClassRule, processed, glyphRecords, len(input), inputMatchIndexes, featureTag) 309 | if performedAction: 310 | break 311 | return processed, glyphRecords, performedAction 312 | 313 | def _evaluateContextItem(self, glyphName, contextTest, additionalObject): 314 | classDef = additionalObject 315 | classIndex = classDef[glyphName] 316 | return classIndex == contextTest 317 | 318 | 319 | class BaseChainingContextFormat3SubTable(BaseChainingContextSubTable): 320 | 321 | """ 322 | This object implements chaining contextual format 3. 323 | It is shared across GSUB and GPOS contextual subtables. 324 | """ 325 | 326 | __slots__ = ["BacktrackGlyphCount", "BacktrackCoverage", "InputGlyphCount", 327 | "InputCoverage", "LookAheadGlyphCount", "LookAheadCoverage"] 328 | 329 | def __init__(self): 330 | super(BaseChainingContextFormat3SubTable, self).__init__() 331 | self.BacktrackGlyphCount = 0 332 | self.BacktrackCoverage = [] 333 | self.InputGlyphCount = 0 334 | self.InputCoverage = [] 335 | self.LookAheadGlyphCount = 0 336 | self.LookAheadCoverage = [] 337 | 338 | def loadFromFontTools(self, subtable, lookup): 339 | super(BaseChainingContextFormat3SubTable, self).loadFromFontTools(subtable, lookup) 340 | self.BacktrackGlyphCount = subtable.BacktrackGlyphCount 341 | self.BacktrackCoverage = [Coverage().loadFromFontTools(coverage) for coverage in subtable.BacktrackCoverage] 342 | self.InputGlyphCount = subtable.InputGlyphCount 343 | self.InputCoverage = [Coverage().loadFromFontTools(coverage) for coverage in subtable.InputCoverage] 344 | self.LookAheadGlyphCount = subtable.LookAheadGlyphCount 345 | self.LookAheadCoverage = [Coverage().loadFromFontTools(coverage) for coverage in subtable.LookAheadCoverage] 346 | return self 347 | 348 | def process(self, processed, glyphRecords, featureTag): 349 | performedAction = False 350 | while 1: 351 | # backtrack testing 352 | backtrackCount = self.BacktrackGlyphCount 353 | if not backtrackCount: 354 | backtrackMatch = True 355 | else: 356 | backtrackMatch, backtrack, backtrackMatchIndexes = self._testContext(reversed(processed), self.BacktrackCoverage, backtrackCount) 357 | if not backtrackMatch: 358 | break 359 | # input testing 360 | inputCount = self.InputGlyphCount 361 | if not inputCount: 362 | inputMatch = True 363 | else: 364 | inputMatch, input, inputMatchIndexes = self._testContext(glyphRecords, self.InputCoverage, inputCount) 365 | if not inputMatch: 366 | break 367 | # look ahead testing 368 | lookAheadCount = self.LookAheadGlyphCount 369 | if not lookAheadCount: 370 | lookAheadMatch = True 371 | else: 372 | lookAheadMatch, lookAhead, lookAheadMatchIndexes = self._testContext(glyphRecords[len(input):], self.LookAheadCoverage, lookAheadCount) 373 | if not lookAheadMatch: 374 | break 375 | # match. process. 376 | if backtrackMatch and inputMatch and lookAheadMatch: 377 | processed, glyphRecords, performedAction = self._processMatch(self, processed, glyphRecords, len(input), inputMatchIndexes, featureTag) 378 | # break the while 379 | break 380 | return processed, glyphRecords, performedAction 381 | 382 | def _evaluateContextItem(self, glyphName, contextTest, additionalObject): 383 | return glyphName in contextTest 384 | 385 | 386 | class BaseLookupRecord(object): 387 | 388 | """ 389 | This object implements the functionality of both 390 | GSUB SubstLookupRecord and GPOS PosLookupRecord. 391 | """ 392 | 393 | __slots__ = ["SequenceIndex", "LookupListIndex"] 394 | 395 | def __init__(self): 396 | self.SequenceIndex = None 397 | self.LookupListIndex = None 398 | 399 | def loadFromFontTools(self, record): 400 | self.SequenceIndex = record.SequenceIndex 401 | self.LookupListIndex = record.LookupListIndex 402 | return self 403 | 404 | 405 | # -------- 406 | # Coverage 407 | # -------- 408 | 409 | 410 | class Coverage(object): 411 | 412 | """ 413 | fontTools abstracts CoverageFormat1 and 414 | CoverageFormat2 into a common Coverage 415 | object. The same is done here. Consequently 416 | the structure of this object does not closely 417 | follow the specification. Instead, the basic 418 | functionality is implemented through standard 419 | dict methods. 420 | 421 | To determine if a glyph is in the coverage: 422 | >>> "x" in coverage 423 | True 424 | 425 | To get the index for a particular glyph: 426 | >>> coverage.index("x") 427 | 330 428 | """ 429 | 430 | __slots__ = ["_glyphs"] 431 | 432 | 433 | def __init__(self, coverage=None): 434 | if coverage is not None: 435 | coverage = list(coverage) 436 | self._glyphs = coverage 437 | 438 | def loadFromFontTools(self, coverage): 439 | # the data coming in could be a fontTools 440 | # Coverage object or a list of glyph names 441 | if not isinstance(coverage, list): 442 | coverage = coverage.glyphs 443 | self._glyphs = list(coverage) 444 | return self 445 | 446 | def __contains__(self, glyphName): 447 | return glyphName in self._glyphs 448 | 449 | def index(self, glyphName): 450 | return self._glyphs.index(glyphName) 451 | 452 | def _get_Glyphs(self): 453 | return list(self._glyphs) 454 | 455 | Glyphs = property(_get_Glyphs, doc="This is for reference only. Not for use in processing.") 456 | -------------------------------------------------------------------------------- /Lib/compositor/subTablesGSUB.py: -------------------------------------------------------------------------------- 1 | from random import choice 2 | from compositor.classDefinitionTables import ClassDef 3 | from compositor.glyphRecord import glyphNamesToGlyphRecords 4 | from compositor.subTablesBase import BaseSubTable, BaseLookupRecord, Coverage,\ 5 | BaseContextFormat1SubTable, BaseContextFormat2SubTable, BaseContextFormat3SubTable,\ 6 | BaseChainingContextFormat1SubTable, BaseChainingContextFormat2SubTable, BaseChainingContextFormat3SubTable 7 | 8 | 9 | __all__ = [ 10 | "GSUBLookupType1Format2", "GSUBLookupType2", "GSUBLookupType3", "GSUBLookupType4", 11 | "GSUBLookupType5Format1", "GSUBLookupType5Format2", "GSUBLookupType5Format3", 12 | "GSUBLookupType6Format1", "GSUBLookupType6Format2", "GSUBLookupType6Format3", 13 | "GSUBLookupType7", "GSUBLookupType8" 14 | ] 15 | 16 | 17 | globalSubstitutionSubTableSlots = ["SubstFormat"] 18 | 19 | 20 | # ------------- 21 | # Lookup Type 1 22 | # ------------- 23 | 24 | 25 | class GSUBLookupType1Format2(BaseSubTable): 26 | 27 | """ 28 | Deviation from spec: 29 | - fontTools interprets Lookup Type 1 formats 1 and 2 30 | into the same object structure. As such, only format 2 31 | is needed. 32 | - GlyphCount attribute is not implemented. 33 | """ 34 | 35 | __slots__ = ["Coverage", "Substitute"] + globalSubstitutionSubTableSlots 36 | 37 | def __init__(self): 38 | super(GSUBLookupType1Format2, self).__init__() 39 | self.SubstFormat = 2 40 | self.Substitute = [] 41 | self.Coverage = None 42 | 43 | def loadFromFontTools(self, subtable, lookup): 44 | super(GSUBLookupType1Format2, self).loadFromFontTools(subtable, lookup) 45 | # fontTools has a custom implementation of this 46 | # subtable type, so it needs to be converted 47 | coverage = [] 48 | self.Substitute = [] 49 | for glyphName, alternate in sorted(subtable.mapping.items()): 50 | coverage.append(glyphName) 51 | self.Substitute.append(alternate) 52 | self.Coverage = Coverage().loadFromFontTools(coverage) 53 | return self 54 | 55 | def process(self, processed, glyphRecords, featureTag): 56 | performedSub = False 57 | currentRecord = glyphRecords[0] 58 | currentGlyph = currentRecord.glyphName 59 | if currentGlyph in self.Coverage: 60 | if not self._lookupFlagCoversGlyph(currentGlyph): 61 | performedSub = True 62 | index = self.Coverage.index(currentGlyph) 63 | substitute = self.Substitute[index] 64 | # special behavior for aalt 65 | if featureTag == "aalt": 66 | if currentRecord._alternatesReference != currentGlyph: 67 | currentRecord._alternatesReference = currentGlyph 68 | currentRecord.alternates = [] 69 | currentRecord.alternates.append(substitute) 70 | # standard behavior 71 | else: 72 | currentRecord.saveState(currentRecord.glyphName) 73 | currentRecord.glyphName = substitute 74 | processed.append(currentRecord) 75 | glyphRecords = glyphRecords[1:] 76 | return processed, glyphRecords, performedSub 77 | 78 | 79 | # ------------- 80 | # Lookup Type 2 81 | # ------------- 82 | 83 | 84 | class GSUBLookupType2(BaseSubTable): 85 | 86 | """ 87 | Deviation from spec: 88 | - SequenceCount attribute is not implemented. 89 | """ 90 | 91 | __slots__ = ["Coverage", "Sequence"] + globalSubstitutionSubTableSlots 92 | 93 | def __init__(self): 94 | super(GSUBLookupType2, self).__init__() 95 | self.SubstFormat = 1 96 | self.Coverage = None 97 | self.Sequence = [] 98 | 99 | def loadFromFontTools(self, subtable, lookup): 100 | super(GSUBLookupType2, self).loadFromFontTools(subtable, lookup) 101 | try: 102 | self.Coverage = Coverage().loadFromFontTools(subtable.Coverage) 103 | self.Sequence = [Sequence().loadFromFontTools(sequence) 104 | for sequence in subtable.Sequence] 105 | except AttributeError: 106 | # the API for MultipleSubst lookups changed with fonttools 3.1: 107 | # https://github.com/fonttools/fonttools/pull/364 108 | mapping = getattr(subtable, "mapping", {}) 109 | coverage = sorted(mapping.keys()) 110 | self.Coverage = Coverage(coverage) 111 | self.Sequence = [Sequence(mapping[glyph]) for glyph in coverage] 112 | return self 113 | 114 | def process(self, processed, glyphRecords, featureTag): 115 | performedSub = False 116 | currentRecord = glyphRecords[0] 117 | currentGlyph = currentRecord.glyphName 118 | if currentGlyph in self.Coverage: 119 | if not self._lookupFlagCoversGlyph(currentGlyph): 120 | # XXX all glyph subsitituion states are destroyed here 121 | performedSub = True 122 | index = self.Coverage.index(currentGlyph) 123 | sequence = self.Sequence[index] 124 | substitute = sequence.Substitute 125 | substitute = glyphNamesToGlyphRecords(substitute) 126 | processed.extend(substitute) 127 | glyphRecords = glyphRecords[1:] 128 | return processed, glyphRecords, performedSub 129 | 130 | 131 | class Sequence(object): 132 | 133 | """ 134 | Deviation from spec: 135 | - GlyphCount attribute is not implemented. 136 | """ 137 | 138 | __slots__ = ["Substitute"] 139 | 140 | def __init__(self, substitute=None): 141 | self.Substitute = list(substitute) if substitute is not None else [] 142 | 143 | def loadFromFontTools(self, sequence): 144 | self.Substitute = list(sequence.Substitute) 145 | return self 146 | 147 | # ------------- 148 | # Lookup Type 3 149 | # ------------- 150 | 151 | 152 | class GSUBLookupType3(BaseSubTable): 153 | 154 | """ 155 | Deviation from spec: 156 | - AlternateSetCount attribute is not implemented. 157 | """ 158 | 159 | __slots__ = ["Coverage", "AlternateSet", "AlternateSetCount"] + globalSubstitutionSubTableSlots 160 | 161 | def __init__(self): 162 | super(GSUBLookupType3, self).__init__() 163 | self.SubstFormat = 1 164 | self.AlternateSet = [] 165 | self.Coverage = None 166 | self.AlternateSetCount = 0 167 | 168 | def loadFromFontTools(self, subtable, lookup): 169 | super(GSUBLookupType3, self).loadFromFontTools(subtable, lookup) 170 | # fontTools has a custom implementation of this 171 | # subtable type, so it needs to be converted 172 | coverage = [] 173 | self.AlternateSet = [] 174 | for glyphName, alternates in subtable.alternates.items(): 175 | coverage.append(glyphName) 176 | alternateSet = AlternateSet().loadFromFontTools(alternates) 177 | self.AlternateSet.append(alternateSet) 178 | self.Coverage = Coverage().loadFromFontTools(coverage) 179 | self.AlternateSetCount = len(self.AlternateSet) 180 | return self 181 | 182 | def process(self, processed, glyphRecords, featureTag): 183 | performedSub = False 184 | currentRecord = glyphRecords[0] 185 | currentGlyph = currentRecord.glyphName 186 | if currentGlyph in self.Coverage: 187 | if not self._lookupFlagCoversGlyph(currentGlyph): 188 | performedSub = True 189 | index = self.Coverage.index(currentGlyph) 190 | alternateSet = self.AlternateSet[index] 191 | alternates = alternateSet.Alternate 192 | # special behavior for rand 193 | if featureTag == "rand": 194 | currentRecord.saveState(currentRecord.glyphName) 195 | currentRecord.glyphName = choice(alternates) 196 | # standard behavior 197 | else: 198 | if currentRecord._alternatesReference != currentGlyph: 199 | currentRecord._alternatesReference = currentGlyph 200 | currentRecord.alternates = [] 201 | currentRecord.alternates.extend(alternates) 202 | processed.append(currentRecord) 203 | glyphRecords = glyphRecords[1:] 204 | return processed, glyphRecords, performedSub 205 | 206 | 207 | class AlternateSet(object): 208 | 209 | """ 210 | Deviation from spec: 211 | - GlyphCount attribute is not implemented. 212 | """ 213 | 214 | __slots__ = ["Alternate"] 215 | 216 | def __init__(self): 217 | self.Alternate = [] 218 | 219 | def loadFromFontTools(self, alternates): 220 | self.Alternate = list(alternates) 221 | return self 222 | 223 | 224 | # ------------- 225 | # Lookup Type 4 226 | # ------------- 227 | 228 | 229 | class GSUBLookupType4(BaseSubTable): 230 | 231 | """ 232 | Deviation from spec: 233 | - LigSetCount attribute is not implemented. 234 | """ 235 | 236 | __slots__ = ["Coverage", "LigatureSet"] + globalSubstitutionSubTableSlots 237 | 238 | def __init__(self): 239 | super(GSUBLookupType4, self).__init__() 240 | self.SubstFormat = 1 241 | self.LigatureSet = [] 242 | self.Coverage = None 243 | 244 | def loadFromFontTools(self, subtable, lookup): 245 | super(GSUBLookupType4, self).loadFromFontTools(subtable, lookup) 246 | # fontTools has a custom implementation of this 247 | # subtable type, so it needs to be converted 248 | coverage = [] 249 | self.LigatureSet = [] 250 | for glyphName, ligature in subtable.ligatures.items(): 251 | ligatureSet = LigatureSet().loadFromFontTools(ligature) 252 | self.LigatureSet.append(ligatureSet) 253 | coverage.append(glyphName) 254 | self.Coverage = Coverage().loadFromFontTools(coverage) 255 | return self 256 | 257 | def process(self, processed, glyphRecords, featureTag): 258 | performedSub = False 259 | currentRecord = glyphRecords[0] 260 | currentGlyph = currentRecord.glyphName 261 | lookupFlag = self._lookup().LookupFlag 262 | if currentGlyph in self.Coverage: 263 | if not lookupFlag.coversGlyph(currentGlyph): 264 | while not performedSub: 265 | coverageIndex = self.Coverage.index(currentGlyph) 266 | ligatureSet = self.LigatureSet[coverageIndex] 267 | for ligature in ligatureSet.Ligature: 268 | component = ligature.Component 269 | componentCount = ligature.CompCount 270 | currentComponentIndex = 0 271 | matchedRecordIndexes = set() 272 | lastWasMatch = False 273 | for index, glyphRecord in enumerate(glyphRecords[1:]): 274 | glyphName = glyphRecord.glyphName 275 | if not lookupFlag.coversGlyph(glyphName): 276 | if not glyphName == component[currentComponentIndex]: 277 | lastWasMatch = False 278 | break 279 | else: 280 | lastWasMatch = True 281 | matchedRecordIndexes.add(index) 282 | currentComponentIndex += 1 283 | if currentComponentIndex == componentCount - 1: 284 | break 285 | if lastWasMatch and currentComponentIndex == componentCount - 1: 286 | performedSub = True 287 | currentRecord.saveState([currentGlyph] + ligature.Component) 288 | currentRecord.glyphName = ligature.LigGlyph 289 | currentRecord.ligatureComponents = [currentGlyph] + ligature.Component 290 | processed.append(currentRecord) 291 | glyphRecords = [record for index, record in enumerate(glyphRecords[1:]) if index not in matchedRecordIndexes] 292 | break 293 | break 294 | return processed, glyphRecords, performedSub 295 | 296 | 297 | class LigatureSet(object): 298 | 299 | """ 300 | Deviation from spec: None 301 | """ 302 | 303 | __slots__ = ["LigatureCount", "Ligature"] 304 | 305 | def __init__(self): 306 | self.Ligature = [] 307 | self.LigatureCount = 0 308 | 309 | def loadFromFontTools(self, ligatures): 310 | self.Ligature = [Ligature().loadFromFontTools(ligature) for ligature in ligatures] 311 | self.LigatureCount = len(self.Ligature) 312 | return self 313 | 314 | 315 | class Ligature(object): 316 | 317 | """ 318 | Deviation from spec: None 319 | """ 320 | 321 | __slots__ = ["LigGlyph", "CompCount", "Component"] 322 | 323 | def __init__(self): 324 | self.CompCount = None 325 | self.LigGlyph = None 326 | self.Component = [] 327 | 328 | def loadFromFontTools(self, ligature): 329 | self.CompCount = ligature.CompCount 330 | self.LigGlyph = ligature.LigGlyph 331 | self.Component = list(ligature.Component) 332 | return self 333 | 334 | 335 | # ------------- 336 | # Lookup Type 5 337 | # ------------- 338 | 339 | 340 | class GSUBLookupType5Format1(BaseContextFormat1SubTable): 341 | 342 | """ 343 | Deviation from spec: 344 | - SubRuleSetCount attribute is not implemented. 345 | 346 | A private attribute is implemented: 347 | _RuleSet - The value of SubRuleSet 348 | 349 | The private attribute is needed because the contextual subtable processing 350 | is abstracted so that it can be shared between GSUB and GPOS. 351 | """ 352 | 353 | __slots__ = ["Coverage", "SubRuleSet"] + globalSubstitutionSubTableSlots 354 | 355 | def __init__(self): 356 | super(GSUBLookupType5Format1, self).__init__() 357 | self.SubstFormat = 1 358 | self.Coverage = None 359 | self.SubRuleSet = [] 360 | 361 | def loadFromFontTools(self, subtable, lookup): 362 | super(GSUBLookupType5Format1, self).loadFromFontTools(subtable, lookup) 363 | self.Coverage = Coverage().loadFromFontTools(subtable.Coverage) 364 | self.SubRuleSet = [SubRuleSet().loadFromFontTools(subRuleSet) for subRuleSet in subtable.SubRuleSet] 365 | return self 366 | 367 | def _get_RuleSet(self): 368 | return self.SubRuleSet 369 | 370 | _RuleSet = property(_get_RuleSet) 371 | 372 | 373 | class SubRuleSet(object): 374 | 375 | """ 376 | Deviation from spec: 377 | - SubRuleCount attribute is not implemented. 378 | 379 | A private attribute is implemented: 380 | _Rule - The value of SubRule 381 | 382 | The private attribute is needed because the contextual subtable processing 383 | is abstracted so that it can be shared between GSUB and GPOS. 384 | """ 385 | 386 | __slots__ = ["SubRule"] 387 | 388 | def __init__(self): 389 | self.SubRule = [] 390 | 391 | def loadFromFontTools(self, subRuleSet): 392 | self.SubRule = [SubRule().loadFromFontTools(subRule) for subRule in subRuleSet.SubRule] 393 | return self 394 | 395 | def _get_Rule(self): 396 | return self.SubRule 397 | 398 | _Rule = property(_get_Rule) 399 | 400 | 401 | class SubRule(object): 402 | 403 | """ 404 | Deviation from spec: None 405 | 406 | Two private attributes are implemented: 407 | _ActionCount - The value of SubstCount 408 | _ActionLookupRecord - The value of SubstLookupRecord 409 | 410 | The private attributes are needed because the contextual subtable processing 411 | is abstracted so that it can be shared between GSUB and GPOS. 412 | """ 413 | 414 | __slots__ = ["Input", "GlyphCount", "SubstCount", "SubstLookupRecord"] 415 | 416 | def __init__(self): 417 | self.Input = [] 418 | self.GlyphCount = 0 419 | self.SubstCount = 0 420 | self.SubstLookupRecord = [] 421 | 422 | def loadFromFontTools(self, subRule): 423 | self.Input = list(subRule.Input) 424 | self.GlyphCount = subRule.GlyphCount 425 | self.SubstCount = subRule.SubstCount 426 | self.SubstLookupRecord = [SubstLookupRecord().loadFromFontTools(record) for record in subRule.SubstLookupRecord] 427 | return self 428 | 429 | def _get_ActionCount(self): 430 | return self.SubstCount 431 | 432 | _ActionCount = property(_get_ActionCount) 433 | 434 | def _get_ActionLookupRecord(self): 435 | return self.SubstLookupRecord 436 | 437 | _ActionLookupRecord = property(_get_ActionLookupRecord) 438 | 439 | 440 | class GSUBLookupType5Format2(BaseContextFormat2SubTable): 441 | 442 | """ 443 | Deviation from spec: 444 | - SubClassRuleCnt attribute is not implemented. 445 | """ 446 | 447 | __slots__ = ["Coverage", "ClassDef", "SubClassSet"] + globalSubstitutionSubTableSlots 448 | 449 | def __init__(self): 450 | super(GSUBLookupType5Format2, self).__init__() 451 | self.SubstFormat = 2 452 | self.Coverage = None 453 | self.ClassDef = None 454 | self.SubClassSet = [] 455 | 456 | def loadFromFontTools(self, subtable, lookup): 457 | super(GSUBLookupType5Format2, self).loadFromFontTools(subtable, lookup) 458 | self.Coverage = Coverage().loadFromFontTools(subtable.Coverage) 459 | self.ClassDef = ClassDef().loadFromFontTools(subtable.ClassDef) 460 | self.SubClassSet = [] 461 | for subClassSet in subtable.SubClassSet: 462 | if subClassSet is None: 463 | self.SubClassSet.append(None) 464 | else: 465 | self.SubClassSet.append(SubClassSet().loadFromFontTools(subClassSet)) 466 | return self 467 | 468 | def _get_ClassSet(self): 469 | return self.SubClassSet 470 | 471 | _ClassSet = property(_get_ClassSet) 472 | 473 | 474 | class SubClassSet(object): 475 | 476 | """ 477 | Deviation from spec: 478 | - SubClassRuleCnt attribute is not implemented. 479 | """ 480 | 481 | __slots__ = ["SubClassRule"] 482 | 483 | def __init__(self): 484 | self.SubClassRule = [] 485 | 486 | def loadFromFontTools(self, subClassSet): 487 | self.SubClassRule = [SubClassRule().loadFromFontTools(subClassRule) for subClassRule in subClassSet.SubClassRule] 488 | return self 489 | 490 | def _get_ClassRule(self): 491 | return self.SubClassRule 492 | 493 | _ClassRule = property(_get_ClassRule) 494 | 495 | 496 | class SubClassRule(object): 497 | 498 | """ 499 | Deviation from spec: None 500 | 501 | Two private attributes are implemented: 502 | _ActionCount - The value of SubstCount 503 | _ActionLookupRecord - The value of SubstLookupRecord 504 | 505 | The private attributes are needed because the contextual subtable processing 506 | is abstracted so that it can be shared between GSUB and GPOS. 507 | """ 508 | 509 | __slots__ = ["Class", "GlyphCount", "SubstCount", "SubstLookupRecord"] 510 | 511 | def __init__(self): 512 | self.Class = [] 513 | self.GlyphCount = 0 514 | self.SubstCount = 0 515 | self.SubstLookupRecord = [] 516 | 517 | def loadFromFontTools(self, subClassRule): 518 | self.Class = list(subClassRule.Class) 519 | self.GlyphCount = subClassRule.GlyphCount 520 | self.SubstCount = subClassRule.SubstCount 521 | self.SubstLookupRecord = [SubstLookupRecord().loadFromFontTools(record) for record in subClassRule.SubstLookupRecord] 522 | return self 523 | 524 | def _get_ActionCount(self): 525 | return self.SubstCount 526 | 527 | _ActionCount = property(_get_ActionCount) 528 | 529 | def _get_ActionLookupRecord(self): 530 | return self.SubstLookupRecord 531 | 532 | _ActionLookupRecord = property(_get_ActionLookupRecord) 533 | 534 | 535 | class GSUBLookupType5Format3(BaseContextFormat3SubTable): 536 | 537 | """ 538 | Deviation from spec: None 539 | 540 | Two private attributes are implemented: 541 | _ActionCount - The value of SubstCount 542 | _ActionLookupRecord - The value of SubstLookupRecord 543 | 544 | The private attributes are needed because the contextual subtable processing 545 | is abstracted so that it can be shared between GSUB and GPOS. 546 | """ 547 | 548 | def __init__(self): 549 | super(GSUBLookupType5Format3, self).__init__() 550 | self.SubstFormat = 3 551 | self.Coverage = [] 552 | self.GlyphCount = 0 553 | self.SubstCount = 0 554 | self.SubstLookupRecord = [] 555 | 556 | def loadFromFontTools(self, subtable, lookup): 557 | super(GSUBLookupType5Format3, self).loadFromFontTools(subtable, lookup) 558 | self.Coverage = [Coverage().loadFromFontTools(coverage) for coverage in subtable.Coverage] 559 | self.GlyphCount = subtable.GlyphCount 560 | self.SubstCount = subtable.SubstCount 561 | self.SubstLookupRecord = [SubstLookupRecord().loadFromFontTools(record) for record in subtable.SubstLookupRecord] 562 | return self 563 | 564 | def _get_ActionCount(self): 565 | return self.SubstCount 566 | 567 | _ActionCount = property(_get_ActionCount) 568 | 569 | def _get_ActionLookupRecord(self): 570 | return self.SubstLookupRecord 571 | 572 | _ActionLookupRecord = property(_get_ActionLookupRecord) 573 | 574 | 575 | # ------------- 576 | # Lookup Type 6 577 | # ------------- 578 | 579 | 580 | class GSUBLookupType6Format1(BaseChainingContextFormat1SubTable): 581 | 582 | """ 583 | Deviation from spec: 584 | - ChainSubRuleSetCount attribute is not implemented. 585 | 586 | A private attribute is implemented: 587 | _ChainRuleSet - The value of ChainSubRuleSet 588 | 589 | The private attribute is needed because the contextual subtable processing 590 | is abstracted so that it can be shared between GSUB and GPOS. 591 | """ 592 | 593 | __slots__ = ["Coverage", "ChainSubRuleSet"] + globalSubstitutionSubTableSlots 594 | 595 | def __init__(self): 596 | super(GSUBLookupType6Format1, self).__init__() 597 | self.SubstFormat = 1 598 | self.Coverage = None 599 | self.ChainSubRuleSet = [] 600 | 601 | def loadFromFontTools(self, subtable, lookup): 602 | super(GSUBLookupType6Format1, self).loadFromFontTools(subtable, lookup) 603 | self.Coverage = Coverage().loadFromFontTools(subtable.Coverage) 604 | self.ChainSubRuleSet = [ChainSubRuleSet().loadFromFontTools(chainSubRuleSet) for chainSubRuleSet in subtable.ChainSubRuleSet] 605 | return self 606 | 607 | def _get_ChainRuleSet(self): 608 | return self.ChainSubRuleSet 609 | 610 | _ChainRuleSet = property(_get_ChainRuleSet) 611 | 612 | 613 | class ChainSubRuleSet(object): 614 | 615 | """ 616 | Deviation from spec: 617 | - ChainSubRuleCount attribute is not implemented. 618 | 619 | A private attribute is implemented: 620 | _ChainRule - The value of ChainSubRule 621 | 622 | The private attribute is needed because the contextual subtable processing 623 | is abstracted so that it can be shared between GSUB and GPOS. 624 | """ 625 | 626 | __slots__ = ["ChainSubRule"] 627 | 628 | def __init__(self): 629 | self.ChainSubRule = [] 630 | 631 | def loadFromFontTools(self, chainSubRuleSet): 632 | self.ChainSubRule = [ChainSubRule().loadFromFontTools(chainSubRule) for chainSubRule in chainSubRuleSet.ChainSubRule] 633 | return self 634 | 635 | def _get_ChainRule(self): 636 | return self.ChainSubRule 637 | 638 | _ChainRule = property(_get_ChainRule) 639 | 640 | 641 | class ChainSubRule(object): 642 | 643 | """ 644 | Deviation from spec: None 645 | 646 | Two private attributes are implemented: 647 | _ActionCount - The value of SubstCount 648 | _ActionLookupRecord - The value of SubstLookupRecord 649 | 650 | The private attributes are needed because the contextual subtable processing 651 | is abstracted so that it can be shared between GSUB and GPOS. 652 | """ 653 | 654 | __slots__ = ["BacktrackGlyphCount", "Backtrack", "InputGlyphCount", "Input", 655 | "LookAheadGlyphCount", "LookAhead", 656 | "SubstCount", "SubstLookupRecord",] 657 | 658 | def __init__(self): 659 | self.BacktrackGlyphCount = 0 660 | self.Backtrack = [] 661 | self.InputGlyphCount = 0 662 | self.Input = [] 663 | self.LookAheadGlyphCount = 0 664 | self.LookAhead = [] 665 | self.SubstCount = 0 666 | self.SubstLookupRecord = [] 667 | 668 | def loadFromFontTools(self, chainSubRule): 669 | self.BacktrackGlyphCount = chainSubRule.BacktrackGlyphCount 670 | self.Backtrack = list(chainSubRule.Backtrack) 671 | self.InputGlyphCount = chainSubRule.InputGlyphCount 672 | self.Input = list(chainSubRule.Input) 673 | self.LookAheadGlyphCount = chainSubRule.LookAheadGlyphCount 674 | self.LookAhead = list(chainSubRule.LookAhead) 675 | self.SubstCount = chainSubRule.SubstCount 676 | self.SubstLookupRecord = [SubstLookupRecord().loadFromFontTools(record) for record in chainSubRule.SubstLookupRecord] 677 | return self 678 | 679 | def _get_ActionCount(self): 680 | return self.SubstCount 681 | 682 | _ActionCount = property(_get_ActionCount) 683 | 684 | def _get_ActionLookupRecord(self): 685 | return self.SubstLookupRecord 686 | 687 | _ActionLookupRecord = property(_get_ActionLookupRecord) 688 | 689 | 690 | class GSUBLookupType6Format2(BaseChainingContextFormat2SubTable): 691 | 692 | """ 693 | Deviation from spec: 694 | -ChainSubClassSetCnt attribute is not implemented. 695 | 696 | A private attribute is implemented: 697 | _ChainClassSet - The value of ChainPosClassSet 698 | 699 | The private attribute is needed because the contextual subtable processing 700 | is abstracted so that it can be shared between GSUB and GPOS. 701 | """ 702 | 703 | __slots__ = ["Coverage", "BacktrackClassDef", "InputClassDef", 704 | "LookAheadClassDef", "ChainSubClassSet"] + globalSubstitutionSubTableSlots 705 | 706 | def __init__(self): 707 | super(GSUBLookupType6Format2, self).__init__() 708 | self.SubstFormat = 2 709 | self.Coverage = None 710 | self.BacktrackClassDef = None 711 | self.InputClassDef = None 712 | self.LookAheadClassDef = None 713 | self.ChainSubClassSet = [] 714 | 715 | def loadFromFontTools(self, subtable, lookup): 716 | super(GSUBLookupType6Format2, self).loadFromFontTools(subtable, lookup) 717 | self.Coverage = Coverage().loadFromFontTools(subtable.Coverage) 718 | self.BacktrackClassDef = ClassDef().loadFromFontTools(subtable.BacktrackClassDef) 719 | self.InputClassDef = ClassDef().loadFromFontTools(subtable.InputClassDef) 720 | self.LookAheadClassDef = ClassDef().loadFromFontTools(subtable.LookAheadClassDef) 721 | self.ChainSubClassSet = [] 722 | for chainSubClassSet in subtable.ChainSubClassSet: 723 | if chainSubClassSet is None: 724 | self.ChainSubClassSet.append(None) 725 | else: 726 | self.ChainSubClassSet.append(ChainSubClassSet().loadFromFontTools(chainSubClassSet)) 727 | return self 728 | 729 | def _get_ChainClassSet(self): 730 | return self.ChainSubClassSet 731 | 732 | _ChainClassSet = property(_get_ChainClassSet) 733 | 734 | 735 | class ChainSubClassSet(object): 736 | 737 | """ 738 | Deviation from spec: 739 | -ChainSubClassRuleCnt attribute is not implemented. 740 | 741 | A private attribute is implemented: 742 | _ChainClassRule - The value of ChainSubClassRule 743 | 744 | The private attribute is needed because the contextual subtable processing 745 | is abstracted so that it can be shared between GSUB and GPOS. 746 | """ 747 | 748 | __slots__ = ["ChainSubClassRule"] 749 | 750 | def __init__(self): 751 | self.ChainSubClassRule = None 752 | 753 | def loadFromFontTools(self, chainSubClassSet): 754 | self.ChainSubClassRule = [ChainSubClassRule().loadFromFontTools(chainSubClassRule) for chainSubClassRule in chainSubClassSet.ChainSubClassRule] 755 | return self 756 | 757 | def _get_ChainClassRule(self): 758 | return self.ChainSubClassRule 759 | 760 | _ChainClassRule = property(_get_ChainClassRule) 761 | 762 | 763 | class ChainSubClassRule(object): 764 | 765 | """ 766 | Deviation from spec: None 767 | 768 | Two private attributes are implemented: 769 | _ActionCount - The value of SubstCount 770 | _ActionLookupRecord - The value of SubstLookupRecord 771 | 772 | The private attributes are needed because the contextual subtable processing 773 | is abstracted so that it can be shared between GSUB and GPOS. 774 | """ 775 | 776 | __slots__ = ["BacktrackGlyphCount", "Backtrack", 777 | "InputGlyphCount", "Input", 778 | "LookAheadGlyphCount", "LookAhead", 779 | "SubstCount", "SubstLookupRecord"] 780 | 781 | def __init__(self): 782 | self.BacktrackGlyphCount = 0 783 | self.Backtrack = [] 784 | self.InputGlyphCount = 0 785 | self.Input = [] 786 | self.LookAheadGlyphCount = 0 787 | self.LookAhead = [] 788 | self.SubstCount = 0 789 | self.SubstLookupRecord = [] 790 | 791 | def loadFromFontTools(self, chainSubClassRule): 792 | self.BacktrackGlyphCount = chainSubClassRule.BacktrackGlyphCount 793 | self.Backtrack = list(chainSubClassRule.Backtrack) 794 | self.InputGlyphCount = chainSubClassRule.InputGlyphCount 795 | self.Input = list(chainSubClassRule.Input) 796 | self.LookAheadGlyphCount = chainSubClassRule.LookAheadGlyphCount 797 | self.LookAhead = list(chainSubClassRule.LookAhead) 798 | self.SubstCount = chainSubClassRule.SubstCount 799 | self.SubstLookupRecord = [SubstLookupRecord().loadFromFontTools(record) for record in chainSubClassRule.SubstLookupRecord] 800 | return self 801 | 802 | def _get_ActionCount(self): 803 | return self.SubstCount 804 | 805 | _ActionCount = property(_get_ActionCount) 806 | 807 | def _get_ActionLookupRecord(self): 808 | return self.SubstLookupRecord 809 | 810 | _ActionLookupRecord = property(_get_ActionLookupRecord) 811 | 812 | 813 | class GSUBLookupType6Format3(BaseChainingContextFormat3SubTable): 814 | 815 | """ 816 | Deviation from spec: None 817 | 818 | Two private attributes are implemented: 819 | _ActionCount - The value of SubstCount 820 | _ActionLookupRecord - The value of SubstLookupRecord 821 | 822 | The private attributes are needed because the contextual subtable processing 823 | is abstracted so that it can be shared between GSUB and GPOS. 824 | """ 825 | 826 | __slots__ = ["BacktrackGlyphCount", "BacktrackCoverage", "InputGlyphCount", "InputCoverage" 827 | "LookaheadGlyphCount", "LookaheadCoverage", 828 | "SubstCount", "SubstLookupRecord"] + globalSubstitutionSubTableSlots 829 | 830 | def __init__(self): 831 | super(GSUBLookupType6Format3, self).__init__() 832 | self.SubstFormat = 3 833 | self.SubstCount = 0 834 | self.SubstLookupRecord = [] 835 | 836 | def loadFromFontTools(self, subtable, lookup): 837 | super(GSUBLookupType6Format3, self).loadFromFontTools(subtable, lookup) 838 | self.SubstCount = subtable.SubstCount 839 | self.SubstLookupRecord = [SubstLookupRecord().loadFromFontTools(record) for record in subtable.SubstLookupRecord] 840 | return self 841 | 842 | def _get_ActionCount(self): 843 | return self.SubstCount 844 | 845 | _ActionCount = property(_get_ActionCount) 846 | 847 | def _get_ActionLookupRecord(self): 848 | return self.SubstLookupRecord 849 | 850 | _ActionLookupRecord = property(_get_ActionLookupRecord) 851 | 852 | 853 | class SubstLookupRecord(BaseLookupRecord): pass 854 | 855 | 856 | # ------------- 857 | # Lookup Type 7 858 | # ------------- 859 | 860 | 861 | class GSUBLookupType7(BaseSubTable): 862 | 863 | """ 864 | Deviation from spec: 865 | - ExtensionOffset attribute is not implemented. In its place 866 | is the ExtSubTable attribute. That attribute references 867 | the subtable that should be used for processing. 868 | """ 869 | 870 | __slots__ = ["ExtensionLookupType", "ExtSubTable"] + globalSubstitutionSubTableSlots 871 | 872 | def __init__(self): 873 | self.SubstFormat = 1 874 | self.ExtSubTable = None 875 | 876 | def loadFromFontTools(self, subtable, lookup): 877 | super(GSUBLookupType7, self).loadFromFontTools(subtable, lookup) 878 | self.ExtensionLookupType = subtable.ExtensionLookupType 879 | lookupType = self.ExtensionLookupType 880 | if lookupType == 1: 881 | cls = GSUBLookupType1Format2 882 | elif lookupType == 2: 883 | cls = GSUBLookupType2 884 | elif lookupType == 3: 885 | cls = GSUBLookupType3 886 | elif lookupType == 4: 887 | cls = GSUBLookupType4 888 | elif lookupType == 5: 889 | cls = (GSUBLookupType5Format1, GSUBLookupType5Format2, GSUBLookupType5Format3)[subtable.ExtSubTable.Format-1] 890 | elif lookupType == 6: 891 | cls = (GSUBLookupType6Format1, GSUBLookupType6Format2, GSUBLookupType6Format3)[subtable.ExtSubTable.Format-1] 892 | elif lookupType == 7: 893 | cls = GSUBLookupType7 894 | elif lookupType == 8: 895 | cls = GSUBLookupType8 896 | self.ExtSubTable = cls().loadFromFontTools(subtable.ExtSubTable, lookup) 897 | return self 898 | 899 | def process(self, processed, glyphRecords, featureTag): 900 | return self.ExtSubTable.process(processed, glyphRecords, featureTag) 901 | 902 | 903 | # ------------- 904 | # Lookup Type 8 905 | # ------------- 906 | 907 | 908 | class GSUBLookupType8(BaseSubTable): pass 909 | -------------------------------------------------------------------------------- /Lib/compositor/tables.py: -------------------------------------------------------------------------------- 1 | """ 2 | GSUB, GPOS and GDEF table objects. 3 | """ 4 | 5 | import unicodedata 6 | from compositor.cmap import reverseCMAP 7 | from compositor.scriptList import ScriptList 8 | from compositor.featureList import FeatureList 9 | from compositor.lookupList import GSUBLookupList, GPOSLookupList 10 | from compositor.subTablesBase import Coverage 11 | from compositor.classDefinitionTables import MarkAttachClassDef, GlyphClassDef 12 | from compositor.textUtilities import isWordBreakBefore, isWordBreakAfter 13 | 14 | 15 | defaultOnFeatures = [ 16 | # GSUB 17 | "calt", 18 | "ccmp", # this should always be the first feature processed 19 | "clig", 20 | "fina", 21 | "half", # applies only to indic 22 | "init", 23 | "isol", 24 | "liga", 25 | "locl", 26 | "med2", # applies only to syriac 27 | "medi", 28 | "nukt", # applies only to indic 29 | "pref", # applies only to khmer and myanmar 30 | "pres", # applies only to indic 31 | "pstf", # applies only to indic 32 | "psts", 33 | "rand", 34 | "rlig", # applies only to arabic and syriac 35 | "rphf", # applies only to indic 36 | "tjmo", # applies only to hangul 37 | "vatu", # applies only to indic 38 | "vjmo", # applies only to hangul 39 | # GPOS 40 | "abvm", # applies only to indic 41 | "blwm", # applies only to indic 42 | "kern", 43 | "mark", 44 | "mkmk", 45 | "opbd", 46 | "vkrn" 47 | ] 48 | 49 | 50 | class BaseTable(object): 51 | 52 | def __init__(self, reversedCMAP={}): 53 | self.ScriptList = None 54 | self.FeatureList = None 55 | self.LookupList = None 56 | 57 | self._cmap = reversedCMAP 58 | 59 | self._featureApplicationStates = {} 60 | self._applicableFeatureCache = {} 61 | self._featureTags = None 62 | 63 | def loadFromFontTools(self, table, reversedCMAP, gdef): 64 | self._cmap = reversedCMAP 65 | self.ScriptList = ScriptList().loadFromFontTools(table.table.ScriptList) 66 | self.FeatureList = FeatureList().loadFromFontTools(table.table.FeatureList) 67 | self.LookupList = self._LookupListClass().loadFromFontTools(table.table.LookupList, gdef) 68 | self.getFeatureList() 69 | self._setDefaultFeatureApplicationStates() 70 | return self 71 | 72 | def setCMAP(self, reversedCMAP): 73 | self._cmap = reversedCMAP 74 | 75 | def process(self, glyphRecords, script="latn", langSys=None, logger=None): 76 | """ 77 | Pass the list of GlyphRecord objects through the features 78 | applicable for the given script and langSys. This returns 79 | a list of processed GlyphRecord objects. 80 | """ 81 | applicableLookups = self._preprocess(script, langSys) 82 | if logger: 83 | logger.logApplicableLookups(self, applicableLookups) 84 | logger.logProcessingStart() 85 | result = self._processLookups(glyphRecords, applicableLookups, logger=logger) 86 | if logger: 87 | logger.logProcessingEnd() 88 | return result 89 | 90 | # ------------------ 91 | # feature management 92 | # ------------------ 93 | 94 | def _setDefaultFeatureApplicationStates(self): 95 | """ 96 | Activate all features defined as on by 97 | default in the Layout Tag Registry. 98 | """ 99 | for tag in self._featureTags: 100 | if tag in defaultOnFeatures: 101 | state = True 102 | else: 103 | state = False 104 | self._featureApplicationStates[tag] = state 105 | 106 | def __contains__(self, featureTag): 107 | return featureTag in self._featureTags 108 | 109 | def getScriptList(self): 110 | """ 111 | Get a list of all available scripts in the table. 112 | """ 113 | found = [] 114 | for scriptRecord in self.ScriptList.ScriptRecord: 115 | scriptTag = scriptRecord.ScriptTag 116 | if scriptTag not in found: 117 | found.append(scriptTag) 118 | return found 119 | 120 | def getLanguageList(self): 121 | """ 122 | Get a list of all available languages in the table. 123 | """ 124 | found = [] 125 | for scriptRecord in self.ScriptList.ScriptRecord: 126 | script = scriptRecord.Script 127 | if script.LangSysCount: 128 | for langSysRecord in script.LangSysRecord: 129 | langSysTag = langSysRecord.LangSysTag 130 | if langSysTag not in found: 131 | found.append(langSysTag) 132 | return found 133 | 134 | def getFeatureList(self): 135 | """ 136 | Get a list of all available features in the table. 137 | """ 138 | if self._featureTags is None: 139 | featureList = self.FeatureList 140 | featureRecords = featureList.FeatureRecord 141 | self._featureTags = [] 142 | for featureRecord in featureRecords: 143 | tag = featureRecord.FeatureTag 144 | if tag not in self._featureTags: 145 | self._featureTags.append(tag) 146 | return self._featureTags 147 | 148 | def getFeatureState(self, featureTag): 149 | """ 150 | Get a boolean representing if a feature is on or not. 151 | """ 152 | return self._featureApplicationStates[featureTag] 153 | 154 | def setFeatureState(self, featureTag, state): 155 | """ 156 | Set the application state of a feature. 157 | """ 158 | self._featureApplicationStates[featureTag] = state 159 | 160 | # ------------- 161 | # preprocessing 162 | # ------------- 163 | 164 | def _preprocess(self, script, langSys): 165 | """ 166 | Get a list of ordered (featureTag, lookupObject) 167 | for the given script and langSys. 168 | """ 169 | # 1. get a list of applicable feature records 170 | # based on the script and langSys 171 | features = self._getApplicableFeatures(script, langSys) 172 | # 2. get a list of applicable lookup tables based on the 173 | # found features and the feature application states 174 | lookupIndexes = set() 175 | for feature in features: 176 | featureTag = feature.FeatureTag 177 | if not self._featureApplicationStates[featureTag]: 178 | continue 179 | featureRecord = feature.Feature 180 | if featureRecord.LookupCount: 181 | for lookupIndex in featureRecord.LookupListIndex: 182 | lookupIndexes.add((lookupIndex, featureTag)) 183 | # 3. get a list of ordered lookup records for each feature 184 | lookupList = self.LookupList 185 | lookupRecords = lookupList.Lookup 186 | applicableLookups = [] 187 | for lookupIndex, featureTag in sorted(lookupIndexes): 188 | lookup = lookupRecords[lookupIndex] 189 | applicableLookups.append((featureTag, lookup)) 190 | return applicableLookups 191 | 192 | def _getApplicableFeatures(self, script, langSys): 193 | """ 194 | Get a list of features that apply to 195 | a particular script and langSys. Both 196 | script and langSys can be None. However, 197 | if script is None and no script record 198 | in the font is assigned to DFLT, no 199 | features wil be found. 200 | """ 201 | # first check to see if this has already been found 202 | if (script, langSys) in self._applicableFeatureCache: 203 | return self._applicableFeatureCache[script, langSys] 204 | scriptList = self.ScriptList 205 | # 1. Find the appropriate script record 206 | scriptRecords = scriptList.ScriptRecord 207 | defaultScript = None 208 | applicableScript = None 209 | for scriptRecord in scriptRecords: 210 | scriptTag = scriptRecord.ScriptTag 211 | if scriptTag == "DFLT": 212 | defaultScript = scriptRecord.Script 213 | continue 214 | if scriptTag == script: 215 | applicableScript = scriptRecord.Script 216 | break 217 | # 2. if no suitable script record was found, return an empty list 218 | if applicableScript is None: 219 | applicableScript = defaultScript 220 | if applicableScript is None: 221 | return [] 222 | # 3. get the applicable langSys records 223 | defaultLangSys = applicableScript.DefaultLangSys 224 | specificLangSys = None 225 | # if we have a langSys and the table 226 | # defines specific langSys behavior, 227 | # try to find a matching langSys record 228 | if langSys is not None and applicableScript.LangSysCount: 229 | for langSysRecord in applicableScript.LangSysRecord: 230 | langSysTag = langSysRecord.LangSysTag 231 | if langSysTag == langSys: 232 | specificLangSys = langSysRecord.LangSys 233 | break 234 | # 4. get the list of applicable features 235 | applicableFeatures = set() 236 | if specificLangSys is None: 237 | if defaultLangSys is not None: 238 | if defaultLangSys.FeatureCount: 239 | applicableFeatures |= set(defaultLangSys.FeatureIndex) 240 | if defaultLangSys.ReqFeatureIndex != 0xFFFF: 241 | applicableFeatures.add(defaultLangSys.ReqFeatureIndex) 242 | else: 243 | if specificLangSys.FeatureCount: 244 | applicableFeatures |= set(specificLangSys.FeatureIndex) 245 | if specificLangSys.ReqFeatureIndex != 0xFFFF: 246 | applicableFeatures.add(specificLangSys.ReqFeatureIndex) 247 | applicableFeatures = self._getFeatures(applicableFeatures) 248 | # store the found features for potential use by this method 249 | self._applicableFeatureCache[script, langSys] = applicableFeatures 250 | return applicableFeatures 251 | 252 | def _getFeatures(self, indices): 253 | """ 254 | Get a list of ordered features located at indices. 255 | """ 256 | featureList = self.FeatureList 257 | featureRecords = featureList.FeatureRecord 258 | features = [featureRecords[index] for index in sorted(indices)] 259 | return features 260 | 261 | def _getLookups(self, indices): 262 | """ 263 | Get a list of ordered lookups at indices 264 | """ 265 | lookupList = self.LookupList 266 | lookupRecords = lookupList.Lookup 267 | lookups = [lookupRecords[index] for index in sorted(indices)] 268 | return lookups 269 | 270 | # ---------- 271 | # processing 272 | # ---------- 273 | 274 | def _processLookups(self, glyphRecords, lookups, processingAalt=False, logger=None): 275 | aaltHolding = [] 276 | boundarySensitive = set(["init", "medi", "fina", "isol"]) 277 | for featureTag, lookup in lookups: 278 | # store aalt for processing at the end 279 | if not processingAalt and featureTag == "aalt": 280 | aaltHolding.append((featureTag, lookup)) 281 | continue 282 | if logger: 283 | logger.logLookupStart(self, featureTag, lookup) 284 | processed = [] 285 | # loop through the glyph records 286 | while glyphRecords: 287 | skip = False 288 | if featureTag in boundarySensitive: 289 | side1GlyphNames = [r.getSide1GlyphNameWithUnicodeValue(self._cmap) for r in processed] + [r.getSide1GlyphNameWithUnicodeValue(self._cmap) for r in glyphRecords] 290 | side2GlyphNames = [r.getSide2GlyphNameWithUnicodeValue(self._cmap) for r in processed] + [r.getSide2GlyphNameWithUnicodeValue(self._cmap) for r in glyphRecords] 291 | index = len(processed) 292 | wordBreakBefore = isWordBreakBefore(side1GlyphNames, index, self._cmap) 293 | wordBreakAfter = isWordBreakAfter(side2GlyphNames, index, self._cmap) 294 | if featureTag == "init": 295 | if not wordBreakBefore or wordBreakAfter: 296 | skip = True 297 | elif featureTag == "medi": 298 | if wordBreakBefore or wordBreakAfter: 299 | skip = True 300 | elif featureTag == "fina": 301 | if wordBreakBefore or not wordBreakAfter: 302 | skip = True 303 | elif featureTag == "isol": 304 | if not wordBreakBefore or not wordBreakAfter: 305 | skip = True 306 | # loop through the lookups subtables 307 | performedAction = False 308 | if not skip: 309 | processed, glyphRecords, performedAction = self._processLookup(processed, glyphRecords, lookup, featureTag, logger=logger) 310 | if not performedAction: 311 | processed.append(glyphRecords[0]) 312 | glyphRecords = glyphRecords[1:] 313 | glyphRecords = processed 314 | if logger: 315 | logger.logLookupEnd() 316 | # process aalt for the final glyph records 317 | if not processingAalt and aaltHolding: 318 | glyphRecords = self._processLookups(glyphRecords, aaltHolding, processingAalt=True, logger=logger) 319 | return glyphRecords 320 | 321 | def _processLookup(self, processed, glyphRecords, lookup, featureTag, logger=None): 322 | performedAction = False 323 | for subtable in lookup.SubTable: 324 | if logger: 325 | logger.logSubTableStart(lookup, subtable) 326 | logger.logInput(processed, glyphRecords) 327 | processed, glyphRecords, performedAction = subtable.process(processed, glyphRecords, featureTag) 328 | if logger: 329 | if performedAction: 330 | logger.logOutput(processed, glyphRecords) 331 | logger.logSubTableEnd() 332 | if performedAction: 333 | break 334 | return processed, glyphRecords, performedAction 335 | 336 | 337 | class GSUB(BaseTable): 338 | 339 | _LookupListClass = GSUBLookupList 340 | 341 | 342 | class GPOS(BaseTable): 343 | 344 | _LookupListClass = GPOSLookupList 345 | 346 | 347 | class GDEF(object): 348 | 349 | def __init__(self): 350 | self.GlyphClassDef = None 351 | self.MarkAttachClassDef = None 352 | self.AttachList = None 353 | self.LigCaretList = None 354 | 355 | def loadFromFontTools(self, table): 356 | table = table.table 357 | if table.GlyphClassDef is not None: 358 | self.GlyphClassDef = GlyphClassDef().loadFromFontTools(table.GlyphClassDef) 359 | if table.AttachList is not None: 360 | self.AttachList = AttachList().loadFromFontTools(table.AttachList) 361 | if table.LigCaretList is not None: 362 | self.LigCaretList = LigCaretList().loadFromFontTools(table.LigCaretList) 363 | if table.MarkAttachClassDef is not None: 364 | self.MarkAttachClassDef = MarkAttachClassDef().loadFromFontTools(table.MarkAttachClassDef) 365 | return self 366 | 367 | 368 | class AttachList(object): 369 | 370 | """ 371 | Deviation from spec: 372 | - GlyphCount attribute is not implemented. 373 | """ 374 | 375 | __slots__ = ["AttachPoint", "Coverage"] 376 | 377 | def __init__(self): 378 | self.Coverage = None 379 | self.AttachPoint = [] 380 | 381 | def loadFromFontTools(self, attachList): 382 | self.Coverage = Coverage().loadFromFontTools(attachList.Coverage) 383 | for attachPoint in attachList.AttachPoint: 384 | attachPoint = AttachPoint().loadFromFontTools(attachPoint) 385 | self.AttachPoint.append(attachPoint) 386 | return self 387 | 388 | 389 | class AttachPoint(object): 390 | 391 | """ 392 | Deviation from spec: 393 | - PointCount attribute is not implemented. 394 | """ 395 | 396 | __slots__ = ["PointIndex"] 397 | 398 | def __init__(self): 399 | self.PointIndex = [] 400 | 401 | def loadFromFontTools(self, attachPoint): 402 | self.PointIndex = list(attachPoint.PointIndex) 403 | return self 404 | 405 | 406 | class LigCaretList(object): 407 | 408 | """ 409 | Deviation from spec: 410 | - LigGlyphCount attribute is not implemented. 411 | """ 412 | 413 | __slots__ = ["LigGlyph", "Coverage"] 414 | 415 | def __init__(self): 416 | self.LigGlyph = [] 417 | self.Coverage = None 418 | 419 | def loadFromFontTools(self, ligCaretList): 420 | self.Coverage = Coverage().loadFromFontTools(ligCaretList.Coverage) 421 | for ligGlyph in ligCaretList.LigGlyph: 422 | ligGlyph = LigGlyph().loadFromFontTools(ligGlyph) 423 | self.LigGlyph.append(ligGlyph) 424 | return self 425 | 426 | 427 | class LigGlyph(object): 428 | 429 | """ 430 | Deviation from spec: 431 | - CaretValueCount attribute is not implemented. 432 | """ 433 | 434 | __slots__ = ["CaretValue"] 435 | 436 | def __init__(self): 437 | self.CaretValue = [] 438 | 439 | def loadFromFontTools(self, ligGlyph): 440 | for caretValue in ligGlyph.CaretValue: 441 | format = caretValue.Format 442 | if format == 1: 443 | caretValue = CaretValueFormat1().loadFromFontTools(caretValue) 444 | elif format == 2: 445 | caretValue = CaretValueFormat2().loadFromFontTools(caretValue) 446 | else: 447 | caretValue = CaretValueFormat3().loadFromFontTools(caretValue) 448 | self.CaretValue.append(caretValue) 449 | return self 450 | 451 | 452 | class CaretValueFormat1(object): 453 | 454 | __slots__ = ["CaretValueFormat", "Coordinate"] 455 | 456 | def __init__(self): 457 | self.CaretValueFormat = 1 458 | self.Coordinate = None 459 | 460 | def loadFromFontTools(self, caretValue): 461 | self.Coordinate = caretValue.Coordinate 462 | return self 463 | 464 | 465 | class CaretValueFormat2(object): 466 | 467 | __slots__ = ["CaretValueFormat", "CaretValuePoint"] 468 | 469 | def __init__(self): 470 | self.CaretValueFormat = 2 471 | self.CaretValuePoint = None 472 | 473 | def loadFromFontTools(self, caretValue): 474 | self.CaretValuePoint = caretValue.CaretValuePoint 475 | return self 476 | 477 | 478 | class CaretValueFormat3(CaretValueFormat1): 479 | 480 | """ 481 | Deviation from spec: 482 | - DeviceTable attribute is not implemented. 483 | """ 484 | 485 | __slots__ = ["CaretValueFormat", "Coordinate", "DeviceTable"] 486 | 487 | def __init__(self): 488 | super(CaretValueFormat3, self).__init__() 489 | self.DeviceTable = None 490 | -------------------------------------------------------------------------------- /Lib/compositor/textUtilities.py: -------------------------------------------------------------------------------- 1 | 2 | import unicodedata 3 | from compositor.cmap import reverseCMAP 4 | from compositor.caseConversionMaps import lowerToSingleUpper, upperToSingleLower, specialCasing, softDotted 5 | from compositor.wordBreakProperties import wordBreakProperties 6 | 7 | # --------------- 8 | # Case Conversion 9 | # --------------- 10 | 11 | def convertCase(case, glyphNames, cmap, reversedCMAP, language=None, fallbackGlyph=".notdef"): 12 | """ 13 | Case Conversion Function 14 | 15 | This function converts a list of glyph names to their 16 | upper or lowercase forms following the Unicode locale 17 | specific case conversion rules. 18 | 19 | Arguments: 20 | - case 21 | The case to convert to. Valid values are "upper" and "lower". 22 | - glyphNames 23 | A list of glyph names. 24 | - cmap 25 | The CMAP for the font formatted as a dictionary. 26 | - reversedCMAP 27 | Reversed version of cmap. 28 | - language 29 | The language tag being processed. May be None. 30 | - fallbackGlyph 31 | The glyph name that should be used when the converted 32 | glyph does not exist in the font. 33 | """ 34 | # before anything else happens, the glyph names 35 | # have to be converted to unicode values. if no 36 | # unicode value is available, the glyph name is used. 37 | glyphs = [] 38 | for glyphName in glyphNames: 39 | uniValue = reversedCMAP.get(glyphName) 40 | if uniValue is None: 41 | glyphs.append(glyphName) 42 | else: 43 | glyphs.append(uniValue[0]) 44 | converted = [] 45 | for index, uniValue in enumerate(glyphs): 46 | # glyph name indicating that there is no available unicode 47 | if isinstance(uniValue, str): 48 | converted.append(uniValue) 49 | continue 50 | ## special casing 51 | # specific language 52 | if language is not None: 53 | madeChange = _handleSpecialCasing(case, glyphs, index, uniValue, converted, cmap, reversedCMAP, language) 54 | if madeChange: 55 | continue 56 | # no specific language required 57 | madeChange = _handleSpecialCasing(case, glyphs, index, uniValue, converted, cmap, reversedCMAP, None) 58 | if madeChange: 59 | continue 60 | ## single casing 61 | if case == "upper": 62 | d = lowerToSingleUpper 63 | else: 64 | d = upperToSingleLower 65 | if uniValue in d: 66 | converted.append(d[uniValue]) 67 | continue 68 | ## fallback 69 | converted.append(uniValue) 70 | # convert back to glyph names 71 | glyphNames = [] 72 | for uniValue in converted: 73 | if isinstance(uniValue, str): 74 | glyphNames.append(uniValue) 75 | continue 76 | glyphNames.append(cmap.get(uniValue, fallbackGlyph)) 77 | return glyphNames 78 | 79 | def convertCodeToInt(code): 80 | if not code: 81 | return None 82 | if " " in code: 83 | return tuple([convertCodeToInt(i) for i in code.split(" ")]) 84 | return int(code, 16) 85 | 86 | def _handleSpecialCasing(case, glyphs, index, uniValue, converted, cmap, reversedCMAP, language): 87 | """ 88 | Handle a language specific lookup. 89 | Returns a boolean indicating if a change was made. 90 | """ 91 | if language not in specialCasing: 92 | return False 93 | languageMap = specialCasing[language] 94 | if uniValue in languageMap: 95 | contextMatch = True 96 | context = languageMap[uniValue]["context"] 97 | if context: 98 | contextMatch = False 99 | ## After_I 100 | # The last preceding base character was 101 | # an uppercase I, and there is no inter- 102 | # vening combining character class 230. 103 | if context == "After_I": 104 | previous = None 105 | for otherUniValue in reversed(glyphs[:index]): 106 | previous = otherUniValue 107 | if isinstance(otherUniValue, str): 108 | break 109 | combining = unicodedata.combining(chr(otherUniValue)) 110 | if combining == 230: 111 | previous = None 112 | break 113 | if combining == 0: 114 | break 115 | if previous == convertCodeToInt("0049"): 116 | contextMatch = True 117 | elif context == "Not_After_I": 118 | # not referenced in SpecialCasing 119 | raise NotImplementedError 120 | ## After_Soft_Dotted 121 | # The last preceding character with a 122 | # combining class of zero before C was 123 | # Soft_Dotted, and there is no interven- 124 | # ing combining character class 230 125 | elif context == "After_Soft_Dotted": 126 | previous = None 127 | for otherUniValue in reversed(glyphs[:index]): 128 | previous = otherUniValue 129 | if isinstance(otherUniValue, str): 130 | break 131 | combining = unicodedata.combining(chr(otherUniValue)) 132 | if combining == 230: 133 | previous = None 134 | break 135 | if combining == 0: 136 | break 137 | if previous in softDotted: 138 | contextMatch = True 139 | elif context == "Not_After_Soft_Dotted": 140 | # not referenced in SpecialCasing 141 | raise NotImplementedError 142 | ## More_Above 143 | # C is followed by one or more charac- 144 | # ters of combining class 230 (ABOVE) 145 | # in the combining character sequence. 146 | elif context == "More_Above": 147 | next = None 148 | for otherUniValue in glyphs[index+1:]: 149 | next = otherUniValue 150 | if isinstance(otherUniValue, str): 151 | break 152 | combining = unicodedata.combining(chr(otherUniValue)) 153 | if combining == 230: 154 | contextMatch = True 155 | break 156 | else: 157 | break 158 | elif context == "Not_More_Above": 159 | # not referenced in SpecialCasing 160 | raise NotImplementedError 161 | ## Before_Dot 162 | # C is followed by U+0307 combining 163 | # dot above. Any sequence of charac- 164 | # ters with a combining class that is nei- 165 | # ther 0 nor 230 may intervene between 166 | # the current character and the com- 167 | # bining dot above. 168 | elif context == "Before_Dot": 169 | # not referenced in SpecialCasing 170 | raise NotImplementedError 171 | elif context == "Not_Before_Dot": 172 | next = None 173 | contextMatch = True 174 | for otherUniValue in glyphs[index+1:]: 175 | if isinstance(otherUniValue, str): 176 | break 177 | if otherUniValue == convertCodeToInt("0307"): 178 | contextMatch = False 179 | break 180 | else: 181 | combining = unicodedata.combining(chr(otherUniValue)) 182 | if combining == 0 or combining == 230: 183 | break 184 | ## Final_Sigma 185 | # Within the closest word boundaries 186 | # containing C, there is a cased letter 187 | # before C, and there is no cased letter 188 | # after C. 189 | elif context == "Final_Sigma": 190 | glyphNames = [cmap.get(i, i) for i in glyphs] 191 | if isWordBreakAfter(glyphNames, index, reversedCMAP): 192 | contextMatch = True 193 | ## Unknown 194 | else: 195 | raise NotImplementedError(context) 196 | if contextMatch: 197 | conversion = languageMap[uniValue][case] 198 | # if the conversion is None, it means that the character should be removed. 199 | if conversion is None: 200 | return True 201 | # apply the conversion to the list of converted characters. 202 | if not isinstance(conversion, tuple): 203 | conversion = [conversion] 204 | for code in conversion: 205 | converted.append(code) 206 | return True 207 | return False 208 | 209 | # ----------------------- 210 | # Word Boundary Detection 211 | # ----------------------- 212 | # This implements the default word boundary algorithm explained here: 213 | # http://www.unicode.org/reports/tr29/tr29-11.html#Default_Word_Boundaries 214 | 215 | _notBreakBefore = set([ 216 | # Do not break within CRLF 217 | (convertCodeToInt("240D"), convertCodeToInt("240A")), 218 | # Do not break between most letters. 219 | ("ALetter", "ALetter"), 220 | # Do not break across certain punctuation. 221 | ("ALetter", "MidLetter", "ALetter"), 222 | # Do not break within sequences of digits, or digits adjacent to letters. 223 | ("Numeric", "Numeric"), 224 | ("Numeric", "ALetter"), 225 | ("ALetter", "Numeric"), 226 | # Do not break within sequences, such as "3.2" or "3,456.789". 227 | ("Numeric", "MidNum", "Numeric"), 228 | # Do not break between Katakana. 229 | ("Katakana", "Katakana"), 230 | # Do not break from extenders. 231 | ("ALetter", "ExtendNumLet"), 232 | ("Numeric", "ExtendNumLet"), 233 | ("Katakana", "ExtendNumLet"), 234 | ("ExtendNumLet", "ExtendNumLet"), 235 | ]) 236 | 237 | def isWordBreakBefore(glyphNames, index, reversedCMAP): 238 | """ 239 | Returns a boolean declaring if the position 240 | before index can be considered a word break. 241 | """ 242 | # Start of line 243 | if index == 0: 244 | return True 245 | # get the unicode values and word break properties 246 | # for the previous two, current and next glyphs. 247 | unicodeValue = reversedCMAP.get(glyphNames[index], [None])[0] 248 | wordBreakProperty = wordBreakProperties.get(unicodeValue) 249 | backOneUnicodeValue = reversedCMAP.get(glyphNames[index - 1], [None])[0] 250 | backOneWordBreakProperty = wordBreakProperties.get(backOneUnicodeValue) 251 | if index > 1: 252 | backTwoUnicodeValue = reversedCMAP.get(glyphNames[index - 2], [None])[0] 253 | backTwoWordBreakProperty = wordBreakProperties.get(backTwoUnicodeValue) 254 | else: 255 | backTwoUnicodeValue = False 256 | backTwoWordBreakProperty = False 257 | if index < len(glyphNames) - 1: 258 | forwardOneUnicodeValue = reversedCMAP.get(glyphNames[index + 1], [None])[0] 259 | forwardOneWordBreakProperty = wordBreakProperties.get(forwardOneUnicodeValue) 260 | else: 261 | forwardOneUnicodeValue = None 262 | forwardOneWordBreakProperty = None 263 | # test the previous and current unicode values 264 | if (backOneUnicodeValue, unicodeValue) in _notBreakBefore: 265 | return False 266 | # test the previous and current word break properties 267 | if (backOneWordBreakProperty, wordBreakProperty) in _notBreakBefore: 268 | return False 269 | # test the previous, current and next word break properties 270 | if (backOneWordBreakProperty, wordBreakProperty, forwardOneWordBreakProperty) in _notBreakBefore: 271 | return False 272 | # test the previous, current and next word break properties 273 | if (backTwoWordBreakProperty, backOneWordBreakProperty, wordBreakProperty) in _notBreakBefore: 274 | return False 275 | # Otherwise, break everywhere (including around ideographs). 276 | return True 277 | 278 | _notBreakAfter = set([ 279 | # Do not break within CRLF 280 | (convertCodeToInt("240D"), convertCodeToInt("240A")), 281 | # Do not break between most letters. 282 | ("ALetter", "ALetter"), 283 | # Do not break across certain punctuation. 284 | ("ALetter", "MidLetter", "ALetter"), 285 | # Do not break within sequences of digits, or digits adjacent to letters. 286 | ("Numeric", "Numeric"), 287 | ("Numeric", "ALetter"), 288 | ("ALetter", "Numeric"), 289 | # Do not break within sequences, such as "3.2" or "3,456.789". 290 | ("Numeric", "MidNum", "Numeric"), 291 | # Do not break between Katakana. 292 | ("Katakana", "Katakana"), 293 | # Do not break from extenders. 294 | ("ExtendNumLet", "ALetter"), 295 | ("ExtendNumLet", "Numeric"), 296 | ("ExtendNumLet", "Katakana"), 297 | ]) 298 | 299 | def isWordBreakAfter(glyphNames, index, reversedCMAP): 300 | """ 301 | Returns a boolean declaring if the position 302 | after index can be considered a word break. 303 | """ 304 | # End of line 305 | if index == len(glyphNames) - 1: 306 | return True 307 | # get the unicode values and word break properties 308 | # for the previous, current and next two glyphs. 309 | unicodeValue = reversedCMAP.get(glyphNames[index], [None])[0] 310 | wordBreakProperty = wordBreakProperties.get(unicodeValue) 311 | forwardOneUnicodeValue = reversedCMAP.get(glyphNames[index + 1], [None])[0] 312 | forwardOneWordBreakProperty = wordBreakProperties.get(forwardOneUnicodeValue) 313 | if index > 0: 314 | backOneUnicodeValue = reversedCMAP.get(glyphNames[index - 1], [None])[0] 315 | backOneWordBreakProperty = wordBreakProperties.get(backOneUnicodeValue) 316 | else: 317 | backOneUnicodeValue = None 318 | backOneWordBreakProperty = None 319 | if index < len(glyphNames) - 2: 320 | forwardTwoUnicodeValue = reversedCMAP.get(glyphNames[index + 2], [None])[0] 321 | forwardTwoWordBreakProperty = wordBreakProperties.get(forwardTwoUnicodeValue) 322 | else: 323 | forwardTwoUnicodeValue = None 324 | forwardTwoWordBreakProperty = None 325 | # test the current and next unicode values 326 | if (unicodeValue, forwardOneUnicodeValue) in _notBreakAfter: 327 | return False 328 | # test the current and next word break properties 329 | if (wordBreakProperty, forwardOneWordBreakProperty) in _notBreakAfter: 330 | return False 331 | # test the previous, current and next word break properties 332 | if (backOneWordBreakProperty, wordBreakProperty, forwardOneWordBreakProperty) in _notBreakAfter: 333 | return False 334 | # test the current and next two word break properties 335 | if (wordBreakProperty, forwardOneWordBreakProperty, forwardTwoWordBreakProperty) in _notBreakAfter: 336 | return False 337 | # Otherwise, break everywhere (including around ideographs). 338 | return True 339 | 340 | # ----- 341 | # Tests 342 | # ----- 343 | 344 | # Case Conversion 345 | 346 | def testCaseConversionSimple(): 347 | """ 348 | >>> cmap = {convertCodeToInt("0041") : "A", 349 | ... convertCodeToInt("0061") : "a" 350 | ... } 351 | >>> convertCase("upper", ["a", "a.alt"], cmap, reverseCMAP(cmap), None) 352 | ['A', 'a.alt'] 353 | """ 354 | 355 | def testCaseConversionSimpleMissing(): 356 | """ 357 | >>> cmap = {convertCodeToInt("0061") : "a"} 358 | >>> convertCase("upper", ["a"], cmap, reverseCMAP(cmap), None) 359 | ['.notdef'] 360 | """ 361 | 362 | def testCaseConversionLowerAfterI(): 363 | """ 364 | >>> cmap = {convertCodeToInt("0049") : "I", 365 | ... convertCodeToInt("0069") : "i", 366 | ... convertCodeToInt("0307") : "dotabove", 367 | ... convertCodeToInt("0300") : "grave" 368 | ... } 369 | >>> convertCase("lower", ["I", "dotabove"], cmap, reverseCMAP(cmap), "TRK") 370 | ['i'] 371 | """ 372 | 373 | def testCaseConversionUpperAfterSoftDotted(): 374 | """ 375 | >>> cmap = {convertCodeToInt("0049") : "I", 376 | ... convertCodeToInt("0069") : "i", 377 | ... convertCodeToInt("0307") : "dotabove", 378 | ... convertCodeToInt("0300") : "grave" 379 | ... } 380 | >>> convertCase("upper", ["i", "dotabove"], cmap, reverseCMAP(cmap), "LTH") 381 | ['I'] 382 | >>> convertCase("upper", ["i", "grave", "dotabove"], cmap, reverseCMAP(cmap), "LTH") 383 | ['I', 'grave', 'dotabove'] 384 | """ 385 | 386 | def testCaseConversionLowerMoreAbove(): 387 | """ 388 | >>> cmap = {convertCodeToInt("0049") : "I", 389 | ... convertCodeToInt("0069") : "i", 390 | ... convertCodeToInt("0307") : "dotabove", 391 | ... convertCodeToInt("0300") : "grave" 392 | ... } 393 | >>> convertCase("lower", ["I", "grave"], cmap, reverseCMAP(cmap), "LTH") 394 | ['i', 'dotabove', 'grave'] 395 | >>> convertCase("lower", ["I", "I", "grave"], cmap, reverseCMAP(cmap), "LTH") 396 | ['i', 'i', 'dotabove', 'grave'] 397 | >>> convertCase("lower", ["I", "I"], cmap, reverseCMAP(cmap), "LTH") 398 | ['i', 'i'] 399 | """ 400 | 401 | def testCaseConversionLowerNotBeforeDot(): 402 | """ 403 | >>> cmap = {convertCodeToInt("0049") : "I", 404 | ... convertCodeToInt("0069") : "i", 405 | ... convertCodeToInt("0307") : "dotabove", 406 | ... convertCodeToInt("0131") : "dotlessi", 407 | ... convertCodeToInt("0327") : "cedilla" 408 | ... } 409 | >>> convertCase("lower", ["I"], cmap, reverseCMAP(cmap), "TRK") 410 | ['dotlessi'] 411 | >>> convertCase("lower", ["I", "dotabove"], cmap, reverseCMAP(cmap), "TRK") 412 | ['i'] 413 | >>> convertCase("lower", ["I", "cedilla", "dotabove"], cmap, reverseCMAP(cmap), "TRK") 414 | ['i', 'cedilla'] 415 | """ 416 | 417 | def testCaseConversionFinalSigma(): 418 | """ 419 | >>> cmap = {convertCodeToInt("03A3") : "Sigma", 420 | ... convertCodeToInt("03C3") : "sigma", 421 | ... convertCodeToInt("03C2") : "finalsigma", 422 | ... convertCodeToInt("0020") : "space", 423 | ... } 424 | >>> convertCase("lower", ["Sigma", "Sigma"], cmap, reverseCMAP(cmap)) 425 | ['sigma', 'finalsigma'] 426 | >>> convertCase("lower", ["Sigma", "Sigma", "space"], cmap, reverseCMAP(cmap)) 427 | ['sigma', 'finalsigma', 'space'] 428 | """ 429 | 430 | # Word Boundaries 431 | 432 | def testBreakBefore(): 433 | """ 434 | >>> cmap = {convertCodeToInt("0020") : "space", 435 | ... convertCodeToInt("0041") : "A", 436 | ... convertCodeToInt("002E") : "period", 437 | ... convertCodeToInt("003A") : "colon", 438 | ... convertCodeToInt("005F") : "underscore", 439 | ... convertCodeToInt("0031") : "one", 440 | ... convertCodeToInt("31F0") : "ku", 441 | ... } 442 | >>> cmap = reverseCMAP(cmap) 443 | 444 | # Start of line 445 | >>> isWordBreakBefore(["A", "A"], 0, cmap) 446 | True 447 | 448 | # ALetter, ALetter 449 | >>> isWordBreakBefore(["space", "A", "A"], 1, cmap) 450 | True 451 | >>> isWordBreakBefore(["space", "A", "A"], 2, cmap) 452 | False 453 | 454 | # ALetter, MidLetter, ALetter 455 | >>> isWordBreakBefore(["A", "colon", "A"], 1, cmap) 456 | False 457 | >>> isWordBreakBefore(["A", "colon", "A"], 2, cmap) 458 | False 459 | >>> isWordBreakBefore(["A", "colon", "A", "colon", "A"], 1, cmap) 460 | False 461 | >>> isWordBreakBefore(["A", "colon", "A", "colon", "A"], 2, cmap) 462 | False 463 | >>> isWordBreakBefore(["A", "colon", "A", "colon", "A"], 3, cmap) 464 | False 465 | >>> isWordBreakBefore(["A", "colon", "A", "colon", "A"], 4, cmap) 466 | False 467 | 468 | # Numeric, Numeric 469 | >>> isWordBreakBefore(["space", "one", "one"], 1, cmap) 470 | True 471 | >>> isWordBreakBefore(["space", "one", "one"], 2, cmap) 472 | False 473 | 474 | # ALetter, Numeric 475 | >>> isWordBreakBefore(["space", "A", "one"], 1, cmap) 476 | True 477 | >>> isWordBreakBefore(["space", "A", "one"], 2, cmap) 478 | False 479 | 480 | # Numeric, ALetter 481 | >>> isWordBreakBefore(["space", "one", "A"], 1, cmap) 482 | True 483 | >>> isWordBreakBefore(["space", "one", "A"], 2, cmap) 484 | False 485 | 486 | # Numeric, MidNum, Numeric 487 | >>> isWordBreakBefore(["one", "period", "one"], 1, cmap) 488 | False 489 | >>> isWordBreakBefore(["one", "period", "one"], 2, cmap) 490 | False 491 | 492 | # Katakana, Katakana 493 | >>> isWordBreakBefore(["space", "ku", "ku"], 1, cmap) 494 | True 495 | >>> isWordBreakBefore(["space", "ku", "ku"], 2, cmap) 496 | False 497 | 498 | # ALetter, ExtendNumLet 499 | >>> isWordBreakBefore(["A", "underscore"], 1, cmap) 500 | False 501 | 502 | # Numeric, ExtendNumLet 503 | >>> isWordBreakBefore(["one", "underscore"], 1, cmap) 504 | False 505 | 506 | # Katakana, ExtendNumLet 507 | >>> isWordBreakBefore(["ku", "underscore"], 1, cmap) 508 | False 509 | 510 | # ExtendNumLet, ExtendNumLet 511 | >>> isWordBreakBefore(["underscore", "underscore"], 1, cmap) 512 | False 513 | """ 514 | 515 | def testBreakAfter(): 516 | """ 517 | >>> cmap = {convertCodeToInt("0020") : "space", 518 | ... convertCodeToInt("0041") : "A", 519 | ... convertCodeToInt("002E") : "period", 520 | ... convertCodeToInt("003A") : "colon", 521 | ... convertCodeToInt("005F") : "underscore", 522 | ... convertCodeToInt("0031") : "one", 523 | ... convertCodeToInt("31F0") : "ku", 524 | ... } 525 | >>> cmap = reverseCMAP(cmap) 526 | 527 | # End of line 528 | >>> isWordBreakAfter(["A", "A"], 1, cmap) 529 | True 530 | 531 | # ALetter, ALetter 532 | >>> isWordBreakAfter(["A", "A", "space"], 0, cmap) 533 | False 534 | >>> isWordBreakAfter(["A", "A", "space"], 1, cmap) 535 | True 536 | 537 | # ALetter, MidLetter, ALetter 538 | >>> isWordBreakAfter(["A", "colon", "A"], 0, cmap) 539 | False 540 | >>> isWordBreakAfter(["A", "colon", "A"], 1, cmap) 541 | False 542 | >>> isWordBreakAfter(["A", "colon", "A", "colon", "A"], 0, cmap) 543 | False 544 | >>> isWordBreakAfter(["A", "colon", "A", "colon", "A"], 1, cmap) 545 | False 546 | >>> isWordBreakAfter(["A", "colon", "A", "colon", "A"], 2, cmap) 547 | False 548 | >>> isWordBreakAfter(["A", "colon", "A", "colon", "A"], 3, cmap) 549 | False 550 | 551 | # Numeric, Numeric 552 | >>> isWordBreakAfter(["one", "one", "space"], 0, cmap) 553 | False 554 | >>> isWordBreakAfter(["one", "one", "space"], 1, cmap) 555 | True 556 | 557 | # ALetter, Numeric 558 | >>> isWordBreakAfter(["A", "one", "space"], 0, cmap) 559 | False 560 | >>> isWordBreakAfter(["A", "one", "space"], 1, cmap) 561 | True 562 | 563 | # Numeric, ALetter 564 | >>> isWordBreakAfter(["one", "A", "space"], 0, cmap) 565 | False 566 | >>> isWordBreakAfter(["one", "A", "space"], 1, cmap) 567 | True 568 | 569 | # Numeric, MidNum, Numeric 570 | >>> isWordBreakAfter(["one", "period", "one"], 0, cmap) 571 | False 572 | >>> isWordBreakAfter(["one", "period", "one"], 1, cmap) 573 | False 574 | >>> isWordBreakAfter(["one", "period", "one", "period", "one"], 0, cmap) 575 | False 576 | >>> isWordBreakAfter(["one", "period", "one", "period", "one"], 1, cmap) 577 | False 578 | >>> isWordBreakAfter(["one", "period", "one", "period", "one"], 2, cmap) 579 | False 580 | >>> isWordBreakAfter(["one", "period", "one", "period", "one"], 3, cmap) 581 | False 582 | 583 | # Katakana, Katakana 584 | >>> isWordBreakAfter(["ku", "ku", "space"], 0, cmap) 585 | False 586 | >>> isWordBreakAfter(["ku", "ku", "space"], 1, cmap) 587 | True 588 | 589 | # ALetter, ExtendNumLet 590 | >>> isWordBreakAfter(["underscore", "A"], 0, cmap) 591 | False 592 | 593 | # Numeric, ExtendNumLet 594 | >>> isWordBreakAfter(["underscore", "one"], 0, cmap) 595 | False 596 | 597 | # Katakana, ExtendNumLet 598 | >>> isWordBreakAfter(["underscore", "ku"], 0, cmap) 599 | False 600 | """ 601 | 602 | if __name__ == "__main__": 603 | import doctest 604 | doctest.testmod() 605 | -------------------------------------------------------------------------------- /License.txt: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2006-2009 Type Supply LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include License.txt 2 | include Install.txt 3 | include demo.py 4 | include demo.html 5 | include development.txt 6 | include usage.html 7 | include usage.txt 8 | prune Lib/compositor/test 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | compositor 2 | ========== 3 | 4 | A basic OpenType GSUB and GPOS layout engine written in Python. 5 | 6 | 7 | Table of Contents 8 | ----------------- 9 | 10 | - [Usage Reference](#usage-reference) 11 | - [Assumptions](#assumptions) 12 | - [The Font Object](#the-font-object) 13 | - [The GlyphRecord Object](#the-glyphrecord-object) 14 | - [The Glyph Object](#the-glyph-object) 15 | - [The Info Object](#the-info-object) 16 | - [Development](#development) 17 | - [Installation](#installation) 18 | 19 | 20 | - - - 21 | 22 | 23 | Usage Reference 24 | --------------- 25 | 26 | This document covers the basic usage of the compositor package. For more detailed information read the documentation strings in the source. 27 | 28 | ### Assumptions 29 | 30 | Some assumptions about the OpenType fonts being used are made by the package: 31 | 32 | * The font is valid. 33 | * The font's `cmap` table contains Platform 3 Encoding 1. 34 | * The font does not contain `GSUB` or `GPOS` lookup types that are not supported by the GSUB or GPOS objects. If an unsupported lookup type is present, the lookup will simply be ignored. It will not raise an error. 35 | 36 | ### The Font Object 37 | 38 | #### Importing 39 | 40 | ```python 41 | from compositor import Font 42 | ``` 43 | 44 | #### Construction 45 | 46 | ```python 47 | font = Font(path) 48 | ``` 49 | 50 |
51 |
path 52 |
A path to an OpenType font. 53 |
54 | 55 | #### Special Behavior 56 | 57 | ```python 58 | glyph = font["aGlyphName"] 59 | ``` 60 | 61 | Returns the glyph object named `aGlyphName`. This will raise a `KeyError` if `aGlyphName` is not in the font. 62 | 63 | ```python 64 | isThere = "aGlyphName" in font 65 | ``` 66 | 67 | Returns a boolean representing if `aGlyphName` is in the font. 68 | 69 | #### Methods 70 | 71 | ```python 72 | font.keys() 73 | ``` 74 | 75 | A list of all glyph names in the font. 76 | 77 | ```python 78 | glyphRecords = font.process(aString) 79 | ``` 80 | 81 | This is the most important method. It takes a string (Unicode or plain ASCII) and processes it with the features defined in the font's `GSUB` and `GPOS` tables. A list of `GlyphRecord` objects will be returned. 82 | 83 | ```python 84 | featureTags = font.getFeatureList() 85 | ``` 86 | 87 | A list of all available features in GSUB and GPOS. 88 | 89 | ```python 90 | state = font.getFeatureState(featureTag) 91 | ``` 92 | 93 | Get a boolean representing if a feature is on or not. This assumes that the feature state is consistent in both the GSUB and GPOS tables. A `CompositorError` will be raised if the feature is inconsistently applied. A `CompositorError` will be raised if featureTag is not defined in GSUB or GPOS. 94 | 95 | ```python 96 | font.setFeatureState(self, featureTag, state) 97 | ``` 98 | 99 | Set the application state of a feature. 100 | 101 | #### Attributes 102 | 103 |
104 |
info 105 |
The Info object for the font. 106 |
107 | 108 | ### The GlyphRecord Object 109 | 110 | #### Attributes 111 | 112 |
113 | 114 |
glyphName 115 |
The name of the referenced glyph. 116 | 117 |
xPlacement 118 |
Horizontal placement. 119 | 120 |
yPlacement 121 |
Vertical placement. 122 | 123 |
xAdvance 124 |
Horizontal adjustment for advance. 125 | 126 |
yAdvance 127 |
Vertical adjustment for advance. 128 | 129 |
alternates 130 |
A list of `GlyphRecords` indicating alternates for the glyph. 131 | 132 |
133 | 134 | ### The Glyph Object 135 | 136 | #### Methods 137 | 138 | ```python 139 | glyph.draw(pen) 140 | ``` 141 | 142 | Draws the glyph with a FontTools pen. 143 | 144 | #### Attributes 145 | 146 |
147 | 148 |
name 149 |
The name of the glyph. 150 | 151 |
index 152 |
The glyph's index within the source font. 153 | 154 |
width 155 |
The width of the glyph. 156 | 157 |
bounds 158 |
The bounding box for the glyph. Formatted as `(xMin, yMin, xMax, yMax)`. If the glyph contains no outlines, this will return `None`. 159 | 160 |
161 | 162 | ### The Info Object 163 | 164 | #### Attributes 165 | 166 | - familyName 167 | - styleName 168 | - unitsPerEm 169 | - ascender 170 | - descender 171 | 172 | 173 | Development 174 | ----------- 175 | 176 | ### Relationship to the GSUB and GPOS Specification 177 | 178 | The Compositor GSUB and GPOS tables adhere as closely as possible to the GSUB and GPOS specification. Every effort has been made to keep terminology consistent. All known deviations from the spec are documented. (The deviations are generally trivial. For example, most the of the subtables don't implement the `Count` attributes. This is done because the Python iterator provides a more convenient and faster way to deal with iteration than creating a range. Therefore, the `Count` objects are not needed.) 179 | 180 | ### Object Loading 181 | 182 | For performance reasons, when a new font is loaded, all of the GSUB and GPOS data is extracted from the font with fontTools. The data is placed into compositor objects. These objects are then used to process text. This initial loading can be relatively expensive, but the processing speed of the objects is worth the initial expense. 183 | 184 | 185 | Installation 186 | ------------ 187 | 188 | To install this package, type the following in the command line: 189 | 190 | ``` 191 | python setup.py install 192 | ``` 193 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | from AppKit import * 2 | from fontTools.pens.cocoaPen import CocoaPen 3 | from compositor import Font 4 | 5 | # a simple function that implements path caching 6 | def getCachedNSBezierPath(glyph, font): 7 | if not hasattr(glyph, "nsBezierPath"): 8 | pen = CocoaPen(font) 9 | glyph.draw(pen) 10 | glyph.nsBezierPath = pen.path 11 | return glyph.nsBezierPath 12 | 13 | # a path to a font 14 | fontPath = aPathToYourFont 15 | 16 | # a path to save the image to 17 | imagePath = "demo.tiff" 18 | 19 | # setup the layout engine 20 | font = Font(fontPath) 21 | 22 | # turn the aalt feature on so that we get any alternates 23 | font.setFeatureState("aalt", True) 24 | 25 | # process some text 26 | glyphRecords = font.process(u"HERE IS SOME TEXT!") 27 | 28 | # calculate the image size 29 | pointSize = 50.0 30 | offset = 20 31 | scale = pointSize / font.info.unitsPerEm 32 | imageWidth = sum([font[record.glyphName].width + record.xAdvance for record in glyphRecords]) * scale 33 | imageWidth = int(round(imageWidth)) 34 | imageWidth += offset * 2 35 | imageHeight = pointSize + (offset * 2) 36 | 37 | # setup the image 38 | image = NSImage.alloc().initWithSize_((imageWidth, imageHeight)) 39 | image.lockFocus() 40 | # fill it with white 41 | NSColor.whiteColor().set() 42 | NSRectFill(((0, 0), (imageWidth, imageHeight))) 43 | # offset and set the scale 44 | transform = NSAffineTransform.transform() 45 | transform.translateXBy_yBy_(offset, offset) 46 | transform.scaleBy_(scale) 47 | transform.translateXBy_yBy_(0, abs(font.info.descender)) 48 | transform.concat() 49 | # iterate over the glyph records 50 | for record in glyphRecords: 51 | glyph = font[record.glyphName] 52 | # shift for x and y placement 53 | transform = NSAffineTransform.transform() 54 | transform.translateXBy_yBy_(record.xPlacement, record.yPlacement) 55 | transform.concat() 56 | # if alternates are present, switch the color 57 | if record.alternates: 58 | NSColor.redColor().set() 59 | # otherwise, set the color to black 60 | else: 61 | NSColor.blackColor().set() 62 | # get a NSBezierPath for the glyph and fill it 63 | path = getCachedNSBezierPath(glyph, font) 64 | path.fill() 65 | # shift for the next glyph 66 | transform = NSAffineTransform.transform() 67 | transform.translateXBy_yBy_(record.xAdvance + glyph.width, -record.yPlacement) 68 | transform.concat() 69 | # release the image 70 | image.unlockFocus() 71 | # write the image to disk 72 | tiff = image.TIFFRepresentation() 73 | tiff.writeToFile_atomically_(imagePath, False) 74 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | from setuptools import setup 5 | 6 | try: 7 | import fontTools 8 | except: 9 | print("*** Warning: defcon requires FontTools, see:") 10 | print(" https://github.com/fonttools/fonttools") 11 | 12 | 13 | 14 | setup( 15 | name="compositor", 16 | version="0.2b", 17 | description="A simple OpenType GSUB and GPOS engine.", 18 | author="Tal Leming", 19 | author_email="tal@typesupply.com", 20 | maintainer="Just van Rossum, Frederik Berlaen", 21 | maintainer_email="justvanrossum@gmail.com", 22 | url="https://github.com/robotools/compositor", 23 | license="MIT", 24 | packages=["compositor"], 25 | package_dir={"":"Lib"} 26 | ) 27 | -------------------------------------------------------------------------------- /todo.txt: -------------------------------------------------------------------------------- 1 | - the GlyphRecord object is getting messy. Look into 2 | consolidating all the various private reference attributes. 3 | - the various private contextual attrs should be moved to properties. 4 | - make the various Count attributes properties. 5 | 6 | 7 | Not yet supported: 8 | GSUB Lookup Type 8 (Reverse Chaining Context) 9 | LookupFlag.RightToLeft 10 | 11 | Skipped: 12 | - should the glyph width be calculated into the glyph record's 13 | x-advance before returning it to the caller? 14 | - no. that is the responsibility of a render. 15 | - need to handle featureRecord.FeatureParams if they exist (fontTools does not read these) 16 | - make rand feature support smarter. look back through the 17 | processed glyphs and find the alternate that has occurred 18 | least frequently and farthest away. (this was skipped because 19 | the rand feature is not such a good idea to begin with) 20 | -------------------------------------------------------------------------------- /tools/SpecialCasing.txt: -------------------------------------------------------------------------------- 1 | # SpecialCasing-5.0.0.txt 2 | # Date: 2006-03-03, 08:23:36 GMT [MD] 3 | # 4 | # Unicode Character Database 5 | # Copyright (c) 1991-2006 Unicode, Inc. 6 | # For terms of use, see http://www.unicode.org/terms_of_use.html 7 | # For documentation, see UCD.html 8 | # 9 | # Special Casing Properties 10 | # 11 | # This file is a supplement to the UnicodeData file. 12 | # It contains additional information about the casing of Unicode characters. 13 | # (For compatibility, the UnicodeData.txt file only contains case mappings for 14 | # characters where they are 1-1, and does not have locale-specific mappings.) 15 | # For more information, see the discussion of Case Mappings in the Unicode Standard. 16 | # 17 | # All code points not listed in this file that do not have a simple case mappings 18 | # in UnicodeData.txt map to themselves. 19 | # ================================================================================ 20 | # Format 21 | # ================================================================================ 22 | # The entries in this file are in the following machine-readable format: 23 | # 24 | # ; ; ; <upper> ; (<condition_list> ;)? # <comment> 25 | # 26 | # <code>, <lower>, <title>, and <upper> provide character values in hex. If there is more 27 | # than one character, they are separated by spaces. Other than as used to separate 28 | # elements, spaces are to be ignored. 29 | # 30 | # The <condition_list> is optional. Where present, it consists of one or more locale IDs 31 | # or contexts, separated by spaces. In these conditions: 32 | # - A condition list overrides the normal behavior if all of the listed conditions are true. 33 | # - The context is always the context of the characters in the original string, 34 | # NOT in the resulting string. 35 | # - Case distinctions in the condition list are not significant. 36 | # - Conditions preceded by "Not_" represent the negation of the condition. 37 | # 38 | # A locale ID is defined by taking any language tag as defined by 39 | # RFC 3066 (or its successor), and replacing '-' by '_'. 40 | # 41 | # A context for a character C is defined by Section 3.13 Default Case 42 | # Operations, of The Unicode Standard, Version 5.0. 43 | # (This is identical to the context defined by Unicode 4.1.0, 44 | # as specified in http://www.unicode.org/versions/Unicode4.1.0/) 45 | # 46 | # Parsers of this file must be prepared to deal with future additions to this format: 47 | # * Additional contexts 48 | # * Additional fields 49 | # ================================================================================ 50 | 51 | # ================================================================================ 52 | # Unconditional mappings 53 | # ================================================================================ 54 | 55 | # The German es-zed is special--the normal mapping is to SS. 56 | # Note: the titlecase should never occur in practice. It is equal to titlecase(uppercase(<es-zed>)) 57 | 58 | 00DF; 00DF; 0053 0073; 0053 0053; # LATIN SMALL LETTER SHARP S 59 | 60 | # Preserve canonical equivalence for I with dot. Turkic is handled below. 61 | 62 | 0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE 63 | 64 | # Ligatures 65 | 66 | FB00; FB00; 0046 0066; 0046 0046; # LATIN SMALL LIGATURE FF 67 | FB01; FB01; 0046 0069; 0046 0049; # LATIN SMALL LIGATURE FI 68 | FB02; FB02; 0046 006C; 0046 004C; # LATIN SMALL LIGATURE FL 69 | FB03; FB03; 0046 0066 0069; 0046 0046 0049; # LATIN SMALL LIGATURE FFI 70 | FB04; FB04; 0046 0066 006C; 0046 0046 004C; # LATIN SMALL LIGATURE FFL 71 | FB05; FB05; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE LONG S T 72 | FB06; FB06; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE ST 73 | 74 | 0587; 0587; 0535 0582; 0535 0552; # ARMENIAN SMALL LIGATURE ECH YIWN 75 | FB13; FB13; 0544 0576; 0544 0546; # ARMENIAN SMALL LIGATURE MEN NOW 76 | FB14; FB14; 0544 0565; 0544 0535; # ARMENIAN SMALL LIGATURE MEN ECH 77 | FB15; FB15; 0544 056B; 0544 053B; # ARMENIAN SMALL LIGATURE MEN INI 78 | FB16; FB16; 054E 0576; 054E 0546; # ARMENIAN SMALL LIGATURE VEW NOW 79 | FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH 80 | 81 | # No corresponding uppercase precomposed character 82 | 83 | 0149; 0149; 02BC 004E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE 84 | 0390; 0390; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 85 | 03B0; 03B0; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 86 | 01F0; 01F0; 004A 030C; 004A 030C; # LATIN SMALL LETTER J WITH CARON 87 | 1E96; 1E96; 0048 0331; 0048 0331; # LATIN SMALL LETTER H WITH LINE BELOW 88 | 1E97; 1E97; 0054 0308; 0054 0308; # LATIN SMALL LETTER T WITH DIAERESIS 89 | 1E98; 1E98; 0057 030A; 0057 030A; # LATIN SMALL LETTER W WITH RING ABOVE 90 | 1E99; 1E99; 0059 030A; 0059 030A; # LATIN SMALL LETTER Y WITH RING ABOVE 91 | 1E9A; 1E9A; 0041 02BE; 0041 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING 92 | 1F50; 1F50; 03A5 0313; 03A5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI 93 | 1F52; 1F52; 03A5 0313 0300; 03A5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA 94 | 1F54; 1F54; 03A5 0313 0301; 03A5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA 95 | 1F56; 1F56; 03A5 0313 0342; 03A5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI 96 | 1FB6; 1FB6; 0391 0342; 0391 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI 97 | 1FC6; 1FC6; 0397 0342; 0397 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI 98 | 1FD2; 1FD2; 0399 0308 0300; 0399 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA 99 | 1FD3; 1FD3; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 100 | 1FD6; 1FD6; 0399 0342; 0399 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI 101 | 1FD7; 1FD7; 0399 0308 0342; 0399 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI 102 | 1FE2; 1FE2; 03A5 0308 0300; 03A5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA 103 | 1FE3; 1FE3; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA 104 | 1FE4; 1FE4; 03A1 0313; 03A1 0313; # GREEK SMALL LETTER RHO WITH PSILI 105 | 1FE6; 1FE6; 03A5 0342; 03A5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI 106 | 1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 107 | 1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI 108 | 109 | # IMPORTANT-when capitalizing iota-subscript (0345) 110 | # It MUST be in normalized form--moved to the end of any sequence of combining marks. 111 | # This is because logically it represents a following base character! 112 | # E.g. <iota_subscript> (<Mn> | <Mc> | <Me>)+ => (<Mn> | <Mc> | <Me>)+ <iota_subscript> 113 | # It should never be the first character in a word, so in titlecasing it can be left as is. 114 | 115 | # The following cases are already in the UnicodeData file, so are only commented here. 116 | 117 | # 0345; 0345; 0345; 0399; # COMBINING GREEK YPOGEGRAMMENI 118 | 119 | # All letters with YPOGEGRAMMENI (iota-subscript) or PROSGEGRAMMENI (iota adscript) 120 | # have special uppercases. 121 | # Note: characters with PROSGEGRAMMENI are actually titlecase, not uppercase! 122 | 123 | 1F80; 1F80; 1F88; 1F08 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI 124 | 1F81; 1F81; 1F89; 1F09 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI 125 | 1F82; 1F82; 1F8A; 1F0A 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI 126 | 1F83; 1F83; 1F8B; 1F0B 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI 127 | 1F84; 1F84; 1F8C; 1F0C 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI 128 | 1F85; 1F85; 1F8D; 1F0D 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI 129 | 1F86; 1F86; 1F8E; 1F0E 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 130 | 1F87; 1F87; 1F8F; 1F0F 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 131 | 1F88; 1F80; 1F88; 1F08 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI 132 | 1F89; 1F81; 1F89; 1F09 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI 133 | 1F8A; 1F82; 1F8A; 1F0A 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI 134 | 1F8B; 1F83; 1F8B; 1F0B 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI 135 | 1F8C; 1F84; 1F8C; 1F0C 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI 136 | 1F8D; 1F85; 1F8D; 1F0D 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI 137 | 1F8E; 1F86; 1F8E; 1F0E 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 138 | 1F8F; 1F87; 1F8F; 1F0F 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 139 | 1F90; 1F90; 1F98; 1F28 0399; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI 140 | 1F91; 1F91; 1F99; 1F29 0399; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI 141 | 1F92; 1F92; 1F9A; 1F2A 0399; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI 142 | 1F93; 1F93; 1F9B; 1F2B 0399; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI 143 | 1F94; 1F94; 1F9C; 1F2C 0399; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI 144 | 1F95; 1F95; 1F9D; 1F2D 0399; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI 145 | 1F96; 1F96; 1F9E; 1F2E 0399; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 146 | 1F97; 1F97; 1F9F; 1F2F 0399; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 147 | 1F98; 1F90; 1F98; 1F28 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI 148 | 1F99; 1F91; 1F99; 1F29 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI 149 | 1F9A; 1F92; 1F9A; 1F2A 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI 150 | 1F9B; 1F93; 1F9B; 1F2B 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI 151 | 1F9C; 1F94; 1F9C; 1F2C 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI 152 | 1F9D; 1F95; 1F9D; 1F2D 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI 153 | 1F9E; 1F96; 1F9E; 1F2E 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 154 | 1F9F; 1F97; 1F9F; 1F2F 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 155 | 1FA0; 1FA0; 1FA8; 1F68 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI 156 | 1FA1; 1FA1; 1FA9; 1F69 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI 157 | 1FA2; 1FA2; 1FAA; 1F6A 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI 158 | 1FA3; 1FA3; 1FAB; 1F6B 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI 159 | 1FA4; 1FA4; 1FAC; 1F6C 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI 160 | 1FA5; 1FA5; 1FAD; 1F6D 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI 161 | 1FA6; 1FA6; 1FAE; 1F6E 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 162 | 1FA7; 1FA7; 1FAF; 1F6F 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 163 | 1FA8; 1FA0; 1FA8; 1F68 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI 164 | 1FA9; 1FA1; 1FA9; 1F69 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI 165 | 1FAA; 1FA2; 1FAA; 1F6A 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI 166 | 1FAB; 1FA3; 1FAB; 1F6B 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI 167 | 1FAC; 1FA4; 1FAC; 1F6C 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI 168 | 1FAD; 1FA5; 1FAD; 1F6D 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI 169 | 1FAE; 1FA6; 1FAE; 1F6E 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 170 | 1FAF; 1FA7; 1FAF; 1F6F 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 171 | 1FB3; 1FB3; 1FBC; 0391 0399; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI 172 | 1FBC; 1FB3; 1FBC; 0391 0399; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI 173 | 1FC3; 1FC3; 1FCC; 0397 0399; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI 174 | 1FCC; 1FC3; 1FCC; 0397 0399; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 175 | 1FF3; 1FF3; 1FFC; 03A9 0399; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI 176 | 1FFC; 1FF3; 1FFC; 03A9 0399; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 177 | 178 | # Some characters with YPOGEGRAMMENI also have no corresponding titlecases 179 | 180 | 1FB2; 1FB2; 1FBA 0345; 1FBA 0399; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI 181 | 1FB4; 1FB4; 0386 0345; 0386 0399; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI 182 | 1FC2; 1FC2; 1FCA 0345; 1FCA 0399; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI 183 | 1FC4; 1FC4; 0389 0345; 0389 0399; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI 184 | 1FF2; 1FF2; 1FFA 0345; 1FFA 0399; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI 185 | 1FF4; 1FF4; 038F 0345; 038F 0399; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 186 | 187 | 1FB7; 1FB7; 0391 0342 0345; 0391 0342 0399; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI 188 | 1FC7; 1FC7; 0397 0342 0345; 0397 0342 0399; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI 189 | 1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI 190 | 191 | # ================================================================================ 192 | # Conditional mappings 193 | # ================================================================================ 194 | 195 | # Special case for final form of sigma 196 | 197 | 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA 198 | 199 | # Note: the following cases for non-final are already in the UnicodeData file. 200 | 201 | # 03A3; 03C3; 03A3; 03A3; # GREEK CAPITAL LETTER SIGMA 202 | # 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA 203 | # 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA 204 | 205 | # Note: the following cases are not included, since they would case-fold in lowercasing 206 | 207 | # 03C3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK SMALL LETTER SIGMA 208 | # 03C2; 03C3; 03A3; 03A3; Not_Final_Sigma; # GREEK SMALL LETTER FINAL SIGMA 209 | 210 | # ================================================================================ 211 | # Locale-sensitive mappings 212 | # ================================================================================ 213 | 214 | # Lithuanian 215 | 216 | # Lithuanian retains the dot in a lowercase i when followed by accents. 217 | 218 | # Remove DOT ABOVE after "i" with upper or titlecase 219 | 220 | 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE 221 | 222 | # Introduce an explicit dot above when lowercasing capital I's and J's 223 | # whenever there are more accents above. 224 | # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek) 225 | 226 | 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I 227 | 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J 228 | 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK 229 | 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE 230 | 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE 231 | 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE 232 | 233 | # ================================================================================ 234 | 235 | # Turkish and Azeri 236 | 237 | # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri 238 | # The following rules handle those cases. 239 | 240 | 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE 241 | 0130; 0069; 0130; 0130; az; # LATIN CAPITAL LETTER I WITH DOT ABOVE 242 | 243 | # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. 244 | # This matches the behavior of the canonically equivalent I-dot_above 245 | 246 | 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE 247 | 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE 248 | 249 | # When lowercasing, unless an I is before a dot_above, it turns into a dotless i. 250 | 251 | 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I 252 | 0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I 253 | 254 | # When uppercasing, i turns into a dotted capital I 255 | 256 | 0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I 257 | 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I 258 | 259 | # Note: the following case is already in the UnicodeData file. 260 | 261 | # 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I 262 | 263 | # EOF 264 | 265 | -------------------------------------------------------------------------------- /tools/UnicodeReferenceGenerator.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script generates the caseConversionMaps.py and wordBreakProperties.py modules. 3 | It references the following Unicode files: 4 | PropList.txt 5 | SpecialCasing.txt 6 | UnicodeData.txt 7 | WordBreakProperty.txt 8 | """ 9 | 10 | import os 11 | import pprint 12 | import time 13 | import compositor 14 | 15 | # ----- 16 | # Tools 17 | # ----- 18 | 19 | def filterLines(path): 20 | f = open(path, "r") 21 | text = f.read() 22 | f.close() 23 | lines = [] 24 | for line in text.splitlines(): 25 | line = line.strip() 26 | if not line or line.startswith("#"): 27 | continue 28 | lines.append(line) 29 | return lines 30 | 31 | def convertCodeToInt(code): 32 | if not code: 33 | return None 34 | if " " in code: 35 | return tuple([convertCodeToInt(i) for i in code.split(" ")]) 36 | return int(code, 16) 37 | 38 | # ---------------------- 39 | # wordBreakProperties.py 40 | # ---------------------- 41 | 42 | p = os.path.join(os.path.dirname(__file__), "WordBreakProperty.txt") 43 | 44 | wordBreakProperties = {} 45 | 46 | for line in filterLines(p): 47 | data = line.split(";") 48 | data = [i.split("#")[0].strip() for i in data] 49 | code, property = data 50 | if ".." in code: 51 | start, end = code.split("..") 52 | start = convertCodeToInt(start) 53 | end = convertCodeToInt(end) 54 | codes = range(start, end+1) 55 | else: 56 | codes = [convertCodeToInt(code)] 57 | for code in codes: 58 | assert code not in wordBreakProperties 59 | wordBreakProperties[code] = property 60 | 61 | # Write the module 62 | 63 | text = [ 64 | "# ------------", 65 | "# DO NOT EDIT!", 66 | "# ------------", 67 | "# This was generated by tools/%s" % os.path.basename(__file__), 68 | "# Generated on: %s" % (time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())), 69 | "", 70 | ] 71 | 72 | text.append("%s = %s" % ("wordBreakProperties", pprint.pformat(wordBreakProperties, indent=4))) 73 | 74 | text = "\n".join(text) 75 | 76 | path = os.path.join(os.path.dirname(compositor.__file__), "wordBreakProperties.py") 77 | 78 | f = open(path, "w") 79 | f.write(text) 80 | f.close() 81 | 82 | # --------------------- 83 | # caseConversionMaps.py 84 | # --------------------- 85 | 86 | # Simple Casing 87 | 88 | lowerToSingleUpper = {} 89 | upperToSingleLower = {} 90 | 91 | p = os.path.join(os.path.dirname(__file__), "UnicodeData.txt") 92 | 93 | for line in filterLines(p): 94 | data = line.split(";") 95 | code = convertCodeToInt(data[0]) 96 | upper = data[12] 97 | lower = data[13] 98 | name = data[1] 99 | if upper: 100 | lowerToSingleUpper[code] = convertCodeToInt(upper) 101 | if lower: 102 | upperToSingleLower[code] = convertCodeToInt(lower) 103 | 104 | # Special Casing 105 | 106 | p = os.path.join(os.path.dirname(__file__), "SpecialCasing.txt") 107 | 108 | languageTags = { 109 | "lt" : "LTH", 110 | "tr" : "TRK", 111 | "az" : "AZE" 112 | } 113 | 114 | specialCasing = {} 115 | 116 | for line in filterLines(p): 117 | data = [i.strip() for i in line.split(";")] 118 | code = convertCodeToInt(data[0]) 119 | lower = convertCodeToInt(data[1]) 120 | upper = convertCodeToInt(data[3]) 121 | language = None 122 | context = None 123 | if line.count(";") == 4: 124 | name = data[4] 125 | else: 126 | language = data[4] 127 | name = data[5] 128 | if " " in language: 129 | language, context = language.split(" ") 130 | if language == "Final_Sigma": 131 | context = language 132 | language = None 133 | else: 134 | language = languageTags[language] 135 | if language not in specialCasing: 136 | specialCasing[language] = {} 137 | specialCasing[language][code] = dict(context=context, upper=upper, lower=lower) 138 | 139 | # Property List 140 | 141 | softDotted = {} 142 | 143 | p = os.path.join(os.path.dirname(__file__), "PropList.txt") 144 | 145 | for line in filterLines(p): 146 | code, prop = [i.strip() for i in line.split(";")] 147 | prop = prop.split("#")[0].strip() 148 | s = None 149 | if prop == "Soft_Dotted": 150 | s = softDotted 151 | if s is None: 152 | continue 153 | if ".." in code: 154 | start, end = code.split("..") 155 | start = convertCodeToInt(start) 156 | end = convertCodeToInt(end) 157 | codes = range(start, end+1) 158 | else: 159 | codes = [convertCodeToInt(code)] 160 | for code in codes: 161 | s[code] = None 162 | 163 | # Write the module 164 | 165 | path = os.path.join(os.path.dirname(compositor.__file__), "caseConversionMaps.py") 166 | 167 | text = [ 168 | "# ------------", 169 | "# DO NOT EDIT!", 170 | "# ------------", 171 | "# This was generated by tools/%s" % os.path.basename(__file__), 172 | "# Generated on: %s" % (time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())), 173 | "", 174 | ] 175 | 176 | toWrite = [ 177 | ("lowerToSingleUpper", lowerToSingleUpper), 178 | ("upperToSingleLower", upperToSingleLower), 179 | ("specialCasing", specialCasing), 180 | ("softDotted", softDotted) 181 | ] 182 | 183 | for name, d in toWrite: 184 | s = "%s = %s" % (name, pprint.pformat(d, indent=4)) 185 | text.append(s) 186 | text.append("") 187 | 188 | text = "\n".join(text) 189 | 190 | f = open(path, "w") 191 | f.write(text) 192 | f.close() 193 | -------------------------------------------------------------------------------- /tools/WordBreakProperty.txt: -------------------------------------------------------------------------------- 1 | # WordBreakProperty-5.0.0.txt 2 | # Date: 2006-06-07, 23:23:03 GMT [MD] 3 | # 4 | # Unicode Character Database 5 | # Copyright (c) 1991-2006 Unicode, Inc. 6 | # For terms of use, see http://www.unicode.org/terms_of_use.html 7 | # For documentation, see UCD.html 8 | 9 | # ================================================ 10 | 11 | # Property: Word_Break 12 | 13 | # All code points not explicitly listed for Word_Break 14 | # have the value Other (XX). 15 | 16 | # @missing: 0000..10FFFF; Other 17 | 18 | # ================================================ 19 | 20 | 00AD ; Format # Cf SOFT HYPHEN 21 | 0600..0603 ; Format # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA 22 | 06DD ; Format # Cf ARABIC END OF AYAH 23 | 070F ; Format # Cf SYRIAC ABBREVIATION MARK 24 | 17B4..17B5 ; Format # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 25 | 200B ; Format # Cf ZERO WIDTH SPACE 26 | 200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK 27 | 202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE 28 | 2060..2063 ; Format # Cf [4] WORD JOINER..INVISIBLE SEPARATOR 29 | 206A..206F ; Format # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES 30 | FEFF ; Format # Cf ZERO WIDTH NO-BREAK SPACE 31 | FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR 32 | 1D173..1D17A ; Format # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE 33 | E0001 ; Format # Cf LANGUAGE TAG 34 | E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG 35 | 36 | # Total code points: 136 37 | 38 | # ================================================ 39 | 40 | 3031..3035 ; Katakana # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF 41 | 309B..309C ; Katakana # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 42 | 30A0 ; Katakana # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN 43 | 30A1..30FA ; Katakana # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO 44 | 30FC..30FE ; Katakana # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK 45 | 30FF ; Katakana # Lo KATAKANA DIGRAPH KOTO 46 | 31F0..31FF ; Katakana # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 47 | FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU 48 | FF70 ; Katakana # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK 49 | FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N 50 | FF9E..FF9F ; Katakana # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK 51 | 52 | # Total code points: 176 53 | 54 | # ================================================ 55 | 56 | 0041..005A ; ALetter # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 57 | 0061..007A ; ALetter # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z 58 | 00AA ; ALetter # L& FEMININE ORDINAL INDICATOR 59 | 00B5 ; ALetter # L& MICRO SIGN 60 | 00BA ; ALetter # L& MASCULINE ORDINAL INDICATOR 61 | 00C0..00D6 ; ALetter # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 62 | 00D8..00F6 ; ALetter # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 63 | 00F8..01BA ; ALetter # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL 64 | 01BB ; ALetter # Lo LATIN LETTER TWO WITH STROKE 65 | 01BC..01BF ; ALetter # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN 66 | 01C0..01C3 ; ALetter # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK 67 | 01C4..0293 ; ALetter # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL 68 | 0294 ; ALetter # Lo LATIN LETTER GLOTTAL STOP 69 | 0295..02AF ; ALetter # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 70 | 02B0..02C1 ; ALetter # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP 71 | 02C6..02D1 ; ALetter # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON 72 | 02E0..02E4 ; ALetter # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP 73 | 02EE ; ALetter # Lm MODIFIER LETTER DOUBLE APOSTROPHE 74 | 037A ; ALetter # Lm GREEK YPOGEGRAMMENI 75 | 037B..037D ; ALetter # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL 76 | 0386 ; ALetter # L& GREEK CAPITAL LETTER ALPHA WITH TONOS 77 | 0388..038A ; ALetter # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS 78 | 038C ; ALetter # L& GREEK CAPITAL LETTER OMICRON WITH TONOS 79 | 038E..03A1 ; ALetter # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO 80 | 03A3..03CE ; ALetter # L& [44] GREEK CAPITAL LETTER SIGMA..GREEK SMALL LETTER OMEGA WITH TONOS 81 | 03D0..03F5 ; ALetter # L& [38] GREEK BETA SYMBOL..GREEK LUNATE EPSILON SYMBOL 82 | 03F7..0481 ; ALetter # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA 83 | 048A..0513 ; ALetter # L& [138] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH HOOK 84 | 0531..0556 ; ALetter # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 85 | 0559 ; ALetter # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING 86 | 0561..0587 ; ALetter # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 87 | 05D0..05EA ; ALetter # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV 88 | 05F0..05F2 ; ALetter # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD 89 | 05F3 ; ALetter # Po HEBREW PUNCTUATION GERESH 90 | 0621..063A ; ALetter # Lo [26] ARABIC LETTER HAMZA..ARABIC LETTER GHAIN 91 | 0640 ; ALetter # Lm ARABIC TATWEEL 92 | 0641..064A ; ALetter # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH 93 | 066E..066F ; ALetter # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF 94 | 0671..06D3 ; ALetter # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE 95 | 06D5 ; ALetter # Lo ARABIC LETTER AE 96 | 06E5..06E6 ; ALetter # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH 97 | 06EE..06EF ; ALetter # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V 98 | 06FA..06FC ; ALetter # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW 99 | 06FF ; ALetter # Lo ARABIC LETTER HEH WITH INVERTED V 100 | 0710 ; ALetter # Lo SYRIAC LETTER ALAPH 101 | 0712..072F ; ALetter # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH 102 | 074D..076D ; ALetter # Lo [33] SYRIAC LETTER SOGDIAN ZHAIN..ARABIC LETTER SEEN WITH TWO DOTS VERTICALLY ABOVE 103 | 0780..07A5 ; ALetter # Lo [38] THAANA LETTER HAA..THAANA LETTER WAAVU 104 | 07B1 ; ALetter # Lo THAANA LETTER NAA 105 | 07CA..07EA ; ALetter # Lo [33] NKO LETTER A..NKO LETTER JONA RA 106 | 07F4..07F5 ; ALetter # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE 107 | 07FA ; ALetter # Lm NKO LAJANYALAN 108 | 0903 ; ALetter # Mc DEVANAGARI SIGN VISARGA 109 | 0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 110 | 093D ; ALetter # Lo DEVANAGARI SIGN AVAGRAHA 111 | 093E..0940 ; ALetter # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II 112 | 0949..094C ; ALetter # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU 113 | 0950 ; ALetter # Lo DEVANAGARI OM 114 | 0958..0961 ; ALetter # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL 115 | 097B..097F ; ALetter # Lo [5] DEVANAGARI LETTER GGA..DEVANAGARI LETTER BBA 116 | 0982..0983 ; ALetter # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA 117 | 0985..098C ; ALetter # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L 118 | 098F..0990 ; ALetter # Lo [2] BENGALI LETTER E..BENGALI LETTER AI 119 | 0993..09A8 ; ALetter # Lo [22] BENGALI LETTER O..BENGALI LETTER NA 120 | 09AA..09B0 ; ALetter # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA 121 | 09B2 ; ALetter # Lo BENGALI LETTER LA 122 | 09B6..09B9 ; ALetter # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA 123 | 09BD ; ALetter # Lo BENGALI SIGN AVAGRAHA 124 | 09BF..09C0 ; ALetter # Mc [2] BENGALI VOWEL SIGN I..BENGALI VOWEL SIGN II 125 | 09C7..09C8 ; ALetter # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI 126 | 09CB..09CC ; ALetter # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU 127 | 09CE ; ALetter # Lo BENGALI LETTER KHANDA TA 128 | 09DC..09DD ; ALetter # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA 129 | 09DF..09E1 ; ALetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL 130 | 09F0..09F1 ; ALetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL 131 | 0A03 ; ALetter # Mc GURMUKHI SIGN VISARGA 132 | 0A05..0A0A ; ALetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 133 | 0A0F..0A10 ; ALetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI 134 | 0A13..0A28 ; ALetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA 135 | 0A2A..0A30 ; ALetter # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA 136 | 0A32..0A33 ; ALetter # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA 137 | 0A35..0A36 ; ALetter # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA 138 | 0A38..0A39 ; ALetter # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA 139 | 0A3E..0A40 ; ALetter # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II 140 | 0A59..0A5C ; ALetter # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA 141 | 0A5E ; ALetter # Lo GURMUKHI LETTER FA 142 | 0A72..0A74 ; ALetter # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR 143 | 0A83 ; ALetter # Mc GUJARATI SIGN VISARGA 144 | 0A85..0A8D ; ALetter # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E 145 | 0A8F..0A91 ; ALetter # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O 146 | 0A93..0AA8 ; ALetter # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA 147 | 0AAA..0AB0 ; ALetter # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA 148 | 0AB2..0AB3 ; ALetter # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA 149 | 0AB5..0AB9 ; ALetter # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA 150 | 0ABD ; ALetter # Lo GUJARATI SIGN AVAGRAHA 151 | 0ABE..0AC0 ; ALetter # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II 152 | 0AC9 ; ALetter # Mc GUJARATI VOWEL SIGN CANDRA O 153 | 0ACB..0ACC ; ALetter # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU 154 | 0AD0 ; ALetter # Lo GUJARATI OM 155 | 0AE0..0AE1 ; ALetter # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL 156 | 0B02..0B03 ; ALetter # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA 157 | 0B05..0B0C ; ALetter # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L 158 | 0B0F..0B10 ; ALetter # Lo [2] ORIYA LETTER E..ORIYA LETTER AI 159 | 0B13..0B28 ; ALetter # Lo [22] ORIYA LETTER O..ORIYA LETTER NA 160 | 0B2A..0B30 ; ALetter # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA 161 | 0B32..0B33 ; ALetter # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA 162 | 0B35..0B39 ; ALetter # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA 163 | 0B3D ; ALetter # Lo ORIYA SIGN AVAGRAHA 164 | 0B40 ; ALetter # Mc ORIYA VOWEL SIGN II 165 | 0B47..0B48 ; ALetter # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI 166 | 0B4B..0B4C ; ALetter # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU 167 | 0B5C..0B5D ; ALetter # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA 168 | 0B5F..0B61 ; ALetter # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL 169 | 0B71 ; ALetter # Lo ORIYA LETTER WA 170 | 0B83 ; ALetter # Lo TAMIL SIGN VISARGA 171 | 0B85..0B8A ; ALetter # Lo [6] TAMIL LETTER A..TAMIL LETTER UU 172 | 0B8E..0B90 ; ALetter # Lo [3] TAMIL LETTER E..TAMIL LETTER AI 173 | 0B92..0B95 ; ALetter # Lo [4] TAMIL LETTER O..TAMIL LETTER KA 174 | 0B99..0B9A ; ALetter # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA 175 | 0B9C ; ALetter # Lo TAMIL LETTER JA 176 | 0B9E..0B9F ; ALetter # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA 177 | 0BA3..0BA4 ; ALetter # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA 178 | 0BA8..0BAA ; ALetter # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA 179 | 0BAE..0BB9 ; ALetter # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA 180 | 0BBF ; ALetter # Mc TAMIL VOWEL SIGN I 181 | 0BC1..0BC2 ; ALetter # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU 182 | 0BC6..0BC8 ; ALetter # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI 183 | 0BCA..0BCC ; ALetter # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU 184 | 0C01..0C03 ; ALetter # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA 185 | 0C05..0C0C ; ALetter # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L 186 | 0C0E..0C10 ; ALetter # Lo [3] TELUGU LETTER E..TELUGU LETTER AI 187 | 0C12..0C28 ; ALetter # Lo [23] TELUGU LETTER O..TELUGU LETTER NA 188 | 0C2A..0C33 ; ALetter # Lo [10] TELUGU LETTER PA..TELUGU LETTER LLA 189 | 0C35..0C39 ; ALetter # Lo [5] TELUGU LETTER VA..TELUGU LETTER HA 190 | 0C41..0C44 ; ALetter # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR 191 | 0C60..0C61 ; ALetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 192 | 0C82..0C83 ; ALetter # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA 193 | 0C85..0C8C ; ALetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L 194 | 0C8E..0C90 ; ALetter # Lo [3] KANNADA LETTER E..KANNADA LETTER AI 195 | 0C92..0CA8 ; ALetter # Lo [23] KANNADA LETTER O..KANNADA LETTER NA 196 | 0CAA..0CB3 ; ALetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 197 | 0CB5..0CB9 ; ALetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 198 | 0CBD ; ALetter # Lo KANNADA SIGN AVAGRAHA 199 | 0CBE ; ALetter # Mc KANNADA VOWEL SIGN AA 200 | 0CC0..0CC1 ; ALetter # Mc [2] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN U 201 | 0CC3..0CC4 ; ALetter # Mc [2] KANNADA VOWEL SIGN VOCALIC R..KANNADA VOWEL SIGN VOCALIC RR 202 | 0CC7..0CC8 ; ALetter # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI 203 | 0CCA..0CCB ; ALetter # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 204 | 0CDE ; ALetter # Lo KANNADA LETTER FA 205 | 0CE0..0CE1 ; ALetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 206 | 0D02..0D03 ; ALetter # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 207 | 0D05..0D0C ; ALetter # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L 208 | 0D0E..0D10 ; ALetter # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI 209 | 0D12..0D28 ; ALetter # Lo [23] MALAYALAM LETTER O..MALAYALAM LETTER NA 210 | 0D2A..0D39 ; ALetter # Lo [16] MALAYALAM LETTER PA..MALAYALAM LETTER HA 211 | 0D3F..0D40 ; ALetter # Mc [2] MALAYALAM VOWEL SIGN I..MALAYALAM VOWEL SIGN II 212 | 0D46..0D48 ; ALetter # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI 213 | 0D4A..0D4C ; ALetter # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU 214 | 0D60..0D61 ; ALetter # Lo [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL 215 | 0D82..0D83 ; ALetter # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA 216 | 0D85..0D96 ; ALetter # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA 217 | 0D9A..0DB1 ; ALetter # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA 218 | 0DB3..0DBB ; ALetter # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA 219 | 0DBD ; ALetter # Lo SINHALA LETTER DANTAJA LAYANNA 220 | 0DC0..0DC6 ; ALetter # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA 221 | 0DD0..0DD1 ; ALetter # Mc [2] SINHALA VOWEL SIGN KETTI AEDA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA 222 | 0DD8..0DDE ; ALetter # Mc [7] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA 223 | 0DF2..0DF3 ; ALetter # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA 224 | 0F00 ; ALetter # Lo TIBETAN SYLLABLE OM 225 | 0F40..0F47 ; ALetter # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA 226 | 0F49..0F6A ; ALetter # Lo [34] TIBETAN LETTER NYA..TIBETAN LETTER FIXED-FORM RA 227 | 0F7F ; ALetter # Mc TIBETAN SIGN RNAM BCAD 228 | 0F88..0F8B ; ALetter # Lo [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS 229 | 10A0..10C5 ; ALetter # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE 230 | 10D0..10FA ; ALetter # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 231 | 10FC ; ALetter # Lm MODIFIER LETTER GEORGIAN NAR 232 | 1100..1159 ; ALetter # Lo [90] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG YEORINHIEUH 233 | 115F..11A2 ; ALetter # Lo [68] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG SSANGARAEA 234 | 11A8..11F9 ; ALetter # Lo [82] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG YEORINHIEUH 235 | 1200..1248 ; ALetter # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA 236 | 124A..124D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 237 | 1250..1256 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 238 | 1258 ; ALetter # Lo ETHIOPIC SYLLABLE QHWA 239 | 125A..125D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE 240 | 1260..1288 ; ALetter # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA 241 | 128A..128D ; ALetter # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE 242 | 1290..12B0 ; ALetter # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA 243 | 12B2..12B5 ; ALetter # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE 244 | 12B8..12BE ; ALetter # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO 245 | 12C0 ; ALetter # Lo ETHIOPIC SYLLABLE KXWA 246 | 12C2..12C5 ; ALetter # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE 247 | 12C8..12D6 ; ALetter # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O 248 | 12D8..1310 ; ALetter # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA 249 | 1312..1315 ; ALetter # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 250 | 1318..135A ; ALetter # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA 251 | 1380..138F ; ALetter # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE 252 | 13A0..13F4 ; ALetter # Lo [85] CHEROKEE LETTER A..CHEROKEE LETTER YV 253 | 1401..166C ; ALetter # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA 254 | 166F..1676 ; ALetter # Lo [8] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS NNGAA 255 | 1681..169A ; ALetter # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH 256 | 16A0..16EA ; ALetter # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X 257 | 16EE..16F0 ; ALetter # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL 258 | 1700..170C ; ALetter # Lo [13] TAGALOG LETTER A..TAGALOG LETTER YA 259 | 170E..1711 ; ALetter # Lo [4] TAGALOG LETTER LA..TAGALOG LETTER HA 260 | 1720..1731 ; ALetter # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA 261 | 1740..1751 ; ALetter # Lo [18] BUHID LETTER A..BUHID LETTER HA 262 | 1760..176C ; ALetter # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA 263 | 176E..1770 ; ALetter # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA 264 | 1820..1842 ; ALetter # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI 265 | 1843 ; ALetter # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN 266 | 1844..1877 ; ALetter # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA 267 | 1880..18A8 ; ALetter # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA 268 | 1900..191C ; ALetter # Lo [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA 269 | 1923..1926 ; ALetter # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU 270 | 1929..192B ; ALetter # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA 271 | 1930..1931 ; ALetter # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA 272 | 1933..1938 ; ALetter # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA 273 | 1A00..1A16 ; ALetter # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA 274 | 1A19..1A1B ; ALetter # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE 275 | 1B04 ; ALetter # Mc BALINESE SIGN BISAH 276 | 1B05..1B33 ; ALetter # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA 277 | 1B35 ; ALetter # Mc BALINESE VOWEL SIGN TEDUNG 278 | 1B3B ; ALetter # Mc BALINESE VOWEL SIGN RA REPA TEDUNG 279 | 1B3D..1B41 ; ALetter # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG 280 | 1B43 ; ALetter # Mc BALINESE VOWEL SIGN PEPET TEDUNG 281 | 1B45..1B4B ; ALetter # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK 282 | 1D00..1D2B ; ALetter # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL 283 | 1D2C..1D61 ; ALetter # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI 284 | 1D62..1D77 ; ALetter # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G 285 | 1D78 ; ALetter # Lm MODIFIER LETTER CYRILLIC EN 286 | 1D79..1D9A ; ALetter # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 287 | 1D9B..1DBF ; ALetter # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA 288 | 1E00..1E9B ; ALetter # L& [156] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER LONG S WITH DOT ABOVE 289 | 1EA0..1EF9 ; ALetter # L& [90] LATIN CAPITAL LETTER A WITH DOT BELOW..LATIN SMALL LETTER Y WITH TILDE 290 | 1F00..1F15 ; ALetter # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA 291 | 1F18..1F1D ; ALetter # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA 292 | 1F20..1F45 ; ALetter # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA 293 | 1F48..1F4D ; ALetter # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA 294 | 1F50..1F57 ; ALetter # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI 295 | 1F59 ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA 296 | 1F5B ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA 297 | 1F5D ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA 298 | 1F5F..1F7D ; ALetter # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA 299 | 1F80..1FB4 ; ALetter # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI 300 | 1FB6..1FBC ; ALetter # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI 301 | 1FBE ; ALetter # L& GREEK PROSGEGRAMMENI 302 | 1FC2..1FC4 ; ALetter # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI 303 | 1FC6..1FCC ; ALetter # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 304 | 1FD0..1FD3 ; ALetter # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 305 | 1FD6..1FDB ; ALetter # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA 306 | 1FE0..1FEC ; ALetter # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA 307 | 1FF2..1FF4 ; ALetter # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 308 | 1FF6..1FFC ; ALetter # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 309 | 2071 ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER I 310 | 207F ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER N 311 | 2090..2094 ; ALetter # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA 312 | 2102 ; ALetter # L& DOUBLE-STRUCK CAPITAL C 313 | 2107 ; ALetter # L& EULER CONSTANT 314 | 210A..2113 ; ALetter # L& [10] SCRIPT SMALL G..SCRIPT SMALL L 315 | 2115 ; ALetter # L& DOUBLE-STRUCK CAPITAL N 316 | 2119..211D ; ALetter # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R 317 | 2124 ; ALetter # L& DOUBLE-STRUCK CAPITAL Z 318 | 2126 ; ALetter # L& OHM SIGN 319 | 2128 ; ALetter # L& BLACK-LETTER CAPITAL Z 320 | 212A..212D ; ALetter # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C 321 | 212F..2134 ; ALetter # L& [6] SCRIPT SMALL E..SCRIPT SMALL O 322 | 2135..2138 ; ALetter # Lo [4] ALEF SYMBOL..DALET SYMBOL 323 | 2139 ; ALetter # L& INFORMATION SOURCE 324 | 213C..213F ; ALetter # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI 325 | 2145..2149 ; ALetter # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J 326 | 214E ; ALetter # L& TURNED SMALL F 327 | 2160..2182 ; ALetter # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND 328 | 2183..2184 ; ALetter # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C 329 | 24B6..24E9 ; ALetter # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z 330 | 2C00..2C2E ; ALetter # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 331 | 2C30..2C5E ; ALetter # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE 332 | 2C60..2C6C ; ALetter # L& [13] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SMALL LETTER Z WITH DESCENDER 333 | 2C74..2C77 ; ALetter # L& [4] LATIN SMALL LETTER V WITH CURL..LATIN SMALL LETTER TAILLESS PHI 334 | 2C80..2CE4 ; ALetter # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI 335 | 2D00..2D25 ; ALetter # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE 336 | 2D30..2D65 ; ALetter # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ 337 | 2D6F ; ALetter # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 338 | 2D80..2D96 ; ALetter # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 339 | 2DA0..2DA6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO 340 | 2DA8..2DAE ; ALetter # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO 341 | 2DB0..2DB6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO 342 | 2DB8..2DBE ; ALetter # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO 343 | 2DC0..2DC6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO 344 | 2DC8..2DCE ; ALetter # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO 345 | 2DD0..2DD6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO 346 | 2DD8..2DDE ; ALetter # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO 347 | 3005 ; ALetter # Lm IDEOGRAPHIC ITERATION MARK 348 | 303B ; ALetter # Lm VERTICAL IDEOGRAPHIC ITERATION MARK 349 | 303C ; ALetter # Lo MASU MARK 350 | 3105..312C ; ALetter # Lo [40] BOPOMOFO LETTER B..BOPOMOFO LETTER GN 351 | 3131..318E ; ALetter # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE 352 | 31A0..31B7 ; ALetter # Lo [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H 353 | A000..A014 ; ALetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E 354 | A015 ; ALetter # Lm YI SYLLABLE WU 355 | A016..A48C ; ALetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR 356 | A717..A71A ; ALetter # Lm [4] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOWER RIGHT CORNER ANGLE 357 | A800..A801 ; ALetter # Lo [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I 358 | A803..A805 ; ALetter # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O 359 | A807..A80A ; ALetter # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO 360 | A80C..A822 ; ALetter # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO 361 | A823..A824 ; ALetter # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I 362 | A827 ; ALetter # Mc SYLOTI NAGRI VOWEL SIGN OO 363 | A840..A873 ; ALetter # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU 364 | AC00..D7A3 ; ALetter # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH 365 | FA30..FA6A ; ALetter # Lo [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A 366 | FB00..FB06 ; ALetter # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST 367 | FB13..FB17 ; ALetter # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH 368 | FB1D ; ALetter # Lo HEBREW LETTER YOD WITH HIRIQ 369 | FB1F..FB28 ; ALetter # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV 370 | FB2A..FB36 ; ALetter # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH 371 | FB38..FB3C ; ALetter # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH 372 | FB3E ; ALetter # Lo HEBREW LETTER MEM WITH DAGESH 373 | FB40..FB41 ; ALetter # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH 374 | FB43..FB44 ; ALetter # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH 375 | FB46..FBB1 ; ALetter # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM 376 | FBD3..FD3D ; ALetter # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM 377 | FD50..FD8F ; ALetter # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM 378 | FD92..FDC7 ; ALetter # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM 379 | FDF0..FDFB ; ALetter # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU 380 | FE70..FE74 ; ALetter # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM 381 | FE76..FEFC ; ALetter # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 382 | FF21..FF3A ; ALetter # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 383 | FF41..FF5A ; ALetter # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z 384 | FFA0..FFBE ; ALetter # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH 385 | FFC2..FFC7 ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E 386 | FFCA..FFCF ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE 387 | FFD2..FFD7 ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU 388 | FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 389 | 10000..1000B ; ALetter # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE 390 | 1000D..10026 ; ALetter # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO 391 | 10028..1003A ; ALetter # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO 392 | 1003C..1003D ; ALetter # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE 393 | 1003F..1004D ; ALetter # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO 394 | 10050..1005D ; ALetter # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 395 | 10080..100FA ; ALetter # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 396 | 10140..10174 ; ALetter # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS 397 | 10300..1031E ; ALetter # Lo [31] OLD ITALIC LETTER A..OLD ITALIC LETTER UU 398 | 10330..10340 ; ALetter # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA 399 | 10341 ; ALetter # Nl GOTHIC LETTER NINETY 400 | 10342..10349 ; ALetter # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL 401 | 1034A ; ALetter # Nl GOTHIC LETTER NINE HUNDRED 402 | 10380..1039D ; ALetter # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU 403 | 103A0..103C3 ; ALetter # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA 404 | 103C8..103CF ; ALetter # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH 405 | 103D1..103D5 ; ALetter # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED 406 | 10400..1044F ; ALetter # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW 407 | 10450..1049D ; ALetter # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO 408 | 10800..10805 ; ALetter # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA 409 | 10808 ; ALetter # Lo CYPRIOT SYLLABLE JO 410 | 1080A..10835 ; ALetter # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO 411 | 10837..10838 ; ALetter # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE 412 | 1083C ; ALetter # Lo CYPRIOT SYLLABLE ZA 413 | 1083F ; ALetter # Lo CYPRIOT SYLLABLE ZO 414 | 10900..10915 ; ALetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 415 | 10A00 ; ALetter # Lo KHAROSHTHI LETTER A 416 | 10A10..10A13 ; ALetter # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 417 | 10A15..10A17 ; ALetter # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA 418 | 10A19..10A33 ; ALetter # Lo [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA 419 | 12000..1236E ; ALetter # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 420 | 12400..12462 ; ALetter # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 421 | 1D400..1D454 ; ALetter # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 422 | 1D456..1D49C ; ALetter # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A 423 | 1D49E..1D49F ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D 424 | 1D4A2 ; ALetter # L& MATHEMATICAL SCRIPT CAPITAL G 425 | 1D4A5..1D4A6 ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K 426 | 1D4A9..1D4AC ; ALetter # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q 427 | 1D4AE..1D4B9 ; ALetter # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D 428 | 1D4BB ; ALetter # L& MATHEMATICAL SCRIPT SMALL F 429 | 1D4BD..1D4C3 ; ALetter # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N 430 | 1D4C5..1D505 ; ALetter # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B 431 | 1D507..1D50A ; ALetter # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G 432 | 1D50D..1D514 ; ALetter # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q 433 | 1D516..1D51C ; ALetter # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y 434 | 1D51E..1D539 ; ALetter # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B 435 | 1D53B..1D53E ; ALetter # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G 436 | 1D540..1D544 ; ALetter # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M 437 | 1D546 ; ALetter # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O 438 | 1D54A..1D550 ; ALetter # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y 439 | 1D552..1D6A5 ; ALetter # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J 440 | 1D6A8..1D6C0 ; ALetter # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA 441 | 1D6C2..1D6DA ; ALetter # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA 442 | 1D6DC..1D6FA ; ALetter # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA 443 | 1D6FC..1D714 ; ALetter # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA 444 | 1D716..1D734 ; ALetter # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA 445 | 1D736..1D74E ; ALetter # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA 446 | 1D750..1D76E ; ALetter # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA 447 | 1D770..1D788 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA 448 | 1D78A..1D7A8 ; ALetter # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 449 | 1D7AA..1D7C2 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 450 | 1D7C4..1D7CB ; ALetter # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 451 | 452 | # Total code points: 21149 453 | 454 | # ================================================ 455 | 456 | 0027 ; MidLetter # Po APOSTROPHE 457 | 003A ; MidLetter # Po COLON 458 | 00B7 ; MidLetter # Po MIDDLE DOT 459 | 05F4 ; MidLetter # Po HEBREW PUNCTUATION GERSHAYIM 460 | 2019 ; MidLetter # Pf RIGHT SINGLE QUOTATION MARK 461 | 2027 ; MidLetter # Po HYPHENATION POINT 462 | 463 | # Total code points: 6 464 | 465 | # ================================================ 466 | 467 | 002C ; MidNum # Po COMMA 468 | 002E ; MidNum # Po FULL STOP 469 | 003B ; MidNum # Po SEMICOLON 470 | 037E ; MidNum # Po GREEK QUESTION MARK 471 | 0589 ; MidNum # Po ARMENIAN FULL STOP 472 | 060D ; MidNum # Po ARABIC DATE SEPARATOR 473 | 07F8 ; MidNum # Po NKO COMMA 474 | 2044 ; MidNum # Sm FRACTION SLASH 475 | FE10 ; MidNum # Po PRESENTATION FORM FOR VERTICAL COMMA 476 | FE13..FE14 ; MidNum # Po [2] PRESENTATION FORM FOR VERTICAL COLON..PRESENTATION FORM FOR VERTICAL SEMICOLON 477 | 478 | # Total code points: 11 479 | 480 | # ================================================ 481 | 482 | 0030..0039 ; Numeric # Nd [10] DIGIT ZERO..DIGIT NINE 483 | 0660..0669 ; Numeric # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE 484 | 066B..066C ; Numeric # Po [2] ARABIC DECIMAL SEPARATOR..ARABIC THOUSANDS SEPARATOR 485 | 06F0..06F9 ; Numeric # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE 486 | 07C0..07C9 ; Numeric # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE 487 | 0966..096F ; Numeric # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE 488 | 09E6..09EF ; Numeric # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE 489 | 0A66..0A6F ; Numeric # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE 490 | 0AE6..0AEF ; Numeric # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE 491 | 0B66..0B6F ; Numeric # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE 492 | 0BE6..0BEF ; Numeric # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE 493 | 0C66..0C6F ; Numeric # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE 494 | 0CE6..0CEF ; Numeric # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE 495 | 0D66..0D6F ; Numeric # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE 496 | 0E50..0E59 ; Numeric # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE 497 | 0ED0..0ED9 ; Numeric # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE 498 | 0F20..0F29 ; Numeric # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE 499 | 1040..1049 ; Numeric # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE 500 | 17E0..17E9 ; Numeric # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE 501 | 1810..1819 ; Numeric # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE 502 | 1946..194F ; Numeric # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE 503 | 19D0..19D9 ; Numeric # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE 504 | 1B50..1B59 ; Numeric # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE 505 | 104A0..104A9 ; Numeric # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 506 | 1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 507 | 508 | # Total code points: 282 509 | 510 | # ================================================ 511 | 512 | 005F ; ExtendNumLet # Pc LOW LINE 513 | 203F..2040 ; ExtendNumLet # Pc [2] UNDERTIE..CHARACTER TIE 514 | 2054 ; ExtendNumLet # Pc INVERTED UNDERTIE 515 | FE33..FE34 ; ExtendNumLet # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE 516 | FE4D..FE4F ; ExtendNumLet # Pc [3] DASHED LOW LINE..WAVY LOW LINE 517 | FF3F ; ExtendNumLet # Pc FULLWIDTH LOW LINE 518 | 519 | # Total code points: 10 520 | 521 | # EOF 522 | --------------------------------------------------------------------------------